def test_algorithm_z(self):
    """Compare the P and Q tables from algorithm_z with the full matrices.

    For every index pair (i, j) with i and j in 1..3, P[i][j] must equal the
    two row differences, and Q[i][j] the two column differences, read from
    the matrices returned by get_full_matrices at positions derived from m.
    """
    helpers = FourRussiansHelpers(
        lcs_delete_cost_function,
        lcs_insert_cost_function,
        substitute_cost_function2,
    )
    m, A, step_size_bound, first, second = helpers.prepare_parameters(
        "#baabab", "#ababaa"
    )
    storage = helpers.algorithm_y(m, A, step_size_bound)
    P, Q = helpers.algorithm_z(m, storage, first, second)
    _, row_diffs, col_diffs = get_full_matrices(
        helpers,
        first,
        second,
        lcs_delete_cost_function,
        lcs_insert_cost_function,
    )

    indices = range(1, 4)
    for i in indices:
        # First and last row index used for this value of i.
        top, bottom = (i - 1) * m + 1, i * m
        for j in indices:
            # First and last column index used for this value of j.
            left, right = (j - 1) * m + 1, j * m

            actual_rows = P[i][j]
            expected_rows = [row_diffs[top][right], row_diffs[bottom][right]]
            self.assertEqual(actual_rows, expected_rows)

            actual_columns = Q[i][j]
            expected_columns = [
                col_diffs[bottom][left],
                col_diffs[bottom][right],
            ]
            self.assertEqual(actual_columns, expected_columns)
def test_edit_distance(self):
    """Expect an edit distance of 4 between "#baabab" and "#ababaa".

    Uses the LCS delete/insert cost functions together with
    substitute_cost_function2.
    """
    helpers = FourRussiansHelpers(
        lcs_delete_cost_function,
        lcs_insert_cost_function,
        substitute_cost_function2,
    )
    m, A, step_size_bound, first, second = helpers.prepare_parameters(
        "#baabab", "#ababaa"
    )
    storage = helpers.algorithm_y(m, A, step_size_bound)
    distance = helpers.get_edit_distance(m, first, second, storage)
    self.assertEqual(distance, 4)
def four_russians(text_1, text_2):
    """Return the longest common subsequence of text_1 and text_2.

    LCS is treated as a special case of the Four Russians edit distance
    algorithm: the helpers are configured with the LCS delete, insert and
    substitute cost functions.

    Returns:
        The (lcs, length) pair produced by FourRussiansHelpers.get_lcs.
    """
    helpers = FourRussiansHelpers(
        lcs_delete_cost_function,
        lcs_insert_cost_function,
        lcs_substitute_cost_function,
    )
    prepared = helpers.prepare_parameters(text_1, text_2)
    m, A, step_size_bound, prepared_1, prepared_2 = prepared
    storage = helpers.algorithm_y(m, A, step_size_bound)
    subsequence, size = helpers.get_lcs(m, prepared_1, prepared_2, storage)
    return subsequence, size
def check_lcs(self, text_1, text_2, expected_lcs):
    """Assert that the restored LCS of text_1 and text_2 is expected_lcs.

    The LCS is rebuilt with restore_lcs from the P and Q tables returned by
    algorithm_z. Its length is checked first, with a message that shows both
    lengths and both sequences, and then the value itself is compared.
    """
    expected_length = len(expected_lcs)

    helpers = FourRussiansHelpers(
        lcs_delete_cost_function,
        lcs_insert_cost_function,
        lcs_substitute_cost_function,
    )
    prepared = helpers.prepare_parameters(text_1, text_2)
    m, A, step_size_bound, prepared_1, prepared_2 = prepared
    storage = helpers.algorithm_y(m, A, step_size_bound)
    P, Q = helpers.algorithm_z(m, storage, prepared_1, prepared_2)
    lcs = helpers.restore_lcs(prepared_1, prepared_2, P, Q, m)

    actual_length = len(lcs)
    length_message = (
        "Expected lcs of length {0}, actual lcs of length {1}. "
        "Actual result: {2}, expected result: {3}"
    ).format(expected_length, actual_length, lcs, expected_lcs)
    self.assertEqual(actual_length, expected_length, length_message)
    self.assertEqual(lcs, expected_lcs)
def edit_distance_four_russians(
        text_1,
        text_2,
        delete_cost_function=lcs_delete_cost_function,
        insert_cost_function=lcs_insert_cost_function,
        substitute_cost_function=lcs_substitute_cost_function):
    """Return the edit distance between text_1 and text_2.

    Uses the "Four Russians" method, in the algorithm proposed by
    William J. Masek and Michael S. Paterson.

    Args:
        text_1: First input text.
        text_2: Second input text.
        delete_cost_function: Cost function passed to FourRussiansHelpers
            for deletions; defaults to the LCS delete cost.
        insert_cost_function: Cost function passed to FourRussiansHelpers
            for insertions; defaults to the LCS insert cost.
        substitute_cost_function: Cost function passed to
            FourRussiansHelpers for substitutions; defaults to the LCS
            substitute cost.

    Returns:
        The value returned by FourRussiansHelpers.get_edit_distance.
    """
    helpers = FourRussiansHelpers(
        delete_cost_function,
        insert_cost_function,
        substitute_cost_function,
    )
    prepared = helpers.prepare_parameters(text_1, text_2)
    m, A, step_size_bound, prepared_1, prepared_2 = prepared
    storage = helpers.algorithm_y(m, A, step_size_bound)
    return helpers.get_edit_distance(m, prepared_1, prepared_2, storage)
def test_algorithm_y(self):
    """Compare entries of the algorithm_y storage with the full matrices.

    For every (i, j) with i and j in 0..4, storage is indexed by a slice of
    each text and by the row and column differences along two edges of the
    window starting at (i, j). The stored value must equal the pair of
    difference lists read from the opposite edges of that window.
    """
    helpers = FourRussiansHelpers(
        lcs_delete_cost_function,
        lcs_insert_cost_function,
        substitute_cost_function2,
    )
    m, A, step_size_bound, first, second = helpers.prepare_parameters(
        "#baabab", "#ababaa"
    )
    storage = helpers.algorithm_y(m, A, step_size_bound)
    _, row_diffs, col_diffs = get_full_matrices(
        helpers,
        first,
        second,
        lcs_delete_cost_function,
        lcs_insert_cost_function,
    )

    for i in range(5):
        for j in range(5):
            # Walk the nested storage one key at a time: text slices first,
            # then the differences along column j and along row i.
            by_first_text = storage[first[i + 1:i + 3]]
            by_second_text = by_first_text[second[j + 1:j + 3]]
            by_row_diffs = by_second_text[
                (row_diffs[i + 1][j], row_diffs[i + 2][j])
            ]
            actual = by_row_diffs[
                (col_diffs[i][j + 1], col_diffs[i][j + 2])
            ]

            # Differences along column j + 2 and along row i + 2.
            expected = (
                [row_diffs[i + 1][j + 2], row_diffs[i + 2][j + 2]],
                [col_diffs[i + 2][j + 1], col_diffs[i + 2][j + 2]],
            )
            self.assertEqual(actual, expected)