def test_algorithm_z(self):
    """Compare the P and Q tables from algorithm_z with the full matrices.

    For every block index (i, j) in 1..3 the test expects:
      * P[i][j] to hold the row-difference entries taken from the first and
        last row of the block, both read in the block's last column;
      * Q[i][j] to hold the column-difference entries taken from the first
        and last column of the block, both read in the block's last row.

    The reference values come from get_full_matrices.
    """
    first_text = "#baabab"
    second_text = "#ababaa"

    helpers = FourRussiansHelpers(lcs_delete_cost_function,
                                  lcs_insert_cost_function,
                                  substitute_cost_function2)
    prepared = helpers.prepare_parameters(first_text, second_text)
    m, A, step_size_bound, first_text, second_text = prepared
    storage = helpers.algorithm_y(m, A, step_size_bound)
    P, Q = helpers.algorithm_z(m, storage, first_text, second_text)

    _, row_diffs, column_diffs = get_full_matrices(
        helpers, first_text, second_text,
        lcs_delete_cost_function,
        lcs_insert_cost_function)

    for i in range(1, 4):
      # Rows of the full matrices that bound block row i.
      first_row = (i - 1) * m + 1
      last_row = i * m
      for j in range(1, 4):
        # Columns of the full matrices that bound block column j.
        first_col = (j - 1) * m + 1
        last_col = j * m

        expected_p = [row_diffs[first_row][last_col],
                      row_diffs[last_row][last_col]]
        self.assertEqual(P[i][j], expected_p)

        expected_q = [column_diffs[last_row][first_col],
                      column_diffs[last_row][last_col]]
        self.assertEqual(Q[i][j], expected_q)
  def test_edit_distance(self):
    """Expect get_edit_distance to report 4 for the two fixed sample texts.

    The helper is built with the LCS delete/insert cost functions and
    substitute_cost_function2.
    """
    helpers = FourRussiansHelpers(lcs_delete_cost_function,
                                  lcs_insert_cost_function,
                                  substitute_cost_function2)
    prepared = helpers.prepare_parameters("#baabab", "#ababaa")
    m, A, step_size_bound, prepared_1, prepared_2 = prepared

    storage = helpers.algorithm_y(m, A, step_size_bound)
    distance = helpers.get_edit_distance(m, prepared_1, prepared_2, storage)

    self.assertEqual(distance, 4)
예제 #3
0
def four_russians(text_1, text_2):
    """Compute a longest common subsequence of text_1 and text_2.

    The LCS is obtained as a special case of the Four Russians edit distance
    algorithm: the helper is configured with the LCS delete, insert and
    substitute cost functions.

    Returns:
        A ``(lcs, length)`` tuple as produced by
        ``FourRussiansHelpers.get_lcs``.
    """
    helpers = FourRussiansHelpers(lcs_delete_cost_function,
                                  lcs_insert_cost_function,
                                  lcs_substitute_cost_function)

    # prepare_parameters hands back its own versions of both texts; those
    # are the ones passed on to get_lcs.
    prepared = helpers.prepare_parameters(text_1, text_2)
    m, A, step_size_bound, prepared_1, prepared_2 = prepared

    storage = helpers.algorithm_y(m, A, step_size_bound)
    subsequence, size = helpers.get_lcs(m, prepared_1, prepared_2, storage)
    return subsequence, size
  def check_lcs(self, text_1, text_2, expected_lcs):
    """Assert that the LCS restored for the two texts equals expected_lcs.

    The LCS is rebuilt with restore_lcs from the P and Q tables returned by
    algorithm_z. The lengths are compared first so that a length mismatch
    fails with a message showing both lengths and both sequences; the
    sequences themselves are compared afterwards.
    """
    wanted_length = len(expected_lcs)

    helpers = FourRussiansHelpers(lcs_delete_cost_function,
                                  lcs_insert_cost_function,
                                  lcs_substitute_cost_function)
    prepared = helpers.prepare_parameters(text_1, text_2)
    m, A, step_size_bound, text_1, text_2 = prepared
    storage = helpers.algorithm_y(m, A, step_size_bound)
    P, Q = helpers.algorithm_z(m, storage, text_1, text_2)
    actual_lcs = helpers.restore_lcs(text_1, text_2, P, Q, m)

    actual_length = len(actual_lcs)
    length_mismatch = (
        "Expected lcs of length {0}, actual lcs of length {1}. "
        "Actual result: {2}, expected result: {3}"
    ).format(wanted_length, actual_length, actual_lcs, expected_lcs)

    self.assertEqual(actual_length, wanted_length, length_mismatch)
    self.assertEqual(actual_lcs, expected_lcs)
예제 #5
0
def edit_distance_four_russians(
        text_1,
        text_2,
        delete_cost_function=lcs_delete_cost_function,
        insert_cost_function=lcs_insert_cost_function,
        substitute_cost_function=lcs_substitute_cost_function):
    """Compute the edit distance between text_1 and text_2.

    Uses the algorithm proposed by William J. Masek and Michael S. Paterson,
    based on the "Four Russians" method.

    Args:
        text_1: First input text.
        text_2: Second input text.
        delete_cost_function: Cost function for deletions; defaults to the
            LCS delete cost function.
        insert_cost_function: Cost function for insertions; defaults to the
            LCS insert cost function.
        substitute_cost_function: Cost function for substitutions; defaults
            to the LCS substitute cost function.

    Returns:
        The value produced by ``FourRussiansHelpers.get_edit_distance``.
    """
    helpers = FourRussiansHelpers(delete_cost_function,
                                  insert_cost_function,
                                  substitute_cost_function)

    # The texts returned by prepare_parameters replace the raw inputs for
    # the final distance computation.
    prepared = helpers.prepare_parameters(text_1, text_2)
    m, A, step_size_bound, prepared_1, prepared_2 = prepared

    storage = helpers.algorithm_y(m, A, step_size_bound)
    return helpers.get_edit_distance(m, prepared_1, prepared_2, storage)
  def test_algorithm_y(self):
    """Check the table built by algorithm_y against the full matrices.

    For every offset (i, j) in 0..4 the table is indexed, in order, by:
      1. the two-character slice of text_1 starting at i + 1,
      2. the two-character slice of text_2 starting at j + 1,
      3. the row differences of rows i + 1 and i + 2 in column j,
      4. the column differences of columns j + 1 and j + 2 in row i.

    The stored value must be the pair made of the row differences of the
    same two rows in column j + 2 and the column differences of the same
    two columns in row i + 2.
    """
    # NOTE(review): the slice width of 2 presumably matches the m returned
    # by prepare_parameters for these inputs -- confirm.
    first_text = "#baabab"
    second_text = "#ababaa"

    helpers = FourRussiansHelpers(lcs_delete_cost_function,
                                  lcs_insert_cost_function,
                                  substitute_cost_function2)
    prepared = helpers.prepare_parameters(first_text, second_text)
    m, A, step_size_bound, first_text, second_text = prepared
    storage = helpers.algorithm_y(m, A, step_size_bound)

    _, row_diffs, column_diffs = get_full_matrices(
        helpers, first_text, second_text,
        lcs_delete_cost_function,
        lcs_insert_cost_function)

    for i in range(5):
      for j in range(5):
        # Walk the nested table one level at a time, in key order.
        by_first_slice = storage[first_text[i + 1:i + 3]]
        by_second_slice = by_first_slice[second_text[j + 1:j + 3]]
        left_steps = (row_diffs[i + 1][j], row_diffs[i + 2][j])
        by_left_steps = by_second_slice[left_steps]
        top_steps = (column_diffs[i][j + 1], column_diffs[i][j + 2])
        stored = by_left_steps[top_steps]

        right_steps = [row_diffs[i + 1][j + 2], row_diffs[i + 2][j + 2]]
        bottom_steps = [column_diffs[i + 2][j + 1],
                        column_diffs[i + 2][j + 2]]

        self.assertEqual(stored, (right_steps, bottom_steps))