def test_algorithm_z(self):
    """Compare the P and Q tables from algorithm_z with the full matrices.

    For block indices i, j in 1..3, P[i][j] must equal the two entries of
    the row-difference matrix at rows (i - 1) * m + 1 and i * m in column
    j * m, and Q[i][j] must equal the two entries of the column-difference
    matrix in row i * m at columns (j - 1) * m + 1 and j * m.
    """
    helpers = FourRussiansHelpers(
        lcs_delete_cost_function,
        lcs_insert_cost_function,
        substitute_cost_function2)
    prepared = helpers.prepare_parameters("#baabab", "#ababaa")
    m, A, step_size_bound, text_1, text_2 = prepared
    storage = helpers.algorithm_y(m, A, step_size_bound)
    P, Q = helpers.algorithm_z(m, storage, text_1, text_2)

    # Reference matrices; the first returned value is not needed here.
    _, row_diffs, col_diffs = get_full_matrices(
        helpers, text_1, text_2,
        lcs_delete_cost_function, lcs_insert_cost_function)

    for i in range(1, 4):
      for j in range(1, 4):
        expected_p = [row_diffs[(i - 1) * m + 1][j * m],
                      row_diffs[i * m][j * m]]
        self.assertEqual(P[i][j], expected_p)

        expected_q = [col_diffs[i * m][(j - 1) * m + 1],
                      col_diffs[i * m][j * m]]
        self.assertEqual(Q[i][j], expected_q)
  def test_edit_distance(self):
    """get_edit_distance must report 4 for "#baabab" vs "#ababaa".

    Uses the lcs delete/insert cost functions together with
    substitute_cost_function2.
    """
    helpers = FourRussiansHelpers(
        lcs_delete_cost_function,
        lcs_insert_cost_function,
        substitute_cost_function2)
    prepared = helpers.prepare_parameters("#baabab", "#ababaa")
    m, A, step_size_bound, text_1, text_2 = prepared
    storage = helpers.algorithm_y(m, A, step_size_bound)

    self.assertEqual(
        helpers.get_edit_distance(m, text_1, text_2, storage), 4)
Example #3
0
def four_russians(text_1, text_2):
    """Compute the longest common subsequence of text_1 and text_2.

    The LCS is obtained as a special case of the Four Russians edit
    distance algorithm by plugging in the lcs delete, insert and
    substitute cost functions.

    Returns:
      The pair (lcs, length) as produced by FourRussiansHelpers.get_lcs.
    """
    helpers = FourRussiansHelpers(
        lcs_delete_cost_function,
        lcs_insert_cost_function,
        lcs_substitute_cost_function)

    # prepare_parameters also returns the (possibly adjusted) input texts,
    # which are the ones passed on to get_lcs.
    prepared = helpers.prepare_parameters(text_1, text_2)
    m, A, step_size_bound, prepared_1, prepared_2 = prepared

    lookup = helpers.algorithm_y(m, A, step_size_bound)
    lcs, length = helpers.get_lcs(m, prepared_1, prepared_2, lookup)
    return lcs, length
  def test_restore_matrix(self):
    """restore_matrix must rebuild the full 7x7 matrix for the sample texts.

    The call passes the two texts, two identical sequences 0..6 and the
    value 6; the result is compared cell by cell with the literal below.
    """
    expected_matrix = [
        [0, 1, 2, 3, 4, 5, 6],
        [1, 2, 1, 2, 3, 4, 5],
        [2, 1, 2, 1, 2, 3, 4],
        [3, 2, 2, 2, 2, 2, 3],
        [4, 3, 2, 3, 2, 3, 4],
        [5, 4, 3, 2, 3, 2, 3],
        [6, 5, 4, 3, 2, 3, 4],
    ]

    helpers = FourRussiansHelpers(
        lcs_delete_cost_function,
        lcs_insert_cost_function,
        substitute_cost_function2)
    actual_matrix = helpers.restore_matrix(
        "#baabab", "#ababaa",
        [0, 1, 2, 3, 4, 5, 6], [0, 1, 2, 3, 4, 5, 6], 6)

    self.assertEqual(actual_matrix, expected_matrix)
Example #5
0
def edit_distance_four_russians(
        text_1,
        text_2,
        delete_cost_function=lcs_delete_cost_function,
        insert_cost_function=lcs_insert_cost_function,
        substitute_cost_function=lcs_substitute_cost_function):
    """Compute the edit distance between text_1 and text_2.

    Implements the algorithm proposed by William J. Masek and
    Michael S. Paterson, using the method of "Four Russians".

    Args:
      text_1: first input text.
      text_2: second input text.
      delete_cost_function: cost function for deletions; defaults to the
        lcs delete cost function.
      insert_cost_function: cost function for insertions; defaults to the
        lcs insert cost function.
      substitute_cost_function: cost function for substitutions; defaults
        to the lcs substitute cost function.

    Returns:
      The value returned by FourRussiansHelpers.get_edit_distance.
    """
    helpers = FourRussiansHelpers(
        delete_cost_function,
        insert_cost_function,
        substitute_cost_function)

    # prepare_parameters also returns the (possibly adjusted) input texts,
    # which are the ones passed on to get_edit_distance.
    prepared = helpers.prepare_parameters(text_1, text_2)
    m, A, step_size_bound, prepared_1, prepared_2 = prepared

    lookup = helpers.algorithm_y(m, A, step_size_bound)
    return helpers.get_edit_distance(m, prepared_1, prepared_2, lookup)
 def test_get_all_strings(self):
   """Run check_get_all_strings on a two-letter and a one-letter case."""
   helpers = FourRussiansHelpers(
       lcs_delete_cost_function,
       lcs_insert_cost_function,
       lcs_substitute_cost_function)

   # Each case holds the arguments passed to check_get_all_strings after
   # the helper object, in the order the checker expects them.
   cases = (
       (2, ['a', 'b'], '#', ["#aa", "#ab", "#ba", "#bb"]),
       (3, ['z'], '?', ["?zzz"]),
   )
   for case in cases:
     self.check_get_all_strings(helpers, *case)
 def test_storage(self):
   """Run check_storage four times against one shared storage dict.

   The same dict object is handed to every call, in the order listed, and
   the first and last cases are identical.
   """
   helpers = FourRussiansHelpers(
       lcs_delete_cost_function,
       lcs_insert_cost_function,
       lcs_substitute_cost_function)
   shared_storage = {}

   # Arguments passed to check_storage between the helper object and the
   # shared storage dict. Order matters because the dict is reused.
   cases = (
       ([0, -1], [1, 0], "ab", "ba", [0, 1], [0, -1]),
       ([-10, -100], [1, 11], "ababa", "ba", [1, 1], [1, -1]),
       ([0, -1], [1, 11], "ababa", "ba", [1, 1], [1, -1]),
       ([0, -1], [1, 0], "ab", "ba", [0, 1], [0, -1]),
   )
   for case in cases:
     self.check_storage(helpers, *case, shared_storage)
 def test_prepare_parameters(self):
   """Run check_prepare_parameters on three pairs of input texts."""
   helpers = FourRussiansHelpers(
       lcs_delete_cost_function,
       lcs_insert_cost_function,
       lcs_substitute_cost_function)

   # Each case: the two input texts followed by the four remaining values
   # that check_prepare_parameters takes.
   cases = (
       ("#abab", "#abab", 2, ['a', 'b'], "#abab", "#abab"),
       ("#ababa", "#abc", 2, ['a', 'b', 'c'], "#ababa#", "#abc#"),
       ("#11111111", "#000", 3, ['0', '1'], "#11111111#", "#000"),
   )
   for case in cases:
     self.check_prepare_parameters(helpers, *case)
  def test_algorithm_y(self):
    """Check the algorithm_y lookup table against the full matrices.

    For every i, j in 0..4 the table is indexed by the two-character
    slices text_1[i + 1:i + 3] and text_2[j + 1:j + 3], then by a pair of
    row differences taken from column j and a pair of column differences
    taken from row i. The stored value must be the pair of row differences
    in column j + 2 together with the pair of column differences in
    row i + 2.
    """
    helpers = FourRussiansHelpers(
        lcs_delete_cost_function,
        lcs_insert_cost_function,
        substitute_cost_function2)
    prepared = helpers.prepare_parameters("#baabab", "#ababaa")
    m, A, step_size_bound, text_1, text_2 = prepared
    storage = helpers.algorithm_y(m, A, step_size_bound)

    # Reference matrices; the first returned value is not needed here.
    _, row_diffs, col_diffs = get_full_matrices(
        helpers, text_1, text_2,
        lcs_delete_cost_function, lcs_insert_cost_function)

    for i in range(0, 5):
      for j in range(0, 5):
        # Lookup keys: the two substrings plus the known differences in
        # column j and row i.
        substring_1 = text_1[i + 1:i + 3]
        substring_2 = text_2[j + 1:j + 3]
        known_row_diffs = (row_diffs[i + 1][j], row_diffs[i + 2][j])
        known_col_diffs = (col_diffs[i][j + 1], col_diffs[i][j + 2])

        # Expected value: the differences in column j + 2 and row i + 2.
        expected_row_diffs = [row_diffs[i + 1][j + 2],
                              row_diffs[i + 2][j + 2]]
        expected_col_diffs = [col_diffs[i + 2][j + 1],
                              col_diffs[i + 2][j + 2]]

        by_substrings = storage[substring_1][substring_2]
        actual = by_substrings[known_row_diffs][known_col_diffs]
        self.assertEqual(actual, (expected_row_diffs, expected_col_diffs))
  def test_restore_lcs_part(self):
    """Exercise restore_lcs_part on three small inputs.

    Each call returns a reversed LCS fragment and two indices, unpacked
    here as (reversed_lcs, i, j). The second case reverses the fragment
    before comparing it with the expected text.
    """
    helpers = FourRussiansHelpers(
        lcs_delete_cost_function,
        lcs_insert_cost_function,
        lcs_substitute_cost_function)
    restore = helpers.restore_lcs_part

    # Case 1: a single common character.
    first_row = [0, 1, 1]
    first_column = [0, 1, 1]
    reversed_lcs, i, j = restore(
        "#aa", "#ab", first_row, first_column, 2, 2, 2)
    self.assertEqual(reversed_lcs, "a")
    self.assertEqual(i, 0)
    self.assertEqual(j, 0)

    # Case 2: three common characters, compared in forward order.
    first_row = [0, 1, 1, 1, 1, 1]
    first_column = [0, 1, 1, 1, 1, 1]
    reversed_lcs, i, j = restore(
        "#aaccc", "#aabc#", first_row, first_column, 5, 5, 5)
    forward_lcs = reversed_lcs[::-1]
    self.assertEqual(forward_lcs, "aac")
    self.assertEqual(i, 0)
    self.assertEqual(j, 0)

    # Case 3: nothing in common; the expected indices are 0 and 3.
    first_row = [0, 1, 1, 1]
    first_column = [0, 1, 1, 1]
    reversed_lcs, i, j = restore(
        "#aaa", "#bbb", first_row, first_column, 3, 3, 3)
    self.assertEqual(reversed_lcs, "")
    self.assertEqual(i, 0)
    self.assertEqual(j, 3)
  def check_lcs(self, text_1, text_2, expected_lcs):
    """Assert that restore_lcs yields expected_lcs for the two texts.

    Runs prepare_parameters, algorithm_y and algorithm_z, then restores
    the LCS from the resulting P and Q tables. The length is checked first
    with a descriptive failure message, followed by the exact value.
    """
    expected_length = len(expected_lcs)

    helpers = FourRussiansHelpers(
        lcs_delete_cost_function,
        lcs_insert_cost_function,
        lcs_substitute_cost_function)
    prepared = helpers.prepare_parameters(text_1, text_2)
    m, A, step_size_bound, text_1, text_2 = prepared
    storage = helpers.algorithm_y(m, A, step_size_bound)
    P, Q = helpers.algorithm_z(m, storage, text_1, text_2)
    lcs = helpers.restore_lcs(text_1, text_2, P, Q, m)

    actual_length = len(lcs)
    length_message = (
        "Expected lcs of length {0}, actual lcs of length {1}. "
        "Actual result: {2}, expected result: {3}"
    ).format(expected_length, actual_length, lcs, expected_lcs)
    self.assertEqual(actual_length, expected_length, length_message)
    self.assertEqual(lcs, expected_lcs)