def _get_alignment_result(self, fkp, row, col, reference, output):
        """
        Build the alignment by walking back through ``fkp`` from a start cell.

        Starting at the cell addressed by ``row`` / ``col``, the per-column
        helper is called repeatedly; each call hands back updated positions
        and a flag telling whether the first cell has been reached. Once it
        has, tokens are merged via ``merge_none_tokens()`` and the result is
        returned.

        :param fkp: the f(k,p) array produced by the ukk computation. Per the
            original description: a two dimensional array with max_k rows
            (indices correspond to d(i,j) diagonal numbers) and max_p columns
            (indices range from -1 to the largest possible d(i,j) cell value).
        :param row: row position of the cell the walk starts from.
        :param col: column position of the cell the walk starts from.
            NOTE(review): the original docstring warned that row/col are easy
            to confuse here -- check the helper for the exact orientation.
        :param reference: reference string
        :param output: output string
        :return: the populated AlignmentResult
        """

        aligned = AlignmentResult()
        consumed_output = 0
        while True:
            # Each step updates the cursor and the running output counter.
            done, row, col, consumed_output = self._get_me_the_result_by_looping_through_each_col(
                fkp, row, col, consumed_output, reference, output, aligned)
            if done:
                # The helper reported that the first cell was reached.
                break

        aligned.merge_none_tokens()
        return aligned
Ejemplo n.º 2
0
 def test_4(self):
     """
     'b' vs 'batman': expect distance 5 and a single aligned entry pairing
     reference "b" with all six output characters.
     """
     outcome = self.calculator.get_distance('b', 'batman')
     self.assertEqual(outcome.distance, 5)

     wanted = AlignmentResult()
     wanted.add_token(ref_token="b",
                      output_tokens=list("batman"),
                      add_to_left=False)

     # Show both sides before comparing, to ease debugging on failure.
     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)
Ejemplo n.º 3
0
 def test_4(self):
     """
     'b' vs 'batman': expect distance 5 and a single aligned entry pairing
     reference "b" with all six output characters. Also prints the values
     returned by calculate_three_kinds_of_distance() for the expected result.
     """
     outcome = self.calculator.get_distance('b', 'batman')
     self.assertEqual(outcome.distance, 5)

     wanted = AlignmentResult()
     wanted.add_token(ref_token="b",
                      output_tokens=list("batman"),
                      add_to_left=False)

     # Printed only; these four values are not asserted on.
     total, subs, ins, dels = wanted.calculate_three_kinds_of_distance()
     print(total, subs, ins, dels)

     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)
Ejemplo n.º 4
0
    def test_files(self):
        """
        Compare the contents of 'r_file.txt' (reference) and 'o_file.txt'
        (output) and expect a distance of 373.

        An expected alignment is also loaded from 'resultoffilr.txt' and the
        values from calculate_three_kinds_of_distance() are printed.
        """
        # Context managers guarantee the handles are closed even if read()
        # raises; the original left both files open.
        with open('r_file.txt', 'r') as reference_file:
            reference_text = reference_file.read()

        with open('o_file.txt', 'r') as output_file:
            output_text = output_file.read()

        result = self.wer_calculator.get_distance(reference_text, output_text)
        print(result.distance)
        self.assertEqual(result.distance, 373)

        expected_alignment_result = AlignmentResult()
        expected_alignment_result.load_from_file('resultoffilr.txt',
                                                 expected_alignment_result)
        # NOTE(review): the loaded expected result is only printed below and
        # never compared with result.alignment_result -- confirm that is intended.
        distance, substitution, insertion, deletion = expected_alignment_result.calculate_three_kinds_of_distance(
        )
        print(distance, substitution, insertion, deletion)
Ejemplo n.º 5
0
 def test_first(self):
     """
     'abc' vs 'dfg': expect distance 3 and a one-to-one alignment
     a->d, b->f, c->g.
     """
     outcome = self.calculator.get_distance('abc', 'dfg')
     self.assertEqual(outcome.distance, 3)

     wanted = AlignmentResult()
     # Each reference character is paired with the output character at the
     # same position.
     for ref_char, out_char in zip('abc', 'dfg'):
         wanted.add_token(ref_token=ref_char,
                          output_tokens=[out_char],
                          add_to_left=False)

     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)
Ejemplo n.º 6
0
    def test_first(self):
        """
        'abc' vs 'dfg': expect distance 3 and a one-to-one alignment
        a->d, b->f, c->g. Also prints the values returned by
        calculate_three_kinds_of_distance() for the expected result.
        """
        outcome = self.calculator.get_distance('abc', 'dfg')
        self.assertEqual(outcome.distance, 3)

        wanted = AlignmentResult()
        # Each reference character is paired with the output character at
        # the same position.
        for ref_char, out_char in zip('abc', 'dfg'):
            wanted.add_token(ref_token=ref_char,
                             output_tokens=[out_char],
                             add_to_left=False)

        # Printed only; these four values are not asserted on.
        total, subs, ins, dels = wanted.calculate_three_kinds_of_distance()
        print(total, subs, ins, dels)

        print(outcome.alignment_result)
        print(wanted)
        self.assertEqual(outcome.alignment_result, wanted)
Ejemplo n.º 7
0
 def test_2nd(self):
     """
     'AV' vs 'Abc': expect distance 2 with "A" aligned to ["A", "b"] and
     "V" aligned to ["c"]. Also prints the values returned by
     calculate_three_kinds_of_distance() for the expected result.
     """
     outcome = self.calculator.get_distance('AV', 'Abc')
     self.assertEqual(outcome.distance, 2)

     wanted = AlignmentResult()
     expected_pairs = (
         ("A", ["A", "b"]),
         ("V", ["c"]),
     )
     for ref_char, out_chars in expected_pairs:
         wanted.add_token(ref_token=ref_char,
                          output_tokens=out_chars,
                          add_to_left=False)

     # Printed only; these four values are not asserted on.
     total, subs, ins, dels = wanted.calculate_three_kinds_of_distance()
     print(total, subs, ins, dels)

     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)
Ejemplo n.º 8
0
 def test_2nd(self):
     """
     'AV' vs 'Abc': expect distance 2 with "A" aligned to ["A", "b"] and
     "V" aligned to ["c"].
     """
     outcome = self.calculator.get_distance('AV', 'Abc')
     self.assertEqual(outcome.distance, 2)

     wanted = AlignmentResult()
     expected_pairs = (
         ("A", ["A", "b"]),
         ("V", ["c"]),
     )
     for ref_char, out_chars in expected_pairs:
         wanted.add_token(ref_token=ref_char,
                          output_tokens=out_chars,
                          add_to_left=False)

     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)
from transcription_compare.levenshtein_distance_calculator import UKKLevenshteinDistanceCalculator
from transcription_compare.tokenizer import CharacterTokenizer, WordTokenizer
from transcription_compare.utils import SimpleReferenceCombinationGenerator

from transcription_compare.results import AlignmentResult
# Hand-build an AlignmentResult, one add_token() call per aligned entry.
# Every call passes add_to_left=False.
alignment_result = AlignmentResult()
# Entries with ref_token=None carry output tokens only
# (presumably insertions -- confirm against AlignmentResult).
alignment_result.add_token(ref_token=None,
                           output_tokens=["1"],
                           add_to_left=False)
alignment_result.add_token(ref_token=None,
                           output_tokens=["2"],
                           add_to_left=False)
alignment_result.add_token(ref_token=None,
                           output_tokens=["3"],
                           add_to_left=False)
alignment_result.add_token(ref_token="1",
                           output_tokens=["4"],
                           add_to_left=False)
alignment_result.add_token(ref_token=None,
                           output_tokens=["5"],
                           add_to_left=False)
# One reference token paired with several output tokens.
alignment_result.add_token(ref_token="ha",
                           output_tokens=["in", "and", "some"],
                           add_to_left=False)
alignment_result.add_token(ref_token="someday",
                           output_tokens=["days"],
                           add_to_left=False)
# Spelled-out number in the reference paired with a digit in the output.
alignment_result.add_token(ref_token="one",
                           output_tokens=["1"],
                           add_to_left=False)
alignment_result.add_token(ref_token="two",
Ejemplo n.º 10
0
            if distance < old_distance:
                old_distance = distance
                tmp_result = x
    if tmp_result is None:
        return None
    calculator2 = UKKLevenshteinDistanceCalculator(tokenizer=WordTokenizer(),
                                                   get_alignment_result=True)
    update_result = calculator2.get_distance(tmp_result,
                                             output_string).alignment_result
    return update_result


# NOTE(review): `inflect` is not imported in the visible part of this file --
# confirm it is imported further up.
p = inflect.engine()

# Hand-build an AlignmentResult, one add_token() call per aligned entry.
alignment_result = AlignmentResult()

alignment_result.add_token(ref_token="w",
                           output_tokens=["w"],
                           add_to_left=False)
# Digit in the reference paired with a letter in the output.
alignment_result.add_token(ref_token="5",
                           output_tokens=["e"],
                           add_to_left=False)
alignment_result.add_token(ref_token="r",
                           output_tokens=["r"],
                           add_to_left=False)
# Reference tokens with an empty output_tokens list.
alignment_result.add_token(ref_token="g", output_tokens=[], add_to_left=False)
alignment_result.add_token(ref_token="2", output_tokens=[], add_to_left=False)
# Positional call: add_to_left is left at its default here, unlike the
# surrounding calls.
alignment_result.add_token("1", ["one"])

# Digit reference token paired with three output tokens.
alignment_result.add_token("21", ["twenty-one", 'a', 'c'], add_to_left=False)
Ejemplo n.º 11
0
 def test_5(self):
     """
     'AVERY' vs 'GARVEY': expect distance 3. The expected alignment is built
     from the pairs below and then passed through merge_none_tokens() before
     being compared with the calculator's alignment.
     """
     outcome = self.calculator.get_distance('AVERY', 'GARVEY')
     self.assertEqual(outcome.distance, 3)

     wanted = AlignmentResult()
     expected_pairs = (
         ("A", ["G", "A", "R"]),
         ("V", ["V"]),
         ("E", ["E"]),
         ("R", []),  # reference token with no output tokens
         ("Y", ["Y"]),
     )
     for ref_char, out_chars in expected_pairs:
         wanted.add_token(ref_token=ref_char,
                          output_tokens=out_chars,
                          add_to_left=False)
     wanted.merge_none_tokens()

     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)
Ejemplo n.º 12
0
 def test_9(self):
     """
     'helloa a a ?' vs 'HHHHHHHoooooo': expect distance 13, with "h" taking
     two "H" tokens and every other reference character taking one output
     token. Prints the computed alignment and distance up front, and the
     values from calculate_three_kinds_of_distance() for the expected result.
     """
     outcome = self.calculator.get_distance('helloa a a ?', 'HHHHHHHoooooo')
     print(outcome.alignment_result)
     print(outcome.distance)
     self.assertEqual(outcome.distance, 13)

     wanted = AlignmentResult()
     expected_pairs = (
         ("h", ["H", "H"]),
         ("e", ["H"]),
         ("l", ["H"]),
         ("l", ["H"]),
         ("o", ["H"]),
         ("a", ["H"]),
         (" ", ["o"]),
         ("a", ["o"]),
         (" ", ["o"]),
         ("a", ["o"]),
         (" ", ["o"]),
         ("?", ["o"]),
     )
     for ref_char, out_chars in expected_pairs:
         wanted.add_token(ref_token=ref_char,
                          output_tokens=out_chars,
                          add_to_left=False)

     # Printed only; these four values are not asserted on.
     total, subs, ins, dels = wanted.calculate_three_kinds_of_distance()
     print(total, subs, ins, dels)

     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)
Ejemplo n.º 13
0
 def test_9(self):
     """
     'happyeveryday' vs 'happybirthday': expect distance 5 and a strictly
     one-to-one alignment, each reference character paired with the output
     character at the same position.
     """
     reference_text = 'happyeveryday'
     output_text = 'happybirthday'

     outcome = self.calculator.get_distance(reference_text, output_text)
     self.assertEqual(outcome.distance, 5)

     wanted = AlignmentResult()
     # Both strings have 13 characters, so zip pairs them position by position.
     for ref_char, out_char in zip(reference_text, output_text):
         wanted.add_token(ref_token=ref_char,
                          output_tokens=[out_char],
                          add_to_left=False)

     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)
Ejemplo n.º 14
0
 def test_9(self):
     """
     'helloa a a ?' vs 'HHHHHHHoooooo': expect distance 13, with "h" taking
     two "H" tokens and every other reference character taking one output
     token.
     """
     outcome = self.calculator.get_distance('helloa a a ?', 'HHHHHHHoooooo')
     self.assertEqual(outcome.distance, 13)

     wanted = AlignmentResult()
     expected_pairs = (
         ("h", ["H", "H"]),
         ("e", ["H"]),
         ("l", ["H"]),
         ("l", ["H"]),
         ("o", ["H"]),
         ("a", ["H"]),
         (" ", ["o"]),
         ("a", ["o"]),
         (" ", ["o"]),
         ("a", ["o"]),
         (" ", ["o"]),
         ("?", ["o"]),
     )
     for ref_char, out_chars in expected_pairs:
         wanted.add_token(ref_token=ref_char,
                          output_tokens=out_chars,
                          add_to_left=False)

     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)
Ejemplo n.º 15
0
 def test_8(self):
     """
     'jijizhazha' vs 'hahahaaaa???': expect distance 10. The leading "j" has
     no output tokens and the final "a" absorbs the trailing "?" characters.
     """
     outcome = self.calculator.get_distance('jijizhazha', 'hahahaaaa???')
     self.assertEqual(outcome.distance, 10)

     wanted = AlignmentResult()
     expected_pairs = (
         ("j", []),
         ("i", ["h"]),
         ("j", ["a"]),
         ("i", ["h"]),
         ("z", ["a"]),
         ("h", ["h"]),
         ("a", ["a"]),
         ("z", ["a"]),
         ("h", ["a"]),
         ("a", ["a", "?", "?", "?"]),
     )
     for ref_char, out_chars in expected_pairs:
         wanted.add_token(ref_token=ref_char,
                          output_tokens=out_chars,
                          add_to_left=False)

     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)
Ejemplo n.º 16
0
 def test_7(self):
     """
     'werewolf' vs 'were  wolf': expect distance 2, with the two extra spaces
     attached to the second "e". The expected alignment is passed through
     merge_none_tokens() before the comparison.
     """
     outcome = self.calculator.get_distance('werewolf', 'were  wolf')
     self.assertEqual(outcome.distance, 2)

     wanted = AlignmentResult()
     expected_pairs = (
         ("w", ["w"]),
         ("e", ["e"]),
         ("r", ["r"]),
         ("e", ["e", " ", " "]),
         ("w", ["w"]),
         ("o", ["o"]),
         ("l", ["l"]),
         ("f", ["f"]),
     )
     for ref_char, out_chars in expected_pairs:
         wanted.add_token(ref_token=ref_char,
                          output_tokens=out_chars,
                          add_to_left=False)
     wanted.merge_none_tokens()

     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)
from transcription_compare.levenshtein_distance_calculator import UKKLevenshteinDistanceCalculator
from transcription_compare.tokenizer import CharacterTokenizer, WordTokenizer
from transcription_compare.results import AlignmentResult
from transcription_compare.results import AlignedToken
# Hand-build an AlignmentResult, one add_token() call per aligned entry.
# Every call passes add_to_left=False.
alignment_result = AlignmentResult()
# Entries with ref_token=None carry output tokens only
# (presumably insertions -- confirm against AlignmentResult).
alignment_result.add_token(ref_token=None,
                           output_tokens=["1"],
                           add_to_left=False)
alignment_result.add_token(ref_token=None,
                           output_tokens=["2"],
                           add_to_left=False)
alignment_result.add_token(ref_token=None,
                           output_tokens=["3"],
                           add_to_left=False)
alignment_result.add_token(ref_token="1",
                           output_tokens=["4"],
                           add_to_left=False)
alignment_result.add_token(ref_token=None,
                           output_tokens=["5"],
                           add_to_left=False)
# One reference token paired with several output tokens.
alignment_result.add_token(ref_token="ha",
                           output_tokens=["in", "and", "some"],
                           add_to_left=False)
alignment_result.add_token(ref_token="someday",
                           output_tokens=["days"],
                           add_to_left=False)
# Spelled-out number in the reference paired with a digit in the output.
alignment_result.add_token(ref_token="one",
                           output_tokens=["1"],
                           add_to_left=False)
alignment_result.add_token(ref_token="two",
                           output_tokens=["2"],
Ejemplo n.º 18
0
 def test_6(self):
     """
     'ernest' vs 'nester': expect distance 4. The leading "e" and "r" have no
     output tokens and the final "t" absorbs the trailing "e", "r". The
     expected alignment is passed through merge_none_tokens() before the
     comparison.
     """
     outcome = self.calculator.get_distance('ernest', 'nester')
     self.assertEqual(outcome.distance, 4)

     wanted = AlignmentResult()
     expected_pairs = (
         ("e", []),
         ("r", []),
         ("n", ["n"]),
         ("e", ["e"]),
         ("s", ["s"]),
         ("t", ["t", "e", "r"]),
     )
     for ref_char, out_chars in expected_pairs:
         wanted.add_token(ref_token=ref_char,
                          output_tokens=out_chars,
                          add_to_left=False)
     wanted.merge_none_tokens()

     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)
Ejemplo n.º 19
0
import inflect
# Module-level inflect engine; created before the remaining imports run.
p = inflect.engine()
from transcription_compare.levenshtein_distance_calculator import UKKLevenshteinDistanceCalculator
from transcription_compare.tokenizer import CharacterTokenizer, WordTokenizer
from transcription_compare.utils import SimpleReferenceCombinationGenerator

from transcription_compare.results import AlignmentResult
# Hand-build an AlignmentResult, one add_token() call per aligned entry.
# Every call passes add_to_left=False.
alignment_result = AlignmentResult()
# Entries with ref_token=None carry output tokens only
# (presumably insertions -- confirm against AlignmentResult).
alignment_result.add_token(ref_token=None,
                           output_tokens=["1"],
                           add_to_left=False)
alignment_result.add_token(ref_token=None,
                           output_tokens=["2"],
                           add_to_left=False)
alignment_result.add_token(ref_token=None,
                           output_tokens=["3"],
                           add_to_left=False)
alignment_result.add_token(ref_token="1",
                           output_tokens=["4"],
                           add_to_left=False)
alignment_result.add_token(ref_token=None,
                           output_tokens=["5"],
                           add_to_left=False)
# One reference token paired with several output tokens.
alignment_result.add_token(ref_token="ha",
                           output_tokens=["in", "and", "some"],
                           add_to_left=False)
alignment_result.add_token(ref_token="someday",
                           output_tokens=["days"],
                           add_to_left=False)
alignment_result.add_token(ref_token="one",
                           output_tokens=["1"],
Ejemplo n.º 20
0
 def test_10(self):
     """
     'happyeveryday' vs 'happybirthday': expect distance 5 and a strictly
     one-to-one alignment, each reference character paired with the output
     character at the same position. Also prints the values returned by
     calculate_three_kinds_of_distance() for the expected result.
     """
     reference_text = 'happyeveryday'
     output_text = 'happybirthday'

     outcome = self.calculator.get_distance(reference_text, output_text)
     self.assertEqual(outcome.distance, 5)

     wanted = AlignmentResult()
     # Both strings have 13 characters, so zip pairs them position by position.
     for ref_char, out_char in zip(reference_text, output_text):
         wanted.add_token(ref_token=ref_char,
                          output_tokens=[out_char],
                          add_to_left=False)

     # Printed only; these four values are not asserted on.
     total, subs, ins, dels = wanted.calculate_three_kinds_of_distance()
     print(total, subs, ins, dels)

     print(outcome.alignment_result)
     print(wanted)
     self.assertEqual(outcome.alignment_result, wanted)