def test_5(self):
    """Check 'AVERY' against 'GARVEY': distance 3 and the expected alignment.

    The expected alignment hangs the extra "G" and "R" on reference "A" and
    leaves reference "R" with no output token.
    """
    outcome = self.calculator.get_distance('AVERY', 'GARVEY')
    self.assertEqual(outcome.distance, 3)

    # (reference token, output tokens aligned to it), in reference order.
    expected_pairs = [
        ("A", ["G", "A", "R"]),
        ("V", ["V"]),
        ("E", ["E"]),
        ("R", []),
        ("Y", ["Y"]),
    ]
    wanted = AlignmentResult()
    for ref, outs in expected_pairs:
        wanted.add_token(ref_token=ref, output_tokens=outs, add_to_left=False)
    wanted.merge_none_tokens()

    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)
def test_4(self):
    """Check 'b' against 'batman': distance 5, all output aligned to "b"."""
    outcome = self.calculator.get_distance('b', 'batman')
    self.assertEqual(outcome.distance, 5)

    wanted = AlignmentResult()
    # Every character of the output is expected on the single reference token.
    wanted.add_token(
        ref_token="b",
        output_tokens=list("batman"),
        add_to_left=False,
    )

    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)
def test_2nd(self):
    """Check 'AV' against 'Abc': distance 2 and the expected alignment.

    Reference "A" is expected to carry ["A", "b"]; reference "V" is expected
    to be aligned with "c".
    """
    outcome = self.calculator.get_distance('AV', 'Abc')
    self.assertEqual(outcome.distance, 2)

    wanted = AlignmentResult()
    for ref, outs in (("A", ["A", "b"]), ("V", ["c"])):
        wanted.add_token(ref_token=ref, output_tokens=outs, add_to_left=False)

    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)
def test_4(self):
    """Check 'b' against 'batman': distance 5, all output aligned to "b".

    Also prints the distance breakdown computed from the expected alignment.
    """
    outcome = self.calculator.get_distance('b', 'batman')
    self.assertEqual(outcome.distance, 5)

    wanted = AlignmentResult()
    # Every character of the output is expected on the single reference token.
    wanted.add_token(
        ref_token="b",
        output_tokens=list("batman"),
        add_to_left=False,
    )

    total, subs, ins, dels = wanted.calculate_three_kinds_of_distance()
    print(total, subs, ins, dels)
    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)
def test_2nd(self):
    """Check 'AV' against 'Abc': distance 2 and the expected alignment.

    Also prints the distance breakdown computed from the expected alignment.
    """
    outcome = self.calculator.get_distance('AV', 'Abc')
    self.assertEqual(outcome.distance, 2)

    wanted = AlignmentResult()
    for ref, outs in (("A", ["A", "b"]), ("V", ["c"])):
        wanted.add_token(ref_token=ref, output_tokens=outs, add_to_left=False)

    total, subs, ins, dels = wanted.calculate_three_kinds_of_distance()
    print(total, subs, ins, dels)
    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)
def test_first(self):
    """Check 'abc' against 'dfg': distance 3, each token paired one-to-one."""
    reference, hypothesis = 'abc', 'dfg'
    outcome = self.calculator.get_distance(reference, hypothesis)
    self.assertEqual(outcome.distance, 3)

    wanted = AlignmentResult()
    # Position i of the reference is expected opposite position i of the output.
    for ref, out in zip(reference, hypothesis):
        wanted.add_token(ref_token=ref, output_tokens=[out], add_to_left=False)

    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)
def test_first(self):
    """Check 'abc' against 'dfg': distance 3, each token paired one-to-one.

    Also prints the distance breakdown computed from the expected alignment.
    """
    reference, hypothesis = 'abc', 'dfg'
    outcome = self.calculator.get_distance(reference, hypothesis)
    self.assertEqual(outcome.distance, 3)

    wanted = AlignmentResult()
    # Position i of the reference is expected opposite position i of the output.
    for ref, out in zip(reference, hypothesis):
        wanted.add_token(ref_token=ref, output_tokens=[out], add_to_left=False)

    total, subs, ins, dels = wanted.calculate_three_kinds_of_distance()
    print(total, subs, ins, dels)
    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)
tmp_result = x if tmp_result is None: return None calculator2 = UKKLevenshteinDistanceCalculator(tokenizer=WordTokenizer(), get_alignment_result=True) update_result = calculator2.get_distance(tmp_result, output_string).alignment_result return update_result p = inflect.engine() alignment_result = AlignmentResult() alignment_result.add_token(ref_token="w", output_tokens=["w"], add_to_left=False) alignment_result.add_token(ref_token="5", output_tokens=["e"], add_to_left=False) alignment_result.add_token(ref_token="r", output_tokens=["r"], add_to_left=False) alignment_result.add_token(ref_token="g", output_tokens=[], add_to_left=False) alignment_result.add_token(ref_token="2", output_tokens=[], add_to_left=False) alignment_result.add_token("1", ["one"]) alignment_result.add_token("21", ["twenty-one", 'a', 'c'], add_to_left=False) alignment_result.add_token("312", ["three", "one", "two"], add_to_left=False) alignment_result.add_token(ref_token="e",
def test_9(self):
    """Check 'happyeveryday' against 'happybirthday': distance 5.

    Both strings have the same length, and the expected alignment pairs them
    position by position (five of the pairs differ).
    """
    reference, hypothesis = 'happyeveryday', 'happybirthday'
    outcome = self.calculator.get_distance(reference, hypothesis)
    self.assertEqual(outcome.distance, 5)

    wanted = AlignmentResult()
    for ref, out in zip(reference, hypothesis):
        wanted.add_token(ref_token=ref, output_tokens=[out], add_to_left=False)

    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)
def test_9(self):
    """Check 'helloa a a ?' against 'HHHHHHHoooooo': distance 13.

    The expected alignment puts two "H" on the first reference token and a
    single output token on each remaining reference token.
    """
    outcome = self.calculator.get_distance('helloa a a ?', 'HHHHHHHoooooo')
    self.assertEqual(outcome.distance, 13)

    # (reference token, output tokens aligned to it), in reference order.
    expected_pairs = [
        ("h", ["H", "H"]),
        ("e", ["H"]),
        ("l", ["H"]),
        ("l", ["H"]),
        ("o", ["H"]),
        ("a", ["H"]),
        (" ", ["o"]),
        ("a", ["o"]),
        (" ", ["o"]),
        ("a", ["o"]),
        (" ", ["o"]),
        ("?", ["o"]),
    ]
    wanted = AlignmentResult()
    for ref, outs in expected_pairs:
        wanted.add_token(ref_token=ref, output_tokens=outs, add_to_left=False)

    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)
def test_8(self):
    """Check 'jijizhazha' against 'hahahaaaa???': distance 10.

    The expected alignment leaves the first "j" without output and attaches
    the trailing "???" to the last reference token.
    """
    outcome = self.calculator.get_distance('jijizhazha', 'hahahaaaa???')
    self.assertEqual(outcome.distance, 10)

    # (reference token, output tokens aligned to it), in reference order.
    expected_pairs = [
        ("j", []),
        ("i", ["h"]),
        ("j", ["a"]),
        ("i", ["h"]),
        ("z", ["a"]),
        ("h", ["h"]),
        ("a", ["a"]),
        ("z", ["a"]),
        ("h", ["a"]),
        ("a", ["a", "?", "?", "?"]),
    ]
    wanted = AlignmentResult()
    for ref, outs in expected_pairs:
        wanted.add_token(ref_token=ref, output_tokens=outs, add_to_left=False)

    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)
def test_7(self):
    """Check 'werewolf' against 'were', two spaces, 'wolf': distance 2.

    The expected alignment attaches both inserted spaces to the reference
    "e" that precedes them; every other token matches one-to-one.
    """
    # The output must contain TWO spaces: the asserted distance is 2 and the
    # expected alignment lists two " " tokens. With a single space the
    # distance could only be 1, so the fixture would contradict itself. The
    # spaces are built explicitly so they cannot be collapsed or overlooked.
    output_text = 'were' + ' ' * 2 + 'wolf'
    result = self.calculator.get_distance('werewolf', output_text)
    self.assertEqual(result.distance, 2)

    # (reference token, output tokens aligned to it), in reference order.
    expected_pairs = [
        ("w", ["w"]),
        ("e", ["e"]),
        ("r", ["r"]),
        ("e", ["e", " ", " "]),
        ("w", ["w"]),
        ("o", ["o"]),
        ("l", ["l"]),
        ("f", ["f"]),
    ]
    expected_alignment_result = AlignmentResult()
    for ref_token, output_tokens in expected_pairs:
        expected_alignment_result.add_token(
            ref_token=ref_token, output_tokens=output_tokens, add_to_left=False)
    expected_alignment_result.merge_none_tokens()

    print(result.alignment_result)
    print(expected_alignment_result)
    self.assertEqual(result.alignment_result, expected_alignment_result)
def test_6(self):
    """Check 'ernest' against 'nester': distance 4 and the expected alignment.

    The leading "e" and "r" of the reference are expected to have no output,
    and the trailing "e", "r" of the output are attached to reference "t".
    """
    outcome = self.calculator.get_distance('ernest', 'nester')
    self.assertEqual(outcome.distance, 4)

    # (reference token, output tokens aligned to it), in reference order.
    expected_pairs = [
        ("e", []),
        ("r", []),
        ("n", ["n"]),
        ("e", ["e"]),
        ("s", ["s"]),
        ("t", ["t", "e", "r"]),
    ]
    wanted = AlignmentResult()
    for ref, outs in expected_pairs:
        wanted.add_token(ref_token=ref, output_tokens=outs, add_to_left=False)
    wanted.merge_none_tokens()

    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)
import inflect p = inflect.engine() from transcription_compare.levenshtein_distance_calculator import UKKLevenshteinDistanceCalculator from transcription_compare.tokenizer import CharacterTokenizer, WordTokenizer from transcription_compare.utils import SimpleReferenceCombinationGenerator from transcription_compare.results import AlignmentResult alignment_result = AlignmentResult() alignment_result.add_token(ref_token=None, output_tokens=["1"], add_to_left=False) alignment_result.add_token(ref_token=None, output_tokens=["2"], add_to_left=False) alignment_result.add_token(ref_token=None, output_tokens=["3"], add_to_left=False) alignment_result.add_token(ref_token="1", output_tokens=["4"], add_to_left=False) alignment_result.add_token(ref_token=None, output_tokens=["5"], add_to_left=False) alignment_result.add_token(ref_token="ha", output_tokens=["in", "and", "some"], add_to_left=False) alignment_result.add_token(ref_token="someday", output_tokens=["days"], add_to_left=False) alignment_result.add_token(ref_token="one", output_tokens=["1"],
def test_10(self):
    """Check 'happyeveryday' against 'happybirthday': distance 5.

    Both strings have the same length, and the expected alignment pairs them
    position by position. Also prints the distance breakdown computed from
    the expected alignment.
    """
    reference, hypothesis = 'happyeveryday', 'happybirthday'
    outcome = self.calculator.get_distance(reference, hypothesis)
    self.assertEqual(outcome.distance, 5)

    wanted = AlignmentResult()
    for ref, out in zip(reference, hypothesis):
        wanted.add_token(ref_token=ref, output_tokens=[out], add_to_left=False)

    total, subs, ins, dels = wanted.calculate_three_kinds_of_distance()
    print(total, subs, ins, dels)
    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)
def test_9(self):
    """Check 'helloa a a ?' against 'HHHHHHHoooooo': distance 13.

    Prints the computed alignment and distance before asserting, then builds
    the expected alignment (two "H" on the first reference token, one output
    token on each of the others) and prints its distance breakdown.
    """
    outcome = self.calculator.get_distance('helloa a a ?', 'HHHHHHHoooooo')
    print(outcome.alignment_result)
    print(outcome.distance)
    self.assertEqual(outcome.distance, 13)

    # (reference token, output tokens aligned to it), in reference order.
    expected_pairs = [
        ("h", ["H", "H"]),
        ("e", ["H"]),
        ("l", ["H"]),
        ("l", ["H"]),
        ("o", ["H"]),
        ("a", ["H"]),
        (" ", ["o"]),
        ("a", ["o"]),
        (" ", ["o"]),
        ("a", ["o"]),
        (" ", ["o"]),
        ("?", ["o"]),
    ]
    wanted = AlignmentResult()
    for ref, outs in expected_pairs:
        wanted.add_token(ref_token=ref, output_tokens=outs, add_to_left=False)

    total, subs, ins, dels = wanted.calculate_three_kinds_of_distance()
    print(total, subs, ins, dels)
    print(outcome.alignment_result)
    print(wanted)
    self.assertEqual(outcome.alignment_result, wanted)