예제 #1
0
    def test_double_deletion(self):
        """Check the alignment of ``a b c`` against ``a`` alone."""
        first = ['a', 'b', 'c']
        second = ['a']

        # The expected result is a single pair: 'b' and 'c' have no
        # counterpart in the second sequence and stay grouped with 'a'.
        aligned = align(first, second)

        self.assertEqual(aligned, [(['a', 'b', 'c'], ['a'])])
예제 #2
0
    def test_single_deletion_reversed(self):
        """Check the alignment of ``a b`` against ``b`` alone."""
        longer, shorter = ['a', 'b'], ['b']

        # The leading 'a' has no counterpart, so the expected result keeps
        # it in the same group as the shared 'b'.
        only_pair = (['a', 'b'], ['b'])

        self.assertEqual(align(longer, shorter), [only_pair])
예제 #3
0
    def test_double_insertion_reversed(self):
        """Check the alignment of ``b`` alone against ``c a b``."""
        shorter = ['b']
        longer = ['c', 'a', 'b']

        # 'c' and 'a' precede the shared 'b' in the second sequence only;
        # the expected result is one pair holding all three of them.
        expected = [(['b'], ['c', 'a', 'b'])]
        actual = align(shorter, longer)

        self.assertEqual(actual, expected)
예제 #4
0
    def test_substitution_only(self):
        """Check the alignment of two different single-token sequences."""
        # With nothing in common, 'a' and 'b' are still expected to be
        # paired with each other.
        self.assertEqual(align(['a'], ['b']), [(['a'], ['b'])])
예제 #5
0
    def test_single_insertion(self):
        """Check the alignment of ``a`` alone against ``a b``."""
        first = ['a']
        second = ['a', 'b']

        # The extra trailing 'b' is expected to join the pair built
        # around the shared 'a'.
        wanted = [(['a'], ['a', 'b'])]

        got = align(first, second)
        self.assertEqual(got, wanted)
예제 #6
0
    def test_single_element(self):
        """Check the alignment of two identical single-token sequences."""
        # Separate list objects are passed on purpose, mirroring two
        # independently tokenized inputs.
        left, right = ['a'], ['a']

        self.assertEqual(align(left, right), [(['a'], ['a'])])
예제 #7
0
    def test_trivial(self):
        """Check that identical two-token sequences align token by token."""
        tokens = ('a', 'b')

        # One pair per position, each side holding the same single token.
        expected = [([token], [token]) for token in tokens]

        self.assertEqual(align(list(tokens), list(tokens)), expected)
예제 #8
0
    def test_inversion_at_end(self):
        """Check the alignment when the last two tokens are swapped."""
        first = ['a', 'b', 'c', 'd']
        second = ['a', 'b', 'd', 'c']

        # The expected alignment stays strictly positional: the swapped
        # tail shows up as the pairs c/d and d/c.
        expected = [([x], [y]) for x, y in zip('abcd', 'abdc')]

        self.assertEqual(align(first, second), expected)
예제 #9
0
    def test_double_substituion(self):
        """Check the alignment when the two middle tokens both differ."""
        first = ['a', 'b', 'c', 'd']
        second = ['a', 'x', 'y', 'd']

        # Expected: one pair per position, so 'b' lines up with 'x' and
        # 'c' with 'y' between the two shared end tokens.
        position_pairs = [('a', 'a'), ('b', 'x'), ('c', 'y'), ('d', 'd')]
        expected = [([x], [y]) for x, y in position_pairs]

        actual = align(first, second)
        self.assertEqual(actual, expected)
예제 #10
0
    def test_inner_insertion(self):
        """Check the alignment of ``a d`` against ``a b c d``."""
        outer = ['a', 'd']
        full = ['a', 'b', 'c', 'd']

        # The inner 'b c' may be attached to either neighbour, so two
        # results are accepted.
        attached_to_a = [
            (['a'], ['a', 'b', 'c']),
            (['d'], ['d']),
        ]
        attached_to_d = [
            (['a'], ['a']),
            (['d'], ['b', 'c', 'd']),
        ]
        acceptable = [attached_to_a, attached_to_d]

        self.assertIn(align(outer, full), acceptable)
    # Maps a match count to the utterances that produced that many matches:
    # len(matching_oovs) -> [(utt_id, matching_oovs), ...].
    oov_hits: Dict[int, List[Tuple[str, List[str]]]] = {}

    # Load the references: on each line the first whitespace-separated field
    # is the key and the remaining fields are the token list.
    # NOTE(review): a blank line would raise IndexError on fields[0].
    references = {}
    with open(args.text_references) as f:
        for line in f:
            fields = line.split()
            references[fields[0]] = fields[1:]

    # One entry per stdin line: the first element of that line's
    # find_in_mismatches() result.
    candidate_possible_words = []
    for line in sys.stdin:
        fields = line.split()
        # Only the second of the five values returned by parse_oov_id() is
        # used; it is the key into `references`.
        _, utt_id, _, _, _ = parse_oov_id(fields[0])

        candidate_line = fields[1:]
        # NOTE(review): raises KeyError if utt_id has no reference line.
        reference_line = references[utt_id]
        alignment = align(reference_line, candidate_line)
        mismatches = extract_mismatch(alignment)
        # Indexed below with [0] and [1]; presumably the two sides of the
        # mismatch containing "<UNK-OI>" -- confirm against the helper.
        oov_mismatch = find_in_mismatches(mismatches, "<UNK-OI>")

        # Running totals; both counters and oov_list are defined outside
        # the visible part of this function.
        total_ref_len += len(reference_line)
        total_nb_errors += number_of_errors(mismatches)
        matching_oovs = intersection(oov_list, oov_mismatch[0])

        # Group this utterance under the number of matches it produced.
        if len(matching_oovs) in oov_hits:
            oov_hits[len(matching_oovs)].append((utt_id, matching_oovs))
        else:
            oov_hits[len(matching_oovs)] = [(utt_id, matching_oovs)]

        candidate_possible_words.append(oov_mismatch[0])
        # Per-line report: the raw id, then both parts of oov_mismatch
        # separated by '--'.
        print(fields[0], oov_mismatch[0], '--', oov_mismatch[1])