Esempio n. 1
0
    def test_deletion_only_reversed(self):
        # The middle pair has an extra leading 'a' on its first side only;
        # the 'begin' and 'end' pairs are identical on both sides.
        alignment = [
            (['begin'], ['begin']),
            (['a', 'b'], ['b']),
            (['end'], ['end']),
        ]

        observed = extract_mismatch(alignment)

        # Only the unmatched 'a' should be reported, against an empty
        # second side.
        self.assertEqual(observed, [(['a'], [])])
Esempio n. 2
0
    def test_substitution_with_deletion(self):
        # The middle pair shares no token between its two sides
        # (two tokens on the first side, one on the second).
        alignment = [
            (['begin'], ['begin']),
            (['a', 'b'], ['c']),
            (['end'], ['end']),
        ]

        observed = extract_mismatch(alignment)

        # The whole differing pair is expected back unchanged, while the
        # identical 'begin'/'end' pairs are left out.
        self.assertEqual(observed, [(['a', 'b'], ['c'])])
Esempio n. 3
0
    def test_insertion_only(self):
        # The middle pair has an extra trailing 'b' on its second side only.
        alignment = [
            (['begin'], ['begin']),
            (['a'], ['a', 'b']),
            (['end'], ['end']),
        ]

        observed = extract_mismatch(alignment)

        # Only the surplus 'b' should be reported, against an empty
        # first side.
        self.assertEqual(observed, [([], ['b'])])
Esempio n. 4
0
    def test_double_substitution(self):
        # Two adjacent differing pairs, then an identical pair, then one
        # more differing pair at the very end of the alignment.
        alignment = [
            (['a'], ['a']),
            (['a'], ['b']),
            (['a'], ['b']),
            (['a'], ['a']),
            (['a'], ['b']),
        ]
        # The adjacent differing pairs are expected to come back merged into
        # a single entry; the trailing one is expected as its own entry.
        expected = [
            (['a', 'a'], ['b', 'b']),
            (['a'], ['b']),
        ]

        observed = extract_mismatch(alignment)

        self.assertEqual(observed, expected)
Esempio n. 5
0
    def test_trivial(self):
        # A single pair with identical sides should yield no mismatches.
        alignment = [(['a'], ['a'])]

        observed = extract_mismatch(alignment)

        self.assertEqual(observed, [])
    references = {}
    with open(args.text_references) as f:
        for line in f:
            fields = line.split()
            references[fields[0]] = fields[1:]

    candidate_possible_words = []
    for line in sys.stdin:
        fields = line.split()
        _, utt_id, _, _, _ = parse_oov_id(fields[0])

        candidate_line = fields[1:]
        reference_line = references[utt_id]
        alignment = align(reference_line, candidate_line)
        mismatches = extract_mismatch(alignment)
        oov_mismatch = find_in_mismatches(mismatches, "<UNK-OI>")

        total_ref_len += len(reference_line)
        total_nb_errors += number_of_errors(mismatches)
        matching_oovs = intersection(oov_list, oov_mismatch[0])

        if len(matching_oovs) in oov_hits:
            oov_hits[len(matching_oovs)].append((utt_id, matching_oovs))
        else:
            oov_hits[len(matching_oovs)] = [(utt_id, matching_oovs)]

        candidate_possible_words.append(oov_mismatch[0])
        print(fields[0], oov_mismatch[0], '--', oov_mismatch[1])

    with open(args.reference_file, 'w') as f: