def test_deletion_only_reversed(self):
    """An unmatched leading left-side token is reported on its own.

    The middle pair holds ['a', 'b'] against ['b']; only the 'a' is
    expected back, paired with an empty right-hand side.
    """
    shared_prefix = (['begin'], ['begin'])
    shared_suffix = (['end'], ['end'])
    alignment = [shared_prefix, (['a', 'b'], ['b']), shared_suffix]

    expected = [(['a'], [])]
    self.assertEqual(extract_mismatch(alignment), expected)
def test_substitution_with_deletion(self):
    """A pair with no token in common is returned whole.

    ['a', 'b'] against ['c'] shares nothing, so the expected mismatch
    is the complete pair, while the surrounding identical pairs are
    left out of the result.
    """
    expected = [(['a', 'b'], ['c'])]

    alignment = [
        (['begin'], ['begin']),
        (['a', 'b'], ['c']),
        (['end'], ['end']),
    ]
    mismatches = extract_mismatch(alignment)

    self.assertEqual(mismatches, expected)
def test_insertion_only(self):
    """An extra trailing right-side token is reported on its own.

    ['a'] against ['a', 'b'] should yield just the surplus 'b', paired
    with an empty left-hand side.
    """
    mismatches = extract_mismatch([
        (['begin'], ['begin']),
        (['a'], ['a', 'b']),
        (['end'], ['end']),
    ])

    self.assertEqual(mismatches, [([], ['b'])])
def test_double_substitution(self):
    """Adjacent mismatching pairs are merged; separated ones stay apart.

    The left side is always ['a']; the right side runs a, b, b, a, b.
    The two consecutive 'b' pairs are expected as one combined
    mismatch, and the final 'b' pair as a second, separate one.
    """
    right_tokens = ('a', 'b', 'b', 'a', 'b')
    # Build a fresh pair of lists per position so no list object is
    # shared between alignment entries.
    alignment = [(['a'], [token]) for token in right_tokens]

    expected = [
        (['a', 'a'], ['b', 'b']),
        (['a'], ['b']),
    ]
    self.assertEqual(extract_mismatch(alignment), expected)
def test_trivial(self):
    """A single identical pair produces no mismatches at all."""
    identical_pair = (['a'], ['a'])
    self.assertEqual(extract_mismatch([identical_pair]), [])
references = {} with open(args.text_references) as f: for line in f: fields = line.split() references[fields[0]] = fields[1:] candidate_possible_words = [] for line in sys.stdin: fields = line.split() _, utt_id, _, _, _ = parse_oov_id(fields[0]) candidate_line = fields[1:] reference_line = references[utt_id] alignment = align(reference_line, candidate_line) mismatches = extract_mismatch(alignment) oov_mismatch = find_in_mismatches(mismatches, "<UNK-OI>") total_ref_len += len(reference_line) total_nb_errors += number_of_errors(mismatches) matching_oovs = intersection(oov_list, oov_mismatch[0]) if len(matching_oovs) in oov_hits: oov_hits[len(matching_oovs)].append((utt_id, matching_oovs)) else: oov_hits[len(matching_oovs)] = [(utt_id, matching_oovs)] candidate_possible_words.append(oov_mismatch[0]) print(fields[0], oov_mismatch[0], '--', oov_mismatch[1]) with open(args.reference_file, 'w') as f: