Example #1
0
    def test_two_summaries_end_inserting(self):
        """Aggregate two summaries whose hypotheses are longer than the refs.

        The first pair is 'ab' vs 'acd', the second 'ab' vs 'abc'.  The
        aggregated counters, the confusion counts and the ending-error
        statistics are all checked.  The unmatched 'b' of the first pair may
        be aligned with either 'c' or 'd'; both outcomes are accepted.
        """
        first = ErrorsSummary.from_lists(list('ab'), list('acd'))
        second = ErrorsSummary.from_lists(list('ab'), list('abc'))
        summary = ErrorsSummary.aggregate([first, second])

        # Aggregated error counters.
        self.assertEqual(summary.nb_errors, 3)
        self.assertEqual(summary.nb_subs, 1)
        self.assertEqual(summary.nb_inss, 2)
        self.assertEqual(summary.nb_dels, 0)
        self.assertEqual(summary.ref_len, 4)
        self.assertEqual(summary.nb_lines_summarized, 2)

        # Symbols matched to themselves.
        self.assertEqual(summary.confusions['a']['a'], 2)
        self.assertEqual(summary.confusions['b']['b'], 1)

        # Two alignments of the unmatched 'b' are equally valid.
        b_paired_with_c = (
            summary.confusions['b']['c'] == 1
            and summary.confusions[None]['d'] == 1
        )
        b_paired_with_d = (
            summary.confusions[None]['c'] == 1
            and summary.confusions['b']['d'] == 1
        )
        self.assertTrue(b_paired_with_c or b_paired_with_d)

        # The confusion counts must add up to the combined hypothesis length.
        total_confusions = sum(
            count
            for row in summary.confusions.values()
            for count in row.values()
        )
        self.assertEqual(total_confusions, len('acd') + len('abc'))

        endings = summary.ending_errors
        self.assertEqual(endings.pure_deletions, 0)
        self.assertEqual(endings.mixed_deletions, 0)
        self.assertEqual(endings.pure_insertions, 1)
        self.assertEqual(endings.mixed_insertions, 1)
        self.assertEqual(endings.correct, 0)
Example #2
0
    def test_one_errorneuous_one_perfect(self):
        """Aggregate one erroneous summary ('abcd' vs 'abb') with a perfect one.

        The aggregated counters, the confusion counts and the ending-error
        statistics are all checked.  The extra 'b' in the hypothesis may be
        aligned with either 'c' or 'd'; both outcomes are accepted.
        """
        erroneous = ErrorsSummary.from_lists(list('abcd'), list('abb'))
        perfect = ErrorsSummary.from_lists(list('ab'), list('ab'))
        summary = ErrorsSummary.aggregate([erroneous, perfect])

        # Aggregated error counters.
        self.assertEqual(summary.nb_errors, 2)
        self.assertEqual(summary.nb_subs, 1)
        self.assertEqual(summary.nb_inss, 0)
        self.assertEqual(summary.nb_dels, 1)
        self.assertEqual(summary.ref_len, 6)
        self.assertEqual(summary.nb_lines_summarized, 2)

        # Confusions that do not depend on the chosen alignment.
        self.assertEqual(summary.confusions['a']['a'], 2)
        self.assertEqual(summary.confusions['b']['a'], 0)
        self.assertEqual(summary.confusions['b']['b'], 2)

        # Two alignments of the extra 'b' are equally valid.
        b_paired_with_c = (
            summary.confusions['c']['b'] == 1
            and summary.confusions['d'][None] == 1
        )
        b_paired_with_d = (
            summary.confusions['c'][None] == 1
            and summary.confusions['d']['b'] == 1
        )
        self.assertTrue(b_paired_with_c or b_paired_with_d)

        # The confusion counts must add up to the combined reference length.
        total_confusions = sum(
            count
            for row in summary.confusions.values()
            for count in row.values()
        )
        self.assertEqual(total_confusions, len('abcd') + len('ab'))

        endings = summary.ending_errors
        self.assertEqual(endings.pure_deletions, 0)
        self.assertEqual(endings.mixed_deletions, 1)
        self.assertEqual(endings.correct, 1)
Example #3
0
    def __init__(self, filename, ground_truth, transcription):
        """Store the texts of one line and compute its error summaries.

        Args:
            filename: stored unchanged as ``self.filename``.
            ground_truth: reference text; iterated symbol by symbol for the
                character-level summary and ``split()`` for the word-level
                one.
            transcription: hypothesis text, processed the same way as
                ``ground_truth``.
        """
        self.filename = filename
        self.ground_truth = ground_truth
        self.transcription = transcription

        # Character level: every element of the text is one symbol.
        reference_chars = list(ground_truth)
        hypothesis_chars = list(transcription)
        self.char_errors = ErrorsSummary.from_lists(reference_chars,
                                                    hypothesis_chars)

        # Word level: symbols are the whitespace-separated tokens.
        reference_words = ground_truth.split()
        hypothesis_words = transcription.split()
        self.word_error = ErrorsSummary.from_lists(reference_words,
                                                   hypothesis_words)
Example #4
0
 def test_start_deletion(self):
     """'ab' vs 'b': the ending must be counted as correct, not deleted."""
     summary = ErrorsSummary.from_lists(list('ab'), list('b'))
     endings = summary.ending_errors
     self.assertEqual(endings.pure_deletions, 0)
     self.assertEqual(endings.mixed_deletions, 0)
     self.assertEqual(endings.correct, 1)
Example #5
0
 def test_empty_ref_match(self):
     """Empty reference vs empty hypothesis counts as one correct ending."""
     summary = ErrorsSummary.from_lists(list(''), list(''))
     endings = summary.ending_errors
     self.assertEqual(endings.pure_deletions, 0)
     self.assertEqual(endings.mixed_deletions, 0)
     self.assertEqual(endings.correct, 1)
Example #6
0
 def test_deletion(self):
     """'ab' vs 'a': the missing 'b' is recorded as confused with None."""
     reference, hypothesis = list('ab'), list('a')
     summary = ErrorsSummary.from_lists(reference, hypothesis)
     self.assertEqual(summary.confusions['a']['a'], 1)
     self.assertEqual(summary.confusions['b']['b'], 0)
     self.assertEqual(summary.confusions['b'][None], 1)
Example #7
0
 def test_substitution_only(self):
     """'ab' vs 'cd': the ending is counted as one pure substitution."""
     summary = ErrorsSummary.from_lists(list('ab'), list('cd'))
     endings = summary.ending_errors
     self.assertEqual(endings.pure_deletions, 0)
     self.assertEqual(endings.mixed_deletions, 0)
     self.assertEqual(endings.correct, 0)
     self.assertEqual(endings.pure_substitutions, 1)
Example #8
0
 def test_empty_ref_match(self):
     """Empty reference vs empty hypothesis: all counters are zero.

     The summary must still report that one line was summarized.
     """
     summary = ErrorsSummary.from_lists(list(''), list(''))

     self.assertEqual(summary.nb_errors, 0)
     self.assertEqual(summary.nb_subs, 0)
     self.assertEqual(summary.nb_inss, 0)
     self.assertEqual(summary.nb_dels, 0)

     self.assertEqual(summary.ref_len, 0)
     self.assertEqual(summary.nb_lines_summarized, 1)
Example #9
0
 def test_substitution(self):
     """'ab' vs 'ac': exactly one error, and it is a substitution."""
     summary = ErrorsSummary.from_lists(list('ab'), list('ac'))

     self.assertEqual(summary.nb_errors, 1)
     self.assertEqual(summary.nb_subs, 1)
     self.assertEqual(summary.nb_inss, 0)
     self.assertEqual(summary.nb_dels, 0)

     self.assertEqual(summary.ref_len, 2)
     self.assertEqual(summary.nb_lines_summarized, 1)
Example #10
0
def main():
    """Command-line entry point: score transcriptions against ground truth.

    Parses the command-line options, loads both transcription sets, matches
    their lines and prints the aggregated character-level and word-level
    summaries (labelled ``CER:`` and ``WER:``).  ``[PARTIAL]`` is appended
    to both lines when the ``all_matched`` flag returned by ``match_lines``
    is false.  When the corresponding options are given, the per-line
    results and the confusion data are additionally saved to the requested
    paths.

    Returns:
        0, always.
    """
    parser = argparse.ArgumentParser()
    for mandatory_flag in ('--ground-truth', '--transcriptions'):
        parser.add_argument(mandatory_flag, type=str, required=True)
    for optional_flag in ('--output', '--char-confusion', '--word-confusion'):
        parser.add_argument(optional_flag,
                            type=str,
                            required=False,
                            default=None)
    parser.add_argument(
        '--human-readable',
        action='store_true',
        required=False,
        help='make per-line output easier to process by humans')
    args = parser.parse_args()

    references = load_transcriptions(args.ground_truth)
    hypotheses = load_transcriptions(args.transcriptions)
    lines, all_matched = match_lines(hypotheses, references)

    per_line_char_errors = [line.char_errors for line in lines]
    char_summary = ErrorsSummary.aggregate(per_line_char_errors)
    per_line_word_errors = [line.word_error for line in lines]
    word_summary = ErrorsSummary.aggregate(per_line_word_errors)

    # Both report lines carry the same marker for incomplete matching.
    partial_marker = '' if all_matched else "[PARTIAL]"
    print("CER:", char_summary, partial_marker)
    print("WER:", word_summary, partial_marker)

    if args.output is not None:
        save(sort(lines), args.output, args.human_readable)

    confusion_targets = (
        (char_summary, args.char_confusion),
        (word_summary, args.word_confusion),
    )
    for summary, destination in confusion_targets:
        if destination is not None:
            save_confusions(summary, destination)

    return 0
Example #11
0
    def test_one_substituting_one_perfect(self):
        """Aggregate a summary with two substitutions and a perfect one.

        'abcd' vs 'abxy' contributes the two substitutions and 'ab' vs 'ab'
        contributes none.  The aggregated counters, the confusion counts and
        the ending-error statistics are all checked.
        """
        substituting = ErrorsSummary.from_lists(list('abcd'), list('abxy'))
        perfect = ErrorsSummary.from_lists(list('ab'), list('ab'))
        summary = ErrorsSummary.aggregate([substituting, perfect])

        # Aggregated error counters.
        self.assertEqual(summary.nb_errors, 2)
        self.assertEqual(summary.nb_subs, 2)
        self.assertEqual(summary.nb_inss, 0)
        self.assertEqual(summary.nb_dels, 0)
        self.assertEqual(summary.ref_len, 6)
        self.assertEqual(summary.nb_lines_summarized, 2)

        # Matches are counted across both lines, substitutions once each.
        self.assertEqual(summary.confusions['a']['a'], 2)
        self.assertEqual(summary.confusions['b']['b'], 2)
        self.assertEqual(summary.confusions['c']['x'], 1)
        self.assertEqual(summary.confusions['d']['y'], 1)

        # The confusion counts must add up to the combined reference length.
        total_confusions = sum(
            count
            for row in summary.confusions.values()
            for count in row.values()
        )
        self.assertEqual(total_confusions, len('abcd') + len('ab'))

        endings = summary.ending_errors
        self.assertEqual(endings.pure_deletions, 0)
        self.assertEqual(endings.mixed_deletions, 0)
        self.assertEqual(endings.pure_substitutions, 1)
        self.assertEqual(endings.correct, 1)
Example #12
0
 def test_match(self):
     """'ab' vs 'ab': each symbol is confused only with itself, once."""
     reference, hypothesis = list('ab'), list('ab')
     summary = ErrorsSummary.from_lists(reference, hypothesis)
     self.assertEqual(summary.confusions['a']['a'], 1)
     self.assertEqual(summary.confusions['b']['b'], 1)
Example #13
0
 def test_empty_ref_match(self):
     """Empty reference vs empty hypothesis yields no confusions at all."""
     reference, hypothesis = list(''), list('')
     summary = ErrorsSummary.from_lists(reference, hypothesis)
     self.assertEqual(summary.confusions, {})