def test_dev_set(self):
    """Check the metrics computed for the dev-set fixture files.

    Scores are computed from 'dev_as_references.json' (references) and
    'dev_first_sentence_as_candidates.json' (candidates), then each score
    is formatted to five decimal places and compared with its pinned value.
    """
    references_path = generate_directory('dev_as_references.json')
    candidates_path = generate_directory(
        'dev_first_sentence_as_candidates.json')
    scores = compute_metrics_from_files(
        references_path, candidates_path, MAX_BLEU_ORDER)

    # Checked in this order so the first failing metric is reported first.
    expected_by_metric = (
        ('bleu_1', '0.17634'),
        ('bleu_2', '0.11419'),
        ('bleu_3', '0.08906'),
        ('bleu_4', '0.07623'),
        ('rouge_l', '0.12077'),
    )
    for metric, expected in expected_by_metric:
        self.assertEqual("%.5f" % scores[metric], expected)
def test_sample(self):
    """Check the metrics computed for the sample fixture files.

    Scores are computed from 'sample_references.json' (references) and
    'sample_candidates.json' (candidates), then each score is formatted
    to five decimal places and compared with its pinned value.
    """
    references_path = generate_directory('sample_references.json')
    candidates_path = generate_directory('sample_candidates.json')
    scores = compute_metrics_from_files(
        references_path, candidates_path, MAX_BLEU_ORDER)

    # Checked in this order so the first failing metric is reported first.
    expected_by_metric = (
        ('bleu_1', '0.00852'),
        ('bleu_2', '0.00000'),
        ('bleu_3', '0.00000'),
        ('bleu_4', '0.00000'),
        ('rouge_l', '0.03093'),
    )
    for metric, expected in expected_by_metric:
        self.assertEqual("%.5f" % scores[metric], expected)
def test_no_answer(self):
    """Check the metrics computed for the no-answer fixture files.

    Scores are computed from 'no_answer_test_references.json' and
    'no_answer_test_candidates.json'; every reported score must format
    to '0.00000' at five decimal places.
    """
    references_path = generate_directory('no_answer_test_references.json')
    candidates_path = generate_directory('no_answer_test_candidates.json')
    scores = compute_metrics_from_files(
        references_path, candidates_path, MAX_BLEU_ORDER)

    # Every metric shares the same expected value; order matches the
    # sequence in which the metrics are checked.
    expected = '0.00000'
    for metric in ('bleu_1', 'bleu_2', 'bleu_3', 'bleu_4', 'rouge_l'):
        self.assertEqual("%.5f" % scores[metric], expected)
def test_same_answer(self):
    """Check the metrics when references and candidates share answers.

    Scores are computed from 'same_answer_test_references.json' and
    'same_answer_test_candidates.json'; every reported score must format
    to '1.00000' at five decimal places.
    """
    references_path = generate_directory('same_answer_test_references.json')
    candidates_path = generate_directory('same_answer_test_candidates.json')
    scores = compute_metrics_from_files(
        references_path, candidates_path, MAX_BLEU_ORDER)

    # Every metric shares the same expected value; order matches the
    # sequence in which the metrics are checked.
    expected = '1.00000'
    for metric in ('bleu_1', 'bleu_2', 'bleu_3', 'bleu_4', 'rouge_l'):
        self.assertEqual("%.5f" % scores[metric], expected)