def test_hong2014(self):
    """Compare ``PythonRouge`` with ``Rouge`` on the centroid summaries.

    The summaries loaded from ``_centroid_file_path`` are scored against the
    references loaded from ``_duc2004_file_path`` by both implementations,
    configured with the same options. The ``Rouge`` output is treated as the
    expected value, and each precision/recall/f1 figure reported by
    ``PythonRouge`` must fall within a per-metric absolute tolerance of it.
    """
    references = load_references(_duc2004_file_path)
    summaries = load_summaries(_centroid_file_path)

    # Options shared by both scorers so they are configured identically.
    shared_options = {
        'use_porter_stemmer': True,
        'remove_stopwords': False,
        'max_words': 100,
        'compute_rouge_l': True,
    }
    rouge = Rouge(max_ngram=2, **shared_options)
    python_rouge = PythonRouge(**shared_options)

    expected_metrics, _ = rouge.evaluate(summaries, references)
    actual_metrics, _ = python_rouge.evaluate(summaries, references)

    # (expected key, actual key, statistic, absolute tolerance)
    comparisons = (
        ('rouge-1', 'python-rouge-1', 'precision', 1e-2),
        ('rouge-1', 'python-rouge-1', 'recall', 2e-2),
        ('rouge-1', 'python-rouge-1', 'f1', 2e-2),
        ('rouge-2', 'python-rouge-2', 'precision', 1e-2),
        ('rouge-2', 'python-rouge-2', 'recall', 1e-2),
        ('rouge-2', 'python-rouge-2', 'f1', 1e-2),
        # Rouge-L is a little further off, but still close enough not to
        # be a concern, hence the looser tolerance.
        ('rouge-l', 'python-rouge-l', 'precision', 1e-1),
        ('rouge-l', 'python-rouge-l', 'recall', 1e-1),
        ('rouge-l', 'python-rouge-l', 'f1', 1e-1),
    )
    for expected_key, actual_key, statistic, tolerance in comparisons:
        assert math.isclose(
            expected_metrics[expected_key][statistic],
            actual_metrics[actual_key][statistic],
            abs_tol=tolerance,
        )
def test_python_rouge_multiling(self):
    """Check ``PythonRouge`` against ``Rouge`` on the fixture summaries.

    Both scorers are built with the same options and evaluate
    ``self.summaries`` against ``self.references_list``. The two metric
    dictionaries are then handed to ``self.assert_same_as_rouge``, with the
    ``Rouge`` result serving as the expected value.
    """
    # One options mapping keeps the two scorers configured identically.
    scorer_options = dict(
        use_porter_stemmer=True,
        remove_stopwords=False,
        max_words=100,
        compute_rouge_l=True,
    )
    baseline_scorer = Rouge(max_ngram=2, **scorer_options)
    candidate_scorer = PythonRouge(**scorer_options)

    # evaluate() returns a pair; only the first element is compared here.
    expected_metrics, _ = baseline_scorer.evaluate(
        self.summaries, self.references_list
    )
    actual_metrics, _ = candidate_scorer.evaluate(
        self.summaries, self.references_list
    )

    self.assert_same_as_rouge(actual_metrics, expected_metrics)