def test_hong2014(self):
        """Compare PythonRouge with Rouge on the centroid summaries.

        The summaries loaded from ``_centroid_file_path`` are scored against
        the references loaded from ``_duc2004_file_path`` by both
        implementations, using the same options. Every ROUGE-1, ROUGE-2 and
        ROUGE-L precision/recall/F1 value reported by PythonRouge must lie
        within a per-metric absolute tolerance of the value reported by Rouge.
        """
        references = load_references(_duc2004_file_path)
        summaries = load_summaries(_centroid_file_path)

        # Both evaluators receive exactly the same options so that any
        # difference in scores comes from the implementations themselves.
        shared_options = {
            'use_porter_stemmer': True,
            'remove_stopwords': False,
            'max_words': 100,
            'compute_rouge_l': True,
        }
        rouge = Rouge(max_ngram=2, **shared_options)
        python_rouge = PythonRouge(**shared_options)

        expected_metrics, _ = rouge.evaluate(summaries, references)
        actual_metrics, _ = python_rouge.evaluate(summaries, references)

        # (metric, field, absolute tolerance), checked in this order.
        # Rouge-L is a little further off than the n-gram metrics, but still
        # close enough not to be a concern, hence its looser tolerance.
        tolerances = (
            ('rouge-1', 'precision', 1e-2),
            ('rouge-1', 'recall', 2e-2),
            ('rouge-1', 'f1', 2e-2),
            ('rouge-2', 'precision', 1e-2),
            ('rouge-2', 'recall', 1e-2),
            ('rouge-2', 'f1', 1e-2),
            ('rouge-l', 'precision', 1e-1),
            ('rouge-l', 'recall', 1e-1),
            ('rouge-l', 'f1', 1e-1),
        )
        for metric, field, tolerance in tolerances:
            # PythonRouge reports each metric under a 'python-' prefixed key.
            expected_value = expected_metrics[metric][field]
            actual_value = actual_metrics['python-' + metric][field]
            assert math.isclose(expected_value, actual_value, abs_tol=tolerance)
# Example no. 2
# 0
    def test_python_rouge_multiling(self):
        """Check PythonRouge against Rouge on the fixture's summaries.

        Both evaluators are built with the same options and run on
        ``self.summaries`` / ``self.references_list``; the resulting metrics
        are then handed to ``self.assert_same_as_rouge`` for comparison.
        """
        # Options common to both implementations; Rouge additionally takes
        # the maximum n-gram order.
        shared_options = {
            'use_porter_stemmer': True,
            'remove_stopwords': False,
            'max_words': 100,
            'compute_rouge_l': True,
        }
        rouge = Rouge(max_ngram=2, **shared_options)
        python_rouge = PythonRouge(**shared_options)

        summaries = self.summaries
        references_list = self.references_list
        expected_metrics, _ = rouge.evaluate(summaries, references_list)
        actual_metrics, _ = python_rouge.evaluate(summaries, references_list)

        self.assert_same_as_rouge(actual_metrics, expected_metrics)