def testRougeLNonConsecutive(self):
     """Check rougeL when the common subsequence is not contiguous.

     "testing two" is a subsequence of "testing one two" even though its
     words are not adjacent there, so the expected scores are
     precision 1, recall 2/3 and F-measure 4/5.
     """
     scores = rouge_scorer.RougeScorer(["rougeL"]).score(
         "testing one two", "testing two")
     rouge_l = scores["rougeL"]
     expectations = (
         ("precision", 1),
         ("recall", 2 / 3),
         ("fmeasure", 4 / 5),
     )
     for field, expected in expectations:
         self.assertAlmostEqual(expected, getattr(rouge_l, field))
 def testInvalidRougeTypes(self, rouge_type):
   """Expect ValueError when building or using a scorer for `rouge_type`.

   Both the construction and the scoring call sit inside the assertRaises
   context, so the error may come from either step.
   """
   with self.assertRaises(ValueError):
     rouge_scorer.RougeScorer([rouge_type]).score("testing one two", "testing")
 def testRougeEmpty(self, rouge_type):
     """Scoring against an empty second string must give all-zero scores.

     Args:
       rouge_type: name of the ROUGE variant to score and to look up in the
         result.
     """
     empty_scores = rouge_scorer.RougeScorer([rouge_type]).score(
         "testing one two", "")
     # Precision, recall and F-measure are each expected to be 0.
     for field in ("precision", "recall", "fmeasure"):
         self.assertAlmostEqual(0, getattr(empty_scores[rouge_type], field))
 def testRouge2(self):
     """Check rouge2 (bigram overlap) on a simple pair of strings.

     "testing one two" has two bigrams and "testing one" has one, which is
     shared; the expected scores are precision 1, recall 1/2, F-measure 2/3.
     """
     bigram_scorer = rouge_scorer.RougeScorer(["rouge2"])
     rouge_2 = bigram_scorer.score("testing one two", "testing one")["rouge2"]
     for expected, field in ((1, "precision"),
                             (1 / 2, "recall"),
                             (2 / 3, "fmeasure")):
         self.assertAlmostEqual(expected, getattr(rouge_2, field))
 def testValidRougeTypes(self, rouge_type):
     """A scorer built for one type returns a result keyed by exactly that type.

     Args:
       rouge_type: name of the ROUGE variant requested from the scorer.
     """
     scores = rouge_scorer.RougeScorer([rouge_type]).score(
         "testing one two", "testing")
     returned_types = list(scores.keys())
     requested_types = [rouge_type]
     self.assertSameElements(returned_types, requested_types)
from rouge import rouge_scorer
from rouge import scoring
from summae import p2s_eval
from summae import util

# Shared flags object; `flags` is imported outside this view (presumably
# absl.flags -- TODO confirm).
FLAGS = flags.FLAGS

# String-valued flags: (name, default, help text).
flags.DEFINE_string('data_dir', '.', 'Data directory.')
flags.DEFINE_string('eval_subset', 'test',
                    'which subset (valid/test) to eval/decode.')
flags.DEFINE_string('output_dir', '/tmp/12342',
                    'local directory to save extractive oracle')
flags.DEFINE_string('vocab_file', '',
                    'Subword vocab file.')  # for detokenizing the first sentence

# Module-level scorer for rouge1, rouge2 and rougeL, created once with
# stemming enabled.
my_rouge_scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'],
                                           use_stemmer=True)


def get_extracts(s):
    """Return the five untokenized sentences of `s` as extractive baselines.

    Args:
        s: object exposing
            `feature_lists.feature_list['untokenized_sentences'].feature`,
            a sequence of items that each carry `bytes_list.value`
            (presumably a tf.train.SequenceExample -- TODO confirm).

    Returns:
        A 5-tuple holding the first `bytes_list` value of each sentence
        feature, in order.

    Raises:
        AssertionError: if the example does not contain exactly 5 sentences.
    """
    sentence_features = (
        s.feature_lists.feature_list['untokenized_sentences'].feature)
    assert len(sentence_features) == 5
    return tuple(
        sentence_features[idx].bytes_list.value[0] for idx in range(5))


def human_ave(summ_list):
    """Average pairwise rouge between two human summaries."""
    agg = scoring.BootstrapAggregator()
    for s1_id, s1 in enumerate(summ_list):
        for s2_id, s2 in enumerate(summ_list):
            if s1_id >= s2_id:  # only compute for s1_id < s2_id
Beispiel #7
0
 def testAssertsOnInvalidInputFiles(self):
     """Expect ValueError from compute_scores_and_write_to_csv on "invalid*" inputs."""
     rouge1_scorer = rouge_scorer.RougeScorer(["rouge1"], False)
     with self.assertRaises(ValueError):
         # The aggregator is created inside the context, so an error raised
         # while constructing it would also be caught here.
         aggregator = scoring.BootstrapAggregator()
         io.compute_scores_and_write_to_csv(
             "invalid*", "invalid*", "invalid", rouge1_scorer, aggregator)
 def testRouge1Multi(self):
     """Check rouge1 from score_multi when given a one-element list.

     With ["testing one two"] and "testing" the expected scores are
     precision 1, recall 1/3 and F-measure 1/2.
     """
     unigram_scorer = rouge_scorer.RougeScorer(["rouge1"])
     multi_scores = unigram_scorer.score_multi(["testing one two"], "testing")
     rouge_1 = multi_scores["rouge1"]
     expectations = (
         ("precision", 1),
         ("recall", 1 / 3),
         ("fmeasure", 1 / 2),
     )
     for field, expected in expectations:
         self.assertAlmostEqual(expected, getattr(rouge_1, field))
Beispiel #9
0
 def __init__(self):
     """Set up the ROUGE metric names, the main metric, and a stemming scorer."""
     # Metric designated as the main one among those listed below.
     self.main_metric = 'rougeL'
     self.metrics = [
         'rouge1',
         'rouge2',
         'rougeL',
     ]
     # One scorer covering every listed metric, with stemming enabled.
     self.scorer = rouge_scorer.RougeScorer(self.metrics, use_stemmer=True)