def testConfidenceIntervalsAgainstRouge155WithStemmingMultiLine(self):
    """Checks bootstrap-aggregated scores on multi-line pairs against ROUGE-1.5.5.

    Scores 250 target/prediction file pairs with stemming enabled, aggregates
    them with the bootstrap aggregator, and compares the resulting confidence
    intervals to reference values produced by pyrouge (command reproduced in
    the comment below).
    """
    scorer = rouge_scorer.RougeScorer(
        ["rouge1", "rouge2", "rougeLsum"], use_stemmer=True)
    aggregator = scoring.BootstrapAggregator()

    num_pairs = 250
    targets = [
        test_util.get_text(
            os.path.join(test_util.PYROUGE_DIR, 'target_multi.%d.txt' % i))
        for i in range(num_pairs)
    ]
    predictions = [
        test_util.get_text(
            os.path.join(test_util.PYROUGE_DIR, 'prediction_multi.%d.txt' % i))
        for i in range(num_pairs)
    ]
    assert len(targets) == len(predictions)
    assert len(targets) == 250

    for target, prediction in zip(targets, predictions):
        aggregator.add_scores(scorer.score(target, prediction))
    result = aggregator.aggregate()

    # Reference values obtained via:
    # DIR = testdata/pyrouge_evaluate_plain_text_files
    # pyrouge_evaluate_plain_text_files -s $DIR -sfp "prediction_multi.(.*).txt"
    # -m $DIR -mfp target_multi.#ID#.txt
    self.assertSimilarAggregates((0.58963, 0.59877, 0.60822),  # P
                                 (0.37327, 0.38091, 0.38914),  # R
                                 (0.45607, 0.46411, 0.47244),  # F
                                 result["rouge1"])
    self.assertSimilarAggregates((0.35429, 0.36516, 0.37665),  # P
                                 (0.22341, 0.23109, 0.23916),  # R
                                 (0.27312, 0.28209, 0.29133),  # F
                                 result["rouge2"])
    self.assertSimilarAggregates((0.58604, 0.59491, 0.60444),  # P
                                 (0.37084, 0.37846, 0.38671),  # R
                                 (0.45305, 0.46113, 0.46946),  # F
                                 result["rougeLsum"])
def testRougeLSumAgainstRouge155WithStemming(self):
    """Verifies rougeLsum on one multi-line pair matches ROUGE-1.5.5 values.

    Uses the first target/prediction file pair with stemming enabled and
    checks recall, precision, and F-measure to five decimal places.
    """
    scorer = rouge_scorer.RougeScorer(["rougeLsum"], use_stemmer=True)
    target_path = os.path.join(test_util.PYROUGE_DIR, "target_multi.0.txt")
    prediction_path = os.path.join(
        test_util.PYROUGE_DIR, "prediction_multi.0.txt")
    result = scorer.score(
        test_util.get_text(target_path), test_util.get_text(prediction_path))
    score = result["rougeLsum"]
    self.assertAlmostEqual(0.36538, score.recall, places=5)
    self.assertAlmostEqual(0.66667, score.precision, places=5)
    self.assertAlmostEqual(0.47205, score.fmeasure, places=5)