Ejemplo n.º 1
0
  def testConfidenceIntervalsAgainstRouge155WithStemmingMultiLine(self):
    scorer = rouge_scorer.RougeScorer(
        ["rouge1", "rouge2", "rougeLsum"], use_stemmer=True)
    aggregator = scoring.BootstrapAggregator()
    t_files = [os.path.join(test_util.PYROUGE_DIR, 'target_multi.%d.txt' % i) for i in range(0, 250)]
    p_files = [os.path.join(test_util.PYROUGE_DIR, 'prediction_multi.%d.txt' % i) for i in range(0, 250)]

    targets = [test_util.get_text(x) for x in t_files]
    predictions = [test_util.get_text(x) for x in p_files]
    assert len(targets) == len(predictions)
    assert len(targets) == 250
    for target, prediction in zip(targets, predictions):
      aggregator.add_scores(scorer.score(target, prediction))
    result = aggregator.aggregate()

    # DIR = testdata/pyrouge_evaluate_plain_text_files
    #  pyrouge_evaluate_plain_text_files -s $DIR -sfp "prediction_multi.(.*).txt"
    #    -m $DIR -mfp target_multi.#ID#.txt
    self.assertSimilarAggregates((0.58963, 0.59877, 0.60822),    # P
                                 (0.37327, 0.38091, 0.38914),    # R
                                 (0.45607, 0.46411, 0.47244),    # F
                                 result["rouge1"])
    self.assertSimilarAggregates((0.35429, 0.36516, 0.37665),    # P
                                 (0.22341, 0.23109, 0.23916),    # R
                                 (0.27312, 0.28209, 0.29133),    # F
                                 result["rouge2"])
    self.assertSimilarAggregates((0.58604, 0.59491, 0.60444),    # P
                                 (0.37084, 0.37846, 0.38671),    # R
                                 (0.45305, 0.46113, 0.46946),    # F
                                 result["rougeLsum"])
Ejemplo n.º 2
0
  def testRougeLSumAgainstRouge155WithStemming(self):
    scorer = rouge_scorer.RougeScorer(["rougeLsum"], use_stemmer=True)

    target = test_util.get_text(
        os.path.join(test_util.PYROUGE_DIR, "target_multi.0.txt"))
    prediction = test_util.get_text(
        os.path.join(test_util.PYROUGE_DIR, "prediction_multi.0.txt"))
    result = scorer.score(target, prediction)

    self.assertAlmostEqual(0.36538, result["rougeLsum"].recall, places=5)
    self.assertAlmostEqual(0.66667, result["rougeLsum"].precision, places=5)
    self.assertAlmostEqual(0.47205, result["rougeLsum"].fmeasure, places=5)