Example #1
0
def cal_ROUGE(generated, reference, is_corpus=False):
    """Average six ROUGE-style scores over a collection of generated sentences.

    For every sentence in ``generated`` the following are computed and then
    averaged slot by slot over all sentences:

    * slots 0-3: ``rouge_n`` with orders 1, 2, 3 and 4
    * slot 4:    ``rouge_l``
    * slot 5:    the first value returned by ``Rouge().compute_score``

    Args:
        generated: Sequence of strings; each one is tokenised with
            ``str.split()``.
        reference: Mapping whose key ``0`` holds a sequence of reference
            strings.
        is_corpus: When true, every generated sentence is scored against all
            of ``reference[0]`` (and ``reference`` itself is handed to
            ``Rouge().compute_score``).  Otherwise sentence ``i`` is scored
            only against ``reference[0][i]``.

    Returns:
        A list of six averaged scores in the slot order described above.
        When ``generated`` is empty the result is six zeros instead of a
        ``ZeroDivisionError``.
    """
    num_scores = 6
    count = len(generated)
    if count == 0:
        # Nothing to average; without this guard the final division fails.
        return [0.0] * num_scores

    # The corpus-level references do not depend on the sentence being scored,
    # so tokenise them once instead of five times per generated sentence.
    corpus_ref_tokens = None
    if is_corpus:
        corpus_ref_tokens = [x.split() for x in reference[0]]

    totals = [0.0] * num_scores
    for idx, g in enumerate(generated):
        hyp_tokens = g.split()
        if is_corpus:
            ref_tokens = corpus_ref_tokens
            coco_refs = reference
        else:
            ref_tokens = [reference[0][idx].split()]
            coco_refs = {0: [reference[0][idx]]}

        # 0.5 is forwarded unchanged as the last positional argument of
        # rouge_n / rouge_l (presumably the precision/recall balance --
        # confirm against the helper's definition).
        score = [
            rouge_n(hyp_tokens, ref_tokens, order + 1, 0.5)
            for order in range(4)
        ]
        score.append(rouge_l(hyp_tokens, ref_tokens, 0.5))
        coco_score, _ = Rouge().compute_score(coco_refs, {0: [g]})
        score.append(coco_score)

        # zip() avoids reusing the name ``idx`` inside the accumulation, which
        # would leak out of a list comprehension under Python 2.
        totals = [total + s for total, s in zip(totals, score)]

    return [total / count for total in totals]
Example #2
0
def get_saliency(target, templates):
  """
  Score each template against the target with ROUGE-1 + ROUGE-2.

  Input: target is handed to rougescore.rouge_n as the candidate, and every
  template is wrapped in a one-element reference list. Per the original
  author's note, both are lists of word ids that still include the begin and
  end tokens.
  Returns a list with one entry per template, in input order, holding
  rouge_n(order 1) + rouge_n(order 2) for that template.
  """

  def _saliency(template):
    # Same call order as before: unigram score first, then bigram score.
    unigram = rougescore.rouge_n(target, [template], 1, 0.5)
    bigram = rougescore.rouge_n(target, [template], 2, 0.5)
    return unigram + bigram

  return [_saliency(template) for template in templates]
Example #3
0
 def test_rouge_with_word_limit(self):
     """Check word-limited ROUGE-N from RougeCalculator against two baselines.

     For every summary in the loaded test data and for n in {1, 2}, the value
     of ``RougeCalculator.rouge_n`` must be within 1e-5 of both:

     * the module-level ``rouge_n`` applied to the calculator's own tokens, and
     * the ``"ROUGE-<n>-F"`` entry reported by ``Pythonrouge``.
     """
     data = self.load_test_data()
     limit = 5
     calculator = RougeCalculator(stopwords=True, word_limit=limit)
     tolerance = 1e-5
     for eval_id in data:
         record = data[eval_id]
         summaries = record["summaries"]
         references = record["references"]
         for n in (1, 2):
             for summary in summaries:
                 # Baseline 1: Pythonrouge configured with the same word
                 # limit and stop-word setting as the calculator.
                 options = dict(
                     summary_file_exist=False,
                     summary=[[summary]],
                     reference=[[[ref] for ref in references]],
                     n_gram=n,
                     recall_only=False,
                     length_limit=True,
                     length=limit,
                     word_level=True,
                     stemming=False,
                     stopwords=True,
                 )
                 pythonrouge_scores = Pythonrouge(**options).calc_score()

                 # Baseline 2: plain rouge_n over the calculator's tokens.
                 summary_tokens = calculator.tokenize(summary)
                 reference_tokens = [
                     calculator.tokenize(ref) for ref in references
                 ]
                 token_level = rouge_n(summary_tokens, reference_tokens, n,
                                       0.5)

                 actual = calculator.rouge_n(summary, references, n)
                 self.assertLess(abs(token_level - actual), tolerance)
                 score_key = "ROUGE-{}-F".format(n)
                 self.assertLess(abs(pythonrouge_scores[score_key] - actual),
                                 tolerance)
Example #4
0
 def test_rouge(self):
     """Check RougeCalculator (lang="zh", no stop words) against rouge_n.

     For every summary in the loaded test data and for n in {1, 2}, the
     calculator is fed the ``self._compress``-ed summary and references,
     while the baseline ``rouge_n`` is fed their ``self._split`` token
     lists. The two scores must differ by less than 1e-5.
     """
     data = self.load_test_data()
     calculator = RougeCalculator(stopwords=False, lang="zh")
     tolerance = 1e-5
     for eval_id in data:
         record = data[eval_id]
         summaries = record["summaries"]
         references = record["references"]
         for n in (1, 2):
             for summary in summaries:
                 compressed_summary = self._compress(summary)
                 compressed_refs = self._compress(references)
                 actual = calculator.rouge_n(compressed_summary,
                                             compressed_refs, n)

                 summary_tokens = self._split(summary)
                 reference_tokens = [self._split(ref) for ref in references]
                 expected = rouge_n(summary_tokens, reference_tokens, n, 0.5)

                 self.assertLess(abs(expected - actual), tolerance)
Example #5
0
    def test_rouge_with_stop_words(self):
        """Check stop-word-aware RougeCalculator (lang="zh") against rouge_n.

        The baseline tokenises with ``self._split`` and drops every word the
        calculator's language object reports as a stop word. For every
        summary in the loaded test data and for n in {1, 2}, the two scores
        must differ by less than 1e-5.
        """
        data = self.load_test_data()
        calculator = RougeCalculator(stopwords=True, lang="zh")
        tolerance = 1e-5

        def content_words(text):
            # Tokenise, then keep only the words that are not stop words.
            return [
                word for word in self._split(text)
                if not calculator._lang.is_stop_word(word)
            ]

        for eval_id in data:
            record = data[eval_id]
            summaries = record["summaries"]
            references = record["references"]
            for n in (1, 2):
                for summary in summaries:
                    actual = calculator.rouge_n(summary, references, n)

                    summary_tokens = content_words(summary)
                    reference_tokens = [
                        content_words(ref) for ref in references
                    ]
                    expected = rouge_n(summary_tokens, reference_tokens, n,
                                       0.5)

                    self.assertLess(abs(expected - actual), tolerance)