def cal_ROUGE(generated, reference, is_corpus=False):
    """Return six ROUGE scores averaged over all generated sentences.

    The result has six entries, in this order: ROUGE-1, ROUGE-2, ROUGE-3,
    ROUGE-4 and ROUGE-L as computed by ``rouge_n`` / ``rouge_l`` (all with
    ``alpha=0.5``), followed by the score returned by
    ``Rouge().compute_score`` (per the original comment this is the ROUGE-L
    from the pycocoevalcap scorers -- TODO confirm which ``Rouge`` is
    imported).

    Args:
        generated: Sized iterable of generated sentences; each one is
            tokenised with ``str.split()``.
        reference: Container indexed with ``0`` that yields the reference
            sentences (whitespace-separated strings). With ``is_corpus``
            it is also handed unchanged to ``Rouge().compute_score``, so it
            presumably is a ``{0: [...]}`` dict -- verify against callers.
        is_corpus: When true, every generated sentence is scored against
            all sentences in ``reference[0]``. Otherwise sentence ``i`` is
            scored only against ``reference[0][i]``.

    Returns:
        A list of six floats. All zeros when ``generated`` is empty.
    """
    num_metrics = 6
    num_generated = len(generated)
    if num_generated == 0:
        # Nothing to average; avoid dividing by zero below.
        return [0.0] * num_metrics

    # In corpus mode the tokenised references are the same for every
    # generated sentence, so tokenise them only once.
    corpus_refs = [x.split() for x in reference[0]] if is_corpus else None

    totals = [0.0] * num_metrics
    for idx, g in enumerate(generated):
        hyp_tokens = g.split()
        if is_corpus:
            ref_tokens = corpus_refs
            coco_refs = reference
        else:
            ref_tokens = [reference[0][idx].split()]
            coco_refs = {0: [reference[0][idx]]}

        # ROUGE-1 .. ROUGE-4
        score = [rouge_n(hyp_tokens, ref_tokens, order, 0.5)
                 for order in range(1, 5)]
        score.append(rouge_l(hyp_tokens, ref_tokens, 0.5))
        coco_score, _ = Rouge().compute_score(coco_refs, {0: [g]})
        score.append(coco_score)

        # zip() keeps the sentence index ``idx`` from being shadowed.
        totals = [total + value for total, value in zip(totals, score)]

    return [total / num_generated for total in totals]
def get_saliency(target, templates):
    """
    Compute the saliency of every template with respect to the target.

    Input:  ``target`` is a list of word ids and ``templates`` is a list of
            such lists (begin and end tokens are included).
    Returns a list with one entry per template, in the same order, where
    each entry is rouge-1(target, template) + rouge-2(target, template),
    both evaluated with alpha = 0.5.
    """
    def _saliency(template):
        unigram_score = rougescore.rouge_n(target, [template], 1, 0.5)
        bigram_score = rougescore.rouge_n(target, [template], 2, 0.5)
        return unigram_score + bigram_score

    return [_saliency(template) for template in templates]
def test_rouge_with_word_limit(self):
    """ROUGE-1/2 with stop-word removal and a 5-word limit.

    For every summary in the test data, the value from
    ``RougeCalculator.rouge_n`` must be within 1e-5 of both the
    ``rouge_n`` function (alpha 0.5) applied to the calculator's own
    tokenisation and the ``ROUGE-n-F`` value reported by ``Pythonrouge``.
    """
    tolerance = 1e-5
    data = self.load_test_data()
    rouge = RougeCalculator(stopwords=True, word_limit=5)

    for eval_id in data:
        entry = data[eval_id]
        summaries = entry["summaries"]
        references = entry["references"]
        for n in [1, 2]:
            f_key = "ROUGE-{}-F".format(n)
            for s in summaries:
                # Baseline 1: the Pythonrouge wrapper.
                baseline = Pythonrouge(
                    summary_file_exist=False,
                    summary=[[s]],
                    reference=[[[r] for r in references]],
                    n_gram=n,
                    recall_only=False,
                    length_limit=True,
                    length=5,
                    word_level=True,
                    stemming=False,
                    stopwords=True,
                )
                pythonrouge_scores = baseline.calc_score()

                # Baseline 2: rouge_n on the calculator's tokens.
                summary_tokens = rouge.tokenize(s)
                reference_tokens = [rouge.tokenize(r) for r in references]
                expected = rouge_n(summary_tokens, reference_tokens, n, 0.5)

                actual = rouge.rouge_n(s, references, n)

                self.assertLess(abs(expected - actual), tolerance)
                self.assertLess(
                    abs(pythonrouge_scores[f_key] - actual), tolerance)
def test_rouge(self):
    """ROUGE-1/2 from ``RougeCalculator(lang="zh")`` without stop words.

    The calculator receives the ``_compress``-ed summary and references;
    the result must be within 1e-5 of ``rouge_n`` (alpha 0.5) applied to
    the ``_split`` versions of the same texts.
    """
    tolerance = 1e-5
    data = self.load_test_data()
    rouge = RougeCalculator(stopwords=False, lang="zh")

    for eval_id in data:
        entry = data[eval_id]
        summaries = entry["summaries"]
        references = entry["references"]
        for n in [1, 2]:
            for s in summaries:
                compressed_summary = self._compress(s)
                compressed_references = self._compress(references)
                actual = rouge.rouge_n(
                    compressed_summary, compressed_references, n)

                summary_tokens = self._split(s)
                reference_tokens = [self._split(r) for r in references]
                expected = rouge_n(summary_tokens, reference_tokens, n, 0.5)

                self.assertLess(abs(expected - actual), tolerance)
def test_rouge_with_stop_words(self):
    """ROUGE-1/2 from ``RougeCalculator(lang="zh")`` with stop words on.

    The expected value is ``rouge_n`` (alpha 0.5) over the ``_split``
    tokens, after dropping every token that ``rouge._lang.is_stop_word``
    flags. It must be within 1e-5 of the calculator's result.
    """
    tolerance = 1e-5
    data = self.load_test_data()
    rouge = RougeCalculator(stopwords=True, lang="zh")

    def content_words(text):
        # Tokenise, then keep only the tokens that are not stop words.
        return [
            token for token in self._split(text)
            if not rouge._lang.is_stop_word(token)
        ]

    for eval_id in data:
        entry = data[eval_id]
        summaries = entry["summaries"]
        references = entry["references"]
        for n in [1, 2]:
            for s in summaries:
                actual = rouge.rouge_n(s, references, n)

                summary_tokens = content_words(s)
                reference_tokens = [content_words(r) for r in references]
                expected = rouge_n(summary_tokens, reference_tokens, n, 0.5)

                self.assertLess(abs(expected - actual), tolerance)