Esempio n. 1
0
    def test_sentence_score_L(self):
        """``Rouge.sentence_score(mode='L')`` must equal RougeCalculator's ROUGE-L."""
        ref_text = "It 's my living town"
        hyp_text = 'I went to the Mars from my living town.'
        wrapper = rouge.Rouge()

        # Baseline: English calculator with stop-word removal disabled.
        baseline = RougeCalculator(stopwords=False, lang="en")
        expected = baseline.rouge_l(hyp_text, ref_text)
        actual = wrapper.sentence_score(ref_text, hyp_text, mode='L')
        eq_(expected, actual)
Esempio n. 2
0
    def test_sentence_score_N_2(self):
        """``Rouge.sentence_score(n=2)`` must equal RougeCalculator's ROUGE-2."""
        ref_text = 'I went to Mars'
        hyp_text = 'I went to the Mars from my living town.'
        wrapper = rouge.Rouge()

        # Baseline: English calculator with stop-word removal disabled.
        baseline = RougeCalculator(stopwords=False, lang="en")
        expected = baseline.rouge_n(hyp_text, ref_text, n=2)
        actual = wrapper.sentence_score(ref_text, hyp_text, n=2)
        eq_(expected, actual)
Esempio n. 3
0
def evaluate(net, criterion, vocab, data_iter, train_next=True):
    """Run ``net`` over ``data_iter`` and report the loss and ROUGE scores.

    Each batch is decoded greedily (arg-max over the last dimension of the
    model output) and every decoded summary is scored against the matching
    target text with ROUGE-1, ROUGE-2 and ROUGE-L.

    The function also reads two module-level names: ``example_idx`` (indices
    of the samples printed after the loop) and ``args`` (``output_dir`` and
    ``load_model`` build the dump file path).

    Args:
        net: Model; called with the tensors returned by ``vocab.read_batch``
            plus ``vocab.word_num`` and ``test=True``.
        criterion: Loss applied to the log of the flattened model output.
        vocab: Provides ``read_batch``, ``word_num``, ``EOS_IDX`` and
            ``id_word``.
        data_iter: Sized iterable of batches.
        train_next: When true the model is switched back to training mode
            before returning. When false every (source, reference, summary)
            triple is written to ``args.output_dir + args.load_model``.

    Returns:
        ``(loss, r1, r2, rl)``: the loss averaged over ``len(data_iter)``
        batches and the ROUGE-1/2/L scores averaged over all summaries.
    """
    net.eval()
    reviews = []
    refs = []
    sums = []
    loss, r1, r2, rl = .0, .0, .0, .0
    rouge = RougeCalculator(stopwords=False, lang="en")
    # Nothing is back-propagated here, so autograd bookkeeping is disabled.
    with torch.no_grad():
        for batch in tqdm(data_iter):
            src, trg, src_embed, trg_embed, src_mask, src_lens, trg_lens, src_text, trg_text = vocab.read_batch(
                batch)
            pre_output = net(src,
                             trg,
                             src_embed,
                             trg_embed,
                             vocab.word_num,
                             src_mask,
                             src_lens,
                             trg_lens,
                             test=True)
            # The small constant keeps log() finite when a probability is 0.
            output = torch.log(pre_output.view(-1, pre_output.size(-1)) + 1e-20)
            trg_output = trg.view(-1)
            loss += criterion(output, trg_output).data.item() / len(src_lens)
            reviews.extend(src_text)
            refs.extend(trg_text)
            # Index 3 can never win the arg-max below
            # (presumably a special token such as UNK -- TODO confirm).
            pre_output[:, :, 3] = float('-inf')
            rst = torch.argmax(pre_output, dim=-1).tolist()
            for i, summary in enumerate(rst):
                words = []
                for idx in summary:
                    if idx == vocab.EOS_IDX:
                        break
                    words.append(vocab.id_word(idx))
                cur_sum = ' '.join(words).strip()
                if not cur_sum:
                    # Placeholder so an empty decode is still scored.
                    cur_sum = '<EMP>'
                sums.append(cur_sum)
                r1 += rouge.rouge_n(cur_sum, trg_text[i], n=1)
                r2 += rouge.rouge_n(cur_sum, trg_text[i], n=2)
                rl += rouge.rouge_l(cur_sum, trg_text[i])
    for i in example_idx:
        print('> %s' % reviews[i])
        print('= %s' % refs[i])
        print('< %s\n' % sums[i])
    if not train_next:
        with open(args.output_dir + args.load_model, 'w') as f:
            for review, ref, summary in zip(reviews, refs, sums):
                f.write('> %s\n' % review)
                f.write('= %s\n' % ref)
                f.write('< %s\n\n' % summary)
    loss /= len(data_iter)
    r1 /= len(sums)
    r2 /= len(sums)
    rl /= len(sums)
    if train_next:
        net.train()
    return loss, r1, r2, rl
 def __init__(self, vocabulary, model, optimizer, criterion, phi):
     """Keep references to the training components and build a ROUGE scorer.

     Args:
         vocabulary: Stored as ``self.vocabulary``.
         model: Stored as ``self.model``.
         optimizer: Stored as ``self.optimizer``.
         criterion: Stored as ``self.criterion``.
         phi: Stored as ``self.phi``.
     """
     self.model = model
     self.optimizer = optimizer
     self.criterion = criterion
     self.vocabulary = vocabulary
     # English ROUGE with stop-word removal and stemming both switched off.
     scorer_options = dict(stopwords=False, lang="en", stemming=False)
     self.rouge = RougeCalculator(**scorer_options)
     self.phi = phi
Esempio n. 5
0
def cal_rouge(summa, refer):
    """Print ROUGE-1, ROUGE-2 and ROUGE-L of ``summa`` against ``refer``.

    The three scores are printed on separate lines. ROUGE-1 receives
    ``refer`` as-is; ROUGE-2 and ROUGE-L receive it wrapped in a list.
    """
    calculator = RougeCalculator(stopwords=True, lang="en")

    unigram = calculator.rouge_n(summary=summa, references=refer, n=1)
    bigram = calculator.rouge_n(summary=summa, references=[refer], n=2)
    longest = calculator.rouge_l(summary=summa, references=[refer])

    template = "ROUGE-1: {}, ROUGE-2: {}, ROUGE-L: {}"
    report = template.format(unigram, bigram, longest)
    # Turn the comma-separated report into one score per line.
    print(report.replace(", ", "\n"))
Esempio n. 6
0
def evaluate_rouge_n(summary, references, n=2):
    """Return a Japanese ROUGE score selected by ``n``.

    Args:
        summary: Candidate summary.
        references: Reference summary or summaries.
        n: An ``int`` selects ROUGE-N; ``"l"`` selects ROUGE-L; ``"be"``
            selects ROUGE-BE.

    Returns:
        The requested score, or ``None`` for any other value of ``n``.
    """
    calculator = RougeCalculator(stopwords=True, lang="ja")
    if type(n) is int:
        return calculator.rouge_n(summary=summary, references=references, n=n)
    if n == "l":
        return calculator.rouge_l(summary=summary, references=references)
    if n == "be":
        return calculator.rouge_be(summary=summary, references=references)
    # Unrecognised selector: nothing is computed.
    return None
Esempio n. 7
0
def calc_rouge(original_sentences, predict_sentences):
    """Accumulate ROUGE-1 and ROUGE-2 over paired sentences.

    Args:
        original_sentences: Reference sentences (strings).
        predict_sentences: Predicted sentences (strings), paired with the
            references by position; extra items on either side are ignored.

    Returns:
        ``(rouge_1, rouge_2)``: the *sums* (not averages) of the per-pair
        ROUGE-1 and ROUGE-2 scores.
    """
    # Built once: the calculator's settings are the same for every pair.
    rouge = RougeCalculator(stopwords=True, lang="en")
    rouge_1 = 0.0
    rouge_2 = 0.0
    for original, predict in zip(original_sentences, predict_sentences):
        # Remove padding markers before scoring.
        original = original.replace("<PAD>", "").strip()
        predict = predict.replace("<PAD>", "").strip()
        rouge_1 += rouge.rouge_1(summary=predict, references=original)
        rouge_2 += rouge.rouge_2(summary=predict, references=original)
    return rouge_1, rouge_2
Esempio n. 8
0
    def __init__(self,
                 hugging_face_model_name: str = "distilbert-base-uncased-distilled-squad",
                 tokenizer_name: str = "distilbert-base-uncased",
                 cuda_is_available: bool = True):
        """Create the transformer reader and the ROUGE calculator.

        Args:
            hugging_face_model_name: Model name handed to ``TransformersReader``.
            tokenizer_name: Tokenizer name handed to ``TransformersReader``.
            cuda_is_available: Truthy passes ``use_gpu=0`` to the reader,
                falsy passes ``use_gpu=-1``.
        """
        if cuda_is_available:
            gpu_flag = 0
        else:
            gpu_flag = -1

        reader_options = dict(model=hugging_face_model_name,
                              tokenizer=tokenizer_name,
                              context_window_size=512,
                              use_gpu=gpu_flag)
        self.__reader = TransformersReader(**reader_options)

        self.__rouge = RougeCalculator(stopwords=False)
Esempio n. 9
0
 def test_rouge(self):
     """Chinese ROUGE-1/2 from RougeCalculator must match the ``rouge_n`` baseline."""
     data = self.load_test_data()
     calculator = RougeCalculator(stopwords=False, lang="zh")
     for eval_id in data:
         entry = data[eval_id]
         summaries = entry["summaries"]
         references = entry["references"]
         for n in (1, 2):
             for candidate in summaries:
                 actual = calculator.rouge_n(self._compress(candidate),
                                             self._compress(references), n)
                 candidate_tokens = self._split(candidate)
                 reference_tokens = [self._split(r) for r in references]
                 expected = rouge_n(candidate_tokens, reference_tokens, n,
                                    0.5)
                 self.assertLess(abs(expected - actual), 1e-5)
Esempio n. 10
0
def evaluate_rouge(summary, references, n=1, lang="zh"):
    """Score each summary against its references and average the results.

    Args:
        summary: Sequence of candidate summaries.
        references: Sequence of references, one entry per summary.
        n: N-gram order for ROUGE-N, or ``'l'`` for ROUGE-L.
        lang: Language passed to ``RougeCalculator``.

    Returns:
        ``(rouge_avg, rouges)``: the mean score and the per-item scores.
    """
    calculator = RougeCalculator(stopwords=True, lang=lang)
    assert len(summary) == len(references), "number of summary and references should be equal"

    def score_pair(candidate, refs):
        # 'l' selects ROUGE-L; anything else is treated as the n-gram order.
        if n == 'l':
            return calculator.rouge_l(candidate, refs)
        return calculator.rouge_n(candidate, refs, n)

    rouges = [score_pair(s, rs) for s, rs in zip(summary, references)]
    rouge_avg = sum(rouges) / len(rouges)
    return rouge_avg, rouges
Esempio n. 11
0
def score_model(test_pairs, model, model_id):
    """Print the mean ROUGE-1/2/L of ``model``'s beam predictions.

    Predictions come from ``predict_from_data`` over the range ``(0, 5000)``.
    Only the text before the first ``'EOS'`` in each prediction and reference
    is scored. The printed line starts with ``model_id`` split on ``"@"``,
    followed by the three means rounded to three decimals.
    """
    calculator = RougeCalculator(stopwords=False, lang="en")
    results = predict_from_data(test_pairs, _range=(0, 5000), model=model)
    total_r1, total_r2, total_rl = 0, 0, 0
    for key in results:
        entry = results[key]
        hypothesis = entry['beam'].split('EOS')[0]
        reference = entry['ref'].split('EOS')[0]
        total_r1 += calculator.rouge_1(hypothesis, reference)
        total_r2 += calculator.rouge_2(hypothesis, reference)
        total_rl += calculator.rouge_l(hypothesis, reference)
    count = len(results)
    print(model_id.split("@"), round(total_r1 / count, 3),
          round(total_r2 / count, 3), round(total_rl / count, 3))
Esempio n. 12
0
def calculate(summary, reference):
    """Return nine ROUGE scores for ``summary`` against ``reference``.

    Order of the result: ROUGE-1 for alpha 1, 0, 0.5; then ROUGE-2 for the
    same alphas; then ROUGE-L for the same alphas.
    """
    calculator = RougeCalculator(lang="en")
    alphas = [1, 0, 0.5]

    scores = [
        calculator.rouge_n(summary=summary,
                           references=reference,
                           n=order,
                           alpha=weight)
        for order, weight in product([1, 2], alphas)
    ]
    scores.extend(
        calculator.rouge_l(summary=summary, references=reference, alpha=weight)
        for weight in alphas)
    return scores
Esempio n. 13
0
def myeval(valid_x, valid_y, vocab, model):
    """Evaluate ``model`` on a fixed number of validation minibatches.

    For each minibatch the loss is taken from ``run_batch`` and the greedy
    predictions are scored against the references with ROUGE-1/2/L and BLEU.
    Predictions are written to ``tmp/systems/<ckpt>.txt`` and references to
    ``tmp/models/ref_<ckpt>.txt``, one sentence per line, where ``<ckpt>`` is
    ``args.ckpt_file`` without its first nine characters. The averaged
    metrics and ``2 ** average_loss`` (labelled "Perplexity") are printed,
    and the model is put back into training mode.

    Args:
        valid_x: Source batch provider exposing ``next_batch()``.
        valid_y: Target batch provider exposing ``next_batch()``.
        vocab: Mapping from token string to token id.
        model: Model to evaluate.
    """
    rouge = RougeCalculator(stopwords=True, lang="zh")
    bleu_ch = BLEUCalculator(lang="zh")

    model.eval()
    sum_rouge_1 = 0
    sum_rouge_2 = 0
    sum_rouge_L = 0
    score_ch = 0
    sum_loss = 0
    # Sentences actually scored; dividing by this stays correct even when
    # minibatches differ in size.
    num_sentences = 0
    limit = 63
    logging.info('Evaluating on %d minibatches...' % limit)
    i2w = {key: value for value, key in vocab.items()}
    end_id = vocab[config.end_tok]
    pad_id = vocab[config.pad_tok]
    ckpt_file = args.ckpt_file[9:]
    pred_path = os.path.join('tmp/systems', '%s.txt' % ckpt_file)
    ref_path = os.path.join('tmp/models', 'ref_%s.txt' % ckpt_file)
    # Context managers close both files even if scoring raises.
    with open(pred_path, "w") as fout_pred, open(ref_path, "w") as fout_y:
        for _ in range(limit):
            with torch.no_grad():
                loss = run_batch(valid_x, valid_y, model)
                sum_loss += loss
                _, x = valid_x.next_batch()
                pred = greedy(model, x, vocab)
                _, y = valid_y.next_batch()
                # Drop the first target column
                # (presumably the start token -- TODO confirm).
                y = y[:, 1:].tolist()
                for idx, pred_toks in enumerate(pred):
                    line_pred = [i2w[tok] for tok in pred_toks
                                 if tok != end_id and tok != pad_id]
                    line_y = [i2w[tok] for tok in y[idx]
                              if tok != end_id and tok != pad_id]
                    pred_text = " ".join(line_pred)
                    ref_text = " ".join(line_y)
                    fout_pred.write(pred_text + "\n")
                    fout_y.write(ref_text + "\n")
                    sum_rouge_1 += rouge.rouge_n(references=ref_text, summary=pred_text, n=1)
                    sum_rouge_2 += rouge.rouge_n(references=ref_text, summary=pred_text, n=2)
                    sum_rouge_L += rouge.rouge_l(references=ref_text, summary=pred_text)
                    # NOTE(review): the reference is passed first and the
                    # prediction second, the opposite of the ROUGE calls --
                    # confirm this matches BLEUCalculator.bleu's signature.
                    score_ch += bleu_ch.bleu(ref_text, pred_text)
                num_sentences += len(pred)
    avg_rouge_1 = sum_rouge_1 / num_sentences
    avg_rouge_2 = sum_rouge_2 / num_sentences
    avg_rouge_L = sum_rouge_L / num_sentences
    avg_bleu_ch = score_ch / num_sentences
    avg_loss = sum_loss / limit
    print("ROUGE_1 = ", avg_rouge_1)
    print("ROUGE_2 = ", avg_rouge_2)
    print("ROUGE_L = ", avg_rouge_L)
    print("BLEU = ", avg_bleu_ch)
    print("Perplexity = ", math.pow(2, avg_loss))
    model.train()
Esempio n. 14
0
class RougeNCalc:
    """Callable that returns the Japanese ROUGE-1 score of a summary."""

    def __init__(self):
        # Japanese calculator with stop-word removal enabled.
        self.rouge = RougeCalculator(lang="ja", stopwords=True)

    def __call__(self, summary, reference):
        """Return ROUGE-1 of ``summary`` measured against ``reference``."""
        return self.rouge.rouge_n(summary, reference, n=1)
Esempio n. 15
0
def avg_rouge(ref_dir, dec_dir, n):
    """Average a Chinese ROUGE score over reference/decoded file pairs.

    Every ``*reference.txt`` file in ``ref_dir`` is paired with
    ``<number>_decoded.txt`` in ``dec_dir``, where ``<number>`` is the part
    of the reference file name before the first underscore. The pieces
    produced by ``gen_sentence`` for each file are concatenated, each
    followed by one space, and the two resulting strings are scored.

    Args:
        ref_dir: Directory holding the reference files.
        dec_dir: Directory holding the decoded (system) files.
        n: ``1`` for ROUGE-1, ``2`` for ROUGE-2, ``'l'`` for ROUGE-L. Any
            other value scores nothing.

    Returns:
        The mean of the per-file scores.

    Raises:
        ZeroDivisionError: If no score was collected (no reference files
            matched, or ``n`` is not one of the supported values).
    """
    # Two earlier variants (one built on ``Rouge().get_scores``, one on
    # ``compute_rouge_n``) were superseded by the sumeval calculator below.
    # One calculator serves every file; its settings never change in the loop.
    rouge = RougeCalculator(stopwords=True, lang="zh")
    scores_list = []
    for ref_file in glob.glob(os.path.join(ref_dir, "*reference.txt")):
        number = os.path.basename(ref_file).split("_")[0]
        dec_file = os.path.join(dec_dir, "{}_decoded.txt".format(number))
        dec_cont = ''.join(i + ' ' for i in gen_sentence(dec_file))
        ref_cont = ''.join(i + ' ' for i in gen_sentence(ref_file))
        if n == 1:
            scores_list.append(rouge.rouge_n(dec_cont, ref_cont, n=1))
        elif n == 2:
            scores_list.append(rouge.rouge_n(dec_cont, ref_cont, n=2))
        elif n == 'l':
            scores_list.append(rouge.rouge_l(dec_cont, ref_cont))

    return sum(scores_list) / len(scores_list)
Esempio n. 16
0
def main(args):
    """Print the average ROUGE-1, ROUGE-2 and ROUGE-L of a system output file.

    ``args.system_out`` and ``args.reference`` are read line by line in
    lockstep. When ``args.keyword`` is set, both lines pass through a
    ``KeywordRemover`` before scoring. ROUGE-1 and ROUGE-L use the
    calculator with stop-word removal; ROUGE-2 uses the one without.
    """
    stop_removed = RougeCalculator(stopwords=True, lang=args.lang)
    stop_kept = RougeCalculator(stopwords=False, lang=args.lang)
    if args.keyword:
        strip_keywords = KeywordRemover(args.keyword)

    r1_scores, r2_scores, rl_scores = [], [], []
    with open(args.system_out) as system_file, \
            open(args.reference) as reference_file:
        pairs = zip(system_file, reference_file)
        for line_no, (system_line, reference_line) in enumerate(pairs):
            # Progress counter rewritten in place on the same terminal line.
            print(line_no, end='\r', flush=True)
            if args.keyword:
                system_line = strip_keywords(system_line)
                reference_line = strip_keywords(reference_line)
            r1 = stop_removed.rouge_1(summary=system_line,
                                      references=reference_line,
                                      alpha=args.alpha)
            r1_scores.append(r1)
            r2 = stop_kept.rouge_2(summary=system_line,
                                   references=reference_line,
                                   alpha=args.alpha)
            r2_scores.append(r2)
            rl = stop_removed.rouge_l(summary=system_line,
                                      references=reference_line,
                                      alpha=args.alpha)
            rl_scores.append(rl)
    print('ROUGE-1\t%.6f' % (np.average(r1_scores)))
    print('ROUGE-2\t%.6f' % (np.average(r2_scores)))
    print('ROUGE-L\t%.6f' % (np.average(rl_scores)))
Esempio n. 17
0
    def test_rouge_with_stop_words(self):
        """With stop-word removal on, ROUGE-1/2 must match the ``rouge_n`` baseline."""
        data = self.load_test_data()
        calculator = RougeCalculator(stopwords=True, lang="zh")

        def content_words(text):
            # Tokenise, then drop what the calculator's language object
            # reports as a stop word.
            return [token for token in self._split(text)
                    if not calculator._lang.is_stop_word(token)]

        for eval_id in data:
            entry = data[eval_id]
            summaries = entry["summaries"]
            references = entry["references"]
            for n in (1, 2):
                for candidate in summaries:
                    actual = calculator.rouge_n(candidate, references, n)
                    candidate_words = content_words(candidate)
                    reference_words = [content_words(r) for r in references]
                    expected = rouge_n(candidate_words, reference_words, n,
                                       0.5)
                    self.assertLess(abs(expected - actual), 1e-5)
Esempio n. 18
0
 def __init__(self,
              metrics: List[str] = [
                  "rouge_1", "rouge_2", "rouge_l", "rouge_be", "bleu"
              ],
              lang: str = "en",
              stopwords: bool = True,
              stemming: bool = True,
              use_porter=True):
     """Build the ROUGE and BLEU calculators and record the metric names.

     Args:
         metrics: Metric names; stored sorted in ``self.metrics``.
         lang: Language passed to ``BLEUCalculator`` only.
         stopwords: Forwarded to ``RougeCalculator``.
         stemming: Forwarded to ``RougeCalculator``.
         use_porter: Truthy selects the ``"en-porter"`` ROUGE language,
             falsy selects ``"en"``.
     """
     # The ROUGE language depends only on ``use_porter``, never on ``lang``.
     rouge_lang = "en-porter" if use_porter else "en"
     self.rouge = RougeCalculator(stopwords=stopwords,
                                  stemming=stemming,
                                  lang=rouge_lang)
     self.bleu = BLEUCalculator(lang=lang)
     self.metrics = sorted(metrics)
Esempio n. 19
0
def calc_rouge(machine_summery, reference_summery, debug_print=False):
    """Compute English ROUGE-1, ROUGE-2 and ROUGE-L with stop-word removal.

    Args:
        machine_summery: Generated summary to score.
        reference_summery: Reference summary (or summaries).
        debug_print: When true, also print the three scores, one per line.

    Returns:
        ``(rouge_1, rouge_2, rouge_l)``.
    """
    calculator = RougeCalculator(stopwords=True, lang="en")
    pair = dict(summary=machine_summery, references=reference_summery)

    rouge_1 = calculator.rouge_n(n=1, **pair)
    rouge_2 = calculator.rouge_n(n=2, **pair)
    rouge_l = calculator.rouge_l(**pair)

    if debug_print:
        template = "current sentences results:\nROUGE-1: {}, ROUGE-2: {}, ROUGE-L: {}"
        report = template.format(rouge_1, rouge_2, rouge_l)
        print(report.replace(", ", "\n"))

    return rouge_1, rouge_2, rouge_l
Esempio n. 20
0
    def test_custom_lang(self):
        """A user-defined ``BaseLang`` must work with both ROUGE and BLEU."""
        class Custom(BaseLang):
            """Language registered as "cs" whose tokens are separated by '/'."""

            def __init__(self):
                super(Custom, self).__init__("cs")

            def tokenize(self, text):
                return text.split("/")

        slash_lang = Custom()

        rouge_calc = RougeCalculator(lang=slash_lang)
        rouge_value = rouge_calc.rouge_n(
            summary="I/went/to/the/Mars/from/my/living/town.",
            references="I/went/to/Mars",
            n=1)

        bleu_calc = BLEUCalculator(lang=slash_lang)
        bleu_value = bleu_calc.bleu("I/am/waiting/on/the/beach",
                                    "He/is/walking/on/the/beach")

        # Both scores are positive only if the '/' tokenizer was really used.
        self.assertGreater(rouge_value, 0)
        self.assertGreater(bleu_value, 0)
Esempio n. 21
0
    def __init__(self,
                 metrics=None,
                 stats=None,
                 stem=False,
                 remove_stop=False):
        """Configure which metrics and statistics are reported.

        Args:
            metrics: Metric names; ``None`` selects ``Rouge.DEFAULT_METRICS``.
            stats: Statistic names; ``None`` selects ``Rouge.DEFAULT_STATS``.
            stem: Forwarded to ``RougeCalculator`` as ``stemming``.
            remove_stop: Forwarded to ``RougeCalculator`` as ``stopwords``.

        Raises:
            ValueError: If a metric is not in ``Rouge.AVAILABLE_METRICS`` or
                a stat is not in ``Rouge.AVAILABLE_STATS``.
        """
        if metrics is None:
            metrics = Rouge.DEFAULT_METRICS
        self.metrics = metrics

        if stats is None:
            stats = Rouge.DEFAULT_STATS
        self.stats = stats

        self.rouge_calc = RougeCalculator(stopwords=remove_stop,
                                          stemming=stem,
                                          lang="en")

        for metric in self.metrics:
            if metric in Rouge.AVAILABLE_METRICS:
                continue
            raise ValueError("Unknown metric '%s'" % metric)

        for stat in self.stats:
            if stat in Rouge.AVAILABLE_STATS:
                continue
            raise ValueError("Unknown stat '%s'" % stat)
Esempio n. 22
0
def evaluate():
    """Print ROUGE-1, ROUGE-2, ROUGE-L and ROUGE-BE for a fixed example.

    ROUGE-1 is scored against a single reference string; the other three
    are scored against a list of two references.
    """
    from sumeval.metrics.rouge import RougeCalculator

    calculator = RougeCalculator(stopwords=True, lang="zh")

    candidate = "I went to the Mars from my living town."
    first_reference = "I went to Mars"
    second_reference = "It's my living town"

    rouge_1 = calculator.rouge_n(summary=candidate,
                                 references=first_reference,
                                 n=1)
    rouge_2 = calculator.rouge_n(
        summary=candidate,
        references=[first_reference, second_reference],
        n=2)
    rouge_l = calculator.rouge_l(
        summary=candidate,
        references=[first_reference, second_reference])

    # You need spaCy to calculate ROUGE-BE
    rouge_be = calculator.rouge_be(
        summary=candidate,
        references=[first_reference, second_reference])

    template = "ROUGE-1: {}, ROUGE-2: {}, ROUGE-L: {}, ROUGE-BE: {}"
    report = template.format(rouge_1, rouge_2, rouge_l, rouge_be)
    print(report.replace(", ", "\n"))
def eval_rouges(refrence_summary, model_summary):
    """Score ``model_summary`` against ``refrence_summary``.

    Example inputs:
        refrence_summary = "tokyo shares close up #.## percent"
        model_summary = "tokyo stocks close up # percent to fresh record high"

    Returns:
        ``(rouge_1, rouge_2, rouge_l, rouge_be, bleu_score)``. ROUGE uses an
        English calculator with stop-word removal; BLEU uses a default
        ``BLEUCalculator``.
    """
    calculator = RougeCalculator(stopwords=True, lang="en")

    # ROUGE-1 gets the bare reference; every other metric gets a
    # one-element list.
    rouge_1 = calculator.rouge_n(summary=model_summary,
                                 references=refrence_summary,
                                 n=1)
    rouge_2 = calculator.rouge_n(summary=model_summary,
                                 references=[refrence_summary],
                                 n=2)
    rouge_l = calculator.rouge_l(summary=model_summary,
                                 references=[refrence_summary])

    # You need spaCy to calculate ROUGE-BE
    rouge_be = calculator.rouge_be(summary=model_summary,
                                   references=[refrence_summary])

    bleu_calc = BLEUCalculator()
    bleu_score = bleu_calc.bleu(summary=model_summary,
                                references=[refrence_summary])

    return rouge_1, rouge_2, rouge_l, rouge_be, bleu_score
Esempio n. 24
0
def main(conf):
    """Score system summaries against references and print ROUGE statistics.

    Reads ``conf.system_out``, ``conf.reference`` and ``conf.test_src``,
    which must have the same number of lines. Prints the IDs of the (up to)
    100 samples with the lowest ROUGE-1, the overall ROUGE-1/2/L averages,
    and the same averages grouped by how many ``'</@>'`` markers the source
    line indexed by the sample ID contains (groups 0 to 4). When
    ``conf.output`` is set, the per-sample scores are also written there as
    ``id, r1, r2, rl`` lines.

    Without a ``keyword`` entry in ``conf.rouge`` every system/reference
    line must start with an integer ID followed by whitespace and the text.
    """
    id_list = []
    rougeone_list = []
    rougetwo_list = []
    rougel_list = []
    rouge4one = RougeCalculator(stopwords=True, lang=conf.rouge.lang)
    rouge4other = RougeCalculator(stopwords=False, lang=conf.rouge.lang)
    keynum_counter = defaultdict(lambda: {'count': 0, 'rouge1_lis': [], 'rouge2_lis': [], 'rougel_lis': []})
    use_keyword = conf.rouge.get('keyword', None)
    if use_keyword:
        kr = KeywordRemover(conf.rouge.keyword)
    # First pass: only verify that the three files are line-aligned.
    with open(conf.system_out) as sf, \
         open(conf.reference) as rf, \
         open(conf.test_src) as tsrcf:
        assert len(sf.readlines()) == len(rf.readlines()) == len(tsrcf.readlines())
    with open(conf.system_out) as sf, \
         open(conf.reference) as rf, \
         open(conf.test_src) as tsrcf:
        tsrcs = tsrcf.readlines()
        # ``ref`` rather than ``re`` so the stdlib module name is not shadowed.
        for i, (so, ref) in enumerate(zip(sf, rf)):
            print(i, end='\r', flush=True)
            if use_keyword:
                so = kr(so)
                ref = kr(ref)
                # NOTE(review): this path never parsed an ID and used to fail
                # with NameError below; the line number is used as the ID --
                # confirm that is the intended identifier.
                idx = i
            else:
                idx, so = so.split(None, 1)
                # The ID recorded is the one read from the reference line.
                idx, ref = ref.split(None, 1)
            id_list.append(int(idx))
            rouge1 = rouge4one.rouge_1(summary=so, references=ref, alpha=conf.alpha)
            rouge2 = rouge4other.rouge_2(summary=so, references=ref, alpha=conf.alpha)
            rougel = rouge4one.rouge_l(summary=so, references=ref, alpha=conf.alpha)
            rougeone_list.append(rouge1)
            rougetwo_list.append(rouge2)
            rougel_list.append(rougel)
            keynum = tsrcs[int(idx)].count('</@>')
            keynum_counter[keynum]['count'] += 1
            keynum_counter[keynum]['rouge1_lis'].append(rouge1)
            keynum_counter[keynum]['rouge2_lis'].append(rouge2)
            keynum_counter[keynum]['rougel_lis'].append(rougel)
    # argpartition needs kth < len(array); with 100 or fewer samples every
    # sample is among the "lowest 100".
    worst_k = 100
    if len(rougeone_list) > worst_k:
        lowest_idids = np.argpartition(rougeone_list, worst_k)[:worst_k]
    else:
        lowest_idids = np.arange(len(rougeone_list))
    print(f"Lowest IDs\t{' '.join(map(str, np.array(id_list)[lowest_idids]))}")
    print('ROUGE-1\t%.6f'%(np.average(rougeone_list)))
    print('ROUGE-2\t%.6f'%(np.average(rougetwo_list)))
    print('ROUGE-L\t%.6f'%(np.average(rougel_list)))
    if conf.output:
        with open(conf.output, 'w') as of:
            for idx, r1, r2, rl in zip(id_list, rougeone_list, rougetwo_list, rougel_list):
                of.write(f'{idx}, {r1}, {r2}, {rl}\n')

    for keynum in range(5):
        print(keynum)
        print(f'count: {keynum_counter[keynum]["count"]}')
        print('ROUGE-1\t%.6f'%(np.average(keynum_counter[keynum]['rouge1_lis'])))
        print('ROUGE-2\t%.6f'%(np.average(keynum_counter[keynum]['rouge2_lis'])))
        print('ROUGE-L\t%.6f'%(np.average(keynum_counter[keynum]['rougel_lis'])))
Esempio n. 25
0
    def train_rl(self, data, val_data, nb_epochs, batch_size, optimizer, lr,
                 tf_ratio, stop_criterion, use_cuda, print_evry):
        """Train with ``train_batch_rl_mc`` and validate after every epoch.

        Optimizers, the loss and the logger are created only when
        ``self.logger`` is ``None``; otherwise training resumes at epoch
        ``len(self.logger.log)``. After each epoch the validation loss is
        computed without back-propagation, and the model is saved when it is
        the first epoch or the validation loss dropped below the previous
        epoch's.

        Args:
            data: Training samples.
            val_data: Validation samples.
            nb_epochs: Epoch index at which training stops.
            batch_size: Number of samples per batch.
            optimizer: Optimizer factory called as
                ``optimizer(params, lr=..., weight_decay=...)``.
            lr: Learning rate for both optimizers.
            tf_ratio: Not read by this method.
            stop_criterion: Not read by this method.
            use_cuda: Not read by this method; ``self.use_cuda`` is used.
            print_evry: Print a sample prediction every this many batches.
        """
        if self.logger is None:
            self.encoder_optimizer = optimizer(self.encoder.parameters(),
                                               lr=lr,
                                               weight_decay=0.0000001)
            self.decoder_optimizer = optimizer(self.decoder.parameters(),
                                               lr=lr,
                                               weight_decay=0.0000001)
            self.criterion = nn.NLLLoss()
            self.logger = TrainingLogger(nb_epochs, batch_size, len(data),
                                         len(val_data))
            print("Optimizers compiled for RL training")

        rouge_calc = RougeCalculator(stopwords=False, lang="en")
        for epoch in range(len(self.logger.log), nb_epochs):
            self.logger.init_epoch(epoch)
            batches = utils.sort_and_shuffle_data(data,
                                                  nb_buckets=100,
                                                  batch_size=batch_size,
                                                  rnd=True)
            for b in range(len(batches)):

                loss, _time = self.train_batch_rl_mc(samples=batches[b],
                                                     use_cuda=self.use_cuda,
                                                     rouge=rouge_calc)
                self.logger.add_iteration(b + 1, loss, _time)
                if b % print_evry == 0:
                    preds = self.predict([data[b * batch_size]],
                                         self.config['target_length'], False,
                                         self.use_cuda)
                    print('\n', " ".join([str(t[0]['word']) for t in preds]))

            for b in range(int(len(val_data) / batch_size)):
                try:
                    loss, _time = self.train_batch(
                        val_data[b * batch_size:(b + 1) * batch_size],
                        self.use_cuda,
                        backprop=False)
                    self.logger.add_val_iteration(b + 1, loss, _time)
                except Exception as err:
                    # Validation stays best-effort, but KeyboardInterrupt and
                    # SystemExit now propagate and the cause is reported.
                    print("\n", "Error during validation!", repr(err))

            if epoch == 0 or self.logger.log[epoch][
                    "val_loss"] < self.logger.log[epoch - 1]["val_loss"]:
                self.save_model(self.config['model_path'],
                                self.config['model_id'],
                                epoch=epoch,
                                loss=self.logger.log[epoch]["val_loss"])
Esempio n. 26
0
 def test_rouge_l(self):
     """ROUGE-L must agree with both Pythonrouge and the ``rouge_l`` function."""
     data = self.load_test_data()
     calculator = RougeCalculator(stopwords=True)
     for eval_id in data:
         entry = data[eval_id]
         summaries = entry["summaries"]
         references = entry["references"]
         for candidate in summaries:
             external = Pythonrouge(summary_file_exist=False,
                                    summary=[[candidate]],
                                    reference=[[[r] for r in references]],
                                    n_gram=1,
                                    recall_only=False,
                                    ROUGE_L=True,
                                    length_limit=True,
                                    length=50,
                                    stemming=False,
                                    stopwords=True)
             external_scores = external.calc_score()
             candidate_tokens = calculator.tokenize(candidate)
             reference_tokens = [calculator.tokenize(r) for r in references]
             functional_score = rouge_l(candidate_tokens, reference_tokens,
                                        0.5)
             actual = calculator.rouge_l(candidate, references)
             self.assertLess(abs(functional_score - actual), 1e-5)
             self.assertLess(abs(external_scores["ROUGE-L-F"] - actual), 1e-5)
Esempio n. 27
0
def rouge_eval(ref_dir, dec_dir):
    """Average ROUGE-1/2/3/L (as percentages) over a directory of summaries.

    Each file in ``ref_dir`` is paired with ``<file_id>_decoded.txt`` in
    ``dec_dir``, where ``<file_id>`` is the first six characters of the
    reference file name. The lines of each file are joined with a single
    space before scoring.

    Args:
        ref_dir: Directory containing the reference summaries.
        dec_dir: Directory containing the decoded summaries.

    Returns:
        Dict with keys ``'1'``, ``'2'``, ``'3'`` and ``'l'`` holding the mean
        scores multiplied by 100.

    Raises:
        ZeroDivisionError: If ``ref_dir`` is empty.
    """
    rouge = RougeCalculator(stopwords=True, lang="en")
    rouge_1 = 0
    rouge_2 = 0
    rouge_3 = 0
    rouge_l = 0
    # List the directory once so the count and the loop see the same files.
    filenames = os.listdir(ref_dir)
    num_files = len(filenames)
    for filename in filenames:
        file_id = filename[:6]
        ref_file = os.path.join(ref_dir, filename)
        dec_file = os.path.join(dec_dir, '%s_decoded.txt' % file_id)
        # Context managers close every handle; the original left them open.
        with open(ref_file, 'r') as ref_handle:
            ref_sum = ' '.join(ref_handle.readlines())
        with open(dec_file, 'r') as dec_handle:
            dec_sum = ' '.join(dec_handle.readlines())
        rouge_1 += rouge.rouge_n(dec_sum, ref_sum, n=1)
        rouge_2 += rouge.rouge_n(dec_sum, ref_sum, n=2)
        rouge_3 += rouge.rouge_n(dec_sum, ref_sum, n=3)
        rouge_l += rouge.rouge_l(dec_sum, ref_sum)
    return {
        '1': 100 * rouge_1 / num_files,
        '2': 100 * rouge_2 / num_files,
        '3': 100 * rouge_3 / num_files,
        'l': 100 * rouge_l / num_files
    }
Esempio n. 28
0
class LanguageMetrics(object):
    """Static helpers that score a summary with BLEU or a ROUGE variant.

    Both calculators are created once, when the class body is executed, and
    shared by every call. Each scoring method accepts either a single
    reference string or a list of reference strings.
    """

    # Shared calculators, both built with a SimpleTokenizer.
    bleu = BLEUCalculator(tokenizer=SimpleTokenizer())
    rouge = RougeCalculator(stopwords=True,
                            lang="en",
                            tokenizer=SimpleTokenizer())

    @staticmethod
    def _computeScore(summary, refs, criteria):
        """Call ``criteria(summary=..., references=...)`` with ``refs`` as a list."""
        if isinstance(refs, str):
            # A lone reference string is wrapped so the metric gets a list.
            refs = [refs]
        score = criteria(summary=summary, references=refs)
        return score

    @staticmethod
    def blue_score(summary, refs):
        """Return the BLEU score (method name kept as spelled for callers)."""
        score = LanguageMetrics._computeScore(summary, refs,
                                              LanguageMetrics.bleu.bleu)
        return score

    @staticmethod
    def rouge_1_score(summary, refs):
        """Return the ROUGE-1 score."""
        score = LanguageMetrics._computeScore(summary, refs,
                                              LanguageMetrics.rouge.rouge_1)
        return score

    @staticmethod
    def rouge_2_score(summary, refs):
        """Return the ROUGE-2 score."""
        score = LanguageMetrics._computeScore(summary, refs,
                                              LanguageMetrics.rouge.rouge_2)
        return score

    @staticmethod
    def rouge_l_score(summary, refs):
        """Return the ROUGE-L score."""
        score = LanguageMetrics._computeScore(summary, refs,
                                              LanguageMetrics.rouge.rouge_l)
        return score

    @staticmethod
    def rouge_be_score(summary, refs):
        """Return the ROUGE-BE score."""
        score = LanguageMetrics._computeScore(summary, refs,
                                              LanguageMetrics.rouge.rouge_be)
        return score

    @staticmethod
    def rouge_n_score(summary, refs, n):
        """Return the ROUGE-N score for n-gram order ``n``."""
        # functools.partial takes the callable positionally; passing it as
        # ``func=`` raised TypeError on every call.
        rouge_n = partial(LanguageMetrics.rouge.rouge_n, n=n)
        score = LanguageMetrics._computeScore(summary, refs, rouge_n)
        return score
    def __init__(self, rouge_variants, remove_stopwords=False, stem=False):
        """Select the ROUGE scoring functions named in ``rouge_variants``.

        Args:
            rouge_variants: Iterable of variant names; supported names are
                ``"ROUGE-1-F"``, ``"ROUGE-2-F"`` and ``"ROUGE-L-F"``.
            remove_stopwords: Forwarded to ``RougeCalculator`` as ``stopwords``.
            stem: Forwarded to ``RougeCalculator`` as ``stemming``.

        Unsupported names are reported on stdout and skipped.
        """
        self.rouge_variants = rouge_variants
        self.rouge_calc = RougeCalculator(stopwords=remove_stopwords,
                                          lang="en",
                                          stemming=stem)
        calculator = self.rouge_calc
        self.availeble_calcs = {
            "ROUGE-1-F": calculator.rouge_1,
            "ROUGE-2-F": calculator.rouge_2,
            "ROUGE-L-F": calculator.rouge_l
        }

        self.calcs_to_use = []

        for variant in self.rouge_variants:
            if variant not in self.availeble_calcs:
                print("Rouge variant not useable", variant)
                continue
            self.calcs_to_use.append(self.availeble_calcs[variant])
Esempio n. 30
0
    def test_rouge_be_hm(self):
        """ROUGE-BE for the HM/HMR types must equal ROUGE-1 over the BE words."""
        calculator = RougeCalculator(stopwords=False, lang="ja")
        summaries = ["私はきれいな花が好きで、きれいな花には目がない。"]
        references = ["きれいな花が好きだ", "私はきれいな花に目がない"]
        reference_bes = [calculator.parse_to_be(ref) for ref in references]

        for be_type in ("HM", "HMR"):
            reference_words = [self._bes_to_words(bes, be_type)
                               for bes in reference_bes]
            for summary in summaries:
                summary_bes = calculator.parse_to_be(summary)
                summary_words = self._bes_to_words(summary_bes, be_type)
                expected = calculator.rouge_n(summary_words, reference_words,
                                              n=1)
                actual = calculator.rouge_be(summary, references, be_type)
                self.assertLess(abs(expected - actual), 1e-5)