Example #1
def eval_captions(gt_captions, res_captions):
    """
    gt_captions: ground-truth captions, 5 per image
    res_captions: captions generated by the model being evaluated
    """
    print('ground truth captions')
    print(gt_captions)

    print('res captions')
    print(res_captions)

    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    res = []

    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gt_captions, res_captions)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                print("%s: %0.3f" % (m, sc))
                res.append((m, sc))
        else:
            print("%s: %0.3f" % (method, score))
            res.append((method, score))

    return res
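A minimal usage sketch for eval_captions, assuming pycocoevalcap-style scorers whose compute_score takes dicts mapping an image id to a list of caption strings (the ids and captions below are invented for illustration):

# hypothetical ground-truth captions: normally five references per image
gt_captions = {
    'img_001': ['a dog runs across the grass',
                'a brown dog is running outside',
                'the dog sprints through a field'],
}
# hypothetical model output: one generated caption per image
res_captions = {
    'img_001': ['a dog is running on the grass'],
}
results = eval_captions(gt_captions, res_captions)
# results is a list of (metric_name, value) tuples, one per metric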
Example #2
def test_score():
    # N_SIZE is assumed to be a module-level constant giving the n-gram order
    cand = "中华人民共和国"      # "the People's Republic of China"
    ref = "中华人民共和国公民"   # "a citizen of the People's Republic of China"
    bleu = Bleu(N_SIZE)
    bleu.add_inst(cand, ref)
    s = bleu.get_score()
    print('score: {}'.format(s))
Example #3
def test_add_inst():
    # add a candidate/reference pair, then inspect the matched and candidate
    # n-gram counts the scorer records internally
    cand = '13'
    ref = '13'
    bleu = Bleu(N_SIZE)
    bleu.add_inst(cand, ref)
    match_ngram = bleu.match_ngram
    candi_ngram = bleu.candi_ngram
    print('match_ngram: {}'.format(match_ngram))
    print('candi_ngram: {}'.format(candi_ngram))
Example #4
    def evaluate(self):
        # read the generated captions from results.txt: drop the leading token
        # on each line and replace the trailing token with a period
        cap = open(r'results.txt')
        cap_ = []
        for line in cap:
            line = line.split(' ')
            line[len(line) - 1] = '.'
            del line[0]
            print(line)
            cap_.append(line)
        gts = {}
        res = {}
        f = open("cap_flickr30k.json")
        captions = json.load(f)
        f1 = open("dic_flickr30k.json")
        dics = json.load(f1)
        dics = dics['images']
        pos = 0
        # collect reference/candidate caption pairs for the test-split images
        for i in range(len(dics)):
            if dics[i]['split'] == 'test':
                caption_1 = []
                caption_2 = []
                caption_1.append(captions[i][0]['caption'])
                res[dics[i]['id']] = caption_1
                caption_2.append(cap_[pos])
                caption_2.append(cap_[pos])
                gts[dics[i]['id']] = caption_2
                pos = pos + 1

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                print("%s: %0.3f" % (method, score))
Example #5
def compute_bleu_rouge(pred_dict, ref_dict, bleu_order=4):
    """
    Compute bleu and rouge scores.
    """
    assert set(pred_dict.keys()) == set(ref_dict.keys()), \
        "missing keys: {}".format(
            set(ref_dict.keys()) - set(pred_dict.keys()))
    scores = {}
    bleu_scores, _ = Bleu(bleu_order).compute_score(ref_dict, pred_dict)
    for i, bleu_score in enumerate(bleu_scores):
        bleu_score *= 100
        scores['Bleu-%d' % (i + 1)] = bleu_score
    return scores
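A quick usage sketch for compute_bleu_rouge, again assuming a pycocoevalcap-style Bleu whose compute_score takes dicts mapping an id to a list of sentences (the question id and sentences are made up):

# hypothetical single-question example
ref_dict = {'q1': ['paris is the capital of france']}
pred_dict = {'q1': ['the capital of france is paris']}
print(compute_bleu_rouge(pred_dict, ref_dict))
# -> a dict with keys 'Bleu-1' ... 'Bleu-4', values scaled to 0-100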
Example #6
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenization
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        '''
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]
        '''

        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"])]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #7
def test_score():
    # read_json is assumed to return a list of records, each a dict whose two
    # keys name the reference sentence and the candidate sentence
    data = read_json()

    rouge_eval = RougeL()
    bleu_eval = Bleu()
    for idx, (ref_key, cand_key) in enumerate(data):
        ref_sent = data[idx][ref_key]
        cand_sent = data[idx][cand_key]

        rouge_eval.add_inst(cand_sent, ref_sent)
        bleu_eval.add_inst(cand_sent, ref_sent)

    bleu_score = bleu_eval.get_score()
    rouge_score = rouge_eval.get_score()
    print('bleu score: {}, rouge score: {}'.format(bleu_score, rouge_score))
Example #8
def score(ref, hypo):
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    final_scores = {}
    for scorer, method in scorers:
        score, scores = scorer.compute_score(ref, hypo)
        if type(score) == list:
            for m, s in zip(method, score):
                final_scores[m] = s
        else:
            final_scores[method] = score

    return final_scores
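A usage sketch for score, with the same assumptions as above: both arguments map an id to a list of sentences, and each hypothesis list holds the single generated sentence (sample data invented):

ref = {0: ['a man is riding a horse', 'a person rides a brown horse']}
hypo = {0: ['a man rides a horse']}
final = score(ref, hypo)
# final maps metric names ('Bleu_1' ... 'Bleu_4', 'METEOR', 'ROUGE_L', 'CIDEr') to floats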
Example #9
def cal_avg_B4(custom_gts, custom_res):
    # input: tested sentences and the (top_N - 1) corresponding 'gt' sentences
    # returns the per-sentence BLEU-4 scores, computed in the traditional way
    # (tokenizer and setImgToEvalImgs are assumed to be module-level helpers)
    gts = tokenizer.tokenize(custom_gts)
    res = tokenizer.tokenize(custom_res)
    print('setting up scorers...')
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"])]
    imgToEval = {}
    for scorer, method in scorers:
        print('computing %s score...'%(scorer.method()))
        if type(method) == list:
            score, scores, subgraph_training_bleu = scorer.compute_score(gts, res)
            for sc, scs, m in zip(score, scores, method):
                setImgToEvalImgs(scs, list(gts.keys()), m, imgToEval)
                print("%s: %0.3f"%(m, sc))
    B_4s = [imgToEval[sen_id]['Bleu_4'] for sen_id in custom_gts.keys()]
    return B_4s
Example #10
def score(ref, hypo):
    """
    ref: dictionary of reference sentences (id -> list of sentences)
    hypo: dictionary of hypothesis sentences (id -> list of sentences)
    returns a dictionary of scores keyed by metric name
    """
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Rouge(), "ROUGE_L"),
    ]
    final_scores = {}
    for scorer, method in scorers:
        score, scores = scorer.compute_score(ref, hypo)
        if type(score) == list:
            for m, s in zip(method, score):
                final_scores[m] = s
        else:
            final_scores[method] = score
    return final_scores
Example #11
def test_count_bp():
    # count_bp returns the BLEU brevity penalty (BP) for this candidate/reference
    # pair; the candidate "我是中国人" means "I am Chinese"
    cand = '我是中国人'
    ref = '重视啊啊啊啊我啊啊我了'
    bleu = Bleu(N_SIZE)
    bp = bleu.count_bp(cand, ref)
    print('BP: {}'.format(bp))
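For reference, a sketch of the standard BLEU brevity penalty that count_bp presumably implements, with c the candidate length and r the reference length (this is the textbook definition, not this project's code):

import math

def brevity_penalty(c, r):
    # BLEU brevity penalty (Papineni et al., 2002): no penalty when the
    # candidate is longer than the reference, exponential penalty otherwise
    return 1.0 if c > r else math.exp(1 - float(r) / c)

print(brevity_penalty(5, 10))  # candidate half as long as the reference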
Example #12
def __init__(self, alpha=0.5):
    self.simple_meteor = SimpleMeteor(alpha=alpha, beta=0.16)
    self.tri_bleu = Bleu(3)
    self.four_bleu = Bleu(4, beta=0.13)
    self.p = Preprocessor()
Example #13
def __init__(self):
    self.scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"])]
Example #14
def test_score():
    cand = "中华人民共和国"
    ref = "中华人民共和国公民"
    bleu = Bleu(N_SIZE)
    s = bleu.score(cand, ref)
    print('score: {}'.format(s))
Example #15
def init_scorer(cached_tokens):
    # lazily create the module-level scorers the first time this is called
    global CiderD_scorer
    CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    global Bleu_scorer
    Bleu_scorer = Bleu_scorer or Bleu(4)
Example #16
from rouge import Rouge
import argparse
import logging
from ReadingComprehension.IterativeReattentionAligner.e2e_encoder import MnemicReader as e2e_MnemicReader
import cProfile, pstats, io
from utils import *
from InformationRetrieval.AttentionRM.modules import AttentionRM
from EndToEndModel.modules import EndToEndModel
from nltk.translate.bleu_score import sentence_bleu
import re
import pickle
from CSMrouge import RRRouge
from bleu import Bleu

stoplist = set(['.',',', '...', '..'])
bleu_obj = Bleu(4)

def add_arguments(parser):
    parser.add_argument("train_file", help="File that contains training data")
    parser.add_argument("dev_file", help="File that contains dev data")
    parser.add_argument("embedding_file", help="File that contains pre-trained embeddings")
    parser.add_argument('--dicts_dir', type=str, default=None, help='Directory containing the word dictionaries')
    parser.add_argument('--seed', type=int, default=6, help='Random seed for the experiment')
    parser.add_argument('--epochs', type=int, default=20, help='Train data iterations')
    parser.add_argument('--train_batch_size', type=int, default=32, help='Batch size for training')
    parser.add_argument('--dev_batch_size', type=int, default=32, help='Batch size for dev')
    parser.add_argument('--hidden_size', type=int, default=100, help='Hidden size for LSTM')
    parser.add_argument('--num_layers', type=int, default=1, help='Number of layers for LSTM')
    parser.add_argument('--char_emb_size', type=int, default=50, help='Embedding size for characters')
    parser.add_argument('--pos_emb_size', type=int, default=50, help='Embedding size for pos tags')
    parser.add_argument('--ner_emb_size', type=int, default=50, help='Embedding size for ner')
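A minimal sketch of how add_arguments would typically be wired up, given the arguments shown above; the paths passed to parse_args are placeholders, not real files:

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='reading comprehension training')
    add_arguments(parser)
    # placeholder values for the three required positional arguments
    args = parser.parse_args(['train.json', 'dev.json', 'embeddings.txt', '--epochs', '5'])
    print(args.train_file, args.epochs)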