def eval_captions(gt_captions, res_captions):
    """
    gt_captions = ground truth captions; 5 per image
    res_captions = captions generated by the model to be evaluated
    """
    print('ground truth captions')
    print(gt_captions)
    print('RES CAPTIONS')
    print(res_captions)

    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    res = []
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gt_captions, res_captions)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                print("%s: %0.3f" % (m, sc))
                res.append((m, sc))
        else:
            print("%s: %0.3f" % (method, score))
            res.append((method, score))
    return res
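# Minimal usage sketch (illustrative, not part of the original snippet): it
# assumes the pycocoevalcap convention that both arguments are dicts mapping
# an image id to a list of caption strings. The id "391895" and the captions
# below are made-up example data.
example_gt = {
    "391895": [
        "a man riding a bike",
        "a person on a bicycle",
        "a cyclist rides down the road",
        "a man rides his bike outside",
        "someone riding a bicycle",
    ]
}
example_res = {"391895": ["a man riding a bicycle down the road"]}
example_scores = eval_captions(example_gt, example_res)  # list of (metric, score) pairs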
def test_score():
    cand = "中华人民共和国"
    ref = "中华人民共和国公民"
    bleu = Bleu(N_SIZE)
    bleu.add_inst(cand, ref)
    s = bleu.get_score()
    print('score: {}'.format(s))
def test_add_inst():
    cand = '13'
    ref = '13'
    bleu = Bleu(N_SIZE)
    bleu.add_inst(cand, ref)
    match_ngram = bleu.match_ngram
    candi_ngram = bleu.candi_ngram
    print('match_ngram: {}'.format(match_ngram))
    print('candi_ngram: {}'.format(candi_ngram))
def evaluate(self):
    cap = open(r'results.txt')
    cap_ = []
    for line in cap:
        line = line.split(' ')
        line[len(line) - 1] = '.'
        del line[0]
        print(line)
        cap_.append(line)

    gts = {}
    res = {}
    f = open("cap_flickr30k.json")
    captions = json.load(f)
    f1 = open("dic_flickr30k.json")
    dics = json.load(f1)
    dics = dics['images']

    pos = 0
    for i in range(0, len(dics), 1):
        if dics[i]['split'] == 'test':
            caption_1 = []
            caption_2 = []
            caption_1.append(captions[i][0]['caption'])
            res[dics[i]['id']] = caption_1
            caption_2.append(cap_[pos])
            caption_2.append(cap_[pos])
            gts[dics[i]['id']] = caption_2
            pos = pos + 1

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Cider(), "CIDEr")
    ]

    # =================================================
    # Compute scores
    # =================================================
    eval = {}
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            print("%s: %0.3f" % (method, score))
def compute_bleu_rouge(pred_dict, ref_dict, bleu_order=4):
    """
    Compute bleu and rouge scores.
    """
    assert set(pred_dict.keys()) == set(ref_dict.keys()), \
        "missing keys: {}".format(set(ref_dict.keys()) - set(pred_dict.keys()))
    scores = {}
    bleu_scores, _ = Bleu(bleu_order).compute_score(ref_dict, pred_dict)
    for i, bleu_score in enumerate(bleu_scores):
        bleu_score *= 100
        scores['Bleu-%d' % (i + 1)] = bleu_score
    return scores
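# Illustrative call (assumed example data, not from the original source):
# pred_dict and ref_dict share the same keys, each mapping an id to a list of
# sentences, as the pycocoevalcap Bleu scorer expects. Scores are rescaled to
# the 0-100 range by the loop above.
example_pred = {"q1": ["the cat sat on the mat"]}
example_ref = {"q1": ["a cat was sitting on the mat"]}
example_bleu = compute_bleu_rouge(example_pred, example_ref)
# e.g. {'Bleu-1': ..., 'Bleu-2': ..., 'Bleu-3': ..., 'Bleu-4': ...}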
def evaluate(self):
    imgIds = self.params['image_id']
    # imgIds = self.coco.getImgIds()
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]

    # =================================================
    # Set up scorers
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    '''
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    '''
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"])]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def test_score():
    # init all arguments
    data = read_json()
    rouge_eval = RougeL()
    bleu_eval = Bleu()
    for idx, (ref_key, cand_key) in enumerate(data):
        ref_sent = data[idx][ref_key]
        cand_sent = data[idx][cand_key]
        rouge_eval.add_inst(cand_sent, ref_sent)
        bleu_eval.add_inst(cand_sent, ref_sent)
    bleu_score = bleu_eval.get_score()
    rouge_score = rouge_eval.get_score()
    print('bleu score: {}, rouge score: {}'.format(bleu_score, rouge_score))
def score(ref, hypo):
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    final_scores = {}
    for scorer, method in scorers:
        score, scores = scorer.compute_score(ref, hypo)
        if type(score) == list:
            for m, s in zip(method, score):
                final_scores[m] = s
        else:
            final_scores[method] = score
    return final_scores
def cal_avg_B4(custom_gts, custom_res):
    # input: tested sentences and (top_N - 1) corresponding 'gt' sentences
    # return: the per-sentence BLEU-4 scores
    # calculate BLEU scores in the traditional way
    gts = tokenizer.tokenize(custom_gts)
    res = tokenizer.tokenize(custom_res)
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"])]
    imgToEval = {}
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        if type(method) == list:
            score, scores, subgraph_training_bleu = scorer.compute_score(gts, res)
            for sc, scs, m in zip(score, scores, method):
                setImgToEvalImgs(scs, list(gts.keys()), m, imgToEval)
                print("%s: %0.3f" % (m, sc))
    B_4s = [imgToEval[sen_id]['Bleu_4'] for sen_id in custom_gts.keys()]
    return B_4s
def score(ref, hypo): """ ref, dictionary of reference sentences (id, sentence) hypo, dictionary of hypothesis sentences (id, sentence) score, dictionary of scores """ scorers = [ (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), (Rouge(), "ROUGE_L"), ] final_scores = {} for scorer, method in scorers: score, scores = scorer.compute_score(ref, hypo) if type(score) == list: for m, s in zip(method, score): final_scores[m] = s else: final_scores[method] = score return final_scores
def test_count_bp():
    cand = '我是中国人'
    ref = '重视啊啊啊啊我啊啊我了'
    bleu = Bleu(N_SIZE)
    bp = bleu.count_bp(cand, ref)
    print('BP: {}'.format(bp))
def __init__(self, alpha=0.5):
    self.simple_meteor = SimpleMeteor(alpha=alpha, beta=0.16)
    self.tri_bleu = Bleu(3)
    self.four_bleu = Bleu(4, beta=0.13)
    self.p = Preprocessor()
def __init__(self):
    self.scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"])]
def test_score():
    cand = "中华人民共和国"
    ref = "中华人民共和国公民"
    bleu = Bleu(N_SIZE)
    s = bleu.score(cand, ref)
    print('score: {}'.format(s))
def init_scorer(cached_tokens):
    global CiderD_scorer
    CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    global Bleu_scorer
    Bleu_scorer = Bleu_scorer or Bleu(4)
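# Note on the snippet above: the "X = X or ..." pattern only works if the
# globals already exist when init_scorer runs, so the surrounding module is
# assumed to declare them at top level, e.g. (sketch of that assumption):
CiderD_scorer = None
Bleu_scorer = None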
from rouge import Rouge
import argparse
import logging
from ReadingComprehension.IterativeReattentionAligner.e2e_encoder import MnemicReader as e2e_MnemicReader
import cProfile, pstats, io
from utils import *
from InformationRetrieval.AttentionRM.modules import AttentionRM
from EndToEndModel.modules import EndToEndModel
from nltk.translate.bleu_score import sentence_bleu
import re
import pickle
from CSMrouge import RRRouge
from bleu import Bleu

stoplist = set(['.', ',', '...', '..'])
bleu_obj = Bleu(4)


def add_arguments(parser):
    parser.add_argument("train_file", help="File that contains training data")
    parser.add_argument("dev_file", help="File that contains dev data")
    parser.add_argument("embedding_file", help="File that contains pre-trained embeddings")
    parser.add_argument('--dicts_dir', type=str, default=None, help='Directory containing the word dictionaries')
    parser.add_argument('--seed', type=int, default=6, help='Random seed for the experiment')
    parser.add_argument('--epochs', type=int, default=20, help='Train data iterations')
    parser.add_argument('--train_batch_size', type=int, default=32, help='Batch size for training')
    parser.add_argument('--dev_batch_size', type=int, default=32, help='Batch size for dev')
    parser.add_argument('--hidden_size', type=int, default=100, help='Hidden size for LSTM')
    parser.add_argument('--num_layers', type=int, default=1, help='Number of layers for LSTM')
    parser.add_argument('--char_emb_size', type=int, default=50, help='Embedding size for characters')
    parser.add_argument('--pos_emb_size', type=int, default=50, help='Embedding size for pos tags')
    parser.add_argument('--ner_emb_size', type=int, default=50, help='Embedding size for ner')