def __init__(self):
    """Set up BLEU/ROUGE/CIDEr scorers and a punctuation-removal regex pattern."""
    import re  # local import: only needed to build the removal pattern safely
    # NOTE(review): attribute is spelled 'blue_scorer' (sic); kept as-is since
    # external code may reference it by this exact name.
    self.blue_scorer = Bleu(4)
    self.rouge_scorer = Rouge()
    self.cider_scorer = Cider()
    self.truth = None
    # Characters to strip: ASCII punctuation plus common CJK punctuation.
    remove = string.punctuation + "、。,."
    # re.escape() keeps regex metacharacters in string.punctuation
    # ('\', ']', '^', '-', ...) from being interpreted inside the class;
    # the previous unescaped interpolation only worked by coincidence.
    self.remove_pattern = r"[{}]".format(re.escape(remove))  # create the pattern
def compute_score(gts, val_caps, train_imgids, val_imgids, i, j):
    """Score one validation caption against every training image's references.

    The j-th caption of validation image i is used as the candidate for each
    training image id; returns the per-image CIDEr scores as a numpy array.
    """
    candidate = val_caps[val_imgids[i]][j]
    res = {imgid: [candidate] for imgid in train_imgids}
    _, per_image_scores = Cider().compute_score(gts, res, train_imgids)
    return np.array(per_image_scores)
def evaluate(self, album_to_Gts, album_to_Res):
    """Score predicted album stories against ground truth (BLEU/METEOR/ROUGE/CIDEr).

    album_to_Gts / album_to_Res: album_id -> list of story strings.
    Results are recorded via setEval/setAlbumToEval and finalized with
    setEvalAlbums().  (Removed a dead `scorers = []` assignment that was
    immediately overwritten.)
    """
    self.album_to_Res = album_to_Res
    self.album_to_Gts = album_to_Gts
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")  # df='VIST/VIST-train-words'
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score ...' % (scorer.method()))
        score, scores = scorer.compute_score(self.album_to_Gts, self.album_to_Res)
        if type(method) == list:
            # BLEU returns one aggregate + per-album list per n-gram order.
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setAlbumToEval(scs, self.album_to_Gts.keys(), m)
                print('%s: %.3f' % (m, sc))
        else:
            self.setEval(score, method)
            self.setAlbumToEval(scores, self.album_to_Gts.keys(), method)
            print('%s: %.3f' % (method, score))
    self.setEvalAlbums()
def evaluate(self):
    """Evaluate generated referring expressions in self.Res against GT sentences (CIDEr)."""
    eval_ref_ids = [entry['ref_id'] for entry in self.Res]
    ref_to_gts = {}
    ref_to_gt_ranks1 = {}
    ref_to_gt_ranks2 = {}
    for rid in eval_ref_ids:
        ref = self.refer.Refs[rid]
        ref_to_gts[rid] = [s['sent'] for s in ref['sentences']]
        if self.eval_cider_r:
            # NOTE(review): these rank dicts are filled but not consumed below;
            # get_rank1/get_rank2 are still invoked to preserve behavior.
            ref_to_gt_ranks1[rid] = self.refer.get_rank1(ref)
            ref_to_gt_ranks2[rid] = self.refer.get_rank2(ref)
    ref_to_res = {entry['ref_id']: [entry['sent']] for entry in self.Res}
    tokenizer = PTBTokenizer()
    self.refToRes = tokenizer.tokenize(ref_to_res)
    self.refToGts = tokenizer.tokenize(ref_to_gts)
    for scorer, method in [(Cider(), "CIDEr")]:
        score, scores = scorer.compute_score(self.refToGts, self.refToRes)
        self.setEval(score, method)
        self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
    self.setEvalRefs()
class EvalCap:
    """Smoke test: scores one hypothesis against one reference at class-definition
    time (i.e. when the module is imported)."""
    ref_list = [
        'this is a reference sentence for sentence2 which was generated by your model'
    ]
    hyp_list = ['this is sentence2 which has been generated by your model']
    # Scorers expect {id: [sentence, ...]} mappings on both sides.
    refs = {i: [sent.strip()] for i, sent in enumerate(ref_list)}
    hyps = {i: [sent.strip()] for i, sent in enumerate(hyp_list)}
    scorers = [
        (Bleu(4), ['Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4']),
        # (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(hyps, refs)
        if isinstance(method, list):
            # BLEU reports one aggregate per n-gram order.
            for sc, scs, m in zip(score, scores, method):
                print("%s: %0.3f" % (m, sc))
        else:
            print("%s: %0.3f" % (method, score))
def evaluator(gts, res):
    """Tokenize gts/res and compute BLEU/METEOR/CIDEr/SPICE.

    Returns a dict mapping metric name -> aggregate score.  (Fixed: the
    results dict was previously built, never returned, and named `eval`,
    shadowing the builtin; also converted Python-2 print statements.)
    """
    results = {}
    # =================================================
    # Tokenize
    # =================================================
    print('tokenization...')
    # Todo: use Spacy for tokenization
    gts = tokenize(gts)
    res = tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        # (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                results[m] = sc
                print("%s: %0.3f" % (m, sc))
        else:
            results[method] = score
            print("%s: %0.3f" % (method, score))
    return results
def evaluate_captions(res: dict, gts: dict):
    """Return {metric: aggregate score} for BLEU/METEOR/ROUGE/CIDEr/SPICE.

    res/gts: {id: [caption, ...]} mappings; both are PTB-tokenized first.
    """
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE"),
    ]
    scores_by_metric = {}
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        overall, _per_item = scorer.compute_score(gts, res)
        if isinstance(method, list):
            # BLEU yields one aggregate per n-gram order.
            scores_by_metric.update(zip(method, overall))
        else:
            scores_by_metric[method] = overall
    return scores_by_metric
def language_eval(sample_seqs, gt_seqs):
    """Compute BLEU-1..4, METEOR, ROUGE and CIDEr for predictions vs. references.

    sample_seqs: list of predicted sentences, one per item.
    gt_seqs: list of reference-sentence lists, several per item.
    Returns {'BLEU': [b1..b4], 'METEOR': m, 'ROUGE': r, 'CIDEr': c}.
    """
    import sys
    sys.path.append("coco-caption/pycocoevalcap/")
    from bleu.bleu import Bleu
    from cider.cider import Cider
    from meteor.meteor import Meteor
    from rouge.rouge import Rouge
    assert len(sample_seqs) == len(gt_seqs), "number of eval data is different"
    # res: {i: [pred_i]};  gts: {i: [ref1, ref2, ...]}
    res = OrderedDict((i, [pred]) for i, pred in enumerate(sample_seqs))
    gts = OrderedDict((i, list(refs)) for i, refs in enumerate(gt_seqs))
    # (The previous version rebuilt res/gts into identical plain dicts and used
    # manual index loops; that redundant work has been removed.)
    avg_bleu_score, bleu_scores = Bleu(4).compute_score(gts, res)
    avg_cider_score, cider_scores = Cider().compute_score(gts, res)
    avg_meteor_score, meteor_scores = Meteor().compute_score(gts, res)
    avg_rouge_score, rouge_scores = Rouge().compute_score(gts, res)
    print(" BLEU1:{}\n BLEU2:{}\n BLEU3:{}\n BLEU4:{}\n METEOR:{}\n ROUGE:{}\n CIDEr:{}\n"
          .format(avg_bleu_score[0], avg_bleu_score[1], avg_bleu_score[2], avg_bleu_score[3],
                  avg_meteor_score, avg_rouge_score, avg_cider_score))
    return {'BLEU': avg_bleu_score, 'METEOR': avg_meteor_score,
            'ROUGE': avg_rouge_score, 'CIDEr': avg_cider_score}
def evaluate(self):
    """Score self.res against self.gts and return the aggregates as a flat list.

    Returns [Bleu_1, Bleu_2, Bleu_3, Bleu_4, METEOR, ROUGE_L, CIDEr].
    (Removed `scores_dict`: it mixed str and float values and was never
    returned or stored -- dead code.)
    """
    output = []
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr")]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        score, scores = scorer.compute_score(self.gts, self.res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                print("%s: %0.5f" % (m, sc))
                output.append(sc)
        else:
            print("%s: %0.5f" % (method, score))
            output.append(score)
    return output
def evaluate(gts=None, res=None):
    """Compute BLEU/METEOR/ROUGE/CIDEr for already-tokenized gts/res dicts.

    Returns a list of (metric_name, aggregate_score) tuples.
    (Converted Python-2 print statements to print() calls for consistency
    with the rest of the file.)
    """
    # imgIds = self.coco.getImgIds()
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]
    # =================================================
    # Compute scores
    # =================================================
    res_scores = []
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                res_scores.append((m, sc))
        else:
            res_scores.append((method, score))
    return res_scores
def evaluate(self):
    """Score self.res against self.ref with BLEU/METEOR/ROUGE/CIDEr.

    Returns {metric_name: aggregate_score}.
    """
    # Tokenization / punctuation removal is assumed done upstream; the
    # PTBTokenizer step is intentionally disabled.
    gts = self.ref
    # BUG FIX: `res` was never assigned (its assignment sat inside the
    # commented-out tokenizer block), so compute_score raised NameError.
    res = self.res
    # ==================================================
    # Set up scorers
    # ==================================================
    print("setting up scorers ...")
    scorers = [(Bleu(4), ("Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4")),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr")]
    # ==================================================
    # Compute scores
    # ==================================================
    out = {}
    for scorer, method in scorers:
        print("computing %s score ..." % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, tuple):
            for sc, scs, m in zip(score, scores, method):
                out[m] = sc
                print("%s: %0.4f" % (m, sc))
        else:
            print("%s: %0.4f" % (method, score))
            out[method] = score
    return out
def evaluate(self):
    """Score predicted captions (self.input_captions['v_preds']) against
    self.ground_captions; record via setEval*; return {metric: rounded score}."""
    gts, res = {}, {}
    for pred in self.input_captions['v_preds']:
        img_id = pred[self.key_name]
        res.setdefault(img_id, []).append(pred)
        gts[img_id] = self.ground_captions[img_id]
    # Preserve the original `== False` test exactly (distinct from `not x`
    # when no_print is e.g. None).
    verbose = self.no_print == False
    if verbose:
        print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    if verbose:
        print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr")
               #(Spice(), "SPICE")
               ]
    for scorer, method in scorers:
        if verbose:
            print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                if verbose:
                    print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            if verbose:
                print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
    # Round every aggregate metric to 3 decimal places for the return value.
    return {metric: float('%.3f' % value) for metric, value in self.eval.items()}
def __init__(self):
    """Initialize the caption extractor: NLP helpers, cached captions, CIDEr scorer."""
    logging.info("New 'CaptionExtractor' instance has been initialized.")
    # Helpers used for computing metrics and performing transformations.
    self.stemmer = nltk.stem.WordNetLemmatizer()
    self.vectorizer = CountVectorizer()
    # Cached guidance captions plus the CIDEr scorer used to rank them.
    self.captions = helpers.get_data('captions')
    self.cider = Cider(n=FLAGS.ngrams)
    # ETL: when no cached captions exist, build the representations and
    # persist them for future use.
    # NOTE(review): the save was taken to be inside this branch (only persist
    # after a rebuild) -- confirm against the original layout.
    if len(self.captions.keys()) == 0:
        self.annotations_data, self.images_data = self.get_annotations()
        self.make_caption_representations()
        helpers.save_obj(self.captions, 'captions')
def evaluate(self):
    """Load reference/candidate caption JSON files, tokenize, and score them.

    Aggregates are recorded via self.setEval.  (Fixed: the JSON files were
    opened without being closed -- handle leak; also converted Python-2
    print statements to print() calls.)
    """
    def _read_json(ref_name, cand_name):
        """Read the two JSON caption lists and group captions by image_id."""
        path_to_ref_file = os.path.join(self._pathToData, ref_name)
        path_to_cand_file = os.path.join(self._pathToData, cand_name)
        with open(path_to_ref_file, 'r') as f:
            ref_list = json.load(f)
        with open(path_to_cand_file, 'r') as f:
            cand_list = json.load(f)
        gts = defaultdict(list)
        res = defaultdict(list)
        # change of naming convention from ref to gts
        for l in ref_list:
            gts[l['image_id']].append({"caption": l['caption']})
        # change of naming convention from cand to res
        for l in cand_list:
            res[l['image_id']].append({"caption": l['caption']})
        return gts, res

    print('Loading Data...')
    gts, res = _read_json(self._refName, self._candName)
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(self._dfMode), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
        else:
            self.setEval(score, method)
def main():
    """Evaluate generated TGIF sentences (sys.argv[1]) against GT on the test split.

    (Converted Python-2 print statements; removed the unused `eval = {}`
    accumulator, which also shadowed the builtin.)
    """
    import sys
    res_path = sys.argv[1]
    gt_path = osp.join(this_dir, 'tgif-v1.0.tsv')
    test_list_path = osp.join(this_dir, 'splits', 'test.txt')
    test_keys = load_list(test_list_path)
    all_sents = load_sentences(gt_path)
    res = load_sentences(res_path)
    # Ensure res has exactly one sentence for every test key ("" if missing).
    gts = {}
    for key in test_keys:
        gts[key] = all_sents[key]
        if key in res:
            res[key] = [res[key][0]]
        else:
            res[key] = [""]
    # =================================================
    # Convert to COCO format
    # =================================================
    gts = to_coco(gts, res.keys())
    res = to_coco(res, res.keys())
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr")]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                print("%s: %0.3f" % (m, sc))
        else:
            print("%s: %0.3f" % (method, score))
def init_cider_scorer(reward_type):
    """Lazily initialize the global reward scorer used for RL training.

    reward_type: one of 'BLEU', 'METEOR', 'ROUGE', 'CIDEr'.
    Raises ValueError for anything else (previously an unknown type was
    silently ignored, leaving CiderD_scorer uninitialized).
    """
    global CiderD_scorer
    # `x = x or Y()` keeps an already-created scorer instead of rebuilding it.
    # CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    if reward_type == 'BLEU':
        CiderD_scorer = CiderD_scorer or Bleu()
    elif reward_type == 'METEOR':
        CiderD_scorer = CiderD_scorer or Meteor()
    elif reward_type == 'ROUGE':
        CiderD_scorer = CiderD_scorer or Rouge()
    elif reward_type == 'CIDEr':
        CiderD_scorer = CiderD_scorer or Cider()
    else:
        raise ValueError('unknown reward_type: %r' % (reward_type,))
def evaluate(self):
    """Tokenize COCO gts/res captions for self.params['image_id'] and score them.

    Records aggregates via setEval/setImgToEvalImgs/setEvalImgs.
    (Converted Python-2 print statements; removed the unused `eval = {}`
    local that shadowed the builtin.)
    """
    imgIds = self.params['image_id']
    # imgIds = self.coco.getImgIds()
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]     # reference annotations
        res[imgId] = self.cocoRes.imgToAnns[imgId]  # generated captions
    print('===== tokenization... gts')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    print('===== tokenization... res')
    res = tokenizer.tokenize(res)
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr")]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('===== computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, imgIds, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, imgIds, method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def evaluate(self, album_to_Gts, album_to_Res):
    """Score album stories with BLEU/METEOR/ROUGE/CIDEr.

    album_to_Gts: album_id -> list of ground-truth story strings.
    album_to_Res: album_id -> [predicted story string].
    Results are stored through setEval/setAlbumToEval and setEvalAlbums.
    """
    self.album_to_Res = album_to_Res
    self.album_to_Gts = album_to_Gts
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]
    album_ids = self.album_to_Gts.keys()
    for scorer, method in scorers:
        print('computing %s score ...' % (scorer.method()))
        score, scores = scorer.compute_score(self.album_to_Gts, self.album_to_Res)
        # BLEU yields a list of aggregates (one per n-gram order); the others a scalar.
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setAlbumToEval(scs, album_ids, m)
                print('%s: %.4f' % (m, sc))
        else:
            self.setEval(score, method)
            self.setAlbumToEval(scores, album_ids, method)
            print('%s: %.4f' % (method, score))
    self.setEvalAlbums()
def evaluate(self, gts=None, res=None): imgIds = self.params['image_id'] # imgIds = self.coco.getImgIds() if gts is None and res is None: gts = {} res = {} for imgId in imgIds: gts[imgId] = self.coco.imgToAnns[imgId] res[imgId] = self.cocoRes.imgToAnns[imgId] print("before") print(gts[184321]) print(res[184321]) # ================================================= # Set up scorers # ================================================= print 'tokenization...' tokenizer = PTBTokenizer() gts = tokenizer.tokenize(gts) res = tokenizer.tokenize(res) print("after") return gts, res print(gts[184321]) print(res[184321]) # ================================================= # Set up scorers # ================================================= print 'setting up scorers...' scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr"), (Spice(), "SPICE")] # ================================================= # Compute scores # ================================================= for scorer, method in scorers: print 'computing %s score...' % (scorer.method()) score, scores = scorer.compute_score(gts, res) if type(method) == list: for sc, scs, m in zip(score, scores, method): self.setEval(sc, m) self.setImgToEvalImgs(scs, gts.keys(), m) print "%s: %0.3f" % (m, sc) else: self.setEval(score, method) self.setImgToEvalImgs(scores, gts.keys(), method) print "%s: %0.3f" % (method, score) self.setEvalImgs()
def evaluate(self):
    """Tokenize and score COCO gts/res captions; record results via setEval*.

    Fixes: removed a leftover `import ipdb; ipdb.set_trace()` breakpoint
    (third-party debugger that halts every run), stale commented-out test
    fixtures, the unused `eval = {}` local, and converted Python-2 print
    statements to print() calls.
    """
    imgIds = self.params['image_id']
    # imgIds = self.coco.getImgIds()
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, imgIds, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, imgIds, method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def test(loader, model, opt, vocab):
    """Run beam-search captioning over `loader` and report CIDEr/METEOR/ROUGE/BLEU.

    Fix: when no EOS token was present, the old code sliced with sep_id = -1,
    silently dropping the last generated word; the full sequence is now kept.
    Also removed unused locals (write_to_txt, pre-initialized bleu_scores).
    """
    res = {}
    gts = {}
    for batch_id, (video_input, language_feat, captions, time_seg,
                   batch_lens, duration, video_id) in enumerate(loader):
        # Convert the textual input to numeric labels.
        cap_gts, cap_mask = convert_caption_labels(
            captions, loader.dataset.get_vocab(), opt['max_length'])
        video_input = video_input.cuda()
        cap_gts = torch.tensor(cap_gts).cuda().long()
        # cap_mask = cap_mask.cuda()
        with torch.no_grad():
            # Beam search decoding.
            batch_hyp = translate_batch(model, video_input, opt)
        # Ground-truth references, split into tokens.
        references = [[cap.split(' ')] for cap in captions]
        # Decode predictions back to words, truncating at EOS when present.
        hypotheses = []
        for predict in batch_hyp:  # (was: zip(batch_hyp) + predict[0] unwrap -- equivalent)
            if EOS in predict[0]:
                sep_id = predict[0].index(EOS)
            else:
                sep_id = len(predict[0])  # BUG FIX: -1 dropped the last word
            hypotheses.append([vocab[str(word)] for word in predict[0][:sep_id]])
        # NOTE(review): only element 0 of each batch is kept, which assumes
        # batch_size == 1 -- confirm against the loader configuration.
        res[batch_id] = [list_to_sentence(hypotheses[0])]
        gts[batch_id] = [list_to_sentence(references[0][0])]
        print(batch_id)
    avg_bleu_score, bleu_scores = Bleu(4).compute_score(gts, res)
    avg_cider_score, cider_scores = Cider().compute_score(gts, res)
    avg_meteor_score, meteor_scores = Meteor().compute_score(gts, res)
    avg_rouge_score, rouge_scores = Rouge().compute_score(gts, res)
    print('C, M, R, B:', avg_cider_score, avg_meteor_score,
          avg_rouge_score, avg_bleu_score)
def __init__(self, references, scorers=['bleu', 'rouge', 'cider', 'meteor']):
    """Build the requested metric scorers and keep the reference captions.

    references: ground-truth captions used later when scoring.
    scorers: subset of {'bleu', 'rouge', 'cider', 'meteor'}.
    Raises NotImplementedError for any unrecognized scorer name.
    """
    # NOTE: the mutable default argument is preserved from the original
    # signature; it is never mutated here.
    factories = {
        'bleu': lambda: Bleu(4),
        'rouge': Rouge,
        'cider': Cider,
        'meteor': Meteor,
    }
    self.scorers = {}
    for name in scorers:
        if name not in factories:
            raise NotImplementedError()
        self.scorers[name] = factories[name]()
    self.references = references
def evaluate(self):
    """Tokenize and score COCO gts/res captions for self.params['image_id'].

    Records aggregates via setEval/setImgToEvalImgs/setEvalImgs.
    (Converted Python-2 print statements; removed the unused `eval = {}`
    local and the commented-out pdb debugging lines.)
    """
    imgIds = self.params['image_id']
    # imgIds = self.coco.getImgIds()
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, imgIds, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, imgIds, method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def evaluate(self): imgIds = self.params["image_id"] # imgIds = self.coco.getImgIds() gts = {} res = {} for imgId in imgIds: gts[imgId] = self.coco.imgToAnns[imgId] res[imgId] = self.cocoRes.imgToAnns[imgId] # ================================================= # Set up scorers # ================================================= print "tokenization..." tokenizer = PTBTokenizer() gts = tokenizer.tokenize(gts) res = tokenizer.tokenize(res) # ================================================= # Set up scorers # ================================================= print "setting up scorers..." scorers = [ (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr"), (Spice(), "SPICE"), ] # ================================================= # Compute scores # ================================================= for scorer, method in scorers: print "computing %s score..." % (scorer.method()) score, scores = scorer.compute_score(gts, res) if type(method) == list: for sc, scs, m in zip(score, scores, method): self.setEval(sc, m) self.setImgToEvalImgs(scs, gts.keys(), m) print "%s: %0.3f" % (m, sc) else: self.setEval(score, method) self.setImgToEvalImgs(scores, gts.keys(), method) print "%s: %0.3f" % (method, score) self.setEvalImgs()
def evaluate(self):
    """Score generated referring expressions (self.Res) against GT sentences.

    Records aggregates via setEval/setRefToEvalRefs/setEvalRefs.
    (Converted Python-2 print statements to print() calls.)
    """
    evalRefIds = [ann['ref_id'] for ann in self.Res]
    refToGts = {}
    for ref_id in evalRefIds:
        ref = self.refer.Refs[ref_id]
        gt_sents = [sent['sent'] for sent in ref['sentences']]  # up to 3 expressions
        refToGts[ref_id] = gt_sents
    refToRes = {ann['ref_id']: [ann['sent']] for ann in self.Res}
    print('tokenization...')
    tokenizer = PTBTokenizer()
    self.refToRes = tokenizer.tokenize(refToRes)
    self.refToGts = tokenizer.tokenize(refToGts)
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(self.refToGts, self.refToRes)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setRefToEvalRefs(scs, self.refToGts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.setEvalRefs()
def evaluate(self):
    """Score res vs. gts for self.params['image_id'].

    Records everything via setEval*/setEvalImgs and returns a list of
    (metric_name, aggregate_score) tuples for the scalar-valued metrics
    (BLEU aggregates are recorded but, as in the original, not returned).
    """
    image_ids = self.params['image_id']
    gts, res = {}, {}
    for image_id in image_ids:
        gts[image_id] = self.coco.imgToAnns[image_id]
        res[image_id] = self.cocoRes.imgToAnns[image_id]
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]
    result = []
    for scorer, method in scorers:
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            result.append((method, score))
    self.setEvalImgs()
    return result
def evaluate(self):
    """Compute CIDEr and CIDEr-D over self.gts / self.res.

    Returns {metric_name: list of per-item scores}; the mean is printed.
    """
    print('setting up scorers...')
    scorers = [(Cider(df=self.df), "CIDEr"),
               (CiderD(df=self.df), "CIDErD")]
    metric_scores = {}
    for scorer, name in scorers:
        print('computing %s score...' % (scorer.method()))
        mean_score, per_item = scorer.compute_score(self.gts, self.res)
        print("Mean %s score: %0.3f" % (name, mean_score))
        metric_scores[name] = list(per_item)
    return metric_scores
def evaluate(hyp, ref):
    """Evaluate hypothesis file `hyp` against reference file `ref` (one sentence per line).

    Prints BLEU, METEOR, ROUGE and CIDEr.

    Fixes:
      * Bleu was called on the *file paths* instead of the tokenized gts/res
        dicts, so its score was meaningless/failing.
      * `print("Meteor: "), score` and `print("Cider: "), score` were broken
        Python-2 conversions that discarded the value; the scores are now printed.
      * Files are opened with context managers (handles were already closed
        here; kept for clarity).
    """
    with open(hyp, 'r') as r:
        hypothesis = r.readlines()
    # Normalize whitespace and case; scorers expect {id: [sentence]} dicts.
    res = {k: [" ".join(v.strip().lower().split())] for k, v in enumerate(hypothesis)}
    with open(ref, 'r') as r:
        references = r.readlines()
    gts = {k: [v.strip().lower()] for k, v in enumerate(references)}
    score_Bleu, scores_Bleu = Bleu().compute_score(gts, res)
    print("Bleu_4: " + str(score_Bleu))
    score_Meteor, scores_Meteor = Meteor().compute_score(gts, res)
    print("Meteor: " + str(score_Meteor))
    files_rouge = FilesRouge(hyp, ref)
    scores = files_rouge.get_scores(avg=True)
    print('Rouge: ' + str(scores))
    score_Cider, scores_Cider = Cider().compute_score(gts, res)
    print("Cider: " + str(score_Cider))
def evaluate(self, gts, res):
    """Tokenize and score gts/res; record via setEval*; return the Bleu_4 score."""
    logging.info('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    logging.info('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        #(Spice(), "SPICE")
    ]

    def _record(value, per_image, name):
        # Store aggregate + per-image scores and report them.
        self.setEval(value, name)
        self.setImgToEvalImgs(per_image, gts.keys(), name)
        logging.info("%s: %0.3f" % (name, value))
        print("%s: %0.3f" % (name, value))

    bleu_4_score = 0
    for scorer, method in scorers:
        logging.info('computing %s score...' % (scorer.method()))
        overall, per_image = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for value, per, name in zip(overall, per_image, method):
                if name == "Bleu_4":
                    bleu_4_score = value
                _record(value, per, name)
        else:
            _record(overall, per_image, method)
    self.setEvalImgs()
    return bleu_4_score
def evaluate(self):
    """Tokenize and score res vs. gts for self.params['image_id'] (METEOR/ROUGE/CIDEr).

    Progress and results are written to stderr.  (Converted Python-2
    `print >> sys.stderr` chevron syntax to print(..., file=sys.stderr).)
    """
    imgIds = self.params['image_id']
    # imgIds = self.coco.getImgIds()
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]
    print('tokenization...', file=sys.stderr)
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    print('setting up scorers...', file=sys.stderr)
    scorers = [(Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr")]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()), file=sys.stderr)
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc), file=sys.stderr)
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score), file=sys.stderr)
    self.setEvalImgs()