def __init__(self, coco, useBleu=False, useCider=False):
    self.coco = coco
    self.useBleu = useBleu
    self.useCider = useCider
    self.params = {'image_id': coco.getImgIds()}
    imgIds = self.params['image_id']
    gts = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
    if self.useBleu:
        self.b_scorer = BleuScorer()
    if self.useCider:
        self.c_scorer = CiderScorer()
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    for imgId in imgIds:
        ref = gts[imgId]
        assert (type(ref) is list)
        assert (len(ref) > 0)
        if self.useCider:
            self.c_scorer += (None, ref)
    if self.useCider:
        self.c_scorer.compute_doc_freq()
        assert (len(self.c_scorer.ctest) >= max(self.c_scorer.document_frequency.values()))
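# Hedged sketch (not from the original code) of the data shapes used above:
# PTBTokenizer.tokenize consumes {id: [{'caption': str}, ...]} and returns
# {id: [str, ...]}, lower-cased with punctuation stripped (see test_tokenize
# further down). The id and captions here are invented for illustration.
def _tokenizer_shape_example():
    tokenizer = PTBTokenizer()
    gts = {42: [{'caption': 'A dog runs.'}, {'caption': 'The dog is running!'}]}
    tokens = tokenizer.tokenize(gts)
    # expected roughly: {42: ['a dog runs', 'the dog is running']}
    return tokens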
def evaluate(self):
    evalRefIds = [ann['ref_id'] for ann in self.Res]
    refToGts = {}
    refToGtRanks1 = {}
    refToGtRanks2 = {}
    for ref_id in evalRefIds:
        ref = self.refer.Refs[ref_id]
        gt_sents = [sent['sent'] for sent in ref['sentences']]
        refToGts[ref_id] = gt_sents
        if self.eval_cider_r:
            gt_ranks1 = self.refer.get_rank1(ref)
            gt_ranks2 = self.refer.get_rank2(ref)
            refToGtRanks1[ref_id] = gt_ranks1
            refToGtRanks2[ref_id] = gt_ranks2
    refToRes = {ann['ref_id']: [ann['sent']] for ann in self.Res}
    tokenizer = PTBTokenizer()
    self.refToRes = tokenizer.tokenize(refToRes)
    self.refToGts = tokenizer.tokenize(refToGts)
    scorers = [(Cider(), "CIDEr")]
    for scorer, method in scorers:
        score, scores = scorer.compute_score(self.refToGts, self.refToRes)
        self.setEval(score, method)
        self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
    self.setEvalRefs()
def evaluate(self):
    # ==================================================
    # Tokenization: remove punctuation
    # ==================================================
    print("tokenization ...")
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(self.ref)
    res = tokenizer.tokenize(self.res)
    # ==================================================
    # Set up scorers
    # ==================================================
    print("setting up scorers ...")
    scorers = [(Bleu(4), ("Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4")),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr")]
    # ==================================================
    # Compute scores
    # ==================================================
    out = {}
    for scorer, method in scorers:
        print("computing %s score ..." % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, tuple):
            for sc, scs, m in zip(score, scores, method):
                out[m] = sc
                print("%s: %0.4f" % (m, sc))
        else:
            print("%s: %0.4f" % (method, score))
            out[method] = score
    return out
def evaluate_captions(res: dict, gts: dict):
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr"),
               (Spice(), "SPICE")]
    rtn = {}
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                rtn[m] = sc
        else:
            rtn[method] = score
    return rtn
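# Hedged usage sketch for evaluate_captions above; the image id and captions
# are invented. gts typically holds several references per id, res one
# candidate per id.
def _evaluate_captions_example():
    gts = {'img1': [{'caption': 'a cat sits on a mat'},
                    {'caption': 'a cat is sitting on the mat'}]}
    res = {'img1': [{'caption': 'a cat on a mat'}]}
    metrics = evaluate_captions(res, gts)
    # metrics is e.g. {'Bleu_1': ..., 'METEOR': ..., 'CIDEr': ..., 'SPICE': ...}
    return metrics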
def main():
    import sys
    res_path = sys.argv[1]
    gt_path = osp.join(this_dir, 'tgif-v1.0.tsv')
    test_list_path = osp.join(this_dir, 'splits', 'test.txt')
    test_keys = load_list(test_list_path)
    all_sents = load_sentences(gt_path)
    res = load_sentences(res_path)
    # make sure res has one (and only one) sentence for every testing key
    gts = {}
    for key in test_keys:
        gts[key] = all_sents[key]
        if key in res:
            res[key] = [res[key][0]]
        else:
            res[key] = [""]
    # =================================================
    # Convert to COCO format
    # =================================================
    gts = to_coco(gts, res.keys())
    res = to_coco(res, res.keys())
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                print("%s: %0.3f" % (m, sc))
        else:
            print("%s: %0.3f" % (method, score))
def evaluate(self):
    gts = {}
    res = {}
    for i in self.input_captions['v_preds']:
        imgId = i[self.key_name]
        if imgId not in res:
            res[imgId] = []
        res[imgId].append(i)
        gts[imgId] = self.ground_captions[imgId]
    # =================================================
    # Tokenization
    # =================================================
    if not self.no_print:
        print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    if not self.no_print:
        print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr")
               # (Spice(), "SPICE")
               ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        if not self.no_print:
            print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                if not self.no_print:
                    print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            if not self.no_print:
                print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
    # round each metric to three decimals for the returned summary
    res_diff_method = {}
    for metric, score in self.eval.items():
        res_diff_method[metric] = float('%.3f' % (score))
    return res_diff_method
def evaluate(self): """ Load the sentences from json files """ def readJson(refName, candName): path_to_ref_file = os.path.join(self._pathToData, refName) path_to_cand_file = os.path.join(self._pathToData, candName) ref_list = json.loads(open(path_to_ref_file, 'r').read()) cand_list = json.loads(open(path_to_cand_file, 'r').read()) gts = defaultdict(list) res = defaultdict(list) # change of naming convention from ref to gts for l in ref_list: gts[l['image_id']].append({"caption": l['caption']}) # change of naming convention from cand to res for l in cand_list: res[l['image_id']].append({"caption": l['caption']}) return gts, res print 'Loading Data...' gts, res = readJson(self._refName, self._candName) # ================================================= # Set up scorers # ================================================= print 'tokenization...' tokenizer = PTBTokenizer() gts = tokenizer.tokenize(gts) res = tokenizer.tokenize(res) # ================================================= # Set up scorers # ================================================= print 'setting up scorers...' scorers = [ (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), (Meteor(),"METEOR"), (Rouge(), "ROUGE_L"), (Cider(self._dfMode), "CIDEr") ] # ================================================= # Compute scores # ================================================= for scorer, method in scorers: print 'computing %s score...'%(scorer.method()) score, scores = scorer.compute_score(gts, res) if type(method) == list: for sc, scs, m in zip(score, scores, method): self.setEval(sc, m) else: self.setEval(score, method)
def evaluate(self):
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]      # reference captions
        res[imgId] = self.cocoRes.imgToAnns[imgId]   # candidate captions
    # =================================================
    # Tokenization
    # =================================================
    print('===== tokenization... gts')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    print('===== tokenization... res')
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('===== computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, imgIds, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, imgIds, method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def test_tokenize():
    t = PTBTokenizer()
    tokens = t.tokenize(
        dict(id1=[dict(caption="Is this a good question?"),
                  dict(caption="Is this a better question?")],
             id2=[dict(caption="How's this question?")]))
    assert tokens == dict(
        id1=['is this a good question', 'is this a better question'],
        id2=["how 's this question"])
def __init__(self, gts, res, df):
    print('tokenization...')
    tokenizer = PTBTokenizer('gts')
    _gts = tokenizer.tokenize(gts)
    print('tokenized refs')
    tokenizer = PTBTokenizer('res')
    _res = tokenizer.tokenize(res)
    print('tokenized cands')
    self.gts = _gts
    self.res = _res
    self.df = df
def evaluate(self, gts=None, res=None):
    imgIds = self.params['image_id']
    if gts is None and res is None:
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr"),
               (Spice(), "SPICE")]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def evaluate(self):
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    for imgId in imgIds:
        anno = {'image_id': imgId, 'caption': self.ref[imgId]}
        gts[imgId] = [anno]
        anno_pred = {'image_id': imgId, 'caption': self.pred[imgId]}
        res[imgId] = [anno_pred]
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        # (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        # (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, imgIds, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, imgIds, method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def evaluate(self):
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        # sample inputs for a quick manual test:
        # gts = {1: ['a metallic refrigerator freezer sitting inside of a kitchen',
        #            'a small kitchen with a stove and refrigerator',
        #            "a stainless steel refrigerator in a home 's kitchen",
        #            'a kitchen with a stove and a refrigerator',
        #            'a kitchen has a fridge a stove and a counter top']}
        # res = {1: ['a kitchen with a stove and a refrigerator']}
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, imgIds, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, imgIds, method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def evaluate(self):
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco[imgId]
        res[imgId] = self.cocoRes[imgId]
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        # (cBleu(4), ["cBleu_1", "cBleu_2", "cBleu_3", "cBleu_4"]),
        # (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L")
        # (Cider(), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    final_score = 0
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, imgIds, m)
                print("%s: %0.3f" % (m, sc))
                final_score = sc  # keeps the last list metric (here Bleu_4)
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, imgIds, method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
    return final_score
def evaluate(self):
    imgIds = self.params
    gts = self.coco
    res = self.cocoRes
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, imgIds, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, imgIds, method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def evaluate(self, ngram_metric):
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco[imgId]
        res[imgId] = self.cocoRes[imgId]
    # =================================================
    # Tokenization
    # =================================================
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    if ngram_metric == 'ROUGE_L':
        scorers = [
            (Bleu(1), ["Bleu_1"]),
            (Rouge(), "ROUGE_L")
        ]
    else:
        assert ngram_metric.startswith('Bleu_')
        i = ngram_metric[len('Bleu_'):]
        assert i.isdigit()
        i = int(i)
        assert i > 0
        scorers = [
            (Bleu(i), ['Bleu_{}'.format(j) for j in range(1, i + 1)]),
        ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, imgIds, m)
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, imgIds, method)
    self.setEvalImgs()
    return self.evalImgs
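# Illustration of the ngram_metric dispatch in evaluate() above, with the
# behaviour inferred from its asserts: 'Bleu_n' selects Bleu(n) and reports
# Bleu_1..Bleu_n, while 'ROUGE_L' also computes Bleu_1 alongside ROUGE_L.
# The helper name is invented.
def _metrics_for(ngram_metric):
    if ngram_metric == 'ROUGE_L':
        return ['Bleu_1', 'ROUGE_L']
    assert ngram_metric.startswith('Bleu_')
    n = int(ngram_metric[len('Bleu_'):])
    return ['Bleu_{}'.format(j) for j in range(1, n + 1)]

assert _metrics_for('Bleu_3') == ['Bleu_1', 'Bleu_2', 'Bleu_3']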
def eval(target, refers, scorer, tokenizer=PTBTokenizer(), use_private=False):
    """Score a single target sentence against its reference sentences."""
    k = 'single'
    res_single = {k: [target]}
    gts = {k: refers}
    # =================================================
    # Convert to COCO format
    # =================================================
    gts = to_coco(gts, res_single.keys())
    res = to_coco(res_single, res_single.keys())
    # =================================================
    # Tokenization
    # =================================================
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    if use_private:
        # initialize the meteor.jar
        score, scores = scorer._compute_score(gts, res)
    else:
        score, scores = scorer.compute_score(gts, res)
    print(score)
    return score, scores
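# Hedged usage sketch for eval() above; the sentences are invented and Cider()
# is one of the scorers used elsewhere in this file. Note that the default
# tokenizer=PTBTokenizer() argument is constructed once at import time and is
# shared across calls.
def _eval_example():
    return eval('a cat on a mat',
                ['a cat sits on a mat', 'a cat is sitting on the mat'],
                Cider())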
def evaluate(self):
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def evaluate(self):
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(["-m", "stem", '-w', '1.0', '-p', '0.85 0.2 0.0 0.75']), "METEOR_Stems"),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def evaluate(self):
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]
        print('res:', res[imgId])
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    print('res:', res[imgId])  # inspect the last image's tokenized captions
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr")]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def evaluate(self):
    evalRefIds = [ann['ref_id'] for ann in self.Res]
    refToGts = {}
    for ref_id in evalRefIds:
        ref = self.refer.Refs[ref_id]
        gt_sents = [sent['sent'] for sent in ref['sentences']]  # up to 3 expressions
        refToGts[ref_id] = gt_sents
    refToRes = {ann['ref_id']: [ann['sent']] for ann in self.Res}
    print('tokenization...')
    tokenizer = PTBTokenizer()
    self.refToRes = tokenizer.tokenize(refToRes)
    self.refToGts = tokenizer.tokenize(refToGts)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(self.refToGts, self.refToRes)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setRefToEvalRefs(scs, self.refToGts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.setEvalRefs()
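# Shape of self.Res consumed by evaluate() above; the ref ids and sentences
# are invented for illustration: one predicted sentence per ref_id, matched
# against the ground-truth expressions in self.refer.Refs.
_example_Res = [
    {'ref_id': 123, 'sent': 'the man in the red shirt'},
    {'ref_id': 456, 'sent': 'the dog on the left'},
]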
def evaluate(self, gts, res):
    # =================================================
    # Tokenization
    # =================================================
    logging.info('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    logging.info('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr")
               # (Spice(), "SPICE")
               ]
    # =================================================
    # Compute scores
    # =================================================
    bleu_4_score = 0
    for scorer, method in scorers:
        logging.info('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                if m == "Bleu_4":
                    bleu_4_score = sc
                logging.info("%s: %0.3f" % (m, sc))
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            logging.info("%s: %0.3f" % (method, score))
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
    return bleu_4_score
def setup(self):
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.coco.imgToAnns[imgId]
    # Tokenize
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # Set up CIDEr: accumulate (hypothesis, references) pairs, then
    # precompute the n-gram document frequencies over the references.
    assert (gts.keys() == res.keys())
    imgIds = gts.keys()
    cider_scorer = CiderScorer()
    for id in imgIds:
        hypo = res[id]
        ref = gts[id]
        # Sanity check.
        assert (type(hypo) is list)
        # assert(len(hypo) == 1)
        assert (type(ref) is list)
        assert (len(ref) > 0)
        cider_scorer += (hypo[0], ref)
    cider_scorer.compute_doc_freq()
    assert (len(cider_scorer.ctest) >= max(cider_scorer.document_frequency.values()))
    self.flag_setup = True
    self.cider_scorer = cider_scorer
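# Toy re-implementation (assumed semantics, for illustration only) of the
# document-frequency idea behind CiderScorer.compute_doc_freq() used above:
# an n-gram's df is the number of images whose reference set contains it at
# least once.
from collections import Counter

def _doc_freq(refs_per_image):
    df = Counter()
    for refs in refs_per_image:
        ngrams = set()
        for ref in refs:
            toks = ref.split()
            for n in range(1, 5):  # CIDEr uses 1- to 4-grams
                ngrams.update(tuple(toks[i:i + n])
                              for i in range(len(toks) - n + 1))
        df.update(ngrams)  # each image counts an n-gram at most once
    return df

_df = _doc_freq([['a cat on a mat'], ['a dog on a rug']])
assert _df[('on',)] == 2 and _df[('cat',)] == 1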
def main():
    coco_train = COCO("/data/home/wuzhiron/lixin/coco14/annotations/captions_train2014.json")
    coco_val = COCO("/data/home/wuzhiron/lixin/coco14/annotations/captions_val2014.json")
    train_imgids = pkl.load(open("/data/home/wuzhiron/lixin/coco14/train_imgids.pkl", 'rb'))
    val_imgids = pkl.load(open("/data/home/wuzhiron/lixin/coco14/val_imgids.pkl", 'rb'))
    train_caps = {}
    val_caps = {}
    for imgid in train_imgids:
        train_caps[imgid] = coco_train.imgToAnns[imgid]
    for imgid in val_imgids:
        val_caps[imgid] = coco_val.imgToAnns[imgid]
    tokenizer = PTBTokenizer()
    train_caps = tokenizer.tokenize(train_caps)
    val_caps = tokenizer.tokenize(val_caps)
    scores = np.zeros((100, 5, len(train_caps)), dtype=np.float32)
    for i in range(100):
        for j in range(5):
            scores[i][j] = compute_score(train_caps, val_caps,
                                         train_imgids, val_imgids, i, j)
        print("{} / 100".format(i))
    np.save("cider_scores", scores)
    return
def evaluate(self):
    # ground truths come from the labels, candidates from the predictions
    gts = dict(zip(range(0, len(self.label_list)), self.label_list))
    res = dict(zip(range(0, len(self.predicted_list)), self.predicted_list))
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr")]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.set_textid_to_eval(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.set_textid_to_eval(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.set_eval()
def evaluate(self):
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]
    # =================================================
    # Tokenization
    # =================================================
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
    self.setEvalImgs()
def evaluate(self):
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [(self.Spice, "SPICE")]
    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
class SpiceEval():
    def __init__(self):
        self.evalImgs = []
        self.eval = {}
        self.imgToEval = {}
        self.spice = Spice()
        self.tokenizer = PTBTokenizer()

    def evaluate(self, gts, res):
        """The inputs have the structure
        {'123': [{'image_id': 123, 'caption': 'xxxxx'},
                 {'image_id': 123, 'caption': 'yyy'}], ...}
        """
        assert set(gts.keys()) == set(res.keys())
        imgIds = gts.keys()
        gts = self.tokenizer.tokenize(gts)
        res = self.tokenizer.tokenize(res)
        # =================================================
        # Compute scores
        # =================================================
        print('computing %s score...' % (self.spice.method()))
        score, scores = self.spice.compute_score(gts, res)
        print("%s: %0.3f" % ("spice", score))
        self.eval['spice'] = score
        print(scores)
        for imgId, score in zip(sorted(imgIds), scores):
            if imgId not in self.imgToEval:
                self.imgToEval[imgId] = {}
            self.imgToEval[imgId]["image_id"] = imgId
            self.imgToEval[imgId]["spice"] = score
        return self.eval['spice'], self.imgToEval
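# Hedged usage sketch for SpiceEval; the ids and captions are invented.
# Spice() shells out to a Java scorer, so this only illustrates the call shape.
def _spice_eval_example():
    evaluator = SpiceEval()
    gts = {'123': [{'image_id': '123', 'caption': 'a cat sits on a mat'},
                   {'image_id': '123', 'caption': 'a cat is on the mat'}]}
    res = {'123': [{'image_id': '123', 'caption': 'a cat on a mat'}]}
    overall, per_image = evaluator.evaluate(gts, res)
    return overall, per_image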
def evaluate(self):
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    gts_all = {}
    for imgId in imgIds:
        gts[imgId] = self.ref[imgId]
        res[imgId] = self.pred[imgId]
        gts_all[imgId] = []
        for i, anno in enumerate(gts[imgId]):
            for cap in anno['captions']:
                gts_all[imgId].append({'image_id': anno['image_id'],
                                       'caption': cap,
                                       'region_id': i})
    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts_tokens = tokenizer.tokenize(gts_all)
    res_tokens = tokenizer.tokenize(res)
    # insert caption tokens into gts
    for imgId in imgIds:
        for tokens, cap_info in zip(gts_tokens[imgId], gts_all[imgId]):
            region_id = cap_info['region_id']
            if 'caption_tokens' not in gts[imgId][region_id]:
                gts[imgId][region_id]['caption_tokens'] = []
            gts[imgId][region_id]['caption_tokens'].append(tokens)
    # =================================================
    # Compute scores
    # =================================================
    # Holistic score, as in the DenseCap paper: multi-to-multi matching
    print('computing Meteor score...')
    score, scores = Meteor().compute_score_m2m(gts_tokens, res_tokens)
    print("Meteor: %0.3f" % (score))
    # mean AP settings, as in the DenseCap paper
    overlap_ratios = [0.3, 0.4, 0.5, 0.6, 0.7]
    meteor_score_th = [0, 0.05, 0.1, 0.15, 0.2, 0.25]
    ap_matrix = np.zeros((len(overlap_ratios), len(meteor_score_th)))
    gt_region_n = sum([len(gts[imgId]) for imgId in imgIds])
    # calculate the n x m bbox overlaps in one pass
    eval_stats = {}
    gts_tokens_match = {}
    res_tokens_match = {}
    all_keys = []
    t1 = time.time()
    for imgId in imgIds:
        model_caption_locations = res[imgId]
        gt_caption_locations = gts[imgId]
        # should be sorted by predicted prob in advance
        # model_caption_locations.sort(key=lambda x: -x['log_prob'])
        ov_matrix = self.calculate_overlap_matrix(model_caption_locations,
                                                  gt_caption_locations)
        match_gt_ids, match_ratios = self.bbox_match(ov_matrix)
        probs = np.array([x['prob'] for x in model_caption_locations])
        scores = np.zeros((len(res[imgId])))
        match_model_ids = np.where(match_gt_ids > -1)[0]
        match_pairs = zip(match_model_ids, match_gt_ids[match_model_ids])
        for model_id, gt_id in match_pairs:
            key = (imgId, model_id)
            all_keys.append(key)
            gts_tokens_match[key] = gts[imgId][gt_id]['caption_tokens']
            res_tokens_match[key] = [res_tokens[imgId][model_id]]
        eval_stats[imgId] = {'match_ids': match_gt_ids,
                             'match_ratios': match_ratios,
                             'probs': probs,
                             'meteor_scores': scores}
    # compute meteor scores of matched regions in one pass
    score_match, scores_match = Meteor().compute_score(gts_tokens_match,
                                                       res_tokens_match,
                                                       imgIds=all_keys)
    for key, score in zip(all_keys, scores_match):
        eval_stats[key[0]]['meteor_scores'][key[1]] = score
    t2 = time.time()
    print('caption scoring finished, takes %f seconds' % (t2 - t1))
    all_match_ratios = np.concatenate([v['match_ratios'] for k, v in eval_stats.items()])
    all_probs = np.concatenate([v['probs'] for k, v in eval_stats.items()])
    all_scores = np.concatenate([v['meteor_scores'] for k, v in eval_stats.items()])
    prob_order = np.argsort(all_probs)[::-1]
    all_match_ratios = all_match_ratios[prob_order]
    all_scores = all_scores[prob_order]
    for rid, overlap_r in enumerate(overlap_ratios):
        for th_id, score_th in enumerate(meteor_score_th):
            # compute AP for each setting
            tp = (all_match_ratios > overlap_r) & (all_scores > score_th)
            fp = 1 - tp
            tp = tp.cumsum().astype(np.float32)
            fp = fp.cumsum().astype(np.float32)
            rec = tp / gt_region_n
            prec = tp / (fp + tp)
            ap = 0
            all_t = np.linspace(0, 1, 100)
            apn = len(all_t)
            for t in all_t:
                mask = rec > t
                p = np.max(prec * mask)
                ap += p
            ap_matrix[rid, th_id] = ap / apn
    t3 = time.time()
    print('mean AP computing finished, takes %f seconds' % (t3 - t2))
    mean_ap = np.mean(ap_matrix) * 100  # percent
    print('ap matrix')
    print(ap_matrix)
    print("mean average precision is %0.3f" % mean_ap)
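# Miniature check (invented numbers) of the 100-point interpolated AP used in
# evaluate() above: predictions are sorted by confidence, and tp marks whether
# each one matched a gt region with enough overlap and a high-enough METEOR
# score.
import numpy as np

_tp = np.array([1, 0, 1], dtype=np.float32)  # 3 predictions, 2 true positives
_fp = 1 - _tp
_rec = _tp.cumsum() / 2.0                    # 2 gt regions in this toy setup
_prec = _tp.cumsum() / (_tp.cumsum() + _fp.cumsum())
_ap = np.mean([np.max(_prec * (_rec > t)) for t in np.linspace(0, 1, 100)])
# _ap is roughly 0.83 for this toy case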
def evaluate(self): """ Load the sentences from json files """ def readJson(refName, candName): path_to_ref_file = os.path.join(refName) path_to_cand_file = os.path.join(self._pathToData, candName) ref_list = json.loads(open(path_to_ref_file, 'r').read()) cand_list = json.loads(open(path_to_cand_file, 'r').read()) ref_list = ref_list['annotations'] res = defaultdict(list) for id_cap in cand_list: res[id_cap['image_id']].extend(id_cap['captions']) gts = defaultdict(list) # change of naming convention from ref to gts for l in ref_list: gts[l['image_id']].append({"caption": l['caption']}) return gts, res print 'Loading Data...' gts, res = readJson(self._refName, self._candName) # ================================================= # Set up scorers # ================================================= accuracy = {} tokenizer = PTBTokenizer() token_gts = tokenizer.tokenize(gts) avg_cider = 0 for image_id in res.keys(): accuracy[image_id] = [] new_gts = {image_id: token_gts[image_id]} # new_gts = {"0": [ # "zebras stand on grassy field with trees", # "zebras stand on grassy field" # # "zebras gazing" # ] # } for i in range(self._num): new_res = {image_id: [res[image_id][i]]} # new_res = {"0": ["grassy field with trees"]} scorers = [ # (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), # (Meteor(),"METEOR"), # (Rouge(), "ROUGE_L"), (Cider(self._dfMode, self._df_file), "CIDEr"), # (Spice(), "SPICE") ] # ================================================= # Compute scores # ================================================= for scorer, method in scorers: print 'computing %s score...' % (scorer.method()) score, scores = scorer.compute_score(new_gts, new_res) accuracy[image_id].append(score) avg_cider += np.mean(accuracy[image_id]) self.eval = accuracy print('Average Cider: %.5f') % (avg_cider / len(accuracy))
def cross_evaluate(self):
    """
    We evaluate how relevant the generated expression is to the ground-truth
    expressions, and how different it is from the expressions of the other
    objects within the same image. The prerequisite is that the dataset is
    split by image_id and each ann has multiple expressions, e.g., our new
    RefCOCO dataset whose testing objects have ~10 expressions each.
    We first compute the score sc_ii = (sent_i, gd_sents_i), then the score
    sc_ij = (sent_i, gd_sents_j); the margin max(0, sc_ii - sc_ij) is taken
    as the final score. Specifically, we choose METEOR and CIDEr for this
    kind of evaluation.
    To do so, we prepare ref_to_gts and ref_to_res. As we want to do cross
    evaluation, our key is a pair_id, i.e., "ref_id1_to_ref_id2", e.g.,
    '123_456', then input:
    - Gts[123_456] = [456's gd sents]
    - Res[123_456] = [123's predicted sents].
    return:
    - ref_to_eval[123_456] = {method: score}, which measures 123's generation
      against 456's gd sents. Note we also compute the score of 123_123.
    We use "sids" and "cids" to denote source_ref_ids and cross_ref_ids.
    """
    source_ref_ids = [pred['ref_id'] for pred in self.preds]
    Preds = {pred['ref_id']: pred['sent'] for pred in self.preds}
    # construct pair_id, which is [source_ref_id]_[target_ref_id], e.g., 123_456
    Gts = {}
    Res = {}
    for source_ref_id in source_ref_ids:
        image_id = self.refer.Refs[source_ref_id]['image_id']
        cross_refs = self.refer.imgToRefs[image_id]  # including source_ref itself
        for cross_ref in cross_refs:
            pair_id = str(source_ref_id) + '_' + str(cross_ref['ref_id'])
            Res[pair_id] = [Preds[source_ref_id]]
            Gts[pair_id] = [sent['sent'] for sent in cross_ref['sentences']]
    # tokenize
    print('tokenization...')
    tokenizer = PTBTokenizer()
    Gts = tokenizer.tokenize(Gts)
    Res = tokenizer.tokenize(Res)
    # set up scorers
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr")]
    # compute scores
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(Gts, Res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEvals(scs, Gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEvals(scores, Gts.keys(), method)
            print("%s: %0.3f" % (method, score))
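# Hedged sketch of the margin described in cross_evaluate's docstring: for a
# source ref i and another ref j in the same image, the final score under one
# metric is max(0, sc_ii - sc_ij). ref_to_eval and the pair_id key format
# follow the docstring; the helper name is invented.
def _cross_margins(ref_to_eval, source_ref_id, cross_ref_ids, method='CIDEr'):
    sc_ii = ref_to_eval['%s_%s' % (source_ref_id, source_ref_id)][method]
    return {j: max(0, sc_ii - ref_to_eval['%s_%s' % (source_ref_id, j)][method])
            for j in cross_ref_ids if j != source_ref_id}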
def evaluate(self): """ Load the sentences from json files """ def readJson(candName, num=10): path_to_cand_file = os.path.join(self._pathToData, candName) cand_list = json.loads(open(path_to_cand_file, 'r').read()) res = defaultdict(list) for id_cap in cand_list: res[id_cap['image_id']].extend(id_cap['captions']) return res print 'Loading Data...' res = readJson(self._candName) ratio = {} for im_id in res.keys(): print('number of images: %d\n') % (len(ratio)) cov = np.zeros([10, 10]) for i in range(10): for j in range(i, 10): new_gts = {} new_res = {} new_res[im_id] = [{'caption': res[im_id][i]}] new_gts[im_id] = [{'caption': res[im_id][j]}] # new_gts[im_id] = gt # ================================================= # Set up scorers # ================================================= print 'tokenization...' tokenizer = PTBTokenizer() new_gts = tokenizer.tokenize(new_gts) new_res = tokenizer.tokenize(new_res) # ================================================= # Set up scorers # ================================================= print 'setting up scorers...' scorers = [ # (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), # (Meteor(),"METEOR"), # (Rouge(), "ROUGE_L"), (Cider(self._dfMode, self._df_file), "CIDEr") ] # ================================================= # Compute scores # ================================================= for scorer, method in scorers: print 'computing %s score...' % (scorer.method()) score, scores = scorer.compute_score(new_gts, new_res) cov[i, j] = score cov[j, i] = cov[i, j] u, s, v = np.linalg.svd(cov) r = max(s) / s.sum() print('ratio=%.5f\n') % (r) ratio[im_id] = r if len(ratio) == 5000: break self.eval = ratio