def coco_eval(candidates_file, references_file):
    """
    Given the candidates and references, the coco-caption module is used to
    calculate various metrics.

    Returns a list of dictionaries containing:
        - BLEU
        - ROUGE
        - METEOR
        - CIDEr
    """
    # This is used to suppress the output of coco-eval:
    old_stdout = sys.stdout
    sys.stdout = open(os.devnull, "w")
    try:
        # Derived from example code in coco-captions repo
        coco = COCO(references_file)
        cocoRes = coco.loadRes(candidates_file)
        cocoEval = COCOEvalCap(coco, cocoRes)
        cocoEval.evaluate()
    finally:
        # Change back to standard output
        sys.stdout.close()
        sys.stdout = old_stdout

    return cocoEval.evalImgs
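# A minimal usage sketch for coco_eval() above (not part of the original
# code): the file paths are hypothetical examples, and the imports below are
# the module-level names the function assumes (sys, os, COCO, COCOEvalCap).
import sys
import os
from pycocotools.coco import COCO
from pycocoevalcap.eval import COCOEvalCap

per_image_scores = coco_eval('results/candidates_val2014.json',
                             'annotations/captions_val2014.json')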
def language_eval(dataset, preds):
    import sys
    if 'coco' in dataset:
        sys.path.append("coco-caption")
        annFile = 'coco-caption/annotations/captions_val2014.json'
    else:
        sys.path.append("f30k-caption")
        annFile = 'f30k-caption/annotations/dataset_flickr30k.json'
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap

    encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    coco = COCO(annFile)
    valids = coco.getImgIds()

    # filter results to only those in MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print 'using %d/%d predictions' % (len(preds_filt), len(preds))
    json.dump(preds_filt, open('tmp.json', 'w'))  # serialize to temporary json file. Sigh, COCO API...
    resFile = 'tmp.json'

    cocoRes = coco.loadRes(resFile)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score
    return out
def main():
    HASH_IMG_NAME = True
    pylab.rcParams['figure.figsize'] = (10.0, 8.0)
    json.encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--inputfile", type=str, required=True,
                        help='File containing model-generated/hypothesis sentences.')
    parser.add_argument("-r", "--references", type=str, required=True,
                        help='JSON File containing references/groundtruth sentences.')
    args = parser.parse_args()

    prediction_file = args.inputfile
    reference_file = args.references
    json_predictions_file = '{0}.json'.format(prediction_file)

    crf = CocoResFormat()
    crf.read_file(prediction_file, HASH_IMG_NAME)
    crf.dump_json(json_predictions_file)

    # create coco object and cocoRes object.
    coco = COCO(reference_file)
    cocoRes = coco.loadRes(json_predictions_file)

    # create cocoEval object.
    cocoEval = COCOEvalCap(coco, cocoRes)

    # evaluate results
    cocoEval.evaluate()

    # print output evaluation scores
    for metric, score in cocoEval.eval.items():
        print '%s: %.3f' % (metric, score)
def coco_eval(ann_fn, json_fn, save_fn):
    coco = COCO(ann_fn)
    coco_res = coco.loadRes(json_fn)
    coco_evaluator = COCOEvalCap(coco, coco_res)
    # Comment out the line below to evaluate the full validation or testing set.
    coco_evaluator.params['image_id'] = coco_res.getImgIds()
    # NOTE: the stock pycocoevalcap evaluate() takes no arguments; passing
    # save_fn assumes a project-specific evaluator that also saves its results.
    coco_evaluator.evaluate(save_fn)
def main(argv):
    input_json = 'results/' + sys.argv[1]
    annFile = 'annotations/captions_val2014.json'
    coco = COCO(annFile)
    valids = coco.getImgIds()

    checkpoint = json.load(open(input_json, 'r'))
    preds = checkpoint['val_predictions']

    # filter results to only those in MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print 'using %d/%d predictions' % (len(preds_filt), len(preds))
    json.dump(preds_filt, open('tmp.json', 'w'))  # serialize to temporary json file. Sigh, COCO API...
    resFile = 'tmp.json'

    cocoRes = coco.loadRes(resFile)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score

    # serialize to file, to be read from Lua
    json.dump(out, open(input_json + '_out.json', 'w'))
def language_eval(input_data, savedir, split):
    if type(input_data) == str:  # Filename given.
        checkpoint = json.load(open(input_data, 'r'))
        preds = checkpoint
    elif type(input_data) == list:  # Direct predictions given.
        preds = input_data

    annFile = 'third_party/coco-caption/annotations/captions_val2014.json'
    coco = COCO(annFile)
    valids = coco.getImgIds()

    # Filter results to only those in MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print 'Using %d/%d predictions' % (len(preds_filt), len(preds))
    resFile = osp.join(savedir, 'result_%s.json' % (split))
    json.dump(preds_filt, open(resFile, 'w'))  # Serialize to temporary json file. Sigh, COCO API...

    cocoRes = coco.loadRes(resFile)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # Create output dictionary.
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score

    # Return aggregate and per-image scores.
    return out, cocoEval.evalImgs
def evaluateModel(model_json):
    # `coco` is the ground-truth COCO object, assumed to be created at module level.
    cocoRes = coco.loadRes(model_json)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    results = {}
    for metric, score in cocoEval.eval.items():
        results[metric] = score
    return results
def score_generation(gt_filename=None, generation_result=None):
    coco_dict = read_json(generation_result)
    coco = COCO(gt_filename)
    generation_coco = coco.loadRes(generation_result)
    coco_evaluator = COCOEvalCap(coco, generation_coco)
    # coco_image_ids = [self.sg.image_path_to_id[image_path]
    #                   for image_path in self.images]
    coco_image_ids = [j['image_id'] for j in coco_dict]
    coco_evaluator.params['image_id'] = coco_image_ids
    results = coco_evaluator.evaluate(return_results=True)
    return results
def measure(prediction_txt_path, reference):
    # Convert the txt-format predictions into the format required by the
    # evaluation code.
    crf = CocoResFormat()
    crf.read_file(prediction_txt_path, True)

    # crf.res holds the predictions after format conversion.
    cocoRes = reference.loadRes(crf.res)
    cocoEval = COCOEvalCap(reference, cocoRes)
    cocoEval.evaluate()

    for metric, score in cocoEval.eval.items():
        print('%s: %.3f' % (metric, score))
    return cocoEval.eval
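# A minimal usage sketch for measure() above (not part of the original code):
# `reference` is assumed to be a pycocotools COCO object built from a caption
# annotation file, the prediction path is a hypothetical example, and
# CocoResFormat is assumed to be importable from the surrounding project.
from pycocotools.coco import COCO

reference = COCO('annotations/captions_val2014.json')
scores = measure('results/predictions.txt', reference)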
def language_eval(dataset, preds, model_id, split):
    import sys
    if 'coco' in dataset:
        sys.path.append("coco-caption")
        annFile = 'coco-caption/annotations/captions_val2014.json'
    elif 'msvd' in dataset:
        sys.path.append('coco-caption')
        annFile = 'coco-caption/annotations/coco_ref_msvd.json'
    elif 'kuaishou' in dataset:
        sys.path.append('coco-caption')
        annFile = 'coco-caption/annotations/coco_ref_kuaishou.json'
    else:
        sys.path.append("f30k-caption")
        annFile = 'f30k-caption/annotations/dataset_flickr30k.json'
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap

    encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    cache_path = os.path.join('eval_results/', model_id + '_' + split + '.json')

    coco = COCO(annFile)
    valids = coco.getImgIds()

    # filter results to only those in MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...

    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score

    imgToEval = cocoEval.imgToEval
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption
    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)

    return out
def coco_val_eval(self, pred_path, result_path):
    """Evaluate the predicted sentences on MS COCO validation."""
    sys.path.append('./external/coco-caption')
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap

    coco = COCO('./external/coco-caption/annotations/captions_val2014.json')
    cocoRes = coco.loadRes(pred_path)

    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    with open(result_path, 'w') as fout:
        for metric, score in cocoEval.eval.items():
            print('%s: %.3f' % (metric, score), file=fout)
def run(dataset, algName, outDir):
    pylab.rcParams['figure.figsize'] = (10.0, 8.0)

    import json
    from json import encoder
    encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    # set up file names and paths
    # dataDir='./data/'+dataset
    # dataDir= '/media/SSD/projects/NeuralTalkAnimator'
    dataType = 'val'
    # annFile='%s/annotations/captions_%s.json'%(dataDir,dataType)
    # annFile='/media/SSD/projects/NeuralTalkAnimator/data/youtube2text/captions_val2014.json'
    dataDir = 'data/' + dataset
    annFile = '%s/captions_%s.json' % (dataDir, dataType)
    subtypes = ['results', 'evalImgs', 'eval']
    [resFile, evalImgsFile, evalFile] = \
        ['%s/captions_%s_%s_%s.json' % (outDir, dataType, algName, subtype)
         for subtype in subtypes]

    coco = COCO(annFile)
    cocoRes = coco.loadRes(resFile)

    # create cocoEval object by taking coco and cocoRes
    cocoEval = COCOEvalCap(coco, cocoRes)

    # evaluate on a subset of images by setting
    # cocoEval.params['image_id'] = cocoRes.getImgIds()
    # please remove this line when evaluating the full validation set
    cocoEval.params['image_id'] = cocoRes.getImgIds()

    # evaluate results
    cocoEval.evaluate()

    # print output evaluation scores
    scores = list()
    for metric, score in cocoEval.eval.items():
        print '%s: %.3f' % (metric, score)
        scores.append(score)

    print 'inside metrics'
    return scores
import sys

input_json = sys.argv[1]
annFile = 'annotations/captions_val2014.json'
coco = COCO(annFile)
valids = coco.getImgIds()

checkpoint = json.load(open(input_json, 'r'))
preds = checkpoint['val_predictions']

# filter results to only those in MSCOCO validation set (will be about a third)
preds_filt = [p for p in preds if p['image_id'] in valids]
# preds_filt = preds
print 'using %d/%d predictions' % (len(preds_filt), len(preds))
json.dump(preds_filt, open('tmp.json', 'w'))  # serialize to temporary json file. Sigh, COCO API...
resFile = 'tmp.json'

cocoRes = coco.loadRes(resFile)
cocoEval = COCOEvalCap(coco, cocoRes)
cocoEval.params['image_id'] = cocoRes.getImgIds()
cocoEval.evaluate()

# create output dictionary
out = {}
for metric, score in cocoEval.eval.items():
    out[metric] = score

# serialize to file, to be read from Lua
json.dump(out, open(input_json + '_out.json', 'w'))
def language_eval(dataset, preds, preds_n, eval_kwargs, split):
    model_id = eval_kwargs['id']
    eval_oracle = eval_kwargs.get('eval_oracle', 0)

    import sys
    sys.path.append("coco-caption")
    annFile = 'coco-caption/annotations/captions_val2014.json'
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap

    # encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    cache_path = os.path.join('eval_results/', '.cache_' + model_id + '_' + split + '.json')

    coco = COCO(annFile)
    valids = coco.getImgIds()

    # filter results to only those in MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    mean_perplexity = sum([_['perplexity'] for _ in preds_filt]) / len(preds_filt)
    mean_entropy = sum([_['entropy'] for _ in preds_filt]) / len(preds_filt)
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...

    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score
    # Add mean perplexity
    out['perplexity'] = mean_perplexity
    out['entropy'] = mean_entropy

    imgToEval = cocoEval.imgToEval
    for k in list(imgToEval.values())[0]['SPICE'].keys():
        if k != 'All':
            out['SPICE_' + k] = np.array([v['SPICE'][k]['f'] for v in imgToEval.values()])
            out['SPICE_' + k] = (out['SPICE_' + k][out['SPICE_' + k] == out['SPICE_' + k]]).mean()
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption

    if len(preds_n) > 0:
        cache_path_n = os.path.join('eval_results/', '.cache_' + model_id + '_' + split + '_n.json')
        spice_n = eval_multi.eval_spice_n(preds_n, model_id, split)
        out.update(spice_n['overall'])
        div_stats = eval_multi.eval_div_stats(preds_n, model_id, split)
        out.update(div_stats['overall'])
        if eval_oracle:
            oracle = eval_multi.eval_oracle(preds_n, model_id, split)
            out.update(oracle['overall'])
        # NOTE: `oracle` is only defined when eval_oracle is truthy; the dump
        # below raises a NameError otherwise.
        with open(cache_path_n, 'w') as outfile:
            json.dump({'spice_n': spice_n, 'div_stats': div_stats, 'oracle': oracle}, outfile)

    out['bad_count_rate'] = sum([count_bad(_['caption']) for _ in preds_filt]) / float(len(preds_filt))
    outfile_path = os.path.join('eval_results/', model_id + '_' + split + '.json')
    with open(outfile_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)

    return out
annFile = './annotations/captions_val2014.json'

# create coco object and cocoRes object
coco = COCO(annFile)

all_results_json = []
for i in xrange(50):
    resFile = model_dir + '/caption_model%d.json' % i
    print resFile
    cocoRes = coco.loadRes(resFile)

    # create cocoEval object by taking coco and cocoRes
    cocoEval = COCOEvalCap(coco, cocoRes)

    # evaluate on a subset of images by setting
    # cocoEval.params['image_id'] = cocoRes.getImgIds()
    # please remove this line when evaluating the full validation set
    #cocoEval.params['image_id'] = cocoRes.getImgIds()

    # evaluate results
    cocoEval.evaluate()

    # print output evaluation scores
    results = {}
    for metric, score in cocoEval.eval.items():
        results[metric] = score
    all_results_json.append(results)
def language_eval(dataset, preds, model_id, split):
    import sys
    sys.path.append("coco-caption")
    if 'coco' in dataset:
        annFile = 'coco-caption/annotations/captions_val2014.json'
    elif 'flickr30k' in dataset or 'f30k' in dataset:
        annFile = 'coco-caption/f30k_captions4eval.json'
    elif 'person' in dataset:
        annFile = 'coco-caption/person_captions4eval.json'
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap

    # encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    cache_path = os.path.join('eval_results/', '.cache_' + model_id + '_' + split + '.json')

    best_cider = 0
    # gdindex = [0, 1, 2, 3, 4]
    gdindex = [-1]
    cider_list = []
    for i in gdindex:
        annFile = 'coco-caption/person_captions4eval_' + str(i) + '.json'
        print(annFile)
        coco = COCO(annFile)
        valids = coco.getImgIds()

        # filter results to only those in MSCOCO validation set (will be about a third)
        preds_filt = [p for p in preds if p['image_id'] in valids]
        print('using %d/%d predictions' % (len(preds_filt), len(preds)))
        json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...

        cocoRes = coco.loadRes(cache_path)
        cocoEval = COCOEvalCap(coco, cocoRes)
        cocoEval.params['image_id'] = cocoRes.getImgIds()
        cocoEval.evaluate()
        cider_list.append(cocoEval.eval['CIDEr'])

        # create output dictionary
        if cocoEval.eval['CIDEr'] >= best_cider:
            best_cider = cocoEval.eval['CIDEr']
            out = {}
            for metric, score in cocoEval.eval.items():
                out[metric] = score

            imgToEval = cocoEval.imgToEval
            # collect SPICE_sub_score
            #for k in imgToEval.values()[0]['SPICE'].keys():
            #    if k != 'All':
            #        out['SPICE_'+k] = np.array([v['SPICE'][k]['f'] for v in imgToEval.values()])
            #        out['SPICE_'+k] = (out['SPICE_'+k][out['SPICE_'+k]==out['SPICE_'+k]]).mean()
            for p in preds_filt:
                image_id, caption = p['image_id'], p['caption']
                imgToEval[image_id]['caption'] = caption
            # update predictions
            for i in range(len(preds)):
                if preds[i]['image_id'] in imgToEval:
                    preds[i]['eval'] = imgToEval[preds[i]['image_id']]
            out['bad_count_rate'] = sum([count_bad(_['caption']) for _ in preds_filt]) / float(len(preds_filt))
        else:
            continue

        outfile_path = os.path.join('eval_results/', model_id + '_' + split + '.json')
        with open(outfile_path, 'w') as outfile:
            json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)

    cider_list = np.array(cider_list)
    print("min:", np.min(cider_list), " max:", np.max(cider_list),
          " mean:", np.mean(cider_list), " std:", np.std(cider_list))
    return out
import os

from pycocotools.coco import COCO
from pycocoevalcap.eval import COCOEvalCap

coco = COCO(os.path.expanduser(
    "~/Projects/datasets/COCO/annotations/captions_val2017.json"))
cocoRes = coco.loadRes(
    os.path.expanduser("~/Projects/datasets/COCO/results_3.json"))

# create cocoEval object by taking coco and cocoRes
cocoEval = COCOEvalCap(coco, cocoRes)

# evaluate on a subset of images by setting
# cocoEval.params['image_id'] = cocoRes.getImgIds()
# please remove this line when evaluating the full validation set
cocoEval.params['image_id'] = cocoRes.getImgIds()

# evaluate results
# SPICE will take a few minutes the first time, but speeds up due to caching
cocoEval.evaluate()
def generation_experiment(self, strategy, max_batch_size=1000):
    # Compute image descriptors.
    print 'Computing image descriptors'
    self.compute_descriptors()

    do_batches = (strategy['type'] == 'beam' and strategy['beam_size'] == 1) or \
        (strategy['type'] == 'sample' and
         ('temp' not in strategy or strategy['temp'] in (1, float('inf'))) and
         ('num' not in strategy or strategy['num'] == 1))

    num_images = len(self.images)
    batch_size = min(max_batch_size, num_images) if do_batches else 1

    # Generate captions for all images.
    all_captions = [None] * num_images
    for image_index in xrange(0, num_images, batch_size):
        batch_end_index = min(image_index + batch_size, num_images)
        sys.stdout.write("\rGenerating captions for image %d/%d" %
                         (image_index, num_images))
        sys.stdout.flush()
        if do_batches:
            if strategy['type'] == 'beam' or \
                    ('temp' in strategy and strategy['temp'] == float('inf')):
                temp = float('inf')
            else:
                temp = strategy['temp'] if 'temp' in strategy else 1
            output_captions, output_probs = self.captioner.sample_captions(
                self.descriptors[image_index:batch_end_index], temp=temp)
            for batch_index, output in zip(range(image_index, batch_end_index),
                                           output_captions):
                all_captions[batch_index] = output
        else:
            for batch_image_index in xrange(image_index, batch_end_index):
                captions, caption_probs = self.captioner.predict_caption(
                    self.descriptors[batch_image_index], strategy=strategy)
                best_caption, max_log_prob = None, None
                for caption, probs in zip(captions, caption_probs):
                    log_prob = gen_stats(probs)['log_p']
                    if best_caption is None or \
                            (best_caption is not None and log_prob > max_log_prob):
                        best_caption, max_log_prob = caption, log_prob
                all_captions[batch_image_index] = best_caption
    sys.stdout.write('\n')

    # Compute the number of reference files as the maximum number of ground
    # truth captions of any image in the dataset.
    num_reference_files = 0
    for captions in self.dataset.values():
        if len(captions) > num_reference_files:
            num_reference_files = len(captions)
    if num_reference_files <= 0:
        raise Exception('No reference captions.')

    # Collect model/reference captions, formatting the model's captions and
    # each set of reference captions as a list of len(self.images) strings.
    exp_dir = '%s/generation' % self.cache_dir
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)
    # For each image, write out the highest probability caption.
    model_captions = [''] * len(self.images)
    reference_captions = [([''] * len(self.images)) for _ in xrange(num_reference_files)]
    for image_index, image in enumerate(self.images):
        caption = self.captioner.sentence(all_captions[image_index])
        model_captions[image_index] = caption
        for reference_index, (_, caption) in enumerate(self.dataset[image]):
            caption = ' '.join(caption)
            reference_captions[reference_index][image_index] = caption

    coco_image_ids = [self.sg.image_path_to_id[image_path]
                      for image_path in self.images]
    generation_result = [{
        'image_id': self.sg.image_path_to_id[image_path],
        'caption': model_captions[image_index]
    } for (image_index, image_path) in enumerate(self.images)]
    json_filename = '%s/generation_result.json' % self.cache_dir
    print 'Dumping result to file: %s' % json_filename
    with open(json_filename, 'w') as json_file:
        json.dump(generation_result, json_file)
    generation_result = self.sg.coco.loadRes(json_filename)
    coco_evaluator = COCOEvalCap(self.sg.coco, generation_result)
    coco_evaluator.params['image_id'] = coco_image_ids
    coco_evaluator.evaluate()
def language_eval(type, preds, model_id, split):
    import sys
    if 'coco' in type:
        annFile = 'coco-caption/annotations/captions_val2014.json'
        sys.path.append("coco-caption")
        print("Load reference file from: {}".format(annFile))
        from pycocotools.coco import COCO
        from pycocoevalcap.eval import COCOEvalCap
    elif '30k' in type:
        annFile = 'coco-caption/annotations/flickr30k_val.json'
        sys.path.append("coco-caption")
        print("Load reference file from: {}".format(annFile))
        from pycocotools.coco import COCO
        from pycocoevalcap.eval import COCOEvalCap
    elif 'zh' in type:
        annFile = 'data/aic_i2t/eval_reference.json'
        sys.path.append("AI_Challenger/Evaluation/caption_eval")
        print("Load reference file from: {}".format(annFile))
        from coco_caption.pycxtools.coco import COCO
        from coco_caption.pycxevalcap.eval import COCOEvalCap
    else:
        raise Exception('Current eval type is not recognizable.')

    encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    cache_path = os.path.join('eval_results/', type + '_' + model_id + '_' + split + '.json')
    print("Load cache path is:" + cache_path)

    coco = COCO(annFile)
    valids = coco.getImgIds()

    # filter results to only those in MSCOCO validation set (will be about a third)
    if 'coco' in type:
        preds_filt = [p for p in preds if p['image_id'] in valids]
        print('using %d/%d predictions' % (len(preds_filt), len(preds)))
        json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...
    elif '30k' in type:
        preds_filt = [{'caption': p['caption'], 'image_id': str(p['image_id'])}
                      for p in preds if p['image_id'] in valids]
        # NOTE: unlike the other branches, preds_filt is never written to
        # cache_path here, so loadRes below reads whatever file already exists.
    else:
        json.dump(preds, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...

    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    print(len(set(cocoRes.getImgIds()) & set(coco.getImgIds())))
    cocoEval.evaluate()

    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score

    imgToEval = cocoEval.imgToEval
    # for p in preds:
    #     image_id, caption = p['image_id'], p['caption']
    #     imgToEval[image_id]['caption'] = caption
    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)

    return out