def evaluate_on_coco_caption(res_file, label_file, outfile=None):
    """
    res_file: TSV file, each row is [image_key, json format list of captions].
              Each caption is a dict, with fields "caption", "conf".
    label_file: JSON file of ground truth captions in COCO format.
    """
    assert label_file.endswith('.json')
    if res_file.endswith('.tsv'):
        res_file_coco = op.splitext(res_file)[0] + '_coco_format.json'
        convert_tsv_to_coco_format(res_file, res_file_coco)
    else:
        raise ValueError(
            'unknown prediction result file format: {}'.format(res_file))

    coco = COCO(label_file)
    cocoRes = coco.loadRes(res_file_coco)
    cocoEval = COCOEvalCap(coco, cocoRes, 'corpus')

    # evaluate on a subset of images by setting
    # cocoEval.params['image_id'] = cocoRes.getImgIds()
    # please remove this line when evaluating the full validation set
    cocoEval.params['image_id'] = cocoRes.getImgIds()

    # evaluate results
    # SPICE will take a few minutes the first time, but speeds up due to caching
    cocoEval.evaluate()
    result = cocoEval.eval
    if not outfile:
        print(result)
    else:
        with open(outfile, 'w') as fp:
            json.dump(result, fp, indent=4)
    return result
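# A minimal, hypothetical sketch of the COCO-format results JSON that
# coco.loadRes() above expects: a flat list of {"image_id", "caption"} dicts.
# The file name and contents here are illustrative only, not from the original repo.
import json

example_results = [
    {"image_id": 42, "caption": "a person riding a horse on a beach"},
    {"image_id": 73, "caption": "two dogs playing with a frisbee"},
]
with open("example_results_coco_format.json", "w") as fp:
    json.dump(example_results, fp)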
def evaluate_metrics_from_files(pred_file: Union[Path, str],
                                ref_file: Union[Path, str]) \
        -> Tuple[Dict[str, float], Dict[int, Dict[str, float]]]:
    """
    Evaluate the translation metrics from annotation files with the coco lib.
    Follows the example in the repo.

    :param pred_file: File with predicted captions
    :type pred_file: Path | str
    :param ref_file: File with reference captions
    :type ref_file: Path | str
    :return: Tuple with metrics for the whole dataset and per-file metrics
    :rtype: tuple[dict[str, float], dict[int, dict[str, float]]]
    """
    # Load annotations from files
    coco = COCO(str(ref_file))
    cocoRes = coco.loadRes(str(pred_file))

    # Create evaluation object and evaluate metrics
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['audio_id'] = cocoRes.getAudioIds()
    cocoEval.evaluate()

    # Make dict from metrics
    metrics = dict((m, s) for m, s in cocoEval.eval.items())
    return metrics, cocoEval.audioToEval
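# A minimal usage sketch for evaluate_metrics_from_files above. The
# getAudioIds()/audioToEval attributes imply an audio-captioning variant of the
# COCO evaluation tools; the file names here are placeholders.
metrics, per_file_metrics = evaluate_metrics_from_files(
    pred_file="predicted_captions.json",
    ref_file="reference_captions.json",
)
print(metrics)            # corpus-level scores, e.g. BLEU, METEOR, CIDEr
print(per_file_metrics)   # per-audio-file breakdown keyed by audio id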
def evaluate(self, count):
    samples = []
    image_feature, image_id, test_annotation = self.dataset.get_test_for_eval()
    num_samples = self.dataset.num_test_images
    samples_index = np.full(
        [self.batch_size * (num_samples // self.batch_size), self.max_words],
        self.NOT)
    for i in range(num_samples // self.batch_size):
        image_feature_test = image_feature[i * self.batch_size:(i + 1) * self.batch_size]
        feed_dict = {self.images: image_feature_test}
        predict_words = self.sess.run(self.predict_words_argmax, feed_dict)
        for j in range(self.batch_size):
            samples.append([self.dataset.decode(predict_words[j, :], type='string',
                                                remove_END=True)[0]])
            sample_index = self.dataset.decode(predict_words[j, :], type='index',
                                               remove_END=False)[0]
            samples_index[i * self.batch_size + j][:len(sample_index)] = sample_index

    # predict from samples
    samples = np.asarray(samples)
    samples_index = np.asarray(samples_index)
    print('[%] Sentence:', samples[0])

    meteor_pd = {}
    meteor_id = []
    for j in range(len(samples)):
        if image_id[j] == 0:
            break
        meteor_pd[str(int(image_id[j]))] = [{'image_id': str(int(image_id[j])),
                                             'caption': samples[j][0]}]
        meteor_id.append(str(int(image_id[j])))
    scorer = COCOEvalCap(test_annotation, meteor_pd, meteor_id)
    scorer.evaluate(verbose=True)

    sample_dir = os.path.join("./SeqGAN_samples_sample", self.model_name)
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    file_name = "%s_%s" % (self.dataset.dataset_name, str(count))
    np.savez(os.path.join(sample_dir, file_name),
             string=samples, index=samples_index, id=meteor_id)
def get_metric(args_dict, results_file, ann_file):
    coco = COCO(ann_file)
    cocoRes = coco.loadRes(results_file)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.evaluate()
    return cocoEval.eval[args_dict.es_metric]
def evaluate(config, corpus, data_loader, decoder, search_method):
    total_vids = []
    total_pd_captions = []
    pd_vid_caption_dict = defaultdict(lambda: [])

    for batch in iter(data_loader):
        vids, encoder_outputs = batch
        encoder_outputs = encoder_outputs.to(C.device)

        input = torch.LongTensor([[
            corpus.vocab.word2idx['<SOS>'] for _ in range(config.batch_size)
        ]])
        input = input.to(C.device)

        if config.decoder_model == "LSTM":
            hidden = (
                torch.zeros(config.decoder_n_layers, config.batch_size,
                            config.decoder_hidden_size).to(C.device),
                torch.zeros(config.decoder_n_layers, config.batch_size,
                            config.decoder_hidden_size).to(C.device),
            )
        else:
            hidden = torch.zeros(config.decoder_n_layers, config.batch_size,
                                 config.decoder_hidden_size)
            hidden = hidden.to(C.device)

        if isinstance(search_method, str) and search_method == "greedy":
            output_indices = greedy_search(config, decoder, input, hidden,
                                           encoder_outputs)
        elif isinstance(search_method, tuple) and search_method[0] == "beam":
            beam_width = search_method[1]
            output_indices = beam_search(config, beam_width, corpus.vocab,
                                         decoder, input, hidden, encoder_outputs)
            output_indices = np.asarray(output_indices)
            output_indices = output_indices.T
        else:
            raise NotImplementedError("Unknown search method: {}".format(
                config.search_method))

        total_vids += vids
        total_pd_captions += convert_idxs_to_sentences(
            output_indices, corpus.vocab.idx2word, corpus.vocab.word2idx['<EOS>'])

    total_vids = total_vids[:config.n_test]
    total_pd_captions = total_pd_captions[:config.n_test]

    with open("predictions.txt", 'w') as fout:
        for vid, caption in zip(total_vids, total_pd_captions):
            fout.write("{}\t\t{}\n".format(vid, caption))

    for vid, caption in zip(total_vids, total_pd_captions):
        pd_vid_caption_dict[vid].append(caption)

    gts = COCOMSVD(corpus.test_dataset.video_caption_pairs)
    res = load_res(pd_vid_caption_dict)
    cocoEval = COCOEvalCap(gts, res)
    cocoEval.params['image_id'] = gts.getImgIds()
    cocoEval.evaluate()
    return cocoEval.eval
def eval_coco_metrics(results_file):
    coco = COCO(ANNOTATION_FILE_PATH)
    cocoRes = coco.loadRes(results_file)
    cocoEval = COCOEvalCap(coco, cocoRes)

    # evaluate on a subset of images by setting
    # cocoEval.params['image_id'] = cocoRes.getImgIds()
    # please remove this line when evaluating the full validation set
    cocoEval.params["image_id"] = cocoRes.getImgIds()

    # evaluate results
    # SPICE will take a few minutes the first time, but speeds up due to caching
    cocoEval.evaluate()
def coco_eval_specific(results, eval_caption_path, entry_limit=500):
    eval_json_output_dir = './coco/results/'
    os.makedirs(eval_json_output_dir, exist_ok=True)
    resFile = eval_json_output_dir + 'captions-generate.json'
    json.dump(results, open(resFile, 'w'))

    annFile = eval_caption_path
    coco = COCO(annFile)
    cocoRes = coco.loadRes(resFile)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    ans = [{'img_id': eva['image_id'], 'CIDEr': eva['CIDEr']}
           for eva in cocoEval.evalImgs]
    os.makedirs('./Data/Eval_Statics/', exist_ok=True)
    with open("./Data/Eval_Statics/CIDEr_Result.txt", 'w') as f:
        entry = "img_id" + " " + "CIDEr" + "\n"
        f.writelines(entry)
        entry_num = 0
        for ans_entry in ans:
            entry = str(ans_entry['img_id']) + " " + str(np.round(ans_entry['CIDEr'], 2)) + "\n"
            f.writelines(entry)
            entry_num += 1
            if entry_num >= entry_limit:
                break

        cider_list = [eva['CIDEr'] for eva in cocoEval.evalImgs]
        cider_list_npy = np.array(cider_list)
        indices = np.argsort(cider_list_npy)[::-1]
        f.writelines('best samples:\n')
        for idx in indices[:50]:
            entry = str(ans[idx]['img_id']) + " " + str(np.round(ans[idx]['CIDEr'], 2)) + "\n"
            f.writelines(entry)

        indices = indices[::-1]
        f.writelines('worst samples:\n')
        for idx in indices[:50]:
            entry = str(ans[idx]['img_id']) + " " + str(np.round(ans[idx]['CIDEr'], 2)) + "\n"
            f.writelines(entry)

    ciderScores = [eva['CIDEr'] for eva in cocoEval.evalImgs]
    x = plt.hist(ciderScores, bins=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    print(x)
    plt.title('Histogram of CIDEr Scores', fontsize=20)
    plt.xlabel('CIDEr score', fontsize=20)
    plt.ylabel('result counts', fontsize=20)
    plt.savefig('ciderHist.png', dpi=500)
    plt.show()
def language_eval(preds, test_coco_ids, cache_path):
    import sys
    sys.path.insert(0, "coco_caption")

    # generate target file
    annFile = transform_annos(test_coco_ids)

    from coco_caption.pycocotools.coco import COCO
    from coco_caption.pycocoevalcap.eval import COCOEvalCap

    encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    coco = COCO(annFile)
    valids = coco.getImgIds()

    # filter results to only those in MSCOCO validation set (will be about a third)
    # preds_filt = [p for p in preds if p['image_id'] in valids]
    preds_filt = []
    image_id_filt = []
    for p in preds:
        if p['image_id'] in valids and p['image_id'] not in image_id_filt:
            preds_filt.append(p)
            image_id_filt.append(p['image_id'])
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...

    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score

    imgToEval = cocoEval.imgToEval
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption
    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)

    return out
def language_eval(preds, model_id, split):
    annFile = 'coco_caption/annotations/captions_val2014.json'
    from coco_caption.pycocotools.coco import COCO
    from coco_caption.pycocoevalcap.eval import COCOEvalCap

    # encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    cache_path = os.path.join('eval_results/', model_id + '_' + split + '.json')

    coco = COCO(annFile)
    valids = coco.getImgIds()

    # filter results to only those in MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print(len(preds_filt))
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...

    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score

    imgToEval = cocoEval.imgToEval
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption
    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)

    return out
def coco_eval(results, eval_caption_path):
    eval_json_output_dir = './coco_caption/results/'
    os.makedirs(eval_json_output_dir, exist_ok=True)
    resFile = eval_json_output_dir + 'captions-generate.json'
    json.dump(results, open(resFile, 'w'))

    annFile = eval_caption_path
    coco = COCO(annFile)
    cocoRes = coco.loadRes(resFile)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    cider = 0
    print('---------------Evaluation performance-----------------')
    for metric, score in cocoEval.eval.items():
        print('%s: %.3f' % (metric, score))
        if metric == 'CIDEr':
            cider = score
    return cider
def coco_metrics(generated_captions_fn, annotations_dir, split):
    # Read generated captions
    # resFile = '/home/plz563/projects/syncap/experiments/coco_karpathy/butd/results_best_beam_5_test.json'
    # annotations_dir = '/home/plz563/data/coco2014/captions/annotations_trainval2014'
    ann_fn = "{}/annotations/captions_{}.json".format(annotations_dir, split)
    coco = COCO(ann_fn)
    cocoRes = coco.loadRes(generated_captions_fn)

    # create cocoEval object by taking coco and cocoRes
    cocoEval = COCOEvalCap(coco, cocoRes)

    # evaluate on a subset of images by setting
    # cocoEval.params['image_id'] = cocoRes.getImgIds()
    # please remove this line when evaluating the full validation set
    cocoEval.params['image_id'] = cocoRes.getImgIds()

    # evaluate results
    # SPICE will take a few minutes the first time, but speeds up due to caching
    cocoEval.evaluate()

    # print output evaluation scores
    for metric, score in cocoEval.eval.items():
        print('%s: %.3f' % (metric, 100 * score))
    return cocoEval.eval
#!/usr/bin/python
from pycocotools.coco import COCO
from coco_caption.pycocoevalcap.eval import COCOEvalCap
import sys

# create coco object and cocoRes object
coco = COCO(sys.argv[1])
cocoRes = coco.loadRes(sys.argv[2])

# create cocoEval object by taking coco and cocoRes
cocoEval = COCOEvalCap(coco, cocoRes)

# evaluate on a subset of images by setting
cocoEval.params['image_id'] = cocoRes.getImgIds()

# evaluate results
cocoEval.evaluate()

# print output evaluation scores
for metric, score in cocoEval.eval.items():
    print('%s: %.3f' % (metric, score))

# iteritems() is Python 2 only; items() works on Python 3
for key, value in cocoEval.imgToEval.items():
    print(key, value)
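# Hypothetical invocation of the script above (the script and file names are
# placeholders): the first argument is the COCO-format annotation file, the
# second is the results file produced by a captioning model.
#
#     python eval_captions.py captions_val2014.json results_example.json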