def coco_metric(input_sentence, path_anna, tmp_file=None):
    coco_set = COCO(path_anna)
    imgid_set = coco_set.getImgIds()
    if tmp_file is None:
        encoder.FLOAT_REPR = lambda o: format(o, '.3f')
        random.seed(time.time())
        tmp_file = ''.join(
            random.SystemRandom().choice(string.ascii_uppercase + string.digits)
            for _ in range(6))
    pred_set = [
        prediction for prediction in input_sentence
        if prediction['image_id'] in imgid_set
    ]
    print('using %d/%d predictions' % (len(pred_set), len(input_sentence)))
    ensure_dir('cache/')
    with open('cache/' + tmp_file + '.json', 'w') as f:
        json.dump(pred_set, f)
    result = 'cache/' + tmp_file + '.json'
    cocoRes = coco_set.loadRes(result)
    cocoEval = COCOEvalCap(coco_set, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
    # delete the temp file
    os.system('rm ' + 'cache/' + tmp_file + '.json')
    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score
    return out, pred_set

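# A minimal usage sketch for coco_metric above. The annotation path and the
# prediction contents are placeholders, not values taken from any particular
# project; COCOEvalCap only requires that each prediction is a dict with an
# integer COCO image id and a single caption string.
predictions = [
    {'image_id': 391895, 'caption': 'a man riding a motorcycle on a dirt road'},
    {'image_id': 522418, 'caption': 'a woman cutting a large white cake'},
]
scores, used_preds = coco_metric(predictions, 'annotations/captions_val2014.json')
print(scores.get('CIDEr'))
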
def coco_eval(candidates_file, references_file):
    """
    Given the candidates and references, the coco-caption module is used to
    calculate various metrics. Returns a list of dictionaries containing:
        - BLEU
        - ROUGE
        - METEOR
        - CIDEr
    """
    # This is used to suppress the output of coco-eval:
    old_stdout = sys.stdout
    sys.stdout = open(os.devnull, "w")
    try:
        # Derived from example code in the coco-captions repo
        coco = COCO(references_file)
        cocoRes = coco.loadRes(candidates_file)
        cocoEval = COCOEvalCap(coco, cocoRes)
        cocoEval.evaluate()
    finally:
        # Change back to standard output
        sys.stdout.close()
        sys.stdout = old_stdout
    return cocoEval.evalImgs

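# Hedged sketch of the inputs coco_eval above expects: references_file is a COCO
# caption annotation JSON, and candidates_file is a results JSON that coco.loadRes
# can read, i.e. a list of {"image_id": ..., "caption": ...} entries whose image
# ids appear in the reference file. The file names below are placeholders.
import json

candidates = [{'image_id': 391895, 'caption': 'a man riding a motorcycle'}]
with open('candidates.json', 'w') as f:
    json.dump(candidates, f)
per_image_scores = coco_eval('candidates.json', 'annotations/captions_val2014.json')
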
def main(_):
    # Load the keyword data (pickle files should be opened in binary mode).
    with open(FLAGS.keyword_pickle_file, 'rb') as f:
        keyword_data = cPickle.load(f)
    with open(FLAGS.test_json_path) as f:
        test_json = json.load(f)
    id_to_filename = test_json['images']
    id_to_path = [{'path': os.path.join(FLAGS.image_path, x['file_name']), 'id': x['id']}
                  for x in id_to_filename]
    result_json = []

    # Build the inference graph and restore the checkpoint.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        restore_fn(sess)
        generator = caption_generator.CaptionGenerator(model, vocab)
        for data in id_to_path:
            filename = data['path']
            with tf.gfile.GFile(filename, "rb") as f:  # read raw image bytes
                image = f.read()
            captions = generator.beam_search(sess, image,
                                             keyword_data[os.path.basename(filename)])
            print("Captions for image %s:" % os.path.basename(filename))
            result = {'image_id': data['id'],
                      'caption': (" ".join([vocab.id_to_word(w)
                                            for w in captions[0].sentence[1:-1]])).decode('utf-8')}
            print(result)
            result_json.append(result)

    with open(os.path.join(FLAGS.temp_path, "result.json"), 'w') as f:
        json.dump(result_json, f)
    coco = COCO(FLAGS.test_json_path)
    cocoRes = coco.loadRes(os.path.join(FLAGS.temp_path, "result.json"))
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.evaluate()

def language_eval(dataset, preds):
    import sys
    if 'coco' in dataset:
        sys.path.append("coco-caption")
        annFile = 'coco-caption/annotations/captions_val2014.json'
    else:
        sys.path.append("f30k-caption")
        annFile = 'f30k-caption/annotations/dataset_flickr30k.json'
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap
    encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    coco = COCO(annFile)
    valids = coco.getImgIds()
    # filter results to only those in the MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open('tmp.json', 'w'))  # serialize to temporary json file. Sigh, COCO API...
    resFile = 'tmp.json'
    cocoRes = coco.loadRes(resFile)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score
    return out

def main(argv):
    input_json = 'results/' + sys.argv[1]
    annFile = 'annotations/captions_val2014.json'
    coco = COCO(annFile)
    valids = coco.getImgIds()
    checkpoint = json.load(open(input_json, 'r'))
    preds = checkpoint['val_predictions']
    # filter results to only those in the MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open('tmp.json', 'w'))  # serialize to temporary json file. Sigh, COCO API...
    resFile = 'tmp.json'
    cocoRes = coco.loadRes(resFile)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score
    # serialize to file, to be read from Lua
    json.dump(out, open(input_json + '_out.json', 'w'))

def scores(ann_file=None, res_file=None):
    encoder.FLOAT_REPR = lambda o: format(o, '.3f')
    # set up file names and paths
    dataDir = '.'
    dataType = 'val2014'
    algName = 'fakecap'
    annFile = '%s/annotations/captions_%s_coco.json' % (dataDir, dataType)
    subtypes = ['results', 'evalImgs', 'eval']
    [resFile, evalImgsFile, evalFile] = \
        ['%s/results/captions_%s_%s_%s.json' % (dataDir, dataType, algName, subtype)
         for subtype in subtypes]
    # allow the defaults above to be overridden by the function arguments
    if ann_file is not None:
        annFile = ann_file
    if res_file is not None:
        resFile = res_file

    # create coco object and cocoRes object
    coco = COCO(annFile)
    cocoRes = coco.loadRes(resFile)

    # create cocoEval object by taking coco and cocoRes
    cocoEval = COCOEvalCap(coco, cocoRes)

    # evaluate on a subset of images by setting
    # cocoEval.params['image_id'] = cocoRes.getImgIds()
    # please remove this line when evaluating the full validation set
    cocoEval.params['image_id'] = cocoRes.getImgIds()

    # evaluate results
    cocoEval.evaluate()
    return cocoEval.eval

def language_scores(preds, model_id, log_dir, annFile, split="val"):
    result_path = os.path.join(
        log_dir, 'cococap_scores_' + model_id + '_' + split + '.json')
    coco = COCO(annFile)
    valids = coco.getImgIds()
    # filter results to only those in the MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open(result_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...
    cocoRes = coco.loadRes(result_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score
    imgToEval = cocoEval.imgToEval
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption
    with open(result_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)
    return out

def language_eval(dataset, preds, model_id, split):
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap
    ann_file = 'coco-caption/annotations/captions_val2014.json'
    encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    if not os.path.isdir('log/eval_results'):
        os.mkdir('log/eval_results')
    cache_path = os.path.join('log/eval_results/', model_id + '_' + split + '.json')
    coco = COCO(ann_file)
    valid = coco.getImgIds()
    json.dump(preds, open(cache_path, 'w'))  # serialize to temporary json file
    coco_res = coco.loadRes(cache_path)
    coco_eval = COCOEvalCap(coco, coco_res)
    coco_eval.params['image_id'] = coco_res.getImgIds()
    print(len(set(coco_res.getImgIds()) & set(coco.getImgIds())))
    coco_eval.evaluate()
    # create output dictionary
    out = {}
    for metric, score in coco_eval.eval.items():
        out[metric] = score
    img_to_eval = coco_eval.imgToEval
    # for p in preds:
    #     image_id, caption = p['image_id'], p['caption']
    #     img_to_eval[image_id]['caption'] = caption
    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'img_to_eval': img_to_eval}, outfile)
    return out

def main():
    HASH_IMG_NAME = True
    pylab.rcParams['figure.figsize'] = (10.0, 8.0)
    json.encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--inputfile", type=str, required=True,
                        help='File containing model-generated/hypothesis sentences.')
    parser.add_argument("-r", "--references", type=str, required=True,
                        help='JSON file containing references/groundtruth sentences.')
    args = parser.parse_args()

    prediction_file = args.inputfile
    reference_file = args.references
    json_predictions_file = '{0}.json'.format(prediction_file)

    crf = CocoResFormat()
    crf.read_file(prediction_file, HASH_IMG_NAME)
    crf.dump_json(json_predictions_file)

    # create coco object and cocoRes object.
    coco = COCO(reference_file)
    cocoRes = coco.loadRes(json_predictions_file)

    # create cocoEval object.
    cocoEval = COCOEvalCap(coco, cocoRes)

    # evaluate results
    cocoEval.evaluate()

    # print output evaluation scores
    for metric, score in cocoEval.eval.items():
        print('%s: %.3f' % (metric, score))

def calcScore(prediction_file):
    HASH_IMG_NAME = True
    pylab.rcParams['figure.figsize'] = (10.0, 8.0)
    json.encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    # the argument holds the raw prediction text; write it to a temporary file first
    with open('data/gentmp.txt', 'w+') as f:
        f.write(prediction_file)
    prediction_file = 'data/gentmp.txt'
    reference_file = 'data/ref_dev.json'
    json_predictions_file = '{0}.json'.format(prediction_file)

    crf = CocoResFormat()
    crf.read_file(prediction_file, HASH_IMG_NAME)
    crf.dump_json(json_predictions_file)

    # create coco object and cocoRes object.
    coco = COCO(reference_file)
    cocoRes = coco.loadRes(json_predictions_file)

    # create cocoEval object.
    cocoEval = COCOEvalCap(coco, cocoRes)

    # evaluate results
    cocoEval.evaluate()

    # collect the output evaluation scores
    scores = []
    for _, score in cocoEval.eval.items():
        scores.append(float(score))
    return scores

def evaluate(self, resDir, display=False):
    [resFile, evalImgsFile, evalFile] = ['%s/captions_%s_%s.json' % (resDir, self.dataType, subtype)
                                         for subtype in self.subtypes]
    cocoRes = self.coco.loadRes(resFile)
    cocoEval = COCOEvalCap(self.coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # demo how to use evalImgs to retrieve low-score results
    indexes = np.argsort([eva['CIDEr'] for eva in cocoEval.evalImgs], axis=0)[:5]
    imgIds = [cocoEval.evalImgs[index]['image_id'] for index in indexes]
    if display:
        for i, imgId in zip(indexes, imgIds):
            print('image Id %d, CIDEr score %f' % (imgId, cocoEval.evalImgs[i]['CIDEr']))
            annIds = self.coco.getAnnIds(imgIds=imgId)
            anns = self.coco.loadAnns(annIds)
            self.coco.showAnns(anns)
            print(" ")
            annIds = cocoRes.getAnnIds(imgIds=imgId)
            anns = cocoRes.loadAnns(annIds)
            cocoRes.showAnns(anns)
            os.system('display ' + self.transform(imgId))
            raw_input("Press Enter to continue...")

    # save evaluation results to ./results folder
    json.dump(cocoEval.evalImgs, open(evalImgsFile, 'w'))
    json.dump(cocoEval.eval, open(evalFile, 'w'))
    return cocoEval.eval

def coco_eval(ann_fn, json_fn, save_fn):
    coco = COCO(ann_fn)
    coco_res = coco.loadRes(json_fn)
    coco_evaluator = COCOEvalCap(coco, coco_res)
    # comment out the line below to evaluate the full validation or testing set.
    coco_evaluator.params['image_id'] = coco_res.getImgIds()
    # note: passing save_fn here assumes a modified COCOEvalCap whose evaluate()
    # accepts an output path; the stock pycocoevalcap evaluate() takes no arguments.
    coco_evaluator.evaluate(save_fn)

def language_eval(dataset, preds, split):
    annFile = '/home/zy/mscoco/annotations/captions_val2014.json'
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap

    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    cache_path = os.path.join('eval_results', '_' + split + '.json')
    coco = COCO(annFile)
    valids = coco.getImgIds()
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...
    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score
    imgToEval = cocoEval.imgToEval
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption
    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)
    return out

def evaluate(json_path, images, captions, flag=True):
    import sys
    sys.path.append("coco-caption")
    annFile = 'data/annotations/captions_val2014.json'
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap

    # write the predictions as a COCO-style results json
    fw = open(json_path, 'w')
    fw.write('[')
    for idx, image_name in enumerate(images):
        image_id = int(image_name.split('_')[2][:-4])
        caption = captions[idx]
        caption = caption.replace(' ,', ',').replace('"', '').replace('\n', '')
        if idx != len(images) - 1:
            fw.write('{"image_id": ' + str(image_id) + ', "caption": "' + str(caption) + '"}, ')
        else:
            fw.write('{"image_id": ' + str(image_id) + ', "caption": "' + str(caption) + '"}]')
    fw.close()

    # coco evaluation
    coco = COCO(annFile)
    cocoRes = coco.loadRes(json_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
    return cocoEval.eval['Bleu_1'], cocoEval.eval['Bleu_2'], cocoEval.eval['Bleu_3'], cocoEval.eval['Bleu_4'], \
        cocoEval.eval['CIDEr'], cocoEval.eval['METEOR'], cocoEval.eval['ROUGE_L'], cocoEval.eval['SPICE']

def eval(ann_file, res_file, return_imgscores=False):
    coco = COCO(ann_file)
    cocoRes = coco.loadRes(res_file)

    # create cocoEval object by taking coco and cocoRes
    # cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval = COCOEvalCap(coco, cocoRes,
                           use_scorers=['Bleu', 'METEOR', 'ROUGE_L', 'CIDEr'])

    # evaluate on a subset of images by setting
    # cocoEval.params['image_id'] = cocoRes.getImgIds()
    # please remove this line when evaluating the full validation set
    cocoEval.params['image_id'] = cocoRes.getImgIds()

    # evaluate results
    # SPICE will take a few minutes the first time, but speeds up due to caching
    cocoEval.evaluate()

    all_score = {}
    # collect output evaluation scores
    for metric, score in cocoEval.eval.items():
        # print('%s: %.4f' % (metric, score))
        all_score[metric] = score
    img_scores = [cocoEval.imgToEval[key] for key in cocoEval.imgToEval.keys()]
    if return_imgscores:
        return all_score, img_scores
    else:
        return all_score

def language_eval(preds):
    import sys
    sys.path.append("coco-caption")
    annFile = 'coco-caption/annotations/captions_val2014.json'
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap
    encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    cache_path = 'eval/test.json'
    if not os.path.exists(os.path.dirname(cache_path)):
        os.makedirs(os.path.dirname(cache_path))

    coco = COCO(annFile)
    valids = coco.getImgIds()
    # filter results to only those in the MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...
    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score
    return out

def evaluate():
    coco_dataset = CocoCaptions(
        root="../data/train2014/train2014",
        annFile=train_ann_file
    )
    capgen = CaptionGenerator(coco_dataset, k=k, early_stop=train_early_stop, load_knn=load_knn)

    # evaluate
    val_dataset = CocoCaptions(
        root="../data/val2014/val2014",
        annFile=valid_ann_file
    )
    best_captions, cap_map = capgen.evaluate(val_dataset, early_stop=val_early_stop)
    # best_captions = list of dict('image_id': img_id, 'caption': 'caption')
    with open(res_file, 'w') as f:
        json.dump(best_captions, f)

    # evaluate best captions against ground truth
    coco_result = capgen.coco.coco.loadRes(res_file)
    cocoEval = COCOEvalCap(capgen.coco.coco, coco_result)
    cocoEval.params['image_id'] = coco_result.getImgIds()
    cocoEval.evaluate()

    indices = ["BLEU 1-gram", "BLEU 2-gram", "BLEU 3-gram", "BLEU 4-gram",
               "METEOR", "ROUGE_L", "CIDEr", "SPICE"]
    data = [cocoEval.eval['Bleu_1'], cocoEval.eval['Bleu_2'], cocoEval.eval['Bleu_3'], cocoEval.eval['Bleu_4'],
            cocoEval.eval['METEOR'], cocoEval.eval['ROUGE_L'], cocoEval.eval['CIDEr'], cocoEval.eval['SPICE']]
    results = pd.DataFrame(columns=[f"k={k}_Train_num={train_early_stop}_Val_num={val_early_stop}"],
                           index=indices, data=data)
    results.to_excel(out_file)
    print(f"Results saved to {out_file}")

def language_eval(input_data, savedir, split):
    if type(input_data) == str:  # Filename given.
        checkpoint = json.load(open(input_data, 'r'))
        preds = checkpoint
    elif type(input_data) == list:  # Direct predictions given.
        preds = input_data

    annFile = 'third_party/coco-caption/annotations/captions_val2014.json'
    coco = COCO(annFile)
    valids = coco.getImgIds()

    # Filter results to only those in the MSCOCO validation set (will be about a third).
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print('Using %d/%d predictions' % (len(preds_filt), len(preds)))
    resFile = osp.join(savedir, 'result_%s.json' % (split))
    json.dump(preds_filt, open(resFile, 'w'))  # Serialize to temporary json file. Sigh, COCO API...

    cocoRes = coco.loadRes(resFile)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # Create output dictionary.
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score

    # Return aggregate and per-image scores.
    return out, cocoEval.evalImgs

def language_eval(type, preds, model_id, split):
    import sys
    if type == 'en':
        sys.path.append("coco-caption")
        annFile = 'coco-caption/annotations/captions_val2014.json'
        from pycocotools.coco import COCO
        from pycocoevalcap.eval import COCOEvalCap
    elif type == 'en_30K':
        sys.path.append("coco-caption")
        annFile = 'coco-caption/annotations/flickr30k_val.json'
        from pycocotools.coco import COCO
        from pycocoevalcap.eval import COCOEvalCap
    else:
        sys.path.append("AI_Challenger/Evaluation/caption_eval")
        annFile = 'data/ai_challenger/image_captioning/eval_reference.json'
        from coco_caption.pycxtools.coco import COCO
        from coco_caption.pycxevalcap.eval import COCOEvalCap
    # encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    cache_path = os.path.join('eval_results/', model_id + '_' + split + '.json')
    coco = COCO(annFile)
    valids = coco.getImgIds()
    # filter results to only those in the MSCOCO validation set (will be about a third)
    if type == 'en':
        preds_filt = [p for p in preds if p['image_id'] in valids]
        print('using %d/%d predictions' % (len(preds_filt), len(preds)))
        json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...
    else:
        json.dump(preds, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...
    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score
    imgToEval = cocoEval.imgToEval
    if type == 'en':
        for p in preds_filt:
            image_id, caption = p['image_id'], p['caption']
            imgToEval[image_id]['caption'] = caption
    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)
    return out

def language_eval(dataset, preds, model_id, split, eval_chair):
    import sys
    sys.path.append("coco-caption")
    if 'coco' == dataset:
        annFile = 'coco-caption/annotations/captions_val2014.json'
    elif 'flickr' == dataset:
        annFile = 'coco-caption/annotations/caption_flickr30k.json'
    elif 'robust-coco' == dataset:
        annFile = 'coco-caption/annotations/captions_robust_val_test.json'
    else:
        raise ValueError('unknown dataset: %s' % dataset)
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap
    # encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    cache_path = os.path.join('eval_results/', '.cache_' + model_id + '_' + split + '.json')
    coco = COCO(annFile)
    valids = coco.getImgIds()
    # filter results to only those in the MSCOCO validation set (will be about a third)
    # pdb.set_trace()
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...
    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score
    imgToEval = cocoEval.imgToEval
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption
    out['bad_count_rate'] = sum([count_bad(_['caption']) for _ in preds_filt]) / float(len(preds_filt))
    outfile_path = os.path.join('eval_results/', model_id + '_' + split + '.json')
    with open(outfile_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)
    if eval_chair:
        return out, outfile_path
    else:
        return out

def evaluate(self): """Evaluate the model for one epoch using the provided parameters. Return the epoch's average CIDEr score.""" # Switch to validation mode self.encoder.eval() self.decoder.eval() cocoRes = COCO() anns = [] # Disable gradient calculation because we are in inference mode with torch.no_grad(): pbar = tqdm(self.val_loader) pbar.set_description('evaluating epoch {}'.format(self.epoch)); for batch in pbar: images, img_id = batch[0], batch[3] # Move to GPU if CUDA is available if torch.cuda.is_available(): images = images.cuda() # Pass the inputs through the CNN-RNN model features = encoder(images).unsqueeze(1) for i in range(img_id.size()[0]): slice = features[i].unsqueeze(0) outputs = decoder.sample_beam_search(slice) sentence = self.clean_sentence(outputs[0]) id = img_id[i].item() #print('id: {}, cap: {}'.format(id, sentence)) anns.append({'image_id': id, 'caption': sentence}) for id, ann in enumerate(anns): ann['id'] = id cocoRes.dataset['annotations'] = anns cocoRes.createIndex() cocoEval = COCOEvalCap(self.val_loader.coco_dataset.coco, cocoRes) imgIds = set([ann['image_id'] for ann in cocoRes.dataset['annotations']]) cocoEval.params['image_id'] = imgIds cocoEval.evaluate() cider = cocoEval.eval['CIDEr'] old_max = 0 if len(self.cider) > 0: old_max = max(self.cider) if len(self.cider) < self.epoch: self.cider.append(cider) else: self.cider[self.epoch-1] = cider self.save() print("DEBUG: self.epoch: {}, self.cider: {}".format(self.epoch, self.cider)) if cider > old_max: print('CIDEr improved: {:.2f} => {:.2f}'.format(old_max, cider)) self.save_as(os.path.join("./models", "best-model.pkl")) return self.cider[self.epoch-1]
def main(args):
    coco = COCO(args.annotations_path)
    coco_res = coco.loadRes(args.results_json_path)
    coco_eval = COCOEvalCap(coco, coco_res)
    coco_eval.params['image_id'] = coco_res.getImgIds()
    coco_eval.evaluate()

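# Hedged sketch of how the args object for main(args) above could be built. The
# flag names mirror the attributes the function reads (annotations_path,
# results_json_path); the original script's actual CLI is not shown here.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Score a COCO-format caption results file.')
    parser.add_argument('--annotations_path', required=True,
                        help='Path to ground-truth captions json, e.g. captions_val2014.json')
    parser.add_argument('--results_json_path', required=True,
                        help='Path to generated captions json accepted by coco.loadRes')
    main(parser.parse_args())
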
def evaluateModel(model_json):
    cocoRes = coco.loadRes(model_json)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
    results = {}
    for metric, score in cocoEval.eval.items():
        results[metric] = score
    return results

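# evaluateModel above relies on a module-level `coco` object. A minimal sketch of
# that setup (both paths below are placeholders, not taken from the original code):
from pycocotools.coco import COCO

coco = COCO('annotations/captions_val2014.json')
metrics = evaluateModel('results/captions_val2014_fakecap_results.json')
print(metrics)
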
def score_generation(anno_file, result_file):
    coco = COCO(anno_file)
    coco_res = coco.loadRes(result_file)
    coco_eval = COCOEvalCap(coco, coco_res)
    coco_eval.params['image_id'] = coco_res.getImgIds()
    coco_eval.evaluate()
    return copy.deepcopy(coco_eval.eval)

def language_eval(dataset, preds, model_id, split, scores_needed):
    import sys
    sys.path.append("coco-caption")
    if dataset == 'coco':
        annFile = 'coco-caption/annotations/captions_val2014.json'
    elif dataset == 'flickr30k':
        annFile = 'coco-caption/annotations/caption_flickr30k.json'
    elif dataset == 'cc':
        annFile = 'coco-caption/annotations/caption_cc_val.json'
    elif dataset == 'aic':
        annFile = 'coco-caption/annotations/caption_aic_val_2.json'
        # post-process the Chinese (zh) captions
        for item in preds:
            # print(preds[id_])
            # remove </w> markers, Chinese full stops, and blanks
            item['caption'] = item['caption'].replace('</w>', '').replace('。', '').replace(' ', '')
            # re-segment with jieba
            item['caption'] = ' '.join(list(jieba.cut(item['caption'], cut_all=False)))
            # print(preds[id_])
            # input()
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap

    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    cache_path = os.path.join('eval_results/', model_id + '_' + split + '.json')
    coco = COCO(annFile)
    valids = coco.getImgIds()
    # valids = json.load(open('/mnt/dat/CC/annotations/cc_valid_jpgs.json'))
    # valids = {int(i[:-4]):int(i[:-4]) for i,j in valids.items()}
    # filter results to only those in the MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    # print(preds_filt)
    json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...
    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes, 'corpus')
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate(scores_needed)
    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score
    imgToEval = cocoEval.imgToEval
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption
    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)
    return out

class QuestionEvaluator(object):
    def __init__(self, annotation_file, question_file):
        self._filter = AnswerTypeManager(annotation_file)
        self._gt = VQAQuestion(question_file)
        self._types = self._filter.answer_types
        self._evaluator = None
        self._eval_metric = None
        self._scores = []

    def evaluate(self, res_file):
        res = VQAQuestion(res_file)
        self._evaluator = COCOEvalCap(self._gt, res)
        quest_ids = res.getImgIds()
        self._types = ['all']
        for ans_type in self._types:
            print('\n====== Evaluate type %s =======' % ans_type.upper())
            self.evaluate_subtype(ans_type, quest_ids)
        self._scores = np.array(self._scores).transpose()
        self.print_results()
        return self._scores, self._types, self._eval_metric

    def evaluate_subtype(self, type, image_inds):
        inds = self._filter.filter_by_answer_type(type, image_inds)
        self._evaluator.params['image_id'] = inds
        # evaluate results
        self._evaluator.evaluate()
        if self._eval_metric is None:
            self._eval_metric = self._evaluator.eval.keys()
        scores = []
        for metric in self._eval_metric:
            score = self._evaluator.eval[metric]
            scores.append(score)
            print('%s: %.3f' % (metric, score))
        self._scores.append(scores)

    def print_results(self):
        types = ' '.join(self._types)
        print('\t\t%s' % types)
        for metric, score in zip(self._eval_metric, self._scores):
            print('%s\t%s' % (metric, np.array_str(score, precision=3)))

    def save_results(self, eval_res_file='result/alg_eval_result.mat'):
        from scipy.io import savemat
        metric = np.array(self._eval_metric, dtype=object)
        types = np.array(self._types, dtype=object)
        savemat(eval_res_file, {
            'metric': metric,
            'question_type': types,
            'score': self._scores
        })

    def get_overall_cider(self):
        return float(self._scores[0][0])

    def get_overall_blue4(self):
        return float(self._scores[2][0])

def score_generation_with_ids(anno_file, result_file, img_ids):
    coco = COCO(anno_file)
    coco_res = coco.loadRes(result_file)
    coco_eval = COCOEvalCap(coco, coco_res)
    filtered = set(coco_res.getImgIds()).intersection(set(img_ids))
    coco_eval.params['image_id'] = list(filtered)
    coco_eval.evaluate()
    return copy.deepcopy(coco_eval.eval)

def eval(self, result):
    in_file = tempfile.NamedTemporaryFile(mode='w', delete=False, dir=cfg.TEMP_DIR)
    json.dump(result, in_file)
    in_file.close()
    cocoRes = self.coco.loadRes(in_file.name)
    cocoEval = COCOEvalCap(self.coco, cocoRes)
    cocoEval.evaluate()
    os.remove(in_file.name)
    return cocoEval.eval

def compare(self, resDir1, resDir2):
    [resFile1, evalImgsFile1, evalFile1] = ['%s/captions_%s_%s.json' % (resDir1, self.dataType, subtype)
                                            for subtype in self.subtypes]
    [resFile2, evalImgsFile2, evalFile2] = ['%s/captions_%s_%s.json' % (resDir2, self.dataType, subtype)
                                            for subtype in self.subtypes]
    cocoRes1 = self.coco.loadRes(resFile1)
    cocoEval1 = COCOEvalCap(self.coco, cocoRes1)
    cocoEval1.params['image_id'] = cocoRes1.getImgIds()
    cocoEval1.evaluate()

    cocoRes2 = self.coco.loadRes(resFile2)
    cocoEval2 = COCOEvalCap(self.coco, cocoRes2)
    cocoEval2.params['image_id'] = cocoRes2.getImgIds()
    cocoEval2.evaluate()

    cider_map1 = {eva['image_id']: eva['CIDEr'] for eva in cocoEval1.evalImgs}
    cider_map2 = {eva['image_id']: eva['CIDEr'] for eva in cocoEval2.evalImgs}
    counts, diff_map = [0, 0, 0], {}
    for k in cider_map1:
        diff_map[k] = cider_map1[k] - cider_map2[k]
        if diff_map[k] == 0:
            counts[0] += 1
        elif diff_map[k] < 0:
            counts[1] += 1
        else:
            counts[2] += 1
    print("same score: %d, worse score: %d, better score: %d" % (counts[0], counts[1], counts[2]))

    sorted_map = sorted(diff_map.items(), key=operator.itemgetter(1))[:50]
    for imgId, cider_diff in sorted_map:
        annIds = self.coco.getAnnIds(imgIds=imgId)
        anns = self.coco.loadAnns(annIds)
        self.coco.showAnns(anns)
        print(" ")
        annIds = cocoRes1.getAnnIds(imgIds=imgId)
        anns = cocoRes1.loadAnns(annIds)
        cocoRes1.showAnns(anns)
        print(" ")
        annIds = cocoRes2.getAnnIds(imgIds=imgId)
        anns = cocoRes2.loadAnns(annIds)
        cocoRes2.showAnns(anns)
        print(" ")
        print("imgid:%d cider1:%0.2f cider2:%0.2f" % (imgId, cider_map1[imgId], cider_map2[imgId]))
        os.system('display ' + self.imgDir + 'COCO_val2014_' + '%.12d' % imgId + '.jpg')
        raw_input("Press Enter to continue...")

def eval_oracle(dataset, preds_n, model_id, split):
    cache_path = os.path.join('eval_results/', model_id + '_' + split + '_n.json')
    coco = getCOCO(dataset)
    valids = coco.getImgIds()

    capsById = {}
    for d in preds_n:
        capsById[d['image_id']] = capsById.get(d['image_id'], []) + [d]

    sample_n = capsById[list(capsById.keys())[0]]
    for i in range(len(capsById[list(capsById.keys())[0]])):
        preds = [_[i] for _ in capsById.values()]

        json.dump(preds, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...

        cocoRes = coco.loadRes(cache_path)
        cocoEval = COCOEvalCap(coco, cocoRes)
        cocoEval.params['image_id'] = cocoRes.getImgIds()
        cocoEval.evaluate()

        imgToEval = cocoEval.imgToEval
        for img_id in capsById.keys():
            tmp = imgToEval[img_id]
            for k in tmp['SPICE'].keys():
                if k != 'All':
                    tmp['SPICE_' + k] = tmp['SPICE'][k]['f']
                    if tmp['SPICE_' + k] != tmp['SPICE_' + k]:  # nan
                        tmp['SPICE_' + k] = -100
            tmp['SPICE'] = tmp['SPICE']['All']['f']
            if tmp['SPICE'] != tmp['SPICE']:
                tmp['SPICE'] = -100
            capsById[img_id][i]['scores'] = imgToEval[img_id]

    out = {'overall': {}, 'ImgToEval': {}}
    for img_id in capsById.keys():
        out['ImgToEval'][img_id] = {}
        for metric in capsById[img_id][0]['scores'].keys():
            if metric == 'image_id':
                continue
            out['ImgToEval'][img_id]['oracle_' + metric] = max(
                [_['scores'][metric] for _ in capsById[img_id]])
            out['ImgToEval'][img_id]['avg_' + metric] = sum(
                [_['scores'][metric] for _ in capsById[img_id]]) / len(capsById[img_id])
        out['ImgToEval'][img_id]['captions'] = capsById[img_id]

    for metric in list(out['ImgToEval'].values())[0].keys():
        if metric == 'captions':
            continue
        tmp = np.array([_[metric] for _ in out['ImgToEval'].values()])
        tmp = tmp[tmp != -100]
        out['overall'][metric] = tmp.mean()
    return out

def score_generation(self, json_filename, read_file=True):
    if read_file:
        generation_result = self.sg.coco.loadRes(json_filename)
    else:
        generation_result = json_filename
    coco_evaluator = COCOEvalCap(self.sg.coco, generation_result)
    coco_image_ids = [self.sg.image_path_to_id[image_path]
                      for image_path in self.images]
    coco_evaluator.params['image_id'] = coco_image_ids
    coco_evaluator.evaluate()

def measure(prediction_txt_path, reference):
    # Convert the txt-format predictions into the format required by the evaluation code.
    crf = CocoResFormat()
    crf.read_file(prediction_txt_path, True)

    # crf.res holds the predictions after the format conversion.
    cocoRes = reference.loadRes(crf.res)
    cocoEval = COCOEvalCap(reference, cocoRes)

    cocoEval.evaluate()

    for metric, score in cocoEval.eval.items():
        print('%s: %.3f' % (metric, score))
    return cocoEval.eval

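# Hedged usage sketch for measure above: `reference` is a pycocotools COCO object
# built from a caption annotation file. Both paths below are placeholders.
from pycocotools.coco import COCO

reference = COCO('annotations/captions_val2014.json')
scores = measure('predictions.txt', reference)
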
def language_eval(dataset, preds, model_id, split):
    import sys
    if 'coco' in dataset:
        sys.path.append("coco-caption")
        annFile = 'coco-caption/annotations/captions_val2014.json'
    elif 'msvd' in dataset:
        sys.path.append('coco-caption')
        annFile = 'coco-caption/annotations/coco_ref_msvd.json'
    elif 'kuaishou' in dataset:
        sys.path.append('coco-caption')
        annFile = 'coco-caption/annotations/coco_ref_kuaishou.json'
    else:
        sys.path.append("f30k-caption")
        annFile = 'f30k-caption/annotations/dataset_flickr30k.json'
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap
    encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    cache_path = os.path.join('eval_results/', model_id + '_' + split + '.json')
    coco = COCO(annFile)
    valids = coco.getImgIds()
    # filter results to only those in the MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...
    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()
    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score
    imgToEval = cocoEval.imgToEval
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption
    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)
    return out

def coco_val_eval(self, pred_path, result_path):
    """Evaluate the predicted sentences on MS COCO validation."""
    sys.path.append('./external/coco-caption')
    from pycocotools.coco import COCO
    from pycocoevalcap.eval import COCOEvalCap

    coco = COCO('./external/coco-caption/annotations/captions_val2014.json')
    cocoRes = coco.loadRes(pred_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    with open(result_path, 'w') as fout:
        for metric, score in cocoEval.eval.items():
            print('%s: %.3f' % (metric, score), file=fout)

def run(dataset, algName, outDir):
    pylab.rcParams['figure.figsize'] = (10.0, 8.0)

    import json
    from json import encoder
    encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    # set up file names and paths
    # dataDir = './data/' + dataset
    # dataDir = '/media/SSD/projects/NeuralTalkAnimator'
    dataType = 'val'
    # annFile = '%s/annotations/captions_%s.json' % (dataDir, dataType)
    # annFile = '/media/SSD/projects/NeuralTalkAnimator/data/youtube2text/captions_val2014.json'
    dataDir = 'data/' + dataset
    annFile = '%s/captions_%s.json' % (dataDir, dataType)
    subtypes = ['results', 'evalImgs', 'eval']
    [resFile, evalImgsFile, evalFile] = \
        ['%s/captions_%s_%s_%s.json' % (outDir, dataType, algName, subtype) for subtype in subtypes]

    coco = COCO(annFile)
    cocoRes = coco.loadRes(resFile)

    # create cocoEval object by taking coco and cocoRes
    cocoEval = COCOEvalCap(coco, cocoRes)

    # evaluate on a subset of images by setting
    # cocoEval.params['image_id'] = cocoRes.getImgIds()
    # please remove this line when evaluating the full validation set
    cocoEval.params['image_id'] = cocoRes.getImgIds()

    # evaluate results
    cocoEval.evaluate()

    # print output evaluation scores
    scores = list()
    for metric, score in cocoEval.eval.items():
        print('%s: %.3f' % (metric, score))
        scores.append(score)
    print('inside metrics')
    return scores

def score_generation(gt_filename=None, generation_result=None):
    coco_dict = read_json(generation_result)
    coco = COCO(gt_filename)
    generation_coco = coco.loadRes(generation_result)
    coco_evaluator = COCOEvalCap(coco, generation_coco)
    # coco_image_ids = [self.sg.image_path_to_id[image_path]
    #                   for image_path in self.images]
    coco_image_ids = [j['image_id'] for j in coco_dict]
    coco_evaluator.params['image_id'] = coco_image_ids
    # note: return_results assumes a modified COCOEvalCap; the stock pycocoevalcap
    # evaluate() takes no arguments and returns None.
    results = coco_evaluator.evaluate(return_results=True)
    return results

def generation_experiment(self, strategy, max_batch_size=1000):
    # Compute image descriptors.
    print('Computing image descriptors')
    self.compute_descriptors()

    do_batches = (strategy['type'] == 'beam' and strategy['beam_size'] == 1) or \
        (strategy['type'] == 'sample' and
         ('temp' not in strategy or strategy['temp'] in (1, float('inf'))) and
         ('num' not in strategy or strategy['num'] == 1))

    num_images = len(self.images)
    batch_size = min(max_batch_size, num_images) if do_batches else 1

    # Generate captions for all images.
    all_captions = [None] * num_images
    for image_index in xrange(0, num_images, batch_size):
        batch_end_index = min(image_index + batch_size, num_images)
        sys.stdout.write("\rGenerating captions for image %d/%d" % (image_index, num_images))
        sys.stdout.flush()
        if do_batches:
            if strategy['type'] == 'beam' or \
                    ('temp' in strategy and strategy['temp'] == float('inf')):
                temp = float('inf')
            else:
                temp = strategy['temp'] if 'temp' in strategy else 1
            output_captions, output_probs = self.captioner.sample_captions(
                self.descriptors[image_index:batch_end_index], temp=temp)
            for batch_index, output in zip(range(image_index, batch_end_index), output_captions):
                all_captions[batch_index] = output
        else:
            for batch_image_index in xrange(image_index, batch_end_index):
                captions, caption_probs = self.captioner.predict_caption(
                    self.descriptors[batch_image_index], strategy=strategy)
                best_caption, max_log_prob = None, None
                for caption, probs in zip(captions, caption_probs):
                    log_prob = gen_stats(probs)['log_p']
                    if best_caption is None or \
                            (best_caption is not None and log_prob > max_log_prob):
                        best_caption, max_log_prob = caption, log_prob
                all_captions[batch_image_index] = best_caption
    sys.stdout.write('\n')

    # Compute the number of reference files as the maximum number of ground
    # truth captions of any image in the dataset.
    num_reference_files = 0
    for captions in self.dataset.values():
        if len(captions) > num_reference_files:
            num_reference_files = len(captions)
    if num_reference_files <= 0:
        raise Exception('No reference captions.')

    # Collect model/reference captions, formatting the model's captions and
    # each set of reference captions as a list of len(self.images) strings.
    exp_dir = '%s/generation' % self.cache_dir
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)
    # For each image, write out the highest probability caption.
    model_captions = [''] * len(self.images)
    reference_captions = [([''] * len(self.images)) for _ in xrange(num_reference_files)]
    for image_index, image in enumerate(self.images):
        caption = self.captioner.sentence(all_captions[image_index])
        model_captions[image_index] = caption
        for reference_index, (_, caption) in enumerate(self.dataset[image]):
            caption = ' '.join(caption)
            reference_captions[reference_index][image_index] = caption

    coco_image_ids = [self.sg.image_path_to_id[image_path]
                      for image_path in self.images]
    generation_result = [{
        'image_id': self.sg.image_path_to_id[image_path],
        'caption': model_captions[image_index]
    } for (image_index, image_path) in enumerate(self.images)]
    json_filename = '%s/generation_result.json' % self.cache_dir
    print('Dumping result to file: %s' % json_filename)
    with open(json_filename, 'w') as json_file:
        json.dump(generation_result, json_file)
    generation_result = self.sg.coco.loadRes(json_filename)
    coco_evaluator = COCOEvalCap(self.sg.coco, generation_result)
    coco_evaluator.params['image_id'] = coco_image_ids
    coco_evaluator.evaluate()

import sys

input_json = sys.argv[1]
annFile = 'annotations/captions_val2014.json'
coco = COCO(annFile)
valids = coco.getImgIds()

checkpoint = json.load(open(input_json, 'r'))
preds = checkpoint['val_predictions']
# filter results to only those in the MSCOCO validation set (will be about a third)
preds_filt = [p for p in preds if p['image_id'] in valids]
# preds_filt = preds
print('using %d/%d predictions' % (len(preds_filt), len(preds)))
json.dump(preds_filt, open('tmp.json', 'w'))  # serialize to temporary json file. Sigh, COCO API...
resFile = 'tmp.json'
cocoRes = coco.loadRes(resFile)
cocoEval = COCOEvalCap(coco, cocoRes)
cocoEval.params['image_id'] = cocoRes.getImgIds()
cocoEval.evaluate()

# create output dictionary
out = {}
for metric, score in cocoEval.eval.items():
    out[metric] = score

# serialize to file, to be read from Lua
json.dump(out, open(input_json + '_out.json', 'w'))