# Module-level imports needed by this snippet: typing helpers and pathlib. COCO and
# COCOEvalCap are assumed to come from an audio-captioning fork of the coco-caption
# tools that supports 'audio_id', getAudioIds() and audioToEval.
from pathlib import Path
from typing import Dict, Tuple, Union


def evaluate_metrics_from_files(pred_file: Union[Path, str],
                                ref_file: Union[Path, str]) \
        -> Tuple[Dict[str, float], Dict[int, Dict[str, float]]]:
    """Evaluate the translation metrics from annotation files with the coco lib.

    Follows the example in the repo.

    :param pred_file: File with predicted captions.
    :type pred_file: Path | str
    :param ref_file: File with reference captions.
    :type ref_file: Path | str
    :return: Tuple with metrics for the whole dataset and per-file metrics.
    :rtype: tuple[dict[str, float], dict[int, dict[str, float]]]
    """
    # Load reference annotations and predicted captions from files.
    coco = COCO(str(ref_file))
    cocoRes = coco.loadRes(str(pred_file))

    # Create the evaluation object, restrict it to the predicted audio ids, and evaluate.
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['audio_id'] = cocoRes.getAudioIds()
    cocoEval.evaluate()

    # Corpus-level scores plus the per-audio-file breakdown.
    metrics = dict(cocoEval.eval)
    return metrics, cocoEval.audioToEval
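# A minimal usage sketch, not part of the original snippet: the file paths are
# hypothetical, and the JSON files are assumed to already be in the COCO annotation /
# result format that COCO() and loadRes() expect.
def _example_evaluate_metrics_from_files():
    overall, per_audio = evaluate_metrics_from_files(
        pred_file=Path('outputs/predicted_captions.json'),
        ref_file=Path('data/reference_captions.json'))
    for metric, score in overall.items():
        print('%s: %.3f' % (metric, score))
    print('per-audio entries: %d' % len(per_audio))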
# Assumed module-level imports for this snippet: json, os.path as op, the COCO /
# COCOEvalCap classes from the coco-caption tools, and a convert_tsv_to_coco_format()
# helper defined elsewhere in the repo.
import json
import os.path as op


def evaluate_on_coco_caption(res_file, label_file, outfile=None):
    """
    res_file: TSV file, each row is [image_key, json format list of captions].
        Each caption is a dict, with fields "caption", "conf".
    label_file: JSON file of ground truth captions in COCO format.
    outfile: optional path; if given, the metrics dict is dumped there as JSON.
    """
    assert label_file.endswith('.json')
    if res_file.endswith('.tsv'):
        res_file_coco = op.splitext(res_file)[0] + '_coco_format.json'
        convert_tsv_to_coco_format(res_file, res_file_coco)
    else:
        raise ValueError(
            'unknown prediction result file format: {}'.format(res_file))

    coco = COCO(label_file)
    cocoRes = coco.loadRes(res_file_coco)
    cocoEval = COCOEvalCap(coco, cocoRes, 'corpus')

    # evaluate on a subset of images by setting
    # cocoEval.params['image_id'] = cocoRes.getImgIds()
    # please remove this line when evaluating the full validation set
    cocoEval.params['image_id'] = cocoRes.getImgIds()

    # evaluate results
    # SPICE will take a few minutes the first time, but speeds up due to caching
    cocoEval.evaluate()

    result = cocoEval.eval
    if not outfile:
        print(result)
    else:
        with open(outfile, 'w') as fp:
            json.dump(result, fp, indent=4)
    return result
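# A minimal usage sketch, not part of the original snippet: the paths are hypothetical,
# and the TSV is assumed to follow the [image_key, JSON list of {"caption", "conf"}]
# layout described in the docstring.
def _example_evaluate_on_coco_caption():
    result = evaluate_on_coco_caption(
        res_file='outputs/predictions.tsv',
        label_file='data/captions_val_coco_format.json',
        outfile='outputs/caption_metrics.json')
    print(result.get('CIDEr'))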
def get_metric(args_dict, results_file, ann_file):
    """Return the single metric named by args_dict.es_metric (e.g. 'CIDEr')
    from a coco-caption evaluation of results_file against ann_file."""
    coco = COCO(ann_file)
    cocoRes = coco.loadRes(results_file)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.evaluate()
    return cocoEval.eval[args_dict.es_metric]
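# A minimal usage sketch, not part of the original snippet: the file names are
# hypothetical, and a plain argparse.Namespace stands in for the parsed arguments;
# es_metric just has to name a key of cocoEval.eval (e.g. 'CIDEr', 'Bleu_4', 'METEOR').
def _example_get_metric():
    from argparse import Namespace
    args_dict = Namespace(es_metric='CIDEr')
    score = get_metric(args_dict,
                       results_file='outputs/results.json',
                       ann_file='data/annotations.json')
    print('selected metric: %.3f' % score)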
# Assumed module-level imports for this snippet: json (for json.dump) and json.encoder
# (for the FLOAT_REPR override); transform_annos() is defined elsewhere in the repo.
import json
from json import encoder


def language_eval(preds, test_coco_ids, cache_path):
    import sys
    sys.path.insert(0, "coco_caption")

    # generate target annotation file for the given COCO ids
    annFile = transform_annos(test_coco_ids)

    from coco_caption.pycocotools.coco import COCO
    from coco_caption.pycocoevalcap.eval import COCOEvalCap

    encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    coco = COCO(annFile)
    valids = coco.getImgIds()

    # filter results to only those in MSCOCO validation set (will be about a third),
    # keeping at most one prediction per image id
    # preds_filt = [p for p in preds if p['image_id'] in valids]
    preds_filt = []
    image_id_filt = []
    for p in preds:
        if p['image_id'] in valids and p['image_id'] not in image_id_filt:
            preds_filt.append(p)
            image_id_filt.append(p['image_id'])
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...

    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score

    # attach the generated caption to each per-image score entry
    imgToEval = cocoEval.imgToEval
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption

    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)

    return out
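# A minimal usage sketch, not part of the original snippet: the prediction, COCO id and
# cache path are hypothetical, and transform_annos() plus the coco_caption package are
# assumed to be available as in the repo this function comes from.
def _example_language_eval_with_ids():
    preds = [{'image_id': 391895, 'caption': 'a man riding a bike down a dirt road'}]
    test_coco_ids = [391895]
    out = language_eval(preds, test_coco_ids, cache_path='eval_results/tmp_preds.json')
    print(out)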
# Assumed module-level imports for this snippet: json and os.
import json
import os


def language_eval(preds, model_id, split):
    annFile = 'coco_caption/annotations/captions_val2014.json'
    from coco_caption.pycocotools.coco import COCO
    from coco_caption.pycocoevalcap.eval import COCOEvalCap
    # encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    cache_path = os.path.join('eval_results/', model_id + '_' + split + '.json')

    coco = COCO(annFile)
    valids = coco.getImgIds()

    # filter results to only those in MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt, open(cache_path, 'w'))  # serialize to temporary json file. Sigh, COCO API...

    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score

    # attach the generated caption to each per-image score entry
    imgToEval = cocoEval.imgToEval
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption

    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)

    return out
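# A minimal usage sketch, not part of the original snippet: the prediction, model id and
# split are hypothetical, and coco_caption/annotations/captions_val2014.json is assumed
# to exist relative to the working directory, as the function hard-codes that path.
def _example_language_eval_val2014():
    preds = [{'image_id': 391895, 'caption': 'a man riding a bike down a dirt road'}]
    out = language_eval(preds, model_id='baseline', split='val')
    print(out.get('CIDEr'))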