import os
import json

import numpy as np

# Assumed importable from the surrounding repo / coco-caption:
#   getCOCO          -- project helper returning a pycocotools COCO object
#   COCOEvalCap      -- from pycocoevalcap.eval
#   COCOEvalCapSpice -- project variant of COCOEvalCap that runs SPICE only
#   PTBTokenizer     -- from pycocoevalcap.tokenizer.ptbtokenizer
#   Cider            -- from pycocoevalcap.cider.cider


def eval_oracle(dataset, preds_n, model_id, split):
    """Score every sampled caption per image and report, for each metric,
    the per-image best ('oracle') and mean ('avg') values."""
    cache_path = os.path.join('eval_results/', model_id + '_' + split + '_n.json')

    coco = getCOCO(dataset)

    # Group the flat prediction list by image id.
    capsById = {}
    for d in preds_n:
        capsById.setdefault(d['image_id'], []).append(d)

    # Evaluate one caption per image at a time (the COCO API expects a single
    # result per image), assuming every image has the same number of samples.
    n_caps_per_img = len(next(iter(capsById.values())))
    for i in range(n_caps_per_img):
        preds = [_[i] for _ in capsById.values()]

        # Serialize to temporary json file. Sigh, COCO API...
        json.dump(preds, open(cache_path, 'w'))

        cocoRes = coco.loadRes(cache_path)
        cocoEval = COCOEvalCap(coco, cocoRes)
        cocoEval.params['image_id'] = cocoRes.getImgIds()
        cocoEval.evaluate()

        imgToEval = cocoEval.imgToEval
        for img_id in capsById.keys():
            tmp = imgToEval[img_id]
            # Flatten the per-category SPICE f-scores into top-level keys,
            # mapping NaN (category absent) to the sentinel -100.
            for k in tmp['SPICE'].keys():
                if k != 'All':
                    tmp['SPICE_' + k] = tmp['SPICE'][k]['f']
                    if tmp['SPICE_' + k] != tmp['SPICE_' + k]:  # nan
                        tmp['SPICE_' + k] = -100
            tmp['SPICE'] = tmp['SPICE']['All']['f']
            if tmp['SPICE'] != tmp['SPICE']:  # nan
                tmp['SPICE'] = -100
            capsById[img_id][i]['scores'] = imgToEval[img_id]

    out = {'overall': {}, 'ImgToEval': {}}
    for img_id in capsById.keys():
        out['ImgToEval'][img_id] = {}
        for metric in capsById[img_id][0]['scores'].keys():
            if metric == 'image_id':
                continue
            out['ImgToEval'][img_id]['oracle_' + metric] = max(
                _['scores'][metric] for _ in capsById[img_id])
            out['ImgToEval'][img_id]['avg_' + metric] = sum(
                _['scores'][metric] for _ in capsById[img_id]) / len(capsById[img_id])
        out['ImgToEval'][img_id]['captions'] = capsById[img_id]

    # Average each metric over images, skipping the -100 NaN sentinels.
    for metric in list(out['ImgToEval'].values())[0].keys():
        if metric == 'captions':
            continue
        tmp = np.array([_[metric] for _ in out['ImgToEval'].values()])
        tmp = tmp[tmp != -100]
        out['overall'][metric] = tmp.mean()

    return out
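# A minimal usage sketch for eval_oracle. The preds_n records and the
# model_id/split values below are hypothetical; in practice preds_n comes from
# sampling several captions per image with the captioning model.
def _demo_eval_oracle():
    preds_n = [
        {'image_id': 184613, 'caption': 'a man riding a horse on a beach'},
        {'image_id': 184613, 'caption': 'a person on a horse near the ocean'},
        {'image_id': 184613, 'caption': 'a man on a horse'},
        # ... the same number of sampled captions for every other image id ...
    ]
    out = eval_oracle('coco', preds_n, model_id='demo_model', split='val')
    # 'overall' averages over images; per-metric keys look like
    # 'oracle_CIDEr' (best sample per image) and 'avg_CIDEr' (mean over samples).
    print(out['overall'])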
def eval_self_cider(dataset, preds_n, model_id, split):
    """Measure caption diversity per image via self-CIDEr: the matrix of
    pairwise CIDEr scores among an image's sampled captions, summarized by
    an eigenvalue-based diversity score in [0, 1]."""
    coco = getCOCO(dataset)
    valids = coco.getImgIds()

    # Build the CIDEr scorer, computing document frequencies over the
    # reference corpus.
    Cider_scorer = Cider(df='corpus')
    tokenizer = PTBTokenizer()
    gts = {}
    for imgId in valids:
        gts[imgId] = coco.imgToAnns[imgId]
    gts = tokenizer.tokenize(gts)
    for imgId in valids:
        Cider_scorer.cider_scorer += (None, gts[imgId])
    Cider_scorer.cider_scorer.compute_doc_freq()
    Cider_scorer.cider_scorer.ref_len = np.log(
        float(len(Cider_scorer.cider_scorer.crefs)))

    # Group the sampled captions by image id and tokenize them.
    capsById = {}
    for d in preds_n:
        capsById.setdefault(d['image_id'], []).append(d)
    capsById = tokenizer.tokenize(capsById)

    imgIds = list(capsById.keys())
    # my_self_cider returns, per image, the matrix of pairwise CIDEr scores
    # between that image's sampled captions.
    scores = Cider_scorer.my_self_cider([capsById[_] for _ in imgIds])

    def get_div(eigvals):
        # Diversity score: 1 when the similarity matrix is the identity
        # (all captions distinct), 0 when it is rank one (all identical).
        eigvals = np.clip(eigvals, 0, None)
        return -np.log(np.sqrt(eigvals[-1]) / (np.sqrt(eigvals).sum())) / np.log(len(eigvals))

    # CIDEr is reported on a 0-10 scale, hence the division by 10.
    sc_scores = [get_div(np.linalg.eigvalsh(_ / 10)) for _ in scores]
    score = np.mean(np.array(sc_scores))

    imgToEval = {}
    for i, image_id in enumerate(imgIds):
        imgToEval[image_id] = {
            'self_cider': sc_scores[i],
            'self_cider_mat': scores[i].tolist(),
        }
    return {'overall': {'self_cider': score}, 'imgToEval': imgToEval}
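# A worked sanity check of the diversity score used above (a standalone copy
# of get_div; the 4x4 matrices are toy stand-ins for scores[i] / 10). An
# identity similarity matrix (mutually dissimilar captions) scores 1, while an
# all-ones rank-one matrix (identical captions) scores 0.
def _demo_self_cider_div():
    def get_div(eigvals):
        eigvals = np.clip(eigvals, 0, None)
        return -np.log(np.sqrt(eigvals[-1]) / (np.sqrt(eigvals).sum())) / np.log(len(eigvals))

    distinct = np.eye(4)         # eigenvalues [1, 1, 1, 1] -> -log(1/4)/log(4) = 1
    identical = np.ones((4, 4))  # eigenvalues [0, 0, 0, 4] -> -log(2/2)/log(4) = 0
    print(get_div(np.linalg.eigvalsh(distinct)))   # 1.0
    print(get_div(np.linalg.eigvalsh(identical)))  # 0.0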
def eval_allspice(dataset, preds_n, model_id, split):
    """Compute AllSPICE: SPICE evaluated on the union of each image's
    sampled captions, rewarding coverage of the reference content."""
    coco = getCOCO(dataset)
    valids = coco.getImgIds()

    capsById = {}
    for d in preds_n:
        capsById.setdefault(d['image_id'], []).append(d)

    # Filter results to only those in the MSCOCO validation set
    # (will be about a third).
    preds_filt_n = [p for p in preds_n if p['image_id'] in valids]
    print('using %d/%d predictions_n' % (len(preds_filt_n), len(preds_n)))

    # Serialize to temporary json file. Sigh, COCO API...
    cache_path_n = os.path.join('eval_results/', model_id + '_' + split + '_n.json')
    json.dump(preds_filt_n, open(cache_path_n, 'w'))

    # Eval AllSPICE
    cocoRes_n = coco.loadRes(cache_path_n)
    cocoEvalAllSPICE = COCOEvalCapSpice(coco, cocoRes_n)
    cocoEvalAllSPICE.params['image_id'] = cocoRes_n.getImgIds()
    cocoEvalAllSPICE.evaluate()

    out = {}
    for metric, score in cocoEvalAllSPICE.eval.items():
        out['All' + metric] = score

    imgToEvalAllSPICE = cocoEvalAllSPICE.imgToEval
    # Collect the per-category SPICE sub-scores, averaging over images and
    # skipping NaNs (x == x is False only for NaN).
    for k in list(imgToEvalAllSPICE.values())[0]['SPICE'].keys():
        if k != 'All':
            out['AllSPICE_' + k] = np.array(
                [v['SPICE'][k]['f'] for v in imgToEvalAllSPICE.values()])
            out['AllSPICE_' + k] = (
                out['AllSPICE_' + k][out['AllSPICE_' + k] == out['AllSPICE_' + k]]).mean()

    # Attach each image's full set of sampled captions to its entry.
    for image_id in set(p['image_id'] for p in preds_filt_n):
        imgToEvalAllSPICE[image_id]['caption'] = capsById[image_id]

    return {'overall': out, 'imgToEvalAllSPICE': imgToEvalAllSPICE}
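# A hedged sketch of driving the three evaluations together (the dataset name
# and model id are hypothetical). All three share the same preds_n list, and
# eval_results/ must exist before any of them run, since they cache json there.
def _demo_eval_multi(preds_n):
    os.makedirs('eval_results', exist_ok=True)
    results = {}
    results.update(eval_oracle('coco', preds_n, 'demo_model', 'val')['overall'])
    results.update(eval_allspice('coco', preds_n, 'demo_model', 'val')['overall'])
    results.update(eval_self_cider('coco', preds_n, 'demo_model', 'val')['overall'])
    print(json.dumps(results, indent=2))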