def eval_oracle(dataset, preds_n, model_id, split):
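    """Oracle evaluation over n sampled captions per image.

    Scores every sampled caption with the COCO caption metrics, then reports
    per image the best ('oracle_<metric>') and mean ('avg_<metric>') score over
    the n samples, plus the mean of each metric across images in 'overall'.
    """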
    cache_path = os.path.join('eval_results/',
                              model_id + '_' + split + '_n.json')

    coco = getCOCO(dataset)
    valids = coco.getImgIds()

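    # group the n sampled captions by image id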
    capsById = {}
    for d in preds_n:
        capsById[d['image_id']] = capsById.get(d['image_id'], []) + [d]

    sample_n = len(capsById[list(capsById.keys())[0]])  # captions sampled per image
    for i in range(sample_n):
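        # take the i-th sampled caption of every image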
        preds = [_[i] for _ in capsById.values()]

        # serialize to a temporary json file. Sigh, COCO API...
        with open(cache_path, 'w') as f:
            json.dump(preds, f)

        cocoRes = coco.loadRes(cache_path)
        cocoEval = COCOEvalCap(coco, cocoRes)
        cocoEval.params['image_id'] = cocoRes.getImgIds()
        cocoEval.evaluate()

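        # flatten SPICE sub-scores into top-level 'SPICE_<category>' keys;
        # NaN scores are replaced with -100 so they can be filtered out later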
        imgToEval = cocoEval.imgToEval
        for img_id in capsById.keys():
            tmp = imgToEval[img_id]
            for k in tmp['SPICE'].keys():
                if k != 'All':
                    tmp['SPICE_' + k] = tmp['SPICE'][k]['f']
                    if tmp['SPICE_' + k] != tmp['SPICE_' + k]:  # nan
                        tmp['SPICE_' + k] = -100
            tmp['SPICE'] = tmp['SPICE']['All']['f']
            if tmp['SPICE'] != tmp['SPICE']: tmp['SPICE'] = -100
            capsById[img_id][i]['scores'] = imgToEval[img_id]

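    # per-image oracle (max over samples) and average scores, then the
    # overall mean of each metric across images (ignoring the -100 NaN markers)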
    out = {'overall': {}, 'ImgToEval': {}}
    for img_id in capsById.keys():
        out['ImgToEval'][img_id] = {}
        for metric in capsById[img_id][0]['scores'].keys():
            if metric == 'image_id': continue
            out['ImgToEval'][img_id]['oracle_' + metric] = max(
                [_['scores'][metric] for _ in capsById[img_id]])
            out['ImgToEval'][img_id]['avg_' + metric] = sum(
                [_['scores'][metric]
                 for _ in capsById[img_id]]) / len(capsById[img_id])
        out['ImgToEval'][img_id]['captions'] = capsById[img_id]
    for metric in list(out['ImgToEval'].values())[0].keys():
        if metric == 'captions':
            continue
        tmp = np.array([_[metric] for _ in out['ImgToEval'].values()])
        tmp = tmp[tmp != -100]
        out['overall'][metric] = tmp.mean()

    return out


def eval_self_cider(dataset, preds_n, model_id, split):
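    """Self-CIDEr diversity of the n sampled captions per image.

    Builds corpus document frequencies from the ground-truth references,
    computes a CIDEr-based similarity matrix among each image's sampled
    captions, and converts its eigenvalue spectrum into a per-image diversity
    score (0 when all samples are identical, approaching 1 when maximally
    diverse). Returns the per-image scores/matrices and their overall mean.
    """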
    cache_path = os.path.join('eval_results/',
                              model_id + '_' + split + '_n.json')

    coco = getCOCO(dataset)
    valids = coco.getImgIds()

    # Get Cider_scorer
    Cider_scorer = Cider(df='corpus')

    tokenizer = PTBTokenizer()
    gts = {}
    for imgId in valids:
        gts[imgId] = coco.imgToAnns[imgId]
    gts = tokenizer.tokenize(gts)

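    # feed only the references (no candidate) so the scorer can build
    # corpus-level document frequencies and the reference length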
    for imgId in valids:
        Cider_scorer.cider_scorer += (None, gts[imgId])
    Cider_scorer.cider_scorer.compute_doc_freq()
    Cider_scorer.cider_scorer.ref_len = np.log(
        float(len(Cider_scorer.cider_scorer.crefs)))

    # Prepare captions
    capsById = {}
    for d in preds_n:
        capsById[d['image_id']] = capsById.get(d['image_id'], []) + [d]

    capsById = tokenizer.tokenize(capsById)
    imgIds = list(capsById.keys())
    scores = Cider_scorer.my_self_cider([capsById[_] for _ in imgIds])

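    # diversity from the eigenvalue spectrum of the (1/10-scaled) CIDEr kernel:
    # -log(sqrt(largest eigval) / sum of sqrt(eigvals)) / log(n), which is 0
    # when all captions are identical and approaches 1 for maximally diverse sets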
    def get_div(eigvals):
        eigvals = np.clip(eigvals, 0, None)
        return -np.log(np.sqrt(eigvals[-1]) /
                       (np.sqrt(eigvals).sum())) / np.log(len(eigvals))

    sc_scores = [get_div(np.linalg.eigvalsh(_ / 10)) for _ in scores]
    score = np.mean(np.array(sc_scores))

    imgToEval = {}
    for i, image_id in enumerate(imgIds):
        imgToEval[image_id] = {
            'self_cider': sc_scores[i],
            'self_cider_mat': scores[i].tolist()
        }
    return {'overall': {'self_cider': score}, 'imgToEval': imgToEval}


def eval_allspice(dataset, preds_n, model_id, split):
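    """AllSPICE evaluation over n sampled captions per image.

    Evaluates all n sampled captions of an image together with the SPICE
    scorer (COCOEvalCapSpice), so the score reflects the caption set as a
    whole, and also averages the SPICE sub-category F-scores (ignoring NaNs).
    """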
    coco = getCOCO(dataset)
    valids = coco.getImgIds()

    capsById = {}
    for d in preds_n:
        capsById[d['image_id']] = capsById.get(d['image_id'], []) + [d]

    # filter results to only those in MSCOCO validation set (will be about a third)
    preds_filt_n = [p for p in preds_n if p['image_id'] in valids]
    print('using %d/%d predictions_n' % (len(preds_filt_n), len(preds_n)))
    cache_path_n = os.path.join('eval_results/',
                                model_id + '_' + split + '_n.json')
    # serialize to a temporary json file. Sigh, COCO API...
    with open(cache_path_n, 'w') as f:
        json.dump(preds_filt_n, f)

    # Eval AllSPICE
    cocoRes_n = coco.loadRes(cache_path_n)
    cocoEvalAllSPICE = COCOEvalCapSpice(coco, cocoRes_n)
    cocoEvalAllSPICE.params['image_id'] = cocoRes_n.getImgIds()
    cocoEvalAllSPICE.evaluate()

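    # overall scores, prefixed with 'All' (e.g. 'AllSPICE')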
    out = {}
    for metric, score in cocoEvalAllSPICE.eval.items():
        out['All' + metric] = score

    imgToEvalAllSPICE = cocoEvalAllSPICE.imgToEval
    # collect SPICE_sub_score
    for k in list(imgToEvalAllSPICE.values())[0]['SPICE'].keys():
        if k != 'All':
            out['AllSPICE_' + k] = np.array(
                [v['SPICE'][k]['f'] for v in imgToEvalAllSPICE.values()])
            sub = out['AllSPICE_' + k]
            out['AllSPICE_' + k] = sub[sub == sub].mean()  # mean over non-NaN entries
    # attach the full list of n sampled captions to each evaluated image
    for p in preds_filt_n:
        imgToEvalAllSPICE[p['image_id']]['caption'] = capsById[p['image_id']]
    return {'overall': out, 'imgToEvalAllSPICE': imgToEvalAllSPICE}