Example #1
def evaluate_on_coco_caption(res_file, label_file, outfile=None):
    """
    res_file: TSV file, each row is [image_key, json format list of captions].
              Each caption is a dict, with fields "caption", "conf".
    label_file: JSON file of ground truth captions in COCO format.
    """
    assert label_file.endswith('.json')
    if res_file.endswith('.tsv'):
        res_file_coco = op.splitext(res_file)[0] + '_coco_format.json'
        convert_tsv_to_coco_format(res_file, res_file_coco)
    else:
        raise ValueError(
            'unknown prediction result file format: {}'.format(res_file))

    coco = COCO(label_file)
    cocoRes = coco.loadRes(res_file_coco)
    cocoEval = COCOEvalCap(coco, cocoRes, 'corpus')

    # evaluate on a subset of images by setting
    # cocoEval.params['image_id'] = cocoRes.getImgIds()
    # please remove this line when evaluating the full validation set
    cocoEval.params['image_id'] = cocoRes.getImgIds()

    # evaluate results
    # SPICE will take a few minutes the first time, but speeds up due to caching
    cocoEval.evaluate()
    result = cocoEval.eval
    if not outfile:
        print(result)
    else:
        with open(outfile, 'w') as fp:
            json.dump(result, fp, indent=4)
    return result
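A minimal usage sketch for the function above, assuming a TSV prediction file and a COCO-format ground-truth JSON (all names below are hypothetical):

# Hypothetical paths; the TSV predictions are converted to COCO format internally.
metrics = evaluate_on_coco_caption('predictions.tsv', 'captions_val.json',
                                   outfile='caption_metrics.json')
print(metrics.get('CIDEr'))  # cocoEval.eval usually holds Bleu_1-4, METEOR, ROUGE_L, CIDEr, SPICE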
Example #2
def evaluate_metrics_from_files(pred_file: Union[Path, str],
                                ref_file: Union[Path, str]) \
        -> Tuple[Dict[str, float], Dict[int, Dict[str, float]]]:
    """ Evaluate the translation metrics from annotation files with the coco lib
    Follows the example in the repo.

    :param pred_file: File with predicted captions
    :type pred_file: Path | str
    :param ref_file: File with reference captions
    :type ref_file: Path | str
    :return: Tuple with metrics for the whole dataset and per-file metrics
    :rtype: tuple[dict[str, float], dict[int, dict[str, float]]]
    """
    # Load annotations from files
    coco = COCO(str(ref_file))
    cocoRes = coco.loadRes(str(pred_file))

    # Create evaluation object and evaluate metrics
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['audio_id'] = cocoRes.getAudioIds()
    cocoEval.evaluate()

    # Make dict from metrics
    metrics = dict(
        (m, s) for m, s in cocoEval.eval.items()
    )
    return metrics, cocoEval.audioToEval
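A short usage sketch, assuming prediction and reference files in the COCO-style caption format this wrapper expects (file names are hypothetical):

# Hypothetical file names for an audio-captioning evaluation.
overall, per_file = evaluate_metrics_from_files('predicted_captions.json',
                                                'reference_captions.json')
for name, value in overall.items():
    print('{}: {:.3f}'.format(name, value))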
Example #3
    def evaluate(self, count):
        samples = []
        samples_index = []
        image_feature, image_id, test_annotation = self.dataset.get_test_for_eval()
        num_samples = self.dataset.num_test_images
        samples_index = np.full([self.batch_size * (num_samples // self.batch_size), self.max_words], self.NOT)
        for i in range(num_samples // self.batch_size):
            image_feature_test = image_feature[i * self.batch_size:(i + 1) * self.batch_size]
            feed_dict = {self.images: image_feature_test}
            predict_words = self.sess.run(self.predict_words_argmax, feed_dict)
            for j in range(self.batch_size):
                samples.append([self.dataset.decode(predict_words[j, :], type='string', remove_END=True)[0]])
                sample_index = self.dataset.decode(predict_words[j, :], type='index', remove_END=False)[0]
                samples_index[i * self.batch_size + j][:len(sample_index)] = sample_index
        # predict from samples
        samples = np.asarray(samples)
        samples_index = np.asarray(samples_index)
        print('[%] Sentence: {}'.format(samples[0]))
        meteor_pd = {}
        meteor_id = []
        for j in range(len(samples)):
            if image_id[j] == 0:
                break
            meteor_pd[str(int(image_id[j]))] = [{'image_id': str(int(image_id[j])), 'caption': samples[j][0]}]
            meteor_id.append(str(int(image_id[j])))
        scorer = COCOEvalCap(test_annotation, meteor_pd, meteor_id)
        scorer.evaluate(verbose=True)
        sample_dir = os.path.join("./SeqGAN_samples_sample", self.model_name)
        if not os.path.exists(sample_dir):
            os.makedirs(sample_dir)
        file_name = "%s_%s" % (self.dataset.dataset_name, str(count))
        np.savez(os.path.join(sample_dir, file_name), string=samples, index=samples_index, id=meteor_id)
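For reference, the structures the loop above builds and hands to this (apparently customized) COCOEvalCap constructor look like the following hand-written sketch; ids and captions are made up:

# Sketch of meteor_pd / meteor_id as assembled above (values are hypothetical).
meteor_pd = {
    '42': [{'image_id': '42', 'caption': 'a man riding a horse'}],
    '43': [{'image_id': '43', 'caption': 'two dogs playing in the snow'}],
}
meteor_id = ['42', '43']
# Note: the stock pycocoevalcap COCOEvalCap takes (coco, cocoRes); the three-argument
# form used here implies a project-specific modification.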
Example #4
def get_metric(args_dict, results_file, ann_file):
    coco = COCO(ann_file)
    cocoRes = coco.loadRes(results_file)

    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.evaluate()

    return cocoEval.eval[args_dict.es_metric]
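A usage sketch, assuming args_dict is an argparse-style namespace whose es_metric field names one of the keys in cocoEval.eval (paths and the metric name are hypothetical):

from argparse import Namespace

# Hypothetical arguments; 'CIDEr' is a key commonly present in cocoEval.eval.
args = Namespace(es_metric='CIDEr')
score = get_metric(args, 'results_val.json', 'captions_val2014.json')
print('early-stopping metric:', score)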
Example #5
File: eval.py, Project: AcodeC/video
def evaluate(config, corpus, data_loader, decoder, search_method):
    total_vids = []
    total_pd_captions = []
    pd_vid_caption_dict = defaultdict(lambda: [])
    for batch in iter(data_loader):
        vids, encoder_outputs = batch
        encoder_outputs = encoder_outputs.to(C.device)

        input = torch.LongTensor([[
            corpus.vocab.word2idx['<SOS>'] for _ in range(config.batch_size)
        ]])
        input = input.to(C.device)

        if config.decoder_model == "LSTM":
            hidden = (
                torch.zeros(config.decoder_n_layers, config.batch_size,
                            config.decoder_hidden_size).to(C.device),
                torch.zeros(config.decoder_n_layers, config.batch_size,
                            config.decoder_hidden_size).to(C.device),
            )
        else:
            hidden = torch.zeros(config.decoder_n_layers, config.batch_size,
                                 config.decoder_hidden_size)
            hidden = hidden.to(C.device)

        if isinstance(search_method, str) and search_method == "greedy":
            output_indices = greedy_search(config, decoder, input, hidden,
                                           encoder_outputs)
        elif isinstance(search_method, tuple) and search_method[0] == "beam":
            beam_width = search_method[1]
            output_indices = beam_search(config, beam_width, corpus.vocab,
                                         decoder, input, hidden,
                                         encoder_outputs)
            output_indices = np.asarray(output_indices)
            output_indices = output_indices.T
        else:
            raise NotImplementedError(
                "Unknown search method: {}".format(search_method))

        total_vids += vids
        total_pd_captions += convert_idxs_to_sentences(
            output_indices, corpus.vocab.idx2word,
            corpus.vocab.word2idx['<EOS>'])

    total_vids = total_vids[:config.n_test]
    total_pd_captions = total_pd_captions[:config.n_test]
    with open("predictions.txt", 'w') as fout:
        for vid, caption in zip(total_vids, total_pd_captions):
            fout.write("{}\t\t{}\n".format(vid, caption))

    for vid, caption in zip(total_vids, total_pd_captions):
        pd_vid_caption_dict[vid].append(caption)
    gts = COCOMSVD(corpus.test_dataset.video_caption_pairs)
    res = load_res(pd_vid_caption_dict)
    cocoEval = COCOEvalCap(gts, res)
    cocoEval.params['image_id'] = gts.getImgIds()
    cocoEval.evaluate()
    return cocoEval.eval
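As the type checks above show, search_method is either the string "greedy" or a ("beam", width) tuple; a hedged call sketch, with config, corpus, test_loader, and decoder assumed to be constructed elsewhere:

# Hypothetical objects prepared by the surrounding training/evaluation script.
greedy_scores = evaluate(config, corpus, test_loader, decoder, "greedy")
beam_scores = evaluate(config, corpus, test_loader, decoder, ("beam", 5))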
Example #6
def eval_coco_metrics(results_file):
    coco = COCO(ANNOTATION_FILE_PATH)
    cocoRes = coco.loadRes(results_file)

    cocoEval = COCOEvalCap(coco, cocoRes)

    # evaluate on a subset of images by setting
    # cocoEval.params['image_id'] = cocoRes.getImgIds()
    # please remove this line when evaluating the full validation set
    cocoEval.params["image_id"] = cocoRes.getImgIds()

    # evaluate results
    # SPICE will take a few minutes the first time, but speeds up due to caching
    cocoEval.evaluate()
Example #7
def coco_eval_specific(results, eval_caption_path, entry_limit=500):
    eval_json_output_dir = './coco/results/'
    os.makedirs(eval_json_output_dir,exist_ok=True)
    resFile = eval_json_output_dir + 'captions-generate.json'
    json.dump(results,open(resFile,'w'))

    annFile = eval_caption_path
    coco = COCO(annFile)
    cocoRes = coco.loadRes(resFile)

    cocoEval = COCOEvalCap(coco,cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    ans = [{'img_id':eva['image_id'],'CIDEr':eva['CIDEr']} for eva in cocoEval.evalImgs]
    os.makedirs('./Data/Eval_Statics/',exist_ok=True)
    with open("./Data/Eval_Statics/CIDEr_Result.txt",'w') as f:
        entry = "img_id" + " " + "CIDEr" + "\n"
        f.writelines(entry)
        entry_num = 0
        for ans_entry in ans:
            entry = str(ans_entry['img_id']) + " " + str(np.round(ans_entry['CIDEr'],2)) + "\n"
            f.writelines(entry)
            entry_num += 1
            if entry_num >= entry_limit: break
        cider_list = [eva['CIDEr'] for eva in cocoEval.evalImgs]
        cider_list_npy = np.array(cider_list)
        indices = np.argsort(cider_list_npy)[::-1]
        f.writelines('best samples:\n')
        for idx in indices[:50]:
            entry = str(ans[idx]['img_id']) + " " + str(np.round(ans[idx]['CIDEr'],2)) + "\n"
            f.writelines(entry)
        indices = indices[::-1]
        f.writelines('worst samples:\n')
        for idx in indices[:50]:
            entry = str(ans[idx]['img_id']) + " " + str(np.round(ans[idx]['CIDEr'],2)) + "\n"
            f.writelines(entry)

    ciderScores = [eva['CIDEr'] for eva in cocoEval.evalImgs]

    x = plt.hist(ciderScores,bins=[0,1,2,3,4,5,6,7,8,9,10])
    print(x)
    plt.title('Histogram of CIDEr Scores', fontsize=20)
    plt.xlabel('CIDEr score', fontsize=20)
    plt.ylabel('result counts', fontsize=20)
    plt.savefig('ciderHist.png',dpi=500)
    plt.show()
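A usage sketch for the per-image CIDEr dump above; results is the usual loadRes input, a list of {'image_id', 'caption'} dicts (the values and annotation path are hypothetical):

# Hypothetical predictions and annotation file.
results = [{'image_id': 391895, 'caption': 'a man riding a motorcycle on a dirt road'}]
coco_eval_specific(results, './Data/captions_val.json', entry_limit=500)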
Example #8
def language_eval(preds, test_coco_ids, cache_path):
    import sys

    sys.path.insert(0, "coco_caption")
    # generate target file
    annFile = transform_annos(test_coco_ids)

    from coco_caption.pycocotools.coco import COCO
    from coco_caption.pycocoevalcap.eval import COCOEvalCap

    from json import encoder  # assumed: stdlib json encoder (note: Python 3's C encoder ignores FLOAT_REPR)
    encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    coco = COCO(annFile)
    valids = coco.getImgIds()

    # filter results to only those in MSCOCO validation set (will be about a third)
    #preds_filt = [p for p in preds if p['image_id'] in valids]
    preds_filt = []
    image_id_filt = []
    for p in preds:
        if p['image_id'] in valids and p['image_id'] not in image_id_filt:
            preds_filt.append(p)
            image_id_filt.append(p['image_id'])
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt,
              open(cache_path,
                   'w'))  # serialize to temporary json file. Sigh, COCO API...

    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score

    imgToEval = cocoEval.imgToEval
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption
    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)
    return out
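A call sketch; preds is a list of {'image_id', 'caption'} dicts, test_coco_ids feeds the project's transform_annos helper, and the cache path is just a scratch JSON file (all values hypothetical):

# Hypothetical predictions and cache location.
preds = [{'image_id': 391895, 'caption': 'a man riding a motorcycle on a dirt road'}]
scores = language_eval(preds, test_coco_ids=[391895],
                       cache_path='eval_results/tmp_preds.json')
print(scores)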
Example #9
File: eval.py, Project: SpartaG117/TRcap
def language_eval(preds, model_id, split):

    annFile = 'coco_caption/annotations/captions_val2014.json'
    from coco_caption.pycocotools.coco import COCO
    from coco_caption.pycocoevalcap.eval import COCOEvalCap

    # encoder.FLOAT_REPR = lambda o: format(o, '.3f')

    if not os.path.isdir('eval_results'):
        os.mkdir('eval_results')
    cache_path = os.path.join('eval_results/',
                              model_id + '_' + split + '.json')

    coco = COCO(annFile)
    valids = coco.getImgIds()

    # filter results to only those in MSCOCO validation set (will be about a third)
    preds_filt = [p for p in preds if p['image_id'] in valids]
    print(len(preds_filt))
    print('using %d/%d predictions' % (len(preds_filt), len(preds)))
    json.dump(preds_filt,
              open(cache_path,
                   'w'))  # serialize to temporary json file. Sigh, COCO API...

    cocoRes = coco.loadRes(cache_path)
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    # create output dictionary
    out = {}
    for metric, score in cocoEval.eval.items():
        out[metric] = score

    imgToEval = cocoEval.imgToEval
    for p in preds_filt:
        image_id, caption = p['image_id'], p['caption']
        imgToEval[image_id]['caption'] = caption
    with open(cache_path, 'w') as outfile:
        json.dump({'overall': out, 'imgToEval': imgToEval}, outfile)

    return out
Example #10
def coco_eval(results, eval_caption_path):
    eval_json_output_dir = './coco_caption/results/'
    os.makedirs(eval_json_output_dir,exist_ok=True)
    resFile = eval_json_output_dir + 'captions-generate.json'
    json.dump(results,open(resFile,'w'))

    annFile = eval_caption_path
    coco = COCO(annFile)
    cocoRes = coco.loadRes(resFile)

    cocoEval = COCOEvalCap(coco,cocoRes)
    cocoEval.params['image_id'] = cocoRes.getImgIds()
    cocoEval.evaluate()

    cider = 0
    print('---------------Evaluation performance-----------------')
    for metric,score in cocoEval.eval.items():
        print('%s: %.3f'%(metric,score))
        if metric == 'CIDEr':
            cider = score
    return cider
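Because this variant returns only the CIDEr score, it is handy for model selection during validation; a minimal sketch with hypothetical inputs:

# Hypothetical checkpoint-selection step.
best_cider = 0.0
results = [{'image_id': 391895, 'caption': 'a man riding a motorcycle on a dirt road'}]
cider = coco_eval(results, './annotations/captions_val2014.json')
if cider > best_cider:
    best_cider = cider  # e.g. save a checkpoint here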
Example #11
def coco_metrics(generated_captions_fn, annotations_dir, split):
    # Read generated captions
    # resFile = '/home/plz563/projects/syncap/experiments/coco_karpathy/butd/results_best_beam_5_test.json'
    # annotations_dir = '/home/plz563/data/coco2014/captions/annotations_trainval2014'
    ann_fn = "{}/annotations/captions_{}.json".format(annotations_dir, split)
    coco = COCO(ann_fn)
    cocoRes = coco.loadRes(generated_captions_fn)

    # create cocoEval object by taking coco and cocoRes
    cocoEval = COCOEvalCap(coco, cocoRes)

    # evaluate on a subset of images by setting
    # cocoEval.params['image_id'] = cocoRes.getImgIds()
    # please remove this line when evaluating the full validation set
    cocoEval.params['image_id'] = cocoRes.getImgIds()

    # evaluate results
    # SPICE will take a few minutes the first time, but speeds up due to caching
    cocoEval.evaluate()

    # print output evaluation scores
    for metric, score in cocoEval.eval.items():
        print('%s: %.3f' % (metric, 100 * score))
    return cocoEval.eval
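A call sketch; split is interpolated into the annotation file name, so it must match an existing captions_{split}.json under annotations_dir/annotations (paths below are hypothetical):

# Hypothetical paths; expects <annotations_dir>/annotations/captions_val2014.json to exist.
scores = coco_metrics('results_best_beam_5_test.json', '/data/coco2014/captions', 'val2014')
print(scores)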
Example #12
#!/usr/bin/python
from pycocotools.coco import COCO
from coco_caption.pycocoevalcap.eval import COCOEvalCap
import sys

# create coco object and cocoRes object
coco = COCO(sys.argv[1])
cocoRes = coco.loadRes(sys.argv[2])
# create cocoEval object by taking coco and cocoRes
cocoEval = COCOEvalCap(coco, cocoRes)
# evaluate on a subset of images by setting
cocoEval.params['image_id'] = cocoRes.getImgIds()

# evaluate results
cocoEval.evaluate()
# print output evaluation scores
for metric, score in cocoEval.eval.items():
    print('%s: %.3f' % (metric, score))
for key, value in cocoEval.imgToEval.items():
    print(key, value)
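The script reads the annotation file and the result file from the command line, so an invocation would look roughly like this (script and file names are hypothetical):

# python evaluate_captions.py annotations/captions_val2014.json results/captions_val2014_results.json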