Esempio n. 1
0
def compute_score(gts, val_caps, train_imgids, val_imgids, i, j):
    """Score one validation caption against every training image with METEOR.

    The caption ``val_caps[val_imgids[i]][j]`` is used as the single
    candidate for each id in ``train_imgids``.

    Args:
        gts: Reference side, forwarded unchanged to the scorer.
        val_caps: Container indexed by validation image id, then by caption
            index.
        train_imgids: Ids used as keys of the candidate dict; also passed to
            the scorer as its third argument.
        val_imgids: Sequence of validation image ids.
        i: Index into ``val_imgids``.
        j: Index of the caption within the selected image's captions.

    Returns:
        ``np.array`` built from the second value returned by the scorer.
    """
    candidates = {
        train_id: [val_caps[val_imgids[i]][j]] for train_id in train_imgids
    }
    _, per_item = Meteor().compute_score(gts, candidates, train_imgids)
    return np.array(per_item)
Esempio n. 2
0
    def evaluate(self, album_to_Gts, album_to_Res):
        """Run every caption metric over the given albums and record results.

        Both mappings are stored on the instance, each scorer's
        ``compute_score`` is called with them, and the overall and per-album
        values are recorded through ``setEval`` / ``setAlbumToEval`` before
        ``setEvalAlbums`` is invoked.

        Args:
            album_to_Gts: Reference side, passed first to every scorer; its
                keys are the ids handed to ``setAlbumToEval``.
            album_to_Res: Candidate side, passed second to every scorer.
        """
        self.album_to_Res = album_to_Res
        self.album_to_Gts = album_to_Gts

        print('setting up scorers...')
        metric_table = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),  # df='VIST/VIST-train-words'
        ]

        for metric, label in metric_table:
            print('computing %s score ...' % (metric.method()))
            overall, per_album = metric.compute_score(self.album_to_Gts,
                                                      self.album_to_Res)
            if not isinstance(label, list):
                # Single-name metric: one overall value, one per-album list.
                self.setEval(overall, label)
                self.setAlbumToEval(per_album, self.album_to_Gts.keys(), label)
                print('%s: %.3f' % (label, overall))
                continue
            # Multi-name metric (the BLEU entry): results come back in
            # parallel with the list of names.
            for value, album_values, name in zip(overall, per_album, label):
                self.setEval(value, name)
                self.setAlbumToEval(album_values, self.album_to_Gts.keys(),
                                    name)
                print('%s: %.3f' % (name, value))

        self.setEvalAlbums()
Esempio n. 3
0
def evaluate(gts=None, res=None):
    """Compute BLEU-1..4, METEOR, ROUGE_L and CIDEr for the given captions.

    Args:
        gts: Reference side, passed first to every scorer's
            ``compute_score``.
        res: Candidate side, passed second to every scorer's
            ``compute_score``.

    Returns:
        List of ``(metric_name, overall_score)`` tuples in scorer order; the
        BLEU scorer contributes one tuple per name in its name list.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    res_scores = []
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            # The three-way zip keeps the original truncation semantics.
            for sc, _, m in zip(score, scores, method):
                res_scores.append((m, sc))
        else:
            res_scores.append((method, score))
    return res_scores
Esempio n. 4
0
    def evaluate(self):
        """Score ``self.res`` against ``self.gts`` with BLEU, METEOR and ROUGE_L.

        Each overall score is printed with five decimals.

        Returns:
            List of overall scores in the order Bleu_1, Bleu_2, Bleu_3,
            Bleu_4, METEOR, ROUGE_L.
        """
        output = []
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            # (Cider(), "CIDEr")
        ]

        for scorer, method in scorers:
            score, scores = scorer.compute_score(self.gts, self.res)

            if isinstance(method, list):
                for sc, _, m in zip(score, scores, method):
                    # Call-form print works on both Python 2 and Python 3.
                    print("%s: %0.5f" % (m, sc))
                    output.append(sc)
            else:
                print("%s: %0.5f" % (method, score))
                output.append(score)
        return output
Esempio n. 5
0
def evaluate_captions(res: dict, gts: dict):
    """Tokenize both caption sets and compute the caption metrics.

    Args:
        res: Candidate side; tokenized with ``PTBTokenizer`` and passed second
            to every scorer.
        gts: Reference side; tokenized with ``PTBTokenizer`` and passed first
            to every scorer.

    Returns:
        Dict mapping each metric name (``Bleu_1`` .. ``Bleu_4``, ``METEOR``,
        ``ROUGE_L``, ``CIDEr``, ``SPICE``) to its overall score.
    """
    print('tokenization...')
    ptb = PTBTokenizer()
    gts = ptb.tokenize(gts)
    res = ptb.tokenize(res)

    print('setting up scorers...')
    metric_table = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE"),
    ]

    rtn = {}
    for metric, label in metric_table:
        print('computing %s score...' % (metric.method()))
        overall, per_item = metric.compute_score(gts, res)
        if not isinstance(label, list):
            rtn[label] = overall
            continue
        # The BLEU entry returns one overall value per name in its list.
        for value, _, name in zip(overall, per_item, label):
            rtn[name] = value

    return rtn
Esempio n. 6
0
def language_eval(sample_seqs, gt_seqs):
    """Compute BLEU, CIDEr, METEOR and ROUGE for generated sentences.

    Args:
        sample_seqs: One generated sentence per item (``[x, x, ...]``).
        gt_seqs: For each item, the list of its reference sentences
            (``[[ref1, ref2, ...], ...]``).

    Returns:
        Dict with keys ``'BLEU'``, ``'METEOR'``, ``'ROUGE'`` and ``'CIDEr'``
        holding the overall scores; the BLEU entry is indexable 0..3.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    import sys

    # The scorer packages are imported relative to this directory. Only add
    # it once so repeated calls do not keep growing sys.path.
    scorer_dir = "coco-caption/pycocoevalcap/"
    if scorer_dir not in sys.path:
        sys.path.append(scorer_dir)
    from bleu.bleu import Bleu
    from cider.cider import Cider
    from meteor.meteor import Meteor
    from rouge.rouge import Rouge

    assert len(sample_seqs) == len(gt_seqs), "number of eval data is different"

    # res: {0: [sent], 1: [sent], ...}
    res = {i: [sent] for i, sent in enumerate(sample_seqs)}
    # gts: {0: [ref1, ref2, ...], 1: [ref1, ref2, ...], ...}
    gts = {i: list(refs) for i, refs in enumerate(gt_seqs)}

    avg_bleu_score, bleu_scores = Bleu(4).compute_score(gts, res)
    avg_cider_score, cider_scores = Cider().compute_score(gts, res)
    avg_meteor_score, meteor_scores = Meteor().compute_score(gts, res)
    avg_rouge_score, rouge_scores = Rouge().compute_score(gts, res)

    print(" BLEU1:{}\n BLEU2:{}\n BLEU3:{}\n BLEU4:{}\n METEOR:{}\n ROUGE:{}\n CIDEr:{}\n"
          .format(avg_bleu_score[0], avg_bleu_score[1], avg_bleu_score[2],
                  avg_bleu_score[3], avg_meteor_score, avg_rouge_score,
                  avg_cider_score))

    return {'BLEU': avg_bleu_score, 'METEOR': avg_meteor_score,
            'ROUGE': avg_rouge_score, 'CIDEr': avg_cider_score}
Esempio n. 7
0
def evaluator(gts, res):
    """Tokenize both caption sets, compute the metrics and return them.

    Args:
        gts: Reference side; passed through ``tokenize`` and then first to
            every scorer.
        res: Candidate side; passed through ``tokenize`` and then second to
            every scorer.

    Returns:
        Dict mapping metric name (``Bleu_1`` .. ``Bleu_4``, ``METEOR``,
        ``CIDEr``, ``SPICE``) to its overall score. The original built this
        dict but dropped it on exit; returning it is backward-compatible.
    """
    # Named ``results`` rather than ``eval`` to avoid shadowing the builtin.
    results = {}

    print('tokenization...')
    # Todo: use Spacy for tokenization
    gts = tokenize(gts)
    res = tokenize(res)

    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        # (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE")
    ]

    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for sc, _, m in zip(score, scores, method):
                results[m] = sc
                print("%s: %0.3f" % (m, sc))
        else:
            results[method] = score
            print("%s: %0.3f" % (method, score))

    return results
Esempio n. 8
0
    def evaluate(self):
        """Score ``self.res`` against ``self.ref`` with BLEU, METEOR, ROUGE_L, CIDEr.

        Both sides are used as stored on the instance. PTB tokenization is
        deliberately disabled here; when it was enabled it read the same two
        attributes.

        Returns:
            Dict mapping metric name (``Bleu_1`` .. ``Bleu_4``, ``METEOR``,
            ``ROUGE_L``, ``CIDEr``) to its overall score.
        """
        # Disabled tokenization step, kept for reference:
        #   tokenizer = PTBTokenizer()
        #   gts = tokenizer.tokenize(self.ref)
        #   res = tokenizer.tokenize(self.res)
        gts = self.ref
        # ``res`` was never bound once tokenization was disabled, which made
        # the compute_score call below raise NameError.
        res = self.res

        print("setting up scorers ...")
        scorers = [(Bleu(4), ("Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4")),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        out = {}
        for scorer, method in scorers:
            print("computing %s score ..." % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, tuple):
                for sc, _, m in zip(score, scores, method):
                    out[m] = sc
                    print("%s: %0.4f" % (m, sc))
            else:
                print("%s: %0.4f" % (method, score))
                out[method] = score

        return out
    def evaluate(self):
        """Group predictions by image, score them and return rounded metrics.

        Predictions are read from ``self.input_captions['v_preds']`` and
        grouped by ``self.key_name``; references come from
        ``self.ground_captions``. Progress and scores are printed only when
        ``self.no_print == False``.

        Returns:
            Dict mapping every metric in ``self.eval`` to its score rounded
            to three decimals.
        """
        gts = {}
        res = {}
        for pred in self.input_captions['v_preds']:
            imgId = pred[self.key_name]
            res.setdefault(imgId, []).append(pred)
            gts[imgId] = self.ground_captions[imgId]

        # Same comparison the prints were originally guarded by.
        verbose = self.no_print == False

        if verbose:
            print('tokenization...')
        ptb = PTBTokenizer()
        gts = ptb.tokenize(gts)
        res = ptb.tokenize(res)

        if verbose:
            print('setting up scorers...')
        metric_table = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            # (Spice(), "SPICE")
        ]

        for metric, label in metric_table:
            if verbose:
                print('computing %s score...' % (metric.method()))
            overall, per_image = metric.compute_score(gts, res)
            if isinstance(label, list):
                # One result per name for the multi-name BLEU entry.
                triples = zip(overall, per_image, label)
            else:
                triples = [(overall, per_image, label)]
            for value, image_values, name in triples:
                self.setEval(value, name)
                self.setImgToEvalImgs(image_values, gts.keys(), name)
                if verbose:
                    print("%s: %0.3f" % (name, value))
        self.setEvalImgs()

        # Round through string formatting, exactly like the printed values.
        return {
            metric: float('%.3f' % (score))
            for metric, score in self.eval.items()
        }
Esempio n. 10
0
    def evaluate(self):
        """Tokenize and score the captions for ``self.params['image_id']``.

        References are read from ``self.coco.imgToAnns`` and candidates from
        ``self.cocoRes.imgToAnns``. Overall scores go through ``setEval``,
        per-image scores through ``setImgToEvalImgs``, and ``setEvalImgs`` is
        called once at the end.
        """
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # Call-form print with a single argument works on Python 2 and 3.
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(["-m", "stem", '-w', '1.0', '-p',
                     '0.85 0.2 0.0 0.75']), "METEOR_Stems"),
            (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr"),
            (Spice(), "SPICE")
        ]

        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Esempio n. 11
0
    def evaluate(self):
        """Load reference and candidate captions from JSON, then score them.

        The files ``self._refName`` and ``self._candName`` are read from
        ``self._pathToData``, tokenized with ``PTBTokenizer`` and scored with
        BLEU-1..4, METEOR, ROUGE_L and CIDEr (constructed with
        ``self._dfMode``). Every overall score is recorded via ``setEval``.
        """
        def readJson(refName, candName):
            """Return ``(gts, res)``: captions of both files grouped by image_id."""
            path_to_ref_file = os.path.join(self._pathToData, refName)
            path_to_cand_file = os.path.join(self._pathToData, candName)

            # Context managers close the handles, which the previous
            # ``open(...).read()`` form left to the garbage collector.
            with open(path_to_ref_file, 'r') as ref_file:
                ref_list = json.load(ref_file)
            with open(path_to_cand_file, 'r') as cand_file:
                cand_list = json.load(cand_file)

            gts = defaultdict(list)
            res = defaultdict(list)
            # change of naming convention from ref to gts
            for l in ref_list:
                gts[l['image_id']].append({"caption": l['caption']})

            # change of naming convention from cand to res
            for l in cand_list:
                res[l['image_id']].append({"caption": l['caption']})

            return gts, res

        # Call-form print with a single argument works on Python 2 and 3.
        print('Loading Data...')
        gts, res = readJson(self._refName, self._candName)

        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(self._dfMode), "CIDEr")
        ]

        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, _, m in zip(score, scores, method):
                    self.setEval(sc, m)
            else:
                self.setEval(score, method)
Esempio n. 12
0
def main():
    """Score the result file named in ``sys.argv[1]`` on the test split.

    Ground truth is loaded from ``tgif-v1.0.tsv`` and the test keys from
    ``splits/test.txt``, both under ``this_dir``. Each test key is reduced to
    exactly one result sentence (an empty string when missing). Both sides
    are converted with ``to_coco``, tokenized, and scored with BLEU-1..4,
    METEOR, ROUGE_L and CIDEr; the overall scores are printed.
    """
    import sys
    res_path = sys.argv[1]

    gt_path = osp.join(this_dir, 'tgif-v1.0.tsv')
    test_list_path = osp.join(this_dir, 'splits', 'test.txt')

    test_keys = load_list(test_list_path)
    all_sents = load_sentences(gt_path)
    res = load_sentences(res_path)

    # make sure res has and only has single sentence
    # for all testing keys
    gts = {}
    for key in test_keys:
        gts[key] = all_sents[key]
        if key in res:
            res[key] = [res[key][0]]
        else:
            res[key] = [""]

    # Convert to COCO format
    gts = to_coco(gts, res.keys())
    res = to_coco(res, res.keys())

    # Call-form print with a single argument works on Python 2 and 3.
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"), (Cider(), "CIDEr")]

    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            for sc, _, m in zip(score, scores, method):
                print("%s: %0.3f" % (m, sc))
        else:
            print("%s: %0.3f" % (method, score))
Esempio n. 13
0
def init_cider_scorer(reward_type):
    """Populate the module-level ``CiderD_scorer`` if it is still falsy.

    The scorer class is chosen from ``reward_type`` (``'BLEU'``, ``'METEOR'``,
    ``'ROUGE'`` or ``'CIDEr'``). An already-set scorer is kept whatever the
    requested type, and any other ``reward_type`` leaves the global untouched.
    """
    global CiderD_scorer
    # CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    if reward_type == 'BLEU':
        scorer_cls = Bleu
    elif reward_type == 'METEOR':
        scorer_cls = Meteor
    elif reward_type == 'ROUGE':
        scorer_cls = Rouge
    elif reward_type == 'CIDEr':
        scorer_cls = Cider
    else:
        return
    if not CiderD_scorer:
        CiderD_scorer = scorer_cls()
Esempio n. 14
0
def evaluate(ref, hyp):
    """Print METEOR and ROUGE results for two line-aligned text files.

    Args:
        ref: Path of the reference file, one sentence per line.
        hyp: Path of the hypothesis file, one sentence per line.

    Line ``k`` of each file becomes entry ``k`` of the corresponding dict.
    Text is stripped and lower-cased; hypotheses additionally have their
    internal whitespace collapsed to single spaces.
    """
    with open(hyp, 'r') as hyp_file:
        hyp_lines = hyp_file.readlines()
        res = {}
        for idx, line in enumerate(hyp_lines):
            res[idx] = [" ".join(line.strip().lower().split())]
    with open(ref, 'r') as ref_file:
        ref_lines = ref_file.readlines()
        gts = {}
        for idx, line in enumerate(ref_lines):
            gts[idx] = [line.strip().lower()]

    # The whole return value of compute_score is printed as-is.
    score_Meteor = Meteor().compute_score(gts, res)
    print("Meteor: " + str(score_Meteor))

    first_refs = [sents[0] for sents in gts.values()]
    first_hyps = [sents[0] for sents in res.values()]
    scores_Rouge = Rouge().get_scores(first_refs, first_hyps, avg=True)
    print("Rouge: " + str(scores_Rouge))
Esempio n. 15
0
    def evaluate(self):
        """Tokenize and score the captions for ``self.params['image_id']``.

        References are read from ``self.coco.imgToAnns`` and candidates from
        ``self.cocoRes.imgToAnns``. Overall scores go through ``setEval``,
        per-image scores through ``setImgToEvalImgs`` (keyed by the image id
        list), and ``setEvalImgs`` is called once at the end.
        """
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # Call-form print with a single argument works on Python 2 and 3.
        print('===== tokenization... gts')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        print('===== tokenization... res')
        res = tokenizer.tokenize(res)

        print('setting up scorers...')
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr")]

        for scorer, method in scorers:
            print('===== computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Esempio n. 16
0
    def evaluate(self, album_to_Gts, album_to_Res):
        """Score predicted stories against references with all four metrics.

        BLEU-1..4, METEOR, ROUGE_L and CIDEr are always applied. Both mappings
        are stored on the instance, results are recorded through ``setEval``
        and ``setAlbumToEval``, and ``setEvalAlbums`` is invoked at the end.

        Args:
            album_to_Gts: Reference side, passed first to every scorer; its
                keys are the ids handed to ``setAlbumToEval``.
            album_to_Res: Candidate side, passed second to every scorer.
        """
        self.album_to_Res = album_to_Res
        self.album_to_Gts = album_to_Gts

        print('setting up scorers...')
        metric_table = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
        ]

        for metric, label in metric_table:
            print('computing %s score ...' % (metric.method()))
            overall, per_album = metric.compute_score(self.album_to_Gts,
                                                      self.album_to_Res)
            if not isinstance(label, list):
                # Single-name metric: one overall value, one per-album list.
                self.setEval(overall, label)
                self.setAlbumToEval(per_album, self.album_to_Gts.keys(), label)
                print('%s: %.4f' % (label, overall))
                continue
            # Multi-name metric (the BLEU entry): results come back in
            # parallel with the list of names.
            for value, album_values, name in zip(overall, per_album, label):
                self.setEval(value, name)
                self.setAlbumToEval(album_values, self.album_to_Gts.keys(),
                                    name)
                print('%s: %.4f' % (name, value))

        self.setEvalAlbums()
Esempio n. 17
0
    def evaluate(self, gts=None, res=None):
        """Build (and tokenize) the caption dicts, then return them.

        When both ``gts`` and ``res`` are ``None`` they are assembled from
        ``self.coco.imgToAnns`` / ``self.cocoRes.imgToAnns`` for the ids in
        ``self.params['image_id']`` and run through ``PTBTokenizer``.
        Otherwise the arguments are passed through untouched.

        Returns:
            The tuple ``(gts, res)``.

        NOTE(review): the unconditional ``return gts, res`` below makes the
        whole scoring section unreachable, so no scorer runs and
        ``setEval`` / ``setImgToEvalImgs`` / ``setEvalImgs`` are never called.
        This looks like debugging residue; confirm which behaviour callers
        rely on before removing either half.
        """
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        if gts is None and res is None:
            gts = {}
            res = {}
            for imgId in imgIds:
                gts[imgId] = self.coco.imgToAnns[imgId]
                res[imgId] = self.cocoRes.imgToAnns[imgId]

            # NOTE(review): debug output with a hard-coded image id; raises
            # KeyError when 184321 is not among imgIds.
            print("before")
            print(gts[184321])
            print(res[184321])

            # =================================================
            # Tokenize both sides
            # =================================================
            print 'tokenization...'
            tokenizer = PTBTokenizer()
            gts = tokenizer.tokenize(gts)
            res = tokenizer.tokenize(res)

        print("after")
        # Early exit: everything below this line is dead code.
        return gts, res
        print(gts[184321])
        print(res[184321])

        # =================================================
        # Set up scorers (unreachable)
        # =================================================
        print 'setting up scorers...'
        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                   (Meteor(), "METEOR"), (Rouge(), "ROUGE_L"),
                   (Cider(), "CIDEr"), (Spice(), "SPICE")]

        # =================================================
        # Compute scores (unreachable)
        # =================================================
        for scorer, method in scorers:
            print 'computing %s score...' % (scorer.method())
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                # Multi-name entry (BLEU): one result per name in the list.
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print "%s: %0.3f" % (m, sc)
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print "%s: %0.3f" % (method, score)
        self.setEvalImgs()
Esempio n. 18
0
    def evaluate(self):
        """Tokenize and score the captions for ``self.params['image_id']``.

        References are read from ``self.coco.imgToAnns`` and candidates from
        ``self.cocoRes.imgToAnns``. Overall scores go through ``setEval``,
        per-image scores through ``setImgToEvalImgs`` (keyed by the image id
        list), and ``setEvalImgs`` is called once at the end.
        """
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # Call-form print with a single argument works on Python 2 and 3.
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            # The leftover ``ipdb.set_trace()`` breakpoint was removed: it
            # stopped the run in the debugger once per scorer.
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Esempio n. 19
0
def test(loader, model, opt, vocab):
    """Decode every batch with beam search and print corpus-level metrics.

    For each batch the first hypothesis of the first sample is cut at the
    first ``EOS`` id, mapped to words through ``vocab`` and stored under the
    batch index, next to the first reference caption of that batch. BLEU-4,
    CIDEr, METEOR and ROUGE are then computed over all batches and printed.

    Args:
        loader: Iterable yielding ``(video_input, language_feat, captions,
            time_seg, batch_lens, duration, video_id)``; ``loader.dataset``
            must provide ``get_vocab()``.
        model: Model handed to ``translate_batch``.
        opt: Options; ``opt['max_length']`` is read here and the whole object
            is forwarded to ``translate_batch``.
        vocab: Mapping from ``str(token_id)`` to word.
    """
    res = {}
    gts = {}

    for batch_id, (video_input, language_feat, captions, time_seg, batch_lens,
                   duration, video_id) in enumerate(loader):

        # Convert the textual input to numeric labels. The result is not
        # consumed below; the call is kept so its behaviour is unchanged.
        cap_gts, cap_mask = convert_caption_labels(captions,
                                                   loader.dataset.get_vocab(),
                                                   opt['max_length'])

        video_input = video_input.cuda()
        cap_gts = torch.tensor(cap_gts).cuda().long()

        with torch.no_grad():
            # Beam search starts from here
            batch_hyp = translate_batch(model, video_input, opt)

        # Stack all ground-truth captions
        references = [[cap.split(' ')] for cap in captions]

        # Stack all predicted captions
        hypotheses = []
        for predict in batch_hyp:
            best = predict[0]
            # Without an EOS keep the whole sequence. The previous fallback
            # of -1 sliced off the final token.
            end = best.index(EOS) if EOS in best else len(best)
            hypotheses.append([vocab[str(word)] for word in best[:end]])

        # Only the first sample of each batch is scored, keyed by batch index.
        res[batch_id] = [list_to_sentence(hypotheses[0])]
        gts[batch_id] = [list_to_sentence(references[0][0])]
        print(batch_id)
    avg_bleu_score, bleu_scores = Bleu(4).compute_score(gts, res)
    avg_cider_score, cider_scores = Cider().compute_score(gts, res)
    avg_meteor_score, meteor_scores = Meteor().compute_score(gts, res)
    avg_rouge_score, rouge_scores = Rouge().compute_score(gts, res)
    print('C, M, R, B:', avg_cider_score, avg_meteor_score, avg_rouge_score,
          avg_bleu_score)
Esempio n. 20
0
 def __init__(self,
              references,
              scorers=('bleu', 'rouge', 'cider', 'meteor')):
     """Instantiate the requested scorers and remember the references.

     Args:
         references: Stored unchanged as ``self.references``.
         scorers: Iterable of scorer names out of ``'bleu'``, ``'rouge'``,
             ``'cider'`` and ``'meteor'``. The default is an immutable
             tuple so it cannot be shared and mutated between calls.

     Raises:
         NotImplementedError: If a name is not one of the four above.
     """
     self.scorers = {}
     for scorer in scorers:
         if scorer == 'bleu':
             self.scorers['bleu'] = Bleu(4)
         elif scorer == 'rouge':
             self.scorers['rouge'] = Rouge()
         elif scorer == 'cider':
             self.scorers['cider'] = Cider()
         elif scorer == 'meteor':
             self.scorers['meteor'] = Meteor()
         else:
             raise NotImplementedError(
                 'unsupported scorer: %r' % (scorer,))
     self.references = references
Esempio n. 21
0
    def evaluate(self):
        """Tokenize and score the captions for ``self.params['image_id']``.

        References are read from ``self.coco.imgToAnns`` and candidates from
        ``self.cocoRes.imgToAnns``. Overall scores go through ``setEval``,
        per-image scores through ``setImgToEvalImgs`` (keyed by the image id
        list), and ``setEvalImgs`` is called once at the end.
        """
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # Call-form print with a single argument works on Python 2 and 3.
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, imgIds, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, imgIds, method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Esempio n. 22
0
    def evaluate(self):
        """Score the generated expressions in ``self.Res`` per ``ref_id``.

        For every ``ref_id`` in ``self.Res`` the ground-truth sentences are
        taken from ``self.refer.Refs[ref_id]['sentences']`` and the candidate
        from the entry's ``'sent'``. Both sides are tokenized and kept as
        ``self.refToGts`` / ``self.refToRes``. Results are recorded through
        ``setEval`` and ``setRefToEvalRefs``, then ``setEvalRefs`` is called.
        """
        evalRefIds = [ann['ref_id'] for ann in self.Res]

        refToGts = {}
        for ref_id in evalRefIds:
            ref = self.refer.Refs[ref_id]
            # up to 3 expressions
            refToGts[ref_id] = [sent['sent'] for sent in ref['sentences']]
        refToRes = {ann['ref_id']: [ann['sent']] for ann in self.Res}

        # Call-form print with a single argument works on Python 2 and 3.
        print('tokenization...')
        tokenizer = PTBTokenizer()
        self.refToRes = tokenizer.tokenize(refToRes)
        self.refToGts = tokenizer.tokenize(refToGts)

        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(self.refToGts, self.refToRes)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setRefToEvalRefs(scs, self.refToGts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalRefs()
Esempio n. 23
0
def evaluate(hyp, ref):
    """Print BLEU, METEOR, ROUGE and CIDEr for a hypothesis/reference file pair.

    Args:
        hyp: Path to the hypothesis file; every line is one entry.
        ref: Path to the reference file; every line is one entry.

    Line ``k`` of each file is stored under key ``k``.  Hypothesis lines are
    lower-cased and have their whitespace collapsed to single spaces;
    reference lines are stripped and lower-cased.  METEOR and CIDEr are
    computed from those two dictionaries, while BLEU and ROUGE are handed
    the file paths directly.

    Every score is printed as ``label + str(value)``.  Nothing is returned.
    """
    with open(hyp, 'r') as r:
        res = {k: [" ".join(v.strip().lower().split())]
               for k, v in enumerate(r)}
    with open(ref, 'r') as r:
        gts = {k: [v.strip().lower()] for k, v in enumerate(r)}

    # NOTE(review): this scorer is given the file *paths*, not gts/res, and
    # its second return value was originally named ``stderr`` -- presumably a
    # file-based BLEU wrapper rather than a dict-based scorer; confirm.
    score_Bleu, _ = Bleu().compute_score(hyp, ref)
    print("Bleu_4: " + str(score_Bleu))

    # The value is concatenated into the printed string: writing
    # ``print("Meteor: "), value`` prints only the label on Python 3.
    score_Meteor, _ = Meteor().compute_score(gts, res)
    print("Meteor: " + str(score_Meteor))

    files_rouge = FilesRouge(hyp, ref)
    scores = files_rouge.get_scores(avg=True)
    print('Rouge: ' + str(scores))

    score_Cider, _ = Cider().compute_score(gts, res)
    print("Cider: " + str(score_Cider))
    def evaluate(self):
        """Tokenize and score the captions of ``self.params['image_id']``.

        For each listed image id, the entry of ``self.coco.imgToAnns`` is
        used as the reference side and the entry of
        ``self.cocoRes.imgToAnns`` as the candidate side.  Both mappings are
        run through ``PTBTokenizer`` and scored with BLEU-1..4, METEOR,
        ROUGE-L and CIDEr.  Values are stored through ``self.setEval`` and
        ``self.setImgToEvalImgs``; ``self.setEvalImgs`` is called last.

        Returns:
            A list with one ``(method, score)`` tuple per scorer, in scorer
            order, where ``method`` is the scorer's name (or list of names)
            and ``score`` is the first value ``compute_score`` returned.
        """
        references = {}
        candidates = {}
        for image_id in self.params['image_id']:
            references[image_id] = self.coco.imgToAnns[image_id]
            candidates[image_id] = self.cocoRes.imgToAnns[image_id]

        # =================================================
        # Tokenize
        # =================================================
        ptb = PTBTokenizer()
        references = ptb.tokenize(references)
        candidates = ptb.tokenize(candidates)

        # =================================================
        # Set up scorers
        # =================================================
        metric_table = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
        ]

        # =================================================
        # Compute scores
        # =================================================
        result = []
        for scorer, method in metric_table:
            score, scores = scorer.compute_score(references, candidates)
            # Normalise to (value, per-image values, name) triples so the
            # multi-name scorer and the single-name scorers share one path.
            if isinstance(method, list):
                triples = zip(score, scores, method)
            else:
                triples = [(score, scores, method)]
            for value, per_image, name in triples:
                self.setEval(value, name)
                self.setImgToEvalImgs(per_image, references.keys(), name)
            result.append((method, score))
        self.setEvalImgs()
        return result
Esempio n. 25
0
    def evaluate(self, gts, res):
        """Tokenize ``gts`` and ``res``, score them, and return BLEU-4.

        Args:
            gts: Reference mapping handed to ``PTBTokenizer.tokenize``.
            res: Candidate mapping handed to ``PTBTokenizer.tokenize``.

        BLEU-1..4, METEOR, ROUGE-L and CIDEr are computed.  Each value is
        stored through ``self.setEval`` / ``self.setImgToEvalImgs``, logged
        with ``logging.info`` and printed; ``self.setEvalImgs`` runs last.

        Returns:
            The value recorded under ``"Bleu_4"`` (``0`` if that name never
            came up).
        """
        # =================================================
        # Tokenize
        # =================================================
        logging.info('tokenization...')
        ptb = PTBTokenizer()
        gts = ptb.tokenize(gts)
        res = ptb.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        logging.info('setting up scorers...')
        metric_table = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            # SPICE is disabled here: (Spice(), "SPICE")
        ]

        # =================================================
        # Compute scores
        # =================================================
        bleu_4_score = 0
        for scorer, method in metric_table:
            logging.info('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                triples = zip(score, scores, method)
            else:
                triples = [(score, scores, method)]
            for value, per_image, name in triples:
                self.setEval(value, name)
                self.setImgToEvalImgs(per_image, gts.keys(), name)
                # Only the BLEU scorer's name list contains "Bleu_4"; the
                # single-name scorers above never match this test.
                if name == "Bleu_4":
                    bleu_4_score = value
                line = "%s: %0.3f" % (name, value)
                logging.info(line)
                print(line)
        self.setEvalImgs()
        return bleu_4_score
Esempio n. 26
0
    def evaluate(self):
        """Score the captions of ``self.params['image_id']``; log to stderr.

        For each listed image id, the entry of ``self.coco.imgToAnns`` is
        used as the reference side and the entry of
        ``self.cocoRes.imgToAnns`` as the candidate side.  Both mappings are
        run through ``PTBTokenizer`` and scored with METEOR, ROUGE-L and
        CIDEr.  Values are stored through ``self.setEval`` and
        ``self.setImgToEvalImgs``; ``self.setEvalImgs`` is called last.

        Progress and score lines are written to ``sys.stderr``, one line per
        message.
        """
        # ``sys.stderr.write`` replaces the Python 2-only
        # ``print >> sys.stderr, ...`` statement; the emitted text is the
        # same and the method now also parses on Python 3.
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenize
        # =================================================
        sys.stderr.write('tokenization...\n')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        sys.stderr.write('setting up scorers...\n')
        scorers = [
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            sys.stderr.write('computing %s score...\n' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                # No scorer in the table above uses a list of names; the
                # branch is kept so a multi-name scorer can be added to the
                # table without touching this loop.
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    sys.stderr.write("%s: %0.3f\n" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                sys.stderr.write("%s: %0.3f\n" % (method, score))
        self.setEvalImgs()
Esempio n. 27
0
    def evaluate(self):
        """Score ``self.predicted_list`` against ``self.label_list``.

        Items are keyed by their position in each list.  ``self.label_list``
        supplies the references (``gts``) and ``self.predicted_list`` the
        hypotheses (``res``).  Both mappings are run through
        ``PTBTokenizer`` and scored with BLEU-1..4, METEOR, ROUGE-L and
        CIDEr.  Values are stored through ``self.setEval`` and
        ``self.set_textid_to_eval``; ``self.set_eval`` is called last.
        Progress and scores are printed to stdout.
        """
        # References must come from the labels and hypotheses from the
        # predictions: the scorers take (gts, res) in that order and the
        # metrics are not symmetric, so feeding predictions in as ``gts``
        # reports the wrong numbers.
        # NOTE(review): roles inferred from the attribute names -- confirm
        # that label_list really holds the ground truth.
        gts = dict(enumerate(self.label_list))
        res = dict(enumerate(self.predicted_list))

        # =================================================
        # Tokenize
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
        ]

        # =================================================
        # Compute scores
        # =================================================
        # ``print`` is always called with one pre-formatted string, so the
        # method parses on Python 3 and prints identical text on Python 2.
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.set_textid_to_eval(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.set_textid_to_eval(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.set_eval()
Esempio n. 28
0
    def evaluate(self):
        """Silently score the captions of ``self.params['image_id']``.

        For each listed image id, the entry of ``self.coco.imgToAnns`` is
        used as the reference side and the entry of
        ``self.cocoRes.imgToAnns`` as the candidate side.  Both mappings are
        run through ``PTBTokenizer`` and scored with BLEU-1..4, METEOR,
        ROUGE-L and CIDEr.  Values are stored through ``self.setEval`` and
        ``self.setImgToEvalImgs``; ``self.setEvalImgs`` is called last.
        Nothing is printed and nothing is returned.
        """
        references = {}
        candidates = {}
        for image_id in self.params['image_id']:
            references[image_id] = self.coco.imgToAnns[image_id]
            candidates[image_id] = self.cocoRes.imgToAnns[image_id]

        # =================================================
        # Tokenize
        # =================================================
        ptb = PTBTokenizer()
        references = ptb.tokenize(references)
        candidates = ptb.tokenize(candidates)

        # =================================================
        # Set up scorers
        # =================================================
        bleu_names = ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]
        metric_table = [
            (Bleu(4), bleu_names),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in metric_table:
            score, scores = scorer.compute_score(references, candidates)
            if not isinstance(method, list):
                # Single-name scorer: one overall value, one per-image set.
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, references.keys(), method)
                continue
            # Multi-name scorer: one entry per name, recorded separately.
            for value, per_image, name in zip(score, scores, method):
                self.setEval(value, name)
                self.setImgToEvalImgs(per_image, references.keys(), name)
        self.setEvalImgs()
Esempio n. 29
0
def main(hyp, ref, len):
    """Print BLEU-1..4, METEOR, ROUGE and CIDEr for two line-aligned files.

    Args:
        hyp: Path to the hypothesis file; every line is one entry.
        ref: Path to the reference file; every line is one entry.
        len: Maximum number of whitespace-separated tokens kept from each
            hypothesis line (used as a slice bound).  The name shadows the
            builtin and is kept only so existing callers keep working.

    Line ``k`` of each file is stored under key ``k``.  Hypothesis lines are
    lower-cased, truncated to ``len`` tokens and re-joined with single
    spaces; reference lines are stripped and lower-cased.

    For BLEU the printed value is ``np.mean`` of the corresponding entry of
    the scorer's second return value; for the other metrics it is the
    scorer's first return value.  Nothing is returned.
    """
    max_tokens = len  # alias so the body does not read like a builtin call

    with open(hyp, 'r') as r:
        res = {k: [" ".join(v.strip().lower().split()[:max_tokens])]
               for k, v in enumerate(r)}
    with open(ref, 'r') as r:
        gts = {k: [v.strip().lower()] for k, v in enumerate(r)}

    # Values are concatenated into the printed string: writing
    # ``print("Bleu_1: "), value`` prints only the label on Python 3.
    _, scores_Bleu = Bleu(4).compute_score(gts, res)
    for order in range(4):
        print("Bleu_%d: " % (order + 1) + str(np.mean(scores_Bleu[order])))

    score_Meteor, _ = Meteor().compute_score(gts, res)
    print("Meteor: " + str(score_Meteor))

    score_Rouge, _ = Rouge().compute_score(gts, res)
    print("ROUGe: " + str(score_Rouge))

    score_Cider, _ = Cider().compute_score(gts, res)
    print("Cider: " + str(score_Cider))
Esempio n. 30
0
        if i % 2 == 1:
            res[int(line.strip('\n').split(':')[0])] = [
                line.strip('\n').split(':')[2]
            ]
        elif i % 2 == 0:
            gts[int(line.strip('\n').split(':')[0])] = [
                line.strip('\n').split(':')[2]
            ]

hyps = []
refs = []
bleu_score = 0.0

# Pair the first hypothesis of every key in ``res`` with the first
# reference stored under the same key in ``gts``.
for k in res:
    assert k in gts
    hyps.append(res[k][0])
    refs.append(gts[k][0])

# The smoothing method does not depend on the sentence pair, so it is
# created once instead of once per iteration.
smoothing = SmoothingFunction().method4
for hyp, ref in zip(hyps, refs):
    hyp = hyp.strip().split()
    ref = ref.strip().split()
    bleu_score += sentence_bleu([ref],
                                hyp,
                                smoothing_function=smoothing)

# Values are concatenated into the printed string: writing
# ``print("label: "), value`` prints only the label on Python 3.
# The BLEU figure is the mean of the per-sentence smoothed scores.
print("score_Bleu: " + str(bleu_score * 1.0 / len(hyps)))

score_Meteor, scores_Meteor = Meteor().compute_score(gts, res)
print("Meteor: " + str(score_Meteor))
score_Rouge, scores_Rouge = Rouge().compute_score(gts, res)
print("ROUGe: " + str(score_Rouge))