def __init__(self,
             ground_truth_filenames=None,
             prediction_filename=None,
             tious=None,
             max_proposals=1000,
             prediction_fields=PREDICTION_FIELDS,
             verbose=False):
    # Check that the gt and submission files exist and load them
    if not tious:  # also guards against the default tious=None
        raise IOError('Please input a valid tIoU.')
    if not ground_truth_filenames:
        raise IOError('Please input a valid ground truth file.')
    if not prediction_filename:
        raise IOError('Please input a valid prediction file.')

    self.verbose = verbose
    self.tious = tious
    self.max_proposals = max_proposals
    self.pred_fields = prediction_fields
    self.ground_truths = self.import_ground_truths(ground_truth_filenames)
    self.prediction = self.import_prediction(prediction_filename)
    self.tokenizer = PTBTokenizer()

    # Set up scorers; if not verbose, we only use the one we're
    # testing on: METEOR
    if self.verbose:
        self.scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
        ]
    else:
        self.scorers = [(Meteor(), "METEOR")]
def generate_data_list(mode, caption_file_path):
    # get file name
    if mode == 'train':
        file_name = 'train.json'
    elif mode == 'val':
        file_name = 'val_1.json'
    else:
        print 'Invalid mode: %s' % mode
        sys.exit()

    # get timestamps and sentences
    input_dict = {}
    data = json.loads(open(os.path.join(
        caption_file_path, file_name)).read())
    for vid, content in data.iteritems():
        sentences = content['sentences']
        timestamps = content['timestamps']
        for t, s in zip(timestamps, sentences):
            dictkey = ' '.join([vid, str(t[0]), str(t[1])])
            input_dict[dictkey] = [{'caption': remove_nonascii(s)}]

    # ptbtokenizer
    tokenizer = PTBTokenizer()
    output_dict = tokenizer.tokenize(input_dict)

    with open('%s.list' % mode, 'wb') as f:
        for id, sentence in output_dict.iteritems():
            try:
                f.write('\t'.join(id.split() + sentence) + '\n')
            except:
                pass
    print 'Generate %s.list done ...' % mode
def __init__(self, logger, args, data_path, is_training, passages=None):
    super(QGData, self).__init__(logger, args, data_path, is_training, passages)
    self.qg_tokenizer = PTBTokenizer()
    self.metric = "Bleu"
    if not self.is_training:
        self.qg_tokenizer = PTBTokenizer()
def score(gts, res, ids):
    origingts = gts
    originres = res
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)
    """
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")]
    """
    scorers = [(Meteor(), "METEOR")]
    for scorer, method in scorers:
        score, scores = scorer.compute_score(gts, res)
        print("{:<14}:\t{:0.4f}".format(method, score))

    F1_score = F1(originres, origingts)
    avg = 0.0
    for noc_word in sorted(F1_score.keys()):
        print("{:<14}:\t{:0.4f}".format(noc_word, F1_score[noc_word]))
        avg += F1_score[noc_word]
    avg = avg / len(F1_score.keys())
    print("{:<14}:\t{:0.4f}".format("Average", avg))
def test(model_path='models/model-61', video_feat_path=video_feat_path):
    train_data, test_data = get_video_data(video_data_path, video_feat_path, train_ratio=0.7)
    test_videos = test_data['video_path'].values
    test_captions = test_data['Description'].values
    ixtoword = pd.Series(np.load('./data/ixtoword.npy').tolist())

    test_videos_unique = list()
    test_captions_list = list()
    for (video, caption) in zip(test_videos, test_captions):
        if len(test_videos_unique) == 0 or test_videos_unique[-1] != video:
            test_videos_unique.append(video)
            test_captions_list.append([caption])
        else:
            test_captions_list[-1].append(caption)

    model = Video_Caption_Generator(
        dim_image=dim_image,
        n_words=len(ixtoword),
        dim_embed=dim_embed,
        dim_hidden=dim_hidden,
        batch_size=batch_size,
        encoder_max_sequence_length=encoder_step,
        decoder_max_sentence_length=decoder_step,
        bias_init_vector=None)

    video_tf, video_mask_tf, caption_tf, probs_tf, last_embed_tf = model.build_generator()
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    scorer = Meteor()
    scorer_bleu = Bleu(4)
    GTS = defaultdict(list)
    RES = defaultdict(list)
    counter = 0
    for (video_feat_path, caption) in zip(test_videos_unique, test_captions_list):
        generated_sentence = gen_sentence(
            sess, video_tf, video_mask_tf, caption_tf, video_feat_path, ixtoword)
        print video_feat_path, generated_sentence
        # print caption
        GTS[str(counter)] = [{'image_id': str(counter), 'cap_id': i, 'caption': s}
                             for i, s in enumerate(caption)]
        RES[str(counter)] = [{'image_id': str(counter),
                              'caption': generated_sentence[:-2] + '.'}]
        # GTS[video_feat_path] = caption
        # RES[video_feat_path] = [generated_sentence[:-2] + '.']
        counter += 1

    # ipdb.set_trace()
    tokenizer = PTBTokenizer()
    GTS = tokenizer.tokenize(GTS)
    RES = tokenizer.tokenize(RES)
    score, scores = scorer.compute_score(GTS, RES)
    print "METEOR", score
    score, scores = scorer_bleu.compute_score(GTS, RES)
    print "BLEU", score
def evaluate(self):
    imgIds = self.params['image_id']
    gts = self.gts
    res = self.res

    # =================================================
    # Tokenization
    # =================================================
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    # =================================================
    # Compute scores
    # =================================================
    eval = {}
    for scorer, method in scorers:
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, imgIds, m)
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, imgIds, method)
    self.setEvalImgs()
def format_for_metrics(gts, res, rev_word_map):
    """
    Generate the input format expected by pycocoevalcap.
    :param gts: ground truth list
    :param res: hypothesis list
    :param rev_word_map: reverse word map, from indices to words
    :return: tokenized ground-truth and hypothesis dicts
    """
    gts_dic = {}
    for idx, sents in enumerate(gts):
        tmp = []
        for sent in sents:
            tmp.append({
                u'image_id': idx,
                u'caption': ' '.join([rev_word_map[x] for x in sent])
            })
        gts_dic[idx] = tmp[:]

    res_dic = {}
    for idx, sent in enumerate(res):
        res_dic[idx] = [{
            u'image_id': idx,
            u'caption': ' '.join([rev_word_map[x] for x in sent])
        }]

    tokenizer = PTBTokenizer()
    return tokenizer.tokenize(gts_dic), tokenizer.tokenize(res_dic)
def score(self, GT, RES, IDs):
    # edited by rgh
    # self.eval = {}
    self.eval = OrderedDict()
    self.imgToEval = {}
    gts = {}
    res = {}
    for ID in IDs:
        # print ID
        gts[ID] = GT[ID]
        res[ID] = RES[ID]
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    # edited by rgh
    # scorers = [
    #     (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
    #     (Meteor(), "METEOR"),
    #     (Rouge(), "ROUGE_L"),
    #     (Cider(), "CIDEr"),
    #     # (Spice(), "SPICE")
    # ]
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Cider(), "CIDEr"),
        (Rouge(), "ROUGE_L"),
        # (Spice(), "SPICE")
    ]

    # =================================================
    # Compute scores
    # =================================================
    eval = {}
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            # added by rgh
            # for sc, scs, m in zip(score, scores, method):
            #     self.setEval(sc, m)
            #     self.setImgToEvalImgs(scs, IDs, m)
            #     print("%s: %0.3f" % (m, sc))
            self.setEval("%.4f" % score[-1], method[-1])
            self.setImgToEvalImgs(scores[-1], IDs, method[-1])
            print("%s: %0.4f" % (method[-1], score[-1]))
        else:
            self.setEval("%.4f" % score, method)
            self.setImgToEvalImgs(scores, IDs, method)
            print("%s: %0.4f" % (method, score))
    # for metric, score in self.eval.items():
    #     print('%s: %.3f' % (metric, score))
    return self.eval
def evaluate(gts, res):
    eval = {}

    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                eval[m] = sc
        else:
            eval[method] = score
    return eval
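# A minimal usage sketch (not part of the original code): pycocoevalcap's
# PTBTokenizer.tokenize() expects a dict mapping an id to a list of
# {'caption': str} entries, and the scorers above consume the tokenized
# dicts it returns. The ids and captions below are made up for illustration.
if __name__ == '__main__':
    example_gts = {
        'vid0': [{'caption': 'A man is slicing a tomato.'},
                 {'caption': 'Someone cuts a tomato on a board.'}],
    }
    example_res = {
        'vid0': [{'caption': 'A man cuts a tomato.'}],
    }
    print(evaluate(example_gts, example_res))  # e.g. {'Bleu_1': ..., 'METEOR': ..., ...}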
def get_dcc_scores(self):
    imgIds = self.params['image_id']
    # imgIds = self.coco.getImgIds()
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]

    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    score_dict = {}
    for scorer, method in scorers:
        print 'computing %s score...' % (scorer.method())
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                score_dict[m] = sc
                print "%s: %0.3f" % (m, sc)
        else:
            score_dict[method] = score
            print "%s: %0.3f" % (method, score)
    return score_dict
def evaluate(self):
    # =================================================
    # Tokenization
    # =================================================
    print("Tokenization")
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(self.ground_truth)
    preds = tokenizer.tokenize(self.prediction)

    # =================================================
    # Set up scorers
    # =================================================
    print("Setting up scorers...")
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        # (Spice(), "SPICE")
    ]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print("Computing {} score...".format(scorer.method()))
        score, scores = scorer.compute_score(gts, preds)
        if isinstance(method, list):
            for sc, scs, m in zip(score, scores, method):
                self.eval_res[m] = sc * 100
        else:
            self.eval_res[method] = score * 100
def get_scorers(self):
    # from pycoco_scorers_vizseq import BLEUScorerAll
    from pycocoevalcap.bleu.bleu import Bleu
    # from pycocoevalcap.spice.spice import Spice
    from pycocoevalcap.cider.cider import Cider
    from pycocoevalcap.rouge.rouge import Rouge
    from pycocoevalcap.meteor.meteor import Meteor
    from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
    import logging
    import transformers

    transformers.tokenization_utils.logger.setLevel(logging.ERROR)
    transformers.configuration_utils.logger.setLevel(logging.ERROR)
    transformers.modeling_utils.logger.setLevel(logging.ERROR)

    Scorer_ = namedtuple("Scorer_", ["cls_fn", "to_init", "out_str"])
    self.scorer_dict = {
        "bleu": Scorer_(Bleu(4, verbose=0), False,
                        ["bleu@1", "bleu@2", "bleu@3", "bleu@4"]),
        "meteor": Scorer_(Meteor(), False, ["meteor"]),
        "cider": Scorer_(Cider("corpus"), False, ["cider"]),
        "rouge": Scorer_(Rouge(), False, ["rouge"]),
        # "spice": Scorer_(Spice(), False, ["spice"]),
        "bert_score": Scorer_(BertScoreSimple, True, ["bert_score"]),
    }
    self.tokenizer = PTBTokenizer()
def score(num, DIR):
    print("Testing results on epoch ", num, " in DIR=", DIR)
    print("Loading coco annotations")
    dataDir = '.'
    dataType = 'val2014'
    algName = 'fakecap'
    annFile = '%s/annotations/captions_%s.json' % (dataDir, dataType)
    subtypes = ['results', 'evalImgs', 'eval']
    [resFile, evalImgsFile, evalFile] = \
        ['%s/results/captions_%s_%s_%s.json' % (dataDir, dataType, algName, subtype)
         for subtype in subtypes]
    coco_anns = COCO(annFile)
    print("COCO anns imported")

    path = DIR + str(num) + '_test_result.tar.gz'
    save = pickle.load(open(path))

    cocoRes = {}
    coco = {}
    for key, val in save.items():
        reslst = val[u'res']
        res = []
        for data in reslst:
            if data != u'<SEND>':
                res.append(data)
            else:
                break
        res = res[1:]
        # print "RES: ", reslst
        # print "ANN: ", val[u'ann']
        # res = [word for word in res if word != u'<SEND>'][1:]
        # print "RES FIXED: ", res
        if len(res) == 0:
            res = [u'a']  # just not to be empty, and it has low idf
        cocoRes[key] = [{u'caption': ' '.join(res)}]
        # coco[key] = [{u'caption': ' '.join(val[u'ann'][1:-1])}]
        coco[key] = coco_anns.imgToAnns[key]

    print 'examples'
    for key in coco.keys()[:5]:
        print "IMG_NUM=", key
        print "Annotation: ", '\n'.join(
            [coco[key][i][u'caption'] for i in range(len(coco[key]))])
        print "Generated data: ", ' '.join(save[key][u'res'])
        print "Cleared generation: ", cocoRes[key][0][u'caption']

    print 'tokenization...'
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(coco)
    res = tokenizer.tokenize(cocoRes)

    print 'setting up scorers...'
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE"),
    ]
    for scorer, method in scorers:
        print 'computing %s score...' % (scorer.method())
        score, scores = scorer.compute_score(gts, res)
        print(score)
def eval_div_stats(dataset, preds_n, model_id, split):
    tokenizer = PTBTokenizer()

    capsById = {}
    for i, d in enumerate(preds_n):
        d['id'] = i
        capsById[d['image_id']] = capsById.get(d['image_id'], []) + [d]

    n_caps_perimg = len(capsById[list(capsById.keys())[0]])
    print(n_caps_perimg)
    _capsById = capsById  # save the untokenized version
    capsById = tokenizer.tokenize(capsById)

    div_1, adiv_1 = compute_div_n(capsById, 1)
    div_2, adiv_2 = compute_div_n(capsById, 2)

    globdiv_1, _ = compute_global_div_n(capsById, 1)

    print('Diversity Statistics are as follows: \n Div1: %.2f, Div2: %.2f, gDiv1: %d\n'
          % (div_1, div_2, globdiv_1))

    # compute mbleu
    scorer = Bleu(4)
    all_scrs = []
    scrperimg = np.zeros((n_caps_perimg, len(capsById)))
    for i in range(n_caps_perimg):
        tempRefsById = {}
        candsById = {}
        for k in capsById:
            tempRefsById[k] = capsById[k][:i] + capsById[k][i + 1:]
            candsById[k] = [capsById[k][i]]
        score, scores = scorer.compute_score(tempRefsById, candsById)
        all_scrs.append(score)
        scrperimg[i, :] = scores[1]
    all_scrs = np.array(all_scrs)

    out = {}
    out['overall'] = {'Div1': div_1, 'Div2': div_2, 'gDiv1': globdiv_1}
    for k, score in zip(range(4), all_scrs.mean(axis=0).tolist()):
        out['overall'].update({'mBLeu_%d' % (k + 1): score})

    imgToEval = {}
    for i, imgid in enumerate(capsById.keys()):
        imgToEval[imgid] = {'mBleu_2': scrperimg[:, i].mean()}
        imgToEval[imgid]['individuals'] = []
        for j, d in enumerate(_capsById[imgid]):
            imgToEval[imgid]['individuals'].append(preds_n[d['id']])
            imgToEval[imgid]['individuals'][-1]['mBleu_2'] = scrperimg[j, i]
    out['ImgToEval'] = imgToEval

    print('Mean mutual Bleu scores on this set is:\nmBLeu_1, mBLeu_2, mBLeu_3, mBLeu_4')
    print(all_scrs.mean(axis=0))
    return out
def initializeTokenizer(self):
    groundTruthCaptions = self.actualCaptions
    predictedCaptions = self.predictedCaptions

    # Tokenize
    tokenizer = PTBTokenizer()
    self.gtc_tokens = tokenizer.tokenize(groundTruthCaptions)
    self.pc_tokens = tokenizer.tokenize(predictedCaptions)
def __init__(self, preds, gts, gt_vid, verbose=False):
    self.pred_keys = ['results']
    # self.pred_keys = ['results', 'version', 'external_data']
    self.verbose = verbose
    self.preds = preds
    self.gts = gts
    self.gt_vids = gt_vid
    self.tokenizer = PTBTokenizer()
def __init__(self):
    # The following script requires Java 1.8.0 and pycocotools installed.
    # The pycocoevalcap package can be installed with pip from M4C-Captioner's
    # GitHub repo, but has no Python 3 support yet.
    from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
    from pycocoevalcap.bleu.bleu import Bleu

    self.tokenizer = PTBTokenizer()
    self.scorer = Bleu(4)
def __init__(self):
    # The following script requires Java 1.8.0 and pycocotools installed.
    # The pycocoevalcap package can be installed with pip as
    #   pip install git+https://github.com/ronghanghu/coco-caption.git@python23
    # The original pycocoevalcap code is at https://github.com/tylin/coco-caption,
    # but has no Python 3 support yet.
    from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
    from pycocoevalcap.bleu.bleu import Bleu

    self.tokenizer = PTBTokenizer()
    self.scorer = Bleu(4)
def __init__(self, d_model, encoder, vocab_trg, d_hidden=2048,
             n_layers=6, n_heads=8, drop_ratio=0.1):
    super().__init__()
    # self.encoder = Encoder(d_model, d_hidden, n_vocab_src, n_layers,
    #                        n_heads, drop_ratio)
    self.encoder = encoder
    self.decoder = Decoder(d_model, d_hidden, vocab_trg, n_layers,
                           n_heads, drop_ratio)
    self.n_layers = n_layers
    self.tokenizer = PTBTokenizer()
def evaluate_tiou(self, tiou):
    # For every prediction, find its respective references with tIoU > the
    # passed-in threshold.
    res = {}
    gts = {}
    unique_index = 0
    for vid_id in self.prediction:
        for pred in self.prediction[vid_id]:
            res[unique_index] = [{'caption': pred['sentence']}]
            matches = []
            for gt in self.ground_truths:
                refs = gt[vid_id]
                for ref_i, ref_timestamp in enumerate(refs['timestamps']):
                    if self.iou(pred['timestamp'], ref_timestamp) > tiou:
                        matches.append(refs['sentences'][ref_i])
            if len(matches) == 0:
                gts[unique_index] = [{'caption': 'abc123!@#'}]
            else:
                gts[unique_index] = [{'caption': v} for v in matches]
            unique_index += 1

    # Tokenize
    if self.verbose:
        print '| Tokenizing ...'

    # Suppressing tokenizer output
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # Set up scorers
    if self.verbose:
        print '| Setting up scorers ...'
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    # Compute scores
    output = {}
    for scorer, method in scorers:
        if self.verbose:
            print 'computing %s score...' % (scorer.method())
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                output[m] = sc
                if self.verbose:
                    print "Calculated tIoU: %1.1f, %s: %0.3f" % (tiou, m, sc)
        else:
            output[method] = score
            if self.verbose:
                print "Calculated tIoU: %1.1f, %s: %0.3f" % (tiou, method, score)
    return output
def score(self, GT, RES, IDs):
    self.eval = {}
    self.imgToEval = {}
    gts = {}
    res = {}
    for ID in IDs:
        # print ID
        gts[ID] = GT[ID]
        res[ID] = RES[ID]
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        # (Spice(), "SPICE")
    ]

    # =================================================
    # Compute scores
    # =================================================
    eval = {}
    sub_category_score = None
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        if method == 'SPICE':
            score, scores, sub_category_score = scorer.compute_score(gts, res)
        else:
            score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, IDs, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, IDs, method)
            print("%s: %0.3f" % (method, score))
    # for metric, score in self.eval.items():
    #     print('%s: %.3f' % (metric, score))
    return self.eval, sub_category_score
def score(gts, res, ids, log_out):
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]
    for scorer, method in scorers:
        # print 'computing %s score...' % (scorer.method())
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                print >> log_out, "%s: %f" % (m, sc)
        else:
            print >> log_out, "%s: %f" % (method, score)
def score(self, GT, RES, IDs, result_file):
    self.eval = {}
    self.imgToEval = {}
    gts = {}
    res = {}
    for ID in IDs:
        # print ID
        gts[ID] = GT[ID]
        res[ID] = RES[ID]
    print 'tokenization...'
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print 'setting up scorers...'
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    # result_file = '/home/anguyen/workspace/paper_src/2018.icra.v2c.source/output/' + net_id + '/prediction/score_result.txt'
    print 'RESULT FILE: ', result_file
    fwriter = open(result_file, 'w')

    # =================================================
    # Compute scores
    # =================================================
    eval = {}
    for scorer, method in scorers:
        print 'computing %s score...' % (scorer.method())
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, IDs, m)
                print "%s: %0.3f" % (m, sc)
                fwriter.write("%s %0.3f\n" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, IDs, method)
            print "%s: %0.3f" % (method, score)
            fwriter.write("%s %0.3f\n" % (method, score))
    # for metric, score in self.eval.items():
    #     print '%s: %.3f' % (metric, score)
    return self.eval
def score(self, GT, RES, IDs):
    self.eval = {}
    self.imgToEval = {}
    gts = {}
    res = {}
    for ID in IDs:
        gts[ID] = GT[ID]
        res[ID] = RES[ID]
    print 'tokenization...'
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    with open('all_samples.txt', 'w') as f:
        for i in res.keys():
            print >> f, 'valid stuff'
            print >> f, '\t'.join(res[i])
            print >> f, 'ground truth'
            print >> f, '\n'.join(gts[i])

    # =================================================
    # Set up scorers
    # =================================================
    print 'setting up scorers...'
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        # (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    # =================================================
    # Compute scores
    # =================================================
    eval = {}
    for scorer, method in scorers:
        print 'computing %s score...' % (scorer.method())
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, IDs, m)
                print "%s: %0.3f" % (m, sc)
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, IDs, method)
            print "%s: %0.3f" % (method, score)

    for metric, score in self.eval.items():
        print '%s: %.3f' % (metric, score)
    return self.eval
def create_tokcap(data_folder=DATA_FOLDER):
    cap = COCO(COCO_TRAIN_CAP_FILE)

    listedCapMap = {}
    for i in cap.loadAnns(cap.getAnnIds()):
        listedCapMap[i['id']] = [dict([('caption', i['caption']),
                                       ('image_id', i['image_id'])])]
    tokenizedListedCapMap = PTBTokenizer().tokenize(listedCapMap)

    # map caption ids to a map of its tokenized caption and image id
    tokcap = []
    for i, j in tokenizedListedCapMap.iteritems():
        tokcap += [(i, dict([('caption', j[0]),
                             ('image_id', listedCapMap[i][0]['image_id'])]))]

    f = open(data_folder + '/preprocessed/tokcap.json', 'w')
    json.dump(tokcap, f)
    f.close()
def evaluate(self):
    imgIds = self.params['image_id']
    # imgIds = self.coco.getImgIds()
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]

    # =================================================
    # Tokenization
    # =================================================
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score))
    self.setEvalImgs()
def score(self, GT, RES, IDs):
    self.eval = {}
    self.imgToEval = {}
    gts = {}
    res = {}
    for ID in IDs:
        gts[ID] = GT[ID]
        res[ID] = RES[ID]
    print('tokenization...')
    tokenizer = PTBTokenizer()
    '''
    print("gts: ")
    for key in gts:
        print(key)
        for value in gts[key]:
            print(value)
    '''
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    # =================================================
    # Compute scores
    # =================================================
    eval = {}
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, IDs, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, IDs, method)
            print("%s: %0.3f" % (method, score))

    for metric, score in self.eval.items():
        print('%s: %.3f' % (metric, score))
    return self.eval
def eval_self_cider(dataset, preds_n, model_id, split):
    cache_path = os.path.join('eval_results/', model_id + '_' + split + '_n.json')

    coco = getCOCO(dataset)
    valids = coco.getImgIds()

    # Get Cider_scorer
    Cider_scorer = Cider(df='corpus')

    tokenizer = PTBTokenizer()
    gts = {}
    for imgId in valids:
        gts[imgId] = coco.imgToAnns[imgId]
    gts = tokenizer.tokenize(gts)
    for imgId in valids:
        Cider_scorer.cider_scorer += (None, gts[imgId])
    Cider_scorer.cider_scorer.compute_doc_freq()
    Cider_scorer.cider_scorer.ref_len = np.log(
        float(len(Cider_scorer.cider_scorer.crefs)))

    # Prepare captions
    capsById = {}
    for d in preds_n:
        capsById[d['image_id']] = capsById.get(d['image_id'], []) + [d]

    capsById = tokenizer.tokenize(capsById)
    imgIds = list(capsById.keys())
    scores = Cider_scorer.my_self_cider([capsById[_] for _ in imgIds])

    def get_div(eigvals):
        eigvals = np.clip(eigvals, 0, None)
        return -np.log(np.sqrt(eigvals[-1]) / (np.sqrt(eigvals).sum())) / np.log(len(eigvals))

    sc_scores = [get_div(np.linalg.eigvalsh(_ / 10)) for _ in scores]
    score = np.mean(np.array(sc_scores))

    imgToEval = {}
    for i, image_id in enumerate(imgIds):
        imgToEval[image_id] = {
            'self_cider': sc_scores[i],
            'self_cider_mat': scores[i].tolist()
        }
    return {'overall': {'self_cider': score}, 'imgToEval': imgToEval}
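# Editor's note (interpretation, not original code): get_div above converts the
# eigenvalues of an image's pairwise self-CIDEr similarity matrix into a
# diversity score,
#     div = -log( sqrt(lambda_max) / sum_i sqrt(lambda_i) ) / log(n),
# where eigvalsh returns eigenvalues in ascending order, so eigvals[-1] is the
# largest. If one eigenvalue dominates (all n captions are nearly identical)
# the ratio is close to 1 and div is close to 0; if the eigenvalues are evenly
# spread, div approaches 1. The division by 10 appears to undo the factor-of-10
# scaling that the coco-caption CIDEr implementation applies to its scores.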
class TextCapsBleu4Evaluator:
    def __init__(self):
        # The following script requires Java 1.8.0 and pycocotools installed.
        # The pycocoevalcap package can be installed with pip as
        #   pip install git+https://github.com/ronghanghu/coco-caption.git@python23
        # The original pycocoevalcap code is at https://github.com/tylin/coco-caption,
        # but has no Python 3 support yet.
        try:
            from pycocoevalcap.bleu.bleu import Bleu
            from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
        except ModuleNotFoundError:
            print(
                "Please install pycocoevalcap module using "
                "pip install git+https://github.com/ronghanghu/coco-caption.git@python23"  # noqa
            )
            raise

        self.tokenizer = PTBTokenizer()
        self.scorer = Bleu(4)

    def eval_pred_list(self, pred_list):
        # Create reference and hypothesis captions.
        gts = {}
        res = {}
        for idx, entry in enumerate(pred_list):
            gts[idx] = [{"caption": a} for a in entry["gt_answers"]]
            res[idx] = [{"caption": entry["pred_answer"]}]
        gts = self.tokenizer.tokenize(gts)
        res = self.tokenizer.tokenize(res)
        score, _ = self.scorer.compute_score(gts, res)
        bleu4 = score[3]  # score is (Bleu-1, Bleu-2, Bleu-3, Bleu-4)
        return bleu4
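# A minimal usage sketch (not from the original repo): each pred_list entry is
# assumed to be a dict with "gt_answers" (a list of reference captions) and
# "pred_answer" (the generated caption), as consumed by eval_pred_list above.
# The captions below are made up for illustration.
evaluator = TextCapsBleu4Evaluator()
example_pred_list = [
    {"gt_answers": ["a cat sits on a mat", "a cat on a mat"],
     "pred_answer": "a cat is sitting on a mat"},
]
print("Bleu-4:", evaluator.eval_pred_list(example_pred_list))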
def __init__(self, ground_truth_filenames=None, prediction_filename=None,
             verbose=False, all_scorer=False):
    # Check that the gt and submission files exist and load them
    if not ground_truth_filenames:
        raise IOError('Please input a valid ground truth file.')
    if not prediction_filename:
        raise IOError('Please input a valid prediction file.')

    self.verbose = verbose
    self.all_scorer = all_scorer
    self.ground_truths = self.import_ground_truths(ground_truth_filenames)
    self.prediction = self.import_prediction(prediction_filename)
    self.tokenizer = PTBTokenizer()

    # Set up scorers; if not verbose, we only use the one we're
    # testing on: METEOR
    if self.verbose or self.all_scorer:
        self.scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
        ]
    else:
        self.scorers = [(Meteor(), "METEOR")]
def __init__(self, data, soda_type="c", tious=None, scorer="Meteor", verbose=False): #self.data = data self.preds = data.preds self.gts = data.gts self.gt_vids = data.gt_vids self.soda_type = soda_type self.tious = [0.0] if tious is None else tious self.tokenizer = PTBTokenizer() if scorer == "BertScore": from nlpeval.bert_r_score import BertScore self.scorer = eval(scorer)() self.scorer_name = scorer self.verbose = verbose if soda_type == "a": # averaging F-measure scores with IoU threshold = 0.9, 0.7, 0.5, 0.3 self.soda_func = self.soda_a elif soda_type == "b": # F-measure, where IoU threshold is set to 0. self.soda_func = self.soda_b elif soda_type == "c": # F-measure, utilizing the IoU x METEOR score self.soda_func = self.soda_c elif soda_type == "d": # F-measure of IoU score self.soda_func = self.soda_d class Dummy: def compute_score(self, x, y): return [0, 0] self.scorer = Dummy() else: raise NotImplementedError
def score(self, GT, RES, IDs):
    self.eval = {}
    self.imgToEval = {}
    gts = {}
    res = {}
    for ID in IDs:
        # print ID
        gts[ID] = GT[ID]
        res[ID] = RES[ID]
    print 'tokenization...'
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print 'setting up scorers...'
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    # =================================================
    # Compute scores
    # =================================================
    eval = {}
    for scorer, method in scorers:
        print 'computing %s score...' % (scorer.method())
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, IDs, m)
                print "%s: %0.3f" % (m, sc)
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, IDs, method)
            print "%s: %0.3f" % (method, score)
    # for metric, score in self.eval.items():
    #     print '%s: %.3f' % (metric, score)
    return self.eval
def create_valtokcap(data_folder=DATA_FOLDER):
    import gc
    gc.collect()

    vcap = COCO(COCO_VALID_CAP_FILE)
    valimgids, tesimgids = getValimgids(), getTesimgids()

    valcap = []
    for i in valimgids:
        valcap += vcap.imgToAnns[i]
    tescap = []
    for i in tesimgids:
        tescap += vcap.imgToAnns[i]

    vallistedCapMap = {}
    for i in valcap:
        vallistedCapMap[i['id']] = [dict([('caption', i['caption']),
                                          ('image_id', i['image_id'])])]
    valtokenizedListedCapMap = PTBTokenizer().tokenize(vallistedCapMap)

    teslistedCapMap = {}
    for i in tescap:
        teslistedCapMap[i['id']] = [dict([('caption', i['caption']),
                                          ('image_id', i['image_id'])])]
    testokenizedListedCapMap = PTBTokenizer().tokenize(teslistedCapMap)

    # map caption ids to a map of its tokenized caption and image id
    valtokcap = []
    for i, j in valtokenizedListedCapMap.iteritems():
        valtokcap += [(i, dict([('caption', j[0]),
                                ('image_id', vallistedCapMap[i][0]['image_id'])]))]
    testokcap = []
    for i, j in testokenizedListedCapMap.iteritems():
        testokcap += [(i, dict([('caption', j[0]),
                                ('image_id', teslistedCapMap[i][0]['image_id'])]))]

    f = open(data_folder + '/preprocessed/valtokcap.json', 'w')
    json.dump(valtokcap, f)
    f.close()
    f = open(data_folder + '/preprocessed/testokcap.json', 'w')
    json.dump(testokcap, f)
    f.close()