def compute_score(gts, val_caps, train_imgids, val_imgids, i, j):
    """Score one validation caption against the references of each train id.

    The caption ``val_caps[val_imgids[i]][j]`` is wrapped in a one-element
    list and used as the candidate for every id in ``train_imgids``.  The
    candidates are then handed to a ``Bleu(4)`` scorer together with ``gts``
    and ``train_imgids``.

    Returns:
        The second value returned by the scorer (its ``scores``), converted
        to a NumPy array.
    """
    candidates = {
        imgid: [val_caps[val_imgids[i]][j]] for imgid in train_imgids
    }
    _, detailed_scores = Bleu(4).compute_score(gts, candidates, train_imgids)
    return np.array(detailed_scores)
# Captions must be passed as space-separated (already tokenized) text.
ground_truth = {
    '262148': [
        'オレンジ色 の シャツ を 着た 人 が います',
        'オレンジ色 の Tシャツ を 着ている 人 が 立って います',
    ],
}

# Each image must have exactly one predicted caption (still wrapped in a list).
predicted = {
    '262148': ['人 が オレンジ色 の シャツ を 着て 立って います'],
}

# Keys may be numbers or strings, as long as the ground truth and the
# predictions use matching keys.

# Compute BLEU.
scorer = Bleu(4)
score, scores = scorer.compute_score(ground_truth, predicted)
print(scores)
for i, value in enumerate(scores):
    # Not the same as `score`: BLEU does not use a standard mean
    # (some weighted geometric mean?).
    print(i, np.mean(value))

# METEOR is not computed here: it requires an additional thesaurus.

# Compute ROUGE.
scorer = Rouge()
score, scores = scorer.compute_score(ground_truth, predicted)
print(score)
print(np.mean(scores))

# Compute CIDEr.
class CaptionEvaluater(object):
    """Score predicted captions against references with BLEU, ROUGE and CIDEr.

    Call :meth:`set_ground_truth` once with the reference captions, then
    :meth:`evaluate` with the predictions for the same image identifiers.
    Punctuation and ``<unk>`` markers are stripped from every caption before
    scoring.
    """

    def __init__(self):
        self.blue_scorer = Bleu(4)
        self.rouge_scorer = Rouge()
        self.cider_scorer = Cider()
        # Cleaned reference captions; populated by set_ground_truth().
        self.truth = None
        # Regex (as a string) matching every single character to strip.
        self.remove_pattern = self._build_remove_pattern()

    @staticmethod
    def _build_remove_pattern():
        """Return a regex character class matching every character to strip."""
        remove = string.punctuation + "、。,."
        # string.punctuation contains "\", "]", "^" and "-", all of which are
        # special inside a character class.  Left unescaped, "\]" is parsed
        # as an escaped bracket and the backslash itself is never matched.
        return "[{}]".format(re.escape(remove))

    def remove_punctuation(self, line):
        """Return ``line`` without ``<unk>`` markers and punctuation."""
        # "<unk>" has to go first: "<" and ">" are punctuation, so running
        # the regex first would leave a stray "unk" token behind.
        line = line.replace("<unk>", "")
        line = line.replace(u"。", "")
        return re.sub(self.remove_pattern, "", line)

    def trnasform_utf8(self, line):
        """Return ``line`` unchanged (the UTF-8 re-encoding is disabled)."""
        return line

    def _clean_captions(self, captions_by_image):
        """Return a new dict of real lists with punctuation-free captions."""
        # Lists, not map(): on Python 3 map() yields a one-shot iterator that
        # the first scorer would exhaust, leaving nothing for the others.
        return {
            img: [self.remove_punctuation(caption) for caption in captions]
            for img, captions in captions_by_image.items()
        }

    def set_ground_truth(self, ground_truth):
        """Store the cleaned reference captions used by later evaluations.

        ``ground_truth`` is a dictionary shaped like
        ``{"image_identifier": ["a caption", "a similar caption", ...], ...}``
        where "image_identifier" can be either a string or a number.
        The passed dictionary is not modified; a cleaned copy is kept in
        ``self.truth``.
        """
        self.truth = self._clean_captions(ground_truth)

    def evaluate(self, predicetd_captions):
        """Score predictions against the stored ground truth.

        ``predicetd_captions`` is a dictionary shaped like
        ``{"image_identifier": ["the predicted caption"], ...}``.
        "image_identifier" must be the same as used in the ground truth, and
        each list must hold exactly one caption.  The passed dictionary is
        not modified.

        Returns a dict with the keys ``"bleu-0"`` .. ``"bleu-3"``, ``"rouge"``
        and ``"cider"``.
        """
        cleaned = self._clean_captions(predicetd_captions)
        results = {}
        # NOTE: the keys are zero-based, so "bleu-0" holds the first entry of
        # the scorer's [bleu-1, bleu-2, bleu-3, bleu-4] list.  Kept as-is so
        # existing consumers of the result dict keep working.
        for i, score in enumerate(self.get_bleu(cleaned)[0]):
            results["bleu-%d" % i] = score
        results["rouge"] = self.get_rouge(cleaned)[0]
        results["cider"] = self.get_cider(cleaned)[0]
        return results

    def get_bleu(self, predicetd_captions):
        """Return ``(score, scores)`` from the BLEU scorer.

        ``score`` is a python list ``[bleu-1, bleu-2, bleu-3, bleu-4]``.
        """
        score, scores = self.blue_scorer.compute_score(
            self.truth, predicetd_captions)
        return score, scores

    def get_rouge(self, predicetd_captions):
        """Return ``(score, scores)`` from the ROUGE scorer."""
        score, scores = self.rouge_scorer.compute_score(
            self.truth, predicetd_captions)
        return score, scores

    def get_cider(self, predicetd_captions):
        """Return ``(score, scores)`` from the CIDEr scorer."""
        score, scores = self.cider_scorer.compute_score(
            self.truth, predicetd_captions)
        return score, scores
def calc_bleu(gts, res):
    """Run a default-configured ``Bleu`` scorer on ``gts`` and ``res``.

    Returns:
        The ``(score, scores)`` pair produced by ``Bleu().compute_score``.
    """
    overall, detailed = Bleu().compute_score(gts, res)
    return overall, detailed
# The captions are stored exactly as given.  A disabled earlier variant
# normalized them first (drop everything outside [a-zA-Z0-9 ], lowercase,
# strip, split into words).
a[1].append(test_cap)
for ref in (ref_cap1, ref_cap2):
    b[1].append(ref)

print(a)
print(b)
print("Printed a and b")

# A disabled experiment used BleuScorer(n=4) fed with (a, b) directly;
# the Bleu wrapper below is what actually runs.
scorer = Bleu(4)
score, scores = scorer.compute_score(b, a)

print("Score: ")
print(score)
print("Scores: ")
print(scores)

method = ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]
print(method)

print("Loop Bois!")
# Walk the overall score, the detailed scores and the metric name in lockstep.
for sc, scs, m in zip(score, scores, method):
    print(sc)
    print(scs)
    print(m)
    print("{}: {}".format(m, sc))
bluescorer = Bleu(1)

# prepare METEOR scorer
from meteor.meteor import Meteor
meteorscorer = Meteor()

# Evaluate how well two captions arriving through stdin correspond, and write
# the scores back to stdout.  Each input line holds three tab-separated
# fields; the first is passed to tokenize() alongside each caption
# (presumably an identifier -- confirm against tokenize()).
if __name__ == '__main__':
    # read all lines from stdin
    for line in sys.stdin.readlines():
        # readlines() keeps the line terminator: drop it so that blank lines
        # are really empty and the last field carries no trailing newline.
        line = line.rstrip('\r\n')

        # ignore empty lines
        if not line:
            continue

        # split into test caption and predicted caption
        fields = line.split('\t')
        if len(fields) != 3:
            print('ERROR\tinvalid format in line {}'.format(line))
            break

        # tokenize both captions
        testcapt = tokenize(fields[0], fields[1])
        predcapt = tokenize(fields[0], fields[2])

        # compute bleu score; the scorer returns a list of scores, of which
        # the first entry is the value to report.
        bleuscores, _ = bluescorer.compute_score(testcapt, predcapt)
        bleuscore = bleuscores[0]

        # METEOR scoring is currently disabled; a constant 0.0 is reported.
        meteorscore = 0.0
        # meteorscore, tmp2 = meteorscorer.compute_score(testcapt, predcapt)

        print('{:.2f}\t{:.2f}'.format(bleuscore, meteorscore))

    # Flush once at the end so the error path is covered as well.
    sys.stdout.flush()