Ejemplo n.º 1
0
def compute_score(gts, val_caps, train_imgids, val_imgids, i, j):
    """Score one validation caption against every training image with BLEU.

    The caption ``val_caps[val_imgids[i]][j]`` is used as the single
    candidate for each id in ``train_imgids``. The candidates are scored
    against ``gts`` with ``Bleu(4)``.

    Returns the second value produced by the scorer's ``compute_score``
    wrapped in a NumPy array; the first (aggregate) value is discarded.
    """
    # The caption lookup stays inside the comprehension: it is only evaluated
    # when there is at least one training id, and every id gets its own list.
    candidates = {
        imgid: [val_caps[val_imgids[i]][j]] for imgid in train_imgids
    }
    _, detailed = Bleu(4).compute_score(gts, candidates, train_imgids)
    return np.array(detailed)
Ejemplo n.º 2
0
# Captions must be passed already tokenized, with tokens separated by spaces.
# Keys may be numbers or strings, as long as ground_truth and predicted use
# the same key for the same image.
ground_truth = {
    '262148': [
        'オレンジ色 の シャツ を 着た 人 が います',
        'オレンジ色 の Tシャツ を 着ている 人 が 立って います',
    ],
}
# Each key of predicted must map to a list holding exactly one caption.
predicted = {
    '262148': ['人 が オレンジ色 の シャツ を 着て 立って います'],
}

# Compute BLEU.
scorer = Bleu(4)
score, scores = scorer.compute_score(ground_truth, predicted)
print(scores)
# NOTE: the plain mean printed below is not the same as the aggregate score;
# BLEU does not use a standard mean (possibly a weighted geometric mean?).
for i, value in enumerate(scores):
    print(i, np.mean(value))

# METEOR is not computed here; it apparently requires an additional thesaurus.

# Compute ROUGE.
scorer = Rouge()
score, scores = scorer.compute_score(ground_truth, predicted)
print(score)
print(np.mean(scores))

# Compute CIDEr.
Ejemplo n.º 3
0
class CaptionEvaluater(object):
    """Score predicted captions against reference captions.

    Typical use: call ``set_ground_truth`` once with the reference captions,
    then call ``evaluate`` with each set of predictions. Every caption is
    cleaned with ``remove_punctuation`` before it is handed to the BLEU,
    ROUGE and CIDEr scorers.
    """

    # Characters stripped from every caption: ASCII punctuation plus the
    # ideographic / full-width comma and full stop.
    _REMOVE_CHARS = string.punctuation + "、。,."
    # Character class matching any one of the characters above. re.escape is
    # required: string.punctuation contains a backslash followed by "]", and
    # unescaped that pair is read as an escaped bracket, so a literal
    # backslash would never be matched.
    remove_pattern = r"[{}]".format(re.escape(_REMOVE_CHARS))

    def __init__(self, ):
        self.blue_scorer = Bleu(4)
        self.rouge_scorer = Rouge()
        self.cider_scorer = Cider()
        # Cleaned reference captions; populated by set_ground_truth().
        self.truth = None

    def remove_punctuation(self, line):
        """Return ``line`` without "<unk>" tokens and punctuation characters."""
        # "<unk>" must be removed first: once "<" and ">" are stripped as
        # punctuation, a bare "unk" would be left behind.
        line = line.replace(u"<unk>", "")
        line = line.replace(u"。", "")
        return re.sub(self.remove_pattern, "", line)

    def trnasform_utf8(self, line):
        """Return ``line`` unchanged.

        The UTF-8 encoding step this method once performed is disabled. The
        misspelled name is kept so existing callers keep working.
        """
        return line

    def set_ground_truth(self, ground_truth):
        '''
        Store the cleaned reference captions used by later evaluations.

        ground_truth should be a python dictionary whose shape is;
            {"image_identifier": ["a caption", "a similar caption", ...], ...}
        "image_identifier" can be either string or number.

        The caller's dictionary is not modified; a cleaned copy is kept in
        self.truth.
        '''
        # Real lists are built explicitly: on Python 3 map() returns a
        # one-shot iterator, which is not a list and would be exhausted after
        # the first scorer has consumed it.
        self.truth = {
            img: [self.remove_punctuation(caption) for caption in captions]
            for img, captions in ground_truth.items()
        }

    def evaluate(self, predicetd_captions):
        '''
        Score the predictions against the stored ground truth.

        predicetd_captions should be a python dictionary whose shape is;
            {"image_identifier": ["the predicted caption"], ...}
        "image_identifier" needs to be the same as used in the ground truth.
        Make sure there is only one caption per image, even though it is
        wrapped in a python list.

        Returns a dictionary with one "bleu-<i>" entry per element of the
        BLEU scorer's aggregate result, plus "rouge" and "cider".
        Raises ValueError if set_ground_truth() has not been called yet.
        The caller's dictionary is not modified.
        '''
        if self.truth is None:
            raise ValueError(
                "set_ground_truth() must be called before evaluate()")

        # Clean into a fresh dict of real lists (see set_ground_truth).
        cleaned = {
            img: [self.remove_punctuation(caption) for caption in captions]
            for img, captions in predicetd_captions.items()
        }

        results = {}
        # NOTE(review): keys are numbered from 0 ("bleu-0" .. "bleu-3") while
        # the scorer output is described as [bleu-1, ..., bleu-4]. The keys
        # are kept as-is because callers may depend on them.
        for i, score in enumerate(self.get_bleu(cleaned)[0]):
            results["bleu-%d" % i] = score
        results["rouge"] = self.get_rouge(cleaned)[0]
        results["cider"] = self.get_cider(cleaned)[0]

        return results

    def get_bleu(self, predicetd_captions):
        """Return ``(score, scores)`` from the BLEU scorer for the predictions.

        ``score`` is reported to be a python list [bleu-1, bleu-2, bleu-3,
        bleu-4]. The captions are passed to the scorer as given.
        """
        score, scores = self.blue_scorer.compute_score(self.truth,
                                                       predicetd_captions)
        return score, scores

    def get_rouge(self, predicetd_captions):
        """Return ``(score, scores)`` from the ROUGE scorer for the predictions."""
        score, scores = self.rouge_scorer.compute_score(
            self.truth, predicetd_captions)
        return score, scores

    def get_cider(self, predicetd_captions):
        """Return ``(score, scores)`` from the CIDEr scorer for the predictions."""
        score, scores = self.cider_scorer.compute_score(
            self.truth, predicetd_captions)
        return score, scores
Ejemplo n.º 4
0
def calc_bleu(gts, res):
    """Run a default-configured ``Bleu`` scorer on ``gts`` and ``res``.

    Returns the two values produced by ``Bleu().compute_score(gts, res)``
    as a ``(score, scores)`` tuple.
    """
    overall, detailed = Bleu().compute_score(gts, res)
    return overall, detailed
Ejemplo n.º 5
0
# Captions are appended unmodified; the variant that stripped
# non-alphanumeric characters, lower-cased and split them is disabled.
a[1].append(test_cap)
b[1].append(ref_cap1)
b[1].append(ref_cap2)

print(a)
print(b)
print("Printed a and b")

# b is passed in the first (ground-truth) position and a in the second
# (candidate) position of compute_score.
scorer = Bleu(4)
score, scores = scorer.compute_score(b, a)
for label, result in (("Score: ", score), ("Scores: ", scores)):
    print(label)
    print(result)

method = ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]
print(method)
print("Loop Bois!")
# Walk the aggregate value, the detailed values and the metric name together.
for overall, detailed, metric in zip(score, scores, method):
    for item in (overall, detailed, metric):
        print(item)
    print("{}: {}".format(metric, overall))
Ejemplo n.º 6
0
bluescorer = Bleu(1)
# prepare METEOR scorer
from meteor.meteor import Meteor

meteorscorer = Meteor()

# Evaluate how well two captions arriving through stdin correspond, and write
# the scores back to stdout. Each input line holds three tab-separated
# fields; the first is passed to tokenize() together with each caption
# (presumably an identifier - TODO confirm against tokenize()).
if __name__ == '__main__':
    # read all lines from stdin
    lines = sys.stdin.readlines()
    # process each line
    for line in lines:
        # readlines() keeps the line terminator, so it has to be stripped
        # before a blank line can be recognised as empty
        line = line.rstrip('\r\n')
        if not line:
            continue
        # split into key, test caption and predicted caption
        split = line.split('\t')
        if len(split) != 3:
            print('ERROR\tinvalid format in line {}'.format(line))
            break
        # tokenize both captions
        testcapt = tokenize(split[0], split[1])
        predcapt = tokenize(split[0], split[2])
        # compute bleu score; the aggregate result is indexable and only its
        # first entry is reported
        bleuscores, _ = bluescorer.compute_score(testcapt, predcapt)
        bleuscore = bleuscores[0]
        # METEOR scoring is currently disabled (meteorscorer.compute_score is
        # not called); 0.0 is written as a placeholder
        meteorscore = 0.0
        print('{:.2f}\t{:.2f}'.format(bleuscore, meteorscore))
        sys.stdout.flush()