Exemple #1
0
def compute_score(gts, val_caps, train_imgids, val_imgids, i, j):
    """Run SPICE with one fixed candidate caption assigned to every training id.

    The candidate is ``val_caps[val_imgids[i]][j]``; it is wrapped in a fresh
    single-element list for each id in ``train_imgids``.

    Args:
        gts: Reference captions, handed to the scorer unchanged.
        val_caps: Container indexed first by a validation image id and then
            by caption position.
        train_imgids: Iterable of ids that become the keys of the candidate
            dict; also forwarded to the scorer as its third argument.
        val_imgids: Sequence of validation image ids, indexed by ``i``.
        i: Position in ``val_imgids`` of the image whose caption is used.
        j: Position of the caption within ``val_caps[val_imgids[i]]``.

    Returns:
        ``numpy.ndarray`` built from the second value returned by
        ``Spice.compute_score``; the first (aggregate) value is discarded.
    """
    # NOTE(review): this relies on a Spice.compute_score variant that accepts
    # a third positional argument (the id list) -- confirm against the scorer.
    candidates = {
        train_id: [val_caps[val_imgids[i]][j]] for train_id in train_imgids
    }
    _, per_item = Spice().compute_score(gts, candidates, train_imgids)
    return np.array(per_item)
Exemple #2
0
def evaluate_captions(res: dict, gts: dict):
    """Tokenize both caption sets and compute every configured metric.

    Note the argument order: candidates (``res``) come first, references
    (``gts``) second.

    Args:
        res: Candidate captions, in the format ``PTBTokenizer.tokenize`` accepts.
        gts: Reference captions, in the same format.

    Returns:
        dict mapping a metric name ("Bleu_1" .. "Bleu_4", "METEOR",
        "ROUGE_L", "CIDEr", "SPICE") to the aggregate score reported by the
        corresponding scorer.
    """
    # -------------------------------------------------
    # Tokenize references and candidates
    # -------------------------------------------------
    print('tokenization...')
    ptb = PTBTokenizer()
    gts = ptb.tokenize(gts)
    res = ptb.tokenize(res)

    # -------------------------------------------------
    # Build the (scorer, metric name(s)) table
    # -------------------------------------------------
    print('setting up scorers...')
    metric_table = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE"),
    ]

    # -------------------------------------------------
    # Run each scorer and collect the aggregate values
    # -------------------------------------------------
    results = {}
    for metric, label in metric_table:
        print('computing %s score...' % (metric.method()))
        overall, per_item = metric.compute_score(gts, res)
        if not isinstance(label, list):
            results[label] = overall
            continue
        # A list of labels means the scorer returned one aggregate per label
        # (the Bleu entry); the per-item values are not needed here.
        for value, _, name in zip(overall, per_item, label):
            results[name] = value

    return results
Exemple #3
0
def evaluator(gts, res):
    """Tokenize both caption sets, compute every configured metric and print it.

    Args:
        gts: Reference captions, in the format the module-level ``tokenize``
            helper accepts.
        res: Candidate captions, in the same format.

    Returns:
        dict mapping a metric name ("Bleu_1" .. "Bleu_4", "METEOR", "CIDEr",
        "SPICE") to the aggregate score reported by the corresponding scorer.
        Earlier revisions built this dict but never returned it.
    """
    # Named ``results`` rather than ``eval`` so the builtin is not shadowed.
    results = {}

    # =================================================
    # Tokenize
    # =================================================
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print('tokenization...')
    # Todo: use Spacy for tokenization
    gts = tokenize(gts)
    res = tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        # (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE"),
    ]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            # One aggregate value per metric name (the Bleu entry); the
            # per-item values are not used by this function.
            for sc, _, m in zip(score, scores, method):
                results[m] = sc
                print("%s: %0.3f" % (m, sc))
        else:
            results[method] = score
            print("%s: %0.3f" % (method, score))

    return results
Exemple #4
0
    def __init__(self, coco, cocoRes):
        """Store the two datasets and start with empty evaluation results.

        Args:
            coco: Dataset object; must provide ``getImgIds()``, whose result
                becomes the default ``params['image_id']``.
            cocoRes: Second dataset object, stored as-is.
                NOTE(review): presumably the model results to be scored
                against ``coco`` -- confirm with the caller.
        """
        # Datasets under comparison.
        self.coco = coco
        self.cocoRes = cocoRes

        # Result containers, filled in by later evaluation calls.
        self.evalImgs, self.eval, self.imgToEval = [], {}, {}

        # By default every image id reported by ``coco`` is evaluated.
        self.params = dict(image_id=coco.getImgIds())

        self.Spice = Spice()
Exemple #5
0
    def evaluate(self, gts=None, res=None):
        """Score captions with every configured metric and record the results.

        When both ``gts`` and ``res`` are omitted, they are gathered from
        ``self.coco.imgToAnns`` / ``self.cocoRes.imgToAnns`` for the ids in
        ``self.params['image_id']`` and passed through ``PTBTokenizer``.
        Otherwise the supplied values are handed to the scorers exactly as
        given (no tokenization), so callers should pass either both or
        neither.

        Results are recorded through ``self.setEval``,
        ``self.setImgToEvalImgs`` and ``self.setEvalImgs``.

        Args:
            gts: Optional reference captions, already in scorer input format.
            res: Optional candidate captions, already in scorer input format.

        Returns:
            tuple: ``(gts, res)`` as they were fed to the scorers, so a
            caller can reuse the tokenized captions in a later call.
        """
        imgIds = self.params['image_id']
        if gts is None and res is None:
            gts = {}
            res = {}
            for imgId in imgIds:
                gts[imgId] = self.coco.imgToAnns[imgId]
                res[imgId] = self.cocoRes.imgToAnns[imgId]

            # =================================================
            # Tokenize
            # =================================================
            # Single-argument print(...) is valid on Python 2 and Python 3.
            print('tokenization...')
            tokenizer = PTBTokenizer()
            gts = tokenizer.tokenize(gts)
            res = tokenizer.tokenize(res)

        # The former debug dumps of image id 184321 were removed: they raised
        # KeyError for any dataset without that id.  The early
        # ``return gts, res`` that made the scoring below unreachable now
        # sits at the end of the method.

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            (Spice(), "SPICE"),
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                # One (aggregate, per-image list) pair per metric name.
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()

        return gts, res
Exemple #6
0
class SpiceEval():
    """Compute SPICE for a caption set and keep the per-image results."""

    def __init__(self):
        """Create empty result containers plus one scorer and one tokenizer."""
        self.evalImgs = []
        self.eval = {}
        self.imgToEval = {}
        self.spice = Spice()
        self.tokenizer = PTBTokenizer()

    def evaluate(self, gts, res):
        """Tokenize both inputs, run SPICE and record the scores.

        Both inputs have the structure
        {'123': [{'image_id':123, 'caption': 'xxxxx'}, {'image_id':123, 'caption': 'yyy'}], ...}
        and must have exactly the same key set.

        Args:
            gts: Reference captions keyed by image id.
            res: Candidate captions keyed by image id.

        Returns:
            tuple: ``(score, imgToEval)``. ``score`` is the aggregate value
            also stored in ``self.eval['spice']``. ``imgToEval`` is
            ``self.imgToEval``, mapping each image id to
            ``{"image_id": id, "spice": per_image_score}``.
        """
        assert set(gts.keys()) == set(res.keys())
        # Snapshot the ids before ``gts`` is rebound to the tokenized dict.
        # NOTE(review): pairing sorted ids with ``scores`` assumes the scorer
        # returns per-image results in sorted-id order -- confirm against Spice.
        imgIds = sorted(gts.keys())
        gts = self.tokenizer.tokenize(gts)
        res = self.tokenizer.tokenize(res)

        # =================================================
        # Compute scores
        # =================================================
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print('computing %s score...' % (self.spice.method()))
        score, scores = self.spice.compute_score(gts, res)
        print("%s: %0.3f" % ("spice", score))
        self.eval['spice'] = score
        print(scores)
        # A distinct loop name keeps the aggregate ``score`` from being
        # overwritten by the per-image values.
        for imgId, img_score in zip(imgIds, scores):
            if imgId not in self.imgToEval:
                self.imgToEval[imgId] = {}
                self.imgToEval[imgId]["image_id"] = imgId
            self.imgToEval[imgId]["spice"] = img_score
        return self.eval['spice'], self.imgToEval
Exemple #7
0
    def evaluate(self):
        """Tokenize the stored annotations and score them with every metric.

        Annotations are read from ``self.coco.imgToAnns`` (references) and
        ``self.cocoRes.imgToAnns`` (candidates) for each id in
        ``self.params["image_id"]``. Results are recorded through
        ``self.setEval``, ``self.setImgToEvalImgs`` and ``self.setEvalImgs``;
        nothing is returned.
        """
        imgIds = self.params["image_id"]
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Tokenize
        # =================================================
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print("tokenization...")
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print("setting up scorers...")
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            (Spice(), "SPICE"),
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print("computing %s score..." % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                # One (aggregate, per-image list) pair per metric name.
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Exemple #8
0
    def evaluate(self, res, gts):
        """Tokenize the given captions and score them with every metric.

        Note the argument order: candidates (``res``) come first, references
        (``gts``) second. Results are recorded through ``self.setEval``,
        ``self.setImgToEvalImgs`` and ``self.setEvalImgs``; nothing is
        returned.

        Args:
            res: Candidate captions, in the format ``PTBTokenizer.tokenize``
                accepts.
            gts: Reference captions, in the same format.
        """
        # =================================================
        # Tokenize
        # =================================================
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            (Spice(), "SPICE"),
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                # One (aggregate, per-image list) pair per metric name.
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Exemple #9
0
 def __init__(self):
     """Start with empty result containers, one scorer and one tokenizer."""
     # Result containers, filled in by later evaluation calls.
     self.evalImgs, self.eval, self.imgToEval = [], {}, {}
     # Helpers are built once and kept on the instance.
     self.spice = Spice()
     self.tokenizer = PTBTokenizer()
Exemple #10
0
# 1. parse sentence using SPICE and save the parsed information into json file (adapted from create coco_sg.py)
data_path = './data/caption_sentences.txt'
# Read every line (trailing newlines are kept, as before) and close the file
# promptly instead of leaving the handle to the garbage collector.
with open(data_path, 'r') as sent_file:
    sent_list = list(sent_file)

# Each line gets its position in the file as a synthetic image id. The line is
# stored in the reference dict; the candidate side is a constant dummy string.
gts = {}
res = {}
img_ids = []
for img_id, this_sent in enumerate(sent_list):
    gts[img_id] = [this_sent]
    res[img_id] = ['place holder']
    img_ids.append(img_id)

scorer = Spice()
score, scores = scorer.compute_score(gts, res)

# 2. extract the parsed triplets from json file (adapted from process_spice_sg.py)
from nltk.stem import WordNetLemmatizer
from functools import partial


def change_word(lem, word_ori):
    """
    Lemmatizer a word, like change 'holding' to 'hold' or
    'cats' to 'cat'
    """
    word_ori = word_ori.lower()
    word_change = lem.lemmatize(word_ori)
    if word_change == word_ori:
Exemple #11
0
                    # print(comparsions_better_than_me)
                    for model in model_index:
                        rank = len(comparsions_better_than_me.get(model, [])) 
                        #print(idx)
                        sum_all_5[model][idx] = rank
                    idx += 1
                    comparsions_better_than_me = defaultdict(list)
                    
        gts = {}
        res = {}

    human_ranks = [sum_all_1,sum_all_2,sum_all_3,sum_all_4,sum_all_4]
    models = ['Leven-Const','GPT-2','BERT-Gen','UniLM','BART','T5']
    scorers = [
        (Bleu(4), "Bleu_4"),
        (Meteor(), "METEOR"),
        # (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE")
    ]
    #scorers = [Bleu(4),Meteor(),Cider(),Spice()]

    with codecs.open('human_eval.jsonl', encoding='utf-8') as f:
        lines = f.readlines()
    for scorer,method in scorers:
        agg = compute_agreement(scorer,lines)
        print "The averaged Kendall's tau of %s with 5 human annotators is %s" % (method,str(agg))
        


Exemple #12
0
def calc_spice(gts, res):
    """Score ``res`` against ``gts`` with a freshly constructed SPICE scorer.

    Args:
        gts: Reference captions, forwarded to ``Spice.compute_score``.
        res: Candidate captions, forwarded to ``Spice.compute_score``.

    Returns:
        tuple: the two values returned by ``Spice.compute_score``, in order.
    """
    overall, per_item = Spice().compute_score(gts, res)
    return overall, per_item