Beispiel #1
0
    def __init__(self):
        """Create one instance of each caption-evaluation scorer.

        The scorer classes are imported inside the constructor, so the
        ``coco_caption`` package is only needed once an instance is built.
        The instances are stored as ``self.bleu``, ``self.cider``,
        ``self.rouge`` and ``self.meteor``.
        """
        from coco_caption.pycocoevalcap.bleu.bleu import Bleu
        from coco_caption.pycocoevalcap.cider.cider import Cider
        from coco_caption.pycocoevalcap.rouge.rouge import Rouge
        from coco_caption.pycocoevalcap.meteor.meteor import Meteor

        # (attribute name, scorer class) pairs, instantiated in this order.
        scorer_table = (
            ('bleu', Bleu),
            ('cider', Cider),
            ('rouge', Rouge),
            ('meteor', Meteor),
        )
        for attr_name, scorer_cls in scorer_table:
            setattr(self, attr_name, scorer_cls())
Beispiel #2
0
def calc_bleu(generated_captions_fn, target_captions_fn):
    """Compute BLEU scores for generated captions stored in JSON files.

    Args:
        generated_captions_fn: Path to a JSON file containing a sequence of
            records, each with an ``image_id`` and a single ``caption``.
        target_captions_fn: Path to a JSON file containing a sequence of
            records, each with an ``image_id`` and its reference
            ``captions`` (presumably a list of strings -- verify against
            the file producer).

    Returns:
        The scores returned by ``Bleu(n=4).compute_score``, each rounded to
        two decimals. The same list is also printed.
    """
    # JSON text is UTF-8 by specification; an explicit encoding keeps
    # non-ASCII captions intact on platforms whose locale default differs.
    with open(generated_captions_fn, encoding="utf-8") as f:
        generated_captions = json.load(f)
    with open(target_captions_fn, encoding="utf-8") as f:
        target_captions = json.load(f)

    # The single generated caption is wrapped in a list so both mappings
    # have the form {image_id: [caption, ...]}.
    id2caption = {meta['image_id']: [meta['caption']] for meta in generated_captions}
    id2targets = {meta['image_id']: meta['captions'] for meta in target_captions}

    bleu4 = Bleu(n=4)
    bleu_scores, _ = bleu4.compute_score(id2targets, id2caption)
    bleu_scores = [float("%.2f" % elem) for elem in bleu_scores]
    print("BLEU scores:", bleu_scores)
    return bleu_scores
Beispiel #3
0
def init_scorer(cached_tokens):
    """Lazily build the module-level CIDEr-D, CIDEr and BLEU scorers.

    A scorer is only created when its global currently holds a falsy value,
    so repeated calls keep the instances that already exist.

    :param cached_tokens: forwarded as ``df`` to ``CiderD`` and ``Cider``
    """
    global CiderD_scorer, Cider_scorer, Bleu_scorer

    if not CiderD_scorer:
        CiderD_scorer = CiderD(df=cached_tokens)
    if not Cider_scorer:
        Cider_scorer = Cider(df=cached_tokens)
    if not Bleu_scorer:
        Bleu_scorer = Bleu(4)
Beispiel #4
0
def validate(model, data_loader, max_caption_len, print_freq):
    """
    Perform validation of one training epoch.

    Runs the model on every batch, turns the highest-scoring token of each
    decoding step into a caption, and scores the generated captions against
    the reference captions with BLEU.

    :param model: captioning model; must expose ``decoder.word_map`` and be
        callable as ``model(images, None, decode_lengths)``, returning
        ``(scores, decode_lengths, alphas)``
    :param data_loader: iterable yielding
        ``(images, all_captions_for_image, _, coco_id)`` batches; ``coco_id``
        is expected to hold one id per image of the batch
    :param max_caption_len: decode length requested for every image
    :param print_freq: a progress line is logged every ``print_freq`` batches
    :return: the last entry of the score list returned by
        ``Bleu(n=4).compute_score`` (logged as BLEU-4)
    """
    word_map = model.decoder.word_map
    model.eval()

    target_captions = []
    generated_captions = []
    coco_ids = []
    bleu4 = Bleu(n=4)

    # Nothing is back-propagated during validation, so autograd bookkeeping
    # would only cost memory and time.
    with torch.no_grad():
        # Loop over batches
        for i, (images, all_captions_for_image, _, coco_id) in enumerate(data_loader):
            images = images.to(device)

            # Forward propagation
            decode_lengths = torch.full((images.size(0),), max_caption_len, dtype=torch.int64, device=device)
            scores, decode_lengths, alphas = model(images, None, decode_lengths)

            if i % print_freq == 0:
                logging.info("Validation: [Batch {0}/{1}]\t".format(i, len(data_loader)))

            # Target captions: one list of decoded references per image
            for j in range(all_captions_for_image.shape[0]):
                img_captions = [decode_caption(rm_caption_special_tokens(caption, word_map), word_map)
                                for caption in all_captions_for_image[j].tolist()]
                target_captions.append(img_captions)

            # Generated captions: arg-max token at every decoding step
            _, captions = torch.max(scores, dim=2)
            generated_captions.extend(
                decode_caption(rm_caption_special_tokens(caption, word_map), word_map)
                for caption in captions.tolist()
            )

            # Record the id of EVERY image in the batch. Keeping only the
            # first id would leave fewer ids than captions for batch sizes
            # above one and silently drop most images from the score.
            batch_ids = coco_id.tolist() if hasattr(coco_id, "tolist") else list(coco_id)
            coco_ids.extend(batch_ids)

            assert len(target_captions) == len(generated_captions) == len(coco_ids)

    id2targets = dict(zip(coco_ids, target_captions))
    id2caption = {image_id: [caption] for image_id, caption in zip(coco_ids, generated_captions)}
    bleus, _ = bleu4.compute_score(id2targets, id2caption)
    bleu = bleus[-1]

    logging.info("\n * BLEU-4 - {bleu}\n".format(bleu=bleu))
    return bleu
Beispiel #5
0
class Metrics:
    """Convenience wrapper around the BLEU, CIDEr, ROUGE and METEOR scorers."""

    def __init__(self):
        """Import the scorer classes and create one instance of each."""
        from coco_caption.pycocoevalcap.bleu.bleu import Bleu
        from coco_caption.pycocoevalcap.cider.cider import Cider
        from coco_caption.pycocoevalcap.rouge.rouge import Rouge
        from coco_caption.pycocoevalcap.meteor.meteor import Meteor

        self.bleu = Bleu()
        self.cider = Cider()
        self.rouge = Rouge()
        self.meteor = Meteor()

    def compute_single_score(self, truth, pred):
        '''
        Compute BLEU and ROUGE for a single sentence pair.

        :param truth: <String> the ground truth sentence
        :param pred:  <String> predicted sentence
        :return: dict with the keys 'BLEU' and 'ROUGE'; each value is the
                 first element of the respective scorer's result
        '''
        # Both scorers receive the same {id: [sentence]} mappings; a single
        # placeholder id is sufficient for one pair.
        references = {'1': [truth]}
        candidates = {'1': [pred]}

        bleu_result = self.bleu.compute_score(references, candidates)
        rouge_result = self.rouge.compute_score(references, candidates)

        return dict(BLEU=bleu_result[0], ROUGE=rouge_result[0])

    def compute_set_score(self, truths, preds):
        '''
        Compute BLEU, ROUGE and CIDEr for a keyed set of sentence pairs.

        :param truths: mapping from key to ground truth sentence
        :param preds:  mapping from key to predicted sentence
        :return: dict with the keys 'BLEU', 'ROUGE' and 'CIDEr'; each value
                 is the first element of the respective scorer's result
        '''
        # Wrap every sentence in a one-element list: {key: [sentence]}.
        references = {}
        for key, sentence in truths.items():
            references[key] = [sentence]
        candidates = {}
        for key, sentence in preds.items():
            candidates[key] = [sentence]

        bleu_result = self.bleu.compute_score(references, candidates)
        rouge_result = self.rouge.compute_score(references, candidates)
        cider_result = self.cider.compute_score(references, candidates)

        summary = {}
        summary['BLEU'] = bleu_result[0]
        summary['ROUGE'] = rouge_result[0]
        summary['CIDEr'] = cider_result[0]
        return summary
def score(ref, hypo):
    """Score hypotheses against references with BLEU and METEOR.

    :param ref: references, passed as the first argument to every scorer's
        ``compute_score``
    :param hypo: hypotheses, passed as the second argument
    :return: dict mapping a metric name to the overall score reported by the
        scorer; a scorer that reports a list of scores contributes one entry
        per name in its name list ("Bleu_1" .. "Bleu_4")
    """
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        # Currently disabled:
        # (Rouge(), "ROUGE_L"),
        # (Cider(), "CIDEr"),
    ]
    final_scores = {}
    for scorer, method in scorers:
        # Only the overall score is used; the per-item scores are ignored.
        # The local is deliberately not called ``score`` so it does not
        # shadow this function's own name.
        overall, _ = scorer.compute_score(ref, hypo)
        if isinstance(overall, list):
            # One score per metric name, paired positionally.
            final_scores.update(zip(method, overall))
        else:
            final_scores[method] = overall
    return final_scores
Beispiel #7
0
    def score(self, GT, RES, IDs):
        """Tokenize the selected entries and evaluate them with all scorers.

        Resets ``self.eval`` and ``self.imgToEval``, then records the overall
        and per-item results of BLEU-1..4, METEOR, ROUGE-L and CIDEr through
        ``self.setEval`` and ``self.setImgToEvalImgs``.

        :param GT: mapping indexed by id holding the ground-truth entries
        :param RES: mapping indexed by id holding the generated entries
        :param IDs: ids to evaluate; each must be present in ``GT`` and ``RES``
        :return: ``self.eval``
        """
        self.eval = {}
        self.imgToEval = {}
        gts = {}
        res = {}
        for ID in IDs:
            gts[ID] = GT[ID]
            res[ID] = RES[ID]

        # Tokenize references and results
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            # (Spice(), "SPICE")
        ]

        # =================================================
        # Compute scores
        # =================================================
        # NOTE(review): per-item scores are paired with ``IDs``; this
        # presumes ``IDs`` is duplicate-free and ordered like the tokenized
        # ``gts`` -- confirm against setImgToEvalImgs.
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                # A scorer with several metric names reports one overall
                # score and one per-item score list per name.
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, IDs, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, IDs, method)
                print("%s: %0.3f" % (method, score))

        return self.eval
# Load the decoder weights from the checkpoint, move the decoder to the GPU
# and switch it to evaluation mode.
decoder.load_state_dict(ckpt['decoder'])
decoder = decoder.cuda()
decoder.eval()

# Re-register the parameter stored as 'weight_hh_raw' on decoder.lstm_base
# under the name 'weight_hh': read it, remove the old entry, then register
# it again. The order of these three steps matters.
# NOTE(review): presumably this undoes a weight-drop style wrapper that
# renamed the hidden-to-hidden weight -- confirm against the training code.
module = decoder.lstm_base
w = getattr(module, 'weight_hh_raw')
del module._parameters['weight_hh_raw']
module.register_parameter('weight_hh', Parameter(w))

# Normalization
# NOTE(review): these values look like the usual ImageNet per-channel
# mean/std -- confirm they match the statistics used during training.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Different metrics
# One scorer instance per metric, keyed by metric name; the Spice entry is
# currently disabled.
scorer = {
    'BLEU': Bleu(),
    'Meteor': Meteor(),
    'Rouge': Rouge(),
    'Cider': Cider(),
    'WMD': WMD(),
    # 'Spice': Spice()
}


def evaluate(beam_size=3):
    # Dataloader
    val_dl = DataLoader(AVACaptioningDataset(data_fd=dataset_path,
                                             mode=mode,
                                             transform=transforms.Compose(
                                                 [normalize])),
                        batch_size=1,
Beispiel #9
0
    def evaluate(self, verbose=True):
        """Evaluate the result captions against the references.

        Computes BLEU-1..4, METEOR, ROUGE-L, CIDEr and SPICE for the audio
        ids listed in ``self.params['audio_id']``, derives SPIDEr as the mean
        of CIDEr and SPICE, and records everything through ``self.setEval``
        and ``self.setAudioToEvalAudios`` before calling
        ``self.setEvalAudios``.

        :param verbose: when true, print progress messages and every score
        """
        audioIds = self.params['audio_id']
        gts = {}
        res = {}
        for audioId in audioIds:
            gts[audioId] = self.coco.audioToAnns[audioId]
            res[audioId] = self.cocoRes.audioToAnns[audioId]

        # =================================================
        # Tokenize
        # =================================================
        if verbose:
            print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        if verbose:
            print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            (Spice(), "SPICE"),
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            if verbose:
                print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                # A scorer with several metric names reports one overall
                # score and one per-audio score list per name.
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setAudioToEvalAudios(scs, gts.keys(), m)
                    if verbose:
                        print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setAudioToEvalAudios(scores, gts.keys(), method)
                if verbose:
                    print("%s: %0.3f" % (method, score))

        # Compute SPIDEr metric (average of CIDEr and SPICE)
        if verbose:
            print('computing %s score...' % 'SPIDEr')
        score = (self.eval['CIDEr'] + self.eval['SPICE']) / 2.
        # Per-audio SPIDEr: the per-audio CIDEr value averaged with the
        # 'f' entry of the per-audio SPICE 'All' record.
        per_audio = list(self.audioToEval.values())
        cider_per_audio = np.array([audio['CIDEr'] for audio in per_audio])
        spice_per_audio = np.array([audio['SPICE']['All']['f'] for audio in per_audio])
        scores = list((cider_per_audio + spice_per_audio) / 2)
        self.setEval(score, 'SPIDEr')
        self.setAudioToEvalAudios(scores, gts.keys(), 'SPIDEr')
        if verbose:
            print("%s: %0.3f" % ('SPIDEr', score))

        self.setEvalAudios()