class Metrics:
    """Convenience wrapper around the coco_caption scorers.

    The scorer classes are imported when an instance is constructed, so the
    ``coco_caption`` package is only required once ``Metrics()`` is called.
    """

    def __init__(self):
        from coco_caption.pycocoevalcap.bleu.bleu import Bleu
        from coco_caption.pycocoevalcap.cider.cider import Cider
        from coco_caption.pycocoevalcap.rouge.rouge import Rouge
        from coco_caption.pycocoevalcap.meteor.meteor import Meteor

        # One scorer instance per metric, reused across calls.
        self.bleu = Bleu()
        self.cider = Cider()
        self.rouge = Rouge()
        self.meteor = Meteor()

    def compute_single_score(self, truth, pred):
        """Score one predicted sentence against one ground-truth sentence.

        :param truth: <String> the ground truth sentence
        :param pred: <String> predicted sentence
        :return: dict with keys 'BLEU' and 'ROUGE'; each value is the first
            element of what the matching scorer's ``compute_score`` returns
        """
        # Both scorers receive the same single-entry mappings under key '1'.
        references = {'1': [truth]}
        candidates = {'1': [pred]}

        bleu_result = self.bleu.compute_score(references, candidates)
        rouge_result = self.rouge.compute_score(references, candidates)

        return {'BLEU': bleu_result[0], 'ROUGE': rouge_result[0]}

    def compute_set_score(self, truths, preds):
        """Score a whole set of predictions against their ground truths.

        :param truths: mapping of key -> ground truth sentence
        :param preds: mapping of key -> predicted sentence
        :return: dict with keys 'BLEU', 'ROUGE' and 'CIDEr'; each value is
            the first element of what the matching scorer's
            ``compute_score`` returns
        """
        # Each sentence is wrapped in a one-element list before scoring.
        references = {key: [sentence] for key, sentence in truths.items()}
        candidates = {key: [sentence] for key, sentence in preds.items()}

        # Run every scorer first (BLEU, ROUGE, CIDEr in that order), then
        # pick the leading element of each result.
        raw_results = []
        for label, attribute in (('BLEU', 'bleu'), ('ROUGE', 'rouge'), ('CIDEr', 'cider')):
            scorer = getattr(self, attribute)
            raw_results.append((label, scorer.compute_score(references, candidates)))

        return {label: result[0] for label, result in raw_results}
def calc_bleu(generated_captions_fn, target_captions_fn):
    """Compute BLEU scores for generated captions stored in JSON files.

    :param generated_captions_fn: path to a JSON file holding a list of
        objects with an 'image_id' and a single 'caption'
    :param target_captions_fn: path to a JSON file holding a list of
        objects with an 'image_id' and a list of reference 'captions'
    :return: list of BLEU scores as returned by ``Bleu(n=4)`` (presumably
        BLEU-1 through BLEU-4), each rounded to two decimals
    """

    def _load_json(path):
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default text encoding, which breaks non-ASCII captions on some
        # systems.
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    generated_captions = _load_json(generated_captions_fn)
    target_captions = _load_json(target_captions_fn)

    # The scorer expects, per image id, a list of candidate sentences and a
    # list of reference sentences; the single generated caption is wrapped
    # in a one-element list.
    id2caption = {
        meta['image_id']: [meta['caption']] for meta in generated_captions
    }
    id2targets = {
        meta['image_id']: meta['captions'] for meta in target_captions
    }

    bleu4 = Bleu(n=4)
    bleu_scores, _ = bleu4.compute_score(id2targets, id2caption)
    # Round-trip through a two-decimal string to truncate the precision.
    bleu_scores = [float("%.2f" % elem) for elem in bleu_scores]
    print("BLEU scores:", bleu_scores)
    return bleu_scores
def validate(model, data_loader, max_caption_len, print_freq):
    """
    Perform validation of one training epoch.

    The model is switched to eval mode, captions are generated greedily
    (arg-max over the score dimension) for every batch, and BLEU is
    computed over all collected samples.

    :param model: captioning model; must expose ``decoder.word_map`` and be
        callable as ``model(images, None, decode_lengths)`` returning a
        3-tuple whose first element holds the scores
    :param data_loader: iterable yielding
        ``(images, all_captions_for_image, _, coco_id)`` batches
    :param max_caption_len: number of decoding steps requested per sample
    :param print_freq: log progress every ``print_freq`` batches
    :return: the last element of the BLEU score list (BLEU-4 for ``n=4``)
    :raises ValueError: if the number of collected ids differs from the
        number of generated captions
    """
    word_map = model.decoder.word_map
    model.eval()

    target_captions = []
    generated_captions = []
    coco_ids = []

    bleu4 = Bleu(n=4)

    def _to_text(token_ids):
        # Strip special tokens, then map the remaining ids back to words.
        return decode_caption(
            rm_caption_special_tokens(token_ids, word_map), word_map
        )

    # Loop over batches
    for i, (images, all_captions_for_image, _, coco_id) in enumerate(data_loader):
        images = images.to(device)

        # Forward propagation. No gradients are needed for validation, so
        # skip building the autograd graph to save memory and time.
        decode_lengths = torch.full(
            (images.size(0),), max_caption_len, dtype=torch.int64, device=device
        )
        with torch.no_grad():
            scores, _, _ = model(images, None, decode_lengths)

        if i % print_freq == 0:
            logging.info("Validation: [Batch {0}/{1}]\t".format(i, len(data_loader)))

        # Target captions: one list of reference sentences per image
        for captions_for_image in all_captions_for_image.tolist():
            target_captions.append(
                [_to_text(caption) for caption in captions_for_image]
            )

        # Generated captions: greedy choice along dim 2
        # (assumes scores is (batch, steps, vocab) - TODO confirm)
        _, captions = torch.max(scores, dim=2)
        generated_captions.extend(_to_text(caption) for caption in captions.tolist())

        # Record the id of EVERY sample in the batch. Keeping only
        # coco_id[0] misaligned ids and captions (and dropped samples from
        # the score) whenever the batch size was larger than one.
        coco_ids.extend(coco_id)

    assert len(target_captions) == len(generated_captions)
    if len(coco_ids) != len(generated_captions):
        raise ValueError(
            "Collected {0} ids for {1} generated captions".format(
                len(coco_ids), len(generated_captions)
            )
        )

    id2targets = dict(zip(coco_ids, target_captions))
    id2caption = {
        sample_id: [caption]
        for sample_id, caption in zip(coco_ids, generated_captions)
    }

    bleus, _ = bleu4.compute_score(id2targets, id2caption)
    bleu = bleus[-1]

    logging.info("\n * BLEU-4 - {bleu}\n".format(bleu=bleu))

    return bleu