import json

from coco_caption.pycocoevalcap.bleu.bleu import Bleu  # import path assumed, matching the other snippets


def calc_bleu(generated_captions_fn, target_captions_fn):
    """Compute BLEU-1..4 for generated captions against reference captions stored in JSON files."""
    with open(generated_captions_fn) as f:
        generated_captions = json.load(f)
    with open(target_captions_fn) as f:
        target_captions = json.load(f)

    # One generated caption and several reference captions per image id
    id2caption = {meta['image_id']: [meta['caption']] for meta in generated_captions}
    id2targets = {meta['image_id']: meta['captions'] for meta in target_captions}

    bleu4 = Bleu(n=4)
    bleu_scores, _ = bleu4.compute_score(id2targets, id2caption)
    bleu_scores = [float("%.2f" % elem) for elem in bleu_scores]
    print("BLEU scores:", bleu_scores)
    return bleu_scores
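# Usage sketch (illustrative only, not from the original file): calc_bleu reads one
# JSON file with a single generated caption per image and one with the reference
# captions. The layouts below show the fields the function expects.
example_generated = [{'image_id': 1, 'caption': 'a dog runs on the grass'}]
example_targets = [{'image_id': 1,
                    'captions': ['a dog running on grass',
                                 'a brown dog runs across a lawn']}]
with open('generated.json', 'w') as f:
    json.dump(example_generated, f)
with open('targets.json', 'w') as f:
    json.dump(example_targets, f)
calc_bleu('generated.json', 'targets.json')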
def init_scorer(cached_tokens):
    # Lazily build the module-level scorer singletons so each one is constructed only once.
    global CiderD_scorer
    CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    global Cider_scorer
    Cider_scorer = Cider_scorer or Cider(df=cached_tokens)
    global Bleu_scorer
    Bleu_scorer = Bleu_scorer or Bleu(4)
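# Assumed module-level setup for init_scorer (a sketch, not taken from the original
# file): the scorer handles start as None and are filled in on the first call.
# The import paths are guesses based on the cider / coco-caption packages.
# from ciderD.ciderD import CiderD
# from cider.cider import Cider
# from coco_caption.pycocoevalcap.bleu.bleu import Bleu
CiderD_scorer = None
Cider_scorer = None
Bleu_scorer = None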
def validate(model, data_loader, max_caption_len, print_freq):
    """Perform validation of one training epoch."""
    word_map = model.decoder.word_map
    model.eval()

    target_captions = []
    generated_captions = []
    coco_ids = []
    bleu4 = Bleu(n=4)

    # Loop over batches
    for i, (images, all_captions_for_image, _, coco_id) in enumerate(data_loader):
        images = images.to(device)

        # Forward propagation
        decode_lengths = torch.full((images.size(0),), max_caption_len,
                                    dtype=torch.int64, device=device)
        scores, decode_lengths, alphas = model(images, None, decode_lengths)

        if i % print_freq == 0:
            logging.info("Validation: [Batch {0}/{1}]\t".format(i, len(data_loader)))

        # Target captions
        for j in range(all_captions_for_image.shape[0]):
            img_captions = [decode_caption(rm_caption_special_tokens(caption, word_map), word_map)
                            for caption in all_captions_for_image[j].tolist()]
            target_captions.append(img_captions)

        # Generated captions
        _, captions = torch.max(scores, dim=2)
        captions = [decode_caption(rm_caption_special_tokens(caption, word_map), word_map)
                    for caption in captions.tolist()]
        generated_captions.extend(captions)

        coco_ids.append(coco_id[0])

    assert len(target_captions) == len(generated_captions)

    id2targets = {coco_ids[ix]: target_captions[ix] for ix in range(len(coco_ids))}
    id2caption = {coco_ids[ix]: [generated_captions[ix]] for ix in range(len(coco_ids))}
    bleus, _ = bleu4.compute_score(id2targets, id2caption)
    bleu = bleus[-1]

    logging.info("\n * BLEU-4 - {bleu}\n".format(bleu=bleu))
    return bleu
class Metrics:
    def __init__(self):
        from coco_caption.pycocoevalcap.bleu.bleu import Bleu
        from coco_caption.pycocoevalcap.cider.cider import Cider
        from coco_caption.pycocoevalcap.rouge.rouge import Rouge
        from coco_caption.pycocoevalcap.meteor.meteor import Meteor
        self.bleu = Bleu()
        self.cider = Cider()
        self.rouge = Rouge()
        self.meteor = Meteor()

    def compute_single_score(self, truth, pred):
        '''
        Compute several metrics for a single sentence pair.
        :param truth: <String> the ground truth sentence
        :param pred: <String> predicted sentence
        :return: dict of metric scores
        '''
        bleu_gts = {'1': [truth]}
        bleu_res = {'1': [pred]}
        bleu_score = self.bleu.compute_score(bleu_gts, bleu_res)
        rouge_gts = bleu_gts
        rouge_res = bleu_res
        rouge_score = self.rouge.compute_score(rouge_gts, rouge_res)
        return {'BLEU': bleu_score[0], 'ROUGE': rouge_score[0]}

    def compute_set_score(self, truths, preds):
        gts = {k: [v] for k, v in truths.items()}
        res = {k: [v] for k, v in preds.items()}
        bleu_score = self.bleu.compute_score(gts, res)
        rouge_score = self.rouge.compute_score(gts, res)
        cider_score = self.cider.compute_score(gts, res)
        return {
            'BLEU': bleu_score[0],
            'ROUGE': rouge_score[0],
            'CIDEr': cider_score[0]
        }
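# Usage sketch (illustrative only): scores for a single pair and for a keyed set of
# predictions. compute_score returns (corpus_score, per_item_scores); index 0 keeps the
# corpus-level value, and the BLEU entry is the list [BLEU-1, BLEU-2, BLEU-3, BLEU-4].
metrics = Metrics()
single = metrics.compute_single_score('a dog runs on the grass', 'a dog is running on grass')
batch = metrics.compute_set_score(
    truths={'img1': 'a dog runs on the grass', 'img2': 'two people ride bikes'},
    preds={'img1': 'a dog is running on grass', 'img2': 'people riding bicycles'})
print(single['BLEU'], batch['CIDEr'])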
def score(ref, hypo):
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        # (Rouge(), "ROUGE_L"),
        # (Cider(), "CIDEr")
    ]
    final_scores = {}
    for scorer, method in scorers:
        score, scores = scorer.compute_score(ref, hypo)
        if type(score) == list:
            for m, s in zip(method, score):
                final_scores[m] = s
        else:
            final_scores[method] = score
    return final_scores
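# Usage sketch (assumed call shape, not from the original file): both arguments map an
# id to a list of caption strings; ref holds the references and hypo the hypotheses.
# Note that Meteor() shells out to the METEOR jar, so Java must be available.
example_ref = {'0': ['a dog runs on the grass', 'a brown dog runs across a lawn']}
example_hypo = {'0': ['a dog is running on grass']}
print(score(example_ref, example_hypo))  # e.g. {'Bleu_1': ..., 'Bleu_4': ..., 'METEOR': ...}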
def score(self, GT, RES, IDs):
    self.eval = {}
    self.imgToEval = {}
    gts = {}
    res = {}
    for ID in IDs:
        # print ID
        gts[ID] = GT[ID]
        res[ID] = RES[ID]

    # get token
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        # (Spice(), "SPICE")
    ]

    # =================================================
    # Compute scores
    # =================================================
    eval = {}
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, IDs, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, IDs, method)
            print("%s: %0.3f" % (method, score))

    # for metric, score in self.eval.items():
    #     print('%s: %.3f' % (metric, score))
    return self.eval
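# Input-format note (based on the coco-caption PTBTokenizer, stated here as an
# assumption about this class): before tokenization GT[ID] and RES[ID] are lists of
# annotation dicts with a 'caption' field, and tokenize() turns each entry into a list
# of plain, lowercased, tokenized strings.
# example_GT = {'42': [{'caption': 'A dog runs on the grass.'}]}
# example_RES = {'42': [{'caption': 'A dog is running on grass.'}]}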
decoder.load_state_dict(ckpt['decoder'])
decoder = decoder.cuda()
decoder.eval()

# Replace the 'weight_hh_raw' parameter with a standard 'weight_hh' parameter
# (undoing the weight-drop style renaming) before running inference.
module = decoder.lstm_base
w = getattr(module, 'weight_hh_raw')
del module._parameters['weight_hh_raw']
module.register_parameter('weight_hh', Parameter(w))

# Normalization
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Different metrics
scorer = {
    'BLEU': Bleu(),
    'Meteor': Meteor(),
    'Rouge': Rouge(),
    'Cider': Cider(),
    'WMD': WMD(),
    # 'Spice': Spice()
}


def evaluate(beam_size=3):
    # Dataloader
    val_dl = DataLoader(AVACaptioningDataset(data_fd=dataset_path, mode=mode,
                                             transform=transforms.Compose([normalize])),
                        batch_size=1,
def evaluate(self, verbose=True):
    audioIds = self.params['audio_id']
    # audioIds = self.coco.getAudioIds()
    gts = {}
    res = {}
    for audioId in audioIds:
        gts[audioId] = self.coco.audioToAnns[audioId]
        res[audioId] = self.cocoRes.audioToAnns[audioId]

    # =================================================
    # Tokenization
    # =================================================
    if verbose:
        print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    if verbose:
        print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
               (Meteor(), "METEOR"),
               (Rouge(), "ROUGE_L"),
               (Cider(), "CIDEr"),
               (Spice(), "SPICE")]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        if verbose:
            print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setAudioToEvalAudios(scs, gts.keys(), m)
                if verbose:
                    print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setAudioToEvalAudios(scores, gts.keys(), method)
            if verbose:
                print("%s: %0.3f" % (method, score))

    # Compute SPIDEr metric (average of CIDEr and SPICE)
    if verbose:
        print('computing %s score...' % ('SPIDEr'))
    score = (self.eval['CIDEr'] + self.eval['SPICE']) / 2.
    scores = list(
        (np.array([audio['CIDEr'] for audio in self.audioToEval.values()]) +
         np.array([audio['SPICE']['All']['f'] for audio in self.audioToEval.values()])) / 2)
    self.setEval(score, 'SPIDEr')
    self.setAudioToEvalAudios(scores, gts.keys(), 'SPIDEr')
    if verbose:
        print("%s: %0.3f" % ('SPIDEr', score))

    self.setEvalAudios()