def compute_score(self, gts, res):
    """
    Score one candidate caption per image against its references with CIDEr.

    :param gts (dict) : image key -> non-empty list of reference sentences
    :param res (dict) : image key -> list holding exactly one candidate sentence
    :return: the (score, scores) pair unpacked from CiderScorer.compute_score()
    """
    # Candidates and references must describe exactly the same images.
    assert(gts.keys() == res.keys())
    image_keys = gts.keys()

    scorer = CiderScorer(n=self._n, sigma=self._sigma)

    for image_key in image_keys:
        candidate = res[image_key]
        references = gts[image_key]

        # Sanity check: one candidate, at least one reference, both plain lists.
        assert(type(candidate) is list)
        assert(len(candidate) == 1)
        assert(type(references) is list)
        assert(len(references) > 0)

        scorer += (candidate[0], references)

    score, scores = scorer.compute_score()
    return score, scores
def __init__(self, coco, useBleu=False, useCider=False):
    """
    Collect and tokenize the reference captions, then prime the scorers.

    :param coco : object exposing getImgIds() and an imgToAnns mapping
    :param useBleu (bool) : create a BleuScorer as self.b_scorer
    :param useCider (bool) : create a CiderScorer as self.c_scorer, feed it
                             every reference list and compute its document
                             frequencies
    """
    self.coco = coco
    self.useBleu = useBleu
    self.useCider = useCider
    self.params = {'image_id': coco.getImgIds()}

    image_ids = self.params['image_id']
    references = {image_id: self.coco.imgToAnns[image_id]
                  for image_id in image_ids}

    if self.useBleu:
        self.b_scorer = BleuScorer()
    if self.useCider:
        self.c_scorer = CiderScorer()

    print('tokenization...')
    references = PTBTokenizer().tokenize(references)

    for image_id in image_ids:
        image_refs = references[image_id]
        # Every image must keep a non-empty list of references after tokenization.
        assert (type(image_refs) is list)
        assert (len(image_refs) > 0)
        if self.useCider:
            # None goes in the first slot: only references are registered here.
            self.c_scorer += (None, image_refs)

    if self.useCider:
        self.c_scorer.compute_doc_freq()
        # No document frequency may exceed the number of entries in ctest.
        assert (len(self.c_scorer.ctest) >= max(
            self.c_scorer.document_frequency.values()))
def compute_score(self, gts, res):
    """
    Score one candidate caption per image against its references with CIDEr.

    The keys of gts drive the iteration; res is not required to have the same
    key set, but it must contain every key found in gts.

    :param gts (dict) : image key -> non-empty list of reference sentences
    :param res (dict) : image key -> list holding exactly one candidate sentence
    :return: the (score, scores) pair unpacked from CiderScorer.compute_score()
    """
    image_keys = gts.keys()

    scorer = CiderScorer(n=self._n, sigma=self._sigma)

    for image_key in image_keys:
        candidate = res[image_key]
        references = gts[image_key]

        # Sanity check: one candidate, at least one reference, both plain lists.
        assert(type(candidate) is list)
        assert(len(candidate) == 1)
        assert(type(references) is list)
        assert(len(references) > 0)

        scorer += (candidate[0], references)

    score, scores = scorer.compute_score()
    return score, scores
def compute_score(self, gts, res):
    """
    Score a sequence of candidate records against their references with CIDEr.

    : param gts (dict) : image id -> non-empty list of reference sentences
    : param res (iterable of dict) : each record has an 'image_id' key and a
                                     'caption' key holding a one-element list
    : return: the (score, scores) pair unpacked from
              CiderScorer.compute_score(self._df)
    """
    scorer = CiderScorer(n=self._n)

    for record in res:
        candidate = record['caption']
        references = gts[record['image_id']]

        # Sanity check: one candidate, at least one reference, both plain lists.
        assert(type(candidate) is list)
        assert(len(candidate) == 1)
        assert(type(references) is list)
        assert(len(references) > 0)

        scorer += (candidate[0], references)

    score, scores = scorer.compute_score(self._df)
    return score, scores
def __init__(self, n=4, df="corpus"):
    """
    Store the scoring configuration and build the underlying CiderScorer.

    : param n (int): n-gram size, forwarded to CiderScorer as n
    : param df (string): where the IDF values come from ('corpus' or
                         'coco-train'), forwarded to CiderScorer as df_mode
    : return: None
    """
    self._n = n
    self._df = df
    # A single scorer instance is created once and kept on the object.
    self.cider_scorer = CiderScorer(df_mode=self._df, n=self._n)
def setup(self, bottom, top): if len(bottom) != 2: raise Exception("Inputs 2 bottom blobs - image_ids and captions.") if len(top) != 4: raise Exception("Outputs 3 top blobs - score_weights, input_sentence, target_sentence, mean_score.") params = ast.literal_eval(self.param_str) self._end_of_sequence = params['end_of_sequence'] self._ignore_label = params['ignore_label'] # Load vocab self._vocab = [] with open(params['vocab_path']) as vocab_file: for word in vocab_file: self._vocab.append(word.lower().strip()) self._cider = CiderScorer(params['gt_caption_paths'])
class evalSentence:
    """
    Sentence-level BLEU / CIDEr evaluation against a set of reference captions.

    The scorers are created only when the matching flag is set; calling an
    eval_* method whose flag is off fails its assertion.
    """

    def __init__(self, coco, useBleu=False, useCider=False):
        """
        Collect and tokenize the reference captions, then prime the scorers.

        :param coco : object exposing getImgIds() and an imgToAnns mapping
        :param useBleu (bool) : create a BleuScorer as self.b_scorer
        :param useCider (bool) : create a CiderScorer as self.c_scorer, feed it
                                 every reference list and compute its document
                                 frequencies
        """
        self.coco = coco
        self.useBleu = useBleu
        self.useCider = useCider
        self.params = {'image_id': coco.getImgIds()}

        image_ids = self.params['image_id']
        references = {image_id: self.coco.imgToAnns[image_id]
                      for image_id in image_ids}

        if self.useBleu:
            self.b_scorer = BleuScorer()
        if self.useCider:
            self.c_scorer = CiderScorer()

        print('tokenization...')
        references = PTBTokenizer().tokenize(references)

        for image_id in image_ids:
            image_refs = references[image_id]
            # Every image must keep a non-empty list of references.
            assert (type(image_refs) is list)
            assert (len(image_refs) > 0)
            if self.useCider:
                # None goes in the first slot: only references are registered.
                self.c_scorer += (None, image_refs)

        if self.useCider:
            self.c_scorer.compute_doc_freq()
            # No document frequency may exceed the number of entries in ctest.
            assert (len(self.c_scorer.ctest) >= max(
                self.c_scorer.document_frequency.values()))

    def eval_cider(self, test, ref):
        """Return self.c_scorer.compute_cider(test, ref) wrapped in a numpy array."""
        assert (self.useCider)
        return np.array(self.c_scorer.compute_cider(test, ref))

    def eval_bleu(self, test, ref):
        """
        Reset the BLEU scorer, add each (test, ref) pair and score them.

        :return: element 3 of the second value returned by
                 BleuScorer.compute_score() (BLEU-4)
        """
        assert (self.useBleu)
        self.b_scorer.reset_list()
        for pair in zip(test, ref):
            candidate, references = pair
            self.b_scorer += (candidate, references)
        _, per_order_scores = self.b_scorer.compute_score()
        return per_order_scores[3]  # return bleu_4
class Cider:
    """
    Front end for the CIDEr metric; owns one reusable CiderScorer.
    """

    def __init__(self, n=4, df="corpus"):
        """
        Store the scoring configuration and build the underlying CiderScorer.

        : param n (int): n-gram size, forwarded to CiderScorer as n
        : param df (string): where the IDF values come from ('corpus' or
                             'coco-train'), forwarded to CiderScorer as df_mode
        : return: None
        """
        self._n = n
        self._df = df
        self.cider_scorer = CiderScorer(df_mode=self._df, n=self._n)

    def compute_score(self, gts, res):
        """
        Score a sequence of candidate records against their references.

        : param gts (dict) : image id -> non-empty list of reference sentences
        : param res (iterable of dict) : each record has an 'image_id' key and
                                         a 'caption' key holding a one-element
                                         list
        : return: the (score, scores) pair unpacked from
                  self.cider_scorer.compute_score()
        """
        # The scorer is shared across calls, so drop whatever it held before.
        self.cider_scorer.clear()

        for record in res:
            candidate = record['caption']
            references = gts[record['image_id']]

            # Sanity check: one candidate, at least one reference, plain lists.
            assert(type(candidate) is list)
            assert(len(candidate) == 1)
            assert(type(references) is list)
            assert(len(references) > 0)

            self.cider_scorer += (candidate[0], references)

        score, scores = self.cider_scorer.compute_score()
        return score, scores

    def method(self):
        """Return the display name of this metric."""
        return "CIDEr"
def compute_score(self, gts, res):
    """
    Feed gts and res pairwise into a fresh CiderScorer and return its score.

    The two arguments are walked in lockstep with zip(), so they are consumed
    positionally and the shorter one bounds the number of pairs.

    :param gts : iterable supplying the first element of each scored pair
    :param res : iterable supplying the second element of each scored pair
    :return: only the first of the two values returned by
             CiderScorer.compute_score()
    """
    scorer = CiderScorer(n=self._n, sigma=self._sigma)

    # NOTE(review): sibling implementations add (candidate, references) to the
    # scorer, yet here the item taken from gts lands in the first slot and the
    # item from res in the second. Confirm against the callers whether the
    # arguments are deliberately passed the other way round.
    for gts_item, res_item in zip(gts, res):
        scorer += (gts_item, res_item)

    score, _ = scorer.compute_score()
    return score
class SCSTLayer(caffe.Layer):
    """
    Self-Critical Sequence Training (SCST) layer.
    Takes beam search and outputs weights for training.

    Bottom blobs: image_ids, captions.
    Top blobs: score_weights, input_sentence, target_sentence, mean_score.
    """

    def setup(self, bottom, top):
        """
        Validate the blob counts and load the layer configuration.

        Reads self.param_str as a Python literal dict with the keys
        'end_of_sequence', 'ignore_label', 'vocab_path' and 'gt_caption_paths'.

        :raises Exception: when the number of bottom or top blobs is wrong
        """
        if len(bottom) != 2:
            raise Exception("Inputs 2 bottom blobs - image_ids and captions.")
        if len(top) != 4:
            # The message previously said "3" although four blobs are required.
            raise Exception("Outputs 4 top blobs - score_weights, input_sentence, target_sentence, mean_score.")
        params = ast.literal_eval(self.param_str)
        self._end_of_sequence = params['end_of_sequence']
        self._ignore_label = params['ignore_label']
        # Load vocab: one word per line, lower-cased and stripped.
        self._vocab = []
        with open(params['vocab_path']) as vocab_file:
            for word in vocab_file:
                self._vocab.append(word.lower().strip())
        self._cider = CiderScorer(params['gt_caption_paths'])

    def _translate(self, blob):
        """
        Map a sequence of vocabulary indices to a list of words.

        Results will be lower case, tokenized, without full stop
        (to match reference tokenization): translation stops at the first
        '.' and that token is not included.
        """
        caption = []
        for ix in blob:
            next_word = self._vocab[int(ix)]
            if next_word == '.':
                break
            caption.append(next_word)
        return caption

    def reshape(self, bottom, top):
        """
        Size the top blobs from the captions blob.

        Dimensions 0, 2 and 3 of bottom[1] are read as batch size, beam size
        and sequence length. The three per-caption tops get one row per
        (batch item, beam) pair; top[3] holds a single value.
        """
        self._batch_size = bottom[1].shape[0]
        self._beam_size = bottom[1].shape[2]
        self._sequence_length = bottom[1].shape[3]
        num_rows = self._batch_size * self._beam_size
        top[0].reshape(num_rows, self._sequence_length)
        top[1].reshape(num_rows, self._sequence_length)
        top[2].reshape(num_rows, self._sequence_length)
        top[3].reshape(1)

    def forward(self, bottom, top):
        """
        Score every beam and emit training inputs, targets and weights.

        top[1] (input_sentence) is pre-filled with the end-of-sequence label
        and receives each caption shifted right by one position.
        top[2] (target_sentence) is pre-filled with the ignore label and
        receives each caption followed by the end-of-sequence label.
        top[0] (score_weights) receives, on every position of a row, that
        caption's score minus the mean score of its batch item's beams.
        top[3] (mean_score) receives the mean of all raw scores.
        """
        top[1].data[...] = self._end_of_sequence
        top[2].data[...] = self._ignore_label

        # Score captions and generate training input and target output
        image_ids = []
        captions = []
        for n in range(self._batch_size):
            for b in range(self._beam_size):
                row = n * self._beam_size + b
                image_ids.append(int(bottom[0].data[n][0]))
                seq = bottom[1].data[n][0][b]
                captions.append(self._translate(seq))
                # Keep as many raw indices as there are translated words.
                caption = seq[:len(captions[-1])].tolist()
                top[1].data[row, 1:min(self._sequence_length, len(caption) + 1)] = \
                    caption[:self._sequence_length - 1]  # input_sentence
                caption.append(self._end_of_sequence)
                top[2].data[row, :min(self._sequence_length, len(caption))] = \
                    caption[:self._sequence_length]  # target_sentence
        raw_scores = np.array(self._cider.compute_scores(image_ids, captions))

        # Generate score output
        for n in range(self._batch_size):
            first = n * self._beam_size
            # The baseline for an image is the mean score over its own beams.
            baseline = np.mean(raw_scores[first:first + self._beam_size])
            for b in range(self._beam_size):
                score = raw_scores[first + b]
                top[0].data[first + b, :] = score - baseline
        top[3].data[0] = np.mean(raw_scores)

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass
class SCSTSamplingLayer(caffe.Layer):
    """
    Self-Critical Sequence Training (SCST) layer.
    Takes argmax and sampled captions and outputs weights for training.

    Bottom blobs: image_ids, captions.
    Top blobs: score_weights, target_sentence, mean_score, scores.
    Caption rows are consumed in pairs: row 2k is scored as the baseline and
    row 2k+1 as the sample for the image in row k of bottom[0].
    """

    def setup(self, bottom, top):
        """
        Validate the blob counts and load the layer configuration.

        Reads self.param_str as a Python literal dict with the keys
        'end_of_sequence', 'ignore_label', 'vocab_path' and 'gt_caption_paths'.

        :raises Exception: when the number of bottom or top blobs is wrong
        """
        if len(bottom) != 2:
            raise Exception("Inputs 2 bottom blobs - image_ids and captions.")
        if len(top) != 4:
            raise Exception("Outputs 4 top blobs - score_weights, target_sentence, mean_score, scores.")
        params = ast.literal_eval(self.param_str)
        self._end_of_sequence = params['end_of_sequence']
        self._ignore_label = params['ignore_label']
        # Load vocab: one word per line, lower-cased and stripped.
        self._vocab = []
        with open(params['vocab_path']) as vocab_file:
            for word in vocab_file:
                self._vocab.append(word.lower().strip())
        self._cider = CiderScorer(params['gt_caption_paths'], include_eos=True)

    def _translate(self, blob):
        """
        Map a sequence of vocabulary indices to a list of words.

        Translation stops at the first '.', which is kept as the last token
        (the scorer is built with include_eos=True).
        """
        caption = []
        for ix in blob:
            next_word = self._vocab[int(ix)]
            if next_word == '.':
                caption.append(next_word)  # Include EOS
                break
            caption.append(next_word)
        return caption

    def reshape(self, bottom, top):
        """
        Size the top blobs from the captions blob.

        Dimensions 0 and 1 of bottom[1] are read as batch size and sequence
        length.
        """
        self._batch_size = bottom[1].shape[0]
        self._sequence_length = bottom[1].shape[1]
        top[0].reshape(self._batch_size, self._sequence_length)
        top[1].reshape(self._batch_size, self._sequence_length)
        top[2].reshape(1)
        top[3].reshape(self._batch_size)

    def forward(self, bottom, top):
        """
        Score every caption and emit targets and weights for the samples.

        top[0] (score_weights) is zeroed; each odd row then receives
        max(0, f(sample) - f(baseline)), where f is log for positive scores
        and the identity otherwise.
        top[1] (target_sentence) is pre-filled with the ignore label; only
        odd rows receive a caption.
        top[2] (mean_score) receives the mean of the even-row raw scores.
        top[3] (scores) receives every raw score.
        """
        top[0].data[...] = 0
        top[1].data[...] = self._ignore_label

        # Score captions and generate target output
        image_ids = []
        captions = []
        for n in range(self._batch_size):
            # Floor division: two consecutive caption rows share one image
            # row. "/" would yield a float index under Python 3.
            image_ids.append(int(bottom[0].data[n // 2][0]))
            seq = bottom[1].data[n]
            captions.append(self._translate(seq))
            if n % 2 == 1:
                # Generate targets
                caption = seq[:len(captions[-1])].tolist()
                top[1].data[n, :min(self._sequence_length, len(caption))] = \
                    caption[:self._sequence_length]  # target_sentence
        raw_scores = self._cider.compute_scores(image_ids, captions)

        # Generate score weights
        # range() needs an int, hence "//" rather than "/".
        for n in range(self._batch_size // 2):
            baseline_score = raw_scores[n * 2]
            sample_score = raw_scores[n * 2 + 1]
            top[3].data[n * 2] = baseline_score
            top[3].data[n * 2 + 1] = sample_score
            # Non-positive scores are left as they are instead of being logged.
            if sample_score > 0:
                sample_score = math.log(sample_score)
            if baseline_score > 0:
                baseline_score = math.log(baseline_score)
            top[0].data[n * 2 + 1] = max(0.0, sample_score - baseline_score)
        top[2].data[0] = np.mean(raw_scores[::2])

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass