Example #1
0
    def compute_score(self, gts, res):
        """
        Main function to compute CIDEr score
        :param  gts (dict) : dictionary with key <image> and value <list of reference sentences>
                res (dict) : dictionary with key <image> and value <list holding exactly one candidate sentence>
        :return: (score, scores) : the pair returned by CiderScorer.compute_score()
                 (presumably the corpus-level score and the per-image scores -- confirm against CiderScorer)
        :raises AssertionError: when gts and res do not cover the same images,
                 or when an entry does not have the expected list shape
        """

        # Compare the image ids as sets. On Python 2 ``keys()`` returns lists,
        # so a plain ``==`` also depends on each dict's internal ordering and
        # can fail for two dicts that hold exactly the same ids.
        assert set(gts.keys()) == set(res.keys()), \
            "gts and res must contain the same image ids"

        cider_scorer = CiderScorer(n=self._n, sigma=self._sigma)

        for img_id in gts:
            hypo = res[img_id]
            ref = gts[img_id]

            # Sanity check: one candidate and at least one reference per image.
            assert isinstance(hypo, list)
            assert len(hypo) == 1
            assert isinstance(ref, list)
            assert len(ref) > 0

            # The scorer accumulates (candidate, references) pairs through +=.
            cider_scorer += (hypo[0], ref)

        (score, scores) = cider_scorer.compute_score()

        return score, scores
Example #2
0
    def __init__(self, coco, useBleu=False, useCider=False):
        """
        Gather the reference annotations of every image in ``coco``, tokenize
        them, and (when CIDEr is enabled) register them with the CIDEr scorer.

        :param coco : dataset object; ``getImgIds()`` and ``imgToAnns`` are read from it
        :param useBleu (bool) : when true, a BleuScorer is stored in ``self.b_scorer``
        :param useCider (bool) : when true, a CiderScorer is stored in ``self.c_scorer``,
                                 fed with every image's references, and its
                                 document frequencies are computed
        """
        self.coco = coco
        self.useBleu = useBleu
        self.useCider = useCider
        self.params = {'image_id': coco.getImgIds()}

        image_ids = self.params['image_id']
        raw_refs = {image_id: self.coco.imgToAnns[image_id]
                    for image_id in image_ids}

        # Scorers are only created for the metrics that were requested.
        if self.useBleu:
            self.b_scorer = BleuScorer()
        if self.useCider:
            self.c_scorer = CiderScorer()

        print('tokenization...')
        tokenized_refs = PTBTokenizer().tokenize(raw_refs)

        for image_id in image_ids:
            refs = tokenized_refs[image_id]

            # Every image must come back with a non-empty list of references.
            assert type(refs) is list
            assert len(refs) > 0

            if self.useCider:
                # No candidate yet: only the references are registered.
                self.c_scorer += (None, refs)

        if not self.useCider:
            return

        self.c_scorer.compute_doc_freq()
        # Sanity check: no document frequency may exceed the number of
        # entries accumulated in the scorer.
        n_entries = len(self.c_scorer.ctest)
        highest_doc_freq = max(self.c_scorer.document_frequency.values())
        assert n_entries >= highest_doc_freq
Example #3
0
    def compute_score(self, gts, res):
        """
        Main function to compute CIDEr score
        :param  gts (dict) : dictionary with key <image> and value <non-empty list of reference sentences>
                res (dict) : dictionary with key <image> and value <list holding exactly one candidate sentence>
        :return: (score, scores) : the pair returned by CiderScorer.compute_score()
        """

        # gts and res are not required to have identical keys: only the ids
        # found in gts are visited, and an id missing from res raises KeyError.
        image_ids = gts.keys()

        scorer = CiderScorer(n=self._n, sigma=self._sigma)

        for image_id in image_ids:
            candidates, references = res[image_id], gts[image_id]

            # Sanity check: exactly one candidate, at least one reference.
            assert type(candidates) is list
            assert len(candidates) == 1
            assert type(references) is list
            assert len(references) > 0

            scorer += (candidates[0], references)

        score, scores = scorer.compute_score()
        return score, scores
Example #4
0
    def compute_score(self, gts, res):
        """
        Main function to compute CIDEr score
        : param gts (dict) : {image id: non-empty list of reference sentences}
        : param res (iterable) : records indexed with 'image_id' and 'caption';
                                 'caption' must be a list holding one candidate sentence
        : return: (score, scores) : the pair returned by
                  CiderScorer.compute_score(self._df)
        """

        scorer = CiderScorer(n=self._n)

        for entry in res:
            candidate = entry['caption']
            references = gts[entry['image_id']]

            # Sanity check: exactly one candidate, at least one reference.
            assert type(candidate) is list
            assert len(candidate) == 1
            assert type(references) is list
            assert len(references) > 0

            scorer += (candidate[0], references)

        # self._df is forwarded to the scorer only at scoring time.
        score, scores = scorer.compute_score(self._df)
        return score, scores
Example #5
0
 def __init__(self, n=4, df="corpus"):
     """
     Set up the CIDEr scoring function
     : param n (int): n-gram size, forwarded to CiderScorer as ``n``
     : param df (string): source of the IDF values ('corpus' or 'coco-train'),
                 forwarded to CiderScorer as ``df_mode``
     : return: None
     """
     self._n, self._df = n, df
     # A single scorer instance is created up front and kept on the object.
     self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df)
Example #6
0
 def setup(self, bottom, top):
   if len(bottom) != 2:
     raise Exception("Inputs 2 bottom blobs - image_ids and captions.")
   if len(top) != 4:
     raise Exception("Outputs 3 top blobs - score_weights, input_sentence, target_sentence, mean_score.")
   params = ast.literal_eval(self.param_str)
   self._end_of_sequence = params['end_of_sequence']
   self._ignore_label = params['ignore_label']
   # Load vocab
   self._vocab = []
   with open(params['vocab_path']) as vocab_file:
     for word in vocab_file:
       self._vocab.append(word.lower().strip())
   self._cider = CiderScorer(params['gt_caption_paths'])
Example #7
0
class evalSentence:
    """
    Sentence-level evaluation helper built around optional BLEU and CIDEr
    scorers. The reference captions are taken from a ``coco`` dataset object
    at construction time.
    """

    def __init__(self, coco, useBleu=False, useCider=False):
        """
        Gather the reference annotations of every image in ``coco``, tokenize
        them, and (when CIDEr is enabled) register them with the CIDEr scorer.

        :param coco : dataset object; ``getImgIds()`` and ``imgToAnns`` are read from it
        :param useBleu (bool) : when true, a BleuScorer is stored in ``self.b_scorer``
        :param useCider (bool) : when true, a CiderScorer is stored in ``self.c_scorer``,
                                 fed with every image's references, and its
                                 document frequencies are computed
        """
        self.coco = coco
        self.useBleu = useBleu
        self.useCider = useCider
        self.params = {'image_id': coco.getImgIds()}

        image_ids = self.params['image_id']
        raw_refs = {image_id: self.coco.imgToAnns[image_id]
                    for image_id in image_ids}

        # Scorers are only created for the metrics that were requested.
        if self.useBleu:
            self.b_scorer = BleuScorer()
        if self.useCider:
            self.c_scorer = CiderScorer()

        print('tokenization...')
        tokenized_refs = PTBTokenizer().tokenize(raw_refs)

        for image_id in image_ids:
            refs = tokenized_refs[image_id]

            # Every image must come back with a non-empty list of references.
            assert type(refs) is list
            assert len(refs) > 0

            if self.useCider:
                # No candidate yet: only the references are registered.
                self.c_scorer += (None, refs)

        if not self.useCider:
            return

        self.c_scorer.compute_doc_freq()
        # Sanity check: no document frequency may exceed the number of
        # entries accumulated in the scorer.
        n_entries = len(self.c_scorer.ctest)
        highest_doc_freq = max(self.c_scorer.document_frequency.values())
        assert n_entries >= highest_doc_freq

    def eval_cider(self, test, ref):
        """
        Score ``test`` against ``ref`` with the CIDEr scorer.

        :return: the result of ``c_scorer.compute_cider(test, ref)`` wrapped in a numpy array
        :raises AssertionError: when the object was built with useCider=False
        """
        assert self.useCider

        return np.array(self.c_scorer.compute_cider(test, ref))

    def eval_bleu(self, test, ref):
        """
        Score ``test`` against ``ref`` with the BLEU scorer.

        ``test`` and ``ref`` are walked in lockstep; each pair is added to the
        scorer after its list has been reset.

        :return: element 3 of the second value returned by
                 ``b_scorer.compute_score()`` (BLEU-4)
        :raises AssertionError: when the object was built with useBleu=False
        """
        assert self.useBleu

        self.b_scorer.reset_list()
        for pair in zip(test, ref):
            self.b_scorer += pair
        _, per_order = self.b_scorer.compute_score()
        return per_order[3]  # return bleu_4
Example #8
0
class Cider:
    """
    Entry point for computing the CIDEr metric.

    """
    def __init__(self, n=4, df="corpus"):
        """
        Set up the CIDEr scoring function
        : param n (int): n-gram size, forwarded to CiderScorer as ``n``
        : param df (string): source of the IDF values ('corpus' or 'coco-train'),
                    forwarded to CiderScorer as ``df_mode``
        : return: None
        """
        self._n, self._df = n, df
        # One scorer instance is created here and reused by every
        # compute_score() call.
        self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df)

    def compute_score(self, gts, res):
        """
        Main function to compute CIDEr score
        : param gts (dict) : {image id: non-empty list of reference sentences}
        : param res (iterable) : records indexed with 'image_id' and 'caption';
                                 'caption' must be a list holding one candidate sentence
        : return: (score, scores) : the pair returned by CiderScorer.compute_score()
        """

        # The scorer is shared across calls, so drop whatever hypotheses and
        # references a previous call left in it.
        self.cider_scorer.clear()

        for entry in res:
            candidate = entry['caption']
            references = gts[entry['image_id']]

            # Sanity check: exactly one candidate, at least one reference.
            assert type(candidate) is list
            assert len(candidate) == 1
            assert type(references) is list
            assert len(references) > 0

            self.cider_scorer += (candidate[0], references)

        score, scores = self.cider_scorer.compute_score()
        return score, scores

    def method(self):
        """Return the display name of this metric."""
        return "CIDEr"
Example #9
0
    def compute_score(self, gts, res):
        """
        Main function to compute CIDEr score
        :param  gts : iterable walked in lockstep with ``res``; its elements are
                      handed to the scorer as the FIRST item of each pair
                res : iterable walked in lockstep with ``gts``; its elements are
                      handed to the scorer as the SECOND item of each pair
        :return: only the first value returned by CiderScorer.compute_score()
                 (the second value is discarded)

        NOTE(review): other CIDEr wrappers feed the scorer (candidate, references)
        pairs, whereas here the element coming from ``gts`` is placed first.
        Confirm the argument order callers use before relying on this.
        """

        scorer = CiderScorer(n=self._n, sigma=self._sigma)

        # zip() stops at the shorter input; iterating a dict yields its keys.
        for pair in zip(gts, res):
            scorer += pair

        score, _ = scorer.compute_score()
        return score
Example #10
0
class SCSTLayer(caffe.Layer):
  """
  Self-Critical Sequence Training (SCST) layer. Takes beam search and
  outputs weights for training.

  Bottom blobs: image_ids, captions.
  Top blobs: score_weights, input_sentence, target_sentence, mean_score.
  """

  def setup(self, bottom, top):
    """
    Validate the blob counts and load the layer configuration.

    ``self.param_str`` is parsed with ``ast.literal_eval`` and must provide the
    keys 'end_of_sequence', 'ignore_label', 'vocab_path' and 'gt_caption_paths'.
    Raises Exception when the number of bottom or top blobs is wrong.
    """
    if len(bottom) != 2:
      raise Exception("Inputs 2 bottom blobs - image_ids and captions.")
    if len(top) != 4:
      # The message used to announce 3 blobs although 4 are required and listed.
      raise Exception("Outputs 4 top blobs - score_weights, input_sentence, target_sentence, mean_score.")
    params = ast.literal_eval(self.param_str)
    self._end_of_sequence = params['end_of_sequence']
    self._ignore_label = params['ignore_label']
    # Load vocab: one word per line, lower-cased and stripped of whitespace,
    # so that the list index is the word id.
    self._vocab = []
    with open(params['vocab_path']) as vocab_file:
      for word in vocab_file:
        self._vocab.append(word.lower().strip())
    self._cider = CiderScorer(params['gt_caption_paths'])

  def _translate(self, blob):
    """
    Map a sequence of word ids to a list of vocabulary words, stopping at
    (and excluding) the first '.' token.
    """
    # Results will be lower case, tokenized, without full stop
    # (to match reference tokenization)
    caption = []
    for ix in blob:
      next_word = self._vocab[int(ix)]
      if next_word == '.':
        break
      caption.append(next_word)
    return caption

  def reshape(self, bottom, top):
    """
    Size the top blobs from the captions blob: axis 0 is read as the batch
    size, axis 2 as the beam size and axis 3 as the sequence length. The three
    per-caption outputs get one row per (image, beam) pair.
    """
    self._batch_size = bottom[1].shape[0]
    self._beam_size = bottom[1].shape[2]
    self._sequence_length = bottom[1].shape[3]
    top[0].reshape(self._batch_size*self._beam_size, self._sequence_length)
    top[1].reshape(self._batch_size*self._beam_size, self._sequence_length)
    top[2].reshape(self._batch_size*self._beam_size, self._sequence_length)
    top[3].reshape(1)

  def forward(self, bottom, top):
    """
    Score every beam caption and fill the training blobs.

    top[1] (input_sentence) starts filled with end_of_sequence and receives the
    caption ids shifted right by one position; top[2] (target_sentence) starts
    filled with ignore_label and receives the caption ids followed by
    end_of_sequence. Both are truncated to the sequence length. top[0] holds,
    for each caption, its score minus the mean score of its image's beam, and
    top[3] the mean of all raw scores.
    """
    top[1].data[...] = self._end_of_sequence
    top[2].data[...] = self._ignore_label
    # Score captions and generate training input and target output
    image_ids = []
    captions = []
    for n in range(self._batch_size):
      for b in range(self._beam_size):
        row = n*self._beam_size+b
        image_ids.append(int(bottom[0].data[n][0]))
        seq = bottom[1].data[n][0][b]
        captions.append(self._translate(seq))
        # Keep only the ids that precede the first full stop.
        caption = seq[:len(captions[-1])].tolist()
        top[1].data[row,1:min(self._sequence_length,len(caption)+1)] = \
            caption[:self._sequence_length-1] # input_sentence
        caption.append(self._end_of_sequence)
        top[2].data[row,:min(self._sequence_length,len(caption))] = \
            caption[:self._sequence_length] # target_sentence
    raw_scores = np.array(self._cider.compute_scores(image_ids,captions))
    # Generate score output
    for n in range(self._batch_size):
      # The baseline of an image is the mean score over its own beam.
      baseline = np.mean(raw_scores[n*self._beam_size:(n+1)*self._beam_size])
      for b in range(self._beam_size):
        score = raw_scores[n*self._beam_size+b]
        top[0].data[n*self._beam_size+b,:] = score - baseline
    top[3].data[0] = np.mean(raw_scores)

  def backward(self, top, propagate_down, bottom):
    """This layer does not propagate gradients."""
    pass
Example #11
0
class SCSTSamplingLayer(caffe.Layer):
  """
  Self-Critical Sequence Training (SCST) layer. Takes argmax and sampled captions and
  outputs weights for training.

  Bottom blobs: image_ids, captions.
  Top blobs: score_weights, target_sentence, mean_score, scores.
  """

  def setup(self, bottom, top):
    """
    Validate the blob counts and load the layer configuration.

    ``self.param_str`` is parsed with ``ast.literal_eval`` and must provide the
    keys 'end_of_sequence', 'ignore_label', 'vocab_path' and 'gt_caption_paths'.
    Raises Exception when the number of bottom or top blobs is wrong.
    """
    if len(bottom) != 2:
      raise Exception("Inputs 2 bottom blobs - image_ids and captions.")
    if len(top) != 4:
      raise Exception("Outputs 4 top blobs - score_weights, target_sentence, mean_score, scores.")
    params = ast.literal_eval(self.param_str)
    self._end_of_sequence = params['end_of_sequence']
    self._ignore_label = params['ignore_label']
    # Load vocab: one word per line, lower-cased and stripped of whitespace,
    # so that the list index is the word id.
    self._vocab = []
    with open(params['vocab_path']) as vocab_file:
      for word in vocab_file:
        self._vocab.append(word.lower().strip())
    self._cider = CiderScorer(params['gt_caption_paths'], include_eos=True)

  def _translate(self, blob):
    """
    Map a sequence of word ids to a list of vocabulary words, stopping at
    the first '.' token, which is kept as the last word.
    """
    # Results will be lower case and tokenized; the full stop is kept as the
    # end-of-sequence marker.
    caption = []
    for ix in blob:
      next_word = self._vocab[int(ix)]
      if next_word == '.':
        caption.append(next_word) # Include EOS
        break
      caption.append(next_word)
    return caption

  def reshape(self, bottom, top):
    """
    Size the top blobs from the captions blob: axis 0 is read as the batch
    size (number of captions) and axis 1 as the sequence length.
    """
    self._batch_size = bottom[1].shape[0]
    self._sequence_length = bottom[1].shape[1]
    top[0].reshape(self._batch_size, self._sequence_length)
    top[1].reshape(self._batch_size, self._sequence_length)
    top[2].reshape(1)
    top[3].reshape(self._batch_size)

  def forward(self, bottom, top):
    """
    Score the captions pairwise and fill the training blobs.

    Captions are consumed in consecutive pairs that share one image id: the
    even row supplies the baseline score and the odd row the sample score
    (presumably the argmax and the sampled caption -- confirm with the net
    definition). Only odd rows receive a target sentence in top[1] and a
    weight in top[0]; the weight is max(0, sample - baseline), with each
    strictly positive score replaced by its logarithm first. top[2] is the
    mean of the baseline scores and top[3] holds every raw score.
    """
    top[0].data[...] = 0
    top[1].data[...] = self._ignore_label
    # Score captions and generate target output
    image_ids = []
    captions = []
    for n in range(self._batch_size):
      # Floor division: a plain ``/`` yields a float on Python 3, which is
      # not a valid index.
      image_ids.append(int(bottom[0].data[n//2][0]))
      seq = bottom[1].data[n]
      captions.append(self._translate(seq))
      if n % 2 == 1: # Generate targets
        caption = seq[:len(captions[-1])].tolist()
        top[1].data[n,:min(self._sequence_length,len(caption))] = \
            caption[:self._sequence_length] # target_sentence
    raw_scores = self._cider.compute_scores(image_ids,captions)
    # Generate score weights
    for n in range(self._batch_size//2):
      baseline_score = raw_scores[n*2]
      sample_score = raw_scores[n*2+1]
      top[3].data[n*2] = baseline_score
      top[3].data[n*2+1] = sample_score
      # Non-positive scores have no logarithm and are left unchanged.
      if sample_score > 0:
        sample_score = math.log(sample_score)
      if baseline_score > 0:
        baseline_score = math.log(baseline_score)
      top[0].data[n*2+1] = max(0.0, sample_score - baseline_score)
    top[2].data[0] = np.mean(raw_scores[::2])

  def backward(self, top, propagate_down, bottom):
    """This layer does not propagate gradients."""
    pass