# Example 1
 def __init__(self, metric='cider', gt_has_start_end_token=False,
              pred_has_start_end_token=True, use_end_token=True,
              subset='kptrain'):
     """Set up the question-reward scorer.

     :param metric: scoring metric; only 'cider' is effectively
         supported -- the assert below rejects anything else, so the
         'bleu' branch is dead code in practice.
     :param gt_has_start_end_token: ground-truth sequences already carry
         start/end tokens (leading token is stripped downstream).
     :param pred_has_start_end_token: sampled sequences already carry a
         leading start token.
     :param use_end_token: keep the trailing end token on sequences
         handed to the scorer.
     :param subset: selects the precomputed document-frequency file for
         the CIDEr scorer.
     """
     self.gt_has_start_end_token = gt_has_start_end_token
     self.pred_has_start_end_token = pred_has_start_end_token
     self.use_end_token = use_end_token
     if metric == 'cider':
         # CIDEr scorer backed by precomputed doc-freq stats for `subset`.
         self.scorer = ciderEval('vqa_%s_idxs_end' % subset)
     elif metric == 'bleu':
         # NOTE(review): unreachable in practice -- the assert below
         # only admits metric == 'cider'.
         self.scorer = Bleu(n=4)
     assert (metric == 'cider')
     # Decodes token-index sequences back into readable questions.
     self.to_sentence = SentenceGenerator(trainset='trainval')
     # Python 2 `long`; call counter for periodic debug printing.
     self._num_call = long(0)
     self.print_iterval = 100  # [sic] typo for "interval"
# Example 2
 def __init__(self,
              graph=None,
              sess=None,
              use_vqa_reward=False,
              metric='cider'):
     """Set up the reward scorer, optionally with a VQA-based agent.

     :param graph: TF-style graph used to build the VQA agent
         (presumably TensorFlow -- it exposes ``as_default()``; verify).
     :param sess: session paired with ``graph``.
     :param use_vqa_reward: request the VQA reward component; note it is
         effectively ignored because ``gamma`` is hard-coded to 0 below.
     :param metric: scoring metric, 'cider' or 'bleu'.
     """
     self.graph = graph
     self.sess = sess
     # gamma weights the VQA reward term; hard-coded to 0.0, which
     # forces use_vqa_reward to False regardless of the argument.
     self.gamma = 0.0
     self.use_vqa_reward = use_vqa_reward and self.gamma > 0
     # self.cider_scorer = ciderEval('ivqa_train_idxs')
     if metric == 'cider':
         # CIDEr scorer backed by a precomputed doc-freq file.
         self.scorer = ciderEval('v2_ivqa_train_idxs')
     elif metric == 'bleu':
         self.scorer = Bleu(n=4)
     # self.cider_scorer = CiderD(df='v2_ivqa_train_idxs')
     if self.use_vqa_reward:
         # Dead under the current gamma = 0.0 (see above).
         with graph.as_default():
             self._build_vqa_agent()
class IVQARewards(object):
    """Scores sampled questions against ground-truth questions.

    Rewards come from a CIDEr-D scorer and are divided by 10 to bring
    them roughly into [0, 1].  Only metric='cider' is effectively
    supported: the assert in ``__init__`` rejects anything else, which
    makes the 'bleu' branch dead code.
    """

    def __init__(self,
                 metric='cider',
                 gt_has_start_end_token=False,
                 pred_has_start_end_token=True,
                 use_end_token=True,
                 subset='kptrain'):
        """Build the scorer and the index-to-sentence decoder.

        :param metric: scoring metric; must be 'cider' (asserted below).
        :param gt_has_start_end_token: ground-truth sequences already
            carry start/end tokens (leading token stripped in
            ``process_gt``).
        :param pred_has_start_end_token: sampled sequences already carry
            a leading start token.
        :param use_end_token: keep the trailing end token on sequences
            handed to the scorer.
        :param subset: selects the precomputed document-frequency file
            used by the CIDEr scorer.
        """
        self.gt_has_start_end_token = gt_has_start_end_token
        self.pred_has_start_end_token = pred_has_start_end_token
        self.use_end_token = use_end_token
        if metric == 'cider':
            # CIDEr scorer backed by precomputed doc-freq stats.
            self.scorer = ciderEval('vqa_%s_idxs_end' % subset)
        elif metric == 'bleu':
            # NOTE(review): unreachable in practice -- the assert below
            # only admits metric == 'cider'.
            self.scorer = Bleu(n=4)
        assert (metric == 'cider')
        # Decodes token-index sequences back into readable questions.
        self.to_sentence = SentenceGenerator(trainset='trainval')
        # FIX: was `long(0)` (Python 2 only -- NameError on Python 3).
        # A plain int behaves identically; Python ints are unbounded.
        self._num_call = 0
        self.print_iterval = 100  # [sic] "interval"; kept for compatibility

    def get_reward(self, sampled, gts):
        """Compute a CIDEr-D reward for every sampled question.

        :param sampled: list (one entry per example) of lists of sampled
            token sequences ("paths").
        :param gts: pair ``[seq, seq_len]`` of ground-truth token arrays
            and their lengths.
        :return: array of size (N,), one reward per sampled path across
            all examples, normalised to roughly [0, 1].
        """
        gts = self.process_gt(gts)  # convert to list of token lists
        sampled = self.process_sampled(sampled)  # convert to list
        wrapped_gt, wrapped_sample = self.wrap_samples(sampled, gts)
        _, rewards = self.scorer.evaluate(wrapped_gt, wrapped_sample)
        # Periodic debug printing, currently disabled:
        # if not self._num_call % self.print_iterval:
        #     self.print_questions(gts, sampled, rewards)
        # self._num_call += 1
        # rewards = supress_cider_score(rewards)
        return rewards / 10.  # CIDEr-D scores fall roughly in [0, 10]

    def print_questions(self, gts, sampled, rewards):
        """Print two random GT questions with their samples and rewards."""
        n_vis = 2
        num_tot = len(gts)
        vis_ids = np.random.choice(num_tot, size=(n_vis, ), replace=False)
        # `rewards` is flat over all samples; offsets map an example
        # index to the position of its first sample's reward.
        offsets = np.cumsum([len(sms) for sms in sampled]).tolist()
        offsets = [0] + offsets
        for _vis_id in vis_ids:
            _gt = gts[_vis_id]
            sent = self.to_sentence.index_to_question(_gt[:-1])  # drop end token
            print('\nGT: %s' % sent)
            _sms = sampled[_vis_id]
            _offset = offsets[_vis_id]
            for _sid, sm in enumerate(_sms):
                _r = rewards[_offset + _sid]
                sent = self.to_sentence.index_to_question(sm[:-1])
                print('%s (%0.3f)' % (sent, _r))
        print('\n')

    @staticmethod
    def wrap_samples(sampled, gts):
        """Pair every sampled path with its example's GT in scorer format.

        :return: (OrderedDict mapping running-index key -> [gt string],
            list of {'image_id': key, 'caption': [sample string]}) with
            one entry per sampled path.
        """
        wrapped_gt = OrderedDict()
        wrapped_sample = []
        idx = 0
        for _var_s, _gt in zip(sampled, gts):
            _gt_pat = serialize_path(_gt)
            for _s in _var_s:
                _key = str(idx)
                _s_pat = serialize_path(_s)
                wrapped_gt[_key] = [_gt_pat]
                wrapped_sample.append({'image_id': _key, 'caption': [_s_pat]})
                idx += 1
        return wrapped_gt, wrapped_sample

    def process_gt(self, gts):
        """Convert ``[seq_array, seq_len]`` GT into a list of token lists.

        Strips the leading start token when the GT carries start/end
        tokens, otherwise appends ``END_TOKEN``; optionally drops the
        trailing end token.
        """
        capt, capt_len = gts
        seqs = []
        for c, clen in zip(capt, capt_len):
            _gt = c[:clen].tolist()
            if self.gt_has_start_end_token:
                _gt = _gt[1:]  # drop leading start token
            else:
                _gt += [END_TOKEN]  # terminate the sequence
            if not self.use_end_token:
                _gt = _gt[:-1]
            seqs.append(_gt)
        return seqs

    def process_sampled(self, sampled):
        """Normalise sampled paths the same way as the ground truth."""
        new_sampled = []
        for ps in sampled:
            tmp = []
            for p in ps:
                if self.pred_has_start_end_token:
                    _p = p[1:]  # drop leading start token
                else:
                    _p = p + [END_TOKEN]
                if not self.use_end_token:
                    _p = _p[:-1]
                tmp.append(_p)
            new_sampled.append(tmp)
        return new_sampled