def __init__(self, mode='winner_take_all'):
    """Set up the scorers and default settings on ``self``.

    NOTE(review): this is defined at module level although it takes
    ``self``; it appears to be a leftover copy of
    ``DiversityReward.__init__`` -- confirm before relying on it.

    :param mode: reward mode, either 'winner_take_all' or 'kill_all'
    """
    allowed_modes = ('winner_take_all', 'kill_all')

    # External scorers are built first, in this order, exactly as before.
    self.scorer = ciderEval('vqa_%s_idxs_end' % 'kptrain')

    # Token handling flags and the similarity threshold.
    self.pred_has_start_end_token = True
    self.use_end_token = True
    self.thresh = 9.0

    self.verbose = False
    self.mode = mode
    self.diversity_scorer = UniqueReward()

    # The mode is only checked once every attribute has been assigned.
    assert self.mode in allowed_modes
class _InvalidModeError(ValueError, AssertionError):
    """Raised when an unsupported reward mode is requested.

    It also derives from ``AssertionError`` because the mode check used to
    be a bare ``assert``; handlers written against that keep working.
    """


class DiversityReward(object):
    """Reward that penalises near-duplicate candidates within a group.

    For every group of sampled candidates, all candidate pairs are scored
    with ``self.scorer``.  Pairs whose similarity exceeds ``self.thresh``
    are linked, and each resulting connected component is treated as one
    cluster of duplicates:

    * ``'winner_take_all'``: only the highest-scoring member of a cluster
      keeps a reward of 1, the others get 0.
    * ``'kill_all'``: every member of a cluster gets 0.

    Candidates that belong to no cluster keep a reward of 1.
    """

    # Modes understood by get_reward.
    VALID_MODES = ('winner_take_all', 'kill_all')

    def __init__(self, mode='winner_take_all', thresh=9.0):
        """
        :param mode: one of ``VALID_MODES``
        :param thresh: pairwise similarity above which two candidates are
            considered duplicates (defaults to the previous constant 9.0)
        :raises _InvalidModeError: if ``mode`` is not supported
        """
        # Validate first so a bad mode fails before the scorers are built.
        self._check_mode(mode)
        self.scorer = ciderEval('vqa_%s_idxs_end' % 'kptrain')
        self.pred_has_start_end_token = True
        self.use_end_token = True
        self.thresh = thresh
        self.verbose = False
        self.mode = mode
        self.diversity_scorer = UniqueReward()

    @classmethod
    def _check_mode(cls, mode):
        """Raise ``_InvalidModeError`` unless ``mode`` is supported."""
        # Explicit raise instead of ``assert`` so the check survives -O.
        if mode not in cls.VALID_MODES:
            raise _InvalidModeError(
                'mode must be one of %s, got %r' % (cls.VALID_MODES, mode))

    def set_mode(self, mode):
        """Switch the reward mode.

        The current mode is left untouched when ``mode`` is rejected.

        :param mode: one of ``VALID_MODES``
        :raises _InvalidModeError: if ``mode`` is not supported
        """
        self._check_mode(mode)
        self.mode = mode

    def get_reward(self, sampled, scores):
        """
        Diversity reward over groups of sampled questions.

        Within each cluster of too-similar candidates only the winner
        (highest score) is given a reward 1 in 'winner_take_all' mode;
        in 'kill_all' mode the whole cluster gets 0.  The result is then
        scaled by the reward returned from ``self.diversity_scorer``.

        :param sampled: sampled questions, one list of token paths per group
        :param scores: log likelihood of the sentence, one sequence per
            group, aligned with ``sampled``
        :return: tuple ``(d_rewards, is_gt)``; ``d_rewards`` is the
            concatenation of the per-group rewards multiplied by the
            diversity scorer's output, ``is_gt`` is passed through from
            ``self.diversity_scorer.get_reward`` unchanged
        """
        sampled = self.process_sampled(sampled)
        wrapped_ref, wrapped_res, path_ids = self.wrap_sampled_pairwise(
            sampled)
        _, sim = self.scorer.evaluate(wrapped_ref,
                                      wrapped_res)  # cider similarity
        diversity, is_gt = self.diversity_scorer.get_reward(sampled)
        keep_winner = self.mode == 'winner_take_all'
        d_rewards = []
        for _ids, _scs, _ps in zip(path_ids, scores, sampled):
            _scs = np.array(_scs)
            num_cand = len(_scs)
            # Similarities of this group's pairs.  They are ordered like
            # np.tril_indices(num_cand, k=-1) because that is the order in
            # which wrap_sampled_pairwise enumerated them (this assumes
            # len(_scs) equals the number of candidates in the group).
            _sim = sim[_ids]

            _d_reward = np.ones(shape=(num_cand, ), dtype=np.float32)
            _rows, _cols = np.tril_indices(num_cand, k=-1)

            connect_tab = _sim > self.thresh  # too close
            _edges = list(zip(_rows[connect_tab], _cols[connect_tab]))
            if _edges:
                for _cc in find_connected_components(_edges):
                    # Zero the whole cluster, then optionally restore the
                    # member with the highest score.
                    _max_idx = _cc[_scs[_cc].argmax()]
                    _d_reward[_cc] = 0.
                    if keep_winner:
                        _d_reward[_max_idx] = 1.
            d_rewards.append(_d_reward)
            if _edges and self.verbose:
                self.print_questions(_ps, _d_reward, _scs)
        d_rewards = np.concatenate(d_rewards)
        d_rewards *= diversity
        return d_rewards, is_gt

    def print_questions(self, sampled, rewards, scores):
        """Print each candidate of one group with its reward and score.

        :param sampled: processed token paths of the group
        :param rewards: reward assigned to each path
        :param scores: score of each path
        """
        for sm, r, sc in zip(sampled, rewards, scores):
            # The last token is dropped before decoding (presumably the
            # end token -- verify when use_end_token is False).
            sent = _SENT.index_to_question(sm[:-1])
            print('%s (%0.3f, %0.3f)' % (sent, r, sc))
        print('\n')

    def _process_path(self, path):
        """Normalise the start/end tokens of a single token path."""
        if self.pred_has_start_end_token:
            # Drop the leading token.
            path = path[1:]
        else:
            path = path + [END_TOKEN]
        if not self.use_end_token:
            path = path[:-1]
        return path

    def process_sampled(self, sampled):
        """Normalise start/end tokens of every sampled path.

        If ``pred_has_start_end_token`` is set the first token of each path
        is removed, otherwise ``END_TOKEN`` is appended.  If
        ``use_end_token`` is not set the last token is removed afterwards.

        :param sampled: list of groups, each a list of token paths
        :return: new nested list with the same structure; the input lists
            are not modified
        """
        return [[self._process_path(p) for p in ps] for ps in sampled]

    @staticmethod
    def wrap_sampled_pairwise(sampled):
        """Wrap every within-group candidate pair for the scorer.

        Pairs are enumerated per group in ``np.tril_indices(n, k=-1)``
        order (row index > column index); the row element becomes the
        result and the column element the reference.  Pair ids are
        consecutive integers running across all groups.

        :param sampled: list of groups, each a list of token paths
        :return: tuple ``(wrapped_ref, wrapped_res, path_ids)`` where
            ``wrapped_ref`` maps ``str(pair id)`` to a one-element list with
            the serialized reference, ``wrapped_res`` is a list of
            ``{'image_id', 'caption'}`` dicts in pair order and
            ``path_ids`` holds, per group, the list of its pair ids
        """
        wrapped_ref = OrderedDict()
        wrapped_res = []
        idx = 0
        path_ids = []
        for _var_s in sampled:
            _u_tmp = []
            _rows, _cols = np.tril_indices(len(_var_s), k=-1)

            for _res_id, ref_id in zip(_rows, _cols):
                _key = str(idx)
                wrapped_ref[_key] = [serialize_path(_var_s[ref_id])]
                wrapped_res.append({
                    'image_id': _key,
                    'caption': [serialize_path(_var_s[_res_id])]
                })
                _u_tmp.append(idx)
                idx += 1
            path_ids.append(_u_tmp)
        return wrapped_ref, wrapped_res, path_ids