class AttentionVQARewards(object):
    """Reward provider backed by a soft-attention VQA model restored from disk.

    The model is built in its own ``tf.Graph`` with its own ``tf.Session`` and
    its trainable variables are restored from ``ckpt_file``. ``get_reward``
    scores every sampled token path against the target answer id of the
    example the path was sampled for.
    """

    # Answer id that marks an entry as not valid in the mask returned by
    # ``get_reward``.
    # NOTE(review): presumably the id ``AnswerTokenToTopAnswer`` assigns to
    # answers outside the top-answer vocabulary -- confirm.
    INVALID_ANSWER_ID = 2000

    def __init__(self,
                 ckpt_file='model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000',
                 use_dis_reward=False):
        """Build the VQA model and restore its weights.

        Args:
            ckpt_file: checkpoint path handed to ``tf.train.Saver.restore``.
            use_dis_reward: if True, ``get_reward`` returns a 0/1 reward
                (argmax prediction equals the target answer id); otherwise it
                returns the model score at the target answer id.
        """
        self.g = tf.Graph()
        self.ckpt_file = ckpt_file
        # Project-local imports are kept at function scope, as in the
        # original code.
        from models.vqa_soft_attention import AttentionModel
        from vqa_config import ModelConfig
        config = ModelConfig()
        self.ans2id = AnswerTokenToTopAnswer()
        self.use_dis_reward = use_dis_reward
        with self.g.as_default():
            self.sess = tf.Session()
            self.model = AttentionModel(config, phase='test_broadcast')
            self.model.build()
            # Only the trainable variables are restored from the checkpoint.
            trainable_variables = tf.trainable_variables()
            self.saver = tf.train.Saver(var_list=trainable_variables)
            self.saver.restore(self.sess, ckpt_file)

    def process_answers(self, ans, ans_len):
        """Convert answer token arrays into top-answer ids.

        The answers are parsed with ``_parse_gt_questions`` and the result is
        passed to ``self.ans2id.get_top_answer``.
        """
        ans_pathes = _parse_gt_questions(ans, ans_len)
        return self.ans2id.get_top_answer(ans_pathes)

    def get_reward(self, sampled, inputs):
        """Score every sampled path with the VQA model.

        Args:
            sampled: one collection of token paths per example; ``sampled[i]``
                holds the paths sampled for example ``i``.
            inputs: either ``(images, res5c, ans, ans_len)`` or
                ``(images, res5c, ans, ans_len, top_ans_ids)``. When
                ``top_ans_ids`` is omitted it is computed with
                ``process_answers``.

        Returns:
            A tuple ``(vqa_scores, [images_aug, answer_aug, answer_len_aug,
            is_valid])``. Every returned array has one entry per sampled path;
            the per-example inputs are repeated once for each path sampled
            for that example. ``is_valid`` is False where the target answer
            id equals ``INVALID_ANSWER_ID``.

        Raises:
            ValueError: if ``inputs`` does not hold 4 or 5 items.
        """
        num_inputs = len(inputs)
        if num_inputs == 4:
            images, res5c, ans, ans_len = inputs
            top_ans_ids = self.process_answers(ans, ans_len)
        elif num_inputs == 5:
            images, res5c, ans, ans_len, top_ans_ids = inputs
        else:
            # Explicit error instead of ``assert`` so that the check also
            # runs under ``python -O``.
            raise ValueError(
                'inputs must hold 4 or 5 items, got %d' % num_inputs)

        images_aug = []
        top_ans_ids_aug = []
        answer_aug = []
        answer_len_aug = []
        pathes = []
        for _idx, ps in enumerate(sampled):
            for p in ps:
                # The leading (start) token is always dropped; the trailing
                # token is dropped only when it is END_TOKEN.
                if p[-1] == END_TOKEN:
                    pathes.append(p[1:-1])
                else:
                    pathes.append(p[1:])
                # Repeat the per-example inputs once per sampled path.
                images_aug.append(images[_idx][np.newaxis, :])
                answer_aug.append(ans[_idx][np.newaxis, :])
                answer_len_aug.append(ans_len[_idx])
                top_ans_ids_aug.append(top_ans_ids[_idx])

        # put to arrays
        arr, arr_len = put_to_array(pathes)
        images_aug = np.concatenate(images_aug)
        answer_aug = np.concatenate(answer_aug).astype(np.int32)
        top_ans_ids_aug = np.array(top_ans_ids_aug)
        answer_len_aug = np.array(answer_len_aug, dtype=np.int32)

        # run inference in VQA
        # NOTE(review): ``res5c`` is passed exactly as received, it is not
        # repeated per sampled path like ``images`` is. Presumably the
        # 'test_broadcast' phase of the model accounts for that -- confirm.
        scores = self.model.inference(self.sess, [res5c, arr, arr_len])
        if self.use_dis_reward:
            # 1.0 where the predicted answer equals the target, else 0.0.
            vqa_scores = np.require(
                scores.argmax(axis=1) == top_ans_ids_aug, np.float32)
        else:
            # Model score at the target answer id of every row.
            _this_batch_size = scores.shape[0]
            vqa_scores = scores[np.arange(_this_batch_size), top_ans_ids_aug]
        is_valid = top_ans_ids_aug != self.INVALID_ANSWER_ID
        return vqa_scores, [images_aug, answer_aug, answer_len_aug, is_valid]
class VQARewards(object):
    """Reward provider that scores sampled paths with an injected VQA model.

    The model is not created here; it must be supplied through
    ``set_vqa_model`` before ``get_reward`` is called.
    """

    # Answer id that marks an entry as not valid in the mask returned by
    # ``get_reward``.
    # NOTE(review): presumably the id ``AnswerTokenToTopAnswer`` assigns to
    # answers outside the top-answer vocabulary -- confirm.
    INVALID_ANSWER_ID = 2000

    def __init__(self, use_dis_reward=False):
        """Create the reward provider without a model attached.

        Args:
            use_dis_reward: if True, ``get_reward`` returns a 0/1 reward
                (argmax prediction equals the target answer id); otherwise it
                returns the model score at the target answer id.
        """
        # Not used by any method of this class; kept because external code
        # may read the attribute.
        self.g = tf.Graph()
        self.ans2id = AnswerTokenToTopAnswer()
        self.use_dis_reward = use_dis_reward
        self.model = None

    def set_vqa_model(self, vqa):
        """Attach the VQA model whose ``inference`` method produces scores."""
        self.model = vqa

    def process_answers(self, ans, ans_len):
        """Convert answer token arrays into top-answer ids.

        The answers are parsed with ``_parse_gt_questions`` and the result is
        passed to ``self.ans2id.get_top_answer``.
        """
        ans_pathes = _parse_gt_questions(ans, ans_len)
        return self.ans2id.get_top_answer(ans_pathes)

    def get_reward(self, sampled, inputs):
        """Score every sampled path with the attached VQA model.

        Args:
            sampled: one collection of token paths per example; ``sampled[i]``
                holds the paths sampled for example ``i``.
            inputs: either ``(images, ans, ans_len)`` or
                ``(images, ans, ans_len, top_ans_ids)``. When ``top_ans_ids``
                is omitted it is computed with ``process_answers``.

        Returns:
            A tuple ``(vqa_scores, [images_aug, arr, arr_len, answer_aug,
            answer_len_aug, top_ans_ids_aug, is_valid])``. ``arr`` and
            ``arr_len`` come from ``put_to_array`` applied to the trimmed
            paths; the other arrays repeat the per-example inputs once for
            each path sampled for that example. ``is_valid`` is False where
            the target answer id equals ``INVALID_ANSWER_ID``.

        Raises:
            ValueError: if ``inputs`` does not hold 3 or 4 items.
        """
        num_inputs = len(inputs)
        if num_inputs == 3:
            images, ans, ans_len = inputs
            top_ans_ids = self.process_answers(ans, ans_len)
        elif num_inputs == 4:
            images, ans, ans_len, top_ans_ids = inputs
        else:
            # Explicit error instead of ``assert`` so that the check also
            # runs under ``python -O``.
            raise ValueError(
                'inputs must hold 3 or 4 items, got %d' % num_inputs)

        images_aug = []
        top_ans_ids_aug = []
        answer_aug = []
        answer_len_aug = []
        pathes = []
        for _idx, ps in enumerate(sampled):
            for p in ps:
                # The leading (start) token is always dropped; the trailing
                # token is dropped only when it is END_TOKEN.
                if p[-1] == END_TOKEN:
                    pathes.append(p[1:-1])
                else:
                    pathes.append(p[1:])
                # Repeat the per-example inputs once per sampled path.
                images_aug.append(images[_idx][np.newaxis, :])
                answer_aug.append(ans[_idx][np.newaxis, :])
                answer_len_aug.append(ans_len[_idx])
                top_ans_ids_aug.append(top_ans_ids[_idx])

        # put to arrays
        arr, arr_len = put_to_array(pathes)
        images_aug = np.concatenate(images_aug)
        answer_aug = np.concatenate(answer_aug).astype(np.int32)
        top_ans_ids_aug = np.array(top_ans_ids_aug)
        answer_len_aug = np.array(answer_len_aug, dtype=np.int32)

        # run inference in VQA
        scores = self.model.inference([images_aug, arr, arr_len])
        if self.use_dis_reward:
            # 1.0 where the predicted answer equals the target, else 0.0.
            vqa_scores = np.require(
                scores.argmax(axis=1) == top_ans_ids_aug, np.float32)
        else:
            # Model score at the target answer id of every row.
            _this_batch_size = scores.shape[0]
            vqa_scores = scores[np.arange(_this_batch_size), top_ans_ids_aug]
        is_valid = top_ans_ids_aug != self.INVALID_ANSWER_ID
        return vqa_scores, [
            images_aug, arr, arr_len, answer_aug, answer_len_aug,
            top_ans_ids_aug, is_valid
        ]