class AttentionVQARewards(object):
    def __init__(self,
                 ckpt_file='model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000',
                 use_dis_reward=False):
        self.g = tf.Graph()
        self.ckpt_file = ckpt_file
        from models.vqa_soft_attention import AttentionModel
        from vqa_config import ModelConfig
        config = ModelConfig()
        self.ans2id = AnswerTokenToTopAnswer()
        self.use_dis_reward = use_dis_reward
        with self.g.as_default():
            self.sess = tf.Session()
            self.model = AttentionModel(config, phase='test_broadcast')
            self.model.build()
            model_vars = tf.trainable_variables()
            self.saver = tf.train.Saver(var_list=model_vars)
            self.saver.restore(self.sess, ckpt_file)

    def process_answers(self, ans, ans_len):
        ans_pathes = _parse_gt_questions(ans, ans_len)
        return self.ans2id.get_top_answer(ans_pathes)

    def get_reward(self, sampled, inputs):
        if len(inputs) == 4:
            images, res5c, ans, ans_len = inputs
            top_ans_ids = self.process_answers(ans, ans_len)
        else:
            assert len(inputs) == 5
            images, res5c, ans, ans_len, top_ans_ids = inputs
        images_aug = []
        top_ans_ids_aug = []
        answer_aug = []
        answer_len_aug = []
        pathes = []
        for _idx, ps in enumerate(sampled):
            for p in ps:
                if p[-1] == END_TOKEN:
                    pathes.append(p[1:-1])  # strip start and end tokens
                else:
                    pathes.append(p[1:])  # no end token sampled; strip start token only
                images_aug.append(images[_idx][np.newaxis, :])
                answer_aug.append(ans[_idx][np.newaxis, :])
                answer_len_aug.append(ans_len[_idx])
                top_ans_ids_aug.append(top_ans_ids[_idx])
        # put to arrays
        arr, arr_len = put_to_array(pathes)
        images_aug = np.concatenate(images_aug)
        answer_aug = np.concatenate(answer_aug).astype(np.int32)
        top_ans_ids_aug = np.array(top_ans_ids_aug)
        answer_len_aug = np.array(answer_len_aug, dtype=np.int32)
        # run inference in VQA
        scores = self.model.inference(self.sess, [res5c, arr, arr_len])
        if self.use_dis_reward:
            # discrete reward: 1 if the VQA model's argmax matches the true answer
            vqa_scores = np.require(scores.argmax(axis=1) == top_ans_ids_aug,
                                    np.float32)
        else:
            # soft reward: the score the VQA model assigns to the true answer
            _this_batch_size = scores.shape[0]
            vqa_scores = scores[np.arange(_this_batch_size), top_ans_ids_aug]
        is_valid = top_ans_ids_aug != 2000  # 2000 marks out-of-vocabulary answers
        return vqa_scores, [images_aug, answer_aug, answer_len_aug, is_valid]
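# --- Usage sketch (illustrative; not part of the original code) ---
# Shows how AttentionVQARewards.get_reward might be driven during policy-gradient
# training. `sampled_questions`, `images`, `res5c`, `ans`, and `ans_len` are
# hypothetical batch inputs; only the call signature is taken from the class above.
import numpy as np

def demo_attention_vqa_reward(sampled_questions, images, res5c, ans, ans_len):
    reward_fn = AttentionVQARewards(use_dis_reward=False)
    # With a 4-element input list, top answer ids are derived from the answer tokens.
    scores, (im_aug, ans_aug, ans_len_aug, is_valid) = reward_fn.get_reward(
        sampled_questions, [images, res5c, ans, ans_len])
    # Zero out rewards whose ground-truth answer falls outside the answer vocabulary.
    return scores * is_valid.astype(np.float32)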
class AttentionModel(BaseVQAModel):
    def __init__(self,
                 ckpt_file='model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000'):
        BaseVQAModel.__init__(self)
        self.g = tf.Graph()
        self.ckpt_file = ckpt_file
        # Note: this local import shadows the wrapper's own class name inside
        # __init__; the instance built below is the underlying soft-attention model.
        from models.vqa_soft_attention import AttentionModel
        from vqa_config import ModelConfig
        config = ModelConfig()
        self.name = ' ------- MLB-attention ------- '
        with self.g.as_default():
            self.sess = tf.Session()
            self.model = AttentionModel(config, phase='test_broadcast')
            self.model.build()
            model_vars = tf.trainable_variables()
            self.saver = tf.train.Saver(var_list=model_vars)
            self.saver.restore(self.sess, ckpt_file)

    def _load_image(self, image_id):
        FEAT_ROOT = '/usr/data/fl302/data/VQA/ResNet152/resnet_res5c'
        filename = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        f = np.load(os.path.join(FEAT_ROOT, filename + '.npz'))['x']
        return f.transpose((1, 2, 0))[np.newaxis, ::]
def create_model():
    from models.vqa_soft_attention import AttentionModel
    from vqa_config import ModelConfig
    model = AttentionModel(ModelConfig(), phase='test_broadcast')
    model.build()
    checkpoint_path = 'model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000'
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)
    return sess, model
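# Usage sketch (assumption): the feed layout [feature, question, question_len]
# mirrors the model.inference calls in the wrappers above; the argument names
# below are placeholders, not identifiers from the original code.
def demo_create_model(res5c_feat, question_arr, question_len):
    sess, model = create_model()
    return model.inference(sess, [res5c_feat, question_arr, question_len])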
class MLBWrapper(object):
    def __init__(self,
                 ckpt_file='model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000'):
        self.g = tf.Graph()
        self.ckpt_file = ckpt_file
        from models.vqa_soft_attention import AttentionModel
        from vqa_config import ModelConfig
        config = ModelConfig()
        self.name = ' ------- MLB-attention ------- '
        with self.g.as_default():
            self.sess = tf.Session()
            self.model = AttentionModel(config, phase='test_broadcast')
            self.model.build()
            model_vars = tf.trainable_variables()
            self.saver = tf.train.Saver(var_list=model_vars)
            self.saver.restore(self.sess, ckpt_file)

    def _load_image(self, image_id):
        FEAT_ROOT = '/usr/data/fl302/data/VQA/ResNet152/resnet_res5c'
        filename = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        f = np.load(os.path.join(FEAT_ROOT, filename + '.npz'))['x']
        return f.transpose((1, 2, 0))[np.newaxis, ::]

    def get_scores(self, sampled, image_id, top_ans_id):
        # process questions
        pathes = []
        for p in sampled:
            if p[-1] == END_TOKEN:
                pathes.append(p[1:-1])  # strip start and end tokens
            else:
                pathes.append(p[1:])  # no end token sampled; strip start token only
        num_unk = len(sampled)
        arr, arr_len = put_to_array(pathes)
        # load the image feature and tile it to match the number of questions
        res5c = self._load_image(image_id)
        images_aug = np.tile(res5c, [num_unk, 1, 1, 1])
        # inference
        scores = self.model.inference(self.sess, [images_aug, arr, arr_len])
        vqa_scores = scores[:, top_ans_id].flatten()
        return vqa_scores
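# --- Usage sketch (illustrative; the ids below are hypothetical) ---
# Scores a batch of sampled questions against a single val2014 image and a single
# candidate answer, reusing MLBWrapper exactly as defined above.
def demo_mlb_scores(sampled_questions):
    wrapper = MLBWrapper()
    image_id = 262148   # hypothetical COCO val2014 image id
    top_ans_id = 0      # hypothetical index into the top-answer vocabulary
    return wrapper.get_scores(sampled_questions, image_id, top_ans_id)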
class IVQARewards(object):
    def __init__(self, graph=None, sess=None, use_vqa_reward=False,
                 metric='cider'):
        self.graph = graph
        self.sess = sess
        self.gamma = 0.0
        # With gamma hard-coded to 0, the VQA reward is effectively disabled.
        self.use_vqa_reward = use_vqa_reward and self.gamma > 0
        # self.cider_scorer = ciderEval('ivqa_train_idxs')
        if metric == 'cider':
            self.scorer = ciderEval('v2_ivqa_train_idxs')
        elif metric == 'bleu':
            self.scorer = Bleu(n=4)
        # self.cider_scorer = CiderD(df='v2_ivqa_train_idxs')
        if self.use_vqa_reward:
            with graph.as_default():
                self._build_vqa_agent()

    def _build_vqa_agent(self):
        with tf.variable_scope('vqa_agent'):
            self.vqa_agent = VQAAgent(config=ModelConfig(), phase='test')

    def get_reward(self, sampled, gts, answers=None):
        """Compute the reward for each sampled question against the ground truth.

        The base reward is CIDEr-D (or BLEU); when the VQA reward is enabled,
        the two are mixed with weight gamma.

        :param sampled: list of sampled questions, [seq, seq_len]
        :param gts: list of ground-truth questions, [seq, seq_len]
        :param answers: numpy array of ground-truth top-answer indices for VQA
        :return: numpy array of shape (N,) with one reward per sample
        """
        rewards = compute_rewards(self.scorer, sampled, gts)
        if self.use_vqa_reward:
            vqa_rewards = self._compute_vqa_reward(sampled, answers)
            rewards = (1. - self.gamma) * rewards + self.gamma * vqa_rewards
        return rewards

    def _compute_vqa_reward(self, sampled, answers):
        probs = self.vqa_agent.prob
        feed_dict = self.vqa_agent.fill_feed_dict(sampled + [answers])
        preds = self.sess.run(probs, feed_dict=feed_dict)
        rewards = np.equal(preds.argmax(axis=1), answers).astype(np.float32)
        return rewards
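# Usage sketch (illustrative): with the hard-coded gamma of 0.0, IVQARewards
# reduces to the pure language-metric reward even when use_vqa_reward=True.
def demo_ivqa_reward(sampled, gts):
    reward_fn = IVQARewards(metric='cider')
    return reward_fn.get_reward(sampled, gts)  # shape (N,), one reward per sample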
def __init__(self, ckpt_file='', use_dis_reward=False,
             use_attention_model=False):
    self.g = tf.Graph()
    self.ckpt_file = ckpt_file
    self.use_attention_model = use_attention_model
    from models.vqa_base import BaseModel
    from vqa_config import ModelConfig
    config = ModelConfig()
    self.ans2id = AnswerTokenToTopAnswer()
    self.use_dis_reward = use_dis_reward
    with self.g.as_default():
        self.sess = tf.Session()
        if self.use_attention_model:
            self.model = AttentionModel(config, phase='test')
        else:
            self.model = BaseModel(config, phase='test')
        self.model.build()
        model_vars = tf.trainable_variables()
        self.saver = tf.train.Saver(var_list=model_vars)
        self.saver.restore(self.sess, ckpt_file)
def __init__(self):
    cmd.Cmd.__init__(self)
    self.image_id = None
    # self.model = AttentionModel()
    self.models = [AttentionModel(), VanillaModel()]