# Example 1 (snippet-collection separator; original marker "Beispiel #1", score 0)
def process():
    """Run the attention VQA model over the verification task data and
    record, per entry, whether the model's top predicted answer matches
    the ground-truth top answer.

    Reads ``task_data_for_verif.json`` and writes a parallel 0/1 list to
    ``task_data_verif_state.json`` in the same directory.

    Raises:
        Exception: if a ground-truth answer maps to the OOV id (2000).
    """
    def _image_id_from_name(name):
        # File names look like '..._<id>.<ext>'; pull out the numeric id.
        stem = name.split('.')[0]
        return int(stem.split('_')[-1])

    model = AttentionModel()
    ans2top_ans = AnswerTokenToTopAnswer()

    task_data_dir = '/usr/data/fl302/code/utils/bs_data_maker'
    task_data_file = os.path.join(task_data_dir, 'task_data_for_verif.json')
    entries = load_json(task_data_file)
    total = len(entries)

    is_valid = []
    for idx, entry in enumerate(entries):
        print('%d/%d' % (idx, total))
        image_id = _image_id_from_name(entry['image'])
        scores = model.inference(image_id, entry['target'])
        scores[:, -1] = -10.  # suppress the last (OOV) answer slot
        top_ans_id = ans2top_ans.direct_query(entry['answer'])
        if top_ans_id == 2000:
            raise Exception('Warning: answer oov')
        prediction = scores.flatten().argmax()
        is_valid.append(int(prediction == top_ans_id))

    n_valid = sum(is_valid)
    print('valid: %d/%d' % (n_valid, total))
    save_json(os.path.join(task_data_dir, 'task_data_verif_state.json'),
              is_valid)
 def __init__(self, ckpt_file='model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000',
              use_dis_reward=False):
     """Restore a soft-attention VQA model from `ckpt_file` into a private
     graph/session.

     NOTE(review): the enclosing class of this fragment is outside the
     visible chunk; the body mirrors AttentionVQARewards.__init__ below.
     """
     self.g = tf.Graph()  # keep the model isolated in its own graph
     self.ckpt_file = ckpt_file
     # Local imports keep these heavyweight modules off module import time.
     from models.vqa_soft_attention import AttentionModel
     from vqa_config import ModelConfig
     config = ModelConfig()
     self.ans2id = AnswerTokenToTopAnswer()  # answer tokens -> top-answer ids
     self.use_dis_reward = use_dis_reward  # presumably discrete vs. soft reward — confirm
     with self.g.as_default():
         self.sess = tf.Session()
         self.model = AttentionModel(config, phase='test_broadcast')
         self.model.build()
         vars = tf.trainable_variables()
         # Restore only trainable variables from the checkpoint.
         self.saver = tf.train.Saver(var_list=vars)
         self.saver.restore(self.sess, ckpt_file)
class AttentionVQARewards(object):
    """Scores sampled question paths with a pretrained soft-attention VQA
    model and turns the scores into rewards."""

    def __init__(self, ckpt_file='model/v1_vqa_VQA/v1_vqa_VQA_best2/model.ckpt-135000',
                 use_dis_reward=False):
        """Restore the soft-attention VQA model from `ckpt_file` into a
        private graph/session."""
        self.g = tf.Graph()  # keep the model isolated in its own graph
        self.ckpt_file = ckpt_file
        # Local imports keep heavyweight modules off module import time.
        from models.vqa_soft_attention import AttentionModel
        from vqa_config import ModelConfig
        model_cfg = ModelConfig()
        self.ans2id = AnswerTokenToTopAnswer()
        self.use_dis_reward = use_dis_reward
        with self.g.as_default():
            self.sess = tf.Session()
            self.model = AttentionModel(model_cfg, phase='test_broadcast')
            self.model.build()
            train_vars = tf.trainable_variables()
            self.saver = tf.train.Saver(var_list=train_vars)
            self.saver.restore(self.sess, ckpt_file)

    def process_answers(self, ans, ans_len):
        """Convert encoded answer sequences into top-answer ids."""
        return self.ans2id.get_top_answer(_parse_gt_questions(ans, ans_len))

    def get_reward(self, sampled, inputs):
        """Score every sampled question path against its ground-truth answer.

        `inputs` is either (images, res5c, ans, ans_len) — answers are mapped
        to top-answer ids here — or (images, res5c, ans, ans_len, top_ans_ids).
        Returns (vqa_scores, [images_aug, answer_aug, answer_len_aug, is_valid]).
        """
        if len(inputs) == 4:
            images, res5c, ans, ans_len = inputs
            top_ans_ids = self.process_answers(ans, ans_len)
        else:
            assert (len(inputs) == 5)
            images, res5c, ans, ans_len, top_ans_ids = inputs

        trimmed = []
        img_rep, ans_rep, len_rep, top_rep = [], [], [], []
        for row, candidates in enumerate(sampled):
            for cand in candidates:
                # Drop the start token; also drop a trailing END_TOKEN.
                cut = -1 if cand[-1] == END_TOKEN else None
                trimmed.append(cand[1:cut])
                img_rep.append(images[row][np.newaxis, :])
                ans_rep.append(ans[row][np.newaxis, :])
                len_rep.append(ans_len[row])
                top_rep.append(top_ans_ids[row])

        # Pack the variable-length paths into a dense array + length vector.
        arr, arr_len = put_to_array(trimmed)
        images_aug = np.concatenate(img_rep)
        answer_aug = np.concatenate(ans_rep).astype(np.int32)
        top_ans_ids_aug = np.array(top_rep)
        answer_len_aug = np.array(len_rep, dtype=np.int32)

        # Run VQA inference on the replicated batch.
        scores = self.model.inference(self.sess, [res5c, arr, arr_len])
        if self.use_dis_reward:
            # Hard reward: 1.0 iff the argmax answer matches ground truth.
            vqa_scores = np.require(scores.argmax(axis=1) == top_ans_ids_aug,
                                    np.float32)
        else:
            # Soft reward: score mass on the ground-truth answer id.
            rows = np.arange(scores.shape[0])
            vqa_scores = scores[rows, top_ans_ids_aug]
        is_valid = top_ans_ids_aug != 2000  # 2000 marks OOV answers
        return vqa_scores, [images_aug, answer_aug, answer_len_aug, is_valid]
# Example 4 (snippet-collection separator; original marker "Beispiel #4", score 0)
 def __init__(self, ckpt_file='/usr/data/fl302/code/inverse_vqa/model/mlb_attention_v2/model.ckpt-170000',
              use_dis_reward=False):
     """Restore a v2 (MLB) soft-attention VQA model from `ckpt_file`.

     NOTE(review): the enclosing class of this fragment is outside the
     visible chunk; unlike the v1 variant it also keeps a v1->v2
     top-answer converter.
     """
     self.g = tf.Graph()  # keep the model isolated in its own graph
     self.ckpt_file = ckpt_file
     # Presumably converts v1 top-answer ids to the v2 vocabulary — confirm.
     self.v1tov2 = TopAnswerVersionConverter()
     # Local imports keep these heavyweight modules off module import time.
     from models.vqa_soft_attention_v2 import AttentionModel
     from vqa_config import ModelConfig
     config = ModelConfig()
     self.ans2id = AnswerTokenToTopAnswer()  # answer tokens -> top-answer ids
     self.use_dis_reward = use_dis_reward
     with self.g.as_default():
         self.sess = tf.Session()
         self.model = AttentionModel(config, phase='test_broadcast')
         self.model.build()
         vars = tf.trainable_variables()
         # Restore only trainable variables from the checkpoint.
         self.saver = tf.train.Saver(var_list=vars)
         self.saver.restore(self.sess, ckpt_file)
class VQARewards(object):
    """Computes VQA-based rewards for sampled question paths using an
    externally supplied VQA model (attach it via `set_vqa_model`)."""

    def __init__(self, use_dis_reward=False):
        # The graph is created eagerly, but the model is attached later.
        self.g = tf.Graph()
        self.ans2id = AnswerTokenToTopAnswer()
        self.use_dis_reward = use_dis_reward
        self.model = None

    def set_vqa_model(self, vqa):
        """Attach the VQA model used by `get_reward`."""
        self.model = vqa

    def process_answers(self, ans, ans_len):
        """Convert encoded answer sequences into top-answer ids."""
        return self.ans2id.get_top_answer(_parse_gt_questions(ans, ans_len))

    def get_reward(self, sampled, inputs):
        """Score every sampled question path against its ground-truth answer.

        `inputs` is either (images, ans, ans_len) — answers are mapped to
        top-answer ids here — or (images, ans, ans_len, top_ans_ids).
        Returns (vqa_scores, [images_aug, arr, arr_len, answer_aug,
        answer_len_aug, top_ans_ids_aug, is_valid]).
        """
        if len(inputs) == 3:
            images, ans, ans_len = inputs
            top_ans_ids = self.process_answers(ans, ans_len)
        else:
            assert (len(inputs) == 4)
            images, ans, ans_len, top_ans_ids = inputs

        trimmed = []
        img_rep, ans_rep, len_rep, top_rep = [], [], [], []
        for row, candidates in enumerate(sampled):
            for cand in candidates:
                # Drop the start token; also drop a trailing END_TOKEN.
                cut = -1 if cand[-1] == END_TOKEN else None
                trimmed.append(cand[1:cut])
                img_rep.append(images[row][np.newaxis, :])
                ans_rep.append(ans[row][np.newaxis, :])
                len_rep.append(ans_len[row])
                top_rep.append(top_ans_ids[row])

        # Pack the variable-length paths into a dense array + length vector.
        arr, arr_len = put_to_array(trimmed)
        images_aug = np.concatenate(img_rep)
        answer_aug = np.concatenate(ans_rep).astype(np.int32)
        top_ans_ids_aug = np.array(top_rep)
        answer_len_aug = np.array(len_rep, dtype=np.int32)

        # Run VQA inference on the replicated batch.
        scores = self.model.inference([images_aug, arr, arr_len])
        if self.use_dis_reward:
            # Hard reward: 1.0 iff the argmax answer matches ground truth.
            vqa_scores = np.require(
                scores.argmax(axis=1) == top_ans_ids_aug, np.float32)
        else:
            # Soft reward: score mass on the ground-truth answer id.
            rows = np.arange(scores.shape[0])
            vqa_scores = scores[rows, top_ans_ids_aug]
        is_valid = top_ans_ids_aug != 2000  # 2000 marks OOV answers
        return vqa_scores, [
            images_aug, arr, arr_len, answer_aug, answer_len_aug,
            top_ans_ids_aug, is_valid
        ]
# Example 6 (snippet-collection separator; original marker "Beispiel #6", score 0)
 def __init__(self, ckpt_file='', use_dis_reward=False,
              use_attention_model=False):
     """Restore either a base or an attention VQA model from `ckpt_file`.

     NOTE(review): the enclosing class of this fragment is outside the
     visible chunk. `AttentionModel` is not imported locally here —
     presumably it is available at module level; verify.
     """
     self.g = tf.Graph()  # keep the model isolated in its own graph
     self.ckpt_file = ckpt_file
     self.use_attention_model = use_attention_model
     # Local imports keep these heavyweight modules off module import time.
     from models.vqa_base import BaseModel
     from vqa_config import ModelConfig
     config = ModelConfig()
     self.ans2id = AnswerTokenToTopAnswer()  # answer tokens -> top-answer ids
     self.use_dis_reward = use_dis_reward
     with self.g.as_default():
         self.sess = tf.Session()
         # Choose the model architecture; both are built in 'test' phase.
         if self.use_attention_model:
             self.model = AttentionModel(config, phase='test')
             self.model.build()
         else:
             self.model = BaseModel(config, phase='test')
             self.model.build()
         vars = tf.trainable_variables()
         # Restore only trainable variables from the checkpoint.
         self.saver = tf.train.Saver(var_list=vars)
         self.saver.restore(self.sess, ckpt_file)
 def __init__(self, use_dis_reward=False):
     """Lazy variant: create the graph now, attach the VQA model later.

     NOTE(review): the enclosing class of this fragment is outside the
     visible chunk; the body matches VQARewards.__init__ above, which
     pairs with a `set_vqa_model` setter.
     """
     self.g = tf.Graph()
     self.ans2id = AnswerTokenToTopAnswer()  # answer tokens -> top-answer ids
     self.use_dis_reward = use_dis_reward
     self.model = None  # must be attached before computing rewards