def process_test():
    """Encode the BSIR test split and dump it to JSON meta + HDF5 data files.

    Writes:
        data/QRD_irrelevant_meta_test.json  -- image names and ids
        data/QRD_irrelevant_data_test.data  -- padded question arrays + labels

    Fix: removed a leftover `pdb.set_trace()` debugger breakpoint that halted
    the pipeline before the HDF5 file was written.
    """
    from util import save_hdf5, save_json
    # load data
    meta = load_bsir_dataset()
    # process data: encode every question and collect parallel lists
    labels, images, image_ids, questions = [], [], [], []
    encoder = SentenceEncoder()
    for item in meta:
        image_id = item['image_id']
        image = item['image']
        tokens = encoder.encode_sentence(item['question'])
        images.append(image)
        image_ids.append(image_id)
        questions.append(tokens)
        labels.append(item['label'])
    # pad variable-length questions into a fixed-size array + length vector
    from post_process_variation_questions import put_to_array
    arr, arr_len = put_to_array(questions)
    save_json('data/QRD_irrelevant_meta_test.json',
              {'images': images, 'image_ids': image_ids})
    image_ids = np.array(image_ids, dtype=np.int32)
    labels = np.array(labels, dtype=np.float32)
    save_hdf5('data/QRD_irrelevant_data_test.data',
              {'image_ids': image_ids, 'quest': arr,
               'quest_len': arr_len, 'labels': labels})
def process():
    """Build the QRD irrelevant-question training set from QRPE + VTFP data.

    Writes:
        data/kptest_blacklist.json     -- blacklist used to filter both sources
        data/QRD_irrelevant_meta.json  -- image names and ids
        data/QRD_irrelevant_data.data  -- padded question arrays

    Fix: removed a leftover `pdb.set_trace()` debugger breakpoint that halted
    the pipeline right after loading the two data sources.
    """
    # load data
    blacklist = make_blacklist()
    save_json('data/kptest_blacklist.json', blacklist)
    qrpe = load_qrpe_data(blacklist)
    vtfp = load_vtfp_data(blacklist)
    meta = qrpe + vtfp
    # process data: encode every question and collect parallel lists
    images, image_ids, questions = [], [], []
    encoder = SentenceEncoder()
    for item in meta:
        image_id = item['image_id']
        image = item['image']
        tokens = encoder.encode_sentence(item['question'])
        images.append(image)
        image_ids.append(image_id)
        questions.append(tokens)
    # pad variable-length questions into a fixed-size array + length vector
    from post_process_variation_questions import put_to_array
    arr, arr_len = put_to_array(questions)
    save_json('data/QRD_irrelevant_meta.json',
              {'images': images, 'image_ids': image_ids})
    image_ids = np.array(image_ids, dtype=np.int32)
    save_hdf5('data/QRD_irrelevant_data.data',
              {'image_ids': image_ids, 'quest': arr, 'quest_len': arr_len})
def __init__(self, batch_size, pad_token):
    """Load the kprestval ground-truth questions and pad them into arrays.

    Args:
        batch_size: number of questions per sampled batch.
        pad_token: token id used to pad sequences to max_length.
    """
    self.batch_size = batch_size
    from util import load_hdf5
    dataset = load_hdf5('data/vqa_std_mscoco_kprestval.data')
    parsed = _parse_gt_questions(dataset['quest_arr'], dataset['quest_len'])
    # append token id 2 to every question — presumably the end-of-sentence
    # marker; verify against the vocabulary used elsewhere
    terminated = [seq + [2] for seq in parsed]
    self._quest, self._quest_len = put_to_array(
        terminated, pad_token, max_length=20)
    self.num = self._quest_len.size
def query(self, quest_ids):
    """Look up a path for each question id.

    Returns:
        (mask, arr, arr_len): float32 validity mask (1.0 where the lookup
        succeeded), zero-padded path array, and per-path lengths.
    """
    lookups = [self._query_one(qid) for qid in quest_ids]
    valid_flags = [flag for flag, _ in lookups]
    sequences = [seq for _, seq in lookups]
    arr, arr_len = put_to_array(sequences, pad_token=0)
    return np.array(valid_flags, dtype=np.float32), arr, arr_len
def get_batch(self):
    """Sample one training batch of padded real (positive) and fake (negative) paths.

    Returns:
        [fake_arr, fake_arr_len, real_arr, real_arr_len]
    """
    # positives: uniform draw from the positive pool
    pos_paths = self.random_pick_from_set(self.pos_data, self.num_pos_in_batch)
    real_arr, real_arr_len = put_to_array(
        pos_paths, pad_token=self.pad_token, max_length=20)
    # negatives: take up to `neg_in_policy` from the policy-generated pool,
    # then top up from the initial negative pool
    n_policy = min(len(self.policy_neg_data), self.neg_in_policy)
    neg_paths = self.random_pick_from_set(self.policy_neg_data, n_policy)
    n_init = max(self.neg_in_init, self.num_neg_in_batch - n_policy)
    neg_paths = neg_paths + self.random_pick_from_set(self.init_neg_data, n_init)
    fake_arr, fake_arr_len = put_to_array(
        neg_paths, pad_token=self.pad_token, max_length=20)
    return [fake_arr, fake_arr_len, real_arr, real_arr_len]
def get_scores(self, sampled, image, top_ans_id):
    """Score every sampled question against *image* for one answer id.

    Each sequence loses its leading start token; the trailing END_TOKEN is
    stripped when present. The single image is tiled so the VQA model sees
    one copy per question.
    """
    trimmed = [seq[1:-1] if seq[-1] == END_TOKEN else seq[1:]
               for seq in sampled]
    n = len(sampled)
    images_aug = np.tile(image, [n, 1])
    # pad the trimmed questions into a fixed-size array + lengths
    arr, arr_len = put_to_array(trimmed)
    scores = self.model.inference(self.sess, [images_aug, arr, arr_len])
    return scores[:, top_ans_id].flatten()
def get_scores(self, sampled, image_id, top_ans_id):
    """Score every sampled question against the image `image_id` for one answer id.

    Loads res5c features for the image and tiles them per question; each
    sequence loses its leading start token, plus the trailing END_TOKEN
    when present.
    """
    trimmed = [seq[1:-1] if seq[-1] == END_TOKEN else seq[1:]
               for seq in sampled]
    n = len(sampled)
    arr, arr_len = put_to_array(trimmed)
    # load conv features and replicate once per sampled question
    res5c = self._load_image(image_id)
    images_aug = np.tile(res5c, [n, 1, 1, 1])
    scores = self.model.inference(self.sess, [images_aug, arr, arr_len])
    return scores[:, top_ans_id].flatten()
def get_reward(self, sampled, inputs):
    """Compute VQA-model rewards for a nested batch of sampled questions.

    Args:
        sampled: list (one entry per original example) of lists of token
            sequences; each sequence starts with a start token and may end
            with END_TOKEN.
        inputs: either [images, ans, ans_len] (top answer ids are derived
            via self.process_answers) or [images, ans, ans_len, top_ans_ids].

    Returns:
        (vqa_scores, aux) where aux = [images_aug, arr, arr_len, answer_aug,
        answer_len_aug, top_ans_ids_aug, is_valid]; every aux array is
        aligned with the flattened list of sampled sequences.
    """
    if len(inputs) == 3:
        images, ans, ans_len = inputs
        top_ans_ids = self.process_answers(ans, ans_len)
    else:
        assert (len(inputs) == 4)
        images, ans, ans_len, top_ans_ids = inputs
    images_aug = []
    top_ans_ids_aug = []
    answer_aug = []
    answer_len_aug = []
    pathes = []
    # Flatten the per-example samples; duplicate each example's image,
    # answer, answer length and top answer id once per sampled sequence so
    # all augmented arrays stay index-aligned with `pathes`.
    for _idx, ps in enumerate(sampled):
        for p in ps:
            if p[-1] == END_TOKEN:
                pathes.append(p[1:-1])  # remove start end token
            else:
                pathes.append(p[1:])  # remove start end token
            images_aug.append(images[_idx][np.newaxis, :])
            answer_aug.append(ans[_idx][np.newaxis, :])
            answer_len_aug.append(ans_len[_idx])
            top_ans_ids_aug.append(top_ans_ids[_idx])
    # put to arrays
    arr, arr_len = put_to_array(pathes)
    images_aug = np.concatenate(images_aug)
    answer_aug = np.concatenate(answer_aug).astype(np.int32)
    top_ans_ids_aug = np.array(top_ans_ids_aug)
    answer_len_aug = np.array(answer_len_aug, dtype=np.int32)
    # run inference in VQA
    scores = self.model.inference([images_aug, arr, arr_len])
    if self.use_dis_reward:
        # discrete reward: 1.0 when the VQA argmax matches the target answer
        vqa_scores = np.require(
            scores.argmax(axis=1) == top_ans_ids_aug, np.float32)
    else:
        # continuous reward: the VQA probability assigned to the target answer
        _this_batch_size = scores.shape[0]
        vqa_scores = scores[np.arange(_this_batch_size), top_ans_ids_aug]
    # id 2000 appears to be the out-of-vocabulary answer bucket — confirm
    # against the answer vocabulary size used by the VQA model
    is_valid = top_ans_ids_aug != 2000
    return vqa_scores, [
        images_aug, arr, arr_len, answer_aug, answer_len_aug,
        top_ans_ids_aug, is_valid
    ]
def process_questions(pathes):
    """Parse space-separated token-id strings and pad them into arrays.

    The first and last token of every line are dropped — matching the
    start/end-token trimming convention used elsewhere in this file —
    before handing the sequences to put_to_array.
    """
    parsed = [[int(tok) for tok in line.split(' ')][1:-1] for line in pathes]
    return put_to_array(parsed)