def __getitem__(self, idx):
    """Assemble one sample: image features, question token ids, answer scores.

    The question is tokenised from its raw text on every call; image
    features are read from the file registered for the record's image id.

    Args:
        idx: Index into ``self.ans_list``.

    Returns:
        Tuple ``(img_feat, ques_ix, ans_score, idx)``. The first three are
        tensors built with ``torch.from_numpy``:

        * ``img_feat`` - the ``'x'`` entry of the feature file, transposed,
          then cut / zero-padded to ``my_cfg.img_feat_pad_size`` rows.
        * ``ques_ix`` - ``my_cfg.max_token`` int64 ids, zero where the
          question is shorter.
        * ``ans_score`` - float32 vector with one slot per entry of
          ``self.ans_to_ix``.
    """
    ans = self.ans_list[idx]
    qid = int(ans['question_id'])
    ques = self.id_to_ques[qid]

    # --- Image features -------------------------------------------------
    img_path = self.id_to_img_path[int(ans['image_id'])]
    img_feat = np.load(img_path)
    img_feat_x = img_feat['x'].transpose((1, 0))
    # Slicing is a no-op when there are fewer rows than the pad size.
    img_feat_x = img_feat_x[:my_cfg.img_feat_pad_size]
    img_feat_x = np.pad(
        img_feat_x,
        ((0, my_cfg.img_feat_pad_size - img_feat_x.shape[0]), (0, 0)),
        mode='constant',
        constant_values=0)

    # --- Question tokens ------------------------------------------------
    ques_ix = np.zeros(my_cfg.max_token, np.int64)
    words = re.sub(
        r"([.,'!?\"()*#:;])", '', ques['question'].lower()
    ).replace('-', ' ').replace('/', ' ').split()
    # Slice first so a question longer than max_token (or max_token == 0)
    # can never index past the end of ques_ix.
    for ix, word in enumerate(words[:my_cfg.max_token]):
        if word in self.token_to_ix:
            ques_ix[ix] = self.token_to_ix[word]
        else:
            ques_ix[ix] = self.token_to_ix['UNK']

    # --- Answer scores --------------------------------------------------
    ans_score = np.zeros(len(self.ans_to_ix), np.float32)
    # Count how often each processed answer string occurs for this sample.
    ans_counts = {}
    for entry in ans['answers']:
        ans_proc = pre.prep_ans(entry['answer'])
        ans_counts[ans_proc] = ans_counts.get(ans_proc, 0) + 1
    # Answers outside the vocabulary are ignored.
    for ans_str, count in ans_counts.items():
        if ans_str in self.ans_to_ix:
            ans_score[self.ans_to_ix[ans_str]] = pre.get_score(count)

    return (torch.from_numpy(img_feat_x),
            torch.from_numpy(ques_ix),
            torch.from_numpy(ans_score),
            idx)
def __getitem__(self, idx):
    """Assemble one sample from a pre-encoded question, image features and boxes.

    Args:
        idx: Index into ``self.ans_list``.

    Returns:
        Tuple ``(img_feat, ques_ix, ans_score, boxes, idx)``. The first four
        are tensors built with ``torch.from_numpy``:

        * ``img_feat`` - the ``'x'`` entry of the feature file, cut /
          zero-padded to ``my_cfg.img_feat_pad_size`` rows.
        * ``ques_ix`` - the 2-D array loaded for the question, cut /
          zero-padded to ``my_cfg.max_token`` rows.
        * ``ans_score`` - float32 vector with one slot per entry of
          ``self.ans_to_ix``.
        * ``boxes`` - the ``'boxes'`` entry of the feature file with its two
          axes swapped.
    """
    ans = self.ans_list[idx]
    qid = int(ans['question_id'])

    # --- Question -------------------------------------------------------
    ques_ix = np.load(self.id_to_ques_path[qid])
    if ques_ix.shape[0] > my_cfg.max_token:
        # Keep the original final row in the last kept position
        # (presumably an end/separator marker, going by the name - confirm).
        sep = ques_ix[-1]
        ques_ix = ques_ix[:my_cfg.max_token]
        ques_ix[-1] = sep
    ques_ix = np.pad(
        ques_ix,
        ((0, my_cfg.max_token - ques_ix.shape[0]), (0, 0)),
        mode='constant',
        constant_values=0)

    # --- Image features and boxes ----------------------------------------
    img_path = self.id_to_img_path[int(ans['image_id'])]
    img_feat = np.load(img_path)
    # NOTE(review): boxes are returned as stored - they are neither cut nor
    # padded, unlike the features below. If boxes has one row per feature
    # row, the two can disagree in length after truncation/padding; confirm
    # whether that is intended.
    boxes = img_feat['boxes']
    img_feat_x = img_feat['x']
    # Slicing is a no-op when there are fewer rows than the pad size.
    img_feat_x = img_feat_x[:my_cfg.img_feat_pad_size]
    img_feat_x = np.pad(
        img_feat_x,
        ((0, my_cfg.img_feat_pad_size - img_feat_x.shape[0]), (0, 0)),
        mode='constant',
        constant_values=0)

    # --- Answer scores --------------------------------------------------
    ans_score = np.zeros(len(self.ans_to_ix), np.float32)
    # Count how often each processed answer string occurs for this sample.
    ans_counts = {}
    for entry in ans['answers']:
        ans_proc = pre.prep_ans(entry['answer'])
        ans_counts[ans_proc] = ans_counts.get(ans_proc, 0) + 1
    # Answers outside the vocabulary are ignored.
    for ans_str, count in ans_counts.items():
        if ans_str in self.ans_to_ix:
            ans_score[self.ans_to_ix[ans_str]] = pre.get_score(count)

    return (torch.from_numpy(img_feat_x),
            torch.from_numpy(ques_ix),
            torch.from_numpy(ans_score),
            torch.from_numpy(boxes).permute(1, 0),
            idx)
def __getitem__(self, idx):
    """Assemble one sample from a pre-encoded question and a pickled feature dict.

    Args:
        idx: Index into ``self.ans_list``.

    Returns:
        Tuple ``(img_feat, ques_ix, ans_score, boxes, idx)``. The first four
        are tensors built with ``torch.from_numpy``:

        * ``img_feat`` - the ``'x'`` entry of the stored object, cut /
          zero-padded to ``my_cfg.img_feat_pad_size`` rows.
        * ``ques_ix`` - the array loaded for the question, unchanged (this
          method applies no length normalisation to it).
        * ``ans_score`` - float32 vector with one slot per entry of
          ``self.ans_to_ix``.
        * ``boxes`` - the ``'boxes'`` entry, cut / zero-padded to
          ``my_cfg.img_feat_pad_size`` rows, then with its two axes swapped.
    """
    ans = self.ans_list[idx]
    qid = int(ans['question_id'])

    # --- Question -------------------------------------------------------
    ques_ix = np.load(self.id_to_ques_path[qid])

    # --- Image features and boxes ----------------------------------------
    img_path = self.id_to_img_path[int(ans['image_id'])]
    # SECURITY: allow_pickle=True unpickles the file content, which can run
    # arbitrary code. Only point id_to_img_path at trusted feature files.
    # 'arr_0' holds a 0-d object array; [()] unwraps the stored object.
    img_feat = np.load(img_path, allow_pickle=True)['arr_0'][()]
    img_feat_x = img_feat['x']
    boxes = img_feat['boxes']

    pad_size = my_cfg.img_feat_pad_size
    if img_feat_x.shape[0] > pad_size:
        # Cut both arrays together, keyed on the feature row count.
        img_feat_x = img_feat_x[:pad_size]
        boxes = boxes[:pad_size]
    img_feat_x = np.pad(
        img_feat_x,
        ((0, pad_size - img_feat_x.shape[0]), (0, 0)),
        mode='constant',
        constant_values=0)
    boxes = np.pad(
        boxes,
        ((0, pad_size - boxes.shape[0]), (0, 0)),
        mode='constant',
        constant_values=0)

    # --- Answer scores --------------------------------------------------
    ans_score = np.zeros(len(self.ans_to_ix), np.float32)
    # Count how often each processed answer string occurs for this sample.
    ans_counts = {}
    for entry in ans['answers']:
        ans_proc = pre.prep_ans(entry['answer'])
        ans_counts[ans_proc] = ans_counts.get(ans_proc, 0) + 1
    # Answers outside the vocabulary are ignored.
    for ans_str, count in ans_counts.items():
        if ans_str in self.ans_to_ix:
            ans_score[self.ans_to_ix[ans_str]] = pre.get_score(count)

    return (torch.from_numpy(img_feat_x),
            torch.from_numpy(ques_ix),
            torch.from_numpy(ans_score),
            torch.from_numpy(boxes).permute(1, 0),
            idx)
def __getitem__(self, idx):
    """Assemble one sample whose answer space is the vocabulary plus per-image texts.

    In addition to the vocabulary slots of ``self.ans_to_ix``, the score
    vector and the mask get 14 extra slots, one per text string loaded for
    the image.

    Args:
        idx: Index into ``self.ans_list``.

    Returns:
        Tuple ``(img_feat, ques_ix, ans_score, idx, cat, text_feats, texts)``:

        * ``img_feat`` - NumPy array (not converted to a tensor here): the
          ``'arr_0'`` entry of the feature file, cut / zero-padded to
          ``my_cfg.img_feat_pad_size`` rows.
        * ``ques_ix`` - tensor of the 2-D array loaded for the question,
          cut / zero-padded to ``my_cfg.max_token`` rows.
        * ``ans_score`` - float32 tensor of length
          ``len(self.ans_to_ix) + 14``.
        * ``cat`` - bool tensor of the same length; ``False`` at the indices
          listed in ``self.types_dict[ans['answer_type']]`` and at all 14
          text slots, ``True`` elsewhere.
        * ``text_feats`` - float32 tensor with 14 rows.
        * ``texts`` - list of 14 strings after ``pre.prep_ans``, padded
          with ``''``.
    """
    max_texts = 14       # text slots appended after the answer vocabulary
    text_feat_dim = 300  # width of the all-zero fallback for text features

    ans = self.ans_list[idx]
    qid = int(ans['question_id'])
    n_ans = len(self.ans_to_ix)

    # --- Question -------------------------------------------------------
    ques_ix = np.load(self.id_to_ques_path[qid])
    if ques_ix.shape[0] > my_cfg.max_token:
        # Keep the original final row in the last kept position
        # (presumably an end/separator marker, going by the name - confirm).
        sep = ques_ix[-1]
        ques_ix = ques_ix[:my_cfg.max_token]
        ques_ix[-1] = sep
    ques_ix = np.pad(
        ques_ix,
        ((0, my_cfg.max_token - ques_ix.shape[0]), (0, 0)),
        mode='constant',
        constant_values=0)

    # --- Locate the text files that belong to this image -----------------
    img_path = self.id_to_img_path[int(ans['image_id'])]
    # The split is chosen by a plain substring test on the path.
    split_cfg = my_cfg.VAL if 'val' in img_path else my_cfg.TRAIN
    stem = img_path.split('/')[-1].split('.')[0]
    texts_dir = split_cfg['Texts']
    # The feature directory is the texts directory with its last character
    # dropped and '_1/' appended.
    feats_dir = texts_dir[:-1] + '_1/'
    texts = list(np.load(texts_dir + stem + '.jpg' + '.npy'))
    text_feats = np.load(feats_dir + stem + '.jpg' + '.npy')

    # --- Image features -------------------------------------------------
    img_feat = np.load(img_path)
    img_feat_x = img_feat['arr_0']
    # Slicing is a no-op when there are fewer rows than the pad size.
    img_feat_x = img_feat_x[:my_cfg.img_feat_pad_size]
    img_feat_x = np.pad(
        img_feat_x,
        ((0, my_cfg.img_feat_pad_size - img_feat_x.shape[0]), (0, 0)),
        mode='constant',
        constant_values=0)

    # --- Texts: exactly max_texts entries --------------------------------
    texts = texts[:max_texts]
    texts.extend([''] * (max_texts - len(texts)))
    # Process every text once, up front.
    # NOTE(review): this used to happen inside the answer loop, i.e. once
    # per distinct answer on already-processed strings, and not at all for
    # a sample without answers. Running it once is equivalent provided
    # pre.prep_ans is idempotent - confirm.
    texts = [pre.prep_ans(text) for text in texts]

    # --- Answer scores --------------------------------------------------
    ans_score = np.zeros(n_ans + max_texts, np.float32)
    # Count how often each processed answer string occurs for this sample.
    ans_counts = {}
    for entry in ans['answers']:
        ans_proc = pre.prep_ans(entry['answer'])
        ans_counts[ans_proc] = ans_counts.get(ans_proc, 0) + 1
    for ans_str, count in ans_counts.items():
        # An answer can score in a text slot, a vocabulary slot, or both.
        for j, text in enumerate(texts):
            if ans_str == text:
                ans_score[n_ans + j] = pre.get_score(count)
        if ans_str in self.ans_to_ix:
            ans_score[self.ans_to_ix[ans_str]] = pre.get_score(count)

    # --- Text features: exactly max_texts rows ----------------------------
    if text_feats.shape[0]:
        # Cut first: texts is capped at max_texts, and a longer feature
        # array would otherwise give np.pad a negative width (ValueError).
        text_feats = text_feats[:max_texts]
        text_feats = np.pad(
            text_feats,
            ((0, max_texts - text_feats.shape[0]), (0, 0)),
            mode='constant',
            constant_values=0)
    else:
        text_feats = np.zeros((max_texts, text_feat_dim))

    # --- Mask over the extended answer space ------------------------------
    # np.bool_ instead of np.bool: the latter alias was removed from NumPy.
    cat = np.ones(n_ans + max_texts, np.bool_)
    for i in self.types_dict[ans['answer_type']]:
        cat[i] = False
    cat[n_ans:n_ans + max_texts] = False

    return (img_feat_x,
            torch.from_numpy(ques_ix),
            torch.from_numpy(ans_score),
            idx,
            torch.from_numpy(cat),
            torch.from_numpy(text_feats.astype(np.float32)),
            texts)