def __getitem__(self, idx):
    """Return the model inputs for sample ``idx`` as a 5-tuple.

    When ``RUN_MODE`` is 'train' or 'val' the sample is taken from
    ``self.ans_list`` (the question is found through ``self.qid_to_ques``)
    and the answer targets come from ``self.proc_ans_and_abs``.  In any
    other mode the sample is taken from ``self.ques_list`` and the three
    answer-related slots keep their ``np.zeros(1)`` placeholders.

    Args:
        idx: index into ``self.ans_list`` ('train'/'val') or
            ``self.ques_list`` (other modes).

    Returns:
        ``(img_feat, ques_ix, ans, abs, loss_masks)``.  The first four are
        wrapped with ``torch.from_numpy``; ``loss_masks`` is returned
        exactly as produced (or as the ``np.zeros(1)`` placeholder).
    """
    # Placeholders so the return statement works when no answers exist.
    ans_iter = np.zeros(1)
    abs_iter = np.zeros(1)
    loss_masks = np.zeros(1)

    # 'train' and 'val' carry annotations; every other mode is question-only.
    has_answers = self.__C.RUN_MODE in ['train', 'val']
    if has_answers:
        ans = self.ans_list[idx]
        ques = self.qid_to_ques[str(ans['question_id'])]
        image_key = str(ans['image_id'])
    else:
        ques = self.ques_list[idx]
        image_key = str(ques['image_id'])

    # Image features: either already in memory or read from the .npz file.
    if self.__C.PRELOAD:
        img_feat_x = self.iid_to_img_feat[image_key]
    else:
        # The context manager closes the archive as soon as 'x' is read.
        with np.load(self.iid_to_img_feat_path[image_key]) as img_feat:
            img_feat_x = img_feat['x'].transpose((1, 0))
    img_feat_iter = proc_img_feat(img_feat_x, self.__C.IMG_FEAT_PAD_SIZE)

    # Question tokens.
    ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN)

    # Answer targets (only available together with annotations).
    if has_answers:
        ans_iter, abs_iter, loss_masks = self.proc_ans_and_abs(ans)

    return (
        torch.from_numpy(img_feat_iter),
        torch.from_numpy(ques_ix_iter),
        torch.from_numpy(ans_iter),
        torch.from_numpy(abs_iter),
        loss_masks,
    )
def __getitem__(self, idx):
    """Build the image, question and answer tensors for sample ``idx``.

    In 'train' mode the sample is looked up through ``self.ans_list`` and
    the answer is encoded with ``proc_ans``; in every other mode the sample
    comes from ``self.ques_list`` and the answer stays a ``np.zeros(1)``
    placeholder.

    :param idx: position of the sample in the active list
    :return: ``img_feat_iter, ques_ix_iter, ans_iter``, each wrapped with
        ``torch.from_numpy``
    """
    # Placeholder answer, replaced only in 'train' mode.
    answer_target = np.zeros(1)

    train_mode = self.__C.RUN_MODE in ['train']
    if train_mode:
        # One annotation per sample; per the original notes it holds
        # `answers` (10 answers), `image_id` and `question_id`, e.g.
        # {'answers': [{'answer': 'skatebodarding'}, ...],
        #  'image_id': 139831, 'question_id': 'VG_1293929'}
        annotation = self.ans_list[idx]
        # The matching question record, e.g.
        # {'image_id': 139831, 'question': "What's the man doing?",
        #  'question_id': 'VG_1293929'}
        question = self.qid_to_ques[str(annotation['question_id'])]
        keyed_by = annotation
    else:
        # Question-only modes read straight from the question list.
        question = self.ques_list[idx]
        keyed_by = question

    if self.__C.PRELOAD:
        # Preloading on: use the features already held for this image_id.
        visual = self.iid_to_img_feat[str(keyed_by['image_id'])]
    else:
        # Otherwise load this image_id's .npz file directly ...
        archive = np.load(
            self.iid_to_img_feat_path[str(keyed_by['image_id'])])
        # ... and swap the two axes of its 'x' array.
        visual = archive['x'].transpose((1, 0))

    # Image features go through proc_img_feat with the configured pad size
    # (100 according to the original notes).
    image_input = proc_img_feat(visual, self.__C.IMG_FEAT_PAD_SIZE)

    # Question indices via proc_ques (question, token_to_ix, MAX_TOKEN).
    question_input = proc_ques(
        question, self.token_to_ix, self.__C.MAX_TOKEN)

    if train_mode:
        # Answer via proc_ans (annotation, ans_to_ix); the original notes
        # describe the output as an answer score matrix.
        answer_target = proc_ans(annotation, self.ans_to_ix)

    return (
        torch.from_numpy(image_input),
        torch.from_numpy(question_input),
        torch.from_numpy(answer_target),
    )
def __getitem__(self, idx):
    """Return ``(image, question, answer)`` tensors for sample ``idx``.

    'train' mode indexes ``self.ans_list`` and encodes the answer with
    ``proc_ans``; all other modes index ``self.ques_list`` and return a
    ``np.zeros(1)`` placeholder in the answer slot.  Every element of the
    returned tuple is wrapped with ``torch.from_numpy``.
    """
    cfg = self.__C

    def image_and_question(question_rec, image_id):
        # Shared by both modes: image features first, then question tokens.
        key = str(image_id)
        if cfg.PRELOAD:
            region_feats = self.iid_to_img_feat[key]
        else:
            # e.g. '551018' ->
            # '../datasets-vqa/coco_extract/val2014/COCO_val2014_000000551018.jpg.npz'
            archive = np.load(self.iid_to_img_feat_path[key])
            # e.g. ndarray (2048, 41) -> ndarray (41, 2048)
            region_feats = archive['x'].transpose((1, 0))
        return (
            proc_img_feat(region_feats, cfg.IMG_FEAT_PAD_SIZE),
            proc_ques(question_rec, self.token_to_ix, cfg.MAX_TOKEN),
        )

    if cfg.RUN_MODE in ['train']:
        answer_rec = self.ans_list[idx]
        question_rec = self.qid_to_ques[str(answer_rec['question_id'])]
        image_arr, question_arr = image_and_question(
            question_rec, answer_rec['image_id'])
        answer_arr = proc_ans(answer_rec, self.ans_to_ix)
    else:
        question_rec = self.ques_list[idx]
        image_arr, question_arr = image_and_question(
            question_rec, question_rec['image_id'])
        # No annotation outside training: keep a dummy answer.
        answer_arr = np.zeros(1)

    return (
        torch.from_numpy(image_arr),
        torch.from_numpy(question_arr),
        torch.from_numpy(answer_arr),
    )
def __getitem__(self, idx):
    """Return the 10-slot sample tuple for entry ``idx`` of ``self.ques_list``.

    Image features are read from the ``'x'`` array of the sample's ``.npz``
    file and passed through ``proc_img_feat``; the question is encoded with
    ``proc_ques(..., element_name='ques')``.  In 'train' mode the answer is
    encoded with ``proc_ans_oe``; otherwise the answer slot keeps its
    ``np.zeros(1)`` placeholder.

    Image loading is best-effort in 'train' mode only: on failure the
    problem is reported on stdout and the ``np.zeros(1)`` placeholder is
    returned for the image.  In every other mode the error propagates.

    Returns:
        ``(img_feat, ques_ix, ans, img_id, pad, ques_id, pad, pad, pad, pad)``
        where the first three are wrapped with ``torch.from_numpy`` and
        ``pad`` is ``np.zeros(1)``.
    """
    # Placeholders used for unused slots and for failed image loads.
    pad = np.zeros(1)
    img_feat_iter = np.zeros(1)
    ans_iter = np.zeros(1)

    is_train = self.__C.RUN_MODE in ['train']
    ques = self.ques_list[idx]
    img_id = ques['img_id']

    img_feat_path = None
    try:
        img_feat_path = self.iid_to_img_feat_path[str(img_id)]
        # Close the archive as soon as the feature array has been read.
        with np.load(img_feat_path) as img_feat:
            img_feat_x = img_feat['x'].transpose((1, 0))
        img_feat_iter = proc_img_feat(img_feat_x, self.__C.IMG_FEAT_PAD_SIZE)
    except Exception:
        # Only training tolerates a bad feature file; `except Exception`
        # (not a bare except) lets KeyboardInterrupt/SystemExit through.
        if not is_train:
            raise
        print('false')
        # Report the path when it was resolved, else the image id, so the
        # handler itself cannot fail on a second lookup.
        print(img_feat_path if img_feat_path is not None else img_id)

    # Process question
    ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN,
                             element_name='ques')

    # Process answer (annotations are only used in training)
    if is_train:
        ans_iter = proc_ans_oe(ques, self.ans_to_ix)

    return (
        torch.from_numpy(img_feat_iter),
        torch.from_numpy(ques_ix_iter),
        torch.from_numpy(ans_iter),
        img_id,
        pad,
        ques['ques_id'],
        pad, pad, pad, pad,
    )
def get_ans_ix(self, max_token=4):
    """Encode every answer with the question tokenizer and stack the results.

    Answers are read from ``self.ix_to_ans`` in index order (keys ``'0'``,
    ``'1'``, ...), each is passed to ``proc_ques`` together with
    ``self.token_to_ix`` and ``max_token``, and the per-answer arrays are
    combined with ``np.stack``.

    Args:
        max_token: third argument forwarded to ``proc_ques`` (presumably the
            token length per answer -- confirm against ``proc_ques``).
            Defaults to 4, the value that was previously hard-coded.

    Returns:
        The ``np.stack`` of one ``proc_ques`` result per answer.

    Raises:
        ValueError: from ``np.stack`` when ``self.ix_to_ans`` is empty.
    """
    # Walk the keys numerically so row i corresponds to answer index i.
    answers = [self.ix_to_ans[str(i)] for i in range(len(self.ix_to_ans))]
    return np.stack(
        [proc_ques(ans, self.token_to_ix, max_token) for ans in answers]
    )
def __getitem__(self, idx):
    """Return ``(image, question, answer)`` tensors for a 'show' sample.

    The sample is entry ``idx`` of ``self.ques_list``.  No answer is
    available here, so the third element is ``np.zeros(1)`` wrapped with
    ``torch.from_numpy`` like the other two.
    """
    sample = self.ques_list[idx]

    # Image features: 'x' array of the sample's .npz file, axes swapped.
    feature_file = self.iid_to_img_feat_path[str(sample['image_id'])]
    region_feats = np.load(feature_file)['x'].transpose((1, 0))
    image_input = proc_img_feat(region_feats, self.__C.IMG_FEAT_PAD_SIZE)

    # Question feature
    question_input = proc_ques(sample, self.token_to_ix, self.__C.MAX_TOKEN)

    # Dummy answer slot keeps the tuple layout of the other run modes.
    answer_input = np.zeros(1)

    return (
        torch.from_numpy(image_input),
        torch.from_numpy(question_input),
        torch.from_numpy(answer_input),
    )
def __getitem__(self, idx):
    """Return the 10-slot multiple-choice sample tuple for entry ``idx``.

    The sample is ``self.ques_list[idx]``.  Image features are read from the
    ``'x'`` array of its ``.npz`` file and passed through ``proc_img_feat``;
    the question is encoded with ``proc_ques(..., element_name='ques')``.

    * 'train' mode: ``proc_ans_mc`` supplies the answer score, answer label,
      multiple-choice indices and answer index.
    * other modes: ``proc_ans_mc_test`` supplies only the multiple-choice
      indices and answer index; the score and label slots are ``pad``.

    Image loading is best-effort in 'train' mode only: on failure the
    problem is reported on stdout and the ``np.zeros(1)`` placeholder is
    returned for the image.  In every other mode the error propagates.

    Returns:
        ``(img_feat, ques_ix, ans_score, img_id, pad, ques_id, ans_label,
        ans_mc_ix, ans_ix, mc)``.  ``img_feat``, ``ques_ix`` and
        ``ans_mc_ix`` are always wrapped with ``torch.from_numpy``;
        ``ans_score`` and ``ans_label`` are wrapped in 'train' mode and are
        the raw ``pad`` array (``np.zeros(1)``) otherwise.
    """
    # Placeholders used for unused slots and for failed image loads.
    pad = np.zeros(1)
    img_feat_iter = np.zeros(1)

    is_train = self.__C.RUN_MODE in ['train']
    ques = self.ques_list[idx]
    img_id = ques['img_id']

    img_feat_path = None
    try:
        img_feat_path = self.iid_to_img_feat_path[str(img_id)]
        # Close the archive as soon as the feature array has been read.
        with np.load(img_feat_path) as img_feat:
            img_feat_x = img_feat['x'].transpose((1, 0))
        img_feat_iter = proc_img_feat(img_feat_x, self.__C.IMG_FEAT_PAD_SIZE)
    except Exception:
        # Only training tolerates a bad feature file; `except Exception`
        # (not a bare except) lets KeyboardInterrupt/SystemExit through.
        if not is_train:
            raise
        print('false')
        # Report the path when it was resolved, else the image id, so the
        # handler itself cannot fail on a second lookup.
        print(img_feat_path if img_feat_path is not None else img_id)

    # Process question
    ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN,
                             element_name='ques')

    # Process answer
    if is_train:
        # ans_score, ans_label, ans_mc_ix, ans_ix
        ans_iter, ans_label, ans_mc_ix, ans_ix = proc_ans_mc(
            ques, self.ans_to_ix, self.token_to_ix)
        ans_slot = torch.from_numpy(ans_iter)
        label_slot = torch.from_numpy(ans_label)
    else:
        ans_mc_ix, ans_ix = proc_ans_mc_test(
            ques, self.ans_to_ix, self.token_to_ix)
        # No ground truth outside training: score and label stay padded.
        ans_slot = pad
        label_slot = pad

    return (
        torch.from_numpy(img_feat_iter),
        torch.from_numpy(ques_ix_iter),
        ans_slot,
        img_id,
        pad,
        ques['ques_id'],
        label_slot,
        torch.from_numpy(ans_mc_ix),
        ans_ix,
        ques['mc'],
    )
def __getitem__(self, idx):
    """Return ``(image, question, answer, facts)`` tensors for sample ``idx``.

    'train' mode indexes ``self.ans_list``, encodes the answer with
    ``proc_ans`` and reads detected objects from ``self.img_obj_train``.
    Every other mode indexes ``self.ques_list``, leaves the answer as a
    ``np.zeros(1)`` placeholder and reads objects from ``self.img_obj_val``.

    In both cases the object list is reduced with ``get_top_obj`` and padded
    with ``'pad_obj'`` up to ``TOP_OBJ`` entries; for each object the first
    ``TOP_REL`` relations from ``self.react_obj_rel`` are taken (padded with
    the first ``'pad_obj'`` relation) and the collected facts are encoded
    with ``proc_fact``.  All four outputs are wrapped with
    ``torch.from_numpy``.
    """
    cfg = self.__C
    answer_arr = np.zeros(1)

    training = cfg.RUN_MODE in ['train']
    if training:
        answer_rec = self.ans_list[idx]
        question_rec = self.qid_to_ques[str(answer_rec['question_id'])]
        keyed_by = answer_rec
    else:
        question_rec = self.ques_list[idx]
        keyed_by = question_rec

    # Image features are used as stored: no transpose and no proc_img_feat.
    if cfg.PRELOAD:
        image_arr = self.iid_to_img_feat[str(keyed_by['image_id'])]
    else:
        image_arr = np.load(
            self.iid_to_img_feat_path[str(keyed_by['image_id'])])

    # Question tokens, then the answer when an annotation exists.
    question_arr = proc_ques(question_rec, self.token_to_ix, cfg.MAX_TOKEN)
    if training:
        answer_arr = proc_ans(answer_rec, self.ans_to_ix)

    # Detected objects for this image, e.g. ['person', 'windows', ...].
    object_table = self.img_obj_train if training else self.img_obj_val
    detected = object_table[str(keyed_by['image_id'])]

    # Keep the top objects and pad the list (in place) to exactly TOP_OBJ.
    object_quota = cfg.TOP_OBJ
    chosen = get_top_obj(detected, top=object_quota)
    chosen.extend(['pad_obj'] * (object_quota - len(chosen)))

    # For each object take up to TOP_REL relations, padding short lists.
    filler = self.react_obj_rel['pad_obj'][0]
    relation_quota = cfg.TOP_REL
    facts = []
    for obj_name in chosen:
        obj_facts = self.react_obj_rel[obj_name][:relation_quota]
        obj_facts.extend([filler] * (relation_quota - len(obj_facts)))
        facts.extend(obj_facts)

    # Token budget across all TOP_OBJ * TOP_REL facts.
    token_budget = cfg.FACT_TOKEN * cfg.TOP_OBJ * cfg.TOP_REL
    fact_arr = proc_fact(facts, self.token_to_ix, token_budget,
                         cfg.FACT_TOKEN)

    return (
        torch.from_numpy(image_arr),
        torch.from_numpy(question_arr),
        torch.from_numpy(answer_arr),
        torch.from_numpy(fact_arr),
    )
def __getitem__(self, idx):
    """Return ``(image, question, answer)`` tensors for sample ``idx``.

    'train' mode indexes ``self.ans_list`` and encodes the answer with
    ``proc_ans``; all other modes index ``self.ques_list`` and keep a
    ``np.zeros(1)`` answer placeholder.

    Without ``PRELOAD`` the image features are the ``'x'`` array of the
    sample's ``.npz`` file (axes swapped) horizontally stacked with the
    ``.npy`` array found under ``self.feature_path_vc``.  If the two row
    counts differ, the image id is printed and the first array is replaced
    by the leading rows of the ``.npy`` file under ``self.feature_path_bu``.
    With ``PRELOAD`` the cached features are used as they are.

    Raises:
        AssertionError: the row counts still differ after the fallback.
    """
    # Placeholder answer, replaced only in 'train' mode.
    ans_iter = np.zeros(1)

    is_train = self.__C.RUN_MODE in ['train']
    if is_train:
        ans = self.ans_list[idx]
        ques = self.qid_to_ques[str(ans['question_id'])]
        image_id = ans['image_id']
    else:
        ques = self.ques_list[idx]
        image_id = ques['image_id']

    if self.__C.PRELOAD:
        img_feat_x = self.iid_to_img_feat[str(image_id)]
    else:
        # Close the archive as soon as the feature array has been read.
        with np.load(self.iid_to_img_feat_path[str(image_id)]) as img_feat:
            img_feat_x = img_feat['x'].transpose((1, 0))
        img_feat_vc = np.load(
            self.feature_path_vc + '/' + str(image_id) + '.npy')

        # Explicit comparison instead of try/assert/except: asserts are
        # stripped under `python -O`, which would skip the fallback.
        if img_feat_x.shape[0] != img_feat_vc.shape[0]:
            print(image_id)
            img_feat_bu = np.load(
                self.feature_path_bu + '/' + str(image_id) + '.npy')
            img_feat_x = img_feat_bu[:img_feat_vc.shape[0], :]
            if img_feat_x.shape[0] != img_feat_vc.shape[0]:
                # Keeps the exception type the second assert used to raise.
                raise AssertionError(
                    'feature row mismatch for image %s: %d vs %d'
                    % (image_id, img_feat_x.shape[0], img_feat_vc.shape[0]))

        img_feat_x = np.hstack((img_feat_x, img_feat_vc))

    img_feat_iter = proc_img_feat(img_feat_x, self.__C.IMG_FEAT_PAD_SIZE)

    # Process question
    ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN)

    # Process answer
    if is_train:
        ans_iter = proc_ans(ans, self.ans_to_ix)

    return (
        torch.from_numpy(img_feat_iter),
        torch.from_numpy(ques_ix_iter),
        torch.from_numpy(ans_iter),
    )