예제 #1
0
    def __getitem__(self, idx):
        """Return the tensors for the sample at position ``idx``.

        In ``'train'`` mode the sample is selected from ``self.ans_list`` and
        the matching question is looked up by ``question_id``; the answer is
        processed with ``proc_ans``. In every other mode the sample comes
        from ``self.ques_list`` and the answer is a one-element zero array
        used as a placeholder.

        :param idx: index into ``self.ans_list`` (train) or
            ``self.ques_list`` (other modes).
        :return: tuple ``(img_feat, ques_ix, ans)`` where every element is
            produced by ``torch.from_numpy``.
        :raises Exception: any error raised while reading the image feature
            file is re-raised unchanged after the offending path is printed.
        """
        # Placeholder answer returned when the run mode carries no labels.
        ans_iter = np.zeros(1)

        is_train = self.__C.RUN_MODE in ['train']
        if is_train:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_key = str(ans['image_id'])
        else:
            ques = self.ques_list[idx]
            image_key = str(ques['image_id'])

        # Image features: either already in memory or read from the file
        # registered for this image id.
        if self.__C.PRELOAD:
            img_feat_x = self.iid_to_img_feat[image_key]
        else:
            # Resolve the path outside the try block so the handler can
            # report it without repeating a lookup that may itself fail.
            feat_path = self.iid_to_img_feat_path[image_key]
            try:
                img_feat_x = np.load(feat_path)['x'].transpose((1, 0))
            except Exception:
                # Report which file is broken, then surface the real error
                # instead of continuing with an unbound ``img_feat_x``.
                print('false')
                print(feat_path)
                raise
        img_feat_iter = proc_img_feat(img_feat_x,
                                      self.__C.IMG_FEAT_PAD_SIZE)

        # Question token indices.
        ques_ix_iter = proc_ques(ques, self.token_to_ix,
                                 self.__C.MAX_TOKEN)

        # Answers are only available in train mode.
        if is_train:
            ans_iter = proc_ans(ans, self.ans_to_ix)

        return torch.from_numpy(img_feat_iter), \
               torch.from_numpy(ques_ix_iter), \
               torch.from_numpy(ans_iter)
예제 #2
0
    def __getitem__(self, idx):
        """Return the sample at position ``idx`` as a dict of tensors.

        For the ``'train'`` and ``'val'`` run modes the sample is taken from
        ``self.ans_list`` and the dict additionally carries the values
        returned by ``self.get_sampled_ans``. For any other mode the sample
        is taken from ``self.ques_list`` and ``"ans_score"`` is a one-element
        zero placeholder.

        :param idx: index into ``self.ans_list`` or ``self.ques_list``,
            depending on the run mode.
        :return: dict with keys ``"img_feat"``, ``"ques_ix"``,
            ``"ans_score"`` and, for labelled modes,
            ``"ans_embedding_sampled"`` and ``"ans_score_sampled"``.
        """
        labelled = self.__C.RUN_MODE in ['train', 'val']

        # Pick the record that drives this sample and the image it refers to.
        if labelled:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_key = str(ans['image_id'])
        else:
            ques = self.ques_list[idx]
            image_key = str(ques['image_id'])

        # Image features come from memory or from the registered file.
        if self.__C.PRELOAD:
            raw_feat = self.iid_to_img_feat[image_key]
        else:
            archive = np.load(self.iid_to_img_feat_path[image_key])
            raw_feat = archive['x'].transpose((1, 0))
        img_feat_iter = proc_img_feat(raw_feat, self.__C.IMG_FEAT_PAD_SIZE)

        ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN)

        if labelled:
            ans_iter = proc_ans(ans, self.ans_to_ix)
            sampled_embedding, sampled_score = self.get_sampled_ans(ans_iter)
        else:
            # No answers in this mode: keep a dummy array for the collate step.
            ans_iter = np.zeros(1)

        sample = {
            "img_feat" : torch.from_numpy(img_feat_iter),
            "ques_ix" : torch.from_numpy(ques_ix_iter),
            "ans_score" : torch.from_numpy(ans_iter),
        }
        if labelled:
            sample["ans_embedding_sampled"] = torch.from_numpy(sampled_embedding)
            sample["ans_score_sampled"] = torch.from_numpy(sampled_score)
        return sample
    def __getitem__(self, idx):
        '''
        Build the tensors for the sample at position ``idx``.

        :param idx: index into ``self.ans_list`` (train mode) or
            ``self.ques_list`` (any other mode)
        :return: tuple of tensors ``(img_feat_iter, ques_ix_iter, ans_iter)``
        '''
        # Placeholder answer used when the run mode has no annotations.
        ans_iter = np.zeros(1)

        train_mode = self.__C.RUN_MODE in ['train']
        if train_mode:
            # One annotation per index. Example record:
            # {'answers': [{'answer': 'skatebodarding'}, ...],
            #  'image_id': 139831, 'question_id': 'VG_1293929'}
            ans = self.ans_list[idx]
            # Matching question. Example record:
            # {'image_id': 139831, 'question': "What's the man doing?",
            #  'question_id': 'VG_1293929'}
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_key = str(ans['image_id'])
        else:
            ques = self.ques_list[idx]
            image_key = str(ques['image_id'])

        if self.__C.PRELOAD:
            # Features were loaded ahead of time; fetch them by image id.
            raw_feat = self.iid_to_img_feat[image_key]
        else:
            # Read the .npz file registered for this image and swap the two
            # axes of its 'x' array.
            archive = np.load(self.iid_to_img_feat_path[image_key])
            raw_feat = archive['x'].transpose((1, 0))

        # Image features processed by proc_img_feat with the configured pad
        # size (the original author's note gives 100 — TODO confirm).
        img_feat_iter = proc_img_feat(raw_feat, self.__C.IMG_FEAT_PAD_SIZE)

        # Question processed by proc_ques with the token vocabulary and the
        # configured MAX_TOKEN.
        ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN)

        if train_mode:
            # Answer processed by proc_ans with the answer vocabulary
            # (described by the original author as an answer-score matrix).
            ans_iter = proc_ans(ans, self.ans_to_ix)

        return (torch.from_numpy(img_feat_iter),
                torch.from_numpy(ques_ix_iter),
                torch.from_numpy(ans_iter))
예제 #4
0
    def __getitem__(self, idx):
        """Return ``(img_feat, ques_ix, ans)`` tensors for sample ``idx``.

        Train mode reads the annotation at ``self.ans_list[idx]`` and its
        question; all other modes read ``self.ques_list[idx]`` and return a
        one-element zero array in place of the answer.

        :param idx: position of the sample in the list used by the run mode.
        :return: three tensors created with ``torch.from_numpy``.
        """
        use_answers = self.__C.RUN_MODE in ['train']

        if use_answers:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_key = str(ans['image_id'])
        else:
            ques = self.ques_list[idx]
            image_key = str(ques['image_id'])

        if self.__C.PRELOAD:
            feat = self.iid_to_img_feat[image_key]
        else:
            # Example mapping noted by the original author:
            # '551018' -> '../datasets-vqa/coco_extract/val2014/COCO_val2014_000000551018.jpg.npz'
            feat_file = np.load(self.iid_to_img_feat_path[image_key])
            # Swap the axes, e.g. ndarray (2048, 41) -> ndarray (41, 2048).
            feat = feat_file['x'].transpose((1, 0))
        img_feat_iter = proc_img_feat(feat, self.__C.IMG_FEAT_PAD_SIZE)

        ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN)

        # Without annotations a dummy array stands in for the answer.
        if use_answers:
            ans_iter = proc_ans(ans, self.ans_to_ix)
        else:
            ans_iter = np.zeros(1)

        tensors = (
            torch.from_numpy(img_feat_iter),
            torch.from_numpy(ques_ix_iter),
            torch.from_numpy(ans_iter),
        )
        return tensors
예제 #5
0
파일: load_data.py 프로젝트: yyyanglz/KAN
    def __getitem__(self, idx):
        """Return image, question, answer and fact tensors for sample ``idx``.

        Train mode is driven by ``self.ans_list`` and looks objects up in
        ``self.img_obj_train``; any other mode is driven by
        ``self.ques_list``, looks objects up in ``self.img_obj_val`` and
        returns a one-element zero array as the answer.

        :param idx: index into ``self.ans_list`` (train) or
            ``self.ques_list`` (other modes).
        :return: tuple ``(img_feat, ques_ix, ans, fact_idx)`` of tensors
            built with ``torch.from_numpy``.
        """
        cfg = self.__C
        train_mode = cfg.RUN_MODE in ['train']

        # Placeholder answer, replaced below when annotations exist.
        ans_iter = np.zeros(1)

        if train_mode:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_key = str(ans['image_id'])
        else:
            ques = self.ques_list[idx]
            image_key = str(ques['image_id'])

        # The loaded array is used as-is: no 'x' lookup, transpose or padding
        # is applied in this variant.
        if cfg.PRELOAD:
            img_feat_iter = self.iid_to_img_feat[image_key]
        else:
            img_feat_iter = np.load(self.iid_to_img_feat_path[image_key])

        ques_ix_iter = proc_ques(ques, self.token_to_ix, cfg.MAX_TOKEN)

        if train_mode:
            ans_iter = proc_ans(ans, self.ans_to_ix)
            # Object labels for the image, e.g. ['person', 'windows', ..., 'apple'].
            objects = self.img_obj_train[image_key]
        else:
            objects = self.img_obj_val[image_key]

        # Keep the top TOP_OBJ objects and pad the list up to that length.
        top_objects = get_top_obj(objects, top=cfg.TOP_OBJ)
        top_objects.extend(['pad_obj'] * (cfg.TOP_OBJ - len(top_objects)))

        # For every object take its first TOP_REL relations, padding short
        # lists with the first relation registered for 'pad_obj'.
        pad_rel = self.react_obj_rel['pad_obj'][0]
        top_fact = []
        for obj in top_objects:
            facts = self.react_obj_rel[obj][:cfg.TOP_REL]
            facts.extend([pad_rel] * (cfg.TOP_REL - len(facts)))
            top_fact.extend(facts)

        # Total token budget: FACT_TOKEN for each of TOP_OBJ * TOP_REL facts.
        num_token = cfg.FACT_TOKEN * cfg.TOP_OBJ * cfg.TOP_REL
        fact_idx_iter = proc_fact(top_fact, self.token_to_ix, num_token,
                                  cfg.FACT_TOKEN)

        return (torch.from_numpy(img_feat_iter),
                torch.from_numpy(ques_ix_iter),
                torch.from_numpy(ans_iter),
                torch.from_numpy(fact_idx_iter))
예제 #6
0
    def __getitem__(self, idx):
        """Return ``(img_feat, ques_ix, ans)`` tensors for sample ``idx``.

        Train mode reads the annotation at ``self.ans_list[idx]`` and its
        question; every other mode reads ``self.ques_list[idx]`` and returns
        a one-element zero array in place of the answer. When features are
        not preloaded, the per-image features are read from disk and stacked
        with the array stored under ``self.feature_path_vc``.

        :param idx: index into ``self.ans_list`` (train) or
            ``self.ques_list`` (other modes).
        :return: three tensors created with ``torch.from_numpy``.
        :raises ValueError: if the two feature arrays still disagree on
            their first dimension after the fallback file was tried.
        """
        # Placeholder answer returned when the run mode carries no labels.
        ans_iter = np.zeros(1)

        is_train = self.__C.RUN_MODE in ['train']
        if is_train:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_id = ans['image_id']
        else:
            ques = self.ques_list[idx]
            image_id = ques['image_id']

        if self.__C.PRELOAD:
            img_feat_x = self.iid_to_img_feat[str(image_id)]
        else:
            img_feat_x = self._load_stacked_img_feat(image_id)
        img_feat_iter = proc_img_feat(img_feat_x,
                                      self.__C.IMG_FEAT_PAD_SIZE)

        # Question token indices.
        ques_ix_iter = proc_ques(ques, self.token_to_ix,
                                 self.__C.MAX_TOKEN)

        # Answers are only available in train mode.
        if is_train:
            ans_iter = proc_ans(ans, self.ans_to_ix)

        return torch.from_numpy(img_feat_iter), \
               torch.from_numpy(ques_ix_iter), \
               torch.from_numpy(ans_iter)

    def _load_stacked_img_feat(self, image_id):
        """Load the features of ``image_id`` from disk and stack them.

        Reads the ``'x'`` array of the registered feature file (transposed)
        and the ``.npy`` file under ``self.feature_path_vc``. If their first
        dimensions differ, the ``.npy`` file under ``self.feature_path_bu``
        is used instead, truncated to the length of the other array.

        :param image_id: image identifier as stored in the sample record.
        :return: both arrays joined with ``np.hstack``.
        :raises ValueError: if the first dimensions still differ after the
            fallback.
        """
        img_feat = np.load(self.iid_to_img_feat_path[str(image_id)])
        img_feat_x = img_feat['x'].transpose((1, 0))
        img_feat_vc = np.load(self.feature_path_vc + '/' +
                              str(image_id) + '.npy')

        # Explicit comparison instead of assert/except: asserts are removed
        # under ``python -O`` and a bare except hides unrelated errors.
        if img_feat_x.shape[0] != img_feat_vc.shape[0]:
            print(image_id)
            img_feat = np.load(self.feature_path_bu + '/' +
                               str(image_id) + '.npy')
            img_feat_x = img_feat[:img_feat_vc.shape[0], :]
            if img_feat_x.shape[0] != img_feat_vc.shape[0]:
                raise ValueError(
                    'image {}: feature row counts differ ({} vs {})'.format(
                        image_id, img_feat_x.shape[0], img_feat_vc.shape[0]))

        return np.hstack((img_feat_x, img_feat_vc))
예제 #7
0
    def __getitem__(self, idx):
        """Return image, tokenized-question and answer tensors for ``idx``.

        Train mode reads the annotation at ``self.ans_list[idx]`` and its
        question; every other mode reads ``self.ques_list[idx]`` and returns
        a one-element zero array in place of the answer. The question text is
        encoded with ``self.tokenizer.encode_plus`` rather than ``proc_ques``.

        :param idx: index into ``self.ans_list`` (train) or
            ``self.ques_list`` (other modes).
        :return: tuple ``(img_feat, ques_idx, attention_mask,
            token_type_ids, ans)`` of tensors.
        """
        train_mode = self.__C.RUN_MODE in ['train']

        if train_mode:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_key = str(ans['image_id'])
        else:
            ques = self.ques_list[idx]
            image_key = str(ques['image_id'])

        # Image features come from memory or from the registered file.
        if self.__C.PRELOAD:
            raw_feat = self.iid_to_img_feat[image_key]
        else:
            archive = np.load(self.iid_to_img_feat_path[image_key])
            raw_feat = archive['x'].transpose((1, 0))
        img_feat_iter = proc_img_feat(raw_feat, self.__C.IMG_FEAT_PAD_SIZE)

        # Without annotations a dummy array stands in for the answer.
        if train_mode:
            ans_iter = proc_ans(ans, self.ans_to_ix)
        else:
            ans_iter = np.zeros(1)

        # Encode the raw question text, requesting padding up to MAX_TOKEN.
        encoded = self.tokenizer.encode_plus(
            ques['question'],
            add_special_tokens=True,
            max_length=self.__C.MAX_TOKEN,
            return_attention_mask=True,
            return_token_type_ids=True,
            pad_to_max_length=True,
        )
        ques_idx = torch.tensor(encoded['input_ids'])
        attention_mask = torch.tensor(encoded['attention_mask'])
        token_type_ids = torch.tensor(encoded['token_type_ids'])

        return (torch.from_numpy(img_feat_iter), ques_idx, attention_mask,
                token_type_ids, torch.from_numpy(ans_iter))