Example #1
0
    def __getitem__(self, idx):
        """Build the tensors for the sample at position ``idx``.

        In the 'train' and 'val' run modes the sample is driven by the answer
        record ``self.ans_list[idx]``: its question is looked up by question
        id and the answer outputs come from ``self.proc_ans_and_abs``. In any
        other run mode the sample is the question record
        ``self.ques_list[idx]`` and the three answer-related outputs stay as
        ``np.zeros(1)`` placeholders.

        :param idx: index into ``self.ans_list`` ('train'/'val') or
            ``self.ques_list`` (other run modes).
        :return: tuple ``(img_feat, ques_ix, ans, abs, loss_masks)``. The
            first four entries are converted with ``torch.from_numpy``;
            ``loss_masks`` is handed back exactly as produced.
        """
        cfg = self.__C
        with_answers = cfg.RUN_MODE in ['train', 'val']

        # Select the record that names the image for this run mode.
        if with_answers:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_key = str(ans['image_id'])
        else:
            ques = self.ques_list[idx]
            image_key = str(ques['image_id'])

        # Image features come from the in-memory table when PRELOAD is set,
        # otherwise from the file registered for this image id.
        if cfg.PRELOAD:
            raw_feat = self.iid_to_img_feat[image_key]
        else:
            loaded = np.load(self.iid_to_img_feat_path[image_key])
            raw_feat = loaded['x'].transpose((1, 0))
        img_feat_iter = proc_img_feat(raw_feat, cfg.IMG_FEAT_PAD_SIZE)

        ques_ix_iter = proc_ques(ques, self.token_to_ix, cfg.MAX_TOKEN)

        if with_answers:
            ans_iter, abs_iter, loss_masks = self.proc_ans_and_abs(ans)
        else:
            # No answers in this mode: keep fixed-size placeholders so the
            # return shape stays the same.
            ans_iter = np.zeros(1)
            abs_iter = np.zeros(1)
            loss_masks = np.zeros(1)

        return (
            torch.from_numpy(img_feat_iter),
            torch.from_numpy(ques_ix_iter),
            torch.from_numpy(ans_iter),
            torch.from_numpy(abs_iter),
            loss_masks,
        )
    def __getitem__(self, idx):
        '''
        Build the tensors for the sample at position ``idx``.

        Reads ``self.ans_list``, ``self.qid_to_ques``, ``self.ques_list``,
        ``self.token_to_ix`` and ``self.ans_to_ix``.

        :param idx: sample index, e.g. ``0``
        :return: torch tensors ``(img_feat_iter, ques_ix_iter, ans_iter)``;
            outside 'train' mode ``ans_iter`` is built from ``np.zeros(1)``
        '''
        training = self.__C.RUN_MODE in ['train']

        if training:
            # One annotation per sample. Per the original author's note it
            # holds the answers plus ids, e.g.
            # {'answers': [{'answer': ...}, ...], 'image_id': 139831,
            #  'question_id': 'VG_1293929'}
            ans = self.ans_list[idx]
            # The matching question record, e.g.
            # {'image_id': 139831, 'question': "What's the man doing?",
            #  'question_id': 'VG_1293929'}
            ques = self.qid_to_ques[str(ans['question_id'])]
            record = ans
        else:
            # Outside training the question list is indexed directly.
            ques = self.ques_list[idx]
            record = ques

        image_key = str(record['image_id'])
        if self.__C.PRELOAD:
            # Features were loaded up front; just look them up.
            img_feat_x = self.iid_to_img_feat[image_key]
        else:
            # Load this image's feature file and swap the two axes of 'x'.
            feat_file = np.load(self.iid_to_img_feat_path[image_key])
            img_feat_x = feat_file['x'].transpose((1, 0))

        # Image features, processed with the configured pad size.
        img_feat_iter = proc_img_feat(img_feat_x, self.__C.IMG_FEAT_PAD_SIZE)

        # Question token indices, limited by MAX_TOKEN.
        ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN)

        # Answer target (the original note calls it an answer score matrix);
        # a one-element placeholder when no annotation is available.
        if training:
            ans_iter = proc_ans(ans, self.ans_to_ix)
        else:
            ans_iter = np.zeros(1)

        return (
            torch.from_numpy(img_feat_iter),
            torch.from_numpy(ques_ix_iter),
            torch.from_numpy(ans_iter),
        )
Example #3
0
    def __getitem__(self, idx):
        """Return ``(img_feat, ques_ix, ans)`` tensors for sample ``idx``.

        In 'train' mode ``idx`` selects an answer record whose question is
        looked up by question id, and the answer is encoded with
        ``proc_ans``. In every other run mode ``idx`` selects a question
        record directly and the answer slot is a ``np.zeros(1)`` placeholder.

        :param idx: index into ``self.ans_list`` ('train') or
            ``self.ques_list`` (other run modes).
        :return: 3-tuple of tensors created with ``torch.from_numpy``.
        """

        def image_features(image_id):
            # Processed image features for one image id.
            key = str(image_id)
            if self.__C.PRELOAD:
                feats = self.iid_to_img_feat[key]
            else:
                # The key maps to a feature file path; the original note gives
                # '551018' -> '.../COCO_val2014_000000551018.jpg.npz' and an
                # example shape change of (2048, 41) -> (41, 2048).
                feats = np.load(
                    self.iid_to_img_feat_path[key])['x'].transpose((1, 0))
            return proc_img_feat(feats, self.__C.IMG_FEAT_PAD_SIZE)

        def question_indices(question):
            # Token indices for one question record.
            return proc_ques(question, self.token_to_ix, self.__C.MAX_TOKEN)

        if self.__C.RUN_MODE in ['train']:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            img_feat_iter = image_features(ans['image_id'])
            ques_ix_iter = question_indices(ques)
            ans_iter = proc_ans(ans, self.ans_to_ix)
        else:
            ques = self.ques_list[idx]
            img_feat_iter = image_features(ques['image_id'])
            ques_ix_iter = question_indices(ques)
            # No annotation available: keep a placeholder answer.
            ans_iter = np.zeros(1)

        return (
            torch.from_numpy(img_feat_iter),
            torch.from_numpy(ques_ix_iter),
            torch.from_numpy(ans_iter),
        )
Example #4
0
    def __getitem__(self, idx):
        """Build one sample from ``self.ques_list[idx]``.

        The question record supplies the image id (``'img_id'``), the
        question id (``'ques_id'``) and, in 'train' mode, the input to
        ``proc_ans_oe``.

        Image loading is best-effort in 'train' mode only: if the feature
        file cannot be read or processed, ``'false'`` and the file path are
        printed and the image slot keeps its ``np.zeros(1)`` placeholder. In
        any other run mode the error propagates. A missing path entry for the
        image id raises ``KeyError`` in every mode.

        :param idx: index into ``self.ques_list``.
        :return: 10-tuple ``(img_feat, ques_ix, ans, img_id, pad, ques_id,
            pad, pad, pad, pad)``. The first three are tensors created with
            ``torch.from_numpy``; ``pad`` is the NumPy array ``np.zeros(1)``;
            the ids are returned as stored in the record. Outside 'train'
            mode ``ans`` is built from ``np.zeros(1)``.
        """
        # Placeholders keep the return shape fixed when a part is unavailable.
        pad = np.zeros(1)
        img_feat_iter = np.zeros(1)
        ans_iter = np.zeros(1)

        is_train = self.__C.RUN_MODE in ['train']
        ques = self.ques_list[idx]
        feat_path = self.iid_to_img_feat_path[str(ques['img_id'])]

        try:
            # np.load on an .npz archive keeps the file open until closed;
            # the context manager releases the handle right after the read.
            # Only the 'x' array is used, so nothing else is read.
            with np.load(feat_path) as img_feat:
                img_feat_x = img_feat['x'].transpose((1, 0))
            img_feat_iter = proc_img_feat(img_feat_x,
                                          self.__C.IMG_FEAT_PAD_SIZE)
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt and
            # SystemExit are never swallowed. Only training tolerates a bad
            # feature file.
            if not is_train:
                raise
            print('false')
            print(feat_path)

        # Process question
        ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN,
                                 element_name='ques')

        # Process answer (only available in training)
        if is_train:
            ans_iter = proc_ans_oe(ques, self.ans_to_ix)

        return torch.from_numpy(img_feat_iter), \
               torch.from_numpy(ques_ix_iter), \
               torch.from_numpy(ans_iter), \
               ques['img_id'], \
               pad, \
               ques['ques_id'], \
               pad, pad, pad, pad
Example #5
0
 def get_ans_ix(self, max_token=4):
     """
         Get answer embeddings in the same token embedding as questions

         Every answer in ``self.ix_to_ans`` is looked up by the string form
         of its index (``'0'``, ``'1'``, ...) and encoded with ``proc_ques``
         against ``self.token_to_ix``; the encodings are stacked in index
         order.

         :param max_token: token-length argument forwarded to ``proc_ques``
             for each answer. Defaults to 4, the previously hard-coded value.
         :return: ``np.ndarray`` with one leading entry per answer, as
             produced by ``np.stack``.
         :raises ValueError: from ``np.stack`` when ``self.ix_to_ans`` is
             empty.
     """
     num_answers = len(self.ix_to_ans)
     ans_embedding_ixs = [
         proc_ques(self.ix_to_ans[str(i)], self.token_to_ix, max_token)
         for i in range(num_answers)
     ]
     return np.stack(ans_embedding_ixs)
Example #6
0
    def __getitem__(self, idx):
        """Return ``(img_feat, ques_ix, ans)`` tensors for question ``idx``.

        This variant has no answer handling (the original note tags it
        ``['show']``, presumably the run mode it serves), so the answer slot
        is always a tensor made from ``np.zeros(1)``.

        :param idx: index into ``self.ques_list``.
        :return: 3-tuple of tensors created with ``torch.from_numpy``.
        """
        ques = self.ques_list[idx]

        # Read the 'x' array from this image's feature file and swap its
        # two axes before processing.
        feat_path = self.iid_to_img_feat_path[str(ques['image_id'])]
        region_feat = np.load(feat_path)['x'].transpose((1, 0))
        img_feat_iter = proc_img_feat(region_feat, self.__C.IMG_FEAT_PAD_SIZE)

        # Token indices for the question.
        ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN)

        # Placeholder answer so the return shape matches the other loaders.
        ans_iter = np.zeros(1)

        return (
            torch.from_numpy(img_feat_iter),
            torch.from_numpy(ques_ix_iter),
            torch.from_numpy(ans_iter),
        )
Example #7
0
    def __getitem__(self, idx):
        """Build one multiple-choice sample from ``self.ques_list[idx]``.

        Image loading is best-effort in 'train' mode only: if the feature
        file cannot be read or processed, ``'false'`` and the file path are
        printed and the image slot keeps its ``np.zeros(1)`` placeholder. In
        any other run mode the error propagates. A missing path entry for the
        image id raises ``KeyError`` in every mode.

        :param idx: index into ``self.ques_list``.
        :return: 10-tuple whose layout depends on the run mode.

            'train':
            ``(img_feat, ques_ix, ans_score, img_id, pad, ques_id,
            ans_label, ans_mc_ix, ans_ix, mc)`` where ``ans_score``,
            ``ans_label``, ``ans_mc_ix`` and ``ans_ix`` come from
            ``proc_ans_mc``.

            otherwise:
            ``(img_feat, ques_ix, pad, img_id, pad, ques_id, pad,
            ans_mc_ix, ans_ix, mc)`` where ``ans_mc_ix`` and ``ans_ix`` come
            from ``proc_ans_mc_test``.

            ``img_feat``, ``ques_ix``, ``ans_score``, ``ans_label`` and
            ``ans_mc_ix`` are converted with ``torch.from_numpy``; ``pad`` is
            the NumPy array ``np.zeros(1)``; ``ans_ix`` and the record fields
            ``img_id``, ``ques_id`` and ``mc`` are returned unconverted.
        """
        # Placeholders keep the tuple layout fixed when a part is unavailable.
        pad = np.zeros(1)
        img_feat_iter = np.zeros(1)

        is_train = self.__C.RUN_MODE in ['train']
        ques = self.ques_list[idx]
        feat_path = self.iid_to_img_feat_path[str(ques['img_id'])]

        try:
            # np.load on an .npz archive keeps the file open until closed;
            # the context manager releases the handle right after the read.
            # Only the 'x' array is used, so nothing else is read.
            with np.load(feat_path) as img_feat:
                img_feat_x = img_feat['x'].transpose((1, 0))
            img_feat_iter = proc_img_feat(img_feat_x,
                                          self.__C.IMG_FEAT_PAD_SIZE)
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt and
            # SystemExit are never swallowed. Only training tolerates a bad
            # feature file.
            if not is_train:
                raise
            print('false')
            print(feat_path)

        # Process question
        ques_ix_iter = proc_ques(ques, self.token_to_ix, self.__C.MAX_TOKEN,
                                 element_name='ques')

        if is_train:
            # Process answer: ans_score, ans_label, ans_mc_ix, ans_ix
            ans_iter, ans_label, ans_mc_ix, ans_ix = proc_ans_mc(
                ques, self.ans_to_ix, self.token_to_ix)
            return torch.from_numpy(img_feat_iter), \
                   torch.from_numpy(ques_ix_iter), \
                   torch.from_numpy(ans_iter), \
                   ques['img_id'], \
                   pad, \
                   ques['ques_id'], \
                   torch.from_numpy(ans_label), \
                   torch.from_numpy(ans_mc_ix), \
                   ans_ix, \
                   ques['mc']

        ans_mc_ix, ans_ix = proc_ans_mc_test(ques, self.ans_to_ix,
                                             self.token_to_ix)

        return torch.from_numpy(img_feat_iter), \
               torch.from_numpy(ques_ix_iter), \
               pad, \
               ques['img_id'], \
               pad, \
               ques['ques_id'], \
               pad, \
               torch.from_numpy(ans_mc_ix), \
               ans_ix, \
               ques['mc']
Example #8
0
    def __getitem__(self, idx):
        """Return ``(img_feat, ques_ix, ans, fact_ix)`` tensors for ``idx``.

        In 'train' mode ``idx`` selects an answer record (its question is
        looked up by question id) and objects come from
        ``self.img_obj_train``. In every other run mode ``idx`` selects a
        question record, objects come from ``self.img_obj_val`` and the
        answer slot is a ``np.zeros(1)`` placeholder.

        Image features are used as loaded; ``proc_img_feat`` is not applied
        in this variant.

        :param idx: index into ``self.ans_list`` ('train') or
            ``self.ques_list`` (other run modes).
        :return: 4-tuple of tensors created with ``torch.from_numpy``.
        """
        cfg = self.__C
        is_train = cfg.RUN_MODE in ['train']

        # Select the record that names the image for this run mode.
        if is_train:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_key = str(ans['image_id'])
        else:
            ques = self.ques_list[idx]
            image_key = str(ques['image_id'])

        # Image features: preloaded table or whatever np.load returns for
        # the registered path, with no further processing.
        if cfg.PRELOAD:
            img_feat_iter = self.iid_to_img_feat[image_key]
        else:
            img_feat_iter = np.load(self.iid_to_img_feat_path[image_key])

        ques_ix_iter = proc_ques(ques, self.token_to_ix, cfg.MAX_TOKEN)

        if is_train:
            ans_iter = proc_ans(ans, self.ans_to_ix)
        else:
            ans_iter = np.zeros(1)

        # Object labels detected for this image, e.g. ['person', 'windows'].
        obj_table = self.img_obj_train if is_train else self.img_obj_val
        objects = obj_table[image_key]

        # Keep the TOP_OBJ leading objects, padding with 'pad_obj' if short.
        top_objects = get_top_obj(objects, top=cfg.TOP_OBJ)
        for _ in range(len(top_objects), cfg.TOP_OBJ):
            top_objects.append('pad_obj')

        # For every object take its first TOP_REL relations, padding each
        # group with the first relation stored under 'pad_obj'.
        pad_rel = self.react_obj_rel['pad_obj'][0]
        top_fact = []
        for obj in top_objects:
            obj_facts = self.react_obj_rel[obj][:cfg.TOP_REL]
            for _ in range(len(obj_facts), cfg.TOP_REL):
                obj_facts.append(pad_rel)
            top_fact.extend(obj_facts)

        # Token budget: FACT_TOKEN per fact, TOP_OBJ * TOP_REL facts.
        num_token = cfg.FACT_TOKEN * cfg.TOP_OBJ * cfg.TOP_REL
        fact_idx_iter = proc_fact(top_fact, self.token_to_ix, num_token,
                                  cfg.FACT_TOKEN)

        return (
            torch.from_numpy(img_feat_iter),
            torch.from_numpy(ques_ix_iter),
            torch.from_numpy(ans_iter),
            torch.from_numpy(fact_idx_iter),
        )
Example #9
0
    def __getitem__(self, idx):
        """Return ``(img_feat, ques_ix, ans)`` tensors for sample ``idx``.

        In 'train' mode ``idx`` selects an answer record whose question is
        looked up by question id, and the answer is encoded with
        ``proc_ans``. In every other run mode ``idx`` selects a question
        record directly and the answer slot is a ``np.zeros(1)`` placeholder.

        When features are not preloaded, the image's base features (the
        ``'x'`` array of its feature file, axes swapped) are concatenated
        column-wise with a second array loaded from
        ``<feature_path_vc>/<image_id>.npy``. If the two row counts differ,
        the image id is printed and the base features are re-read from
        ``<feature_path_bu>/<image_id>.npy``, truncated to the row count of
        the second array.

        :param idx: index into ``self.ans_list`` ('train') or
            ``self.ques_list`` (other run modes).
        :return: 3-tuple of tensors created with ``torch.from_numpy``.
        :raises AssertionError: if the row counts still differ after the
            fallback (when assertions are enabled; otherwise ``np.hstack``
            raises ``ValueError`` for the mismatch).
        """
        is_train = self.__C.RUN_MODE in ['train']

        # Select the record that names the image for this run mode.
        if is_train:
            ans = self.ans_list[idx]
            ques = self.qid_to_ques[str(ans['question_id'])]
            image_id = ans['image_id']
        else:
            ques = self.ques_list[idx]
            image_id = ques['image_id']

        # Process image feature from (.npz) file
        if self.__C.PRELOAD:
            img_feat_x = self.iid_to_img_feat[str(image_id)]
        else:
            # modified by Tan Wang
            # np.load on an .npz archive keeps the file open until closed;
            # the context manager releases the handle right after the read.
            with np.load(self.iid_to_img_feat_path[str(image_id)]) as img_feat:
                img_feat_x = img_feat['x'].transpose((1, 0))
            img_feat_vc = np.load(self.feature_path_vc + '/' +
                                  str(image_id) + '.npy')

            # Explicit comparison instead of try/assert/except: the fallback
            # must still run when assertions are disabled (python -O), and
            # unrelated errors must not be mistaken for a mismatch.
            if img_feat_x.shape[0] != img_feat_vc.shape[0]:
                print(image_id)
                img_feat_bu = np.load(self.feature_path_bu + '/' +
                                      str(image_id) + '.npy')
                img_feat_x = img_feat_bu[:img_feat_vc.shape[0], :]
                assert img_feat_x.shape[0] == img_feat_vc.shape[0], \
                    'feature row count mismatch for image %s' % image_id
            img_feat_x = np.hstack((img_feat_x, img_feat_vc))
        img_feat_iter = proc_img_feat(img_feat_x,
                                      self.__C.IMG_FEAT_PAD_SIZE)

        # Process question
        ques_ix_iter = proc_ques(ques, self.token_to_ix,
                                 self.__C.MAX_TOKEN)

        # Process answer (only available in training)
        if is_train:
            ans_iter = proc_ans(ans, self.ans_to_ix)
        else:
            ans_iter = np.zeros(1)

        return torch.from_numpy(img_feat_iter), \
               torch.from_numpy(ques_ix_iter), \
               torch.from_numpy(ans_iter)