def get_item(self, idx):
    """Build the sample for the entry at position ``idx`` of ``self.vqamb_data``.

    The returned ``Sample`` carries the image id, the QA id, the processed
    question text, the processed answers with their scores (first score
    entry dropped), and the feature tensor loaded from
    ``self.detectron_folder`` stored as ``image_feature_0``.

    Args:
        idx: Index into ``self.vqamb_data``.

    Returns:
        The populated ``Sample``.
    """
    entry = self.vqamb_data[idx]
    sample = Sample()

    # Identifiers of the image and of the question/answer pair.
    sample.img_id = entry['id']
    sample.qa_id = entry['qa_id']

    # Question: strip "?" and keep "'s" while tokenizing, then run the
    # text processor on the tokens.
    question_tokens = tokenize(entry["question"], remove=["?"], keep=["'s"])
    sample.text = self.text_processor({"tokens": question_tokens})["text"]

    # Answer: the single answer is wrapped in a list for the processor.
    answer_out = self.answer_processor({"answers": [entry['answer']]})
    sample.answers = answer_out["answers"]
    # Skip score index 0 (the "unknown" slot, per the original author's note).
    sample.targets = answer_out["answers_scores"][1:]

    # Feature file name: <folder><id>.pt, or <folder><id>,<x>,<y>.pt when
    # the spatial option is enabled.
    # TODO: read in detectron image instead if detectron is to be built
    path_parts = [self.detectron_folder, str(entry['id'])]
    if self.config.spatial:
        click = entry['point']
        path_parts += [',', str(click['x']), ',', str(click['y'])]
    path_parts.append('.pt')

    features = torch.load("".join(path_parts), map_location=torch.device('cpu'))

    # Optionally force exactly 100 rows: cut the surplus or append zero rows.
    if self.config.pad_detectron:
        shortfall = 100 - features.shape[0]
        if shortfall < 0:
            features = features[:100]
        elif shortfall > 0:
            filler = torch.zeros(shortfall, features.shape[1])
            features = torch.cat([features, filler], dim=0)

    sample.image_feature_0 = features
    return sample
def get_item(self, idx):
    """Build the sample for the entry at position ``idx`` of ``self.vqamb_data``.

    The sample carries the image id, the QA index, the entry's points, a
    ground-truth box ``[xmin, ymin, xmax, ymax]`` derived from the first
    object in ``all_objs``, the processed question and answers, and the
    feature tensor loaded from ``self.detectron_folder`` stored as
    ``image_feature_0``.

    Bounding-box features are not loaded or attached to the sample. When
    ``self.config.pad_detectron`` is set, the detectron features are cut or
    zero-padded to a fixed number of rows (608 when ``self.config.grid`` is
    set, otherwise 100).

    Args:
        idx: Index into ``self.vqamb_data``.

    Returns:
        The populated ``Sample``.
    """
    data = self.vqamb_data[idx]
    current_sample = Sample()

    # store question and image id
    current_sample.img_id = data['id']
    current_sample.qa_id = data['qa_index']

    # store points
    current_sample.points = data['points']

    # Ground-truth box from the first object: (x, y, w, h) -> corner form.
    obj = data['all_objs'][0]
    xmin, ymin = obj['x'], obj['y']
    xmax, ymax = xmin + obj['w'], ymin + obj['h']
    current_sample.gt_bbox = torch.Tensor([xmin, ymin, xmax, ymax])

    # process question
    tokens = tokenize(data["question"], remove=["?"])
    processed = self.text_processor({"tokens": tokens})
    current_sample.text = processed["text"]

    # process answers
    processed = self.answer_processor({"answers": data['all_ans']})
    current_sample.answers = processed["answers"]
    # Skip score index 0 (the "unknown" slot, per the original author's note).
    current_sample.targets = processed["answers_scores"][1:]

    # Detectron features ----------------
    # TODO: read in detectron image instead if detectron is to be built
    detectron_path = self.detectron_folder + str(data['id'])
    # When the folder path contains 'pt', files are named <id>,<x>,<y>.pt
    # using the entry's first point.
    if 'pt' in self.detectron_folder:
        point = data['points'][0]
        detectron_path += ',' + str(point['x']) + ',' + str(point['y'])
    detectron_path += '.pt'

    detectron_feat = torch.load(
        detectron_path, map_location=torch.device('cpu')).squeeze()

    # Target row count for padding.
    max_feat = 608 if self.config.grid else 100

    # Pad features to fixed length
    if self.config.pad_detectron:
        if detectron_feat.dim() == 1:
            # squeeze() collapses a single-row feature file to 1-D; restore
            # the row axis so truncation/padding acts on rows rather than on
            # the feature axis (and shape[1] exists).
            detectron_feat = detectron_feat.unsqueeze(0)

        num_feat = detectron_feat.shape[0]
        if num_feat > max_feat:
            detectron_feat = detectron_feat[:max_feat]
        elif num_feat < max_feat:
            # NOTE: only the detectron features are padded. The previous code
            # also padded `bbox_feat`, which is never loaded here and raised
            # NameError on every short sample.
            pad = torch.zeros(max_feat - num_feat, detectron_feat.shape[1])
            detectron_feat = torch.cat([detectron_feat, pad], dim=0)

    current_sample.image_feature_0 = detectron_feat
    # ---------------------------------------------

    return current_sample