def load_item(self, idx):
    """Build a single Sample for the question at *idx*.

    Tokenizes the question, attaches question/image ids and text length,
    merges optional precomputed image features, then delegates OCR and
    answer-space population to the dataset helpers.
    """
    info = self.imdb[idx]
    sample = Sample()

    # Tokenized question text via the configured text processor.
    processed = self.text_processor({"tokens": info["question_tokens"]})
    sample.text = processed["text"]

    sample.question_id = torch.tensor(info["question_id"], dtype=torch.int)

    # image_id is either an int (tensorize it) or an opaque string id.
    image_id = info["image_id"]
    sample.image_id = (
        torch.tensor(image_id, dtype=torch.int)
        if isinstance(image_id, int)
        else image_id
    )

    sample.text_len = torch.tensor(
        len(info["question_tokens"]), dtype=torch.int
    )

    if self._use_features is True:
        sample.update(self.features_db[idx])

    # Add details for OCR like OCR bbox, vectors, tokens here
    sample = self.add_ocr_details(info, sample)
    # Depending on whether we are using soft copy this can add
    # dynamic answer space
    sample = self.add_answer_info(info, sample)
    return sample
def load_item(self, idx):
    """Create a captioning Sample for index *idx*.

    Caption text, id and length are only attached outside the test split;
    the image id, optional features and reference captions are attached
    for every split.
    """
    info = self.imdb[idx]
    sample = Sample()

    # The test split carries no ground-truth captions to load.
    if self._dataset_type != "test":
        processed = self.text_processor({"tokens": info["caption_tokens"]})
        sample.text = processed["text"]
        sample.caption_id = torch.tensor(info["caption_id"], dtype=torch.int)
        sample.caption_len = torch.tensor(
            len(info["caption_tokens"]), dtype=torch.int
        )

    # image_id is either an int (tensorize it) or an opaque string id.
    image_id = info["image_id"]
    sample.image_id = (
        torch.tensor(image_id, dtype=torch.int)
        if isinstance(image_id, int)
        else image_id
    )

    if self._use_features is True:
        sample.update(self.features_db[idx])

    # Add reference captions to sample
    return self.add_reference_caption(info, sample)
def load_item(self, idx):
    """Assemble a Sample for index *idx*, including anchor-graph details,
    then strip the per-image info dicts down to their 'max_features' key.
    """
    info = self.preprocess_sample_info(self.imdb[idx])
    sample = Sample()

    # breaking change from VQA2Dataset: load question_id
    sample.question_id = torch.tensor(info["question_id"], dtype=torch.int)

    # image_id is normalized to a string when it arrives as an int.
    image_id = info["image_id"]
    sample.image_id = str(image_id) if isinstance(image_id, int) else image_id

    if self._use_features is True:
        sample.update(self.features_db[idx])

    sample = self.add_sample_details(info, sample)
    sample = self.add_answer_info(info, sample)
    sample = self.add_anchor_graph(info, sample)

    # only the 'max_features' key is needed
    # pop other keys to minimize data loading overhead
    for image_info in (sample.image_info_0, sample.image_info_1):
        for key in list(image_info):
            if key != 'max_features':
                image_info.pop(key)

    return sample
def load_item(self, idx):
    """Load one M4C-style sample: question string (tokenized online with
    the BERT text processor), image features, object bounding boxes, OCR
    details and the (possibly dynamic) answer space.
    """
    sample_info = self.imdb[idx]
    current_sample = Sample()
    current_sample.question_id = torch.tensor(
        sample_info["question_id"], dtype=torch.int
    )
    # image_id is either an int (tensorize it) or an opaque string id.
    if isinstance(sample_info["image_id"], int):
        current_sample.image_id = torch.tensor(
            sample_info["image_id"], dtype=torch.int
        )
    else:
        current_sample.image_id = sample_info["image_id"]
    if self._use_features is True:
        features = self.features_db[idx]
        current_sample.update(features)
    # 1. Load text (question words)
    # breaking change from VQA2Dataset:
    # load the entire question string, not tokenized questions, since we
    # switch to BERT tokenizer in M4C and do online tokenization
    question_str = (
        sample_info["question"]
        if "question" in sample_info
        else sample_info["question_str"]
    )
    processed_question = self.text_processor({"question": question_str})
    current_sample.text = processed_question["token_inds"]
    current_sample.text_len = processed_question["token_num"]
    # 2. Load object bounding box information.
    # Fix: run the copy processor ONCE and reuse the result; the original
    # invoked it twice on the identical blob, doing the work (and any
    # padding/copying) twice per sample.
    obj_boxes = self.copy_processor(
        {"blob": sample_info["obj_normalized_boxes"]}
    )
    current_sample.obj_bbox = obj_boxes["bbox"]
    # Cap the object count at 36.
    # NOTE(review): min(mask, 36) assumes "mask" is a scalar count --
    # builtin min() raises on a multi-element tensor; confirm against the
    # copy processor's output format.
    current_sample.obj_mask = min(obj_boxes["mask"], 36)
    # Add details for OCR like OCR bbox, vectors, tokens here
    current_sample = self.add_ocr_details(sample_info, current_sample)
    # Depending on whether we are using soft copy this can add
    # dynamic answer space
    current_sample = self.add_answer_info(sample_info, current_sample)
    return current_sample
def load_item(self, idx):
    """Load one sample and build its 150x150 relation ("overlap") matrix.

    The matrix combines object-object cosine similarity, OCR-OCR cosine
    similarity and object-OCR spatial overlap into a single block matrix
    stored on the sample as ``overlap_flag``.
    """
    sample_info = self.imdb[idx]
    sample_info = self.preprocess_sample_info(sample_info)
    current_sample = Sample()
    # breaking change from VQA2Dataset: load question_id
    current_sample.question_id = torch.tensor(
        sample_info["question_id"], dtype=torch.int
    )
    # image_id is normalized to a string when it arrives as an int.
    if isinstance(sample_info["image_id"], int):
        current_sample.image_id = str(sample_info["image_id"])
    else:
        current_sample.image_id = sample_info["image_id"]
    if self._use_features is True:
        features = self.features_db[idx]
        current_sample.update(features)
    current_sample = self.add_sample_details(sample_info, current_sample)
    current_sample = self.add_answer_info(sample_info, current_sample)
    # only the 'max_features' key is needed
    # pop other keys to minimize data loading overhead
    for k in list(current_sample.image_info_0):
        if k != 'max_features':
            current_sample.image_info_0.pop(k)
    for k in list(current_sample.image_info_1):
        if k != 'max_features':
            current_sample.image_info_1.pop(k)
    # Block-structured relation matrix. The slicing below assumes the
    # first 100 rows/cols correspond to object regions and the remaining
    # 50 to OCR tokens (150 = 100 + 50) -- TODO(review): confirm these
    # sizes against the feature/OCR processor configuration.
    overlap_flag = torch.zeros(150, 150)
    obj_obj_relation = self.compute_similarity_by_cosine(current_sample.image_feature_0, current_sample.image_feature_0)
    ocr_ocr_relation = self.compute_similarity_by_cosine(current_sample.context_feature_0, current_sample.context_feature_0)
    obj_ocr_relation = self.overlap(current_sample.obj_bbox_coordinates, current_sample.ocr_bbox_coordinates)
    overlap_flag[:100, :100] = obj_obj_relation    # object-object block
    overlap_flag[100:, 100:] = ocr_ocr_relation    # OCR-OCR block
    overlap_flag[:100, 100:] = obj_ocr_relation    # object-OCR block
    # Mirror the object-OCR block into the OCR-object quadrant.
    overlap_flag[100:, :100] = obj_ocr_relation.transpose(1, 0)
    current_sample.overlap_flag = overlap_flag
    return current_sample
def load_item(self, idx):
    """Assemble the Sample for row *idx*: ids, optional precomputed
    features, then delegate to add_sample_details / add_answer_info."""
    info = self.imdb[idx]
    sample = Sample()

    # breaking change from VQA2Dataset: load question_id
    sample.question_id = torch.tensor(info["question_id"], dtype=torch.int)

    # image_id is either an int (tensorize it) or an opaque string id.
    image_id = info["image_id"]
    sample.image_id = (
        torch.tensor(image_id, dtype=torch.int)
        if isinstance(image_id, int)
        else image_id
    )

    if self._use_features is True:
        sample.update(self.features_db[idx])

    sample = self.add_sample_details(info, sample)
    return self.add_answer_info(info, sample)
def load_item(self, idx):
    """Build a Sample whose text fields depend on the active dataset
    variant (``self.dataset``).

    Branches:
      * 'train_vqa': tokenized VQA question; sub-/other-question fields
        are filled with copies of the main question.
      * 'train_introspect' or 'test': main/sub/other question strings are
        each run through the text processor.
      * otherwise: question_str, with sub-question used as the fallback
        for the other-question fields.
    """
    sample_info = self.imdb[idx]
    current_sample = Sample()
    current_sample.dataset_name = self.dataset
    if self.dataset == 'train_vqa':
        text_processor_argument = {
            "tokens": sample_info["question_tokens"]
        }
        processed_question = self.text_processor(text_processor_argument)
        current_sample.text_len = torch.tensor(len(
            sample_info["question_tokens"]), dtype=torch.int)
        current_sample.text = processed_question["text"]
        current_sample.question_text = sample_info["question_str"]
        # VQA rows carry no sub/other questions; reuse the main question.
        current_sample.text_sq = current_sample.text
        current_sample.text_oq = current_sample.text
        current_sample.reasoning_question = sample_info["question_str"]
        current_sample.reasoning_answer = sample_info["answers"][0]
        current_sample.sub_question = sample_info["question_str"]
        current_sample.other_question = sample_info["question_str"]
    elif self.dataset == 'train_introspect' or self.dataset == 'test':
        text_processor_argument = {
            "text": sample_info["main_question_str"]
        }
        processed_question = self.text_processor(text_processor_argument)
        current_sample.text = processed_question["text"]
        if "sub_question_str" in sample_info:
            text_processor_argument_sq = {
                "text": sample_info["sub_question_str"]
            }
            processed_question_sq = self.text_processor(
                text_processor_argument_sq)
            current_sample.text_sq = processed_question_sq["text"]
        if "other_question_str" in sample_info:
            text_processor_argument_oq = {
                "text": sample_info["other_question_str"]
            }
            processed_question_oq = self.text_processor(
                text_processor_argument_oq)
            current_sample.text_oq = processed_question_oq["text"]
        current_sample.question_text = sample_info["main_question_str"]
        current_sample.reasoning_question = sample_info[
            "main_question_str"]
        current_sample.reasoning_answer = sample_info["main_answer_str"][0]
        # NOTE(review): the direct indexing below raises KeyError when
        # "sub_question_str"/"other_question_str" are absent, although the
        # text_sq/text_oq assignments above are guarded -- confirm these
        # keys are always present for this split.
        current_sample.sub_question = sample_info["sub_question_str"]
        current_sample.other_question = sample_info["other_question_str"]
        current_sample.text_len = torch.tensor(len(
            sample_info["main_question_tokens"]), dtype=torch.int)
    else:
        text_processor_argument = {"text": sample_info["question_str"]}
        processed_question = self.text_processor(text_processor_argument)
        current_sample.text = processed_question["text"]
        if "sub_question_str" in sample_info:
            text_processor_argument_sq = {
                "text": sample_info["sub_question_str"]
            }
            processed_question_sq = self.text_processor(
                text_processor_argument_sq)
            current_sample.text_sq = processed_question_sq["text"]
        if "other_question_str" in sample_info:
            text_processor_argument_oq = {
                "text": sample_info["other_question_str"]
            }
            processed_question_oq = self.text_processor(
                text_processor_argument_oq)
            current_sample.text_oq = processed_question_oq["text"]
        else:
            # NOTE(review): text_sq is only set when "sub_question_str"
            # exists; if neither key is present this read fails -- verify
            # upstream guarantees both keys or neither.
            current_sample.text_oq = current_sample.text_sq
        current_sample.question_text = sample_info["question_str"]
        current_sample.reasoning_question = sample_info["question_str"]
        current_sample.reasoning_answer = sample_info["answers"][0]
        current_sample.sub_question = sample_info["sub_question_str"]
        # NOTE(review): other_question is filled from "sub_question_str",
        # not "other_question_str" -- looks like a deliberate fallback,
        # but confirm it is not a copy/paste slip.
        current_sample.other_question = sample_info["sub_question_str"]
        current_sample.text_len = torch.tensor(len(
            sample_info["question_tokens"]), dtype=torch.int)
    current_sample.question_id = torch.tensor(sample_info["question_id"],
                                              dtype=torch.int)
    # image_id is either an int (tensorize it) or an opaque string id.
    if isinstance(sample_info["image_id"], int):
        current_sample.image_id = torch.tensor(sample_info["image_id"],
                                               dtype=torch.int)
    else:
        current_sample.image_id = sample_info["image_id"]
    if self._use_features is True:
        features = self.features_db[idx]
        current_sample.update(features)
    # Add details for OCR like OCR bbox, vectors, tokens here
    current_sample = self.add_ocr_details(sample_info, current_sample)
    # Depending on whether we are using soft copy this can add
    # dynamic answer space
    current_sample = self.add_answer_info(sample_info, current_sample)
    return current_sample
def load_item(self, idx):
    """Build a Sample for an introspection-style row: main, sub and other
    questions are processed, along with ids, image url, optional features,
    OCR details and the answer space.
    """
    sample_info = self.imdb[idx]
    current_sample = Sample()
    # Tokenized questions take precedence; otherwise process the raw
    # main-question string.
    if "question_tokens" in sample_info:
        text_processor_argument = {
            "tokens": sample_info["question_tokens"]
        }
    else:
        #text_processor_argument = {"text": sample_info["question"]}
        text_processor_argument = {
            "text": sample_info["main_question_str"]
        }
    if "sub_question_str" in sample_info:
        text_processor_argument_sq = {
            "text": sample_info["sub_question_str"]
        }
    if "other_question_str" in sample_info:
        text_processor_argument_oq = {
            "text": sample_info["other_question_str"]
        }
    # NOTE(review): the two sq/oq calls below use variables bound only
    # inside the guarded branches above; a row missing "sub_question_str"
    # or "other_question_str" raises NameError here, so this loader
    # effectively requires both keys -- confirm against the imdb schema.
    processed_question = self.text_processor(text_processor_argument)
    processed_question_sq = self.text_processor(text_processor_argument_sq)
    processed_question_oq = self.text_processor(text_processor_argument_oq)
    current_sample.text = processed_question["text"]
    current_sample.text_sq = processed_question_sq["text"]
    current_sample.text_oq = processed_question_oq["text"]
    current_sample.question_text = sample_info["main_question_str"]
    current_sample.reasoning_question = sample_info["main_question_str"]
    current_sample.reasoning_answer = sample_info["main_answer_str"][0]
    #current_sample.image_url = sample_info["img_path"]
    current_sample.image_url = sample_info["image_path"]
    current_sample.sub_question = sample_info["sub_question_str"]
    current_sample.other_question = sample_info["other_question_str"]
    current_sample.question_id = torch.tensor(sample_info["question_id"],
                                              dtype=torch.int)
    # image_id is either an int (tensorize it) or an opaque string id.
    if isinstance(sample_info["image_id"], int):
        current_sample.image_id = torch.tensor(sample_info["image_id"],
                                               dtype=torch.int)
    else:
        current_sample.image_id = sample_info["image_id"]
    # text_len counts the MAIN question tokens (see the retained
    # commented-out alternative that used "question_tokens").
    current_sample.text_len = torch.tensor(
        #len(sample_info["question_tokens"]), dtype=torch.int
        len(sample_info["main_question_tokens"]), dtype=torch.int)
    if self._use_features is True:
        features = self.features_db[idx]
        current_sample.update(features)
    # Add details for OCR like OCR bbox, vectors, tokens here
    current_sample = self.add_ocr_details(sample_info, current_sample)
    # Depending on whether we are using soft copy this can add
    # dynamic answer space
    current_sample = self.add_answer_info(sample_info, current_sample)
    return current_sample