def __getitem__(self, idx):
    sample_info = self.annotation_db[idx]
    sample_info = self.preprocess_sample_info(sample_info)
    current_sample = Sample()

    # breaking change from VQA2Dataset: load question_id
    current_sample.question_id = torch.tensor(
        sample_info["question_id"], dtype=torch.int
    )

    if isinstance(sample_info["image_id"], int):
        current_sample.image_id = str(sample_info["image_id"])
    else:
        current_sample.image_id = sample_info["image_id"]

    if self._use_features is True:
        features = self.features_db[idx]
        current_sample.update(features)

    current_sample = self.add_sample_details(sample_info, current_sample)
    current_sample = self.add_answer_info(sample_info, current_sample)

    # only the 'max_features' key is needed
    # pop other keys to minimize data loading overhead
    if hasattr(current_sample, "image_info_0"):
        for k in list(current_sample.image_info_0):
            if k != "max_features":
                current_sample.image_info_0.pop(k)
    if hasattr(current_sample, "image_info_1"):
        for k in list(current_sample.image_info_1):
            if k != "max_features":
                current_sample.image_info_1.pop(k)

    return current_sample
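# The pruning loop above repeats once per image_info_* field. A minimal
# sketch of a reusable helper that expresses the same whitelist pattern;
# `_keep_only` is a hypothetical name, not part of the original class:
def _keep_only(info, keys=("max_features",)):
    # pop every key outside the whitelist to cut collate/transfer overhead
    for k in list(info):
        if k not in keys:
            info.pop(k)
    return info

# usage sketch: _keep_only(current_sample.image_info_0)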
def __getitem__(self, idx):
    sample_info = self.annotation_db[idx]
    current_sample = Sample()

    text_processor_argument = {"text": sample_info["question_str"]}
    processed_question = self.text_processor(text_processor_argument)
    current_sample.text = processed_question["text"]
    if "input_ids" in processed_question:
        current_sample.update(processed_question)

    current_sample.question_id = torch.tensor(
        sample_info["question_id"], dtype=torch.int
    )

    if isinstance(sample_info["image_id"], int):
        current_sample.image_id = torch.tensor(
            sample_info["image_id"], dtype=torch.int
        )
    else:
        current_sample.image_id = sample_info["image_id"]

    if self._use_features is True:
        features = self.features_db[idx]
        if hasattr(self, "transformer_bbox_processor"):
            features["image_info_0"] = self.transformer_bbox_processor(
                features["image_info_0"]
            )
        current_sample.update(features)

    # Depending on whether we are using soft copy this can add
    # dynamic answer space
    current_sample = self.add_answer_info(sample_info, current_sample)

    return current_sample
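# Rough sketch of the kind of normalization a bbox processor like the one
# above typically applies (an assumption for illustration, not the actual
# transformer_bbox_processor): scale pixel-space boxes into [0, 1] using the
# image size carried in image_info_0. The key names are assumed.
import torch

def normalize_bboxes(image_info):
    # (N, 4) boxes in x1, y1, x2, y2 pixel coordinates
    bbox = torch.as_tensor(image_info["bbox"], dtype=torch.float)
    w = float(image_info["image_width"])
    h = float(image_info["image_height"])
    image_info["bbox"] = bbox / torch.tensor([w, h, w, h])
    return image_info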
def load_item(self, idx): sample_info = self.annotation_db[idx] current_sample = Sample() if "question_tokens" in sample_info: text_processor_argument = { "tokens": sample_info["question_tokens"], "text": sample_info["question_str"], } else: text_processor_argument = {"text": sample_info["question"]} processed_question = self.text_processor(text_processor_argument) current_sample.text = processed_question["text"] if "input_ids" in processed_question: current_sample.update(processed_question) current_sample.question_id = torch.tensor(sample_info["question_id"], dtype=torch.int) if isinstance(sample_info["image_id"], int): current_sample.image_id = torch.tensor(sample_info["image_id"], dtype=torch.int) else: current_sample.image_id = sample_info["image_id"] if "question_tokens" in sample_info: current_sample.text_len = torch.tensor(len( sample_info["question_tokens"]), dtype=torch.int) if self._use_features: features = self.features_db[idx] if hasattr(self, "transformer_bbox_processor"): features["image_info_0"] = self.transformer_bbox_processor( features["image_info_0"]) current_sample.update(features) else: image_path = sample_info["image_name"] + ".jpg" current_sample.image = self.image_db.from_path( image_path)["images"][0] # Add details for OCR like OCR bbox, vectors, tokens here current_sample = self.add_ocr_details(sample_info, current_sample) # Depending on whether we are using soft copy this can add # dynamic answer space current_sample = self.add_answer_info(sample_info, current_sample) return current_sample
def __getitem__(self, idx: int) -> Sample:
    sample_info = self.annotation_db[idx]
    current_sample = Sample()

    processed_caption = self.masked_token_processor(
        {"text_a": sample_info["caption"], "text_b": "", "is_correct": True}
    )
    current_sample.update(processed_caption)
    current_sample.image_id = sample_info["image_id"]
    current_sample.feature_path = sample_info["feature_path"]

    # Get the image features
    if self._use_features:
        features = self.features_db[idx]
        image_info_0 = features["image_info_0"]
        if image_info_0 and "image_id" in image_info_0.keys():
            image_info_0["feature_path"] = image_info_0["image_id"]
            image_info_0.pop("image_id")
        current_sample.update(features)

    return current_sample
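# Illustrative toy stand-in for the masking idea behind
# masked_token_processor (not MMF's processor; names and the 15% rate are
# assumptions): randomly replace token ids with a [MASK] id and keep the
# originals as labels, ignoring unmasked positions in the loss.
import random
import torch

def toy_mask_tokens(input_ids, mask_token_id, prob=0.15, ignore_index=-100):
    input_ids = list(input_ids)
    labels = [ignore_index] * len(input_ids)
    for i, tok in enumerate(input_ids):
        if random.random() < prob:
            labels[i] = tok          # predict the original token here
            input_ids[i] = mask_token_id
    return torch.tensor(input_ids), torch.tensor(labels)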
def load_item(self, idx):
    sample_info = self.annotation_db[idx]
    sample_info = self.preprocess_sample_info(sample_info)
    current_sample = Sample()

    if self._dataset_type != "test":
        text_processor_argument = {"tokens": sample_info["caption_tokens"]}
        processed_caption = self.text_processor(text_processor_argument)
        current_sample.text = processed_caption["text"]
        current_sample.caption_id = torch.tensor(
            sample_info["caption_id"], dtype=torch.int
        )
        current_sample.caption_len = torch.tensor(
            len(sample_info["caption_tokens"]), dtype=torch.int
        )

    current_sample.image_id = object_to_byte_tensor(sample_info["image_id"])

    if self._use_features:
        features = self.features_db[idx]
        current_sample.update(features)
    else:
        image_path = str(sample_info["image_name"]) + ".jpg"
        current_sample.image = self.image_db.from_path(image_path)["images"][0]

    # Add reference captions to sample
    current_sample = self.add_reference_caption(sample_info, current_sample)

    return current_sample
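# object_to_byte_tensor above serializes an arbitrary picklable image_id
# (int or str) into a fixed-width uint8 tensor so it can be batched like any
# other field. A minimal sketch of that idea (the real MMF utility may
# differ in details; `max_size` is an assumed cap):
import pickle
import torch

def to_byte_tensor(obj, max_size=1024):
    data = pickle.dumps(obj)
    assert len(data) <= max_size, "object too large to encode"
    out = torch.zeros(max_size, dtype=torch.uint8)
    out[: len(data)] = torch.tensor(list(data), dtype=torch.uint8)
    return out

# decoding sketch: pickle.loads(bytes(t.tolist())) works because pickle
# stops at its STOP opcode and ignores the zero padding.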
def load_item(self, idx):
    sample_info = self.annotation_db[idx]
    current_sample = Sample()

    processed_caption = self.text_processor({"text": sample_info["captions"][0]})
    current_sample.text = processed_caption["text"]
    current_sample.caption_len = torch.tensor(
        len(processed_caption["text"]), dtype=torch.int
    )

    if isinstance(sample_info["image_id"], int):
        current_sample.image_id = torch.tensor(
            sample_info["image_id"], dtype=torch.int
        )
    else:
        current_sample.image_id = sample_info["image_id"]

    if self._use_features is True:
        features = self.features_db[idx]
        current_sample.update(features)

    # use the processed caption itself as the single reference answer
    current_sample.answers = torch.stack([processed_caption["text"]])

    return current_sample
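# Hedged usage sketch: each method above returns a Sample, which MMF
# typically collates into a SampleList for batching (assuming the standard
# mmf.common.sample API; the DataLoader arguments are illustrative):
from mmf.common.sample import SampleList
from torch.utils.data import DataLoader

def collate(batch):
    return SampleList(batch)

# loader = DataLoader(dataset, batch_size=32, collate_fn=collate)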