def load_item(self, idx):
    """Build and return a ``Sample`` for the dataset item at *idx*.

    Reads annotation metadata from ``self.imdb`` and, outside the test
    split, attaches the processed caption. Optionally merges precomputed
    image features from ``self.features_db``.

    Args:
        idx: Integer index into ``self.imdb`` (and ``self.features_db``).

    Returns:
        Sample: populated with text/caption fields (non-test splits),
        ``image_id``, optional feature entries, and reference captions.
    """
    sample_info = self.imdb[idx]
    current_sample = Sample()

    # Ground-truth captions exist only outside the test split.
    if self._dataset_type != "test":
        text_processor_argument = {"tokens": sample_info["caption_tokens"]}
        processed_caption = self.text_processor(text_processor_argument)
        current_sample.text = processed_caption["text"]
        current_sample.caption_id = torch.tensor(
            sample_info["caption_id"], dtype=torch.int
        )
        current_sample.caption_len = torch.tensor(
            len(sample_info["caption_tokens"]), dtype=torch.int
        )

    # image_id may be an int (tensorize it) or a string identifier
    # (keep as-is) depending on the annotation source.
    if isinstance(sample_info["image_id"], int):
        current_sample.image_id = torch.tensor(
            sample_info["image_id"], dtype=torch.int
        )
    else:
        current_sample.image_id = sample_info["image_id"]

    # Fixed: `is True` identity comparison replaced with idiomatic
    # truthiness check on the feature flag.
    if self._use_features:
        features = self.features_db[idx]
        current_sample.update(features)

    # Add reference captions to sample
    current_sample = self.add_reference_caption(sample_info, current_sample)

    return current_sample
def load_item(self, idx):
    """Build and return a ``Sample`` for the annotation at *idx*.

    Tokenizes the caption, processes it through ``self.text_processor``
    (which pads, converts tokens to indices, and handles SOS/EOS), and
    extracts image features on the fly with ``self.feature_extractor``.

    Args:
        idx: Integer index into ``self.annotations``; each entry is a
            (image_id, caption) pair — TODO confirm against caller.

    Returns:
        Sample: with ``text``, ``caption_len``, ``answers`` and
        ``image_feature_0`` populated.
    """
    sample = Sample()

    image_id = self.annotations[idx][0]
    # The image's folder name is the prefix of its id (before '_').
    image_folder = image_id.split('_')[0]

    caption = self.annotations[idx][1]
    tokens = tokenize(caption)
    tokens = ['<s>'] + tokens + ['</s>']

    # use text_processor to process caption
    # pad sequence, convert token to indices and add SOS, EOS token
    # text_processor already contains a pre-processor to tokenize caption
    caption_p = self.text_processor({'tokens': tokens})
    sample.text = caption_p['text']
    sample.caption_len = torch.tensor(len(tokens), dtype=torch.int)
    # Stack the single reference caption so answers has a leading
    # reference dimension. (Removed dead commented-out `sample.target`.)
    sample.answers = torch.stack([caption_p['text']])

    # Generate image features on the fly; inference only, so no grad.
    image_path = os.path.join(self.image_dir, image_folder, image_id)
    image, image_scale = self._image_transform(image_path)
    with torch.no_grad():
        image_features = self.feature_extractor([image], [image_scale])
    image_features = image_features[0]
    # Move off the extractor's device so the dataloader returns CPU tensors.
    sample.image_feature_0 = image_features.cpu()

    return sample