Beispiel #1
0
    def load_item(self, idx):
        """Assemble the ``Sample`` for the dataset entry at position ``idx``.

        Outside the ``"test"`` dataset type, the entry's caption tokens are
        passed through ``self.text_processor`` and the sample receives
        ``text``, ``caption_id`` and ``caption_len``. The ``image_id`` is
        wrapped in an int tensor when it is a Python ``int`` and stored
        unchanged otherwise. When ``self._use_features`` is exactly ``True``,
        the entry from ``self.features_db`` at the same index is merged into
        the sample.

        Args:
            idx: Index into ``self.imdb`` (and ``self.features_db``).

        Returns:
            Whatever ``self.add_reference_caption`` returns for the
            populated sample.
        """
        entry = self.imdb[idx]
        sample = Sample()

        # Caption fields are only attached when not running on the test split.
        if self._dataset_type != "test":
            caption_tokens = entry["caption_tokens"]
            sample.text = self.text_processor({"tokens": caption_tokens})["text"]
            sample.caption_id = torch.tensor(entry["caption_id"], dtype=torch.int)
            sample.caption_len = torch.tensor(len(caption_tokens), dtype=torch.int)

        # Only integer ids become tensors; any other id type is passed through.
        image_id = entry["image_id"]
        sample.image_id = (
            torch.tensor(image_id, dtype=torch.int)
            if isinstance(image_id, int)
            else image_id
        )

        if self._use_features is True:
            sample.update(self.features_db[idx])

        # Attach the reference captions and hand back the finished sample.
        return self.add_reference_caption(entry, sample)
Beispiel #2
0
 def load_item(self, idx):
     """Build a ``Sample`` with caption text and freshly extracted image features.

     The annotation at ``idx`` is read as ``(image_id, caption, ...)``. The
     caption is tokenized, wrapped in ``'<s>'`` / ``'</s>'`` markers and
     passed to ``self.text_processor``; the image is loaded through
     ``self._image_transform`` and run through ``self.feature_extractor``
     with gradient tracking disabled.

     Args:
         idx: Index into ``self.annotations``.

     Returns:
         A ``Sample`` carrying ``text``, ``caption_len``, ``answers`` and
         ``image_feature_0``.
     """
     item = Sample()
     record = self.annotations[idx]
     image_id = record[0]
     # The sub-folder name is the part of the image id before the first '_'.
     folder = image_id.split('_', 1)[0]
     caption = record[1]

     # Wrap the tokenized caption in explicit sentence-boundary markers.
     # NOTE(review): the original comments suggest text_processor may already
     # tokenize and add SOS/EOS itself -- confirm the markers are not doubled.
     wrapped_tokens = ['<s>'] + tokenize(caption) + ['</s>']
     encoded_text = self.text_processor({'tokens': wrapped_tokens})['text']

     item.text = encoded_text
     # Length counts the two boundary markers added above.
     item.caption_len = torch.tensor(len(wrapped_tokens), dtype=torch.int)
     # The processed caption doubles as the single stacked "answer".
     item.answers = torch.stack([encoded_text])

     # Extract image features on the fly; no gradients are needed here.
     path = os.path.join(self.image_dir, folder, image_id)
     image, scale = self._image_transform(path)
     with torch.no_grad():
         extracted = self.feature_extractor([image], [scale])
     # A one-image batch was submitted, so take its first result.
     item.image_feature_0 = extracted[0].cpu()
     return item