Code Example #1
    def predict(self, url, feat_name, get_features=False):
        with torch.no_grad():
            detectron_features = get_detectron_features([url],
                                                        self.detection_model,
                                                        False, feat_name,
                                                        self.cuda_device)
            # returns a single-element list
            detectron_features = detectron_features[0]

            sample = Sample()
            sample.dataset_name = "coco"
            sample.dataset_type = "test"
            sample.image_feature_0 = detectron_features
            sample.answers = torch.zeros((5, 10), dtype=torch.long)

            sample_list = SampleList([sample])
            sample_list = sample_list.to(self.cuda_device)

            tokens = self.caption_model(sample_list)["captions"]

        gc.collect()
        torch.cuda.empty_cache()

        if get_features:
            return tokens, detectron_features
        return tokens
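A hedged usage sketch for predict() above: the demo instance, the image URL, and the "fc6" feature name are illustrative assumptions; only the signature comes from the example.

    # Hypothetical usage: `demo` is assumed to be an instance of the class
    # defining predict() above, already holding the detection model, the
    # caption model, and a CUDA device.
    url = "http://images.cocodataset.org/val2017/000000397133.jpg"
    tokens = demo.predict(url, feat_name="fc6")
    # get_features=True additionally returns the extracted Detectron features
    tokens, features = demo.predict(url, feat_name="fc6", get_features=True)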
Code Example #2
    def forward(self, images, image_scales, transitions=None):
        feature_list = self.encoder(images, image_scales)
        image_features = feature_list[0]
        assert len(feature_list) == 1, 'current model only supports batch size 1'

        sample = Sample()
        sample.dataset_name = "coco"
        sample.dataset_type = "test"
        sample.image_feature_0 = image_features
        # answers seems to act only as a placeholder here,
        # so its size does not matter
        sample.answers = torch.zeros((1, 10), dtype=torch.long)
        sample_list = SampleList([sample])
        sample_list = sample_list.to(device)
        if transitions is not None:
            sample_list.transitions = transitions

        output = self.decoder(sample_list)
        tokens = output['captions']
        caption = tokens.tolist()[0]
        caption = self.decoder.caption_processor(caption)['caption']

        return caption
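Since forward() runs the decode end to end and returns a plain string, calling the module is a one-liner. A minimal sketch; captioner, images, and image_scales are illustrative placeholders, not names from the source.

    # Hypothetical usage: `captioner` is an instance of the module above;
    # `images` and `image_scales` must match what its encoder expects.
    caption = captioner(images, image_scales)
    # when given, transitions are attached to the SampleList before decoding
    caption = captioner(images, image_scales, transitions=transitions)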
Code Example #3
    def test_nucleus_sampling(self):
        vocab = text_utils.VocabFromText(self.VOCAB_EXAMPLE_SENTENCES)

        model_config = self.config.model_attributes.butd
        model = TestDecoderModel(model_config, vocab)
        model.build()
        model.to("cuda")
        model.eval()

        sample = Sample()
        sample.dataset_name = "coco"
        sample.dataset_type = "test"
        sample.image_feature_0 = torch.randn(100, 2048)
        sample.answers = torch.zeros((5, 10), dtype=torch.long)
        sample_list = SampleList([sample])
        # move the batch to the same device as the model to avoid a
        # CPU/CUDA mismatch in the forward pass
        sample_list = sample_list.to("cuda")

        tokens = model(sample_list)["captions"]

        # these are expected tokens for sum_threshold = 0.5
        expected_tokens = [
            1.0000e+00, 2.9140e+03, 5.9210e+03, 2.2040e+03, 5.0550e+03,
            9.2240e+03, 4.5120e+03, 1.8200e+02, 3.6490e+03, 6.4090e+03,
            2.0000e+00
        ]

        self.assertEqual(tokens[0].tolist(), expected_tokens)
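For context, nucleus (top-p) sampling keeps the smallest set of tokens whose cumulative probability reaches the threshold (the sum_threshold mentioned above) and samples from that renormalized set. A minimal standalone sketch of the idea, not the BUTD decoder's actual implementation:

    import torch

    def nucleus_sample(logits, sum_threshold=0.5):
        # sort token probabilities in descending order
        probs = torch.softmax(logits, dim=-1)
        sorted_probs, sorted_idx = torch.sort(probs, descending=True)
        cumulative = torch.cumsum(sorted_probs, dim=-1)
        # keep the smallest prefix whose cumulative mass reaches the
        # threshold; always keep at least the top token
        keep = cumulative - sorted_probs < sum_threshold
        keep[..., 0] = True
        sorted_probs[~keep] = 0.0
        sorted_probs = sorted_probs / sorted_probs.sum(dim=-1, keepdim=True)
        # sample within the nucleus, then map back to vocabulary ids
        choice = torch.multinomial(sorted_probs, num_samples=1)
        return sorted_idx.gather(-1, choice)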
Code Example #4
    def predict(self, url):
        with torch.no_grad():
            detectron_features = self.get_detectron_features(url)

            sample = Sample()
            sample.dataset_name = "coco"
            sample.dataset_type = "test"
            sample.image_feature_0 = detectron_features
            sample.answers = torch.zeros((5, 10), dtype=torch.long)

            sample_list = SampleList([sample])
            sample_list = sample_list.to("cuda")

            tokens = self.pythia_model(sample_list)["captions"]

        gc.collect()
        torch.cuda.empty_cache()

        return tokens
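Code Example #2 shows tokens being run through a caption processor to obtain a readable string; the same pattern could be applied to this method's output. A hedged sketch in which demo and its caption_processor attribute are assumptions:

    # Hypothetical decoding, mirroring Code Example #2: `demo` is an
    # instance of the class above, assumed to expose a caption processor.
    tokens = demo.predict("http://images.cocodataset.org/val2017/000000397133.jpg")
    caption = demo.caption_processor(tokens.tolist()[0])["caption"]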
Code Example #5
File: dataset.py  Project: sameerdharur/sorting-vqa
    def load_item(self, idx):
        sample_info = self.imdb[idx]
        current_sample = Sample()
        current_sample.dataset_name = self.dataset

        if self.dataset == 'train_vqa':

            text_processor_argument = {
                "tokens": sample_info["question_tokens"]
            }
            processed_question = self.text_processor(text_processor_argument)
            current_sample.text_len = torch.tensor(
                len(sample_info["question_tokens"]), dtype=torch.int)
            current_sample.text = processed_question["text"]
            current_sample.question_text = sample_info["question_str"]
            current_sample.text_sq = current_sample.text
            current_sample.text_oq = current_sample.text
            current_sample.reasoning_question = sample_info["question_str"]
            current_sample.reasoning_answer = sample_info["answers"][0]
            current_sample.sub_question = sample_info["question_str"]
            current_sample.other_question = sample_info["question_str"]

        elif self.dataset in ('train_introspect', 'test'):

            text_processor_argument = {
                "text": sample_info["main_question_str"]
            }
            processed_question = self.text_processor(text_processor_argument)
            current_sample.text = processed_question["text"]
            if "sub_question_str" in sample_info:
                text_processor_argument_sq = {
                    "text": sample_info["sub_question_str"]
                }
                processed_question_sq = self.text_processor(
                    text_processor_argument_sq)
                current_sample.text_sq = processed_question_sq["text"]

            if "other_question_str" in sample_info:
                text_processor_argument_oq = {
                    "text": sample_info["other_question_str"]
                }
                processed_question_oq = self.text_processor(
                    text_processor_argument_oq)
                current_sample.text_oq = processed_question_oq["text"]

            current_sample.question_text = sample_info["main_question_str"]
            current_sample.reasoning_question = sample_info["main_question_str"]
            current_sample.reasoning_answer = sample_info["main_answer_str"][0]
            current_sample.sub_question = sample_info["sub_question_str"]
            current_sample.other_question = sample_info["other_question_str"]
            current_sample.text_len = torch.tensor(
                len(sample_info["main_question_tokens"]), dtype=torch.int)

        else:

            text_processor_argument = {"text": sample_info["question_str"]}
            processed_question = self.text_processor(text_processor_argument)
            current_sample.text = processed_question["text"]
            if "sub_question_str" in sample_info:
                text_processor_argument_sq = {
                    "text": sample_info["sub_question_str"]
                }
                processed_question_sq = self.text_processor(
                    text_processor_argument_sq)
                current_sample.text_sq = processed_question_sq["text"]

            if "other_question_str" in sample_info:
                text_processor_argument_oq = {
                    "text": sample_info["other_question_str"]
                }
                processed_question_oq = self.text_processor(
                    text_processor_argument_oq)
                current_sample.text_oq = processed_question_oq["text"]
            else:
                current_sample.text_oq = current_sample.text_sq

            current_sample.question_text = sample_info["question_str"]
            current_sample.reasoning_question = sample_info["question_str"]
            current_sample.reasoning_answer = sample_info["answers"][0]
            current_sample.sub_question = sample_info["sub_question_str"]
            current_sample.other_question = sample_info["sub_question_str"]
            current_sample.text_len = torch.tensor(
                len(sample_info["question_tokens"]), dtype=torch.int)

        current_sample.question_id = torch.tensor(
            sample_info["question_id"], dtype=torch.int)

        if isinstance(sample_info["image_id"], int):
            current_sample.image_id = torch.tensor(
                sample_info["image_id"], dtype=torch.int)
        else:
            current_sample.image_id = sample_info["image_id"]

        if self._use_features:
            features = self.features_db[idx]
            current_sample.update(features)

        # Add details for OCR like OCR bbox, vectors, tokens here
        current_sample = self.add_ocr_details(sample_info, current_sample)
        # Depending on whether we are using soft copy this can add
        # dynamic answer space
        current_sample = self.add_answer_info(sample_info, current_sample)

        return current_sample
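Each load_item() call yields a single Sample; at batch time these are typically collated with SampleList, which stacks the shared tensor fields, just as the other examples do for the single-sample case. A sketch, with dataset as an illustrative instance of this class:

    # Hypothetical batching: SampleList collates the per-item Samples,
    # stacking tensor fields such as text, text_len, and question_id.
    samples = [dataset.load_item(i) for i in range(4)]
    batch = SampleList(samples)
    batch = batch.to("cuda")  # moves every tensor field in one call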