Code Example #1
    def predict(self, img_paths, qud):
        """
        We enable batch prediction here
        :return:
        """
        with torch.no_grad():
            detectron_features = self.get_detectron_features(
                img_paths)  # a list of image features
            resnet_features = self.get_resnet_features(
                img_paths)  # [batch_size, 196, 2048]

            sample_list = []
            # Build one Sample per image; the question is shared across the batch.
            for i in range(len(detectron_features)):
                sample = Sample()
                processed_text = self.vqa_demo.text_processor({"text": qud})
                sample.text = processed_text["text"]
                sample.text_len = len(processed_text["tokens"])

                sample.image_feature_0 = detectron_features[i]
                sample.image_info_0 = Sample(
                    {"max_features": torch.tensor(100, dtype=torch.long)})
                sample.image_feature_1 = resnet_features[i]
                sample_list.append(sample)

            sample_list = SampleList(sample_list)
            sample_list = sample_list.to("cuda")

            scores = self.vqa_demo.pythia_model(sample_list)["scores"]
            scores = torch.nn.functional.softmax(scores, dim=1)
            actual, indices = scores.topk(5, dim=1)

            batch_probs = []
            batch_answers = []

            for i in range(scores.shape[0]):
                top_indices = indices[i]
                top_scores = actual[i]

                probs = []
                answers = []

                for idx, score in enumerate(top_scores):
                    probs.append(score.item())
                    answers.append(
                        self.vqa_demo.answer_processor.idx2word(
                            top_indices[idx].item()))
                batch_probs.append(probs)
                batch_answers.append(answers)

        # If GPU memory becomes an issue, uncomment these to clear it:
        # gc.collect()
        # torch.cuda.empty_cache()

        # Each returned list has batch_size entries, e.g.
        # [[ans_1, ans_2, ...], [ans_1, ans_2, ...]]
        return batch_probs, batch_answers
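
For context, here is a minimal usage sketch of the batch predict() above. The `demo` instance and the file names are assumptions (the wrapper class is not shown in this snippet); only the call shape and the paired return lists follow from the code itself.

# Hypothetical usage; `demo` is an assumed instance of the wrapper
# class that defines predict() above.
img_paths = ["kitchen.jpg", "street.jpg"]
question = "what color is the table?"

batch_probs, batch_answers = demo.predict(img_paths, question)

# One top-5 list per input image; probs[i][j] pairs with answers[i][j].
for path, probs, answers in zip(img_paths, batch_probs, batch_answers):
    print(path)
    for p, a in zip(probs, answers):
        print(f"  {a}: {p:.3f}")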
Code Example #2
    def predict(self, url, question):
        """Answer one question about one image; returns top-5 (probs, answers)."""
        with torch.no_grad():
            detectron_features = self.get_detectron_features(url)
            resnet_features = self.get_resnet_features(url)

            sample = Sample()

            processed_text = self.text_processor({"text": question})
            sample.text = processed_text["text"]
            sample.text_len = len(processed_text["tokens"])

            sample.image_feature_0 = detectron_features
            sample.image_info_0 = Sample({
                "max_features": torch.tensor(100, dtype=torch.long)
            })

            sample.image_feature_1 = resnet_features

            sample_list = SampleList([sample])
            sample_list = sample_list.to("cuda")

            scores = self.pythia_model(sample_list)["scores"]
            scores = torch.nn.functional.softmax(scores, dim=1)
            actual, indices = scores.topk(5, dim=1)

            top_indices = indices[0]
            top_scores = actual[0]

            probs = []
            answers = []

            for idx, score in enumerate(top_scores):
                probs.append(score.item())
                answers.append(
                    self.answer_processor.idx2word(top_indices[idx].item())
                )

        gc.collect()
        torch.cuda.empty_cache()
        return probs, answers
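
All three snippets on this page decode answers the same way: a softmax over the answer-vocabulary logits, then topk(5) for the five most probable answers. The following self-contained sketch isolates just that step; the random logits and the toy answer list standing in for answer_processor.idx2word are assumptions for illustration.

import torch

torch.manual_seed(0)

# Toy stand-ins: an 8-word answer vocabulary and a batch of 2 logit rows.
answer_vocab = ["yes", "no", "red", "blue", "two", "three", "cat", "dog"]
scores = torch.randn(2, len(answer_vocab))  # [batch_size, vocab_size]

scores = torch.nn.functional.softmax(scores, dim=1)
actual, indices = scores.topk(5, dim=1)  # top-5 values and vocabulary indices

for row in range(scores.shape[0]):
    # idx2word in the real code maps a vocabulary index to an answer string;
    # a plain list lookup plays that role here.
    answers = [answer_vocab[i] for i in indices[row].tolist()]
    probs = [round(p, 3) for p in actual[row].tolist()]
    print(list(zip(answers, probs)))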
Code Example #3
File: tiki.py  Project: psnonis/TikiAI
    def getAnswers(self, image, question, meta=None):
        """Answer a question about an image (PIL image or path string)."""
        first = time.time()
        meta = meta or str(image)
        if isinstance(image, str):
            image = Image.open(image)
        image = image.convert('RGB')

        print(f'Tiki : Getting Answers : {meta}, {question}')

        with torch.no_grad():

            detectron_features = self.get_detectron_features(image)
            resnet152_features = self.get_resnet152_features(image)

            start = time.time()
            sample = Sample()

            processed_text = self.text_processor({'text': question})
            sample.text = processed_text['text']
            sample.text_len = len(processed_text['tokens'])

            sample.image_feature_0 = detectron_features
            sample.image_info_0 = Sample(
                {'max_features': torch.tensor(100, dtype=torch.long)})

            sample.image_feature_1 = resnet152_features

            sample_list = SampleList([sample])

            sample_list = sample_list.to(self.device.type)

            scores = self.pythiaVQA_model(sample_list)['scores']
            scores = torch.nn.functional.softmax(scores, dim=1)
            actual, indices = scores.topk(5, dim=1)

            top_indices = indices[0]
            top_scores = actual[0]

            answers = []

            for rank, score in enumerate(top_scores):
                answers.append({
                    'rank': rank,
                    'answer': self.answer_processor.idx2word(top_indices[rank].item()),
                    'probability': score.item()
                })

            answer = answers[0]['answer']

            end = time.time()

        print(f'Tiki : Getting Answers : PythiaVQA - Finished in {end-start:7.3f} Seconds')

        processing['PythiaVQA'] = end - start  # timing dict defined elsewhere in tiki.py

        gc.collect()
        torch.cuda.empty_cache()

        last = time.time()
        processing['InferTime'] = last - first

        return question, answer, answers
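
As before, a short usage sketch may help; the `tiki` instance is an assumption, while the argument types and the (question, answer, answers) return triple come from getAnswers() above.

# Hypothetical usage; `tiki` is an assumed instance of the class that
# defines getAnswers() above. The first argument may be a PIL image or
# a path string, per the isinstance check in the method.
question, answer, answers = tiki.getAnswers("demo.jpg", "how many people are there?")

print(f"top answer: {answer}")
for entry in answers:  # five dicts with keys: rank, answer, probability
    print(f"{entry['rank']}: {entry['answer']} ({entry['probability']:.3f})")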