def predict(self, img_paths, qud):
    """Batch VQA prediction: ask the same question of several images.

    :param img_paths: iterable of image paths/URLs accepted by the
        detectron/resnet feature extractors.
    :param qud: the question text, applied to every image in the batch.
    :return: ``(batch_probs, batch_answers)`` — parallel lists with one
        entry per image; each entry holds the top-5 softmax
        probabilities and the corresponding answer strings.
    """
    with torch.no_grad():
        detectron_features = self.get_detectron_features(
            img_paths)  # a list of image features
        resnet_features = self.get_resnet_features(
            img_paths)  # [batch_size, 196, 2048]

        # The question is identical for every sample, so process it once
        # instead of once per image (hoisted loop-invariant work;
        # assumes the text processor is deterministic — standard for
        # Pythia vocab-lookup processors).
        processed_text = self.vqa_demo.text_processor({"text": qud})

        sample_list = []
        for det_feat, res_feat in zip(detectron_features, resnet_features):
            sample = Sample()
            sample.text = processed_text["text"]
            sample.text_len = len(processed_text["tokens"])
            sample.image_feature_0 = det_feat
            sample.image_info_0 = Sample(
                {"max_features": torch.tensor(100, dtype=torch.long)})
            sample.image_feature_1 = res_feat
            sample_list.append(sample)

        sample_list = SampleList(sample_list)
        sample_list = sample_list.to("cuda")

        scores = self.vqa_demo.pythia_model(sample_list)["scores"]
        scores = torch.nn.functional.softmax(scores, dim=1)
        actual, indices = scores.topk(5, dim=1)

        batch_probs = []
        batch_answers = []
        # One (top-5 scores, top-5 indices) row per image in the batch.
        for top_scores, top_indices in zip(actual, indices):
            batch_probs.append([score.item() for score in top_scores])
            batch_answers.append([
                self.vqa_demo.answer_processor.idx2word(idx.item())
                for idx in top_indices
            ])

    ## if the memory becomes an issue, we then clear this
    # gc.collect()
    # torch.cuda.empty_cache()

    # list is of batch_size
    # [[ans_1, ans_2], [ans_1, ans2]]
    return batch_probs, batch_answers
def predict(self, url, question):
    """Run single-image VQA: answer ``question`` for the image at ``url``.

    :return: ``(probs, answers)`` — the top-5 softmax probabilities and
        the matching answer strings, best answer first.
    """
    with torch.no_grad():
        image_feat = self.get_detectron_features(url)
        context_feat = self.get_resnet_features(url)

        sample = Sample()
        tokens = self.text_processor({"text": question})
        sample.text = tokens["text"]
        sample.text_len = len(tokens["tokens"])
        sample.image_feature_0 = image_feat
        sample.image_info_0 = Sample({
            "max_features": torch.tensor(100, dtype=torch.long)
        })
        sample.image_feature_1 = context_feat

        batch = SampleList([sample]).to("cuda")

        logits = self.pythia_model(batch)["scores"]
        confidences = torch.nn.functional.softmax(logits, dim=1)
        top_scores, top_indices = confidences.topk(5, dim=1)

        probs = [score.item() for score in top_scores[0]]
        answers = [
            self.answer_processor.idx2word(idx.item())
            for idx in top_indices[0]
        ]

    # Release cached tensors between calls; the models are large.
    gc.collect()
    torch.cuda.empty_cache()

    return probs, answers
def getAnswers(self, image, question, meta=None):
    """Answer ``question`` about ``image`` (a path string or PIL image).

    Prints progress messages, records stage timings into the
    module-level ``processing`` dict, and returns
    ``(question, best_answer, ranked_answers)`` where ``ranked_answers``
    is a list of ``{'rank', 'answer', 'probability'}`` dicts.
    """
    first = time.time()
    meta = meta or str(image)

    # Accept either a filesystem path or an already-loaded PIL image.
    if isinstance(image, str):
        image = Image.open(image)
    image = image.convert('RGB')

    print(f'Tiki : Getting Answers : {meta}, {question}')

    with torch.no_grad():
        detectron = self.get_detectron_features(image)
        resnet152 = self.get_resnet152_features(image)

        start = time.time()

        sample = Sample()
        text = self.text_processor({'text': question})
        sample.text = text['text']
        sample.text_len = len(text['tokens'])
        sample.image_feature_0 = detectron
        sample.image_info_0 = Sample(
            {'max_features': torch.tensor(100, dtype=torch.long)})
        sample.image_feature_1 = resnet152

        batch = SampleList([sample]).to(self.device.type)

        logits = self.pythiaVQA_model(batch)['scores']
        scores = torch.nn.functional.softmax(logits, dim=1)
        top_scores, top_indices = scores.topk(5, dim=1)

        answers = [
            {
                'rank': rank,
                'answer': self.answer_processor.idx2word(idx.item()),
                'probability': score.item()
            }
            for rank, (score, idx) in enumerate(
                zip(top_scores[0], top_indices[0]))
        ]
        answer = answers[0]['answer']

        end = time.time()
        print(
            f'Tiki : Getting Answers : PythiaVQA - Finished in {end-start:7.3f} Seconds'
        )
        processing['PythiaVQA'] = end - start

    gc.collect()
    torch.cuda.empty_cache()

    last = time.time()
    processing['InferTime'] = last - first

    return question, answer, answers