def __init__(self, arch="vgg19_bn"):
    """Build the wrapped network for ``arch`` and prepare it for inference.

    Args:
        arch: Architecture name forwarded to ``get_model``.
    """
    super().__init__()

    # get_model returns a pair; only its first element is kept.
    network, _ = get_model(arch)

    # Use the GPU whenever one is available.
    if torch.cuda.is_available():
        network = network.cuda()

    # Stored in evaluation mode.
    self.model = network.eval()
def __init__(self, arch="vgg16"):
    """Build the wrapped network for ``arch``.

    Args:
        arch: Architecture name forwarded to ``get_model``.
    """
    super().__init__()

    # get_model returns a pair; only its first element is kept.
    network, _ = get_model(arch)
    self.model = network
def main():
    """Answer one question about one image with a trained VQA model.

    Parses the command-line arguments, rebuilds the training vocabulary,
    loads the image-embedding network and the VQA model weights, runs a
    single forward pass for ``args.image`` / ``args.question`` and passes the
    predicted answer to ``display_result``.

    Raises:
        SystemExit: With status 1 when the weights file cannot be loaded.
    """
    args = parse_args()

    print("Loading encoded data...")
    # Only the vocabulary and the two lookup tables used below are kept.
    _, vocab, word_to_wid, _, _, aid_to_ans = process_vqa_dataset(
        args.questions, args.annotations, "train", maps=None)

    # Get VGG model to process the image
    vision_model, _ = image.get_model(args.embedding_arch)

    # Get our VQA model
    model = Models[args.model].value(len(vocab))

    # The final classifier
    classifier = nn.Softmax(dim=1)

    try:
        # Load onto the CPU first so a checkpoint saved on a GPU machine can
        # still be read on a CPU-only host; load_state_dict copies the values
        # into the model, which is moved to the GPU below when one exists.
        weights = torch.load(args.weights, map_location="cpu")
    except Exception as err:
        print(
            "ERROR: Default weights missing. Please specify weights for the VQA model"
        )
        # Surface the underlying cause instead of discarding it.
        print("Reason: {}".format(err))
        # A failed load is an error: exit with a non-zero status.
        raise SystemExit(1)

    model.load_state_dict(weights["model"])

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        vision_model.cuda()
        model.cuda()

    vision_model.eval()
    model.eval()

    img_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    print("Processing image")
    im = Image.open(args.image)
    # The normalisation above is defined for three channels, so grayscale,
    # palette or RGBA inputs are converted first. The untouched image is
    # kept for display.
    img = img_transforms(im.convert("RGB"))
    img = img.unsqueeze(0)  # add batch dimension
    if use_cuda:
        img = img.cuda()

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        img_features = vision_model(img)

    print("Processing question")
    q = text.process_single_question(args.question, vocab, word_to_wid)

    # Convert the question to a sequence of 1 hot vectors over the vocab
    wids = q["question_wids"]
    one_hot_vec = np.zeros((len(wids), len(vocab)), dtype=np.float32)
    for row, wid in enumerate(wids):
        one_hot_vec[row, wid] = 1

    # Add the batch dimension
    q = torch.from_numpy(one_hot_vec).unsqueeze(0)
    if use_cuda:
        q = q.cuda()

    # Get the model output and classify for the final value
    with torch.no_grad():
        output = classifier(model(img_features, q))

    _, ans_id = torch.max(output, dim=1)

    # index into ans_id since it is a tensor
    ans = generate(ans_id[0], aid_to_ans)
    display_result(im, args.question, ans)
def __init__(self, arch="vgg19_bn"):
    """Build the wrapped network for ``arch`` and switch it to eval mode.

    Args:
        arch: Architecture name forwarded to ``get_model``.
    """
    super().__init__()

    # get_model returns a pair; only its first element is kept.
    network, _ = get_model(arch)

    # Stored in evaluation mode.
    self.model = network.eval()
def __init__(self, arch="vgg16"):
    """Build the wrapped network for ``arch``.

    Args:
        arch: Architecture name forwarded to ``get_model``.
    """
    super(FeatureExtractor, self).__init__()

    # get_model returns a pair; only its first element is kept.
    network, _ = get_model(arch)
    self.model = network