Example #1
import torch
import torch.nn as nn


class FeatureExtractor(nn.Module):
    def __init__(self, arch="vgg19_bn"):
        super().__init__()
        # get_model is a project-local helper (see image.get_model in Example #3)
        self.model, _ = get_model(arch)

        # Move the backbone to the GPU when one is available
        if torch.cuda.is_available():
            self.model = self.model.cuda()

        # Inference only: disable dropout and batch-norm updates
        self.model = self.model.eval()
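All five examples rely on a project-specific get_model helper that returns a backbone plus a second value (discarded above). A minimal sketch of what such a helper might look like, assuming it resolves the architecture name on torchvision.models and also returns the input width of the classifier's last layer; both details are assumptions, not the project's actual code:

import torchvision.models as models


def get_model(arch="vgg19_bn"):
    # Hypothetical sketch (assumption): resolve the constructor by name,
    # e.g. models.vgg19_bn, and load ImageNet-pretrained weights
    model = getattr(models, arch)(pretrained=True)
    # Input width of the final classifier layer (4096 for the VGG variants used here)
    feature_dim = model.classifier[-1].in_features
    return model, feature_dim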
Example #2
class FeatureExtractor(nn.Module):
    def __init__(self, arch="vgg16"):
        super().__init__()
        self.model, _ = get_model(arch)
Example #3
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from torchvision import transforms

# parse_args, process_vqa_dataset, image, text, Models, generate and
# display_result are project-local helpers.


def main():
    args = parse_args()

    print("Loading encoded data...")
    data, vocab, word_to_wid, wid_to_word, ans_to_aid, aid_to_ans = process_vqa_dataset(
        args.questions, args.annotations, "train", maps=None)

    # Get VGG model to process the image
    vision_model, _ = image.get_model(args.embedding_arch)
    # Get our VQA model
    model = Models[args.model].value(len(vocab))
    # Softmax turns the model's output logits into class probabilities
    classifier = nn.Softmax(dim=1)

    try:
        weights = torch.load(args.weights)
    except Exception:
        print(
            "ERROR: Default weights missing. Please specify weights for the VQA model"
        )
        exit(1)

    model.load_state_dict(weights["model"])

    if torch.cuda.is_available():
        vision_model.cuda()
        model.cuda()

    vision_model.eval()
    model.eval()

    img_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    print("Processing image")
    im = Image.open(args.image)
    img = img_transforms(im)
    img = img.unsqueeze(0)  # add batch dimension

    if torch.cuda.is_available():
        img = img.cuda()

    img_features = vision_model(img)

    print("Processing question")
    q = text.process_single_question(args.question, vocab, word_to_wid)

    # Convert the question to a sequence of one-hot vectors over the vocab
    one_hot_vec = np.zeros((len(q["question_wids"]), len(vocab)))
    for k, wid in enumerate(q["question_wids"]):
        one_hot_vec[k, wid] = 1

    q = torch.from_numpy(one_hot_vec)
    if torch.cuda.is_available():
        q = q.cuda()

    # Add the batch dimension
    q = q.unsqueeze(0).float()

    # Get the model output and classify for the final value
    output = model(img_features, q)
    output = classifier(output).data

    _, ans_id = torch.max(output, dim=1)
    # index into ans_id since it is a tensor
    ans = generate(ans_id[0], aid_to_ans)

    display_result(im, args.question, ans)
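display_result is another project-local helper that is not shown on this page. A minimal sketch of what it could do, rendering the image with the question and predicted answer via matplotlib (the body below is an assumption, not the project's code):

import matplotlib.pyplot as plt


def display_result(im, question, answer):
    # Hypothetical sketch (assumption): show the image with the Q/A as the title
    plt.imshow(im)
    plt.axis("off")
    plt.title("Q: {}\nA: {}".format(question, answer))
    plt.show()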
Example #4
class FeatureExtractor(nn.Module):
    def __init__(self, arch="vgg19_bn"):
        super().__init__()
        self.model, _ = get_model(arch)

        # Inference only: disable dropout and batch-norm updates
        self.model = self.model.eval()
Example #5
class FeatureExtractor(nn.Module):
    def __init__(self, arch="vgg16"):
        super(FeatureExtractor, self).__init__()
        self.model, _ = get_model(arch)
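A short usage sketch for the FeatureExtractor fragments above, assuming a get_model helper (such as the sketch after Example #1) is in scope; the 1x3x224x224 input shape follows the transforms in Example #3:

import torch

extractor = FeatureExtractor(arch="vgg16")

# A dummy batch of one 224x224 RGB image, matching Example #3's preprocessing
dummy = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    features = extractor.model(dummy)  # run the wrapped backbone directly
print(features.shape)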