    def __init__(self, train_image_dir, train_question_path,
                 train_annotation_path, test_image_dir, test_question_path,
                 test_annotation_path, batch_size, num_epochs,
                 num_data_loader_workers):

        train_dataset = VqaDataset(
            image_dir=train_image_dir,
            question_json_file_path=train_question_path,
            annotation_json_file_path=train_annotation_path,
            image_filename_pattern="COCO_train2014_{}_resnet_feature.npy")
        val_dataset = VqaDataset(
            image_dir=test_image_dir,
            question_json_file_path=test_question_path,
            annotation_json_file_path=test_annotation_path,
            image_filename_pattern="COCO_val2014_{}_resnet_feature.npy")

        model = CoattentionNet(n_emb=512,
                               n_img=2048,
                               n_ques=len(train_dataset.dictionary),
                               n_ans=len(train_dataset.answers))

        super().__init__(train_dataset,
                         val_dataset,
                         model,
                         batch_size,
                         num_epochs,
                         num_data_loader_workers=num_data_loader_workers)

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.RMSprop(self._model.parameters(),
                                       lr=4e-4,
                                       alpha=0.99,
                                       eps=1e-8)
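A minimal sketch of a single optimization step using the criterion and optimizer configured above; the batch keys and the model call signature are assumptions, not part of the runner's actual API.

def train_one_step(model, batch, criterion, optimizer):
    # Hypothetical step: forward the image features and question tokens,
    # compute cross-entropy against the answer index, and update the weights.
    optimizer.zero_grad()
    logits = model(batch['image'], batch['question'])
    loss = criterion(logits, batch['answer'])
    loss.backward()
    optimizer.step()
    return loss.item()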
Example 2
    def __init__(self, train_image_dir, train_question_path, train_annotation_path,
                 test_image_dir, test_question_path, test_annotation_path, batch_size, num_epochs,
                 num_data_loader_workers, preprocessing):

        train_h5_path = "./features/resnet/train/train_feat_resnet_{}.h5"
        test_h5_path = "./features/resnet/test/test_feat_resnet_{}.h5"

        train_dataset = VqaDataset(image_dir=train_image_dir,
                                   question_json_file_path=train_question_path,
                                   annotation_json_file_path=train_annotation_path,
                                   image_filename_pattern="COCO_train2014_{}.jpg", existing_format=None, prepro=preprocessing, prepro_path=train_h5_path)
        val_dataset = VqaDataset(image_dir=test_image_dir,
                                 question_json_file_path=test_question_path,
                                 annotation_json_file_path=test_annotation_path,
                                 image_filename_pattern="COCO_val2014_{}.jpg", existing_format=train_dataset, prepro=preprocessing, prepro_path=test_h5_path)

        embed_size = 512
        vocab_size = train_dataset.quesVecSize
        ans_size = train_dataset.ansVecSize
        seq_len = train_dataset.seq_len

        self._model = CoattentionNet(embed_size, vocab_size, ans_size, seq_len)

        super().__init__(train_dataset, val_dataset, self._model, batch_size, num_epochs,
                         num_data_loader_workers, preprocessing)

        # self.optimizer = torch.optim.RMSprop(self._model.parameters(), lr=4e-4, momentum=0.99, weight_decay=1e-8) 
        # self.optimizer = torch.optim.SGD(self._model.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-8)
        self.optimizer = torch.optim.Adam(self._model.parameters())
 def __init__(self, train_image_dir, train_question_path,
              train_annotation_path, test_image_dir, test_question_path,
              test_annotation_path, batch_size, num_epochs,
              num_data_loader_workers, args):
     if not args.debug:
         train_dataset = VqaDataset(
             image_dir=train_image_dir,
             question_json_file_path=train_question_path,
             annotation_json_file_path=train_annotation_path,
             image_filename_pattern="COCO_train2014_{}.jpg",
         )
         val_dataset = VqaDataset(
             image_dir=test_image_dir,
             question_json_file_path=test_question_path,
             annotation_json_file_path=test_annotation_path,
             image_filename_pattern="COCO_val2014_{}.jpg",
         )
         model = SimpleBaselineNet(num_ans_candidates=2185,
                                   ntoken=train_dataset.dictionary.ntoken)
     else:
         train_dataset = VqaDataset(
             image_dir=train_image_dir,
             question_json_file_path=train_question_path,
             annotation_json_file_path=train_annotation_path,
             image_filename_pattern="COCO_train2014_{}.jpg",
             debug=True)
         val_dataset = train_dataset
         model = SimpleBaselineNet(num_ans_candidates=2,
                                   ntoken=train_dataset.dictionary.ntoken)
     super().__init__(train_dataset, val_dataset, model, batch_size,
                      num_epochs, num_data_loader_workers, args.use_cuda,
                      'simple_baseline')
     self.optim = torch.optim.Adamax(self._model.parameters())
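The runner above only reads args.debug and args.use_cuda; a minimal argparse setup that supplies them could look like this (the flag names mirror the attributes used, the defaults are assumptions):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--debug', action='store_true',
                    help='train on a tiny dataset with a 2-way answer head')
parser.add_argument('--use_cuda', action='store_true',
                    help='run the model and batches on the GPU')
args = parser.parse_args()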
Example 4
    def __init__(self, train_image_dir, train_question_path,
                 train_annotation_path, test_image_dir, test_question_path,
                 test_annotation_path, batch_size, num_epochs,
                 num_data_loader_workers):

        self.vqa_loader = VQA(annotation_file=train_annotation_path,
                              question_file=train_question_path)

        self.entries = self.vqa_loader.qqa
        self.qa = self.vqa_loader.qa

        bag_word_question = self.get_bag_of_word_question()
        bag_word_answer = self.get_bag_of_word_answer()

        # pdb.set_trace()

        train_dataset = VqaDataset(
            image_dir=train_image_dir,
            question_json_file_path=train_question_path,
            annotation_json_file_path=train_annotation_path,
            image_filename_pattern="COCO_train2014_{}.jpg",
            bag_word_question=bag_word_question,
            bag_word_answer=bag_word_answer)

        val_dataset = VqaDataset(
            image_dir=test_image_dir,
            question_json_file_path=test_question_path,
            annotation_json_file_path=test_annotation_path,
            image_filename_pattern="COCO_val2014_{}.jpg",
            bag_word_question=bag_word_question,
            bag_word_answer=bag_word_answer)

        num_question = train_dataset.bag_size_question
        num_answer = train_dataset.bag_size_answer
        model = SimpleBaselineNet(num_question, num_answer)

        # could be added outside
        # lr = 0.01
        # momentum = 0.9

        # pdb.set_trace()

        # self.optimizer = torch.optim.SGD(
        #     model.parameters(), lr=lr, momentum=momentum)
        self.optimizer = torch.optim.SGD(
            [{
                'params': model.fc.parameters()
            }, {
                'params': model.feature.parameters()
            }, {
                'params': model.embedding.parameters(),
                'lr': 0.8
            }],
            lr=0.01,
            momentum=0.9)
        self.criterion = nn.CrossEntropyLoss().cuda()

        super().__init__(train_dataset, val_dataset, model, batch_size,
                         num_epochs, num_data_loader_workers)
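get_bag_of_word_question and get_bag_of_word_answer are referenced above but not shown; one possible implementation, assuming the standard VQA JSON fields ('question' and 'multiple_choice_answer'), is sketched below. The real helpers may additionally apply frequency thresholds.

    def get_bag_of_word_question(self):
        # self.entries maps question_id -> question record from the VQA loader.
        words = set()
        for entry in self.entries.values():
            words.update(entry['question'].lower().rstrip('?').split())
        return {word: idx for idx, word in enumerate(sorted(words))}

    def get_bag_of_word_answer(self):
        # self.qa maps question_id -> annotation record.
        answers = set(ann['multiple_choice_answer'] for ann in self.qa.values())
        return {ans: idx for idx, ans in enumerate(sorted(answers))}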
    def __init__(self, train_image_dir, train_question_path,
                 train_annotation_path, test_image_dir, test_question_path,
                 test_annotation_path, batch_size, num_epochs,
                 num_data_loader_workers, cache_location, lr, log_validation):

        ############ 2.3 TODO: set up transform

        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))
        ])

        ############

        train_dataset = VqaDataset(
            image_dir=train_image_dir,
            question_json_file_path=train_question_path,
            annotation_json_file_path=train_annotation_path,
            image_filename_pattern="COCO_train2014_{}.jpg",
            transform=transform,
            ############ 2.4 TODO: fill in the arguments
            question_word_to_id_map=None,
            answer_to_id_map=None,
            ############
        )

        val_dataset = VqaDataset(
            image_dir=test_image_dir,
            question_json_file_path=test_question_path,
            annotation_json_file_path=test_annotation_path,
            image_filename_pattern="COCO_val2014_{}.jpg",
            transform=transform,
            ############ 2.4 TODO: fill in the arguments
            question_word_to_id_map=train_dataset.question_word_to_id_map,
            answer_to_id_map=train_dataset.answer_to_id_map,
            ############
        )

        model = SimpleBaselineNet()

        super().__init__(train_dataset, val_dataset, model, batch_size,
                         num_epochs, num_data_loader_workers)

        ############ 2.5 TODO: set up optimizer
        #self.optimizer = torch.optim.SGD([{'params':model.WordNet.parameters(), 'lr':0.8},
        #                                    {'params':model.LinearLayer.parameters(), 'lr':0.01}])
        ############
        self.optimizer = torch.optim.SGD([{
            'params': model.WordNet.parameters(),
            'lr': 0.01
        }, {
            'params': model.LinearLayer.parameters(),
            'lr': 0.8
        }])
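Note that this constructor also receives an lr argument that the hard-coded group rates above never use; a hypothetical variant that honors it while keeping the same 80:1 ratio between the two groups:

        # assumption: lr is the base rate (0.01 here), with the second group 80x larger
        self.optimizer = torch.optim.SGD([{
            'params': model.WordNet.parameters(),
            'lr': lr
        }, {
            'params': model.LinearLayer.parameters(),
            'lr': 80 * lr
        }])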
    def __init__(self, train_image_dir, train_question_path,
                 train_annotation_path, test_image_dir, test_question_path,
                 test_annotation_path, batch_size, num_epochs,
                 num_data_loader_workers, cache_location, lr, log_validation):

        ############ 3.1 TODO: set up transform and image encoder
        transform = None
        image_encoder = None
        ############

        question_word_list_length = 5746
        answer_list_length = 1000

        train_dataset = VqaDataset(
            image_dir=train_image_dir,
            question_json_file_path=train_question_path,
            annotation_json_file_path=train_annotation_path,
            image_filename_pattern="COCO_train2014_{}.jpg",
            transform=transform,
            question_word_list_length=question_word_list_length,
            answer_list_length=answer_list_length,
            cache_location=os.path.join(cache_location, "tmp_train"),
            ############ 3.1 TODO: fill in the arguments
            question_word_to_id_map='change this argument',
            answer_to_id_map='change this argument',
            ############
            pre_encoder=image_encoder)
        val_dataset = VqaDataset(
            image_dir=test_image_dir,
            question_json_file_path=test_question_path,
            annotation_json_file_path=test_annotation_path,
            image_filename_pattern="COCO_val2014_{}.jpg",
            transform=transform,
            question_word_list_length=question_word_list_length,
            answer_list_length=answer_list_length,
            cache_location=os.path.join(cache_location, "tmp_val"),
            ############ 3.1 TODO: fill in the arguments
            question_word_to_id_map='change this argument',
            answer_to_id_map='change this argument',
            ############
            pre_encoder=image_encoder)

        self._model = CoattentionNet()

        super().__init__(train_dataset,
                         val_dataset,
                         self._model,
                         batch_size,
                         num_epochs,
                         num_data_loader_workers=num_data_loader_workers,
                         log_validation=False)
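The 3.1 TODO above leaves transform and image_encoder as None; one common (but not required) choice is an ImageNet-style transform plus a ResNet trunk with the pooling/classifier head removed, similar to the resnet152 trunk used in Example 14 below:

import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
resnet = models.resnet18(pretrained=True)
image_encoder = nn.Sequential(*list(resnet.children())[:-2])  # keep conv feature maps
image_encoder.eval()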
Example 7
  def __init__(self, train_image_dir, train_question_path, train_annotation_path,
               test_image_dir, test_question_path, test_annotation_path, batch_size, num_epochs,
               num_data_loader_workers):

    self.vqa_loader = VQA(annotation_file=train_annotation_path,
                          question_file=train_question_path)

    self.entries = self.vqa_loader.qqa
    self.qa = self.vqa_loader.qa

    bag_word_question = self.get_bag_of_word_question()
    bag_word_answer = self.get_bag_of_word_answer()

    train_dataset = VqaDataset(image_dir=train_image_dir,
                               question_json_file_path=train_question_path,
                               annotation_json_file_path=train_annotation_path,
                               image_filename_pattern="COCO_train2014_{}.jpg",
                               bag_word_question=bag_word_question,
                               bag_word_answer=bag_word_answer,
                               img_dir="./data_val/train.hdf5")
    val_dataset = VqaDataset(image_dir=test_image_dir,
                             question_json_file_path=test_question_path,
                             annotation_json_file_path=test_annotation_path,
                             image_filename_pattern="COCO_val2014_{}.jpg",
                             bag_word_question=bag_word_question,
                             bag_word_answer=bag_word_answer,
                             img_dir="./data_val/val.hdf5")

    num_question = train_dataset.bag_size_question
    num_answer = train_dataset.bag_size_answer

    # pdb.set_trace()

    max_len_train = train_dataset.max_len
    max_len_val = val_dataset.max_len

    print('max_len for train:{}, max_len for val:{}'.format(
        max_len_train, max_len_val))

    self._model = CoattentionNet(num_question, num_answer, 26)

    # self.optimizer = torch.optim.SGD(
    #     self._model.parameters(), lr=0.001, momentum=0.9)
    # pdb.set_trace()
    self.optimizer = torch.optim.Adam(
        self._model.parameters(), lr=4e-4, eps=1e-8)
    self.criterion = nn.CrossEntropyLoss().cuda()

    super().__init__(train_dataset, val_dataset, self._model, batch_size, num_epochs,
                     num_data_loader_workers=num_data_loader_workers)
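The hard-coded 26 passed to CoattentionNet above is presumably the padded question length printed just before it; a hedged alternative derives it from the datasets instead of hard-coding:

    # assumption: the third CoattentionNet argument is the padded sequence length
    seq_len = max(max_len_train, max_len_val)
    self._model = CoattentionNet(num_question, num_answer, seq_len)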
    def __init__(self, train_image_dir, train_question_path,
                 train_annotation_path, test_image_dir, test_question_path,
                 test_annotation_path, batch_size, num_epochs,
                 num_data_loader_workers):

        train_image_transform = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        val_image_transform = transforms.Compose([
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        train_dataset = VqaDataset(
            image_dir=train_image_dir,
            question_json_file_path=train_question_path,
            annotation_json_file_path=train_annotation_path,
            image_filename_pattern="COCO_train2014_{}.jpg",
            is_training=True,
            transform=train_image_transform)
        val_dataset = VqaDataset(
            image_dir=test_image_dir,
            question_json_file_path=test_question_path,
            annotation_json_file_path=test_annotation_path,
            image_filename_pattern="COCO_val2014_{}.jpg",
            is_training=False,
            transform=val_image_transform)

        model = SimpleBaselineNet(len(train_dataset.dictionary),
                                  len(train_dataset.answers))
        super().__init__(train_dataset, val_dataset, model, batch_size,
                         num_epochs, num_data_loader_workers)

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.SGD([{
            'params': model.ques_feat.parameters(),
            'lr': 0.8
        }, {
            'params': model.fc.parameters()
        }],
                                   lr=0.01,
                                   momentum=0.9)
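CenterCrop(224) in the validation transform above is applied to the full-size COCO image; the more common evaluation pipeline resizes the short side first. A drop-in alternative, if that behavior is preferred:

        val_image_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])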
Example 9
    def test_load_dataset(self):
        """
        This method gives you a quick way to run your dataset to make sure it loads files correctly.
        It doesn't assert a particular result from indexing the dataset; that will depend on your design.
        Feel free to fill in more asserts here, to validate your design.
        """
        # Arrange
        current_dir = os.path.dirname(__file__)
        question_file = os.path.join(current_dir, "test_questions.json")
        annotation_file = os.path.join(current_dir, "test_annotations.json")

        vqa_dataset = VqaDataset(
            question_json_file_path=question_file,
            annotation_json_file_path=annotation_file,
            image_dir=current_dir,
            image_filename_pattern="COCO_train2014_{}.jpg",
            img_features_dir="features/img_train",
            vocab_json_filename="features/vocab.json")

        # Act
        vqa_len = len(vqa_dataset)
        dataset_item = vqa_dataset[0]

        # Assert
        self.assertEqual(vqa_len, 2)
        self.assertTrue(type(dataset_item) is dict)
Example 10
    def test_use_dataset_loader(self):
        """
        Verify that the dataset can be successfully loaded using the DatasetLoader class.
        """
        # Arrange
        current_dir = os.path.dirname(__file__)
        question_file = os.path.join(current_dir, "test_questions.json")
        annotation_file = os.path.join(current_dir, "test_annotations.json")

        vqa_dataset = VqaDataset(
            question_json_file_path=question_file,
            annotation_json_file_path=annotation_file,
            image_dir=current_dir,
            image_filename_pattern="COCO_train2014_{}.jpg",
            ques_thres=0,
            ans_thres=0,
            seq_len=20)
        dataset_loader = DataLoader(vqa_dataset, batch_size=2)

        # Act & Assert - the test will fail if iterating through the data loader fails
        for batch_id, data in enumerate(dataset_loader):
            # Spot-check the batch contents below.

            # Test the image visualizations
            img = data['images'][1, :, :, :]
            img = img.numpy().transpose((1, 2, 0))
            plt.imshow(img)
            plt.show()

            # Test BoW representations
            # print("answers ",data['answers'])
            print("questions ", data['questions'])
            print("questions size", data['questions'].shape)
            print("gt_answer ", data['gt_answer'])
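If the dataset applies ImageNet normalization before returning 'images', the plot above will look washed out; a small helper to undo it first (assuming those mean/std values) could be:

import numpy as np

def denormalize(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    # img is the HxWx3 float array produced by tensor.numpy().transpose((1, 2, 0))
    img = img * np.array(std) + np.array(mean)
    return np.clip(img, 0.0, 1.0)

With this, the visualization line becomes plt.imshow(denormalize(img)).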
    def __init__(self, train_image_dir, train_question_path,
                 train_annotation_path, test_image_dir, test_question_path,
                 test_annotation_path, batch_size, num_epochs,
                 num_data_loader_workers, preprocessing):

        train_h5_path = "./features/train_feat_googlenet.h5"
        test_h5_path = "./features/test_feat_googlenet.h5"

        embedding_size = 1024

        train_dataset = VqaDataset(
            image_dir=train_image_dir,
            question_json_file_path=train_question_path,
            annotation_json_file_path=train_annotation_path,
            image_filename_pattern="COCO_train2014_{}.jpg",
            existing_format=None,
            prepro=preprocessing,
            prepro_path=train_h5_path)
        val_dataset = VqaDataset(
            image_dir=test_image_dir,
            question_json_file_path=test_question_path,
            annotation_json_file_path=test_annotation_path,
            image_filename_pattern="COCO_val2014_{}.jpg",
            existing_format=train_dataset,
            prepro=preprocessing,
            prepro_path=test_h5_path)

        model = SimpleBaselineNet(vocab_size=train_dataset.quesVecSize,
                                  embedding_size=embedding_size,
                                  ans_size=train_dataset.ansVecSize)

        super().__init__(train_dataset, val_dataset, model, batch_size,
                         num_epochs, num_data_loader_workers, preprocessing)

        self.optimizer = torch.optim.SGD(
            [{
                'params': model.embedding.parameters(),
                'lr': 0.8
            }, {
                'params': model.softmax.parameters()
            }, {
                'params': model.linear.parameters()
            }],
            lr=1e-2,
            momentum=0.9)
Example 12
    def __init__(self, train_image_dir, train_question_path,
                 train_annotation_path, train_img_feat_path, test_image_dir,
                 test_question_path, test_annotation_path, test_img_feat_path,
                 vocab_path, batch_size, num_epochs, num_data_loader_workers):

        train_dataset = VqaDataset(
            image_dir=train_image_dir,
            question_json_file_path=train_question_path,
            annotation_json_file_path=train_annotation_path,
            image_filename_pattern="COCO_train2014_{}.jpg",
            img_features_dir=train_img_feat_path,
            vocab_json_filename=vocab_path)
        val_dataset = VqaDataset(
            image_dir=test_image_dir,
            question_json_file_path=test_question_path,
            annotation_json_file_path=test_annotation_path,
            image_filename_pattern="COCO_val2014_{}.jpg",
            img_features_dir=test_img_feat_path,
            vocab_json_filename=vocab_path)

        img_feat_size = 1024  #TODO Better way to do this
        ques_embedding_lr = 0.8
        classifier_lr = 0.01

        q_vocab_size = train_dataset.q_vocab_size
        a_vocab_size = train_dataset.a_vocab_size
        model = SimpleBaselineNet(img_feat_size, q_vocab_size, a_vocab_size)

        self.optimizer = torch.optim.SGD([{
            'params': model.fc_ques.parameters(),
            'lr': ques_embedding_lr
        }, {
            'params': model.classifier.parameters(),
            'lr': classifier_lr
        }])

        self.criterion = torch.nn.CrossEntropyLoss()

        super().__init__(train_dataset, val_dataset, model, batch_size,
                         num_epochs, num_data_loader_workers)
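One way to address the "#TODO Better way to do this" above is to infer the feature size from a dataset item rather than hard-coding 1024; the key name below is hypothetical and depends on what this VqaDataset actually returns:

        sample = train_dataset[0]
        img_feat_size = sample['image_features'].shape[-1]  # hypothetical key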
Example 13
    def __init__(self, train_image_dir, train_question_path,
                 train_annotation_path, train_img_feat_path, test_image_dir,
                 test_question_path, test_annotation_path, test_img_feat_path,
                 vocab_path, batch_size, num_epochs, num_data_loader_workers):

        train_dataset = VqaDataset(
            image_dir=train_image_dir,
            question_json_file_path=train_question_path,
            annotation_json_file_path=train_annotation_path,
            image_filename_pattern="COCO_train2014_{}.jpg",
            img_features_dir=train_img_feat_path,
            vocab_json_filename=vocab_path)
        val_dataset = VqaDataset(
            image_dir=test_image_dir,
            question_json_file_path=test_question_path,
            annotation_json_file_path=test_annotation_path,
            image_filename_pattern="COCO_val2014_{}.jpg",
            img_features_dir=test_img_feat_path,
            vocab_json_filename=vocab_path)

        img_feat_size = 512  #TODO Better way to do this
        embedding_size = 512

        q_vocab_size = train_dataset.q_vocab_size
        a_vocab_size = train_dataset.a_vocab_size
        self._model = CoattentionNet(img_feat_size, embedding_size,
                                     q_vocab_size, a_vocab_size).cuda()

        params = self._model.parameters()

        # self.optimizer = torch.optim.RMSprop(params=params, lr=4e-4, weight_decay=1e-8, momentum=0.99)
        self.optimizer = torch.optim.Adam(params=params, lr=1e-4)

        self.criterion = torch.nn.CrossEntropyLoss().cuda()

        super().__init__(train_dataset,
                         val_dataset,
                         self._model,
                         batch_size,
                         num_epochs,
                         num_data_loader_workers=num_data_loader_workers)
Example 14
def save_features(args):

    transform = transforms.Compose([
        transforms.Resize((448, 448)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    train_dataset = VqaDataset(image_dir=args.train_image_dir,
                               question_json_file_path=args.train_question_path,
                               annotation_json_file_path=args.train_annotation_path,
                               image_filename_pattern="COCO_train2014_{}.jpg",
                               is_training=True,
                               transform=transform)
    val_dataset = VqaDataset(image_dir=args.test_image_dir,
                             question_json_file_path=args.test_question_path,
                             annotation_json_file_path=args.test_annotation_path,
                             image_filename_pattern="COCO_val2014_{}.jpg",
                             is_training=False,
                             transform=transform)

    train_data = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_data_loader_workers)
    val_data = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_data_loader_workers)

    model = models.resnet152(pretrained=True)
    model = nn.Sequential(*list(model.children())[:-2])

    if torch.cuda.is_available():
        model = model.cuda()

    model.eval()

    for batch_id, batch_data in enumerate(train_data):
        print('Training data {}/{}'.format(batch_id, len(train_data)))
        extract_features(model, batch_data, os.path.join(args.output_path, 'train2014'))

    for batch_id, batch_data in enumerate(val_data):
        print('Validation data {}/{}'.format(batch_id, len(val_data)))
        extract_features(model, batch_data, os.path.join(args.output_path, 'val2014'))
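extract_features is not defined in this snippet; a hypothetical version consistent with the loops above would run the truncated ResNet-152 under no_grad and write one feature file per image. The batch keys, the id field, and the output filename are assumptions and should match whatever the downstream VqaDataset expects (e.g. the *_resnet_feature.npy pattern in Example 1):

import os
import numpy as np
import torch

def extract_features(model, batch_data, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    images = batch_data['image']                      # assumed batch key
    if torch.cuda.is_available():
        images = images.cuda()
    with torch.no_grad():
        features = model(images)                      # [B, 2048, 14, 14] from the trunk
    features = features.cpu().numpy()
    for feature, image_id in zip(features, batch_data['image_id']):
        filename = '{:012d}_resnet_feature.npy'.format(int(image_id))  # naming is a placeholder
        np.save(os.path.join(output_dir, filename), feature)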
    def __init__(self, train_image_dir, train_question_path,
                 train_annotation_path, test_image_dir, test_question_path,
                 test_annotation_path, batch_size, num_epochs,
                 num_data_loader_workers, cache_location, lr, log_validation):

        ############ 2.3 TODO: set up transform

        transform = None

        ############

        train_dataset = VqaDataset(
            image_dir=train_image_dir,
            question_json_file_path=train_question_path,
            annotation_json_file_path=train_annotation_path,
            image_filename_pattern="COCO_train2014_{}.jpg",
            transform=transform,
            ############ 2.4 TODO: fill in the arguments
            question_word_to_id_map='change this argument',
            answer_to_id_map='change this argument',
            ############
        )
        val_dataset = VqaDataset(
            image_dir=test_image_dir,
            question_json_file_path=test_question_path,
            annotation_json_file_path=test_annotation_path,
            image_filename_pattern="COCO_val2014_{}.jpg",
            transform=transform,
            ############ 2.4 TODO: fill in the arguments
            question_word_to_id_map='change this argument',
            answer_to_id_map='change this argument',
            ############
        )

        model = SimpleBaselineNet()

        super().__init__(train_dataset, val_dataset, model, batch_size,
                         num_epochs, num_data_loader_workers)
Example 16
    def test_use_dataset_loader(self):
        """
        Verify that the dataset can be successfully loaded using the DatasetLoader class.
        """
        # Arrange
        current_dir = os.path.dirname(__file__)
        question_file = os.path.join(current_dir, "test_questions.json")
        annotation_file = os.path.join(current_dir, "test_annotations.json")

        vqa_dataset = VqaDataset(
            question_json_file_path=question_file,
            annotation_json_file_path=annotation_file,
            image_dir=current_dir,
            image_filename_pattern="COCO_train2014_{}.jpg")
        dataset_loader = DataLoader(vqa_dataset, batch_size=2)

        # Act & Assert - the test will fail if iterating through the data loader fails
        for id, data in enumerate(dataset_loader):
            # Not doing anything here. Feel free to fill this in, if you like.
            pass