def setUp(self):
        # Configurations 3-way 3-shot with 3 query set
        model_dir = 'experiments/base_model'
        json_path = os.path.join(model_dir, 'params.json')
        assert os.path.isfile(
            json_path), "No json configuration file found at {}".format(
                json_path)
        params = utils.Params(json_path)
        params.in_channels = 3
        params.num_classes = 5
        params.dataset = 'ImageNet'
        params.cuda = True

        # Data setting
        N = 5
        self.X = torch.ones([N, params.in_channels, 84, 84])
        self.Y = torch.randint(params.num_classes, (N, ), dtype=torch.long)

        # Optim & loss setting
        if params.cuda:
            self.model = MetaLearner(params).cuda()
            self.X = self.X.cuda()
            self.Y = self.Y.cuda()
        else:
            self.model = MetaLearner(params)

        self.model.define_task_lr_params()
        model_params = list(self.model.parameters()) + list(
            self.model.task_lr.values())
        self.optim = torch.optim.SGD(model_params, lr=1e-3)
        self.loss_fn = nn.NLLLoss()
Example #2
0
    def test_train_maml_not_working_1(self):
        start = datetime.now()
        Y_hat = self.model(self.X)
        loss = self.loss_fn(Y_hat, self.Y)

        meta_optim = torch.optim.SGD(self.model.parameters(), lr=0.1)
        meta_optim.zero_grad()
        loss.backward(create_graph=True)
        print((datetime.now() - start).total_seconds())
        adapted_state_dict = self.model.cloned_state_dict()
        for key, val in self.model.named_parameters():
            adapted_state_dict[key] = val - 1e-2 * val.grad

        # define another meta-learner with adpated_state_dict
        # NOTE this approach is not working!!
        task_learner = MetaLearner(self.params)
        task_learner.load_state_dict(adapted_state_dict)

        # compute loss with task_learner and optim by original params
        start = datetime.now()
        Y_hat_task = task_learner(self.X)
        loss_task = self.loss_fn(Y_hat_task, self.Y)
        meta_optim.zero_grad()
        for key, val in self.model.named_parameters():
            print(key)
            print(val.grad)
        loss_task.backward()
        for key, val in self.model.named_parameters():
            print(key)
            print(val.grad)
        print((datetime.now() - start).total_seconds())
class test_meta_learner(unittest.TestCase):
    def setUp(self):
        # Configurations 3-way 3-shot with 3 query set
        model_dir = 'experiments/base_model'
        json_path = os.path.join(model_dir, 'params.json')
        assert os.path.isfile(
            json_path), "No json configuration file found at {}".format(
                json_path)
        params = utils.Params(json_path)
        params.in_channels = 3
        params.num_classes = 5
        params.dataset = 'ImageNet'
        params.cuda = True

        # Data setting
        N = 5
        self.X = torch.ones([N, params.in_channels, 84, 84])
        self.Y = torch.randint(params.num_classes, (N, ), dtype=torch.long)

        # Optim & loss setting
        if params.cuda:
            self.model = MetaLearner(params).cuda()
            self.X = self.X.cuda()
            self.Y = self.Y.cuda()
        else:
            self.model = MetaLearner(params)

        self.model.define_task_lr_params()
        model_params = list(self.model.parameters()) + list(
            self.model.task_lr.values())
        self.optim = torch.optim.SGD(model_params, lr=1e-3)
        self.loss_fn = nn.NLLLoss()

    def test_params(self):
        for key, val in self.model.state_dict().items():
            print(key)
        for key, val in self.model.task_lr.items():
            print(key, val.requires_grad)

    def test_grad_check(self):
        # Update the model once with data
        stored_params = {
            key: val.clone()
            for key, val in self.model.named_parameters()
        }
        task_lr_params = {
            key: val.clone()
            for key, val in self.model.task_lr.items()
        }
        stored_params.update(task_lr_params)

        Y_hat = self.model(self.X)
        loss = self.loss_fn(Y_hat, self.Y)
        self.optim.zero_grad()
        loss.backward()
        self.optim.step()

        # Test grad check
        for key, val in self.model.named_parameters():
            self.assertTrue((val != stored_params[key]).any())
Example #4
0
    def setUp(self):
        # Configurations 3-way 3-shot with 3 query set
        model_dir = 'experiments/base_model'
        json_path = os.path.join(model_dir, 'params.json')
        assert os.path.isfile(
            json_path), "No json configuration file found at {}".format(
                json_path)
        self.params = utils.Params(json_path)

        self.params.in_channels = 3
        self.params.num_classes = 5
        self.params.dataset = 'ImageNet'
        self.model = MetaLearner(self.params)

        # Data setting
        N = 5
        self.X = torch.ones([N, self.params.in_channels, 84, 84])
        self.Y = torch.randint(self.params.num_classes, (N, ),
                               dtype=torch.long)

        # Optim & loss setting
        self.loss_fn = nn.NLLLoss()
Example #5
0
class test_grad_update(unittest.TestCase):
    def setUp(self):
        # Configurations 3-way 3-shot with 3 query set
        model_dir = 'experiments/base_model'
        json_path = os.path.join(model_dir, 'params.json')
        assert os.path.isfile(
            json_path), "No json configuration file found at {}".format(
                json_path)
        self.params = utils.Params(json_path)

        self.params.in_channels = 3
        self.params.num_classes = 5
        self.params.dataset = 'ImageNet'
        self.model = MetaLearner(self.params)

        # Data setting
        N = 5
        self.X = torch.ones([N, self.params.in_channels, 84, 84])
        self.Y = torch.randint(self.params.num_classes, (N, ),
                               dtype=torch.long)

        # Optim & loss setting
        self.loss_fn = nn.NLLLoss()

    @unittest.skip("too complicated test; may not be a correct approach")
    def test_inner_and_meta_update(self):
        # Store current parameters
        self.model.store_cur_params()

        # Update the model once with data
        meta_optim = torch.optim.SGD(self.model.stored_params, lr=0.1)
        Y_hat = self.model(self.X)
        loss = self.loss_fn(Y_hat, self.Y)
        meta_optim.zero_grad()

        # grads are in the order of model.parameters()
        grads = torch.autograd.grad(loss,
                                    self.model.parameters(),
                                    create_graph=True)

        # performs updates using calculated gradients
        # we manually compute adpated parameters since optimizer.step() operates in-place
        adapted_params = {
            key: val
            for key, val in self.model.stored_params.items()
        }
        for (key, val), grad in zip(self.model.named_parameters(), grads):
            adapted_params[key] = self.model.stored_params[key] - 1e-2 * grad

        # Check parameter not changed
        self.model.check_params_not_changed()

        # Confirm that adapted_params are different from current params
        for key, val in self.model.named_parameters():
            self.assertTrue((val != adapted_params[key]).any())

        #################################
        ### META-UPDATE
        #################################
        # # clone a model (for debugging)
        # model_init = copy.deepcopy(self.model)

        # # load adapted_params to model
        # self.model.adapt_params(adapted_params)
        # a = copy.deepcopy(self.model.state_dict()['meta_learner.fc.weight'])

        # # clone a model (for debugging)
        # model_adap = copy.deepcopy(self.model)

        # # check current loaded params differ from the original params
        # for key, val in self.model.named_parameters():
        #     self.assertTrue((val != self.model.stored_params[key]).any())

        # # compute loss using adapted_params
        # start = datetime.now()
        # Y_hat = self.model(self.X)
        # loss = self.loss_fn(Y_hat, self.Y)
        # interval = (datetime.now() - start).total_seconds()

        # # load again original params
        # self.model.init_params()
        # b = copy.deepcopy(self.model.state_dict()['meta_learner.fc.weight'])

        # # check current loaded params differ from the adapted_params
        # for key, val in self.model.named_parameters():
        #     self.assertTrue((val != adapted_params[key]).any())

        # # update original params using the loss computed
        # self.model.store_cur_params()

        # start = datetime.now()
        # meta_optim = torch.optim.SGD(self.model.parameters(), lr=1e-3)
        # meta_optim.zero_grad()
        # loss.backward()
        # meta_optim.step()
        # print('meta {}'.format(interval +
        #                        (datetime.now() - start).total_seconds()))

        # # check original params updated
        # c = copy.deepcopy(self.model.state_dict()['meta_learner.fc.weight'])
        # print(a)
        # print(b)
        # print(c)
        # for key, val in self.model.named_parameters():
        #     # print(key)
        #     # print(val)
        #     # print(self.model.stored_params[key])
        #     self.assertTrue((val != self.model.stored_params[key]).any())

        # # TODO does this procedure really compute hessian??
        # # update model_init
        # start = datetime.now()
        # Y_hat_init = model_init(self.X)
        # loss_init = self.loss_fn(Y_hat_init, self.Y)
        # init_optim = torch.optim.SGD(model_init.parameters(), lr=1e-3)
        # init_optim.zero_grad()
        # loss_init.backward()
        # init_optim.step()
        # print('init {}'.format((datetime.now() - start).total_seconds()))
        # for key, val in self.model.named_parameters():
        #     self.assertTrue((val != model_init.state_dict()[key]).any())

        # # update model_adap
        # start = datetime.now()
        # Y_hat_adap = model_adap(self.X)
        # loss_adap = self.loss_fn(Y_hat_adap, self.Y)
        # adap_optim = torch.optim.SGD(model_adap.parameters(), lr=1e-3)
        # adap_optim.zero_grad()
        # loss_adap.backward()
        # adap_optim.step()
        # print('adap {}'.format((datetime.now() - start).total_seconds()))
        # for key, val in self.model.named_parameters():
        #     self.assertTrue((val != model_adap.state_dict()[key]).any())

    @unittest.skip("loading state_dict might break computational graph")
    def test_train_maml_not_working_1(self):
        start = datetime.now()
        Y_hat = self.model(self.X)
        loss = self.loss_fn(Y_hat, self.Y)

        meta_optim = torch.optim.SGD(self.model.parameters(), lr=0.1)
        meta_optim.zero_grad()
        loss.backward(create_graph=True)
        print((datetime.now() - start).total_seconds())
        adapted_state_dict = self.model.cloned_state_dict()
        for key, val in self.model.named_parameters():
            adapted_state_dict[key] = val - 1e-2 * val.grad

        # define another meta-learner with adpated_state_dict
        # NOTE this approach is not working!!
        task_learner = MetaLearner(self.params)
        task_learner.load_state_dict(adapted_state_dict)

        # compute loss with task_learner and optim by original params
        start = datetime.now()
        Y_hat_task = task_learner(self.X)
        loss_task = self.loss_fn(Y_hat_task, self.Y)
        meta_optim.zero_grad()
        for key, val in self.model.named_parameters():
            print(key)
            print(val.grad)
        loss_task.backward()
        for key, val in self.model.named_parameters():
            print(key)
            print(val.grad)
        print((datetime.now() - start).total_seconds())

    def test_train_maml(self):
        """
        This might be a correct approach.
        """
        start = datetime.now()
        Y_hat = self.model(self.X)
        loss = self.loss_fn(Y_hat, self.Y)

        meta_optim = torch.optim.SGD(self.model.parameters(), lr=0.1)
        meta_optim.zero_grad()
        loss.backward(create_graph=True)
        print('\n 1st gradient computation takes {}'.format(
            (datetime.now() - start).total_seconds()))
        adapted_state_dict = self.model.cloned_state_dict()
        for key, val in self.model.named_parameters():
            adapted_state_dict[key] = val - 1e-2 * val.grad

        # compute loss using adapted params and optim by original params
        start = datetime.now()
        Y_hat_task = self.model(self.X, adapted_state_dict)
        loss_task = self.loss_fn(Y_hat_task, self.Y)
        meta_optim.zero_grad()
        loss_task.backward()
        # for key, val in self.model.named_parameters():
        #     print(key)
        #     print(val.grad)
        print('2nd gradient computation takes {}'.format(
            (datetime.now() - start).total_seconds()))

    @unittest.skip("okay get a concept")
    def test_simple_maml_case(self):
        """
        What we found:
            If create_graph=True, w.grad.requires_grad is True.
            If create_graph=False, w.grad.requires_grad is False.
        """
        print('\n')
        x = torch.tensor(1.)
        y = torch.tensor(1.)
        w = torch.tensor(2., requires_grad=True)

        # If change w
        w_c = torch.tensor(2., requires_grad=True)

        loss = (y - w * x)**2
        optim = torch.optim.SGD([w], lr=0.1)
        optim.zero_grad()
        print('[1st] w-before {}'.format(w.grad))  # 0
        loss.backward(create_graph=True)
        print('[1st] w-after {}'.format(w.grad))  # 2
        print(w.grad.requires_grad)

        w_ = w - 0.1 * w.grad
        w_c_ = w_c - 0.1 * w.grad

        loss_ = (y - w_ * x)**2
        optim.zero_grad()
        print('[2nd] w-before {}'.format(w.grad))  # 0
        loss_.backward(retain_graph=True)
        print('[2nd] w-after {}'.format(w.grad))  # 1.28

        loss_c = (y - w_c_ * x)**2
        optim.zero_grad()
        print('[2nd] w-before {}'.format(w_c.grad))  # 0
        loss_c.backward()
        print('[2nd] w-after {}'.format(w_c.grad))  # 1.6
    utils.set_logger(os.path.join(args.model_dir, 'eval.log'))

    # Use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(SEED)
    if params.cuda: torch.cuda.manual_seed(SEED)

    params.in_channels = 3
    meta_train_classes, meta_test_classes = split_emotions(args.data_dir, SEED)
    task_type = SER

    if params.cuda:
        model = MetaLearner(params).cuda()
    else:
        model = MetaLearner(params)
    meta_optimizer = torch.optim.Adam(model.parameters(), lr=meta_lr)

    # fetch loss function and metrics
    loss_fn = nn.NLLLoss()
    model_metrics = metrics

    restore_path = os.path.join(args.model_dir, args.restore_file + '.pth.tar')

    logging.info("Eval metrics for dataset {}".format(','.join(datasets_test)))

    test_logs = []

    logging.info("Restoring parameters from {}".format(restore_path))
Example #7
0
        params.in_channels = 1
        meta_train_classes, meta_test_classes = split_omniglot_characters(
            args.data_dir, SEED)
        task_type = OmniglotTask
    elif ('miniImageNet' in args.data_dir or 'tieredImageNet'
          in args.data_dir) and params.dataset == 'ImageNet':
        params.in_channels = 3
        meta_train_classes, meta_test_classes = load_imagenet_images(
            args.data_dir)
        task_type = ImageNetTask
    else:
        raise ValueError("I don't know your dataset")

    # Define the model and optimizer
    if params.cuda:
        model = MetaLearner(params).cuda()
    else:
        model = MetaLearner(params)
    # NOTE we need to define task_lr after defining model
    model.define_task_lr_params()
    model_params = list(model.parameters()) + list(model.task_lr.values())
    meta_optimizer = torch.optim.Adam(model_params, lr=meta_lr)

    # fetch loss function and metrics
    loss_fn = nn.NLLLoss()
    model_metrics = metrics

    # Train the model
    logging.info("Starting training for {} episode(s)".format(num_episodes))
    train_and_evaluate(model, meta_train_classes, meta_test_classes, task_type,
                       meta_optimizer, loss_fn, model_metrics, params,
Example #8
0
         meta_test_classes) = split_omniglot_characters(
             args.data_dir, params.SEED)
        task_type = OmniglotTask
    elif ('miniImageNet' in args.data_dir or
          'tieredImageNet' in args.data_dir) and params.dataset == 'ImageNet':
        params.in_channels = 3
        params.in_features_fc = 4
        (meta_train_classes, meta_val_classes,
         meta_test_classes) = load_imagenet_images(args.data_dir)
        task_type = ImageNetTask
    else:
        raise ValueError("I don't know your dataset")

    # Define the model and optimizer
    if params.cuda:
        model = MetaLearner(params).cuda()
    else:
        model = MetaLearner(params)
    # NOTE we need to define task_lr after defining model
    model.define_task_lr_params()

    # fetch loss function and metrics
    loss_fn = nn.NLLLoss()
    model_metrics = metrics

    # Reload weights from the saved file
    utils.load_checkpoint(
        os.path.join(args.model_dir, args.restore_file + '.pth.tar'), model)

    # Evaluate
    test_metrics = evaluate(model, loss_fn, meta_test_classes, task_type,
Example #9
0
class test_meta_learner(unittest.TestCase):
    def setUp(self):
        # Configurations 3-way 3-shot with 3 query set
        model_dir = 'experiments/base_model'
        json_path = os.path.join(model_dir, 'params.json')
        assert os.path.isfile(
            json_path), "No json configuration file found at {}".format(
                json_path)
        params = utils.Params(json_path)

        params.in_channels = 3
        params.num_classes = 5
        params.dataset = 'ImageNet'
        self.model = MetaLearner(params)

        # Data setting
        N = 5
        self.X = torch.ones([N, params.in_channels, 84, 84])
        self.Y = torch.randint(params.num_classes, (N, ), dtype=torch.long)

        # Optim & loss setting
        self.optim = torch.optim.SGD(self.model.parameters(), lr=1e-3)
        self.loss_fn = nn.NLLLoss()

    def test_store_cur_params(self):
        # Store current parameters
        self.model.store_cur_params()

        # Update the model once with data
        Y_hat = self.model(self.X)
        loss = self.loss_fn(Y_hat, self.Y)
        self.optim.zero_grad()
        loss.backward()
        self.optim.step()

        # Test stored_params deep copied
        for key, val in self.model.state_dict().items():
            self.assertTrue((val != self.model.stored_params[key]).any())

    # @unittest.skip("Adaptation process does not work..")
    def test_adapt_and_init_params(self):
        # Store current parameters
        self.model.store_cur_params()

        # Update the model once with data
        Y_hat = self.model(self.X)
        loss = self.loss_fn(Y_hat, self.Y)
        self.optim.zero_grad()
        # grads are in the order of model.parameters()
        grads = torch.autograd.grad(
            loss, self.model.parameters(), create_graph=True)
        # performs updates using calculated gradients
        # we manually compute adpated parameters since optimizer.step() operates in-place
        adapted_params = {
            key: val.clone()
            for key, val in self.model.state_dict().items()
        }
        for (key, val), grad in zip(self.model.named_parameters(), grads):
            adapted_params[key] = self.model.stored_params[key] - 1e-2 * grad

        # Check parameter not changed
        # self.model.check_params_not_changed()

        # Confirm that adapted_params are different from current params
        for key, val in self.model.named_parameters():
            self.assertTrue((val != adapted_params[key]).any())

        # Adapt adapted_params to the model
        # And confirm that adapted_params are the same to current params
        self.model.adapt_params(adapted_params)
        for key, val in adapted_params.items():
            self.assertTrue((val == self.model.state_dict()[key]).all())

        # Compute loss with adapted parameters
        # And optimize w.r.t. meta-parameters
        Y_hat = self.model(self.X)
        loss = self.loss_fn(Y_hat, self.Y)
        # Return to meta-parameters
        before_optim = copy.deepcopy(self.model.state_dict())
        self.model.init_params()
        meta_optim = torch.optim.SGD(self.model.parameters(), lr=1e-3)
        # self.optim.zero_grad()
        meta_optim.zero_grad()
        loss.backward()
        # self.optim.step()
        meta_optim.step()

        # Check meta-parameters updated
        for key, val in self.model.named_parameters():
            self.assertTrue((val != before_optim[key]).any())

        # Check adapted-parameters still same

    @unittest.skip("For debugging purpose")
    def test_parameter_name(self):
        print(self.model.state_dict().keys())
        print(len(self.model.state_dict().keys()))
        print(self.model.meta_learner.state_dict().keys())
        print(len(self.model.meta_learner.state_dict().keys()))

        self.model.meta_learner.state_dict()['fc.bias'][0] = 0
        print(self.model.state_dict()['meta_learner.fc.bias'])
        print(self.model.meta_learner.state_dict()['fc.bias'])