Example #1
        offset = 0
        for param in model.parameters():
            param.data.copy_(v[offset:offset + param.numel()].view(
                param.size()).to(args.device))
            offset += param.numel()
        utils.bn_update(train_loader, model)
        print("Performance of model", i, "on the curve", end=":")
        utils.mata_eval(model, meta_valloader, meta_testloader, 'curve')

else:
    assert len(args.checkpoint) == 1
    if args.subspace == 'MNPCA_MANY':
        swag_model = SWAG(
            model_cfg.base,
            args.subspace,
            {
                'max_rank': 20,
                'cov_mat': args.sample_collect,
                # 'pca_rank': args.rank,
            },
            1e-6,
            *model_cfg.args,
            num_classes=num_classes,
            **model_cfg.kwargs)
        swag_model_pca = SWAGPCA(model_cfg.base,
                                 'pca', {
                                     'max_rank': 20,
                                     'pca_rank': args.rank,
                                 },
                                 1e-6,
                                 *model_cfg.args,
                                 num_classes=num_classes,
                                 **model_cfg.kwargs)
        swag_model.to(args.device)
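The loop at the top of this example writes a flat parameter vector v back into the network, one parameter tensor at a time. For reference, a minimal sketch of how such a flat vector can be built in the first place (the exact construction in this codebase may differ):

import torch

# Concatenate every parameter, flattened, into one 1-D vector -- the
# inverse of the copy_ loop above. Purely illustrative.
v = torch.cat([p.detach().reshape(-1) for p in model.parameters()])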
Example #2
#     args.num_workers,
#     transform_train=model_cfg.transform_train,
#     transform_test=model_cfg.transform_test,
#     shuffle_train=True,
#     use_validation=not args.use_test,
#     split_classes=args.split_classes
# )

print('Preparing model')
print(*model_cfg.args)

swag_model = SWAG(model_cfg.base,
                  num_classes=num_classes,
                  subspace_type='pca',
                  subspace_kwargs={
                      'max_rank': args.max_rank,
                      'pca_rank': args.rank,
                  },
                  *model_cfg.args,
                  **model_cfg.kwargs)
swag_model.to(args.device)

print('Loading: %s' % args.checkpoint)
ckpt = torch.load(args.checkpoint)
swag_model.load_state_dict(ckpt['state_dict'], strict=False)

swag_model.set_swa()
#print("SWA:", utils.eval(loaders["test"], swag_model, criterion=losses.cross_entropy))
utils.mata_eval(swag_model, meta_valloader, meta_testloader, 'SWAG:')

mean, var, cov_factor = swag_model.get_space()
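get_space() exposes the fitted posterior: the SWA mean, a diagonal variance, and a low-rank covariance factor. As a minimal sketch of what can be done with these tensors, the following draws one parameter sample from the low-rank-plus-diagonal Gaussian of the SWAG paper, assuming cov_factor has shape (rank, n_params) and that mean and var are flat vectors of length n_params:

import torch

# One sample from N(mean, 0.5 * diag(var) + 0.5 * cov_factor^T cov_factor / (rank - 1)),
# scaled as in SWAG's sample(); the tensor shapes here are assumptions.
def sample_from_space(mean, var, cov_factor, scale=0.5):
    rank = cov_factor.size(0)
    diag_part = var.sqrt() * torch.randn_like(var)
    low_rank_part = cov_factor.t() @ torch.randn(rank, device=cov_factor.device)
    return mean + (scale ** 0.5) * (diag_part + low_rank_part / (rank - 1) ** 0.5) / (2 ** 0.5)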
Example #3
        offset = 0
        for param in model.parameters():
            param.data.copy_(v[offset:offset + param.numel()].view(
                param.size()).to(args.device))
            offset += param.numel()
        utils.bn_update(train_loader, model)
        print("Performance of model", i, "on the curve", end=":")
        utils.mata_eval(model, meta_valloader, meta_testloader, 'curve')

else:
    assert len(args.checkpoint) == 1
    swag_model = SWAG(model_cfg.base,
                      subspace_type='pca',
                      subspace_kwargs={
                          'max_rank': 20,
                          'pca_rank': args.rank,
                      },
                      *model_cfg.args,
                      num_classes=num_classes,
                      **model_cfg.kwargs)

    swag_model.to(args.device)

    print('Loading: %s' % args.checkpoint[0])
    ckpt = torch.load(args.checkpoint[0])
    swag_model.load_state_dict(ckpt['state_dict'], strict=False)

    # first, evaluate the SWA solution
    swag_model.set_swa()
    utils.bn_update(train_loader, swag_model)
    # print(utils.eval(meta_testloader, swag_model, losses.cross_entropy))
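Note that set_swa() swaps the averaged weights into the model, so utils.bn_update then recomputes the BatchNorm running statistics with a forward pass over the training data; the averaged weights were never seen during training, and evaluating without this step usually degrades accuracy.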
Example #4
# )

print('Preparing model')
print(*model_cfg.args)
model = model_cfg.base(*model_cfg.args, num_classes=num_classes, **model_cfg.kwargs)
model.to(args.device)


args.no_cov_mat = not args.cov_mat
if args.swag:
    print('SWAG training')
    swag_model = SWAG(model_cfg.base,
                      subspace_type=args.subspace,
                      subspace_kwargs={'max_rank': args.max_num_models},
                      *model_cfg.args,
                      num_classes=num_classes,
                      **model_cfg.kwargs)
    swag_model.to(args.device)
else:
    print('SGD training')


def schedule(epoch, args):
    steps = np.sum(epoch > np.asarray(args.lr_decay_epochs))
    t = (epoch) / (args.swag_start if args.swag else args.epochs)
    lr_ratio = args.swag_lr / args.lr_init if args.swag else 0.01
    if t <= 0.5:
        factor = 1.0
    elif t <= 0.9:
        factor = 1.0 - (1.0 - lr_ratio) * (t - 0.5) / 0.4
    else:
        factor = lr_ratio
    return args.lr_init * factor
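For concreteness: with args.lr_init = 0.1 and args.swag_lr = 0.01 (so lr_ratio = 0.1), the learning rate holds at 0.1 for the first half of the run, decays linearly afterwards (at t = 0.7, factor = 1.0 - 0.9 * 0.5 = 0.55, giving lr = 0.055), and settles at 0.01 from t = 0.9 on.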
Example #6
class RegressionRunner(RegressionModel):
    def __init__(self,
                 base,
                 epochs,
                 criterion,
                 batch_size=50,
                 lr_init=1e-2,
                 momentum=0.9,
                 wd=1e-4,
                 swag_lr=1e-3,
                 swag_freq=1,
                 swag_start=50,
                 subspace_type='pca',
                 subspace_kwargs={'max_rank': 20},
                 use_cuda=False,
                 use_swag=False,
                 double_bias_lr=False,
                 model_variance=True,
                 num_samples=30,
                 scale=0.5,
                 const_lr=False,
                 *args,
                 **kwargs):

        self.base = base
        self.model = base(*args, **kwargs)
        num_pars = 0
        for p in self.model.parameters():
            num_pars += p.numel()
        print('number of parameters: ', num_pars)

        if use_cuda:
            self.model.cuda()

        if use_swag:
            self.swag_model = SWAG(base,
                                   subspace_type=subspace_type,
                                   subspace_kwargs=subspace_kwargs,
                                   *args,
                                   **kwargs)
            if use_cuda:
                self.swag_model.cuda()
        else:
            self.swag_model = None

        self.use_cuda = use_cuda

        if not double_bias_lr:
            pars = self.model.parameters()
        else:
            pars = []
            for name, param in self.model.named_parameters():
                if 'bias' in name:
                    print('Doubling lr of ', name)
                    pars.append({'params': param, 'lr': 2.0 * lr_init})
                else:
                    pars.append({'params': param, 'lr': lr_init})

        self.optimizer = torch.optim.SGD(pars,
                                         lr=lr_init,
                                         momentum=momentum,
                                         weight_decay=wd)

        self.const_lr = const_lr
        self.batch_size = batch_size

        # TODO: set up criterions better for classification
        if model_variance:
            self.criterion = criterion(noise_var=None)
        else:
            self.criterion = criterion(noise_var=1.0)

        if self.criterion.noise_var is not None:
            self.var = self.criterion.noise_var

        self.epochs = epochs

        self.lr_init = lr_init

        self.use_swag = use_swag
        self.swag_start = swag_start
        self.swag_lr = swag_lr
        self.swag_freq = swag_freq

        self.num_samples = num_samples
        self.scale = scale

    def train(self,
              model,
              loader,
              optimizer,
              criterion,
              lr_init=1e-2,
              epochs=3000,
              swag_model=None,
              swag=False,
              swag_start=2000,
              swag_freq=50,
              swag_lr=1e-3,
              print_freq=100,
              use_cuda=False,
              const_lr=False):
        # copied from Pavel's regression notebook
        if const_lr:
            lr = lr_init

        train_res_list = []
        for epoch in range(epochs):
            if not const_lr:
                t = (epoch + 1) / swag_start if swag else (epoch + 1) / epochs
                lr_ratio = swag_lr / lr_init if swag else 0.05

                if t <= 0.5:
                    factor = 1.0
                elif t <= 0.9:
                    factor = 1.0 - (1.0 - lr_ratio) * (t - 0.5) / 0.4
                else:
                    factor = lr_ratio

                lr = factor * lr_init
                adjust_learning_rate(optimizer, factor)

            train_res = utils.train_epoch(loader,
                                          model,
                                          criterion,
                                          optimizer,
                                          cuda=use_cuda,
                                          regression=True)
            train_res_list.append(train_res)
            if swag and epoch > swag_start:
                swag_model.collect_model(model)

            if (epoch % print_freq == 0 or epoch == epochs - 1):
                print('Epoch %d. LR: %g. Loss: %.4f' %
                      (epoch, lr, train_res['loss']))

        return train_res_list

    def fit(self, features, labels):
        self.features = torch.FloatTensor(features)
        self.labels = torch.FloatTensor(labels)

        # construct data loader
        # may want to set shuffle=False for the very smallest datasets (e.g., the small UCI ones)
        self.data_loader = DataLoader(TensorDataset(self.features,
                                                    self.labels),
                                      batch_size=self.batch_size,
                                      shuffle=True)

        # now train with pre-specified options
        result = self.train(model=self.model,
                            loader=self.data_loader,
                            optimizer=self.optimizer,
                            criterion=self.criterion,
                            lr_init=self.lr_init,
                            swag_model=self.swag_model,
                            swag=self.use_swag,
                            swag_start=self.swag_start,
                            swag_freq=self.swag_freq,
                            swag_lr=self.swag_lr,
                            use_cuda=self.use_cuda,
                            epochs=self.epochs,
                            const_lr=self.const_lr)

        if self.criterion.noise_var is not None:
            # another forwards pass through network to estimate noise variance
            preds, targets = utils.predictions(model=self.model,
                                               test_loader=self.data_loader,
                                               regression=True,
                                               cuda=self.use_cuda)
            self.var = np.power(np.linalg.norm(preds - targets),
                                2.0) / targets.shape[0]
            print(self.var)

        return result

    def predict(self, features, swag_model=None):
        """
        default prediction method is to use built in Low rank Gaussian
        SWA: scale = 0.0, num_samples = 1
        """
        swag_model = swag_model if swag_model is not None else self.swag_model

        if self.use_cuda:
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')

        with torch.no_grad():

            if swag_model is None:
                self.model.eval()
                preds = self.model(
                    torch.FloatTensor(features).to(device)).data.cpu()

                if preds.size(1) == 1:
                    var = torch.ones_like(preds[:, 0]).unsqueeze(1) * self.var
                else:
                    var = preds[:, 1].view(-1, 1)
                    preds = preds[:, 0].view(-1, 1)

                print(var.mean())

            else:
                prediction = 0
                sq_prediction = 0
                for _ in range(self.num_samples):
                    swag_model.sample(scale=self.scale)
                    current_prediction = swag_model(
                        torch.FloatTensor(features).to(device)).data.cpu()
                    prediction += current_prediction
                    if current_prediction.size(1) == 2:
                        # convert the variance head to a standard deviation
                        current_prediction[:, 1] = current_prediction[:, 1] ** 0.5

                    sq_prediction += current_prediction ** 2.0
                # preds = bma/(self.num_samples)

                # compute mean of prediction
                # \mu^*
                preds = (prediction[:, 0] / self.num_samples).view(-1, 1)

                # Var = (1/M) \sum_m (\sigma_m^2(x) + \mu_m^2(x)) - (\mu^*)^2
                var = (torch.sum(sq_prediction, 1, keepdim=True) / self.num_samples
                       - preds.pow(2.0))

                # add variance if not heteroscedastic
                if prediction.size(1) == 1:
                    var = var + self.var

            return preds.numpy(), var.numpy()
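A minimal end-to-end sketch of driving RegressionRunner on toy 1-D data. TinyNet and FixedNoiseGaussian below are illustrative stand-ins, not part of this codebase: the real criterion classes take a noise_var keyword and (by assumption here) return (loss, output, stats) as utils.train_epoch expects.

import numpy as np
import torch

class TinyNet(torch.nn.Module):
    # toy base network emitting one predicted mean per input
    def __init__(self, in_dim=1):
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Linear(in_dim, 50),
            torch.nn.ReLU(),
            torch.nn.Linear(50, 1))

    def forward(self, x):
        return self.net(x)

class FixedNoiseGaussian:
    # hypothetical criterion: Gaussian NLL up to a constant, fixed noise variance
    def __init__(self, noise_var=1.0):
        self.noise_var = noise_var

    def __call__(self, model, inputs, targets):
        preds = model(inputs)
        loss = ((preds - targets) ** 2).mean() / (2 * self.noise_var)
        return loss, preds, {}

x = np.random.rand(128, 1).astype(np.float32)
y = (np.sin(6 * x) + 0.1 * np.random.randn(128, 1)).astype(np.float32)

runner = RegressionRunner(TinyNet, epochs=200, criterion=FixedNoiseGaussian,
                          swag_start=100, use_swag=True, model_variance=False)
runner.fit(x, y)
mu, var = runner.predict(x)  # posterior-averaged mean and predictive variance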
Example #7
print('Preparing model')
print(*model_cfg.args)
model = model_cfg.base(*model_cfg.args,
                       num_classes=num_classes,
                       **model_cfg.kwargs)
model.to(args.device)

args.no_cov_mat = not args.cov_mat
if args.swag:
    print('SWAG training')
    swag_model = SWAG(model_cfg.base,
                      subspace_type=args.subspace,
                      subspace_kwargs={'max_rank': args.max_num_models},
                      *model_cfg.args,
                      num_classes=num_classes,
                      **model_cfg.kwargs)
    swag_model.to(args.device)
else:
    print('SGD training')


def schedule(epoch):
    t = (epoch) / (args.swag_start if args.swag else args.epochs)
    lr_ratio = args.swag_lr / args.lr_init if args.swag else 0.01
    if t <= 0.5:
        factor = 1.0
    elif t <= 0.9:
        factor = 1.0 - (1.0 - lr_ratio) * (t - 0.5) / 0.4
    else:
        factor = lr_ratio
    return args.lr_init * factor
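A sketch of how this schedule is typically wired into the training loop; adjust_learning_rate mirrors the utils helper used alongside this code, and the SWAG collection condition follows the pattern in the examples above (exact helper names are assumptions):

for epoch in range(args.epochs):
    lr = schedule(epoch)
    utils.adjust_learning_rate(optimizer, lr)
    train_res = utils.train_epoch(train_loader, model, criterion, optimizer)
    if args.swag and (epoch + 1) > args.swag_start:
        swag_model.collect_model(model)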