Example #1
    def __init__(self,
                 data,
                 linear=BayesianRegressionDense,
                 out_features=10,
                 **kwargs):
        """
        Neural linear module. Implements a deep feature extractor with an (approximate) Bayesian layer on top.
        :param data: (ActiveLearningDataset) Dataset.
        :param linear: (nn.Module) Defines the type of layer to implement approx. Bayes computation.
        :param out_features: (int) Dimensionality of model targets.
        :param kwargs: (dict) Additional parameters.
        """
        super().__init__()

        self.feature_extractor = nn.Sequential(
            nn.Linear(data.X.shape[1], out_features),
            nn.BatchNorm1d(out_features), nn.ReLU(),
            nn.Linear(out_features, out_features),
            nn.BatchNorm1d(out_features), nn.ReLU())

        self.linear = linear([out_features, 1], **kwargs)
        self.normalize = data.normalize

        if self.normalize:
            self.output_mean = utils.to_gpu(torch.FloatTensor([data.y_mean]))
            self.output_std = utils.to_gpu(torch.FloatTensor([data.y_std]))

        dataloader = DataLoader(Dataset(data, 'train'),
                                batch_size=len(data.index['train']),
                                shuffle=False)
        for (x_train, y_train) in dataloader:
            self.x_train, self.y_train = utils.to_gpu(x_train, y_train)
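
A hypothetical usage sketch, mirroring the call pattern of Example #14 below; `data` is assumed to be the ActiveLearningDataset the docstring names, exposing `X`, `normalize`, `y_mean`/`y_std`, and `index['train']` (not a verified API):

# Hypothetical usage -- names follow Example #14, sn2 is forwarded via **kwargs
# to the Bayesian layer (see BayesianRegressionDense in Example #6).
model = utils.to_gpu(NeuralLinear(data, linear=BayesianRegressionDense,
                                  out_features=10, sn2=1.0))
model.optimize(data, num_epochs=1000, batch_size=64)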
Example #2
    def get_projections(self, data, J, projection='two'):
        """
        Get projections for the ACS approximate procedure
        :param data: (Object) Data object to get projections for
        :param J: (int) Number of projections to use
        :param projection: (str) Type of projection to use (currently only 'two' supported)
        :return: (torch.tensor, torch.tensor) Scaled projections and a zero placeholder for the entropies
        """
        projections = []
        with torch.no_grad():
            theta_mean, theta_cov = self.linear._compute_posterior(
                self.encode(self.x_train), self.y_train)
            jitter = utils.to_gpu(torch.eye(len(theta_cov)) * 1e-4)
            try:
                theta_samples = MVN(theta_mean.flatten(),
                                    theta_cov + jitter).sample(torch.Size([J]))
            except (RuntimeError, ValueError):
                # Debugging hook: MVN construction/sampling fails when the
                # posterior covariance is not positive-definite despite the jitter.
                import pdb
                pdb.set_trace()

            dataloader = DataLoader(Dataset(data, 'unlabeled'),
                                    batch_size=len(data.index['unlabeled']),
                                    shuffle=False)
            for (x, _) in dataloader:
                x = utils.to_gpu(x)
                if projection == 'two':
                    for theta_sample in theta_samples:
                        projections.append(
                            self._compute_expected_ll(x, theta_sample))
                else:
                    raise NotImplementedError

        scale = utils.to_gpu(torch.sqrt(1 / torch.FloatTensor([J])))
        return scale * torch.cat(projections, dim=1), torch.zeros(len(x))
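
The 1/sqrt(J) factor is a Monte Carlo normalization: it makes the squared norm of each pool point's projection row equal the average of its J squared samples. A minimal, self-contained check of that identity (illustrative only, independent of the repo):

import torch

J = 8
f = torch.randn(5, J)  # stand-in: 5 pool points, J posterior samples
proj = torch.sqrt(1 / torch.FloatTensor([J])) * f
# ||proj_n||^2 == (1/J) * sum_j f[n, j]^2 for every pool point n
assert torch.allclose(proj.norm(dim=1) ** 2, (f ** 2).mean(dim=1))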
Example #3
 def __init__(self, shape, a0=1., b0=1., **kwargs):
     """
     Implements Bayesian linear regression layer with a hyper-prior on the weight variances.
     :param shape: (tuple of ints) Input and output feature dimensions of the regression layer.
     :param a0: (float) Hyper-prior alpha_0 for the Inverse-Gamma distribution on the weight variances.
     :param b0: (float) Hyper-prior beta_0 for the Inverse-Gamma distribution on the weight variances.
     """
     super().__init__(shape, **kwargs)
     self.a0 = utils.to_gpu(torch.FloatTensor([a0]))
     self.b0 = utils.to_gpu(torch.FloatTensor([b0]))
     self.y_var = self.b0 / self.a0
Example #4
    def get_projections(self,
                        data,
                        J,
                        projection='two',
                        gamma=0,
                        transform=None,
                        **kwargs):
        """
        Get projections for the ACS approximate procedure
        :param data: (Object) Data object to get projections for
        :param J: (int) Number of projections to use
        :param projection: (str) Type of projection to use (currently only 'two' supported)
        :param gamma: (float) Weight of the predictive-entropy term added to each projection
        :param transform: (torchvision.transform) Transform applied to the unlabeled data
        :param kwargs: (dict) Additional parameters
        :return: (torch.tensor, torch.tensor) Scaled projections and pool predictive entropies
        """
        ent = lambda py: torch.distributions.Categorical(probs=py).entropy()
        projections = []
        feat_x = []
        with torch.no_grad():
            mean, cov = self.linear._compute_posterior()
            jitter = utils.to_gpu(torch.eye(len(cov)) * 1e-6)
            theta_samples = MVN(mean,
                                cov + jitter).sample(torch.Size([J])).view(
                                    J, -1, self.linear.out_features)
            dataloader = DataLoader(Dataset(data,
                                            'unlabeled',
                                            transform=transform),
                                    batch_size=256,
                                    shuffle=False)

            for (x, _) in dataloader:
                x = utils.to_gpu(x)
                feat_x.append(self.encode(x))

            feat_x = torch.cat(feat_x)
            py = self._compute_predictive_posterior(self.linear(
                feat_x, num_samples=100),
                                                    logits=False)
            ent_x = ent(py)
            if projection == 'two':
                for theta_sample in theta_samples:
                    projections.append(
                        self._compute_expected_ll(feat_x, theta_sample, py) +
                        gamma * ent_x[:, None])
            else:
                raise NotImplementedError

        scale = utils.to_gpu(torch.sqrt(1 / torch.FloatTensor([J])))
        return scale * torch.cat(projections, dim=1), ent_x
Example #5
    def build(self, M=1, **kwargs):
        """
        Constructs a batch of points to sample from the unlabeled set.
        :param M: (int) Batch size.
        :param kwargs: (dict) Additional parameters.
        :return: (list of ints) Selected data point indices.
        """
        self._init_build(M, **kwargs)
        w = utils.to_gpu(torch.zeros([len(self.ELn), 1]))
        norm = lambda weights: (self.EL -
                                (self.ELn.t() @ weights).squeeze()).norm()
        for m in range(M):
            w = self._step(m, w)

        # print(w[w.nonzero()[:, 0]].cpu().numpy())
        print('|| L-L(w)  ||: {:.4f}'.format(norm(w)))
        print('|| L-L(w1) ||: {:.4f}'.format(norm((w > 0).float())))
        print('Avg pred entropy (pool): {:.4f}'.format(
            self.entropy.mean().item()))
        print('Avg pred entropy (batch): {:.4f}'.format(
            self.entropy[w.flatten() > 0].mean().item()))
        try:
            logdet = torch.slogdet(
                self.model.linear._compute_posterior()[1])[1].item()
            print('logdet weight cov: {:.4f}'.format(logdet))
        except TypeError:
            pass

        return w.nonzero()[:, 0].cpu().numpy()
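
The printed diagnostics track how well the weighted subset reproduces the full objective. A standalone illustration of the residual norm ||L - Ln^T w|| that the `norm` lambda evaluates, with shapes inferred from the code and `EL`/`ELn` replaced by stand-ins (the real attributes come from `_init_build`):

import torch

ELn = torch.randn(100, 16)   # stand-in for self.ELn (pool points x projections)
EL = ELn.sum(dim=0)          # stand-in for self.EL, the full-pool target
w = torch.zeros(100, 1)      # selection weights, as initialized in build()
residual = (EL - (ELn.t() @ w).squeeze()).norm()
print(residual)              # equals EL.norm() while no points are selected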
Example #6
 def __init__(self, shape, sn2=1., s=1.):
     """
     Implements Bayesian linear regression with a dense linear layer.
     :param shape: (tuple of ints) Input and output feature dimensions of the regression layer.
     :param sn2: (float) Noise variance for the linear regression.
     :param s: (float) Scale of the diagonal prior on the weights of the layer.
     """
     super().__init__()
     self.in_features, self.out_features = shape
     self.y_var = sn2
     self.w_cov_prior = s * utils.to_gpu(torch.eye(self.in_features))
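
For reference, the standard conjugate posterior that a `_compute_posterior` on such a layer would return in closed form, under the model y = Xw + eps, eps ~ N(0, sn2·I), w ~ N(0, s·I). This is textbook Bayesian linear regression, not verified against the repo's implementation:

import torch

X = torch.randn(50, 3)                            # features (N x D)
y = X @ torch.randn(3, 1) + 0.1 * torch.randn(50, 1)
sn2, s = 1.0, 1.0
precision = X.t() @ X / sn2 + torch.eye(3) / s    # posterior precision of w
cov = torch.inverse(precision)                    # posterior covariance
mean = cov @ X.t() @ y / sn2                      # posterior mean (D x 1)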
Example #7
 def _compute_expected_ll(self, x, theta, py):
     """
     Compute the expected log-likelihood for data
     :param x: (torch.tensor) Inputs to compute the likelihood for
     :param theta: (torch.tensor) Theta parameter to use in the likelihood computations
     :param py: (torch.tensor) Predictive posterior used to weight the per-class log-likelihoods
     :return: (torch.tensor) Expected log-likelihood of the inputs
     """
     logits = x @ theta
     ys = torch.ones_like(logits).type(torch.LongTensor) * torch.arange(
         self.linear.out_features)[None, :]
     ys = utils.to_gpu(ys).t()
     loglik = torch.stack([-self.cross_entropy(logits, y) for y in ys]).t()
     return torch.sum(py * loglik, dim=-1, keepdim=True)
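
Stripped of the repo-specific plumbing, the quantity computed is E_{y~py}[log p(y | x, theta)]. A self-contained equivalent using log_softmax (equivalent assuming self.cross_entropy uses reduction='none'):

import torch

logits = torch.randn(4, 3)                        # 4 inputs, 3 classes
py = torch.softmax(torch.randn(4, 3), dim=-1)     # predictive posterior
loglik = torch.log_softmax(logits, dim=-1)        # log p(y | x, theta) per class
expected_ll = torch.sum(py * loglik, dim=-1, keepdim=True)  # shape (4, 1)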
Example #8
    def _compute_predictive_posterior(self, y_pred, logits=True):
        """
        Return the posterior predictive computed from Monte Carlo samples
        :param y_pred: (torch.tensor) Sampled log-predictions, shape (num_samples, N, C)
        :param logits: (bool) If False, apply a softmax to return probabilities
        :return: (torch.tensor) Posterior predictive
        """
        log_pred_samples = y_pred
        L = utils.to_gpu(torch.FloatTensor([log_pred_samples.shape[0]]))
        preds = torch.logsumexp(log_pred_samples, dim=0) - torch.log(L)
        if not logits:
            preds = torch.softmax(preds, dim=-1)

        return preds
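
Because the samples enter as log-probabilities, the logsumexp-minus-log(L) step is a numerically stable log of the Monte Carlo mean, and the softmax then recovers the averaged probabilities exactly. A small standalone numeric check:

import torch

# Two MC samples of log-probabilities over two classes
log_pred_samples = torch.log(torch.tensor([[[0.7, 0.3]], [[0.5, 0.5]]]))
L = torch.FloatTensor([log_pred_samples.shape[0]])
preds = torch.logsumexp(log_pred_samples, dim=0) - torch.log(L)
print(torch.softmax(preds, dim=-1))  # tensor([[0.6000, 0.4000]]) -- the sample mean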
Example #9
    def get_predictions(self, x, data):
        """
        Make predictions for data
        :param x: (torch.tensor) Observations to make predictions for
        :param data: (Object) Data to use for making predictions
        :return: (np.arrays) Predictive means and variances
        """
        self.eval()
        dataloader = DataLoader(Dataset(data, 'prediction', x_star=x),
                                batch_size=len(x),
                                shuffle=False)
        for (x, _) in dataloader:
            x = utils.to_gpu(x)
            y_pred = self.forward(x)
            pred_mean, pred_var = y_pred
            if self.normalize:
                pred_mean, pred_var = self.get_unnormalized(
                    pred_mean), self.output_std**2 * pred_var

        return pred_mean.detach().cpu().numpy(), pred_var.detach().cpu().numpy()
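
The un-normalization step presumably inverts a target standardization (y - y_mean) / y_std; under that assumption, the mean and variance map back as below (a sketch; get_unnormalized itself is not shown in these examples):

import torch

y_mean, y_std = torch.FloatTensor([2.0]), torch.FloatTensor([3.0])
pred_mean, pred_var = torch.FloatTensor([0.5]), torch.FloatTensor([0.04])
unnorm_mean = pred_mean * y_std + y_mean   # -> 3.5
unnorm_var = pred_var * y_std ** 2         # -> 0.36, matching output_std**2 * pred_var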
Example #10
    def _evaluate(self, data, batch_size, data_type='test', transform=None):
        """
        Evaluate model with data
        :param data: (Object) Data to use for evaluation
        :param batch_size: (int) Batch-size for evaluation procedure (memory issues)
        :param data_type: (str) Data split to use for evaluation
        :param transform: (torchvision.transform) Transform procedure applied to data during training / validation
        :return: (np.arrays) Performance metrics for model
        """
        assert data_type in ['val', 'test']
        losses, performances = [], []

        if data_type == 'val' and len(data.index['val']) == 0:
            return losses, performances

        gt.pause()
        with torch.no_grad():
            dataloader = DataLoader(dataset=Dataset(data,
                                                    data_type,
                                                    transform=transform),
                                    batch_size=batch_size,
                                    shuffle=True,
                                    drop_last=True,
                                    num_workers=4)
            for (x, y) in dataloader:
                x, y = utils.to_gpu(x, y.type(torch.LongTensor).squeeze())
                y_pred_samples = self.forward(x, num_samples=100)
                y_pred = self._compute_predictive_posterior(y_pred_samples)[
                    None, :, :]
                loss = self._compute_log_likelihood(
                    y, y_pred)  # use predictive at test time
                avg_loss = loss / len(x)
                performance = self._evaluate_performance(y, y_pred_samples)
                losses.append(avg_loss.cpu().item())
                performances.append(performance.cpu().item())

        gt.resume()
        return losses, performances
Example #11
    def _evaluate(self, data, batch_size, data_type='test', **kwargs):
        """
        Evaluate model with data
        :param data: (Object) Data to use for evaluation
        :param batch_size: (int) Batch-size for evaluation procedure (memory issues)
        :param data_type: (str) Data split to use for evaluation
        :param kwargs: (dict) Optional additional arguments for evaluation
        :return: (np.arrays) Performance metrics for model
        """

        assert data_type in ['val', 'test']
        losses, performances = [], []

        if data_type == 'val' and len(data.index['val']) == 0:
            return losses, performances

        gt.pause()
        self.eval()
        with torch.no_grad():
            dataloader = DataLoader(Dataset(data,
                                            data_type,
                                            transform=kwargs.get(
                                                'transform', None)),
                                    batch_size=batch_size,
                                    shuffle=True)
            for (x, y) in dataloader:
                x, y = utils.to_gpu(x, y)
                y_pred = self.forward(x)
                pred_mean, pred_variance = y_pred
                loss = torch.sum(
                    -utils.gaussian_log_density(y, pred_mean, pred_variance))
                avg_loss = loss / len(x)
                performance = self._evaluate_performance(y, y_pred)
                losses.append(avg_loss.cpu().item())
                performances.append(performance.cpu().item())

        gt.resume()
        return losses, performances
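
A minimal sketch of the Gaussian log-density the loss above relies on, assuming utils.gaussian_log_density is the standard diagonal form (an assumption, not the repo's code):

import math
import torch

def gaussian_log_density(y, mean, var):
    # log N(y | mean, var), elementwise over a diagonal Gaussian
    return -0.5 * (math.log(2 * math.pi) + torch.log(var)
                   + (y - mean) ** 2 / var)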
Example #12
    def optimize(self,
                 data,
                 num_epochs=1000,
                 batch_size=64,
                 initial_lr=1e-2,
                 freq_summary=100,
                 weight_decay=1e-1,
                 weight_decay_theta=None,
                 train_transform=None,
                 val_transform=None,
                 **kwargs):
        """
        Internal functionality to train model
        :param data: (Object) Training data
        :param num_epochs: (int) Number of epochs to train for
        :param batch_size: (int) Batch-size for training
        :param initial_lr: (float) Initial learning rate
        :param freq_summary: (int) Epoch frequency of validation summaries
        :param weight_decay: (float) Weight-decay parameter for deterministic weights
        :param weight_decay_theta: (float) Weight-decay parameter for non-deterministic weights
        :param train_transform: (torchvision.transform) Transform procedure for training data
        :param val_transform: (torchvision.transform) Transform procedure for validation data
        :param kwargs: (dict) Optional additional arguments for optimization
        :return: None
        """
        weight_decay_theta = weight_decay if weight_decay_theta is None else weight_decay_theta
        weights = [
            v for k, v in self.named_parameters()
            if (not k.startswith('linear')) and k.endswith('weight')
        ]
        weights_theta = [
            v for k, v in self.named_parameters()
            if k.startswith('linear') and k.endswith('weight')
        ]
        other = [
            v for k, v in self.named_parameters() if not k.endswith('weight')
        ]
        optimizer = torch.optim.Adam([
            {
                'params': weights,
                'weight_decay': weight_decay
            },
            {
                'params': weights_theta,
                'weight_decay': weight_decay_theta
            },
            {
                'params': other
            },
        ],
                                     lr=initial_lr)

        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                               num_epochs,
                                                               eta_min=1e-5)

        dataloader = DataLoader(dataset=Dataset(data,
                                                'train',
                                                transform=train_transform),
                                batch_size=batch_size,
                                shuffle=True,
                                drop_last=True,
                                num_workers=4)
        for epoch in range(num_epochs):
            losses, kls, performances = [], [], []
            for (x, y) in dataloader:
                optimizer.zero_grad()
                x, y = utils.to_gpu(x, y.type(torch.LongTensor).squeeze())
                y_pred = self.forward(x)
                step_loss, kl = self._compute_loss(
                    y, y_pred,
                    len(x) / len(data.index['train']))
                step_loss.backward()
                optimizer.step()

                performance = self._evaluate_performance(y, y_pred)
                losses.append(step_loss.cpu().item())
                kls.append(kl.cpu().item())
                performances.append(performance.cpu().item())

            # Step the LR scheduler once per epoch, after the optimizer updates
            # (the ordering required since PyTorch 1.1).
            scheduler.step()

            if epoch % freq_summary == 0 or epoch == num_epochs - 1:
                val_bsz = 1024
                val_losses, val_performances = self._evaluate(
                    data, val_bsz, 'val', transform=val_transform, **kwargs)
                print(
                    '#{} loss: {:.4f} (val: {:.4f}), kl: {:.4f}, {}: {:.4f} (val: {:.4f})'
                    .format(epoch, np.mean(losses), np.mean(val_losses),
                            np.mean(kls), self.metric, np.mean(performances),
                            np.mean(val_performances)))
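
The three parameter groups implement selective weight decay: decay applies only to weight matrices, with a separate strength for the Bayesian `linear` layer, while biases and norm parameters fall back to Adam's default of zero. A condensed, standalone illustration of the same pattern:

import torch

net = torch.nn.Linear(4, 2)
decay = [p for n, p in net.named_parameters() if n.endswith('weight')]
no_decay = [p for n, p in net.named_parameters() if n.endswith('bias')]
opt = torch.optim.Adam([{'params': decay, 'weight_decay': 1e-1},
                        {'params': no_decay}],  # inherits the default decay of 0
                       lr=1e-2)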
Example #13
    def optimize(self,
                 data,
                 num_epochs=1000,
                 batch_size=64,
                 initial_lr=1e-2,
                 weight_decay=1e-1,
                 **kwargs):
        """
        Internal functionality to train model
        :param data: (Object) Training data
        :param num_epochs: (int) Number of epochs to train for
        :param batch_size: (int) Batch-size for training
        :param initial_lr: (float) Initial learning rate
        :param weight_decay: (float) Weight-decay parameter for deterministic weights
        :param kwargs: (dict) Optional additional arguments for optimization
        :return: None
        """
        weights = [
            v for k, v in self.named_parameters() if k.endswith('weight')
        ]
        other = [v for k, v in self.named_parameters() if k.endswith('bias')]
        optimizer = torch.optim.Adam([
            {
                'params': weights,
                'weight_decay': weight_decay
            },
            {
                'params': other
            },
        ],
                                     lr=initial_lr)

        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                               num_epochs,
                                                               eta_min=1e-5)
        dataloader = DataLoader(dataset=Dataset(data,
                                                'train',
                                                transform=kwargs.get(
                                                    'transform', None)),
                                batch_size=batch_size,
                                shuffle=True,
                                drop_last=True)
        for epoch in range(num_epochs):
            losses, performances = [], []
            self.train()
            for (x, y) in dataloader:
                optimizer.zero_grad()
                x, y = utils.to_gpu(x, y)
                y_pred = self.forward(x)
                step_loss = -self._compute_log_likelihood(y, y_pred)
                step_loss.backward()
                optimizer.step()

                performance = self._evaluate_performance(y, y_pred)
                losses.append(step_loss.cpu().item())
                performances.append(performance.cpu().item())

            # Step the LR scheduler once per epoch, after the optimizer updates
            # (the ordering required since PyTorch 1.1).
            scheduler.step()

            if epoch % 100 == 0 or epoch == num_epochs - 1:
                print('#{} loss: {:.4f}, rmse: {:.4f}'.format(
                    epoch, np.mean(losses), np.mean(performances)))
Example #14
    utils.set_gpu_mode(True)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    num_test_points = 10000
    if args.dataset == 'fashion_mnist':
        from acs.al_data_set import mnist_train_transform as train_transform, mnist_test_transform as test_transform
    else:
        from acs.al_data_set import torchvision_train_transform as train_transform, torchvision_test_transform as test_transform
        if args.dataset == 'svhn':
            num_test_points = 26032

    model = resnet18(pretrained=args.pretrained_model,
                     pretrained_model_file=args.model_file,
                     resnet_size=84)
    model = utils.to_gpu(model)
    dataset = utils.get_torchvision_dataset(name=args.dataset,
                                            data_dir=args.data_dir,
                                            model=model,
                                            encode=False,
                                            seed=args.seed,
                                            n_split=(-1, 10000,
                                                     num_test_points))
    init_num_labeled = (len(dataset[1]['train'])
                        if args.coreset == 'Best' else args.init_num_labeled)
    data = ALD(dataset, init_num_labeled=init_num_labeled, normalize=False)

    dir_string = 'acq_{}_cs_{}_batch_{}_labeled_{}_budget_{}_seed_{}'.format(
        args.acq.lower(), args.coreset.lower(), args.batch_size,
        args.init_num_labeled, args.budget, args.seed)
    gt.start()
    while len(data.index['train']) < args.init_num_labeled + args.budget:
        print('{}: Number of samples {}/{}'.format(
            args.seed,
            len(data.index['train']) - args.init_num_labeled, args.budget))

        optim_params = {
            'num_epochs': args.training_epochs,
            'batch_size': get_batch_size(args.dataset, data),
            'weight_decay': args.weight_decay,
            'initial_lr': args.initial_lr
        }
        nl = NeuralLinearTB(data, out_features=out_features, **kwargs)
        # nl = NeuralLinear(data, out_features=out_features, **kwargs)
        nl = utils.to_gpu(nl)
        nl.optimize(data, **optim_params)
        gt.stamp('model_training', unique=False)
        num_samples = len(data.index['train']) - args.init_num_labeled
        test_nll, test_performance = nl.test(data)

        dataloader = DataLoader(Dataset(data, 'prediction', x_star=data.X),
                                batch_size=len(data.X),
                                shuffle=False)
        batch_size = min(
            args.batch_size,
            args.init_num_labeled + args.budget - len(data.index['train']))
        cs_kwargs['a_tilde'] = nl.linear.a_tilde.cpu().item()
        cs_kwargs['b_tilde'] = nl.linear.b_tilde.cpu().item()
        cs_kwargs['nu'] = nl.linear.nu.cpu().item()
        for (x, _) in dataloader: