def __init__(self, data, linear=BayesianRegressionDense, out_features=10, **kwargs):
    """
    Neural linear module. Implements a deep feature extractor with an
    (approximate) Bayesian layer on top.
    :param data: (ActiveLearningDataset) Dataset.
    :param linear: (nn.Module) Defines the type of layer that implements the approximate Bayesian computation.
    :param out_features: (int) Dimensionality of the feature representation fed to the Bayesian layer.
    :param kwargs: (dict) Additional parameters passed to the Bayesian layer.
    """
    super().__init__()
    self.feature_extractor = nn.Sequential(
        nn.Linear(data.X.shape[1], out_features),
        nn.BatchNorm1d(out_features),
        nn.ReLU(),
        nn.Linear(out_features, out_features),
        nn.BatchNorm1d(out_features),
        nn.ReLU()
    )
    # Bayesian regression layer mapping features to a scalar target.
    self.linear = linear([out_features, 1], **kwargs)

    self.normalize = data.normalize
    if self.normalize:
        self.output_mean = utils.to_gpu(torch.FloatTensor([data.y_mean]))
        self.output_std = utils.to_gpu(torch.FloatTensor([data.y_std]))

    # Cache the full labeled training set on the GPU for posterior updates.
    dataloader = DataLoader(Dataset(data, 'train'),
                            batch_size=len(data.index['train']),
                            shuffle=False)
    for (x_train, y_train) in dataloader:
        self.x_train, self.y_train = utils.to_gpu(x_train, y_train)
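# --- Illustrative sketch (not part of the module): the neural-linear pattern.
# A deterministic feature extractor feeds a conjugate Bayesian regression on
# its output features. All names below are local to this example; it assumes
# only plain PyTorch.
import torch
import torch.nn as nn

feature_extractor = nn.Sequential(
    nn.Linear(8, 10), nn.BatchNorm1d(10), nn.ReLU(),
    nn.Linear(10, 10), nn.BatchNorm1d(10), nn.ReLU())

X = torch.randn(32, 8)
y = torch.randn(32, 1)
with torch.no_grad():
    feats = feature_extractor(X)             # deterministic features

# Conjugate Bayesian regression on the features (prior N(0, I), noise var 1).
cov = torch.inverse(feats.t() @ feats + torch.eye(10))
mean = cov @ feats.t() @ y
print(mean.shape, cov.shape)                 # torch.Size([10, 1]) torch.Size([10, 10])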
def get_projections(self, data, J, projection='two'):
    """
    Get projections for the ACS approximate procedure.
    :param data: (Object) Data object to get projections for
    :param J: (int) Number of projections to use
    :param projection: (str) Type of projection to use (currently only 'two' supported)
    :return: (torch.tensor, torch.tensor) Projections and per-point entropies (zero for regression)
    """
    projections = []
    with torch.no_grad():
        theta_mean, theta_cov = self.linear._compute_posterior(
            self.encode(self.x_train), self.y_train)
        jitter = utils.to_gpu(torch.eye(len(theta_cov)) * 1e-4)
        try:
            theta_samples = MVN(theta_mean.flatten(),
                                theta_cov + jitter).sample(torch.Size([J]))
        except RuntimeError:
            # Fall back to a larger jitter if the covariance is numerically
            # non-positive-definite.
            jitter = utils.to_gpu(torch.eye(len(theta_cov)) * 1e-2)
            theta_samples = MVN(theta_mean.flatten(),
                                theta_cov + jitter).sample(torch.Size([J]))

        dataloader = DataLoader(Dataset(data, 'unlabeled'),
                                batch_size=len(data.index['unlabeled']),
                                shuffle=False)
        for (x, _) in dataloader:
            x = utils.to_gpu(x)
            if projection == 'two':
                for theta_sample in theta_samples:
                    projections.append(
                        self._compute_expected_ll(x, theta_sample))
            else:
                raise NotImplementedError

    return utils.to_gpu(torch.sqrt(1 / torch.FloatTensor([J]))) \
        * torch.cat(projections, dim=1), torch.zeros(len(x))
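# --- Standalone check (illustrative): the sqrt(1/J) factor above makes inner
# products of projection vectors behave like Monte Carlo averages over the J
# theta samples, since (f / sqrt(J)) . (g / sqrt(J)) = (1/J) sum_j f_j g_j.
import torch

J = 10000
f, g = torch.randn(J), torch.randn(J)
scaled_dot = (f / J ** 0.5) @ (g / J ** 0.5)
mc_estimate = (f * g).mean()
print(torch.allclose(scaled_dot, mc_estimate, atol=1e-5))  # True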
def __init__(self, shape, a0=1., b0=1., **kwargs):
    """
    Implements a Bayesian linear regression layer with a hyper-prior on the
    weight variances.
    :param shape: (int) Number of input features for the regression.
    :param a0: (float) Hyper-prior alpha_0 for the Inverse-Gamma distribution on the weight variances.
    :param b0: (float) Hyper-prior beta_0 for the Inverse-Gamma distribution on the weight variances.
    """
    super().__init__(shape, **kwargs)
    self.a0 = utils.to_gpu(torch.FloatTensor([a0]))
    self.b0 = utils.to_gpu(torch.FloatTensor([b0]))
    # Initial point estimate of the observation noise variance.
    self.y_var = self.b0 / self.a0
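# --- Standalone sketch (illustrative, plain PyTorch): an Inverse-Gamma(a0, b0)
# prior on a variance is a Gamma(a0, b0) prior on the precision, so b0 / a0,
# the initial y_var above, is the reciprocal of the expected precision.
import torch

a0, b0 = torch.tensor(1.), torch.tensor(1.)
precision = torch.distributions.Gamma(a0, b0).sample(torch.Size([200000]))
variance = 1. / precision                # IG(a0, b0) samples (mean is infinite for a0 <= 1)
print('b0 / a0          :', (b0 / a0).item())
print('1 / E[precision] :', (1. / precision.mean()).item())  # approx. equal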
def get_projections(self, data, J, projection='two', gamma=0, transform=None, **kwargs):
    """
    Get projections for the ACS approximate procedure.
    :param data: (Object) Data object to get projections for
    :param J: (int) Number of projections to use
    :param projection: (str) Type of projection to use (currently only 'two' supported)
    :param gamma: (float) Weight of the predictive-entropy term added to each projection
    :param transform: (torchvision.transform) Transform applied to the unlabeled data
    :return: (torch.tensor, torch.tensor) Projections and per-point predictive entropies
    """
    ent = lambda py: torch.distributions.Categorical(probs=py).entropy()
    projections = []
    feat_x = []
    with torch.no_grad():
        mean, cov = self.linear._compute_posterior()
        jitter = utils.to_gpu(torch.eye(len(cov)) * 1e-6)
        theta_samples = MVN(mean, cov + jitter).sample(
            torch.Size([J])).view(J, -1, self.linear.out_features)

        dataloader = DataLoader(Dataset(data, 'unlabeled', transform=transform),
                                batch_size=256,
                                shuffle=False)
        for (x, _) in dataloader:
            x = utils.to_gpu(x)
            feat_x.append(self.encode(x))

        feat_x = torch.cat(feat_x)
        py = self._compute_predictive_posterior(
            self.linear(feat_x, num_samples=100), logits=False)
        ent_x = ent(py)
        if projection == 'two':
            for theta_sample in theta_samples:
                projections.append(
                    self._compute_expected_ll(feat_x, theta_sample, py)
                    + gamma * ent_x[:, None])
        else:
            raise NotImplementedError

    return utils.to_gpu(torch.sqrt(1 / torch.FloatTensor([J]))) \
        * torch.cat(projections, dim=1), ent_x
def build(self, M=1, **kwargs):
    """
    Constructs a batch of points to sample from the unlabeled set.
    :param M: (int) Batch size.
    :param kwargs: (dict) Additional parameters.
    :return: (list of ints) Selected data point indices.
    """
    self._init_build(M, **kwargs)
    w = utils.to_gpu(torch.zeros([len(self.ELn), 1]))
    # Residual norm || L - L(w) || of the sparse subset approximation objective.
    norm = lambda weights: (self.EL - (self.ELn.t() @ weights).squeeze()).norm()
    for m in range(M):
        w = self._step(m, w)

    print('|| L-L(w) ||: {:.4f}'.format(norm(w)))
    print('|| L-L(w1) ||: {:.4f}'.format(norm((w > 0).float())))
    print('Avg pred entropy (pool): {:.4f}'.format(self.entropy.mean().item()))
    print('Avg pred entropy (batch): {:.4f}'.format(
        self.entropy[w.flatten() > 0].mean().item()))
    try:
        logdet = torch.slogdet(self.model.linear._compute_posterior()[1])[1].item()
        print('logdet weight cov: {:.4f}'.format(logdet))
    except TypeError:
        pass

    return w.nonzero()[:, 0].cpu().numpy()
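# --- Standalone sketch of the objective tracked by the norm lambda in build():
# how well the weighted sum of per-point projection vectors L(w) matches the
# pool-level target L = sum_n L_n. Shapes and names are illustrative.
import torch

N, J = 100, 10
ELn = torch.randn(N, J)                  # per-point projection vectors (rows)
EL = ELn.sum(dim=0)                      # pool-level target

w = torch.zeros(N, 1)
w[torch.randint(N, (5,))] = 1.           # a sparse candidate weighting
residual = (EL - (ELn.t() @ w).squeeze()).norm()
print('|| L - L(w) ||:', residual.item())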
def __init__(self, shape, sn2=1., s=1.):
    """
    Implements Bayesian linear regression with a dense linear layer.
    :param shape: ((int, int)) Number of input and output features for the regression.
    :param sn2: (float) Noise variance for the linear regression.
    :param s: (float) Scale of the isotropic Gaussian prior on the layer weights.
    """
    super().__init__()
    self.in_features, self.out_features = shape
    self.y_var = sn2
    self.w_cov_prior = s * utils.to_gpu(torch.eye(self.in_features))
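# --- Reference sketch (illustrative, not the module's implementation): the
# conjugate posterior this kind of layer computes, for a prior N(0, s * I) on
# the weights and Gaussian noise variance sn2.
import torch

def blr_posterior(X, y, sn2=1., s=1.):
    d = X.shape[1]
    w_cov_prior = s * torch.eye(d)
    # Posterior covariance: (X^T X / sn2 + prior_cov^{-1})^{-1}
    theta_cov = torch.inverse(X.t() @ X / sn2 + torch.inverse(w_cov_prior))
    # Posterior mean: cov @ X^T y / sn2
    theta_mean = theta_cov @ X.t() @ y / sn2
    return theta_mean, theta_cov

X = torch.randn(50, 3)
y = X @ torch.tensor([[1.], [-2.], [0.5]]) + 0.1 * torch.randn(50, 1)
mean, cov = blr_posterior(X, y)
print(mean.flatten())                    # close to (1, -2, 0.5)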
def _compute_expected_ll(self, x, theta, py):
    """
    Computes the expected log-likelihood of the inputs under the predictive posterior.
    :param x: (torch.tensor) Inputs to compute the likelihood for
    :param theta: (torch.tensor) Theta parameter to use in the likelihood computation
    :param py: (torch.tensor) Predictive posterior used to weight the per-class log-likelihoods
    :return: (torch.tensor) Expected log-likelihood of the inputs
    """
    logits = x @ theta
    # Evaluate the log-likelihood of every class label for every input...
    ys = torch.ones_like(logits).type(torch.LongTensor) * torch.arange(
        self.linear.out_features)[None, :]
    ys = utils.to_gpu(ys).t()
    loglik = torch.stack([-self.cross_entropy(logits, y) for y in ys]).t()
    # ...and average it under the predictive posterior.
    return torch.sum(py * loglik, dim=-1, keepdim=True)
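# --- Standalone toy version of the computation above for a 3-class problem
# (illustrative only; the real method uses the module's cross_entropy and
# GPU helpers).
import torch
import torch.nn.functional as F

logits = torch.randn(4, 3)                   # 4 inputs, 3 classes, one theta sample
py = torch.softmax(torch.randn(4, 3), -1)    # predictive posterior over classes

# log p(y = c | x, theta) for every class c, via the negative cross-entropy
loglik = torch.stack([
    -F.cross_entropy(logits, torch.full((4,), c, dtype=torch.long), reduction='none')
    for c in range(3)
]).t()                                       # shape (4, 3)

# expectation under the predictive posterior, one value per input
expected_ll = torch.sum(py * loglik, dim=-1, keepdim=True)
print(expected_ll.shape)                     # torch.Size([4, 1])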
def _compute_predictive_posterior(self, y_pred, logits=True):
    """
    Returns the posterior predictive, averaged over parameter samples.
    :param y_pred: (torch.tensor) Log-predictions, one set per parameter sample
    :param logits: (bool) If False, return probabilities instead of logits
    :return: (torch.tensor) Probit regression posterior predictive
    """
    log_pred_samples = y_pred
    L = utils.to_gpu(torch.FloatTensor([log_pred_samples.shape[0]]))
    # Monte Carlo average in probability space: log (1/L) sum_l exp(log p_l).
    preds = torch.logsumexp(log_pred_samples, dim=0) - torch.log(L)
    if not logits:
        preds = torch.softmax(preds, dim=-1)

    return preds
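# --- Standalone check (illustrative): the logsumexp expression above is a
# numerically stable Monte Carlo average of the per-sample predictive
# probabilities.
import torch

log_pred_samples = torch.log_softmax(torch.randn(100, 4, 3), dim=-1)  # (L, N, C)
L = torch.tensor([float(log_pred_samples.shape[0])])

stable = torch.logsumexp(log_pred_samples, dim=0) - torch.log(L)
naive = torch.log(log_pred_samples.exp().mean(dim=0))
print(torch.allclose(stable, naive, atol=1e-6))  # True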
def get_predictions(self, x, data):
    """
    Make predictions for the given data.
    :param x: (torch.tensor) Observations to make predictions for
    :param data: (Object) Data to use for making predictions
    :return: (np.arrays) Predictive means and variances
    """
    self.eval()
    dataloader = DataLoader(Dataset(data, 'prediction', x_star=x),
                            batch_size=len(x),
                            shuffle=False)
    for (x, _) in dataloader:
        x = utils.to_gpu(x)
        y_pred = self.forward(x)
        pred_mean, pred_var = y_pred
        if self.normalize:
            # Map the predictions back to the original target scale.
            pred_mean = self.get_unnormalized(pred_mean)
            pred_var = self.output_std ** 2 * pred_var

        return pred_mean.detach().cpu().numpy(), pred_var.detach().cpu().numpy()
def _evaluate(self, data, batch_size, data_type='test', transform=None):
    """
    Evaluate the model on the given data split.
    :param data: (Object) Data to use for evaluation
    :param batch_size: (int) Batch size for the evaluation procedure (memory issues)
    :param data_type: (str) Data split to use for evaluation
    :param transform: (torchvision.transform) Transform procedure applied to the data during training / validation
    :return: (np.arrays) Performance metrics for the model
    """
    assert data_type in ['val', 'test']
    losses, performances = [], []

    if data_type == 'val' and len(data.index['val']) == 0:
        return losses, performances

    gt.pause()
    with torch.no_grad():
        # No shuffling or batch dropping during evaluation, so every example
        # is scored exactly once.
        dataloader = DataLoader(dataset=Dataset(data, data_type, transform=transform),
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=4)
        for (x, y) in dataloader:
            x, y = utils.to_gpu(x, y.type(torch.LongTensor).squeeze())
            y_pred_samples = self.forward(x, num_samples=100)
            y_pred = self._compute_predictive_posterior(y_pred_samples)[None, :, :]
            loss = self._compute_log_likelihood(y, y_pred)  # use predictive at test time
            avg_loss = loss / len(x)
            performance = self._evaluate_performance(y, y_pred_samples)
            losses.append(avg_loss.cpu().item())
            performances.append(performance.cpu().item())

    gt.resume()
    return losses, performances
def _evaluate(self, data, batch_size, data_type='test', **kwargs):
    """
    Evaluate the model on the given data split.
    :param data: (Object) Data to use for evaluation
    :param batch_size: (int) Batch size for the evaluation procedure (memory issues)
    :param data_type: (str) Data split to use for evaluation
    :param kwargs: (dict) Optional additional arguments for evaluation
    :return: (np.arrays) Performance metrics for the model
    """
    assert data_type in ['val', 'test']
    losses, performances = [], []

    if data_type == 'val' and len(data.index['val']) == 0:
        return losses, performances

    gt.pause()
    self.eval()
    with torch.no_grad():
        # No shuffling during evaluation.
        dataloader = DataLoader(Dataset(data, data_type,
                                        transform=kwargs.get('transform', None)),
                                batch_size=batch_size,
                                shuffle=False)
        for (x, y) in dataloader:
            x, y = utils.to_gpu(x, y)
            y_pred = self.forward(x)
            pred_mean, pred_variance = y_pred
            loss = torch.sum(-utils.gaussian_log_density(y, pred_mean, pred_variance))
            avg_loss = loss / len(x)
            performance = self._evaluate_performance(y, y_pred)
            losses.append(avg_loss.cpu().item())
            performances.append(performance.cpu().item())

    gt.resume()
    return losses, performances
def optimize(self, data, num_epochs=1000, batch_size=64, initial_lr=1e-2,
             freq_summary=100, weight_decay=1e-1, weight_decay_theta=None,
             train_transform=None, val_transform=None, **kwargs):
    """
    Internal functionality to train the model.
    :param data: (Object) Training data
    :param num_epochs: (int) Number of epochs to train for
    :param batch_size: (int) Batch size for training
    :param initial_lr: (float) Initial learning rate
    :param freq_summary: (int) Frequency (in epochs) of validation summaries
    :param weight_decay: (float) Weight-decay parameter for deterministic weights
    :param weight_decay_theta: (float) Weight-decay parameter for non-deterministic weights
    :param train_transform: (torchvision.transform) Transform procedure for training data
    :param val_transform: (torchvision.transform) Transform procedure for validation data
    :param kwargs: (dict) Optional additional arguments for optimization
    :return: None
    """
    weight_decay_theta = weight_decay if weight_decay_theta is None else weight_decay_theta
    # Separate parameter groups: deterministic weights, Bayesian-layer weights,
    # and everything else (biases, batch-norm parameters) without weight decay.
    weights = [v for k, v in self.named_parameters()
               if (not k.startswith('linear')) and k.endswith('weight')]
    weights_theta = [v for k, v in self.named_parameters()
                     if k.startswith('linear') and k.endswith('weight')]
    other = [v for k, v in self.named_parameters() if not k.endswith('weight')]
    optimizer = torch.optim.Adam([
        {'params': weights, 'weight_decay': weight_decay},
        {'params': weights_theta, 'weight_decay': weight_decay_theta},
        {'params': other},
    ], lr=initial_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs, eta_min=1e-5)

    dataloader = DataLoader(dataset=Dataset(data, 'train', transform=train_transform),
                            batch_size=batch_size,
                            shuffle=True,
                            drop_last=True,
                            num_workers=4)
    for epoch in range(num_epochs):
        losses, kls, performances = [], [], []
        for (x, y) in dataloader:
            optimizer.zero_grad()
            x, y = utils.to_gpu(x, y.type(torch.LongTensor).squeeze())
            y_pred = self.forward(x)
            step_loss, kl = self._compute_loss(y, y_pred, len(x) / len(data.index['train']))
            step_loss.backward()
            optimizer.step()

            performance = self._evaluate_performance(y, y_pred)
            losses.append(step_loss.cpu().item())
            kls.append(kl.cpu().item())
            performances.append(performance.cpu().item())

        # Step the scheduler after the optimizer updates so the initial
        # learning rate is not skipped.
        scheduler.step()

        if epoch % freq_summary == 0 or epoch == num_epochs - 1:
            val_bsz = 1024
            val_losses, val_performances = self._evaluate(
                data, val_bsz, 'val', transform=val_transform, **kwargs)
            print('#{} loss: {:.4f} (val: {:.4f}), kl: {:.4f}, {}: {:.4f} (val: {:.4f})'.format(
                epoch, np.mean(losses), np.mean(val_losses), np.mean(kls),
                self.metric, np.mean(performances), np.mean(val_performances)))
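# --- Self-contained miniature of the optimizer setup above (illustrative):
# weight decay is applied only to weight matrices, and the cosine schedule is
# stepped once per epoch after the optimizer updates.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
weights = [v for k, v in model.named_parameters() if k.endswith('weight')]
other = [v for k, v in model.named_parameters() if not k.endswith('weight')]

optimizer = torch.optim.Adam([
    {'params': weights, 'weight_decay': 1e-1},
    {'params': other},                       # biases: no weight decay
], lr=1e-2)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-5)

for epoch in range(100):
    optimizer.zero_grad()
    loss = model(torch.randn(16, 4)).pow(2).mean()
    loss.backward()
    optimizer.step()
    scheduler.step()                         # after optimizer.step(), as in optimize()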
def optimize(self, data, num_epochs=1000, batch_size=64, initial_lr=1e-2,
             weight_decay=1e-1, **kwargs):
    """
    Internal functionality to train the model.
    :param data: (Object) Training data
    :param num_epochs: (int) Number of epochs to train for
    :param batch_size: (int) Batch size for training
    :param initial_lr: (float) Initial learning rate
    :param weight_decay: (float) Weight-decay parameter for deterministic weights
    :param kwargs: (dict) Optional additional arguments for optimization
    :return: None
    """
    weights = [v for k, v in self.named_parameters() if k.endswith('weight')]
    other = [v for k, v in self.named_parameters() if k.endswith('bias')]
    optimizer = torch.optim.Adam([
        {'params': weights, 'weight_decay': weight_decay},
        {'params': other},
    ], lr=initial_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs, eta_min=1e-5)

    dataloader = DataLoader(dataset=Dataset(data, 'train',
                                            transform=kwargs.get('transform', None)),
                            batch_size=batch_size,
                            shuffle=True,
                            drop_last=True)
    for epoch in range(num_epochs):
        losses, performances = [], []
        self.train()
        for (x, y) in dataloader:
            optimizer.zero_grad()
            x, y = utils.to_gpu(x, y)
            y_pred = self.forward(x)
            step_loss = -self._compute_log_likelihood(y, y_pred)
            step_loss.backward()
            optimizer.step()

            performance = self._evaluate_performance(y, y_pred)
            losses.append(step_loss.cpu().item())
            performances.append(performance.cpu().item())

        # Step the scheduler after the optimizer updates so the initial
        # learning rate is not skipped.
        scheduler.step()

        if epoch % 100 == 0 or epoch == num_epochs - 1:
            print('#{} loss: {:.4f}, rmse: {:.4f}'.format(
                epoch, np.mean(losses), np.mean(performances)))
utils.set_gpu_mode(True)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

num_test_points = 10000
if args.dataset == 'fashion_mnist':
    from acs.al_data_set import mnist_train_transform as train_transform, \
        mnist_test_transform as test_transform
else:
    from acs.al_data_set import torchvision_train_transform as train_transform, \
        torchvision_test_transform as test_transform

if args.dataset == 'svhn':
    num_test_points = 26032

model = resnet18(pretrained=args.pretrained_model,
                 pretrained_model_file=args.model_file,
                 resnet_size=84)
model = utils.to_gpu(model)
dataset = utils.get_torchvision_dataset(name=args.dataset,
                                        data_dir=args.data_dir,
                                        model=model,
                                        encode=False,
                                        seed=args.seed,
                                        n_split=(-1, 10000, num_test_points))
init_num_labeled = len(dataset[1]['train']) if args.coreset == 'Best' else args.init_num_labeled
data = ALD(dataset, init_num_labeled=init_num_labeled, normalize=False)

dir_string = 'acq_{}_cs_{}_batch_{}_labeled_{}_budget_{}_seed_{}'.format(
    args.acq.lower(), args.coreset.lower(), args.batch_size,
    args.init_num_labeled, args.budget, args.seed)
gt.start()
while len(data.index['train']) < args.init_num_labeled + args.budget:
    print('{}: Number of samples {}/{}'.format(
        args.seed, len(data.index['train']) - args.init_num_labeled, args.budget))
    optim_params = {
        'num_epochs': args.training_epochs,
        'batch_size': get_batch_size(args.dataset, data),
        'weight_decay': args.weight_decay,
        'initial_lr': args.initial_lr
    }
    nl = NeuralLinearTB(data, out_features=out_features, **kwargs)
    nl = utils.to_gpu(nl)
    nl.optimize(data, **optim_params)
    gt.stamp('model_training', unique=False)

    num_samples = len(data.index['train']) - args.init_num_labeled
    test_nll, test_performance = nl.test(data)

    dataloader = DataLoader(Dataset(data, 'prediction', x_star=data.X),
                            batch_size=len(data.X),
                            shuffle=False)
    # Never select more points than the remaining labeling budget allows.
    batch_size = min(args.batch_size,
                     args.init_num_labeled + args.budget - len(data.index['train']))
    cs_kwargs['a_tilde'] = nl.linear.a_tilde.cpu().item()
    cs_kwargs['b_tilde'] = nl.linear.b_tilde.cpu().item()
    cs_kwargs['nu'] = nl.linear.nu.cpu().item()
    for (x, _) in dataloader: