def predict(self, x, depth=None, get_std=False, return_model_std=False):
    self.model.eval()
    with torch.no_grad():
        x, = to_variable(var=(x, ), cuda=self.cuda)
        # if depth is None:
        #     depth = self.model.n_layers
        act_vec = self.model.forward(x, depth=depth).data
        if get_std:
            pred_mu, model_std = depth_categorical.marginalise_d_predict(
                act_vec, self.prob_model.current_posterior, depth=depth,
                softmax=(not self.regression), get_std=get_std)
            if return_model_std:
                return pred_mu.data, model_std.data
            else:
                # total predictive std: model uncertainty plus homoscedastic noise
                pred_std = (model_std**2 + self.f_neg_loglike.log_std.exp()**2).pow(0.5)
                return pred_mu.data, pred_std.data
        else:
            probs = depth_categorical.marginalise_d_predict(
                act_vec, self.prob_model.current_posterior, depth=depth,
                softmax=(not self.regression), get_std=get_std)
            return probs.data
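# --- Illustrative usage sketch (not part of the original class) ----------------------
# The helper below shows one way the `predict` method above might be called once a
# wrapper has been trained. The instance name `net`, the tensor `x_test` and the
# `regression` flag are hypothetical stand-ins introduced only for this example.
def example_marginalised_prediction(net, x_test, regression=True):
    """Hypothetical helper: depth-marginalised predictions with or without uncertainty."""
    if regression:
        # predictive mean and total std (model uncertainty + observation noise)
        mu, std = net.predict(x_test, get_std=True, return_model_std=False)
        return mu, std
    # class probabilities marginalised over the depth posterior
    probs = net.predict(x_test, get_std=False)
    return probs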
def get_exact_ELBO(self, trainloader, train_bn=False):
    """Get the exact ELBO with a full forward pass over the training set."""
    if train_bn:
        self.model.train()
    else:
        self.model.eval()
    with torch.no_grad():
        prior_loglikes = self.model.get_w_prior_loglike(k=None)
        N_train = len(trainloader.dataset)
        assert N_train == self.N_train
        cum_ELBO = []
        for x, y in trainloader:
            x, y = to_variable(var=(x, y), cuda=self.cuda)
            if not self.regression:
                y = y.long()
            act_vec = self.model.forward(x)
            ELBO = self.prob_model.estimate_ELBO(prior_loglikes, act_vec, y,
                                                 self.f_neg_loglike, N_train, Beta=1)
            # weight each batch's estimate by its share of the dataset
            cum_ELBO.append((x.shape[0] / N_train) * ELBO.data.unsqueeze(0))
        cum_ELBO = torch.cat(cum_ELBO, dim=0).sum(dim=0)
        return cum_ELBO.data.item()
def fit(self, x, y):
    """Optimise the stochastically estimated marginal joint of parameters and weights."""
    self.model.train()
    x, y = to_variable(var=(x, y), cuda=self.cuda)
    self.optimizer.zero_grad()

    sample_means = self.model.forward(x, self.train_samples)
    batch_size = x.shape[0]
    # tile targets across MC samples (no interleaving: the batch index changes fastest)
    repeat_dims = [self.train_samples] + [1 for i in range(1, len(y.shape))]
    y_expand = y.repeat(*repeat_dims)
    # flattening preserves the same ordering, so the flattened means line up with y_expand
    sample_means_flat = sample_means.view(self.train_samples * batch_size, -1)
    E_NLL = self.f_neg_loglike(sample_means_flat, y_expand).view(
        self.train_samples, batch_size).mean(dim=(0, 1))

    minus_E_ELBO = E_NLL + self.model.get_KL() / self.N_train
    minus_E_ELBO.backward()
    self.optimizer.step()

    err = rms(sample_means.mean(dim=0), y).item()
    return -minus_E_ELBO.data.item(), E_NLL.data.item(), err
def layer_predict(self, x):
    self.model.eval()
    x, = to_variable(var=(x, ), cuda=self.cuda)
    out = self.model.forward(x).data
    if not self.regression:
        out = F.softmax(out, dim=2)
    return out
def eval(self, x, y):
    # TODO: make computationally stable with logsoftmax and nll loss
    # -> would require making a log prediction method
    self.model.eval()
    with torch.no_grad():
        x, y = to_variable(var=(x, y), cuda=self.cuda)
        if not self.regression:
            y = y.long()
        act_vec = self.model.forward(x)

        if self.regression:
            means, model_stds = depth_categorical.marginalise_d_predict(
                act_vec.data, self.prob_model.current_posterior, depth=None,
                softmax=(not self.regression), get_std=True)
            mean_pred_negloglike = self.f_neg_loglike(
                means, y, model_std=model_stds).mean(dim=0).data
            err = rms(means, y).item()
        else:
            probs = depth_categorical.marginalise_d_predict(
                act_vec.data, self.prob_model.current_posterior, depth=None,
                softmax=(not self.regression))
            mean_pred_negloglike = self.f_neg_loglike_test(
                torch.log(probs), y).mean(dim=0).data
            pred = probs.max(dim=1, keepdim=False)[1]  # index of the max probability
            err = pred.ne(y.data).sum().item() / y.shape[0]

        return mean_pred_negloglike.item(), err
def eval(self, x, y):
    self.model.eval()
    x, y = to_variable(var=(x, y), cuda=self.cuda)
    mean, model_std = self.model.forward_predict(x, self.MC_samples)
    # mean predictive negative log-likelihood over the batch
    mean_pred_negloglike = self.f_neg_loglike(
        mean, y, model_std=model_std).mean(dim=0).data
    err = rms(mean, y).item()
    return mean_pred_negloglike.item(), err
def vec_predict(self, x, bin_mat):
    """Get predictions for specific binary vector configurations."""
    self.set_mode_train(train=False)
    x, = to_variable(var=(x, ), cuda=self.cuda)
    out = x.data.new(bin_mat.shape[0], x.shape[0], self.model.output_dim)
    for s in range(bin_mat.shape[0]):
        out[s] = self.model.vec_forward(x, bin_mat[s, :]).data
    prob_out = F.softmax(out, dim=2)
    return prob_out
def vec_predict(self, x, bin_mat):
    """Get predictions for specific binary vector configurations."""
    self.model.eval()
    x, = to_variable(var=(x, ), cuda=self.cuda)
    out = x.data.new(bin_mat.shape[0], x.shape[0], self.model.output_dim)
    for s in range(bin_mat.shape[0]):
        out[s] = self.model.vec_forward(x, bin_mat[s, :]).data
    if not self.regression:
        out = F.softmax(out, dim=2)  # convert logits to class probabilities
    return out.data
def sample_predict(self, x, grad=False):
    self.set_mode_train(train=False)
    x, = to_variable(var=(x, ), cuda=self.cuda)
    act_vec = self.model.forward_get_acts(x)
    probs = self.model.prob_model.efficient_predict(act_vec, softmax=True)
    # Note that these are weighted probs that need to be summed over dim 0 to be actual probs
    if grad:
        return probs
    else:
        return probs.data
def predict(self, x, Nsamples=10, return_model_std=False):
    self.model.eval()
    x, = to_variable(var=(x, ), cuda=self.cuda)
    mean, model_std = self.model.forward_predict(x, Nsamples)
    if return_model_std:
        return mean.data, model_std  # not .data in order to take integer from sgd
    else:
        # total predictive std: model uncertainty plus homoscedastic noise
        pred_std = (model_std**2 + self.f_neg_loglike.log_std.exp()**2).pow(0.5)
        return mean.data, pred_std.data
def fit(self, x, y):
    """Standard training step: MC dropout and ensembles."""
    self.model.train()
    x, y = to_variable(var=(x, y), cuda=self.cuda)
    self.optimizer.zero_grad()
    mean = self.model.forward(x)
    NLL = self.f_neg_loglike(mean, y).mean(dim=0)
    NLL.backward()
    self.optimizer.step()
    err = rms(mean, y).item()
    return -NLL.data.item(), NLL.data.item(), err
def eval(self, x, y):
    # TODO: make computationally stable with logsoftmax and nll loss
    self.set_mode_train(train=False)
    x, y = to_variable(var=(x, y.long()), cuda=self.cuda)
    act_vec = self.model.forward_get_acts(x)
    probs = self.model.prob_model.efficient_predict(act_vec, softmax=True).sum(dim=0).data
    minus_loglike = F.nll_loss(torch.log(probs), y, reduction='mean')
    pred = probs.max(dim=1, keepdim=False)[1]
    err = pred.ne(y.data).sum()
    return minus_loglike, err
def fit(self, x, y):
    """Optimise the stochastically estimated marginal joint of parameters and weights."""
    self.model.train()
    x, y = to_variable(var=(x, y), cuda=self.cuda)
    if not self.regression:
        y = y.long()
    self.optimizer.zero_grad()

    act_vec = self.model.forward(x)
    prior_loglikes = self.model.get_w_prior_loglike(k=None)
    joint_loglike_per_depth = self.prob_model.get_w_joint_loglike(
        prior_loglikes, act_vec, y, self.f_neg_loglike, self.N_train)  # size(depth)
    log_marginal_over_depth = self.prob_model.get_marg_loglike(joint_loglike_per_depth)

    loss = -log_marginal_over_depth / self.N_train
    loss.backward()
    self.optimizer.step()

    # Note: this posterior lags the parameters by one step, as it is estimated from
    # activations computed before the optimiser update.
    log_depth_posteriors = self.prob_model.get_depth_log_posterior(
        joint_loglike_per_depth, log_marginal_over_depth)
    self.prob_model.current_posterior = log_depth_posteriors.exp()

    if self.regression:
        means, model_stds = depth_categorical.marginalise_d_predict(
            act_vec.data, self.prob_model.current_posterior, depth=None,
            softmax=(not self.regression), get_std=True)
        mean_pred_negloglike = self.f_neg_loglike(
            means, y, model_std=model_stds).mean(dim=0).data
        err = rms(means, y).item()
    else:
        probs = depth_categorical.marginalise_d_predict(
            act_vec.data, self.prob_model.current_posterior, depth=None,
            softmax=(not self.regression))
        mean_pred_negloglike = self.f_neg_loglike_test(
            torch.log(probs), y).mean(dim=0).data
        pred = probs.max(dim=1, keepdim=False)[1]  # index of the max probability
        err = pred.ne(y.data).sum().item() / y.shape[0]

    return log_marginal_over_depth.data.item(), mean_pred_negloglike.item(), err
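# --- Illustrative training-loop sketch (not part of the original class) --------------
# A minimal driver for the marginal-likelihood `fit` step above and the matching
# `eval` method. The wrapper instance `net`, the data loaders and the epoch count are
# hypothetical; only the fit/eval signatures are taken from the code above.
def example_training_loop(net, trainloader, valloader, n_epochs=10):
    """Hypothetical driver: fit on the train set, then evaluate, once per epoch."""
    for epoch in range(n_epochs):
        for x, y in trainloader:
            marg_loglike, train_nll, train_err = net.fit(x, y)
        val_nll = 0.0
        val_err = 0.0
        for x, y in valloader:
            nll, err = net.eval(x, y)
            val_nll += nll / len(valloader)
            val_err += err / len(valloader)
    return val_nll, val_err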
def partial_predict(self, x, depth=None):
    self.set_mode_train(train=False)
    x, = to_variable(var=(x, ), cuda=self.cuda)
    if depth is None:
        _, depth = self.model.prob_model.get_q_probs().max(dim=0)
    act_vec = self.model.forward_get_acts(x, depth=depth)
    probs = self.model.prob_model.efficient_predict_d(act_vec, depth, softmax=True)
    # Note that these are weighted probs that need to be summed over dim 0 to be actual probs
    return probs
def fit(self, x, y):
    """Optimise the stochastically estimated marginal joint of parameters and weights."""
    self.set_mode_train(train=True)
    x, y = to_variable(var=(x, y), cuda=self.cuda)
    if not self.regression:
        y = y.long()
    self.optimizer.zero_grad()

    act_vec = self.model.forward(x)
    prior_loglikes = self.model.get_w_prior_loglike(k=None)
    ELBO = self.prob_model.estimate_ELBO(prior_loglikes, act_vec, y,
                                         self.f_neg_loglike, self.N_train, Beta=1)
    loss = -ELBO / self.N_train
    loss.backward()
    self.optimizer.step()

    self.prob_model.current_posterior = self.prob_model.get_q_probs()

    if self.regression:
        means, model_stds = depth_categorical.marginalise_d_predict(
            act_vec.data, self.prob_model.current_posterior, depth=None,
            softmax=(not self.regression), get_std=True)
        mean_pred_negloglike = self.f_neg_loglike(
            means, y, model_std=model_stds).mean(dim=0).data
        err = rms(means, y).item()
    else:
        probs = depth_categorical.marginalise_d_predict(
            act_vec.data, self.prob_model.current_posterior, depth=None,
            softmax=(not self.regression))
        mean_pred_negloglike = self.f_neg_loglike_test(
            torch.log(probs), y).mean(dim=0).data
        pred = probs.max(dim=1, keepdim=False)[1]  # index of the max probability
        err = pred.ne(y.data).sum().item() / y.shape[0]

    return ELBO.data.item(), mean_pred_negloglike.item(), err
def fast_predict(self, x):
    self.model.eval()
    with torch.no_grad():
        x, = to_variable(var=(x, ), cuda=self.cuda)
        act_vec = self.model.fast_forward_impl(
            x, self.prob_model.current_posterior, min_prob=1e-2).data
        probs = depth_categorical.marginalise_d_predict(
            act_vec, self.prob_model.current_posterior, depth=None,
            softmax=True, get_std=False)
        return probs.data
def get_exact_d_posterior(self, trainloader, train_bn=False, logposterior=False):
    """Get the exact posterior over depth and the marginal log-likelihood with a full
    forward pass over the training set."""
    if train_bn:
        self.model.train()
    else:
        self.model.eval()
    with torch.no_grad():
        prior_loglikes = self.model.get_w_prior_loglike(k=None)
        N_train = len(trainloader.dataset)
        assert N_train == self.N_train
        cum_joint_loglike_per_depth = []
        for x, y in trainloader:
            x, y = to_variable(var=(x, y), cuda=self.cuda)
            if not self.regression:
                y = y.long()
            act_vec = self.model.forward(x)
            joint_loglike_per_depth = self.prob_model.get_w_joint_loglike(
                prior_loglikes, act_vec, y, self.f_neg_loglike, N_train)  # size(depth)
            # weight each batch's contribution by its share of the dataset
            cum_joint_loglike_per_depth.append(
                (x.shape[0] / N_train) * joint_loglike_per_depth.data.unsqueeze(0))
        cum_joint_loglike_per_depth = torch.cat(cum_joint_loglike_per_depth, dim=0).sum(dim=0)

        log_marginal_over_depth = self.prob_model.get_marg_loglike(cum_joint_loglike_per_depth)
        log_depth_posteriors = self.prob_model.get_depth_log_posterior(
            cum_joint_loglike_per_depth, log_marginal_over_depth)

        if logposterior:
            exact_posterior = log_depth_posteriors
        else:
            exact_posterior = log_depth_posteriors.exp()
        return exact_posterior, log_marginal_over_depth.data.item()
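# --- Illustrative post-training refresh (not part of the original class) -------------
# Sketch of how the exact depth posterior above might replace the running estimate in
# `current_posterior` before final evaluation. The instance name `net` is hypothetical,
# and overwriting `current_posterior` with the exact posterior is an assumption made
# for this example rather than documented behaviour.
def example_refresh_depth_posterior(net, trainloader):
    """Hypothetical helper: recompute the depth posterior exactly and install it."""
    exact_posterior, marg_loglike = net.get_exact_d_posterior(trainloader, train_bn=False)
    net.prob_model.current_posterior = exact_posterior  # assumed usage
    return marg_loglike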
def fit(self, x, y):
    """Optimise the ELBO, treating model weights as hyperparameters."""
    self.set_mode_train(train=True)
    x, y = to_variable(var=(x, y.long()), cuda=self.cuda)
    self.optimizer.zero_grad()

    act_vec = self.model.forward_get_acts(x)
    mean_minus_loglike = self.model.prob_model.efficient_E_loglike(
        act_vec, y, self.f_neg_loglike)  # returns sample mean over the batch
    probs = self.model.prob_model.efficient_predict(act_vec, softmax=True).sum(dim=0).data

    KL_persample = self.model.get_KL() / self.N_train
    m_ELBO = mean_minus_loglike + KL_persample  # negative ELBO per data point
    m_ELBO.backward()
    self.optimizer.step()

    pred = probs.max(dim=1, keepdim=False)[1]  # index of the max probability
    err = pred.ne(y.data).sum()
    return KL_persample.item(), mean_minus_loglike.data.item(), err