def predict(self, X_test):
    r"""
    Return the predictive mean and variance of the objective function at
    the given test points.

    The test points are passed through ``network.basis_funcs`` and each
    Bayesian linear regression model in ``self.models`` predicts on the
    resulting features. The per-model predictions are then combined into
    one mean and one total variance per test point.

    Parameters
    ----------
    X_test: np.ndarray (N, D)
        N input test points

    Returns
    ----------
    np.array(N,)
        predictive mean
    np.array(N,)
        predictive variance
    """
    # Normalize inputs
    if self.normalize_input:
        X_, _, _ = zero_mean_unit_var_normalization(
            X_test, self.X_mean, self.X_std)
    else:
        X_ = X_test

    # Get features from the net (on the CPU, since they go to numpy next).
    network = self.network.cpu() if self.gpu else self.network
    with torch.no_grad():  # inference only: no autograd graph needed
        theta = network.basis_funcs(torch.Tensor(X_)).detach().numpy()

    # Marginalise predictions over hyperparameters of the BLR
    n_models = len(self.models)
    mu = np.zeros([n_models, X_test.shape[0]])
    var = np.zeros([n_models, X_test.shape[0]])
    for i, model in enumerate(self.models):
        mu[i], var[i] = model.predict(theta)

    # See the algorithm runtime prediction paper by Hutter et al
    # for the derivation of the total variance
    m = np.mean(mu, axis=0)
    v = np.mean(mu**2 + var, axis=0) - m**2

    # Floor the variance at machine epsilon so that round-off can never
    # yield a zero or negative variance. (The former two branches were
    # identical, and the zero-masking that followed the clip could never
    # match once every entry is >= eps.)
    v = np.clip(v, np.finfo(v.dtype).eps, np.inf)

    if self.normalize_output:
        m = zero_mean_unit_var_denormalization(m, self.y_mean, self.y_std)
        v *= self.y_std**2

    return m, v
def predict(self, x_test: np.ndarray, return_individual_predictions: bool = False):
    """
    Predict mean and variance at `x_test` from the sampled network weights.

    Each entry of ``self.sampled_weights`` is installed as
    ``self.network_weights`` in turn and ``self.model`` is evaluated on the
    test points; the first output column of every network is kept. After
    the call ``self.network_weights`` holds the last sampled weights.

    Parameters
    ----------
    x_test : numpy.ndarray (N, D)
        Input test datapoints.
    return_individual_predictions : bool
        If `True`, additionally return the list with the prediction of
        every individual sampled network.

    Returns
    ----------
    mean_prediction : numpy.ndarray (N,)
        Mean over the networks' predictions.
    variance_prediction : numpy.ndarray (N,)
        Mean squared deviation of the networks' predictions from that mean.
    network_outputs : list of numpy.ndarray (N,)
        Only returned if `return_individual_predictions` is `True`.
    """
    # All debug messages pass their arguments lazily, so large arrays and
    # weight sets are only turned into strings when DEBUG is enabled.
    logging.debug("Predicting started.")

    x_test_ = np.asarray(x_test)

    logging.debug("Processing %d test datapoints "
                  " with %d dimensions each.", *x_test_.shape)

    if self.normalize_input:
        logging.debug("Normalizing test datapoints to "
                      " zero mean and unit variance.")
        x_test_, *_ = zero_mean_unit_var_normalization(
            x_test_, self.x_mean, self.x_std)

    logging.debug("Predicting with %d networks.", len(self.sampled_weights))

    network_outputs = []
    with torch.no_grad():
        # The inputs are identical for every network: convert them once.
        inputs = torch.from_numpy(x_test_).float()
        for weights in self.sampled_weights:
            logging.debug("Predicting on data:\n%s Using weights:\n%s",
                          x_test_, weights)
            self.network_weights = weights
            network_outputs.append(self.model(inputs).numpy()[:, 0])

    mean_prediction = np.mean(network_outputs, axis=0)
    variance_prediction = np.mean(
        (np.asarray(network_outputs) - mean_prediction)**2, axis=0)

    if self.normalize_output:
        logging.debug("Unnormalizing predictions.")
        logging.debug("Mean of network predictions "
                      "before unnormalization:\n%s", mean_prediction)
        logging.debug("Variance/Uncertainty of network predictions "
                      "before unnormalization:\n%s", variance_prediction)

        mean_prediction = zero_mean_unit_var_unnormalization(
            mean_prediction, self.y_mean, self.y_std)
        variance_prediction *= self.y_std**2

        logging.debug("Mean of network predictions "
                      "after unnormalization:\n%s", mean_prediction)
        logging.debug("Variance/Uncertainty of network predictions "
                      "after unnormalization:\n%s", variance_prediction)

        network_outputs = [
            zero_mean_unit_var_unnormalization(output, self.y_mean, self.y_std)
            for output in network_outputs
        ]

    if return_individual_predictions:
        return mean_prediction, variance_prediction, network_outputs

    return mean_prediction, variance_prediction
def predict(self, X_test):
    r"""
    Return the predictive mean and variance of the objective function at
    the given test points.

    ``self.model`` is evaluated ``self.T`` times on the same inputs and the
    mean and variance are taken over those forward passes.

    Parameters
    ----------
    X_test: np.ndarray (N, D)
        N input test points

    Returns
    ----------
    np.array(N,)
        predictive mean
    np.array(N,)
        predictive variance
    """
    # Normalize inputs
    if self.normalize_input:
        X_, _, _ = zero_mean_unit_var_normalization(
            X_test, self.X_mean, self.X_std)
    else:
        X_ = X_test

    # Perform MC dropout
    model = self.model
    with torch.no_grad():  # inference only: no autograd graph needed
        # The inputs do not change between passes: convert them once.
        inputs = torch.Tensor(X_)
        # Yt_hat: T x N x 1
        Yt_hat = np.array(
            [model(inputs).detach().numpy() for _ in range(self.T)])

    MC_pred_mean = np.mean(Yt_hat, 0)  # N x 1
    Second_moment = np.mean(Yt_hat**2, 0)  # N x 1
    # NOTE: only the spread across forward passes is counted; no
    # observation-noise (1 / tau) term is added here.
    MC_pred_var = Second_moment - (MC_pred_mean**2)

    m = MC_pred_mean.flatten()

    # Floor the variance at machine epsilon so that round-off can never
    # yield a zero or negative variance. (The former two branches were
    # identical, and the zero-masking that followed the clip could never
    # match once every entry is >= eps.)
    v = np.clip(MC_pred_var, np.finfo(MC_pred_var.dtype).eps, np.inf)

    if self.normalize_output:
        m = zero_mean_unit_var_denormalization(m, self.y_mean, self.y_std)
        v *= self.y_std**2

    m = m.flatten()
    v = v.flatten()

    return m, v
def normalize_output(self, x, m=None, s=None):
    """
    Normalize the targets.

    Thin wrapper: the arguments are forwarded unchanged to
    ``zero_mean_unit_var_normalization``.

    :param x: targets
    :param m: mean
    :param s: standard deviation
    :return: normalized targets, exactly as returned by
        ``zero_mean_unit_var_normalization(x, m, s)``
    """
    normalized_targets = zero_mean_unit_var_normalization(x, m, s)
    return normalized_targets
def normalize_input(self, x, m=None, s=None):
    """
    Normalize the input data.

    Thin wrapper: the arguments are forwarded unchanged to
    ``zero_mean_unit_var_normalization``.

    :param x: data
    :param m: mean
    :param s: standard deviation
    :return: normalized input, exactly as returned by
        ``zero_mean_unit_var_normalization(x, m, s)``
    """
    normalized_data = zero_mean_unit_var_normalization(x, m, s)
    return normalized_data
def train(self, X_adj, X_ops, y, do_optimize=True):
    """
    Trains the model on the provided data.

    ``self.network`` is first fitted with Adam on a mean-squared-error
    loss. Its basis functions evaluated on the training inputs then form
    the design matrix ``self.Theta``, on which one Bayesian linear
    regression model per hyperparameter configuration is trained and stored
    in ``self.models``.

    Parameters
    ----------
    X_adj: np.ndarray
        Second input of the network; its first axis indexes the N data
        points. (Presumably adjacency matrices - confirm with the caller.)
    X_ops: np.ndarray
        First input of the network, aligned with ``X_adj`` along the first
        axis. (Presumably operation encodings - confirm with the caller.)
    y: np.ndarray (N,)
        The corresponding target values.
    do_optimize: boolean
        If set to true the hyperparameters are optimized otherwise
        the default hyperparameters are used.
    """
    start_time = time.time()

    self.X_adj = X_adj
    self.X_ops = X_ops

    # Normalize outputs
    if self.normalize_output:
        self.y, self.y_mean, self.y_std = zero_mean_unit_var_normalization(
            y)
    else:
        self.y = y
    self.y = self.y[:, None]

    # Use a single batch with all data points if there are not enough of
    # them to fill a minibatch.
    batch_size = min(self.batch_size, self.X_adj.shape[0])

    optimizer = optim.Adam(self.network.parameters(),
                           lr=self.init_learning_rate)

    # Start training
    lc = np.zeros([self.num_epochs])
    for epoch in range(self.num_epochs):
        epoch_start_time = time.time()

        train_err = 0.0
        train_batches = 0

        for batch in self.iterate_minibatches(self.X_adj, self.X_ops,
                                              self.y, batch_size,
                                              shuffle=True):
            inputs_adj = torch.Tensor(batch[0])
            inputs_ops = torch.Tensor(batch[1])
            targets = torch.Tensor(batch[2])

            optimizer.zero_grad()
            output = self.network(inputs_ops, inputs_adj)
            loss = torch.nn.functional.mse_loss(output, targets)
            loss.backward()
            optimizer.step()

            # Accumulate a plain float: summing the loss tensor itself
            # would keep the autograd history of every minibatch alive.
            train_err += loss.item()
            train_batches += 1

        lc[epoch] = train_err / train_batches
        logging.debug("Epoch %d of %d", epoch + 1, self.num_epochs)
        curtime = time.time()
        logging.debug("Epoch time %.3fs, total time %.3fs",
                      curtime - epoch_start_time, curtime - start_time)

    # Design matrix
    self.Theta = self.network.basis_funcs(
        torch.Tensor(self.X_ops), torch.Tensor(self.X_adj)).data.numpy()

    if do_optimize:
        if self.do_mcmc:
            self.sampler = emcee.EnsembleSampler(
                self.n_hypers, 2, self.marginal_log_likelihood)

            # Do a burn-in in the first iteration
            if not self.burned:
                # Initialize the walkers by sampling from the prior
                self.p0 = self.prior.sample_from_prior(self.n_hypers)
                # Run MCMC sampling
                self.p0, _, _ = self.sampler.run_mcmc(self.p0,
                                                      self.burnin_steps,
                                                      rstate0=self.rng)
                self.burned = True

            # Start sampling
            pos, _, _ = self.sampler.run_mcmc(self.p0,
                                              self.chain_length,
                                              rstate0=self.rng)

            # Save the current position, it will be the startpoint in
            # the next iteration
            self.p0 = pos

            # Take the last sample of each walker and map it back to a
            # linear scale; the second hyperparameter is stored inverted.
            linear_theta = np.exp(self.sampler.chain[:, -1])
            self.hypers = linear_theta
            self.hypers[:, 1] = 1 / self.hypers[:, 1]
        else:
            # Optimize hyperparameters of the Bayesian linear regression
            p0 = self.prior.sample_from_prior(n_samples=1)
            res = optimize.fmin(self.negative_mll, p0)
            self.hypers = [[np.exp(res[0]), 1 / np.exp(res[1])]]
    else:
        self.hypers = [[self.alpha, self.beta]]

    logging.info("Hypers: %s", self.hypers)

    # Instantiate a model for each hyperparameter configuration
    self.models = []
    for sample in self.hypers:
        model = BayesianLinearRegression(alpha=sample[0],
                                         beta=sample[1],
                                         basis_func=None)
        model.train(self.Theta, self.y[:, 0], do_optimize=False)
        self.models.append(model)
def predict(self, X_test):
    r"""
    Return the predictive mean and variance of the objective function at
    the given test points.

    ``self.model`` is evaluated ``self.T`` times on the CPU. The mean is
    taken over the first output of those forward passes; the variance is
    their variance plus ``self.aleatoric_uncertainty``.

    Parameters
    ----------
    X_test: np.ndarray (N, D)
        N input test points

    Returns
    ----------
    np.array(N,)
        predictive mean
    np.array(N,)
        predictive variance
    """
    # Normalize inputs
    if self.normalize_input:
        X_, _, _ = zero_mean_unit_var_normalization(X_test, self.X_mean, self.X_std)
    else:
        X_ = X_test

    # Perform MC dropout
    # NOTE(review): the model is switched to eval mode and still sampled
    # T times; this presumes the network keeps its dropout stochastic in
    # eval mode - confirm against the network definition.
    model = self.model
    model.eval()
    # Prediction always runs on the CPU (the former GPU path was guarded by
    # a flag hard-coded to False and therefore unreachable).
    model.cpu()
    T = self.T

    with torch.no_grad():  # inference only: no autograd graph needed
        # The inputs do not change between passes: convert them once.
        inputs = torch.FloatTensor(X_)
        MC_samples = [model(inputs) for _ in range(T)]
        # The first element of every model output is the predicted mean.
        means = torch.stack([sample[0] for sample in MC_samples]).view(
            T, X_.shape[0]).numpy()

    MC_pred_mean = np.mean(means, 0)  # N
    MC_pred_var = np.var(means, 0) + self.aleatoric_uncertainty

    m = MC_pred_mean.flatten()

    # Floor the variance at machine epsilon so that round-off can never
    # yield a zero or negative variance. (The former two branches were
    # identical, and the zero-masking that followed the clip could never
    # match once every entry is >= eps.)
    v = np.clip(MC_pred_var, np.finfo(MC_pred_var.dtype).eps, np.inf)

    if self.normalize_output:
        m = zero_mean_unit_var_denormalization(m, self.y_mean, self.y_std)
        v *= self.y_std ** 2

    m = m.flatten()
    v = v.flatten()

    return m, v
# train(X, y): fits a `Net` with Adam for `self.num_epochs` epochs and stores
# it in `self.model`, together with the per-epoch mean loss in `self.lc`.
#
# Overview of what the code below does:
#   * X and y are optionally normalised (`self.normalize_input` /
#     `self.normalize_output`) and y is reshaped to a column.
#   * The log-variance passed to `heteroscedastic_loss` is not predicted by
#     the network; it is the log of the minibatch mean squared error, taken
#     over `self.T` forward passes when `self.mc_tau` is set and over the
#     single forward pass otherwise.
#   * When `self.loss_cal` is set, `cal_loss` is used once
#     `epoch >= self.lc_burn`; `h_x` and `y_pred_samples` are refreshed from
#     10 forward passes after each optimiser step.
#   * After training, `self.aleatoric_uncertainty` is set to the mean squared
#     residual of `self.T` forward passes over the whole training set.
#
# NOTE(review): this definition has lost its line structure. The nesting of
#   `if self.weights is None:` / `if self.loss_cal and ...:` / `else:` cannot
#   be recovered from this text and it changes which loss is used. Restore
#   the layout from version control rather than re-indenting by hand.
# NOTE(review): `y_pred_samples` is first assigned after `optimizer.step()`
#   (when `self.loss_cal and epoch >= self.lc_burn - 1`) but is read by the
#   `cal_loss` call (when `self.loss_cal and epoch >= self.lc_burn`). With
#   `self.lc_burn == 0` the very first minibatch appears to read it before
#   any assignment - verify.
# NOTE(review): `h_x` and `y_pred_samples` passed to `cal_loss` were computed
#   on the previous minibatch's inputs - confirm that this is intended.
# NOTE(review): `train_err += loss` sums loss tensors rather than floats and
#   `backward(retain_graph=True)` keeps each graph; presumably this retains
#   autograd history for the whole epoch - consider `loss.item()`.
def train(self, X, y): """ Trains the model on the provided data. Parameters ---------- X: np.ndarray (N, D) Input data points. The dimensionality of X is (N, D), with N as the number of points and D is the number of features. y: np.ndarray (N,) The corresponding target values. """ start_time = time.time() # Normalize inputs if self.normalize_input: self.X, self.X_mean, self.X_std = zero_mean_unit_var_normalization(X) else: self.X = X # Normalize ouputs if self.normalize_output: self.y, self.y_mean, self.y_std = zero_mean_unit_var_normalization(y) else: self.y = y self.y = self.y[:, None] N = self.X.shape[0] # Check if we have enough points to create a minibatch otherwise use all data points if N <= self.batch_size: batch_size = N else: batch_size = self.batch_size # Create the neural network features = X.shape[1] wr = self.length_scale ** 2. / N dr = 2. / N network = Net(n_inputs=features, n_units=[self.n_units_1, self.n_units_2, self.n_units_3], weight_regularizer=wr, dropout_regularizer=dr, actv=self.actv) if self.gpu: # network = network.cuda() network = network.to(self.device) optimizer = optim.Adam(network.parameters(), lr=self.init_learning_rate) # Start training lc = np.zeros([self.num_epochs]) if self.loss_cal: util = utility(util_type=self.util_type, Y_train=self.y) for epoch in range(self.num_epochs): epoch_start_time = time.time() train_err = 0 train_batches = 0 for batch in self.iterate_minibatches(self.X, self.y, batch_size, shuffle=True): # inputs = torch.Tensor(batch[0]) # targets = torch.Tensor(batch[1]) inputs = Variable(torch.FloatTensor(batch[0])) targets = Variable(torch.FloatTensor(batch[1])) if self.gpu: inputs = inputs.to(self.device) targets = targets.to(self.device) if epoch == 0 and self.loss_cal and self.lc_burn == 0: h_x = targets optimizer.zero_grad() # output, log_var, regularization = network(inputs) output, regularization = network(inputs) # Estimate log_var empirically if self.mc_tau: minbatch_samples = [network(inputs) for _ in 
range(self.T)] y_minibatch_predict_samples = torch.stack([tup[0] for tup in minbatch_samples]) minibatch_var = torch.mean(torch.mean((y_minibatch_predict_samples - targets)**2,0)) else: minibatch_var = torch.mean((output - targets)**2) minibatch_log_var = torch.log(minibatch_var) if self.regu: if self.weights is None: loss = heteroscedastic_loss(targets, output, minibatch_log_var)+ regularization if self.loss_cal and epoch >= self.lc_burn: loss = cal_loss(targets, output, util, h_x, y_pred_samples, output, minibatch_log_var, regularization=regularization) else: loss = heteroscedastic_loss(targets, output, minibatch_log_var)+ regularization else: if self.weights is None: loss = heteroscedastic_loss(targets, output, minibatch_log_var) if self.loss_cal and epoch >= self.lc_burn: loss = cal_loss(targets, output, util, h_x, y_pred_samples, output, minibatch_log_var, regularization=None) else: loss = heteroscedastic_loss(targets, output, minibatch_log_var) loss.backward(retain_graph=True) optimizer.step() train_err += loss train_batches += 1 if self.loss_cal and epoch >= (self.lc_burn - 1): mc_samples = [network(inputs) for _ in range(10)] y_pred_samples = torch.stack([tup[0] for tup in mc_samples]) if self.util_type == 'se_prod_y': numerator = torch.sum(y_pred_samples * torch.exp(y_pred_samples),0) denominator = torch.sum(torch.exp(y_pred_samples),0) h_x = numerator / denominator else: y_pred_mean = torch.mean(y_pred_samples, 0) h_x = y_pred_mean lc[epoch] = train_err / train_batches logging.debug("Epoch {} of {}".format(epoch + 1, self.num_epochs)) curtime = time.time() epoch_time = curtime - epoch_start_time total_time = curtime - start_time logging.debug("Epoch time {:.3f}s, total time {:.3f}s".format(epoch_time, total_time)) logging.debug("Training loss:\t\t{:.5g}".format(train_err / train_batches)) self.model = network self.lc = lc # Estimate aleatoric uncertainty X_train_tensor = Variable(torch.FloatTensor(self.X)) if self.gpu: X_train_tensor = 
X_train_tensor.to(self.device) y_train_mc_samples = [network(X_train_tensor) for _ in range(self.T)] y_train_predict_samples = torch.stack([tup[0] for tup in y_train_mc_samples]).view(self.T, N).cpu().data.numpy() self.aleatoric_uncertainty = np.mean(np.mean((y_train_predict_samples - self.y.flatten())**2, 0))
def train(self, X, y):
    """
    Trains the model on the provided data.

    A fresh ``Net`` with dropout is fitted with Adam on a
    mean-squared-error loss and stored in ``self.model``. The (optionally
    normalized) training data is kept in ``self.X`` and ``self.y``.

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D is the number of features.
    y: np.ndarray (N,)
        The corresponding target values.
    """
    start_time = time.time()

    # Normalize inputs
    if self.normalize_input:
        self.X, self.X_mean, self.X_std = zero_mean_unit_var_normalization(
            X)
    else:
        self.X = X

    # Normalize outputs
    if self.normalize_output:
        self.y, self.y_mean, self.y_std = zero_mean_unit_var_normalization(
            y)
    else:
        self.y = y
    self.y = self.y[:, None]

    # Use a single batch with all data points if there are not enough of
    # them to fill a minibatch.
    batch_size = min(self.batch_size, self.X.shape[0])

    # Create the neural network
    features = X.shape[1]
    network = Net(n_inputs=features,
                  dropout=self.dropout,
                  n_units=[self.n_units_1, self.n_units_2, self.n_units_3])
    optimizer = optim.Adam(network.parameters(),
                           lr=self.init_learning_rate)

    # Start training
    lc = np.zeros([self.num_epochs])
    for epoch in range(self.num_epochs):
        epoch_start_time = time.time()

        train_err = 0.0
        train_batches = 0

        for batch in self.iterate_minibatches(self.X, self.y, batch_size,
                                              shuffle=True):
            inputs = torch.Tensor(batch[0])
            targets = torch.Tensor(batch[1])

            optimizer.zero_grad()
            output = network(inputs)
            loss = torch.nn.functional.mse_loss(output, targets)
            loss.backward()
            optimizer.step()

            # Accumulate a plain float: summing the loss tensor itself
            # would keep the autograd history of every minibatch alive.
            train_err += loss.item()
            train_batches += 1

        epoch_loss = train_err / train_batches
        lc[epoch] = epoch_loss
        logging.debug("Epoch %d of %d", epoch + 1, self.num_epochs)
        curtime = time.time()
        logging.debug("Epoch time %.3fs, total time %.3fs",
                      curtime - epoch_start_time, curtime - start_time)
        logging.debug("Training loss:\t\t%.5g", epoch_loss)

    self.model = network
def train(self, X, y):
    """
    Trains the model on the provided data.

    A fresh ``Net`` is fitted with Adam on ``heteroscedastic_loss`` (plus
    the network's own regularization term when ``self.regu`` is set) and
    stored in ``self.model``. The (optionally normalized) training data is
    kept in ``self.X`` and ``self.y``.

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D is the number of features.
    y: np.ndarray (N,)
        The corresponding target values.
    """
    start_time = time.time()

    # Normalize inputs
    if self.normalize_input:
        self.X, self.X_mean, self.X_std = zero_mean_unit_var_normalization(
            X)
    else:
        self.X = X

    # Normalize outputs
    if self.normalize_output:
        self.y, self.y_mean, self.y_std = zero_mean_unit_var_normalization(
            y)
    else:
        self.y = y
    self.y = self.y[:, None]

    N = self.X.shape[0]

    # Use a single batch with all data points if there are not enough of
    # them to fill a minibatch.
    batch_size = min(self.batch_size, N)

    # Create the neural network; both regularizer weights scale with 1 / N.
    features = X.shape[1]
    wr = self.length_scale**2. / N
    dr = 2. / N
    network = Net(n_inputs=features,
                  n_units=[self.n_units_1, self.n_units_2, self.n_units_3],
                  weight_regularizer=wr,
                  dropout_regularizer=dr,
                  actv=self.actv)
    if self.gpu:
        network = network.to(self.device)

    optimizer = optim.Adam(network.parameters(),
                           lr=self.init_learning_rate)

    # Start training
    lc = np.zeros([self.num_epochs])
    network.train()
    for epoch in range(self.num_epochs):
        epoch_start_time = time.time()

        train_err = 0.0
        train_batches = 0

        for batch in self.iterate_minibatches(self.X, self.y, batch_size,
                                              shuffle=True):
            # Plain tensors suffice: the file already relies on `.to()`,
            # an API of PyTorch versions where `Variable` is a no-op.
            inputs = torch.FloatTensor(batch[0])
            targets = torch.FloatTensor(batch[1])
            if self.gpu:
                inputs = inputs.to(self.device)
                targets = targets.to(self.device)

            optimizer.zero_grad()
            output, log_var, regularization = network(inputs)
            loss = heteroscedastic_loss(targets, output, log_var)
            if self.regu:
                loss = loss + regularization
            loss.backward()
            optimizer.step()

            # Accumulate a plain float: summing the loss tensor itself
            # would keep the autograd history of every minibatch alive.
            train_err += loss.item()
            train_batches += 1

        epoch_loss = train_err / train_batches
        lc[epoch] = epoch_loss
        logging.debug("Epoch %d of %d", epoch + 1, self.num_epochs)
        curtime = time.time()
        logging.debug("Epoch time %.3fs, total time %.3fs",
                      curtime - epoch_start_time, curtime - start_time)
        logging.debug("Training loss:\t\t%.5g", epoch_loss)

    self.model = network
def predict(self, X_test):
    r"""
    Return the predictive mean and variance of the objective function at
    the given test points.

    ``self.model`` is evaluated ``self.T`` times. Its first output is the
    predicted mean and its second output the predicted log-variance. The
    returned variance is the total predictive variance

        Var[y] = Var_t[mean_t] + E_t[exp(logvar_t)]

    i.e. the spread of the predicted means across forward passes
    (epistemic part) plus the average predicted noise variance (aleatoric
    part).

    Parameters
    ----------
    X_test: np.ndarray (N, D)
        N input test points

    Returns
    ----------
    np.array(N,)
        predictive mean
    np.array(N,)
        predictive variance
    """
    # Normalize inputs
    if self.normalize_input:
        X_, _, _ = zero_mean_unit_var_normalization(
            X_test, self.X_mean, self.X_std)
    else:
        X_ = X_test

    # Perform MC dropout
    # NOTE(review): the model is switched to eval mode and still sampled
    # T times; this presumes the network keeps its dropout stochastic in
    # eval mode - confirm against the network definition.
    model = self.model
    T = self.T
    model.eval()

    with torch.no_grad():  # inference only: no autograd graph needed
        # The inputs do not change between passes: convert them once.
        inputs = torch.FloatTensor(X_)
        MC_samples = [model(inputs) for _ in range(T)]
        n_points = X_.shape[0]
        means = torch.stack([sample[0] for sample in MC_samples]).view(
            T, n_points).numpy()
        logvar = torch.stack([sample[1] for sample in MC_samples]).view(
            T, n_points).numpy()

    aleatoric_uncertainty = np.exp(logvar).mean(0)  # N

    MC_pred_mean = np.mean(means, 0)  # N
    Second_moment = np.mean(means**2, 0)  # N
    # `Second_moment - MC_pred_mean**2` already is the per-point epistemic
    # variance. The previous code added a dataset-averaged epistemic term on
    # top of it and never used the aleatoric term it had just computed.
    MC_pred_var = Second_moment - (MC_pred_mean**2) + aleatoric_uncertainty

    m = MC_pred_mean.flatten()

    # Floor the variance at machine epsilon so that round-off can never
    # yield a zero or negative variance. (The former two branches were
    # identical, and the zero-masking that followed the clip could never
    # match once every entry is >= eps.)
    v = np.clip(MC_pred_var, np.finfo(MC_pred_var.dtype).eps, np.inf)

    if self.normalize_output:
        m = zero_mean_unit_var_denormalization(m, self.y_mean, self.y_std)
        v *= self.y_std**2

    m = m.flatten()
    v = v.flatten()

    return m, v
def train(self, x_train: np.ndarray, y_train: np.ndarray):
    """
    Train a BNN using input datapoints `x_train` with corresponding
    labels `y_train`.

    Previously sampled weights are discarded. A new network is created and
    run for ``self.num_steps`` sampler steps; whenever
    ``self._keep_sample(step)`` is true the current ``self.network_weights``
    are appended to ``self.sampled_weights``.

    Parameters
    ----------
    x_train : numpy.ndarray (N, D)
        Input training datapoints.
    y_train : numpy.ndarray (N,)
        Input training labels.

    Returns
    ----------
    self
        The trained instance, with ``self.is_trained`` set to `True`.
    """
    logging.debug("Training started.")

    logging.debug("Clearing list of sampled weights.")
    self.sampled_weights.clear()

    # Convert first so that array-likes without `.shape` are accepted too.
    x_train_ = np.asarray(x_train)
    num_datapoints, input_dimensionality = x_train_.shape
    # The second placeholder used to read "% dimensions", which Python
    # parses as the `% d` conversion and printed e.g. " 5imensions".
    logging.debug("Processing %d training datapoints "
                  " with %d dimensions each.",
                  num_datapoints, input_dimensionality)

    if self.normalize_input:
        logging.debug("Normalizing training datapoints to "
                      " zero mean and unit variance.")
        x_train_, self.x_mean, self.x_std = zero_mean_unit_var_normalization(
            x_train_)

    y_train_ = np.asarray(y_train)
    if self.normalize_output:
        logging.debug(
            "Normalizing training labels to zero mean and unit variance.")
        y_train_, self.y_mean, self.y_std = zero_mean_unit_var_normalization(
            y_train_)

    dataset = data_utils.TensorDataset(
        torch.from_numpy(x_train_).float(),
        torch.from_numpy(y_train_).float())
    train_loader = infinite_dataloader(
        data_utils.DataLoader(dataset, batch_size=self.batch_size))

    self.model = get_network(input_dimensionality=input_dimensionality)

    sampler = AdaptiveSGHMC(self.model.parameters(),
                            scale_grad=num_datapoints)

    # Each iteration is one sampler step on one minibatch; the loader is
    # endless, so `islice` bounds the run to `self.num_steps` steps.
    batch_generator = islice(enumerate(train_loader), self.num_steps)

    for step, (x_batch, y_batch) in batch_generator:
        sampler.zero_grad()
        loss = nll(input=self.model(x_batch), target=y_batch)
        loss.backward()
        sampler.step()

        if self._keep_sample(step):
            logging.debug("Recording sample, epoch = %d ", step)
            weights = self.network_weights
            # Lazy argument: the weights are only stringified under DEBUG.
            logging.debug("Sampled weights:\n%s", weights)
            self.sampled_weights.append(weights)

    self.is_trained = True
    return self