def predict(self, context_x, context_y, test_x, return_tensor=False, num_steps_eval=None):
    """
    Adapts the initial (MAML) parameters based on the context data and computes predictions for test_x

    Args:
        context_x: (ndarray) context input data for which to compute the posterior
        context_y: (ndarray) context targets for which to compute the posterior
        test_x: (ndarray) query input data of shape (n_samples, ndim_x)
        return_tensor: (bool) whether to return a torch tensor or a numpy array
        num_steps_eval: (int) number of adaptation steps

    Returns:
        pred_mean: predicted mean corresponding to test_x
    """
    context_x, context_y = _handle_input_dimensionality(context_x, context_y)
    test_x = _handle_input_dimensionality(test_x)
    assert test_x.shape[1] == context_x.shape[1]

    # normalize data and convert to tensor
    context_x, context_y = self._prepare_data_per_task(context_x, context_y, flatten_y=False)
    test_x = self._normalize_data(X=test_x, Y=None)
    test_x = torch.from_numpy(test_x).float().to(device)

    # perform adaptation steps on the context data
    adapted_params = self._eval_steps(context_x, context_y, num_steps_eval=num_steps_eval)

    with torch.no_grad():
        y_pred = self.nn.forward_parametrized(test_x, adapted_params)
        # undo the target normalization
        y_pred = y_pred * torch.Tensor(self.y_std).float()[None, :] \
                 + torch.Tensor(self.y_mean).float()[None, :]

        if return_tensor:
            return y_pred
        else:
            return y_pred.cpu().numpy()
def predict(self, context_x, context_y, test_x, return_density=False):
    """
    Performs posterior inference (target training) with (context_x, context_y) as training data and then
    computes the predictive distribution of the targets p(y|test_x, test_context_x, context_y) in the test points

    Args:
        context_x: (ndarray) context input data for which to compute the posterior
        context_y: (ndarray) context targets for which to compute the posterior
        test_x: (ndarray) query input data of shape (n_samples, ndim_x)
        return_density: (bool) whether to return result as mean and std ndarray or as MultivariateNormal pytorch object

    Returns:
        (pred_mean, pred_std) predicted mean and standard deviation corresponding to p(y|test_x, test_context_x, context_y)
    """
    context_x, context_y = _handle_input_dimensionality(context_x, context_y)
    test_x = _handle_input_dimensionality(test_x)
    assert test_x.shape[1] == context_x.shape[1]

    # normalize data and convert to tensor
    context_x, context_y = self._prepare_data_per_task(context_x, context_y)
    test_x = self._normalize_data(X=test_x, Y=None)
    test_x = torch.from_numpy(test_x).float().to(device)

    with torch.no_grad():
        # compute the GP posterior given the context data
        gp_model = LearnedGPRegressionModel(context_x, context_y, self.likelihood,
                                            learned_kernel=self.nn_kernel_map, learned_mean=self.nn_mean_fn,
                                            covar_module=self.covar_module, mean_module=self.mean_module)
        gp_model.eval()
        self.likelihood.eval()
        pred_dist = self.likelihood(gp_model(test_x))
        # transform the predictive distribution back to the original (unnormalized) target scale
        pred_dist_transformed = AffineTransformedDistribution(pred_dist, normalization_mean=self.y_mean,
                                                              normalization_std=self.y_std)
    if return_density:
        return pred_dist_transformed
    else:
        pred_mean = pred_dist_transformed.mean
        pred_std = pred_dist_transformed.stddev
        return pred_mean.cpu().numpy(), pred_std.cpu().numpy()
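# Usage sketch for the GP-based predict above (illustration only, not part of the
# original source): assumes `model` is a trained instance of the class defining
# this method; the data arrays are synthetic.
def _example_gp_predict(model):
    import numpy as np
    context_x = np.random.uniform(-4, 4, size=(10, 1))
    context_y = np.sin(context_x)
    test_x = np.linspace(-4, 4, 50).reshape(-1, 1)
    # (mean, std) as numpy arrays ...
    pred_mean, pred_std = model.predict(context_x, context_y, test_x)
    # ... or the full predictive density as a distribution object
    pred_dist = model.predict(context_x, context_y, test_x, return_density=True)
    return pred_mean, pred_std, pred_dist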
def predict(self, context_x, context_y, test_x, return_density=False):
    """
    Computes the predictive distribution of the targets p(t|test_x, test_context_x, context_y)

    Args:
        context_x: (ndarray) context input data for which to compute the posterior
        context_y: (ndarray) context targets for which to compute the posterior
        test_x: (ndarray) query input data of shape (n_samples, ndim_x)
        return_density: (bool) whether to return result as mean and std ndarray or as MultivariateNormal pytorch object

    Returns:
        (pred_mean, pred_std) predicted mean and standard deviation corresponding to p(t|test_x, test_context_x, context_y)
    """
    train_old = self.model.training
    self.model.eval()

    context_x, context_y = _handle_input_dimensionality(context_x, context_y)
    test_x = _handle_input_dimensionality(test_x)
    assert test_x.shape[1] == context_x.shape[1]

    # normalize data and convert to tensor
    context_x, context_y = self._prepare_data_per_task(context_x, context_y, flatten_y=False)
    test_x = self._normalize_data(X=test_x, Y=None)
    test_x = torch.from_numpy(test_x).float().to(device)

    # add the batch dimension expected by the model
    context_x = torch.unsqueeze(context_x, 0)
    context_y = torch.unsqueeze(context_y, 0)
    test_x = torch.unsqueeze(test_x, 0)

    with torch.no_grad():
        # compute posterior given the context data
        pred_dist = self.model(context_x, context_y, test_x)
        pred_dist_transformed = AffineTransformedDistribution(pred_dist, normalization_mean=self.y_mean,
                                                              normalization_std=self.y_std)
    # restore the previous training mode
    if train_old:
        self.model.train()

    if return_density:
        return pred_dist_transformed
    else:
        pred_mean = pred_dist_transformed.mean
        pred_std = pred_dist_transformed.stddev
        return pred_mean.cpu().numpy(), pred_std.cpu().numpy()
def predict(self, context_x, context_y, test_x, n_iter_meta_test=3000, return_density=False):
    """
    Computes the predictive distribution of the targets p(t|test_x, test_context_x, context_y)

    Args:
        context_x: (ndarray) context input data for which to compute the posterior
        context_y: (ndarray) context targets for which to compute the posterior
        test_x: (ndarray) query input data of shape (n_samples, ndim_x)
        n_iter_meta_test: (int) number of meta-test training / inference iterations
        return_density: (bool) whether to return result as mean and std ndarray or as MultivariateNormal pytorch object

    Returns:
        (pred_mean, pred_std) predicted mean and standard deviation corresponding to p(t|test_x, test_context_x, context_y)
    """
    context_x, context_y = _handle_input_dimensionality(context_x, context_y)
    test_x = _handle_input_dimensionality(test_x)
    assert test_x.shape[1] == context_x.shape[1]

    # meta-test training / inference
    task_dict = self._meta_test_inference([(context_x, context_y)], verbose=True, log_period=500,
                                          n_iter=n_iter_meta_test)[0]

    with torch.no_grad():
        # meta-test evaluation
        test_x = self._normalize_data(X=test_x, Y=None)
        test_x = torch.from_numpy(test_x).float().to(device)

        gp_model = task_dict["gp_model"]
        gp_model.eval()
        pred_dist = self.likelihood(gp_model(test_x))
        pred_dist = AffineTransformedDistribution(pred_dist, normalization_mean=self.y_mean,
                                                  normalization_std=self.y_std)

    if return_density:
        return pred_dist
    else:
        pred_mean = pred_dist.mean.cpu().numpy()
        pred_std = pred_dist.stddev.cpu().numpy()
        return pred_mean, pred_std
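# Usage sketch for the meta-test predict above (illustration only, not part of
# the original source): assumes `model` is a trained instance; the data and the
# iteration count are illustrative.
def _example_meta_test_predict(model):
    import numpy as np
    context_x = np.random.uniform(-4, 4, size=(10, 1))
    context_y = np.sin(context_x)
    test_x = np.linspace(-4, 4, 50).reshape(-1, 1)
    # fewer meta-test iterations trade predictive quality for speed
    pred_mean, pred_std = model.predict(context_x, context_y, test_x,
                                        n_iter_meta_test=1000)
    return pred_mean, pred_std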
def eval(self, context_x, context_y, test_x, test_y, num_steps_eval=None):
    """
    Computes the rmse on the test data after adapting the parameters to the context data

    Args:
        context_x: (ndarray) context input data for which to compute the posterior
        context_y: (ndarray) context targets for which to compute the posterior
        test_x: (ndarray) test input data of shape (n_samples, ndim_x)
        test_y: (ndarray) test target data of shape (n_samples, ndim_y)
        num_steps_eval: (int) number of adaptation steps

    Returns:
        rmse
    """
    test_x, test_y = _handle_input_dimensionality(test_x, test_y)
    test_y_tensor = torch.from_numpy(test_y).float().to(device)

    y_pred = self.predict(context_x, context_y, test_x, return_tensor=True,
                          num_steps_eval=num_steps_eval)
    rmse = torch.mean(torch.sum(torch.pow(y_pred - test_y_tensor, 2), dim=-1)).sqrt()
    return rmse.cpu().item()
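# Usage sketch for the adaptation-based predict / eval pair above (illustration
# only, not part of the original source): assumes `model` is a trained instance
# of the MAML-style class; arrays and step counts are illustrative.
def _example_maml_eval(model):
    import numpy as np
    context_x = np.random.uniform(-4, 4, size=(10, 1))
    context_y = np.sin(context_x)
    test_x = np.random.uniform(-4, 4, size=(50, 1))
    test_y = np.sin(test_x)
    y_pred = model.predict(context_x, context_y, test_x, num_steps_eval=5)
    rmse = model.eval(context_x, context_y, test_x, test_y, num_steps_eval=5)
    return y_pred, rmse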
def eval(self, test_x, test_t, **kwargs):
    """
    Computes the average test log likelihood, the rmse and the calibration error on test data

    Args:
        test_x: (ndarray) test input data of shape (n_samples, ndim_x)
        test_t: (ndarray) test target data of shape (n_samples, 1)

    Returns:
        (avg_log_likelihood, rmse, calibr_error)
    """
    # convert to tensors
    test_x, test_t = _handle_input_dimensionality(test_x, test_t)
    test_t_tensor = torch.from_numpy(test_t).contiguous().float().flatten().to(device)

    with torch.no_grad():
        pred_dist = self.predict(test_x, return_density=True, **kwargs)
        avg_log_likelihood = pred_dist.log_prob(test_t_tensor) / test_t_tensor.shape[0]
        rmse = torch.mean(torch.pow(pred_dist.mean - test_t_tensor, 2)).sqrt()

        pred_dist_vect = self._vectorize_pred_dist(pred_dist)
        calibr_error = self._calib_error(pred_dist_vect, test_t_tensor)

    return avg_log_likelihood.cpu().item(), rmse.cpu().item(), calibr_error.cpu().item()
def eval(self, context_x, context_y, test_x, test_y, flatten_y=True, **kwargs):
    """
    Performs posterior inference (target training) with (context_x, context_y) as training data and then
    computes the average test log likelihood, rmse and calibration error on (test_x, test_y)

    Args:
        context_x: (ndarray) context input data for which to compute the posterior
        context_y: (ndarray) context targets for which to compute the posterior
        test_x: (ndarray) test input data of shape (n_samples, ndim_x)
        test_y: (ndarray) test target data of shape (n_samples, 1)

    Returns:
        (avg_log_likelihood, rmse, calibr_error)
    """
    context_x, context_y = _handle_input_dimensionality(context_x, context_y)
    test_x, test_y = _handle_input_dimensionality(test_x, test_y)

    if flatten_y:
        test_y_tensor = torch.from_numpy(test_y).float().flatten().to(device)
    else:
        test_y_tensor = torch.unsqueeze(torch.from_numpy(test_y).float().to(device), dim=0)

    with torch.no_grad():
        pred_dist = self.predict(context_x, context_y, test_x, return_density=True, **kwargs)
        avg_log_likelihood = torch.mean(pred_dist.log_prob(test_y_tensor) / test_y_tensor.shape[0])
        rmse = torch.mean(torch.pow(pred_dist.mean - test_y_tensor, 2)).sqrt()

        pred_dist_vect = self._vectorize_pred_dist(pred_dist)
        calibr_error = self._calib_error(pred_dist_vect, test_y_tensor)

    return avg_log_likelihood.cpu().item(), rmse.cpu().item(), calibr_error.cpu().item()
def predict(self, context_x, context_y, test_x, n_posterior_samples=100, mode='Bayes', return_density=False):
    """
    Computes the predictive distribution of the targets p(t|test_x, test_context_x, context_y)

    Args:
        context_x: (ndarray) context input data for which to compute the posterior
        context_y: (ndarray) context targets for which to compute the posterior
        test_x: (ndarray) query input data of shape (n_samples, ndim_x)
        n_posterior_samples: (int) number of samples from posterior to average over
        mode: (str) either of ['Bayes', 'MAP']
        return_density: (bool) whether to return result as mean and std ndarray or as MultivariateNormal pytorch object

    Returns:
        (pred_mean, pred_std) predicted mean and standard deviation corresponding to p(t|test_x, test_context_x, context_y)
    """
    assert mode in ['bayes', 'Bayes', 'MAP', 'map']
    context_x, context_y = _handle_input_dimensionality(context_x, context_y)
    test_x = _handle_input_dimensionality(test_x)
    assert test_x.shape[1] == context_x.shape[1]

    # normalize data and convert to tensor
    context_x, context_y = self._prepare_data_per_task(context_x, context_y)
    test_x = self._normalize_data(X=test_x, Y=None)
    test_x = torch.from_numpy(test_x).float().to(device)

    with torch.no_grad():
        if mode in ('Bayes', 'bayes'):
            # average over posterior samples -> equally weighted mixture distribution
            pred_dist = self.get_pred_dist(context_x, context_y, test_x, n_post_samples=n_posterior_samples)
            pred_dist = AffineTransformedDistribution(pred_dist, normalization_mean=self.y_mean,
                                                      normalization_std=self.y_std)
            pred_dist = EqualWeightedMixtureDist(pred_dist, batched=True)
        else:
            # MAP point estimate
            pred_dist = self.get_pred_dist_map(context_x, context_y, test_x)
            pred_dist = AffineTransformedDistribution(pred_dist, normalization_mean=self.y_mean,
                                                      normalization_std=self.y_std)
    if return_density:
        return pred_dist
    else:
        pred_mean = pred_dist.mean.cpu().numpy()
        pred_std = pred_dist.stddev.cpu().numpy()
        return pred_mean, pred_std
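# Usage sketch contrasting the two prediction modes above (illustration only,
# not part of the original source): assumes `model` is a trained instance of
# the class defining this predict; data is synthetic.
def _example_bayes_vs_map_predict(model):
    import numpy as np
    context_x = np.random.uniform(-4, 4, size=(10, 1))
    context_y = np.sin(context_x)
    test_x = np.linspace(-4, 4, 50).reshape(-1, 1)
    # 'Bayes': average over posterior samples (equally weighted mixture)
    mean_bayes, std_bayes = model.predict(context_x, context_y, test_x,
                                          n_posterior_samples=100, mode='Bayes')
    # 'MAP': single point estimate of the posterior
    mean_map, std_map = model.predict(context_x, context_y, test_x, mode='MAP')
    return mean_bayes, std_bayes, mean_map, std_map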
def predict(self, context_x, context_y, test_x, return_density=False):
    """
    Performs posterior inference (target training) with (context_x, context_y) as training data and then
    computes the predictive distribution of the targets p(y|test_x, test_context_x, context_y) in the test points

    Args:
        context_x: (ndarray) context input data for which to compute the posterior
        context_y: (ndarray) context targets for which to compute the posterior
        test_x: (ndarray) query input data of shape (n_samples, ndim_x)
        return_density: (bool) whether to return result as mean and std ndarray or as MultivariateNormal pytorch object

    Returns:
        (pred_mean, pred_std) predicted mean and standard deviation corresponding to p(y|test_x, test_context_x, context_y)
    """
    context_x, context_y = _handle_input_dimensionality(context_x, context_y)
    test_x = _handle_input_dimensionality(test_x)
    assert test_x.shape[1] == context_x.shape[1]

    # normalize data and convert to tensor
    context_x, context_y = self._prepare_data_per_task(context_x, context_y)
    test_x = self._normalize_data(X=test_x, Y=None)
    test_x = torch.from_numpy(test_x).float().to(device)

    with torch.no_grad():
        pred_dist = self.get_pred_dist(context_x, context_y, test_x)
        pred_dist = AffineTransformedDistribution(pred_dist, normalization_mean=self.y_mean,
                                                  normalization_std=self.y_std)
        pred_dist = EqualWeightedMixtureDist(pred_dist, batched=True)

    if return_density:
        return pred_dist
    else:
        pred_mean = pred_dist.mean.cpu().numpy()
        pred_std = pred_dist.stddev.cpu().numpy()
        return pred_mean, pred_std
def _check_meta_data_shapes(self, meta_train_data):
    for i in range(len(meta_train_data)):
        meta_train_data[i] = _handle_input_dimensionality(*meta_train_data[i])
    self.input_dim = meta_train_data[0][0].shape[-1]
    self.output_dim = meta_train_data[0][1].shape[-1]

    assert all([self.input_dim == train_x.shape[-1] and self.output_dim == train_t.shape[-1]
                for train_x, train_t in meta_train_data])
def _prepare_data_per_task(self, x_data, y_data, flatten_y=True):
    # a) make arrays 2-dimensional
    x_data, y_data = _handle_input_dimensionality(x_data, y_data)

    # b) normalize data
    x_data, y_data = self._normalize_data(x_data, y_data)

    if flatten_y:
        assert y_data.shape[1] == 1
        y_data = y_data.flatten()

    # c) convert to tensors
    x_tensor = torch.from_numpy(x_data).float().to(device)
    y_tensor = torch.from_numpy(y_data).float().to(device)
    return x_tensor, y_tensor
def _compute_normalization_stats(self, meta_train_tuples):
    X_stack, Y_stack = list(zip(*[_handle_input_dimensionality(x_train, y_train)
                                  for x_train, y_train in meta_train_tuples]))
    X, Y = np.concatenate(X_stack, axis=0), np.concatenate(Y_stack, axis=0)

    if self.normalize_data:
        self.x_mean, self.y_mean = np.mean(X, axis=0), np.mean(Y, axis=0)
        self.x_std, self.y_std = np.std(X, axis=0) + 1e-8, np.std(Y, axis=0) + 1e-8
    else:
        self.x_mean, self.y_mean = np.zeros(X.shape[1]), np.zeros(Y.shape[1])
        self.x_std, self.y_std = np.ones(X.shape[1]), np.ones(Y.shape[1])
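# Self-contained sketch of the statistics computed by _compute_normalization_stats
# above (illustration only, not part of the original source): per-dimension mean
# and std over all meta-training tasks stacked together, with the same 1e-8
# stabilizer to avoid division by zero for constant dimensions.
def _example_normalization_stats():
    import numpy as np
    meta_train_tuples = [
        (np.random.randn(20, 2), np.random.randn(20, 1)),
        (np.random.randn(15, 2), np.random.randn(15, 1)),
    ]
    X = np.concatenate([x for x, _ in meta_train_tuples], axis=0)
    Y = np.concatenate([y for _, y in meta_train_tuples], axis=0)
    x_mean, y_mean = np.mean(X, axis=0), np.mean(Y, axis=0)
    x_std, y_std = np.std(X, axis=0) + 1e-8, np.std(Y, axis=0) + 1e-8
    return x_mean, x_std, y_mean, y_std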
def _initial_data_handling(self, train_x, train_t):
    # a) make arrays 2-dimensional and record dimensions
    train_x, train_t = _handle_input_dimensionality(train_x, train_t)
    self.input_dim, self.output_dim = train_x.shape[-1], train_t.shape[-1]
    self.n_train_samples = train_x.shape[0]

    # b) normalize data to exhibit zero mean and unit variance
    self._compute_normalization_stats(train_x, train_t)
    train_x_normalized, train_t_normalized = self._normalize_data(train_x, train_t)

    # c) convert the data into pytorch tensors
    self.train_x = torch.from_numpy(train_x_normalized).contiguous().float().to(device)
    self.train_t = torch.from_numpy(train_t_normalized).contiguous().float().to(device)

    return self.train_x, self.train_t
def eval_datasets(self, test_tuples, n_iter_meta_test=3000, **kwargs):
    """
    Performs meta-testing on multiple tasks / datasets.
    Computes the average test log likelihood, the rmse and the calibration error over multiple test datasets

    Args:
        test_tuples: list of test set tuples, i.e. [(test_context_x_1, test_context_y_1, test_x_1, test_y_1), ...]

    Returns:
        (avg_log_likelihood, rmse, calibr_error)
    """
    assert (all([len(valid_tuple) == 4 for valid_tuple in test_tuples]))

    # meta-test training / inference
    context_tuples = [test_tuple[:2] for test_tuple in test_tuples]
    task_dicts = self._meta_test_inference(context_tuples, verbose=True, log_period=500,
                                           n_iter=n_iter_meta_test)

    # meta-test evaluation
    ll_list, rmse_list, calibr_err_list = [], [], []
    for task_dict, test_tuple in zip(task_dicts, test_tuples):
        # data prep
        _, _, test_x, test_y = test_tuple
        test_x, test_y = _handle_input_dimensionality(test_x, test_y)
        test_x_tensor = torch.from_numpy(self._normalize_data(X=test_x, Y=None)).float().to(device)
        test_y_tensor = torch.from_numpy(test_y).float().flatten().to(device)

        # get predictive dist
        gp_model = task_dict["gp_model"]
        gp_model.eval()
        self.likelihood.eval()
        pred_dist = self.likelihood(gp_model(test_x_tensor))
        pred_dist = AffineTransformedDistribution(pred_dist, normalization_mean=self.y_mean,
                                                  normalization_std=self.y_std)

        # compute eval metrics
        ll_list.append(torch.mean(pred_dist.log_prob(test_y_tensor) / test_y_tensor.shape[0]).cpu().item())
        rmse_list.append(torch.mean(torch.pow(pred_dist.mean - test_y_tensor, 2)).sqrt().cpu().item())
        pred_dist_vect = self._vectorize_pred_dist(pred_dist)
        calibr_err_list.append(self._calib_error(pred_dist_vect, test_y_tensor).cpu().item())

    return np.mean(ll_list), np.mean(rmse_list), np.mean(calibr_err_list)
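# Usage sketch for eval_datasets above (illustration only, not part of the
# original source): assumes `model` is a trained instance and `meta_test_data`
# is a list of (context_x, context_y, test_x, test_y) ndarray tuples, matching
# the 4-tuples checked by the assert in eval_datasets.
def _example_eval_datasets(model, meta_test_data):
    ll, rmse, calibr_error = model.eval_datasets(meta_test_data, n_iter_meta_test=1000)
    print(f"avg. LL: {ll:.3f}, RMSE: {rmse:.3f}, calibration error: {calibr_error:.3f}")
    return ll, rmse, calibr_error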