def test_fixed_noise_gaussian_likelihood(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        noise = 0.1 + torch.rand(4, device=device, dtype=dtype)
        lkhd = FixedNoiseGaussianLikelihood(noise=noise)
        # test basics
        self.assertIsInstance(lkhd.noise_covar, FixedGaussianNoise)
        self.assertTrue(torch.equal(noise, lkhd.noise))
        new_noise = 0.1 + torch.rand(4, device=device, dtype=dtype)
        lkhd.noise = new_noise
        self.assertTrue(torch.equal(lkhd.noise, new_noise))
        # test __call__
        mean = torch.zeros(4, device=device, dtype=dtype)
        covar = DiagLazyTensor(torch.ones(4, device=device, dtype=dtype))
        mvn = MultivariateNormal(mean, covar)
        out = lkhd(mvn)
        self.assertTrue(torch.allclose(out.variance, 1 + new_noise))
        # things should break if dimensions mismatch
        mean = torch.zeros(5, device=device, dtype=dtype)
        covar = DiagLazyTensor(torch.ones(5, device=device, dtype=dtype))
        mvn = MultivariateNormal(mean, covar)
        with self.assertWarns(UserWarning):
            lkhd(mvn)
        # test __call__ w/ observation noise
        obs_noise = 0.1 + torch.rand(5, device=device, dtype=dtype)
        out = lkhd(mvn, noise=obs_noise)
        self.assertTrue(torch.allclose(out.variance, 1 + obs_noise))

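# A minimal, self-contained sketch (not part of the test above) of the same call
# pattern: FixedNoiseGaussianLikelihood stores a fixed per-point noise vector, and
# a different observation-noise tensor can be supplied via the `noise` kwarg when
# evaluating the likelihood on a predictive distribution.
import torch
from gpytorch.distributions import MultivariateNormal
from gpytorch.likelihoods import FixedNoiseGaussianLikelihood

train_noise = 0.1 + torch.rand(4)                          # one noise value per training point
likelihood = FixedNoiseGaussianLikelihood(noise=train_noise)

latent = MultivariateNormal(torch.zeros(4), torch.eye(4))  # stand-in for a GP posterior
marginal = likelihood(latent)                              # variance = latent variance + train_noise
test_noise = torch.full((4,), 0.01)
marginal_test = likelihood(latent, noise=test_noise)       # variance = latent variance + test_noise
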
def test_posterior_latent_gp_and_likelihood_with_optimization(self, cuda=False):
    # This test throws a warning because the fixed noise likelihood gets the wrong input
    warnings.simplefilter("ignore", GPInputWarning)
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = FixedNoiseGaussianLikelihood(torch.ones(11) * 0.001)
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.rbf_covar_module.initialize(lengthscale=exp(1))
    gp_model.mean_module.initialize(constant=0)

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    optimizer.n_iter = 0
    with gpytorch.settings.debug(False):
        for _ in range(75):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        optimizer.step()

        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_function_predictions = likelihood(gp_model(test_x))
        mean_abs_error = torch.mean(torch.abs(test_y - test_function_predictions.mean))

    self.assertLess(mean_abs_error.squeeze().item(), 0.05)

def setUp(self, batched=False, learnable=False):
    torch.set_default_tensor_type(torch.DoubleTensor)
    torch.random.manual_seed(10)

    train_x = torch.rand(10, 2)
    train_y = torch.sin(2 * train_x[:, 0] + 3 * train_x[:, 1]).unsqueeze(-1)
    train_y_var = 0.1 * torch.ones_like(train_y)
    if batched:
        train_y = torch.cat(
            (
                train_y,
                train_y + 0.3 * torch.randn_like(train_y),
                train_y + 0.3 * torch.randn_like(train_y),
            ),
            dim=1,
        )
        train_y_var = train_y_var.repeat(1, 3)

    model = FixedNoiseOnlineSKIGP(
        train_inputs=train_x,
        train_targets=train_y,
        train_noise_term=train_y_var,
        grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
        grid_size=5,
        learn_additional_noise=learnable,
    )

    equivalent_model = SingleTaskGP(
        train_X=train_x,
        train_Y=train_y,
        likelihood=FixedNoiseGaussianLikelihood(train_y_var.t(), learn_additional_noise=learnable),
        covar_module=deepcopy(model.covar_module),
    )
    equivalent_model.mean_module = ZeroMean()

    return model, equivalent_model, train_x, train_y

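# A short sketch (hypothetical values) of the `learn_additional_noise` flag used above:
# with the flag set, the likelihood keeps the fixed per-point noise and additionally
# learns a homoskedastic noise term on top, so it exposes trainable parameters.
import torch
from gpytorch.likelihoods import FixedNoiseGaussianLikelihood

fixed = FixedNoiseGaussianLikelihood(noise=0.1 * torch.ones(10))
learnable = FixedNoiseGaussianLikelihood(noise=0.1 * torch.ones(10), learn_additional_noise=True)

print(len(list(fixed.parameters())))      # 0 -- nothing to train, noise is a fixed buffer
print(len(list(learnable.parameters())))  # 1 -- the learned additional (homoskedastic) noise
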
def create_model(self, fixed_noise=False):
    data = TestExactGP.create_test_data(self)
    likelihood, labels = TestExactGP.create_likelihood_and_labels(self)
    if fixed_noise:
        noise = 0.1 + 0.2 * torch.rand_like(labels)
        likelihood = FixedNoiseGaussianLikelihood(noise)
    return TestExactGP.create_model(self, data, labels, likelihood)

def test_fixed_noise_fantasy_updates_batch(self, cuda=False):
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    noise = torch.full_like(train_y, 2e-4)
    test_noise = torch.full_like(test_y, 3e-4)

    likelihood = FixedNoiseGaussianLikelihood(noise)
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1))
    gp_model.mean_module.initialize(constant=0)

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.15)
    for _ in range(50):
        optimizer.zero_grad()
        with gpytorch.settings.debug(False):
            output = gp_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    optimizer.step()

    with gpytorch.settings.fast_pred_var():
        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_function_predictions = likelihood(gp_model(test_x), noise=test_noise)

        # Cut data down, and then add back via the fantasy interface
        gp_model.set_train_data(train_x[:5], train_y[:5], strict=False)
        gp_model.likelihood.noise_covar = FixedGaussianNoise(noise=noise[:5])
        likelihood(gp_model(test_x), noise=test_noise)

        fantasy_x = train_x[5:].clone().unsqueeze(0).unsqueeze(-1).repeat(3, 1, 1).requires_grad_(True)
        fantasy_y = train_y[5:].unsqueeze(0).repeat(3, 1)
        fant_model = gp_model.get_fantasy_model(fantasy_x, fantasy_y, noise=noise[5:].unsqueeze(0).repeat(3, 1))
        fant_function_predictions = likelihood(fant_model(test_x), noise=test_noise)

        self.assertAllClose(test_function_predictions.mean, fant_function_predictions.mean[0], atol=1e-4)

        fant_function_predictions.mean.sum().backward()
        self.assertTrue(fantasy_x.grad is not None)

def test_kissgp_gp_fast_pred_var(self):
    with gpytorch.settings.fast_pred_var(), gpytorch.settings.debug(False):
        train_x, train_y, test_x, test_y = make_data()
        likelihood = FixedNoiseGaussianLikelihood(torch.ones(100) * 0.001)
        gp_model = GPRegressionModel(train_x, train_y, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        # Optimize the model
        gp_model.train()
        likelihood.train()

        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        optimizer.n_iter = 0
        for _ in range(25):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()
        # Set the cache
        test_function_predictions = likelihood(gp_model(train_x))

        # Now bump up the likelihood to something huge
        # This will make it easy to calculate the variance
        likelihood.initialize(noise=3.)
        test_function_predictions = likelihood(gp_model(train_x))

        noise = likelihood.noise
        var_diff = (test_function_predictions.variance - noise).abs()
        self.assertLess(torch.max(var_diff / noise), 0.05)

def test_posterior_latent_gp_and_likelihood_without_optimization(self, cuda=False):
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    with gpytorch.settings.debug(False):
        # We're manually going to set the hyperparameters to be ridiculous
        likelihood = FixedNoiseGaussianLikelihood(torch.ones(11) * 1e-8)
        gp_model = ExactGPModel(train_x, train_y, likelihood)
        # Update lengthscale prior to accommodate extreme parameters
        gp_model.rbf_covar_module.initialize(lengthscale=exp(-6))
        gp_model.mean_module.initialize(constant=0)

        if cuda:
            gp_model.cuda()
            likelihood.cuda()

        # Compute posterior distribution
        gp_model.eval()
        likelihood.eval()

        # Let's see how our model does, conditioned with weird hyperparams
        # The posterior should fit all the data
        function_predictions = likelihood(gp_model(train_x))

        self.assertLess(torch.norm(function_predictions.mean - train_y), 1e-3)
        self.assertLess(torch.norm(function_predictions.variance), 5e-3)

        # It shouldn't fit much else though
        test_function_predictions = gp_model(torch.tensor([1.1]).type_as(test_x))

        self.assertLess(torch.norm(test_function_predictions.mean - 0), 1e-4)
        self.assertLess(torch.norm(test_function_predictions.variance - gp_model.covar_module.outputscale), 1e-4)

def test_kissgp_gp_mean_abs_error_cuda(self):
    if not torch.cuda.is_available():
        return
    with least_used_cuda_device():
        train_x, train_y, test_x, test_y = make_data(cuda=True)
        likelihood = FixedNoiseGaussianLikelihood(torch.ones(100) * 0.001).cuda()
        gp_model = GPRegressionModel(train_x, train_y, likelihood).cuda()
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        # Optimize the model
        gp_model.train()
        likelihood.train()

        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        optimizer.n_iter = 0
        with gpytorch.settings.debug(False):
            for _ in range(25):
                optimizer.zero_grad()
                output = gp_model(train_x)
                loss = -mll(output, train_y)
                loss.backward()
                optimizer.n_iter += 1
                optimizer.step()

            for param in gp_model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

            # Test the model
            gp_model.eval()
            likelihood.eval()
            test_preds = likelihood(gp_model(test_x)).mean
            mean_abs_error = torch.mean(torch.abs(test_y - test_preds))

        self.assertLess(mean_abs_error.squeeze().item(), 0.02)

def test_kissgp_gp_mean_abs_error(self):
    # This test throws a warning because the fixed noise likelihood gets the wrong input
    warnings.simplefilter("ignore", GPInputWarning)
    train_x, train_y, test_x, test_y = make_data()
    likelihood = FixedNoiseGaussianLikelihood(torch.ones(100) * 0.001)
    gp_model = GPRegressionModel(train_x, train_y, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Optimize the model
    gp_model.train()
    likelihood.train()

    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    optimizer.n_iter = 0
    with gpytorch.settings.debug(False):
        for _ in range(25):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_preds = likelihood(gp_model(test_x)).mean
        mean_abs_error = torch.mean(torch.abs(test_y - test_preds))

    self.assertLess(mean_abs_error.squeeze().item(), 0.05)

def __init__(self, train_X: Tensor, train_Y: Tensor, options: dict, which_type: Optional[str] = "obj") -> None:
    # Error checking:
    assert train_Y.dim() == 1, "train_Y is required to be 1D"
    self._validate_tensor_args(X=train_X, Y=train_Y[:, None])  # Only for this function, train_Y must be 2D (this must be a bug in botorch)

    # Dimensionality of the input space:
    self.dim = train_X.shape[-1]

    # Model identity:
    self.iden = "GP_model_{0:s}".format(which_type)

    # Likelihood:
    noise_std = options["noise_std_obj"]
    lik = FixedNoiseGaussianLikelihood(noise=torch.full_like(train_Y, noise_std**2))

    # Initialize parent class:
    super().__init__(train_X, train_Y, lik)

    # Obtain hyperprior for lengthscale and outputscale:
    # NOTE: The mean (zero) and the model noise are fixed
    lengthscale_prior, outputscale_prior = extract_prior(options, which_type)

    # Initialize prior mean:
    # self.mean_module = ConstantMean()
    self.mean_module = ZeroMean()

    # Initialize covariance function:
    # base_kernel = RBFKernel(ard_num_dims=train_X.shape[-1], lengthscale_prior=GammaPrior(3.0, 6.0))  # original
    # self.covar_module = ScaleKernel(base_kernel=base_kernel, outputscale_prior=GammaPrior(2.0, 0.15))  # original
    base_kernel = RBFKernel(ard_num_dims=self.dim, lengthscale_prior=lengthscale_prior, lengthscale_constraint=GreaterThan(1e-2))
    self.covar_module = ScaleKernel(base_kernel=base_kernel, outputscale_prior=outputscale_prior)

    # Make sure we're on the right device/dtype
    self.to(train_X)

    # Instantiate the gradient model:
    self.model_grad = GPmodelWithGrad(dim=self.dim)

def create_likelihood(self):
    noise = 0.1 + torch.rand(2, 3, 5)
    return FixedNoiseGaussianLikelihood(noise=noise)

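# A minimal sketch (standalone, not part of the factory above) showing that a noise
# tensor of this shape defines a batch of likelihoods: a (2, 3)-batched distribution
# over 5 points picks up the matching (2, 3, 5) noise on its diagonal.
import torch
from gpytorch.distributions import MultivariateNormal
from gpytorch.likelihoods import FixedNoiseGaussianLikelihood

noise = 0.1 + torch.rand(2, 3, 5)
likelihood = FixedNoiseGaussianLikelihood(noise=noise)

mean = torch.zeros(2, 3, 5)
covar = torch.eye(5).repeat(2, 3, 1, 1)                    # identity covariance per batch element
marginal = likelihood(MultivariateNormal(mean, covar))
assert marginal.variance.shape == torch.Size([2, 3, 5])    # variance = 1 + noise, elementwise
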
def run(obs, params_true, device='cpu'):
    device = safe_cast(torch.device, device)
    dx, NX = PARAM_DX, PARAM_MESH_RES_SPACE
    ts = torch.arange(PARAM_MESH_RES_TIME, device=device)
    priors_uniform = priors()
    y = torch.tensor(obs['Ss'], device=device)

    def simulate(params):
        _theta = {'a': params[0], 'b': params[1], 'k': params[2]}
        sim_pde = LandauCahnHilliard(
            params=_theta,
            M=PARAM_DT,
            dx=dx,
            device=device,
        )
        loss_fn = Evaluator(sim_pde, loss)
        return loss_fn

    # calculate initial samples from latin hypercube
    pgd = lhs(3, samples=PARAM_INIT_EVAL)
    xs, ys = [], []
    for j in range(PARAM_INIT_EVAL):
        xk = torch.stack(
            [
                (priors_uniform[k][1] - priors_uniform[k][0])
                * torch.tensor(pgd[j, i], device=device, dtype=torch.float32)
                + priors_uniform[k][0]
                for i, k in enumerate(('a', 'b', 'k'))
            ],
            0,
        )
        xs.append(xk)

    # ell, params = simulate(params)
    phi0 = (0.2 * torch.rand((NX, NX), device=device)).view(-1, 1, NX, NX)
    with torch.no_grad():
        for j in range(PARAM_INIT_EVAL):
            params = xs[j]
            loss_fn = simulate(params)
            ys.append(loss_fn(phi0, ts, y, dx))
    x_init, y_init = torch.stack(xs), torch.stack(ys)
    print(y_init)

    N = PARAM_SEARCH_RES
    x_eval = torch.cat(
        [
            x.reshape(-1, 1)
            for x in torch.meshgrid(
                *[torch.linspace(priors_uniform[k][0], priors_uniform[k][1], N) for k in priors_uniform]
            )
        ],
        1,
    )
    x_train = x_init
    y_train = y_init
    for i in range(PARAM_MAX_EVAL - PARAM_INIT_EVAL):
        for ntry in range(5):
            model = ExactGPModel(
                x_train,
                y_train,
                FixedNoiseGaussianLikelihood(noise=1e-2 * torch.ones(len(x_train))),
            )
            try:
                optimise(model, method='adam', max_iter=1000)
                break
            except Exception as err:
                print('attempt %d failed' % ntry)
                if ntry == 4:
                    raise err
        u = acq(y_train.min(), model, x_eval)
        xn = x_eval[u.argmax(), :]
        x_eval = torch.cat([x_eval[0:u.argmax(), :], x_eval[u.argmax() + 1:, :]])
        # print(x_eval.shape)
        loss_fn = simulate(xn)
        yn = loss_fn(phi0, ts, y, dx)
        x_train = torch.cat([x_train, xn.reshape(1, -1)])
        y_train = torch.stack([*y_train, yn.detach()])
        print(i)
    return (x_train, y_train)

def __init__(self, dim: int, train_X: Tensor, train_Y: Tensor, options: dict, which_type: Optional[str] = "obj") -> None:
    self.dim = dim

    if len(train_Y) == 0:  # No data case
        train_X = None
        train_Y = None
    else:
        # Error checking:
        assert train_Y.dim() == 1, "train_Y is required to be 1D"
        self._validate_tensor_args(X=train_X, Y=train_Y[:, None])  # Only for this function, train_Y must be 2D (this must be a bug in botorch)

    print("\n")
    logger.info("### Initializing GP model for objective f(x) ###")

    # Likelihood:
    noise_std = options.hyperpars.noise_std.value
    if train_Y is not None:
        lik = FixedNoiseGaussianLikelihood(noise=torch.full_like(train_Y, noise_std**2))
    else:
        lik = FixedNoiseGaussianLikelihood(noise=torch.tensor([noise_std**2], device=device, dtype=dtype))

    # Initialize parent class:
    super().__init__(train_X, train_Y, lik)

    # # Obtain hyperprior for lengthscale and outputscale:
    # # NOTE: The mean (zero) and the model noise are fixed
    # lengthscale_prior, outputscale_prior = extract_prior(options.hyperpriors)

    # Initialize hyperpriors using scipy because gpytorch's gamma and beta distributions do not have the inverse CDF
    hyperpriors = dict(
        lengthscales=eval(options.hyperpars.lenthscales.prior),
        outputscale=eval(options.hyperpars.outputscale.prior),
    )

    # Index hyperparameters:
    self.idx_hyperpars = dict(lengthscales=list(range(0, self.dim)), outputscale=[self.dim])
    self.dim_hyperpars = sum([len(val) for val in self.idx_hyperpars.values()])

    # Get bounds:
    self.hyperpars_bounds = self._get_hyperparameters_bounds(hyperpriors)
    logger.info("hyperpars_bounds:" + str(self.hyperpars_bounds))

    # Initialize prior mean:
    # self.mean_module = ConstantMean()
    self.mean_module = ZeroMean()

    # Initialize covariance function:
    # base_kernel = RBFKernel(ard_num_dims=train_X.shape[-1], lengthscale_prior=GammaPrior(3.0, 6.0))  # original
    # self.covar_module = ScaleKernel(base_kernel=base_kernel, outputscale_prior=GammaPrior(2.0, 0.15))  # original
    # base_kernel = RBFKernel(ard_num_dims=self.dim, lengthscale_prior=lengthscale_prior, lengthscale_constraint=GreaterThan(1e-2))
    base_kernel = MaternKernel(nu=2.5, ard_num_dims=self.dim, lengthscale=0.1 * torch.ones(self.dim))
    self.covar_module = ScaleKernel(base_kernel=base_kernel)

    self.disp_info_scipy_opti = True
    # self.method = "L-BFGS-B"
    self.method = "LN_BOBYQA"
    # self.method = 'trust-constr'

    # Get a hyperparameter sample within bounds (not the same as sampling from the corresponding priors):
    hyperpars_sample = self._sample_hyperparameters_within_bounds(Nsamples=1).squeeze(0)
    self.covar_module.outputscale = hyperpars_sample[self.idx_hyperpars["outputscale"]]
    self.covar_module.base_kernel.lengthscale = hyperpars_sample[self.idx_hyperpars["lengthscales"]]
    self.noise_std = options.hyperpars.noise_std.value  # The evaluation noise is fixed, and given by the user

    # Initialize marginal log likelihood for the GPCR model.
    # mll_objective is callable
    # MLLGPCR can internally modify the model hyperparameters, and will do so throughout the optimization routine
    self.mll_objective = MLLGP(model_gp=self, likelihood_gp=self.likelihood, hyperpriors=hyperpriors)

    # Define nlopt optimizer:
    self.opti_hyperpars = OptimizationNonLinear(
        dim=self.dim_hyperpars,
        fun_obj=self.mll_objective,
        algo_str=self.method,
        tol_x=1e-4,
        Neval_max_local_optis=options.hyperpars.optimization.Nmax_evals,
        bounds=self.hyperpars_bounds,
        what2optimize_str="GP hyperparameters",
    )

    # Make sure we're on the right device/dtype
    if train_Y is not None:
        self.to(train_X)

    self.Nrestarts = options.hyperpars.optimization.Nrestarts

    self._update_hyperparameters()

    self.eval()

def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(
        args.data_loc, args.num_init, args.num_total, test_is_year=False, seed=args.seed
    )
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

    likelihood = FixedNoiseGaussianLikelihood(noise=init_y_var)
    grid_pts = create_grid(grid_sizes=[30, 30], grid_bounds=torch.tensor([[0., 1.], [0., 1.]]))
    induc_points = torch.cat([x.reshape(-1, 1) for x in torch.meshgrid(grid_pts)], dim=-1)

    model = VariationalGPModel(
        inducing_points=induc_points,
        mean_module=gpytorch.means.ZeroMean(),
        covar_module=ScaleKernel(
            MaternKernel(
                ard_num_dims=2,
                nu=0.5,
                lengthscale_prior=GammaPrior(3.0, 6.0),
            ),
            outputscale_prior=GammaPrior(2.0, 0.15),
        ),
        streaming=True,
        likelihood=likelihood,
        beta=args.beta,
        learn_inducing_locations=args.learn_inducing,
    ).to(device)

    mll = VariationalELBO(model.likelihood, model, beta=args.beta, num_data=args.num_init)

    print("---- Fitting initial model ----")
    start = time.time()
    model.train()
    model.zero_grad()
    optimizer = torch.optim.Adam(model.parameters(), lr=10 * args.lr_init)
    model, loss = fit_variational_model(mll, model, optimizer, init_x, init_y, maxiter=1000)
    end = time.time()
    print("Elapsed fitting time: ", end - start)

    print("--- Now computing initial RMSE")
    model.eval()
    with gpytorch.settings.skip_posterior_variances(True):
        test_pred = model(test_x)
        pred_rmse = ((test_pred.mean - test_y) ** 2).mean().sqrt()
    print("---- Initial RMSE: ", pred_rmse.item())

    all_outputs = []
    start_ind = init_x.shape[0]
    end_ind = int(start_ind + args.batch_size)
    current_x = init_x
    current_y = init_y
    current_y_var = init_y_var

    for step in range(args.num_steps):
        if step > 0 and step % 25 == 0:
            print("Beginning step ", step)

        total_time_step_start = time.time()

        if step > 0:
            print("---- Fitting model ----")
            start = time.time()
            model.train()
            model.zero_grad()
            model.likelihood = FixedNoiseGaussianLikelihood(current_y_var)
            mll = VariationalELBO(model.likelihood, model, beta=args.beta, num_data=args.num_init)
            optimizer = torch.optim.Adam(model.parameters(), lr=args.lr_init * 0.99 ** step)
            model, loss = fit_variational_model(mll, model, optimizer, current_x, current_y, maxiter=300)
            model.zero_grad()
            end = time.time()
            print("Elapsed fitting time: ", end - start)
            # print("Named parameters: ", list(model.named_parameters()))

        if args.acqf == "max_post_var" and not args.random:
            candidates, acq_value = generate_candidates(model, args.batch_size, device, maxiter=300)
        elif args.acqf == "max_test_var" and not args.random:
            model.eval()
            vals, inds = model(test_x).variance.sort()
            acq_value = vals[-args.batch_size:].mean().detach()
            candidates = test_x[inds[-args.batch_size:]]
        else:
            candidates = torch.rand(args.batch_size, train_x.shape[-1], device=device, dtype=train_x.dtype)
            acq_value = torch.zeros(1)

        model.eval()
        _ = model(test_x[:10])  # to init caches

        print("---- Finished optimizing; now querying dataset ---- ")
        with torch.no_grad():
            covar_dists = model.covar_module(candidates, train_x)
            nearest_points = covar_dists.evaluate().argmax(dim=-1)
            new_x = train_x[nearest_points]
            new_y = train_y[nearest_points]
            new_y_var = train_y_var[nearest_points]
            todrop = torch.tensor([x in nearest_points for x in range(train_x.shape[0])])
            train_x, train_y, train_y_var = train_x[~todrop], train_y[~todrop], train_y_var[~todrop]
            print("New train_x shape", train_x.shape)

        print("--- Now updating model with simulator ----")
        current_x = torch.cat((current_x, new_x), dim=0)
        current_y = torch.cat((current_y, new_y), dim=0)
        current_y_var = torch.cat((current_y_var, new_y_var), dim=0)

        print("--- Now computing updated RMSE")
        model.eval()
        test_pred = model(test_x)
        pred_rmse = ((test_pred.mean.view(-1) - test_y.view(-1)) ** 2).mean().sqrt()
        pred_avg_variance = test_pred.variance.mean()

        total_time_step_elapsed_time = time.time() - total_time_step_start
        step_output_list = [
            total_time_step_elapsed_time,
            acq_value.item(),
            pred_rmse.item(),
            pred_avg_variance.item(),
            loss.item(),
        ]
        print("Step RMSE: ", pred_rmse)
        all_outputs.append(step_output_list)

        start_ind = end_ind
        end_ind = int(end_ind + args.batch_size)

    output_dict = {
        "model_state_dict": model.cpu().state_dict(),
        "queried_points": {'x': current_x, 'y': current_y},
        "results": DataFrame(all_outputs),
    }
    torch.save(output_dict, args.output)