def build_dist_dict(noise_prior, outputscale_prior, lengthscale_prior):
    """Build a dictionary of distributions to sample for random restarts."""
    if noise_prior is None:
        noise_dist = GammaPrior(1.5, 0.5)
    else:
        noise_dist = noise_prior[0]
    if outputscale_prior is None:
        output_dist = GammaPrior(3, 0.5)
    else:
        output_dist = outputscale_prior[0]
    if lengthscale_prior is None:
        lengthscale_dist = GammaPrior(3, 0.5)
    else:
        lengthscale_dist = lengthscale_prior[0]
    distributions = {
        'likelihood.noise_covar.raw_noise': noise_dist,
        'covar_module.raw_outputscale': output_dist,
        'covar_module.base_kernel.raw_lengthscale': lengthscale_dist,
    }
    return distributions
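# A minimal usage sketch for build_dist_dict above (illustrative, not from
# the original source): the returned mapping pairs gpytorch raw-parameter
# names with priors, so a random-restart loop can draw one fresh value per
# parameter via each prior's sample() method. Note that these priors place
# positive-valued distributions over the *raw* (unconstrained) parameters,
# which is the convention the function above relies on.
def sample_restart_initialization(distributions):
    """Draw one sampled value per raw-parameter name for a random restart."""
    return {name: dist.sample() for name, dist in distributions.items()}

restart_values = sample_restart_initialization(build_dist_dict(None, None, None))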
def query_covar(covar_name: str, scale: bool, outputscale: float, lscales: Tensor,
                **kwargs) -> Kernel:
    lengthscale_prior = GammaPrior(3.0, 6.0)
    kws = dict(
        lengthscale_prior=lengthscale_prior,
        ard_num_dims=lscales.shape[-1],
    )
    if covar_name.lower()[:6] == 'matern':
        kernel_class = MaternKernel
        if covar_name[-2:] == '52':
            kws['nu'] = 2.5
        elif covar_name[-2:] == '32':
            kws['nu'] = 1.5
        elif covar_name[-2:] == '12':
            kws['nu'] = 0.5
        else:
            raise ValueError(covar_name)
    elif covar_name.lower() == 'rbf':
        kernel_class = RBFKernel
    else:
        raise ValueError(covar_name)
    kws.update(**kwargs)
    kernel = kernel_class(**kws)
    kernel.lengthscale = lscales
    if scale:
        kernel = ScaleKernel(kernel, outputscale_prior=GammaPrior(2.0, 0.15))
        kernel.outputscale = outputscale
    return kernel
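# Usage sketch for query_covar (an illustrative call, not from the original
# source; the tensor values are made up and torch is assumed importable):
# 'matern52' maps to MaternKernel(nu=2.5), and scale=True wraps the kernel in
# a ScaleKernel, seeding the ARD lengthscales and the outputscale.
import torch

example_kernel = query_covar(
    covar_name='matern52',
    scale=True,
    outputscale=1.0,
    lscales=torch.tensor([0.5, 1.0, 2.0]),  # one lengthscale per input dim
)
assert example_kernel.base_kernel.nu == 2.5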
def extract_prior(cfg_node: dict, which_type: str):
    assert which_type in ["obj", "cons"]
    if cfg_node["lengthscale_prior_type"] == "box":
        # par1: low, par2: high
        lengthscale_prior = SmoothedBoxPrior(
            cfg_node["lengthscale_prior_par1_{0:s}".format(which_type)],
            cfg_node["lengthscale_prior_par2_{0:s}".format(which_type)],
            sigma=0.001)
    elif cfg_node["lengthscale_prior_type"] == "gamma":
        # par1: alpha (concentration), par2: beta (rate)
        lengthscale_prior = GammaPrior(
            concentration=cfg_node["lengthscale_prior_par1_{0:s}".format(which_type)],
            rate=cfg_node["lengthscale_prior_par2_{0:s}".format(which_type)])
    elif cfg_node["lengthscale_prior_type"] == "gaussian":
        lengthscale_prior = NormalPrior(
            loc=cfg_node["lengthscale_prior_par1_{0:s}".format(which_type)],
            scale=cfg_node["lengthscale_prior_par2_{0:s}".format(which_type)])
    else:
        lengthscale_prior = None
        print("Using no prior for the length scale")

    if cfg_node["outputscale_prior_type"] == "box":
        # par1: low, par2: high
        outputscale_prior = SmoothedBoxPrior(
            cfg_node["outputscale_prior_par1_{0:s}".format(which_type)],
            cfg_node["outputscale_prior_par2_{0:s}".format(which_type)],
            sigma=0.001)
    elif cfg_node["outputscale_prior_type"] == "gamma":
        # par1: alpha (concentration), par2: beta (rate)
        outputscale_prior = GammaPrior(
            concentration=cfg_node["outputscale_prior_par1_{0:s}".format(which_type)],
            rate=cfg_node["outputscale_prior_par2_{0:s}".format(which_type)])
    elif cfg_node["outputscale_prior_type"] == "gaussian":
        outputscale_prior = NormalPrior(
            loc=cfg_node["outputscale_prior_par1_{0:s}".format(which_type)],
            scale=cfg_node["outputscale_prior_par2_{0:s}".format(which_type)])
    else:
        outputscale_prior = None
        print("Using no prior for the output scale")

    return lengthscale_prior, outputscale_prior
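# Illustrative cfg_node for the dict-based extract_prior above. The key
# scheme follows the "<param>_prior_par{1,2}_<which_type>" pattern that the
# function formats; the concrete values here are assumptions for
# demonstration only.
example_cfg_node = {
    "lengthscale_prior_type": "gamma",
    "lengthscale_prior_par1_obj": 3.0,   # alpha (concentration)
    "lengthscale_prior_par2_obj": 6.0,   # beta (rate)
    "outputscale_prior_type": "box",
    "outputscale_prior_par1_obj": 0.1,   # low
    "outputscale_prior_par2_obj": 10.0,  # high
}
ls_prior, os_prior = extract_prior(example_cfg_node, which_type="obj")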
def extract_prior(cfg):
    if cfg.lengthscales.which == "box":
        lengthscale_prior = SmoothedBoxPrior(
            cfg.lengthscales.prior_box.lb, cfg.lengthscales.prior_box.ub, sigma=0.001)
    elif cfg.lengthscales.which == "gamma":
        lengthscale_prior = GammaPrior(
            concentration=cfg.lengthscales.prior_gamma.concentration,
            rate=cfg.lengthscales.prior_gamma.rate)
    elif cfg.lengthscales.which == "gaussian":
        lengthscale_prior = NormalPrior(
            loc=cfg.lengthscales.prior_gaussian.loc,
            scale=cfg.lengthscales.prior_gaussian.scale)
    else:
        lengthscale_prior = None
        print("Using no prior for the lengthscale")

    if cfg.outputscale.which == "box":
        outputscale_prior = SmoothedBoxPrior(
            cfg.outputscale.prior_box.lb, cfg.outputscale.prior_box.ub, sigma=0.001)
    elif cfg.outputscale.which == "gamma":
        outputscale_prior = GammaPrior(
            concentration=cfg.outputscale.prior_gamma.concentration,
            rate=cfg.outputscale.prior_gamma.rate)
    elif cfg.outputscale.which == "gaussian":
        outputscale_prior = NormalPrior(
            loc=cfg.outputscale.prior_gaussian.loc,
            scale=cfg.outputscale.prior_gaussian.scale)
    else:
        outputscale_prior = None
        print("Using no prior for the outputscale")

    return lengthscale_prior, outputscale_prior
def _get_fixed_prior_model(**tkwargs):
    train_X, train_Y = _get_random_mt_data(**tkwargs)
    sd_prior = GammaPrior(2.0, 0.15)
    sd_prior._event_shape = torch.Size([2])
    model = MultiTaskGP(
        train_X,
        train_Y,
        task_feature=1,
        task_covar_prior=LKJCovariancePrior(2, 0.6, sd_prior),
    )
    return model.to(**tkwargs)
def test_scalar_gamma_prior(self):
    prior = GammaPrior(1, 1)  # this is an exponential w/ rate 1
    self.assertFalse(prior.log_transform)
    self.assertTrue(prior.is_in_support(prior.rate.new([1])))
    self.assertFalse(prior.is_in_support(prior.rate.new([-1])))
    self.assertEqual(prior.shape, torch.Size([1]))
    self.assertEqual(prior.concentration.item(), 1.0)
    self.assertEqual(prior.rate.item(), 1.0)
    self.assertAlmostEqual(
        prior.log_prob(prior.rate.new([1.0])).item(), -1.0, places=5)
def _get_fixed_noise_and_prior_model(**tkwargs):
    train_X, train_Y = _get_random_mt_data(**tkwargs)
    train_Yvar = torch.full_like(train_Y, 0.05)
    sd_prior = GammaPrior(2.0, 0.15)
    sd_prior._event_shape = torch.Size([2])
    model = FixedNoiseMultiTaskGP(
        train_X,
        train_Y,
        train_Yvar,
        task_feature=1,
        task_covar_prior=LKJCovariancePrior(2, 0.6, sd_prior),
    )
    return model.to(**tkwargs)
def test_gamma_prior_log_prob(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    concentration = torch.tensor(1.0, device=device)
    rate = torch.tensor(1.0, device=device)
    prior = GammaPrior(concentration, rate)
    dist = Gamma(concentration, rate)

    t = torch.tensor(1.0, device=device)
    self.assertTrue(torch.equal(prior.log_prob(t), dist.log_prob(t)))
    t = torch.tensor([1.5, 0.5], device=device)
    self.assertTrue(torch.equal(prior.log_prob(t), dist.log_prob(t)))
    t = torch.tensor([[1.0, 0.5], [3.0, 0.25]], device=device)
    self.assertTrue(torch.equal(prior.log_prob(t), dist.log_prob(t)))
def test_gamma_prior_log_prob_log_transform(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    concentration = torch.tensor(1.0, device=device)
    rate = torch.tensor(1.0, device=device)
    prior = GammaPrior(concentration, rate, transform=torch.exp)
    dist = Gamma(concentration, rate)

    t = torch.tensor(0.0, device=device)
    self.assertTrue(torch.equal(prior.log_prob(t), dist.log_prob(t.exp())))
    t = torch.tensor([-1, 0.5], device=device)
    self.assertTrue(torch.equal(prior.log_prob(t), dist.log_prob(t.exp())))
    t = torch.tensor([[-1, 0.5], [0.1, -2.0]], device=device)
    self.assertTrue(torch.equal(prior.log_prob(t), dist.log_prob(t.exp())))
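# Worked check of the transform semantics exercised by the test above: per
# its assertions, GammaPrior with transform=torch.exp evaluates the Gamma
# density at exp(t), with no Jacobian correction. For GammaPrior(1, 1) the
# density is that of an Exponential(1), so log p(exp(t)) = -exp(t), which is
# -1.0 at t = 0. A minimal standalone verification (assuming gpytorch and
# torch are installed):
import math
import torch
from gpytorch.priors import GammaPrior

_prior = GammaPrior(torch.tensor(1.0), torch.tensor(1.0), transform=torch.exp)
assert math.isclose(_prior.log_prob(torch.tensor(0.0)).item(), -1.0, rel_tol=1e-5)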
def test_vector_gamma_prior_size(self):
    prior = GammaPrior(1, 1, size=2)
    self.assertFalse(prior.log_transform)
    self.assertTrue(prior.is_in_support(prior.rate.new_ones(2)))
    self.assertFalse(prior.is_in_support(prior.rate.new_zeros(2)))
    self.assertEqual(prior.shape, torch.Size([2]))
    self.assertTrue(torch.equal(prior.concentration, prior.rate.new([1.0, 1.0])))
    self.assertTrue(torch.equal(prior.rate, prior.rate.new([1.0, 1.0])))
    parameter = prior.rate.new([1.0, 2.0])
    self.assertAlmostEqual(prior.log_prob(parameter).item(), -3.0, places=5)
def test_vector_gamma_prior(self):
    prior = GammaPrior(torch.tensor([1.0, 2.0]), torch.tensor([0.5, 2.0]))
    self.assertFalse(prior.log_transform)
    self.assertTrue(prior.is_in_support(torch.rand(1)))
    self.assertEqual(prior.shape, torch.Size([2]))
    self.assertTrue(torch.equal(prior.concentration, torch.tensor([1.0, 2.0])))
    self.assertTrue(torch.equal(prior.rate, torch.tensor([0.5, 2.0])))
    parameter = torch.tensor([1.0, math.exp(1)])
    expected_log_prob = torch.tensor([
        math.log(0.5) - 0.5,
        2 * math.log(2) + 1 - 2 * math.exp(1),
    ]).sum().item()
    self.assertAlmostEqual(
        prior.log_prob(parameter).item(), expected_log_prob, places=5)
def get_priors(heuristic_lengthscales, use_priors=True, is_composite=False):
    """Utility function used to get priors for GPR kernel hyperparameters.

    Parameters:
        heuristic_lengthscales (Tensor): Heuristic lengthscale estimates used
            to center the lengthscale prior.
        use_priors (bool): Whether to use prior distributions over the
            lengthscale and outputscale hyperparameters. Defaults to True.
        is_composite (bool): Whether we are constructing means and kernels
            for a composite GPR kernel. If True, returns two sets of priors,
            one for each of the two kernels.

    Returns:
        lengthscale_prior (GammaPrior): A Gamma prior distribution on the
            kernel lengthscale hyperparameter.
        outputscale_prior (GammaPrior): A Gamma prior distribution on the
            kernel outputscale hyperparameter.
    """
    # Determine if we use priors
    lengthscale_prior = None
    outputscale_prior = None
    if use_priors:
        lengthscale_prior = GammaPrior(
            0.01 * heuristic_lengthscales,
            0.01 * torch.ones(heuristic_lengthscales.size()))
        print("LENGTHSCALE MEAN: \n{}".format(lengthscale_prior.mean))
        print("LENGTHSCALE VARIANCE: \n{}".format(lengthscale_prior.variance))
    if is_composite:
        return lengthscale_prior, lengthscale_prior, outputscale_prior
    else:
        return lengthscale_prior, outputscale_prior
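# Worked example for get_priors above (values illustrative): a Gamma
# distribution has mean = concentration / rate and variance =
# concentration / rate**2, so the 0.01 scaling centers the lengthscale prior
# exactly on the heuristic lengthscales while keeping it diffuse
# (variance = mean / 0.01). Assumes torch and the gpytorch GammaPrior used
# above.
import torch

_heuristic = torch.tensor([2.0, 5.0])
_ls_prior, _ = get_priors(_heuristic, use_priors=True)
assert torch.allclose(_ls_prior.mean, _heuristic)             # (0.01 * l) / 0.01
assert torch.allclose(_ls_prior.variance, _heuristic / 0.01)  # (0.01 * l) / 0.01**2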
def fit_model(self):
    """
    If no state_dict exists, fits the model and saves the state_dict.
    Otherwise, constructs the model but uses the fit given by the state_dict.
    """
    # read the data
    data_list = list()
    for i in range(1, 31):
        data_file = os.path.join(script_dir, "port_evals", "port_n=100_seed=%d" % i)
        data_list.append(torch.load(data_file))

    # join the data together
    X = torch.cat(
        [data_list[i]["X"] for i in range(len(data_list))], dim=0).squeeze(-2)
    Y = torch.cat(
        [data_list[i]["Y"] for i in range(len(data_list))], dim=0).squeeze(-2)

    # fit GP
    noise_prior = GammaPrior(1.1, 0.5)
    noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
    likelihood = GaussianLikelihood(
        noise_prior=noise_prior,
        batch_shape=[],
        noise_constraint=GreaterThan(
            0.000005,  # minimum observation noise assumed in the GP model
            transform=None,
            initial_value=noise_prior_mode,
        ),
    )

    # We save the state dict to avoid fitting the GP every time, which takes ~3 mins
    try:
        state_dict = torch.load(
            os.path.join(script_dir, "portfolio_surrogate_state_dict.pt"))
        model = SingleTaskGP(X, Y, likelihood, outcome_transform=Standardize(m=1))
        model.load_state_dict(state_dict)
    except FileNotFoundError:
        model = SingleTaskGP(X, Y, likelihood, outcome_transform=Standardize(m=1))
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        from time import time

        start = time()
        fit_gpytorch_model(mll)
        print("fitting took %s seconds" % (time() - start))
        torch.save(
            model.state_dict(),
            os.path.join(script_dir, "portfolio_surrogate_state_dict.pt"),
        )
    self.model = model
def _get_fixed_noise_and_given_covar_module_model(**tkwargs):
    train_X, train_Y = _get_random_mt_data(**tkwargs)
    train_Yvar = torch.full_like(train_Y, 0.05)
    model = FixedNoiseMultiTaskGP(
        train_X,
        train_Y,
        train_Yvar,
        task_feature=1,
        covar_module=MaternKernel(nu=1.5, lengthscale_prior=GammaPrior(1.0, 1.0)),
    )
    return model.to(**tkwargs)
def initialize_model(X, Y, old_model=None, **kwargs):
    # NOTE: `args` and `noise` are assumed to be module-level globals here.
    if old_model is None:
        covar_module = ScaleKernel(
            MaternKernel(
                nu=2.5,
                lengthscale_prior=GammaPrior(3.0, 6.0),
                lengthscale_constraint=Interval(1e-4, 12.0),
            ),
            outputscale_prior=GammaPrior(2.0, 0.15),
            outputscale_constraint=Interval(1e-4, 12.0),
        )
        if args.fixed_noise:
            model_obj = FixedNoiseGP(X, Y, train_Yvar=noise, covar_module=covar_module)
        else:
            model_obj = SingleTaskGP(X, Y, covar_module=covar_module)
    else:
        model_obj = old_model
    mll = ExactMarginalLogLikelihood(model_obj.likelihood, model_obj)
    return model_obj, mll
def initialize_model(X, Y, old_model=None, **kwargs):
    # NOTE: `args`, `noise`, and `bounds` are assumed to be module-level globals.
    if old_model is None:
        covar_module = ScaleKernel(
            MaternKernel(
                nu=2.5,
                lengthscale_prior=GammaPrior(3.0, 6.0),
                lengthscale_constraint=Interval(1e-4, 12.0),
            ),
            outputscale_prior=GammaPrior(2.0, 0.15),
            outputscale_constraint=Interval(1e-4, 12.0),
        )
    else:
        covar_module = old_model.covar_module

    if args.dim == 3:
        wiski_grid_size = 10
    elif args.dim == 2:
        wiski_grid_size = 30

    kernel_cache = old_model._kernel_cache if old_model is not None else None

    model_obj = OnlineSKIBotorchModel(
        X,
        Y,
        train_noise_term=noise,
        grid_bounds=bounds,
        grid_size=wiski_grid_size,
        learn_additional_noise=True,
        kernel_cache=kernel_cache,
        covar_module=covar_module,
    ).to(X)

    mll = BatchedWoodburyMarginalLogLikelihood(
        model_obj.likelihood, model_obj, clear_caches_every_iteration=True
    )
    # TODO: reload statedict here?
    # weird errors resulting

    return model_obj, mll
def test_fixed_prior_BotorchModel(self, dtype=torch.float, cuda=False):
    Xs1, Ys1, Yvars1, bounds, _, fns, __ = get_torch_test_data(
        dtype=dtype, cuda=cuda, constant_noise=True)
    Xs2, Ys2, Yvars2, _, _, _, _ = get_torch_test_data(
        dtype=dtype, cuda=cuda, constant_noise=True)
    kwargs = {
        "prior": {
            "type": LKJCovariancePrior,
            "sd_prior": GammaPrior(2.0, 0.44),
            "eta": 0.6,
        }
    }
    model = BotorchModel(**kwargs)
    with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model:
        model.fit(
            Xs=Xs1 + Xs2,
            Ys=Ys1 + Ys2,
            Yvars=Yvars1 + Yvars2,
            search_space_digest=SearchSpaceDigest(
                feature_names=fns,
                bounds=bounds,
                task_features=[0],
            ),
            metric_names=["y", "w"],
        )
        _mock_fit_model.assert_called_once()
        # Check ranks
        model_list = model.model.models
        for i in range(1):
            self.assertIsInstance(
                model_list[i].task_covar_module.IndexKernelPrior,
                LKJCovariancePrior)
            self.assertEqual(
                model_list[i].task_covar_module.IndexKernelPrior.sd_prior.concentration,
                2.0,
            )
            self.assertEqual(
                model_list[i].task_covar_module.IndexKernelPrior.sd_prior.rate,
                0.44)
            self.assertEqual(
                model_list[i].task_covar_module.IndexKernelPrior.correlation_prior.eta,
                0.6,
            )
def __init__(self, x: torch.Tensor, xe: torch.Tensor, y: torch.Tensor,
             lik: GaussianLikelihood, **conf):
    super().__init__((x, xe), y.squeeze(), lik)
    mean = conf.get('mean', ConstantMean())
    kern = conf.get(
        'kern',
        ScaleKernel(MaternKernel(nu=1.5, ard_num_dims=x.shape[1]),
                    outputscale_prior=GammaPrior(0.5, 0.5)))
    kern_emb = conf.get('kern_emb', MaternKernel(nu=2.5))

    self.multi_task = y.shape[1] > 1
    self.mean = mean if not self.multi_task else MultitaskMean(
        mean, num_tasks=y.shape[1])
    if x.shape[1] > 0:
        self.kern = kern if not self.multi_task else MultitaskKernel(
            kern, num_tasks=y.shape[1])
    if xe.shape[1] > 0:
        assert 'num_uniqs' in conf
        num_uniqs = conf['num_uniqs']
        emb_sizes = conf.get('emb_sizes', None)
        self.emb_trans = EmbTransform(num_uniqs, emb_sizes=emb_sizes)
        self.kern_emb = kern_emb if not self.multi_task else MultitaskKernel(
            kern_emb, num_tasks=y.shape[1])
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    cat_dims: List[int],
    cont_kernel_factory: Optional[
        Callable[[torch.Size, int, List[int]], Kernel]
    ] = None,
    likelihood: Optional[Likelihood] = None,
    outcome_transform: Optional[OutcomeTransform] = None,  # TODO
    input_transform: Optional[InputTransform] = None,  # TODO
) -> None:
    r"""A single-task exact GP model supporting categorical parameters.

    Args:
        train_X: A `batch_shape x n x d` tensor of training features.
        train_Y: A `batch_shape x n x m` tensor of training observations.
        cat_dims: A list of indices corresponding to the columns of
            the input `X` that should be considered categorical features.
        cont_kernel_factory: A method that accepts `batch_shape`,
            `ard_num_dims`, and `active_dims` arguments and returns an
            instantiated GPyTorch `Kernel` object to be used as the base
            kernel for the continuous dimensions. If omitted, this model
            uses a Matern-2.5 kernel as the kernel for the ordinal
            parameters.
        likelihood: A likelihood. If omitted, use a standard
            GaussianLikelihood with inferred noise level.
        # outcome_transform: An outcome transform that is applied to the
        #     training data during instantiation and to the posterior during
        #     inference (that is, the `Posterior` obtained by calling
        #     `.posterior` on the model will be on the original scale).
        # input_transform: An input transform that is applied in the model's
        #     forward pass.

    Example:
        >>> train_X = torch.cat(
                [torch.rand(20, 2), torch.randint(3, (20, 1))], dim=-1
            )
        >>> train_Y = (
                torch.sin(train_X[..., :-1]).sum(dim=1, keepdim=True)
                + train_X[..., -1:]
            )
        >>> model = MixedSingleTaskGP(train_X, train_Y, cat_dims=[-1])
    """
    if outcome_transform is not None:
        raise UnsupportedError("outcome transforms not yet supported")
    if input_transform is not None:
        raise UnsupportedError("input transforms not yet supported")
    if len(cat_dims) == 0:
        raise ValueError(
            "Must specify categorical dimensions for MixedSingleTaskGP"
        )
    input_batch_shape, aug_batch_shape = self.get_batch_dimensions(
        train_X=train_X, train_Y=train_Y
    )

    if cont_kernel_factory is None:

        def cont_kernel_factory(
            batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int]
        ) -> MaternKernel:
            return MaternKernel(
                nu=2.5,
                batch_shape=batch_shape,
                ard_num_dims=ard_num_dims,
                active_dims=active_dims,
            )

    if likelihood is None:
        # This Gamma prior is quite close to the Horseshoe prior
        min_noise = 1e-5 if train_X.dtype == torch.float else 1e-6
        likelihood = GaussianLikelihood(
            batch_shape=aug_batch_shape,
            noise_constraint=GreaterThan(
                min_noise, transform=None, initial_value=1e-3
            ),
            noise_prior=GammaPrior(0.9, 10.0),
        )

    d = train_X.shape[-1]
    cat_dims = normalize_indices(indices=cat_dims, d=d)
    ord_dims = sorted(set(range(d)) - set(cat_dims))
    if len(ord_dims) == 0:
        covar_module = ScaleKernel(
            CategoricalKernel(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(cat_dims),
            )
        )
    else:
        sum_kernel = ScaleKernel(
            cont_kernel_factory(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(ord_dims),
                active_dims=ord_dims,
            )
            + ScaleKernel(
                CategoricalKernel(
                    batch_shape=aug_batch_shape,
                    ard_num_dims=len(cat_dims),
                    active_dims=cat_dims,
                )
            )
        )
        prod_kernel = ScaleKernel(
            cont_kernel_factory(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(ord_dims),
                active_dims=ord_dims,
            )
            * CategoricalKernel(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(cat_dims),
                active_dims=cat_dims,
            )
        )
        covar_module = sum_kernel + prod_kernel
    super().__init__(
        train_X=train_X,
        train_Y=train_Y,
        likelihood=likelihood,
        covar_module=covar_module,
        outcome_transform=outcome_transform,
        input_transform=input_transform,
    )
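# Usage sketch for the cont_kernel_factory hook above (illustrative, not from
# the original source): it swaps the default Matern-2.5 continuous kernel for
# an RBF kernel while keeping the categorical sum/product structure. Assumes
# the surrounding MixedSingleTaskGP class and reuses the random training data
# from the docstring example.
from gpytorch.kernels import RBFKernel
from gpytorch.priors import GammaPrior

def rbf_factory(batch_shape, ard_num_dims, active_dims):
    # Same signature the model calls the factory with.
    return RBFKernel(
        batch_shape=batch_shape,
        ard_num_dims=ard_num_dims,
        active_dims=active_dims,
        lengthscale_prior=GammaPrior(3.0, 6.0),
    )

train_X = torch.cat([torch.rand(20, 2), torch.randint(3, (20, 1))], dim=-1)
train_Y = torch.sin(train_X[..., :-1]).sum(dim=1, keepdim=True) + train_X[..., -1:]
model = MixedSingleTaskGP(
    train_X, train_Y, cat_dims=[-1], cont_kernel_factory=rbf_factory
)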
def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(
        args.data_loc,
        args.num_init,
        args.num_total,
        test_is_year=False,
        seed=args.seed,
    )
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

    if args.model == "wiski":
        model = FixedNoiseOnlineSKIGP(
            init_x,
            init_y.view(-1, 1),
            init_y_var.view(-1, 1),
            GridInterpolationKernel(
                base_kernel=ScaleKernel(
                    MaternKernel(
                        ard_num_dims=2,
                        nu=0.5,
                        lengthscale_prior=GammaPrior(3.0, 6.0),
                    ),
                    outputscale_prior=GammaPrior(2.0, 0.15),
                ),
                grid_size=30,
                num_dims=2,
                grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
            ),
            learn_additional_noise=False,
        ).to(device)

        mll_type = lambda x, y: BatchedWoodburyMarginalLogLikelihood(
            x, y, clear_caches_every_iteration=True)
    elif args.model == "exact":
        model = FixedNoiseGP(
            init_x,
            init_y.view(-1, 1),
            init_y_var.view(-1, 1),
            ScaleKernel(
                MaternKernel(
                    ard_num_dims=2,
                    nu=0.5,
                    lengthscale_prior=GammaPrior(3.0, 6.0),
                ),
                outputscale_prior=GammaPrior(2.0, 0.15),
            ),
        ).to(device)
        mll_type = ExactMarginalLogLikelihood

    mll = mll_type(model.likelihood, model)

    print("---- Fitting initial model ----")
    start = time.time()
    model.train()
    model.zero_grad()
    # with max_cholesky_size(args.cholesky_size), skip_logdet_forward(True), \
    #         use_toeplitz(args.toeplitz), max_root_decomposition_size(args.sketch_size):
    fit_gpytorch_torch(mll, options={"lr": 0.1, "maxiter": 1000})
    end = time.time()
    print("Elapsed fitting time: ", end - start)
    print("Named parameters: ", list(model.named_parameters()))

    print("--- Now computing initial RMSE")
    model.eval()
    with gpytorch.settings.skip_posterior_variances(True):
        test_pred = model(test_x)
        pred_rmse = ((test_pred.mean - test_y) ** 2).mean().sqrt()
    print("---- Initial RMSE: ", pred_rmse.item())

    all_outputs = []
    start_ind = init_x.shape[0]
    end_ind = int(start_ind + args.batch_size)
    for step in range(args.num_steps):
        if step > 0 and step % 25 == 0:
            print("Beginning step ", step)

        total_time_step_start = time.time()

        if step > 0:
            print("---- Fitting model ----")
            start = time.time()
            model.train()
            model.zero_grad()
            mll = mll_type(model.likelihood, model)
            # with skip_logdet_forward(True), max_root_decomposition_size(
            #         args.sketch_size), max_cholesky_size(args.cholesky_size), \
            #         use_toeplitz(args.toeplitz):
            fit_gpytorch_torch(
                mll, options={"lr": 0.01 * (0.99 ** step), "maxiter": 300})
            model.zero_grad()
            end = time.time()
            print("Elapsed fitting time: ", end - start)
            print("Named parameters: ", list(model.named_parameters()))

        if not args.random:
            if args.model == "wiski":
                botorch_model = OnlineSKIBotorchModel(model=model)
            else:
                botorch_model = model
            # qmc_sampler = SobolQMCNormalSampler(num_samples=4)

            bounds = torch.stack([torch.zeros(2), torch.ones(2)]).to(device)
            qnipv = qNIPV(
                model=botorch_model,
                mc_points=test_x,
                # sampler=qmc_sampler,
            )

            # with use_toeplitz(args.toeplitz), root_pred_var(True), fast_pred_var(True):
            candidates, acq_value = optimize_acqf(
                acq_function=qnipv,
                bounds=bounds,
                q=args.batch_size,
                num_restarts=1,
                raw_samples=10,  # used for initialization heuristic
                options={"batch_limit": 5, "maxiter": 200},
            )
        else:
            candidates = torch.rand(
                args.batch_size, train_x.shape[-1],
                device=device, dtype=train_x.dtype)
            acq_value = torch.zeros(1)
            model.eval()
            _ = model(test_x[:10])  # to init caches

        print("---- Finished optimizing; now querying dataset ---- ")
        with torch.no_grad():
            covar_dists = model.covar_module(candidates, train_x)
            nearest_points = covar_dists.evaluate().argmax(dim=-1)
            new_x = train_x[nearest_points]
            new_y = train_y[nearest_points]
            new_y_var = train_y_var[nearest_points]

            todrop = torch.tensor(
                [x in nearest_points for x in range(train_x.shape[0])])
            train_x, train_y, train_y_var = (
                train_x[~todrop], train_y[~todrop], train_y_var[~todrop])
            print("New train_x shape", train_x.shape)

            print("--- Now updating model with simulator ----")
            model = model.condition_on_observations(
                X=new_x, Y=new_y.view(-1, 1), noise=new_y_var.view(-1, 1))

        print("--- Now computing updated RMSE")
        model.eval()
        # with gpytorch.settings.fast_pred_var(True), detach_test_caches(True), \
        #         max_root_decomposition_size(args.sketch_size), \
        #         max_cholesky_size(args.cholesky_size), \
        #         use_toeplitz(args.toeplitz), root_pred_var(True):
        test_pred = model(test_x)
        pred_rmse = ((test_pred.mean.view(-1) - test_y.view(-1)) ** 2).mean().sqrt()
        pred_avg_variance = test_pred.variance.mean()

        total_time_step_elapsed_time = time.time() - total_time_step_start
        step_output_list = [
            total_time_step_elapsed_time,
            acq_value.item(),
            pred_rmse.item(),
            pred_avg_variance.item(),
        ]
        print("Step RMSE: ", pred_rmse)
        all_outputs.append(step_output_list)

        start_ind = end_ind
        end_ind = int(end_ind + args.batch_size)

    output_dict = {
        "model_state_dict": model.cpu().state_dict(),
        "queried_points": {
            'x': model.cpu().train_inputs[0],
            'y': model.cpu().train_targets,
        },
        "results": DataFrame(all_outputs),
    }
    torch.save(output_dict, args.output)
def BO_pred(acq_func, plot=False, return_='pred', append=False, init='external'):
    # Experiment index
    X = np.linspace(0, 1, 1000)
    exindex = pd.DataFrame([[x, f(x)] for x in X], columns=['x', 'f(x)'])
    training_points = [50, 300, 500, 900]

    # Instantiate BO class
    bo = BO(exindex=exindex,
            domain=exindex.drop('f(x)', axis=1),
            results=exindex.iloc[training_points],
            acquisition_function=acq_func,
            init_method=init,
            lengthscale_prior=[GammaPrior(1.2, 1.1), 0.2],
            noise_prior=None,
            batch_size=random.sample([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 1)[0],
            fast_comp=True,
            gpu=True)
    bo.run(append=append)

    # Check prediction
    if return_ == 'pred':
        try:
            bo.model.predict(to_torch(bo.obj.domain))       # torch.tensor
            bo.model.predict(bo.obj.domain.values)          # numpy.array
            bo.model.predict(list(bo.obj.domain.values))    # list
            bo.model.predict(exindex.drop('f(x)', axis=1))  # pandas.DataFrame
        except Exception:
            return False
        pred = bo.model.predict(bo.obj.domain.iloc[[32]])
        pred = bo.obj.scaler.unstandardize(pred)
        return (pred[0] - 1.33) < 0.1

    # Check predictive posterior variance
    elif return_ == 'var':
        try:
            bo.model.predict(to_torch(bo.obj.domain))       # torch.tensor
            bo.model.predict(bo.obj.domain.values)          # numpy.array
            bo.model.predict(list(bo.obj.domain.values))    # list
            bo.model.predict(exindex.drop('f(x)', axis=1))  # pandas.DataFrame
        except Exception:
            return False
        var = bo.model.variance(bo.obj.domain.iloc[[32]])
        return (var[0] - 0.04) < 0.1

    # Make sure sampling works with tensors, arrays, lists, and DataFrames
    elif return_ == 'sample':
        try:
            bo.model.sample_posterior(to_torch(bo.obj.domain))       # torch.tensor
            bo.model.sample_posterior(bo.obj.domain.values)          # numpy.array
            bo.model.sample_posterior(list(bo.obj.domain.values))    # list
            bo.model.sample_posterior(exindex.drop('f(x)', axis=1))  # pandas.DataFrame
            return True
        except Exception:
            return False

    # Plot model
    elif return_ == 'plot':
        next_points = bo.obj.get_results(bo.proposed_experiments)
        mean = bo.obj.scaler.unstandardize(bo.model.predict(bo.obj.domain))
        std = np.sqrt(bo.model.variance(bo.obj.domain)) * bo.obj.scaler.std * 2
        samples = bo.obj.scaler.unstandardize(
            bo.model.sample_posterior(bo.obj.domain, batch_size=3))

        plt.figure(1, figsize=(6, 6))

        # Model mean and standard deviation
        plt.subplot(211)
        plt.plot(X, exindex['f(x)'], color='black')
        plt.plot(X, mean, label='GP')
        plt.fill_between(X, mean - std, mean + std, alpha=0.4)
        # Known results and next selected point
        plt.scatter(bo.obj.results_input()['x'],
                    bo.obj.results_input()['f(x)'],
                    color='black', label='known')
        plt.scatter(next_points['x'], next_points['f(x)'],
                    color='red', label='next_experiments')
        plt.ylabel('f(x)')

        # Samples
        plt.subplot(212)
        for sample in samples:
            plt.plot(X, torch_to_numpy(sample, gpu=True))
        plt.xlabel('x')
        plt.ylabel('Posterior Samples')
        plt.show()
        return True

    elif return_ == 'simulate':
        if init != 'external':
            bo.init_seq.batch_size = random.sample([2, 3, 4, 5, 6, 7, 8, 9, 10], 1)[0]
        bo.simulate(iterations=5)
        bo.plot_convergence()
        bo.model.regression()
        return True
def test_bounds_to_prior(self):
    prior = GammaPrior(1, 1)
    self.assertEqual(prior, _bounds_to_prior(prior=prior, bounds=None))
    self.assertIsInstance(
        _bounds_to_prior(prior=None, bounds=(-10, 10)), SmoothedBoxPrior)
def BO_pred(acq_func, plot=False, return_='pred', append=False, init='external',
            fast_comp=True):
    # Experiment index
    X = np.linspace(0, 1, 1000)
    exindex = pd.DataFrame([[x, f(x)] for x in X], columns=['x', 'f(x)'])
    training_points = [50, 300, 500, 900]

    # Instantiate BO class
    bo = BO(exindex=exindex,
            domain=exindex.drop('f(x)', axis=1),
            results=exindex.iloc[training_points],
            acquisition_function=acq_func,
            init_method=init,
            lengthscale_prior=[GammaPrior(1.2, 1.1), 0.2],
            noise_prior=None,
            batch_size=random.sample([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 1)[0],
            fast_comp=fast_comp)
    bo.run(append=append)

    # Check prediction
    if return_ == 'pred':
        try:
            bo.model.predict(to_torch(bo.obj.domain))       # torch.tensor
            bo.model.predict(bo.obj.domain.values)          # numpy.array
            bo.model.predict(list(bo.obj.domain.values))    # list
            bo.model.predict(exindex.drop('f(x)', axis=1))  # pandas.DataFrame
        except Exception:
            return False
        pred = bo.model.predict(bo.obj.domain.iloc[[32]])
        pred = bo.obj.scaler.unstandardize(pred)
        return (pred[0] - 1.33) < 0.1

    # Check predictive posterior variance
    elif return_ == 'var':
        try:
            bo.model.predict(to_torch(bo.obj.domain))       # torch.tensor
            bo.model.predict(bo.obj.domain.values)          # numpy.array
            bo.model.predict(list(bo.obj.domain.values))    # list
            bo.model.predict(exindex.drop('f(x)', axis=1))  # pandas.DataFrame
        except Exception:
            return False
        var = bo.model.variance(bo.obj.domain.iloc[[32]])
        return (var[0] - 0.04) < 0.1

    # Make sure sampling works with tensors, arrays, lists, and DataFrames
    elif return_ == 'sample':
        try:
            bo.model.sample_posterior(to_torch(bo.obj.domain))       # torch.tensor
            bo.model.sample_posterior(bo.obj.domain.values)          # numpy.array
            bo.model.sample_posterior(list(bo.obj.domain.values))    # list
            bo.model.sample_posterior(exindex.drop('f(x)', axis=1))  # pandas.DataFrame
            return True
        except Exception:
            return False

    elif return_ == 'simulate':
        if init != 'external':
            bo.init_seq.batch_size = random.sample([2, 3, 4, 5, 6, 7, 8, 9, 10], 1)[0]
        bo.simulate(iterations=5)
        return True

    elif return_ == 'none':
        return True
def test_gamma_prior_batch_log_prob(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")

    concentration = torch.tensor([1.0, 2.0], device=device)
    rate = torch.tensor([1.0, 2.0], device=device)
    prior = GammaPrior(concentration, rate)
    dist = Gamma(concentration, rate)
    t = torch.ones(2, device=device)
    self.assertTrue(torch.equal(prior.log_prob(t), dist.log_prob(t)))
    t = torch.ones(2, 2, device=device)
    self.assertTrue(torch.equal(prior.log_prob(t), dist.log_prob(t)))
    with self.assertRaises(RuntimeError):
        prior.log_prob(torch.ones(3, device=device))

    concentration = torch.tensor([[1.0, 2.0], [0.5, 3.0]], device=device)
    rate = torch.tensor([[1.0, 2.0], [0.5, 1.0]], device=device)
    prior = GammaPrior(concentration, rate)
    dist = Gamma(concentration, rate)
    t = torch.ones(2, device=device)
    self.assertTrue(torch.equal(prior.log_prob(t), dist.log_prob(t)))
    t = torch.ones(2, 2, device=device)
    self.assertTrue(torch.equal(prior.log_prob(t), dist.log_prob(t)))
    with self.assertRaises(RuntimeError):
        prior.log_prob(torch.ones(3, device=device))
    with self.assertRaises(RuntimeError):
        prior.log_prob(torch.ones(2, 3, device=device))
def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(
        args.data_loc, args.num_init, args.num_total, test_is_year=False)
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

    model = FixedNoiseOnlineSKIGP(
        init_x,
        init_y.view(-1, 1),
        init_y_var.view(-1, 1),
        GridInterpolationKernel(
            base_kernel=ScaleKernel(
                MaternKernel(
                    ard_num_dims=2,
                    nu=0.5,
                    lengthscale_prior=GammaPrior(3.0, 6.0),
                ),
                outputscale_prior=GammaPrior(2.0, 0.15),
            ),
            grid_size=30,
            num_dims=2,
            grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
        ),
        learn_additional_noise=False,
    ).to(device)

    mll = BatchedWoodburyMarginalLogLikelihood(model.likelihood, model)

    print("---- Fitting initial model ----")
    start = time.time()
    with skip_logdet_forward(True), max_root_decomposition_size(
            args.sketch_size), use_toeplitz(args.toeplitz):
        fit_gpytorch_torch(mll, options={"lr": 0.1, "maxiter": 1000})
    end = time.time()
    print("Elapsed fitting time: ", end - start)
    model.zero_grad()
    model.eval()

    print("--- Generating initial predictions on test set ----")
    start = time.time()
    with detach_test_caches(True), max_root_decomposition_size(
            args.sketch_size), max_cholesky_size(
            args.cholesky_size), use_toeplitz(args.toeplitz):
        pred_dist = model(test_x)
        pred_mean = pred_dist.mean.detach()
        # pred_var = pred_dist.variance.detach()
    end = time.time()
    print("Elapsed initial prediction time: ", end - start)

    rmse_initial = ((pred_mean.view(-1) - test_y.view(-1)) ** 2).mean().sqrt()
    print("Initial RMSE: ", rmse_initial.item())

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    mll_time_list = []
    rmse_list = []
    for i in range(500, train_x.shape[0]):
        model.zero_grad()
        model.train()

        start = time.time()
        with skip_logdet_forward(True), max_root_decomposition_size(
                args.sketch_size), max_cholesky_size(
                args.cholesky_size), use_toeplitz(args.toeplitz):
            loss = -mll(model(train_x[:i]), train_y[:i]).sum()
            loss.backward()
        mll_time = time.time() - start

        optimizer.step()
        model.zero_grad()
        optimizer.zero_grad()

        start = time.time()
        with torch.no_grad():
            model.condition_on_observations(
                train_x[i].unsqueeze(0),
                train_y[i].view(1, 1),
                train_y_var[i].view(-1, 1),
                inplace=True,
            )
        fantasy_time = time.time() - start
        mll_time_list.append([mll_time, fantasy_time])

        if i % 25 == 0:
            start = time.time()
            model.eval()
            model.zero_grad()

            with detach_test_caches(), max_root_decomposition_size(
                    args.sketch_size), max_cholesky_size(args.cholesky_size):
                pred_dist = model(test_x)
            end = time.time()

            rmse = ((pred_dist.mean - test_y.view(-1)) ** 2).mean().sqrt().item()
            rmse_list.append([rmse, end - start])
            print("Current RMSE: ", rmse)
            print("Outputscale: ", model.covar_module.base_kernel.raw_outputscale)
            print(
                "Lengthscale: ",
                model.covar_module.base_kernel.base_kernel.raw_lengthscale,
            )

            print("Step: ", i, "Train Loss: ", loss)
            optimizer.param_groups[0]["lr"] *= 0.9

    torch.save({"training": mll_time_list, "predictions": rmse_list}, args.output)
def test_pickle_with_prior(self):
    likelihood = GaussianLikelihood(noise_prior=GammaPrior(1, 1))
    # Should be able to pickle and unpickle with a prior
    pickle.loads(pickle.dumps(likelihood))
def test_pairwise_gp(self):
    for batch_shape, dtype in itertools.product(
            (torch.Size(), torch.Size([2])), (torch.float, torch.double)):
        tkwargs = {"device": self.device, "dtype": dtype}
        X_dim = 2

        model, model_kwargs = self._get_model_and_data(
            batch_shape=batch_shape, X_dim=X_dim, **tkwargs)
        train_X = model_kwargs["datapoints"]
        train_comp = model_kwargs["comparisons"]

        # test training
        # regular training
        mll = PairwiseLaplaceMarginalLogLikelihood(model).to(**tkwargs)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=OptimizationWarning)
            fit_gpytorch_model(mll, options={"maxiter": 2}, max_retries=1)
        # prior training
        prior_m = PairwiseGP(None, None)
        with self.assertRaises(RuntimeError):
            prior_m(train_X)
        # forward in training mode with non-training data
        custom_m = PairwiseGP(**model_kwargs)
        other_X = torch.rand(batch_shape + torch.Size([3, X_dim]), **tkwargs)
        other_comp = train_comp.clone()
        with self.assertRaises(RuntimeError):
            custom_m(other_X)
        custom_mll = PairwiseLaplaceMarginalLogLikelihood(custom_m).to(**tkwargs)
        post = custom_m(train_X)
        with self.assertRaises(RuntimeError):
            custom_mll(post, other_comp)

        # setting jitter = 0 with a singular covar will raise error
        sing_train_X = torch.ones(batch_shape + torch.Size([10, X_dim]), **tkwargs)
        with self.assertRaises(RuntimeError):
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                custom_m = PairwiseGP(sing_train_X, train_comp, jitter=0)
                custom_m.posterior(sing_train_X)

        # test init
        self.assertIsInstance(model.mean_module, ConstantMean)
        self.assertIsInstance(model.covar_module, RBFKernel)
        self.assertIsInstance(model.covar_module.lengthscale_prior, GammaPrior)
        self.assertEqual(model.num_outputs, 1)

        # test custom noise prior
        custom_noise_prior = GammaPrior(concentration=2.0, rate=1.0)
        custom_noise_module = HomoskedasticNoise(noise_prior=custom_noise_prior)
        custom_m = PairwiseGP(**model_kwargs, noise_module=custom_noise_module)
        self.assertEqual(
            custom_m.noise_module.noise_prior.concentration, torch.tensor(2.0))
        self.assertEqual(custom_m.noise_module.noise_prior.rate, torch.tensor(1.0))

        # test custom models
        custom_m = PairwiseGP(**model_kwargs, covar_module=LinearKernel())
        self.assertIsInstance(custom_m.covar_module, LinearKernel)

        # std_noise setter
        custom_m.std_noise = 123
        self.assertTrue(torch.all(custom_m.std_noise == 123))

        # prior prediction
        prior_m = PairwiseGP(None, None)
        prior_m.eval()
        post = prior_m.posterior(train_X)
        self.assertIsInstance(post, GPyTorchPosterior)

        # test methods that are not commonly or explicitly used
        # _calc_covar with observation noise
        no_noise_cov = model._calc_covar(train_X, train_X, observation_noise=False)
        noise_cov = model._calc_covar(train_X, train_X, observation_noise=True)
        diag_diff = (noise_cov - no_noise_cov).diagonal(dim1=-2, dim2=-1)
        self.assertTrue(
            torch.allclose(
                diag_diff,
                model.std_noise.expand(diag_diff.shape),
                rtol=1e-4,
                atol=1e-5,
            ))

        # test adding jitter
        pd_mat = torch.eye(2, 2)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=RuntimeWarning)
            jittered_pd_mat = model._add_jitter(pd_mat)
        diag_diff = (jittered_pd_mat - pd_mat).diagonal(dim1=-2, dim2=-1)
        self.assertTrue(
            torch.allclose(
                diag_diff,
                torch.full_like(diag_diff, model._jitter),
                atol=model._jitter / 10,
            ))

        # test initial utility val
        util_comp = torch.topk(model.utility, k=2, dim=-1).indices.unsqueeze(-2)
        self.assertTrue(torch.all(util_comp == train_comp))

        # test posterior
        # test non batch evaluation
        X = torch.rand(batch_shape + torch.Size([3, X_dim]), **tkwargs)
        expected_shape = batch_shape + torch.Size([3, 1])
        posterior = model.posterior(X)
        self.assertIsInstance(posterior, GPyTorchPosterior)
        self.assertEqual(posterior.mean.shape, expected_shape)
        self.assertEqual(posterior.variance.shape, expected_shape)

        # expect to raise error when output_indices is not None
        with self.assertRaises(RuntimeError):
            model.posterior(X, output_indices=[0])

        # test re-evaluating utility when it's None
        model.utility = None
        posterior = model.posterior(X)
        self.assertIsInstance(posterior, GPyTorchPosterior)

        # test adding observation noise
        posterior_pred = model.posterior(X, observation_noise=True)
        self.assertIsInstance(posterior_pred, GPyTorchPosterior)
        self.assertEqual(posterior_pred.mean.shape, expected_shape)
        self.assertEqual(posterior_pred.variance.shape, expected_shape)
        pvar = posterior_pred.variance
        reshaped_noise = model.std_noise.unsqueeze(-2).expand(posterior.variance.shape)
        pvar_exp = posterior.variance + reshaped_noise
        self.assertTrue(torch.allclose(pvar, pvar_exp, rtol=1e-4, atol=1e-5))

        # test batch evaluation
        X = torch.rand(2, *batch_shape, 3, X_dim, **tkwargs)
        expected_shape = torch.Size([2]) + batch_shape + torch.Size([3, 1])
        posterior = model.posterior(X)
        self.assertIsInstance(posterior, GPyTorchPosterior)
        self.assertEqual(posterior.mean.shape, expected_shape)

        # test adding observation noise in batch mode
        posterior_pred = model.posterior(X, observation_noise=True)
        self.assertIsInstance(posterior_pred, GPyTorchPosterior)
        self.assertEqual(posterior_pred.mean.shape, expected_shape)
        pvar = posterior_pred.variance
        reshaped_noise = model.std_noise.unsqueeze(-2).expand(posterior.variance.shape)
        pvar_exp = posterior.variance + reshaped_noise
        self.assertTrue(torch.allclose(pvar, pvar_exp, rtol=1e-4, atol=1e-5))
def test_scalar_gamma_prior_log_transform(self):
    prior = GammaPrior(1, 1, log_transform=True)
    self.assertTrue(prior.log_transform)
    self.assertAlmostEqual(
        prior.log_prob(prior.rate.new([0.0])).item(), -1.0, places=5)
def test_scalar_gamma_prior_invalid_params(self):
    with self.assertRaises(ValueError):
        GammaPrior(0, 1)
    with self.assertRaises(ValueError):
        GammaPrior(1, 0)
def test_vector_gamma_prior_invalid_params(self):
    with self.assertRaises(ValueError):
        GammaPrior(torch.tensor([-0.5, 0.5]), torch.tensor([1.0, 1.0]))
    with self.assertRaises(ValueError):
        GammaPrior(torch.tensor([0.5, 0.5]), torch.tensor([-0.1, 1.0]))