def test_roundtrip(self):
    """Round-trip batched GPs through ``batched_to_model_list`` and
    ``model_list_to_batched`` and verify the state dict is preserved exactly.

    Covers SingleTaskGP, FixedNoiseGP, and SingleTaskMultiFidelityGP (with
    both ``linear_truncated`` settings), in both float and double precision.
    """

    def _assert_roundtrip(batch_gp):
        # Converting to a model list and back must reproduce the exact
        # state dict: same keys, element-wise equal tensors.
        list_gp = batched_to_model_list(batch_gp)
        batch_gp_recov = model_list_to_batched(list_gp)
        sd_orig = batch_gp.state_dict()
        sd_recov = batch_gp_recov.state_dict()
        self.assertTrue(set(sd_orig) == set(sd_recov))
        self.assertTrue(
            all(torch.equal(sd_orig[k], sd_recov[k]) for k in sd_orig)
        )

    for dtype in (torch.float, torch.double):
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1)
        train_Y2 = train_X[:, 0] - train_X[:, 1]
        train_Y = torch.stack([train_Y1, train_Y2], dim=-1)
        # SingleTaskGP
        _assert_roundtrip(SingleTaskGP(train_X, train_Y))
        # FixedNoiseGP
        _assert_roundtrip(
            FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y))
        )
        # SingleTaskMultiFidelityGP
        for lin_trunc in (False, True):
            _assert_roundtrip(
                SingleTaskMultiFidelityGP(
                    train_X,
                    train_Y,
                    iteration_fidelity=1,
                    linear_truncated=lin_trunc,
                )
            )
def test_roundtrip(self, cuda=False):
    """Check that converting a batched GP to a model list and back again
    yields an identical state dict, for SingleTaskGP and FixedNoiseGP."""
    device = torch.device("cuda" if cuda else "cpu")
    for dtype in (torch.float, torch.double):
        X = torch.rand(10, 2, device=device, dtype=dtype)
        # Two synthetic objectives stacked into a batched (two-output) target.
        Y = torch.stack([X.sum(dim=-1), X[:, 0] - X[:, 1]], dim=-1)
        for original in (
            SingleTaskGP(X, Y),
            FixedNoiseGP(X, Y, torch.rand_like(Y)),
        ):
            recovered = model_list_to_batched(batched_to_model_list(original))
            sd_before = original.state_dict()
            sd_after = recovered.state_dict()
            # Same parameter names ...
            self.assertTrue(set(sd_before) == set(sd_after))
            # ... and identical tensor values for every entry.
            self.assertTrue(
                all(torch.equal(sd_before[k], sd_after[k]) for k in sd_before)
            )
def test_sample_all_priors(self, cuda=False):
    """Test ``sample_all_priors``: hyperparameters are resampled from their
    priors, unsampleable priors warn and leave parameters untouched, and a
    missing setting closure raises ``RuntimeError``.
    """
    device = torch.device("cuda" if cuda else "cpu")
    for dtype in (torch.float, torch.double):
        train_X = torch.rand(3, 5, device=device, dtype=dtype)
        train_Y = torch.rand(3, 1, device=device, dtype=dtype)
        model = SingleTaskGP(train_X=train_X, train_Y=train_Y)
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        mll.to(device=device, dtype=dtype)
        original_state_dict = dict(deepcopy(mll.model.state_dict()))
        sample_all_priors(model)
        # make sure one of the hyperparameters changed
        # (use torch.equal so this is robust for any tensor shape, instead of
        # relying on truthiness of an elementwise `!=` result)
        self.assertTrue(
            not torch.equal(
                dict(model.state_dict())["likelihood.noise_covar.raw_noise"],
                original_state_dict["likelihood.noise_covar.raw_noise"],
            )
        )
        # check that lengthscales are all different
        ls = model.covar_module.base_kernel.raw_lengthscale.view(-1).tolist()
        # BUG FIX: the generator must wrap the entire comparison. Previously
        # `all(ls[0] != ls[i])` sat inside the genexp and assertTrue received
        # the (always-truthy) generator object, so nothing was checked.
        self.assertTrue(all(ls[0] != ls[i] for i in range(1, len(ls))))
        # change one of the priors to SmoothedBoxPrior
        model.covar_module = ScaleKernel(
            MaternKernel(
                nu=2.5,
                ard_num_dims=model.train_inputs[0].shape[-1],
                batch_shape=model._aug_batch_shape,
                lengthscale_prior=SmoothedBoxPrior(3.0, 6.0),
            ),
            batch_shape=model._aug_batch_shape,
            outputscale_prior=GammaPrior(2.0, 0.15),
        )
        original_state_dict = dict(deepcopy(mll.model.state_dict()))
        with warnings.catch_warnings(record=True) as ws, settings.debug(True):
            sample_all_priors(model)
            self.assertEqual(len(ws), 1)
            self.assertTrue("rsample" in str(ws[0].message))
        # the lengthscale should not have changed because sampling is
        # not implemented for SmoothedBoxPrior
        self.assertTrue(
            torch.equal(
                dict(model.state_dict())[
                    "covar_module.base_kernel.raw_lengthscale"
                ],
                original_state_dict["covar_module.base_kernel.raw_lengthscale"],
            )
        )
        # set setting_closure to None and make sure RuntimeError is raised
        prior_tuple = model.likelihood.noise_covar._priors["noise_prior"]
        model.likelihood.noise_covar._priors["noise_prior"] = (
            prior_tuple[0],
            prior_tuple[1],
            None,
        )
        with self.assertRaises(RuntimeError):
            sample_all_priors(model)
def one_step_acquisition_gp(oracle, full_train_X, full_train_Y, acq, q,
                            bounds, dim, domain, domain_image,
                            state_dict=None, plot_stuff=False):
    """Perform a single Bayesian-optimization step with a SingleTaskGP.

    Fits a GP to the current data (warm-started from ``state_dict`` when
    given), optimizes the acquisition function for a candidate batch,
    evaluates the oracle there, and returns the augmented data plus the
    fitted model, candidate, its image, and the model's state dict.
    """
    surrogate = SingleTaskGP(full_train_X, full_train_Y)
    mll = ExactMarginalLogLikelihood(surrogate.likelihood, surrogate)
    # Warm-start the hyperparameters from a previous iteration, if provided.
    if state_dict is not None:
        surrogate.load_state_dict(state_dict)
    fit_gpytorch_model(mll)

    candidate, acq_fn = get_candidate(surrogate, acq, full_train_Y, q,
                                      bounds, dim)
    # Optional 1-D diagnostic plot for the EI acquisition only.
    if acq == 'EI' and dim == 1 and plot_stuff:
        plot_util(oracle, surrogate, acq_fn, domain, domain_image, None,
                  full_train_X, full_train_Y, candidate)

    candidate_image = oracle(candidate)
    augmented_X = torch.cat([full_train_X, candidate])
    augmented_Y = torch.cat([full_train_Y, candidate_image])
    return (augmented_X, augmented_Y, surrogate, candidate, candidate_image,
            surrogate.state_dict())
def fit_model(self):
    """
    If no state_dict exists, fits the model and saves the state_dict.
    Otherwise, constructs the model but uses the fit given by the state_dict.
    """
    # load the 30 saved portfolio-evaluation files
    evals = [
        torch.load(
            os.path.join(script_dir, "port_evals", "port_n=100_seed=%d" % seed)
        )
        for seed in range(1, 31)
    ]
    # concatenate into single training tensors
    X = torch.cat([d["X"] for d in evals], dim=0).squeeze(-2)
    Y = torch.cat([d["Y"] for d in evals], dim=0).squeeze(-2)

    # construct the likelihood with an informative noise prior
    noise_prior = GammaPrior(1.1, 0.5)
    noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
    likelihood = GaussianLikelihood(
        noise_prior=noise_prior,
        batch_shape=[],
        noise_constraint=GreaterThan(
            0.000005,  # minimum observation noise assumed in the GP model
            transform=None,
            initial_value=noise_prior_mode,
        ),
    )

    # We save the state dict to avoid fitting the GP every time which takes ~3 mins
    sd_path = os.path.join(script_dir, "portfolio_surrogate_state_dict.pt")
    model = SingleTaskGP(X, Y, likelihood, outcome_transform=Standardize(m=1))
    try:
        model.load_state_dict(torch.load(sd_path))
    except FileNotFoundError:
        # no cached fit: train from scratch and cache the result
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        from time import time
        start = time()
        fit_gpytorch_model(mll)
        print("fitting took %s seconds" % (time() - start))
        torch.save(model.state_dict(), sd_path)
    self.model = model
def gp_torch_train(train_x: Tensor,
                   train_y: Tensor,
                   n_inducing_points: int,
                   tkwargs: Dict[str, Any],
                   init,
                   scale: bool,
                   covar_name: str,
                   gp_file: Optional[str],
                   save_file: str,
                   input_wp: bool,
                   outcome_transform: Optional[OutcomeTransform] = None,
                   options: Dict[str, Any] = None) -> SingleTaskGP:
    """Build and fit a sparse (inducing-point) SingleTaskGP with a torch optimizer.

    Either initializes hyperparameters from scratch (``init`` truthy, using
    k-means to place the inducing points) or loads a previously saved state
    dict from ``gp_file``. In both cases the model is then (re)fit via
    ``fit_gpytorch_torch``, saved to ``save_file``, put in eval mode, and
    returned.

    Args:
        train_x: Training inputs; only `train_x.shape[1]` features are used
            for lengthscale/inducing-point sizing.
        train_y: Training targets; asserted to have more than one dimension.
        n_inducing_points: Number of inducing points for InducingPointKernel.
        tkwargs: Device/dtype kwargs passed to ``model.to``.
        init: If truthy, initialize hyperparameters; otherwise load from
            ``gp_file`` (at least one of ``gp_file``/``init`` must be set).
        scale: Whether to use an output scale (see ``query_covar``).
        covar_name: Name of the base covariance module (see ``query_covar``).
        gp_file: Path to a saved state dict (required when not ``init``).
        save_file: Path where the fitted state dict is saved.
        input_wp: Whether to apply a Kumaraswamy input-warping transform.
        outcome_transform: Optional outcome transform for the GP.
        options: Optimizer options forwarded to ``fit_gpytorch_torch``.

    Returns:
        The fitted SingleTaskGP (in eval mode).
    """
    assert train_y.ndim > 1, train_y.shape
    assert gp_file or init, (gp_file, init)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()

    if init:
        # build hyp
        print("Initialize GP hparams...")
        print("Doing Kmeans init...")
        assert n_inducing_points > 0, n_inducing_points
        # K-means cluster centers serve as the initial inducing points.
        kmeans = MiniBatchKMeans(n_clusters=n_inducing_points,
                                 batch_size=min(10000, train_x.shape[0]),
                                 n_init=25)
        start_time = time.time()
        kmeans.fit(train_x.cpu().numpy())
        end_time = time.time()
        print(f"K means took {end_time - start_time:.1f}s to finish...")
        inducing_points = torch.from_numpy(kmeans.cluster_centers_.copy())

        output_scale = None
        if scale:
            output_scale = train_y.var().item()
        # Per-dimension lengthscale init: median pairwise distance along each
        # feature, clamped away from zero.
        lscales = torch.empty(1, train_x.shape[1])
        for i in range(train_x.shape[1]):
            lscales[0, i] = torch.pdist(train_x[:, i].view(
                -1, 1)).median().clamp(min=0.01)
        base_covar_module = query_covar(covar_name=covar_name,
                                        scale=scale,
                                        outputscale=output_scale,
                                        lscales=lscales)
        covar_module = InducingPointKernel(base_covar_module,
                                           inducing_points=inducing_points,
                                           likelihood=likelihood)

        input_warp_tf = None
        if input_wp:
            # Apply input warping
            # initialize input_warping transformation
            # NOTE(review): this branch uses CustomWarp while the load branch
            # below uses Warp — confirm the asymmetry is intentional.
            input_warp_tf = CustomWarp(
                indices=list(range(train_x.shape[-1])),
                # use a prior with median at 1.
                # when a=1 and b=1, the Kumaraswamy CDF is the identity function
                concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
                concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
            )
        model = SingleTaskGP(train_x,
                             train_y,
                             covar_module=covar_module,
                             likelihood=likelihood,
                             input_transform=input_warp_tf,
                             outcome_transform=outcome_transform)
    else:
        # load model
        # Placeholder hyperparameters: real values come from the state dict.
        output_scale = 1  # will be overwritten when loading model
        lscales = torch.ones(
            train_x.shape[1])  # will be overwritten when loading model
        base_covar_module = query_covar(covar_name=covar_name,
                                        scale=scale,
                                        outputscale=output_scale,
                                        lscales=lscales)
        covar_module = InducingPointKernel(base_covar_module,
                                           inducing_points=torch.empty(
                                               n_inducing_points,
                                               train_x.shape[1]),
                                           likelihood=likelihood)

        input_warp_tf = None
        if input_wp:
            # Apply input warping
            # initialize input_warping transformation
            input_warp_tf = Warp(
                indices=list(range(train_x.shape[-1])),
                # use a prior with median at 1.
                # when a=1 and b=1, the Kumaraswamy CDF is the identity function
                concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
                concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
            )
        model = SingleTaskGP(train_x,
                             train_y,
                             covar_module=covar_module,
                             likelihood=likelihood,
                             input_transform=input_warp_tf,
                             outcome_transform=outcome_transform)
        print("Loading GP from file")
        state_dict = torch.load(gp_file)
        model.load_state_dict(state_dict)

    print("GP regression")
    start_time = time.time()
    model.to(**tkwargs)
    model.train()
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # set approx_mll to False since we are using an exact marginal log likelihood
    # fit_gpytorch_model(mll, optimizer=fit_gpytorch_torch, approx_mll=False, options=options)
    # Gradient clipping is enabled only with input warping, where the
    # warping-parameter gradients can be large.
    fit_gpytorch_torch(mll,
                       options=options,
                       approx_mll=False,
                       clip_by_value=True if input_wp else False,
                       clip_value=10.0)
    end_time = time.time()
    print(f"Regression took {end_time - start_time:.1f}s to finish...")
    print("Save GP model...")
    torch.save(model.state_dict(), save_file)
    print("Done training of GP.")
    model.eval()
    return model