def load_gp(log_dir):
    try:
        # The GP state, i.e., hyperparameters, normalization, etc.
        model_file = os.path.join(log_dir, "model_state.pth")
        with open(model_file, "rb") as f:
            state_dict = torch.load(f)

        # Get the evaluated data points
        eval_dict = load_eval(log_dir)
        train_X = eval_dict["train_inputs"]
        train_Y = eval_dict["train_targets"]

        # The bounds of the domain
        config_dict = load_config(log_dir)
        lb = torch.tensor(config_dict["lower_bound"])
        ub = torch.tensor(config_dict["upper_bound"])
        bounds = torch.stack((lb, ub))

        # Create GP instance and load the respective parameters
        gp = SingleTaskGP(
            train_X=train_X,
            train_Y=train_Y,
            outcome_transform=Standardize(m=1),
            input_transform=Normalize(d=1, bounds=bounds),
        )
        gp.load_state_dict(state_dict=state_dict)
    except FileNotFoundError:
        print(f"The model file could not be found in: {log_dir}")
        exit(1)
    return gp
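# Usage sketch (not from the original source): "logs/run_0" is a hypothetical
# directory in which a previous run saved the GP state via
# torch.save(gp.state_dict(), ".../model_state.pth").
gp = load_gp("logs/run_0")
gp.eval()
with torch.no_grad():
    # posterior() is the standard BoTorch entry point for predictions;
    # the input is (n, d) with d=1, matching Normalize(d=1) above
    pred = gp.posterior(torch.rand(10, 1))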
def one_step_acquisition_gp(oracle, full_train_X, full_train_Y, acq, q, bounds,
                            dim, domain, domain_image, state_dict=None,
                            plot_stuff=False):
    model = SingleTaskGP(full_train_X, full_train_Y)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    if state_dict is not None:
        model.load_state_dict(state_dict)
    fit_gpytorch_model(mll)

    candidate, EI = get_candidate(model, acq, full_train_Y, q, bounds, dim)
    if acq == 'EI' and dim == 1 and plot_stuff:
        plot_util(oracle, model, EI, domain, domain_image, None,
                  full_train_X, full_train_Y, candidate)

    # evaluate the candidate and append it to the training data
    candidate_image = oracle(candidate)
    full_train_X = torch.cat([full_train_X, candidate])
    full_train_Y = torch.cat([full_train_Y, candidate_image])
    state_dict = model.state_dict()
    return full_train_X, full_train_Y, model, candidate, candidate_image, state_dict
def initialize_model(train_x, train_obj, state_dict=None):
    model = SingleTaskGP(train_x, train_obj)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # load state dict if it is passed
    if state_dict is not None:
        model.load_state_dict(state_dict)
    return mll, model
def initialize_model(train_x, train_y, state_dict=None):
    """Initialize a GP model, optionally loading parameters from a state dict."""
    model = SingleTaskGP(train_x, train_y).to(train_x)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # load state dict if it is passed
    if state_dict is not None:
        model.load_state_dict(state_dict)
    return mll, model
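# Usage sketch (not from the original source): warm-start each refit from the
# previous iteration's parameters. `train_x`/`train_y` are hypothetical data;
# fit_gpytorch_model is the (legacy) BoTorch fitting routine used elsewhere here.
state_dict = None
for _ in range(5):
    mll, model = initialize_model(train_x, train_y, state_dict=state_dict)
    fit_gpytorch_model(mll)  # refitting starts from the warm-started parameters
    # ... optimize an acquisition function, evaluate the new candidate,
    #     and append it to train_x / train_y ...
    state_dict = model.state_dict()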
def initialize_model(train_x, train_obj, train_con, state_dict=None):
    # define a joint model for the objective and constraint outputs
    train_y = torch.cat([train_obj, train_con], dim=-1)
    model = SingleTaskGP(
        train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1])
    )
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # load state dict if it is passed
    if state_dict is not None:
        model.load_state_dict(state_dict)
    return mll, model
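# Usage sketch (an assumption, not part of the source): a model over the
# concatenated [objective, constraint] outputs is typically paired with a
# ConstrainedMCObjective, so MC acquisition functions weight samples by
# feasibility. The output indices below follow the torch.cat order above.
from botorch.acquisition.objective import ConstrainedMCObjective

constrained_obj = ConstrainedMCObjective(
    objective=lambda Z: Z[..., 0],      # first model output is the objective
    constraints=[lambda Z: Z[..., 1]],  # feasible when the constraint output <= 0
)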
def fit_model(self):
    """
    If no state_dict exists, fits the model and saves the state_dict.
    Otherwise, constructs the model but uses the fit given by the state_dict.
    """
    # read the data
    data_list = list()
    for i in range(1, 31):
        data_file = os.path.join(script_dir, "port_evals", "port_n=100_seed=%d" % i)
        data_list.append(torch.load(data_file))

    # join the data together
    X = torch.cat([data_list[i]["X"] for i in range(len(data_list))], dim=0).squeeze(-2)
    Y = torch.cat([data_list[i]["Y"] for i in range(len(data_list))], dim=0).squeeze(-2)

    # fit GP
    noise_prior = GammaPrior(1.1, 0.5)
    noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
    likelihood = GaussianLikelihood(
        noise_prior=noise_prior,
        batch_shape=[],
        noise_constraint=GreaterThan(
            0.000005,  # minimum observation noise assumed in the GP model
            transform=None,
            initial_value=noise_prior_mode,
        ),
    )

    # We save the state dict to avoid refitting the GP every time, which takes ~3 mins
    try:
        state_dict = torch.load(
            os.path.join(script_dir, "portfolio_surrogate_state_dict.pt"))
        model = SingleTaskGP(X, Y, likelihood, outcome_transform=Standardize(m=1))
        model.load_state_dict(state_dict)
    except FileNotFoundError:
        model = SingleTaskGP(X, Y, likelihood, outcome_transform=Standardize(m=1))
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        from time import time

        start = time()
        fit_gpytorch_model(mll)
        print("fitting took %s seconds" % (time() - start))
        torch.save(
            model.state_dict(),
            os.path.join(script_dir, "portfolio_surrogate_state_dict.pt"),
        )
    self.model = model
def get_fitted_model(x, obj, state_dict=None):
    # initialize and fit model
    fitted_model = SingleTaskGP(train_X=x, train_Y=obj)
    if state_dict is not None:
        fitted_model.load_state_dict(state_dict)
    mll = ExactMarginalLogLikelihood(fitted_model.likelihood, fitted_model)
    mll.to(x)
    fit_gpytorch_model(mll)
    return fitted_model
def initialize_model_ei(train_x, train_obj, state_dict=None):
    """
    Define model for the objective.

    :param train_x: input tensor
    :param train_obj: output tensor -> g(x) + s(x)
    :param state_dict: optional model parameters
    :return: the marginal log likelihood and the GP model
    """
    model = SingleTaskGP(train_x, train_obj).to(train_x)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # load state dict if it is passed
    if state_dict is not None:
        model.load_state_dict(state_dict)
    return mll, model
def get_fitted_model(train_x, train_obj, state_dict=None):
    # initialize and fit model
    model = SingleTaskGP(train_X=train_x, train_Y=train_obj)
    # alternative: a plain GPyTorch exact GP instead of SingleTaskGP
    # likelihood = gpytorch.likelihoods.GaussianLikelihood()
    # model = ExactGPModel(train_x, train_obj, likelihood)
    # model.train()
    # likelihood.train()
    if state_dict is not None:
        model.load_state_dict(state_dict)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    mll.to(train_x)
    fit_gpytorch_model(mll)
    return model
def make_model(train_x, train_y, state_dict=None):
    """
    Define the model based on the observed data.

    :param train_x: the design points / trial solutions
    :param train_y: the objective function values of the trial solutions used for model fitting
    :param state_dict: dictionary storing the parameters of the GP model
    :return: the GP model and its marginal log likelihood
    """
    try:
        model = SingleTaskGP(train_x, train_y)
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        # load state dict if it is passed
        if state_dict is not None:
            model.load_state_dict(state_dict)
    except Exception as e:
        print('Exception: {} in make_model()'.format(e))
        raise  # re-raise: model/mll would otherwise be unbound at the return below
    return model, mll
def initialize_model(self, train_X, train_Y, state_dict=None):
    """Initialise model for BO."""
    # From: https://github.com/pytorch/botorch/issues/179
    noise_prior = GammaPrior(1.1, 0.05)
    noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
    MIN_INFERRED_NOISE_LEVEL = 1e-3
    likelihood = GaussianLikelihood(
        noise_prior=noise_prior,
        noise_constraint=GreaterThan(
            MIN_INFERRED_NOISE_LEVEL,
            transform=None,
            initial_value=noise_prior_mode,
        ),
    )
    # train_x = self.scale_to_0_1_bounds(train_X)
    train_Y = standardize(train_Y)
    gp = SingleTaskGP(train_X, train_Y, likelihood=likelihood)
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    # load state dict if it is passed
    if state_dict is not None:
        gp.load_state_dict(state_dict)
    return mll, gp
def gp_torch_train(train_x: Tensor,
                   train_y: Tensor,
                   n_inducing_points: int,
                   tkwargs: Dict[str, Any],
                   init,
                   scale: bool,
                   covar_name: str,
                   gp_file: Optional[str],
                   save_file: str,
                   input_wp: bool,
                   outcome_transform: Optional[OutcomeTransform] = None,
                   options: Dict[str, Any] = None) -> SingleTaskGP:
    assert train_y.ndim > 1, train_y.shape
    assert gp_file or init, (gp_file, init)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()

    if init:
        # build hyperparameters
        print("Initialize GP hparams...")
        print("Doing Kmeans init...")
        assert n_inducing_points > 0, n_inducing_points
        kmeans = MiniBatchKMeans(n_clusters=n_inducing_points,
                                 batch_size=min(10000, train_x.shape[0]),
                                 n_init=25)
        start_time = time.time()
        kmeans.fit(train_x.cpu().numpy())
        end_time = time.time()
        print(f"K means took {end_time - start_time:.1f}s to finish...")
        inducing_points = torch.from_numpy(kmeans.cluster_centers_.copy())

        output_scale = None
        if scale:
            output_scale = train_y.var().item()
        lscales = torch.empty(1, train_x.shape[1])
        for i in range(train_x.shape[1]):
            lscales[0, i] = torch.pdist(
                train_x[:, i].view(-1, 1)).median().clamp(min=0.01)
        base_covar_module = query_covar(covar_name=covar_name,
                                        scale=scale,
                                        outputscale=output_scale,
                                        lscales=lscales)
        covar_module = InducingPointKernel(base_covar_module,
                                           inducing_points=inducing_points,
                                           likelihood=likelihood)

        input_warp_tf = None
        if input_wp:
            # Apply input warping: initialize the input_warping transformation
            # with a prior whose median is at 1 (when a=1 and b=1, the
            # Kumaraswamy CDF is the identity function)
            input_warp_tf = CustomWarp(
                indices=list(range(train_x.shape[-1])),
                concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
                concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
            )

        model = SingleTaskGP(train_x,
                             train_y,
                             covar_module=covar_module,
                             likelihood=likelihood,
                             input_transform=input_warp_tf,
                             outcome_transform=outcome_transform)
    else:
        # load model
        output_scale = 1  # will be overwritten when loading model
        lscales = torch.ones(train_x.shape[1])  # will be overwritten when loading model
        base_covar_module = query_covar(covar_name=covar_name,
                                        scale=scale,
                                        outputscale=output_scale,
                                        lscales=lscales)
        covar_module = InducingPointKernel(
            base_covar_module,
            inducing_points=torch.empty(n_inducing_points, train_x.shape[1]),
            likelihood=likelihood)

        input_warp_tf = None
        if input_wp:
            # Apply input warping (same prior with median at 1 as above)
            input_warp_tf = Warp(
                indices=list(range(train_x.shape[-1])),
                concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
                concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
            )

        model = SingleTaskGP(train_x,
                             train_y,
                             covar_module=covar_module,
                             likelihood=likelihood,
                             input_transform=input_warp_tf,
                             outcome_transform=outcome_transform)
        print("Loading GP from file")
        state_dict = torch.load(gp_file)
        model.load_state_dict(state_dict)

    print("GP regression")
    start_time = time.time()
    model.to(**tkwargs)
    model.train()
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # set approx_mll to False since we are using an exact marginal log likelihood
    # fit_gpytorch_model(mll, optimizer=fit_gpytorch_torch, approx_mll=False, options=options)
    fit_gpytorch_torch(mll,
                       options=options,
                       approx_mll=False,
                       clip_by_value=True if input_wp else False,
                       clip_value=10.0)
    end_time = time.time()
    print(f"Regression took {end_time - start_time:.1f}s to finish...")
    print("Save GP model...")
    torch.save(model.state_dict(), save_file)
    print("Done training of GP.")
    model.eval()
    return model
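# Usage sketch (hypothetical arguments; `query_covar`, `CustomWarp`, and the
# covariance name are project-specific assumptions): initialize and fit a
# sparse GP on fresh data, saving its state to disk for later reloading via
# the gp_file branch above.
tkwargs = {"device": torch.device("cpu"), "dtype": torch.float64}
model = gp_torch_train(
    train_x, train_y.unsqueeze(-1),
    n_inducing_points=64, tkwargs=tkwargs,
    init=True, scale=True, covar_name="matern-5/2",
    gp_file=None, save_file="gp_state.pt", input_wp=False,
    options={"maxiter": 100, "lr": 0.1},
)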
def test_cache_root(self):
    sample_cached_path = (
        "botorch.acquisition.cached_cholesky.sample_cached_cholesky")
    raw_state_dict = {
        "likelihood.noise_covar.raw_noise":
            torch.tensor([[0.0895], [0.2594]], dtype=torch.float64),
        "mean_module.constant":
            torch.tensor([[-0.4545], [-0.1285]], dtype=torch.float64),
        "covar_module.raw_outputscale":
            torch.tensor([1.4876, 1.4897], dtype=torch.float64),
        "covar_module.base_kernel.raw_lengthscale":
            torch.tensor([[[-0.7202, -0.2868]], [[-0.8794, -1.2877]]],
                         dtype=torch.float64),
    }
    # test batched models (e.g. for MCMC)
    for train_batch_shape, m, dtype in product(
            (torch.Size([]), torch.Size([3])), (1, 2),
            (torch.float, torch.double)):
        state_dict = deepcopy(raw_state_dict)
        for k, v in state_dict.items():
            if m == 1:
                v = v[0]
            if len(train_batch_shape) > 0:
                v = v.unsqueeze(0).expand(*train_batch_shape, *v.shape)
            state_dict[k] = v
        tkwargs = {"device": self.device, "dtype": dtype}
        if m == 2:
            objective = GenericMCObjective(lambda Y, X: Y.sum(dim=-1))
        else:
            objective = None
        for k, v in state_dict.items():
            state_dict[k] = v.to(**tkwargs)
        all_close_kwargs = (
            {"atol": 1e-1, "rtol": 0.0}
            if dtype == torch.float
            else {"atol": 1e-4, "rtol": 0.0}
        )
        torch.manual_seed(1234)
        train_X = torch.rand(*train_batch_shape, 3, 2, **tkwargs)
        train_Y = (
            torch.sin(train_X * 2 * pi)
            + torch.randn(*train_batch_shape, 3, 2, **tkwargs))[..., :m]
        train_Y = standardize(train_Y)
        model = SingleTaskGP(train_X, train_Y)
        if len(train_batch_shape) > 0:
            X_baseline = train_X[0]
        else:
            X_baseline = train_X
        model.load_state_dict(state_dict, strict=False)
        # test sampler with collapse_batch_dims=False
        sampler = IIDNormalSampler(5, seed=0, collapse_batch_dims=False)
        with self.assertRaises(UnsupportedError):
            qNoisyExpectedImprovement(
                model=model,
                X_baseline=X_baseline,
                sampler=sampler,
                objective=objective,
                prune_baseline=False,
                cache_root=True,
            )
        sampler = IIDNormalSampler(5, seed=0)
        torch.manual_seed(0)
        acqf = qNoisyExpectedImprovement(
            model=model,
            X_baseline=X_baseline,
            sampler=sampler,
            objective=objective,
            prune_baseline=False,
            cache_root=True,
        )
        orig_base_samples = acqf.base_sampler.base_samples.detach().clone()
        sampler2 = IIDNormalSampler(5, seed=0)
        sampler2.base_samples = orig_base_samples
        torch.manual_seed(0)
        acqf_no_cache = qNoisyExpectedImprovement(
            model=model,
            X_baseline=X_baseline,
            sampler=sampler2,
            objective=objective,
            prune_baseline=False,
            cache_root=False,
        )
        for q, batch_shape in product(
                (1, 3),
                (torch.Size([]), torch.Size([3]), torch.Size([4, 3]))):
            test_X = (
                0.3 + 0.05 * torch.randn(*batch_shape, q, 2, **tkwargs)
            ).requires_grad_(True)
            with mock.patch(
                    sample_cached_path,
                    wraps=sample_cached_cholesky) as mock_sample_cached:
                torch.manual_seed(0)
                val = acqf(test_X)
                mock_sample_cached.assert_called_once()
            val.sum().backward()
            base_samples = acqf.sampler.base_samples.detach().clone()
            X_grad = test_X.grad.clone()
            test_X2 = test_X.detach().clone().requires_grad_(True)
            acqf_no_cache.sampler.base_samples = base_samples
            with mock.patch(
                    sample_cached_path,
                    wraps=sample_cached_cholesky) as mock_sample_cached:
                torch.manual_seed(0)
                val2 = acqf_no_cache(test_X2)
                mock_sample_cached.assert_not_called()
            self.assertTrue(torch.allclose(val, val2, **all_close_kwargs))
            val2.sum().backward()
            self.assertTrue(
                torch.allclose(X_grad, test_X2.grad, **all_close_kwargs))
        # test that we fall back to standard sampling for
        # ill-conditioned covariances
        acqf._baseline_L = torch.zeros_like(acqf._baseline_L)
        with warnings.catch_warnings(record=True) as ws, settings.debug(True):
            with torch.no_grad():
                acqf(test_X)
        self.assertEqual(len(ws), 1)
        self.assertTrue(issubclass(ws[-1].category, BotorchWarning))