def __init__(self, num_features=None, ard_num_dims=1, batch_shape=torch.Size([]),
             signal_variance_constraint=None, fourier_features_constraint=None, **kwargs):
    super(SparseSpectrumKernel, self).__init__(ard_num_dims=ard_num_dims, batch_shape=batch_shape, **kwargs)
    if num_features is None:
        raise RuntimeError("num_features is a required argument")
    self.num_features = num_features

    if signal_variance_constraint is None:
        signal_variance_constraint = Positive()
    if fourier_features_constraint is None:
        fourier_features_constraint = Positive()

    self.register_parameter(name='raw_signal_variance', parameter=torch.nn.Parameter(torch.zeros(1)))
    ff_shape = torch.Size([*self.batch_shape, self.num_features, 1, self.ard_num_dims])
    self.register_parameter(name='raw_fourier_features', parameter=torch.nn.Parameter(torch.zeros(ff_shape)))

    self.register_constraint('raw_signal_variance', signal_variance_constraint)
    self.register_constraint('raw_fourier_features', fourier_features_constraint)
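# A minimal sketch (not shown in the snippet above) of the accessor pair that GPyTorch's
# raw-parameter convention assumes for `raw_signal_variance`: the public value is the
# constraint's transform of the raw parameter, and assignment goes through the inverse
# transform via `initialize`. The property name `signal_variance` is an assumption here.
@property
def signal_variance(self):
    return self.raw_signal_variance_constraint.transform(self.raw_signal_variance)

@signal_variance.setter
def signal_variance(self, value):
    if not torch.is_tensor(value):
        value = torch.as_tensor(value).to(self.raw_signal_variance)
    self.initialize(raw_signal_variance=self.raw_signal_variance_constraint.inverse_transform(value))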
def __init__(self):
    super().__init__()
    ms_shape = torch.Size([1, 1])
    self.register_parameter(name="raw_scale", parameter=torch.nn.Parameter(torch.zeros(ms_shape)))
    self.register_parameter(name="raw_mean", parameter=torch.nn.Parameter(torch.zeros(ms_shape)))
    self.register_constraint("raw_scale", Positive())
    self.register_constraint("raw_mean", Positive())
def __init__(self, base_kernel, angle_prior: Optional[Prior] = None, radius_prior: Optional[Prior] = None, **kwargs):
    super(ArcKernel, self).__init__(has_lengthscale=True, **kwargs)

    if self.ard_num_dims is None:
        last_dim = 1
    else:
        last_dim = self.ard_num_dims

    # TODO: check the errors given by Interval
    angle_constraint = Positive()
    self.register_parameter(
        name="raw_angle",
        parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1, last_dim)),
    )
    if angle_prior is not None:
        self.register_prior(
            "angle_prior",
            angle_prior,
            lambda: self.angle,
            lambda v: self._set_angle(v),
        )
    self.register_constraint("raw_angle", angle_constraint)

    self.register_parameter(
        name="raw_radius",
        parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1, last_dim)),
    )
    if radius_prior is not None:
        self.register_prior(
            "radius_prior",
            radius_prior,
            lambda: self.radius,
            lambda v: self._set_radius(v),
        )
    radius_constraint = Positive()
    self.register_constraint("raw_radius", radius_constraint)

    self.base_kernel = base_kernel
    if self.base_kernel.has_lengthscale:
        self.base_kernel.lengthscale = 1
        self.base_kernel.raw_lengthscale.requires_grad_(False)
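# Illustrative usage only; the priors and the `train_x` tensor below are assumptions,
# not taken from the snippet above: wrap a base kernel in ArcKernel and scale it.
base_kernel = MaternKernel(nu=2.5)
arc_kernel = ArcKernel(
    base_kernel,
    angle_prior=GammaPrior(0.5, 1.0),
    radius_prior=GammaPrior(3.0, 2.0),
    ard_num_dims=train_x.shape[-1],
)
covar_module = ScaleKernel(arc_kernel)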
def test_posterior_latent_gp_and_likelihood_without_optimization(self, cuda=False):
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    # We're manually going to set the hyperparameters to be ridiculous
    likelihood = GaussianLikelihood(noise_constraint=Positive())  # This test actually wants a noise < 1e-4
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    gp_model.covar_module.base_kernel.initialize(lengthscale=exp(-15))
    likelihood.initialize(noise=exp(-15))

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Compute posterior distribution
    gp_model.eval()
    likelihood.eval()

    # Let's see how our model does, conditioned with weird hyperparams
    # The posterior should fit all the data
    with gpytorch.settings.debug(False):
        function_predictions = likelihood(gp_model(train_x))

    self.assertAllClose(function_predictions.mean, train_y)
    self.assertAllClose(function_predictions.variance, torch.zeros_like(function_predictions.variance))

    # It shouldn't fit much else though
    test_function_predictions = gp_model(torch.tensor([1.1]).type_as(test_x))

    self.assertAllClose(test_function_predictions.mean, torch.zeros_like(test_function_predictions.mean))
    self.assertAllClose(
        test_function_predictions.variance,
        gp_model.covar_module.outputscale.expand_as(test_function_predictions.variance),
    )
def test_prior(self, cuda=False):
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    # We're manually going to set the hyperparameters to be ridiculous
    likelihood = GaussianLikelihood(
        noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1),
        noise_constraint=Positive(),  # Prior for this test is looser than default bound
    )
    gp_model = ExactGPModel(None, None, likelihood)
    # Update lengthscale prior to accommodate extreme parameters
    gp_model.covar_module.base_kernel.register_prior(
        "lengthscale_prior", SmoothedBoxPrior(exp(-10), exp(10), sigma=0.5), "raw_lengthscale"
    )
    gp_model.mean_module.initialize(constant=1.5)
    gp_model.covar_module.base_kernel.initialize(lengthscale=1)
    likelihood.initialize(noise=0)

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Compute posterior distribution
    gp_model.eval()
    likelihood.eval()

    # The model should predict in prior mode
    function_predictions = likelihood(gp_model(train_x))
    correct_variance = gp_model.covar_module.outputscale + likelihood.noise

    self.assertAllClose(function_predictions.mean, torch.full_like(function_predictions.mean, fill_value=1.5))
    self.assertAllClose(
        function_predictions.variance,
        correct_variance.squeeze().expand_as(function_predictions.variance),
    )
def __init__(
    self,
    power_prior: Optional[Prior] = None,
    offset_prior: Optional[Prior] = None,
    power_constraint: Optional[Interval] = None,
    offset_constraint: Optional[Interval] = None,
    **kwargs,
):
    super().__init__(has_lengthscale=True, **kwargs)

    if power_constraint is None:
        power_constraint = Positive()
    if offset_constraint is None:
        offset_constraint = Positive()

    self.register_parameter(
        name="raw_power",
        parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1)),
    )
    self.register_parameter(
        name="raw_offset",
        parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1)),
    )

    if power_prior is not None:
        self.register_prior(
            "power_prior",
            power_prior,
            lambda: self.power,
            lambda v: self._set_power(v),
        )
    self.register_constraint("raw_power", power_constraint)

    if offset_prior is not None:
        self.register_prior(
            "offset_prior",
            offset_prior,
            lambda: self.offset,
            lambda v: self._set_offset(v),
        )
    self.register_constraint("raw_offset", offset_constraint)
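# The prior registrations above reference `self._set_power` and `self._set_offset`,
# which are not part of the snippet. A hedged sketch of what such setters typically
# look like under GPyTorch's raw-parameter convention (an assumption, not the
# original implementation):
def _set_power(self, value):
    if not torch.is_tensor(value):
        value = torch.as_tensor(value).to(self.raw_power)
    self.initialize(raw_power=self.raw_power_constraint.inverse_transform(value))

def _set_offset(self, value):
    if not torch.is_tensor(value):
        value = torch.as_tensor(value).to(self.raw_offset)
    self.initialize(raw_offset=self.raw_offset_constraint.inverse_transform(value))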
def __init__(self, m, **kwargs):
    # self.m = m
    scale_constraint = LessThan(0.1)
    super(RBFConstraint, self).__init__(lengthscale_constraint=scale_constraint, **kwargs)
    outputscale = torch.zeros(*self.batch_shape) if len(self.batch_shape) else torch.tensor(0.0)
    self.register_parameter(name="raw_outputscale", parameter=torch.nn.Parameter(outputscale))
    outputscale_constraint = Positive()
    self.register_constraint("raw_outputscale", outputscale_constraint)
    self.register_buffer("m", torch.tensor(m))
def test_posterior_latent_gp_and_likelihood_with_optimization(self, cuda=False):
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(
        noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1),
        noise_constraint=Positive(),
    )
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.rbf_covar_module.initialize(lengthscale=exp(1))
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(noise=exp(1))

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    optimizer.n_iter = 0
    with gpytorch.settings.debug(False):
        for _ in range(75):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        optimizer.step()

        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_function_predictions = likelihood(gp_model(test_x))
        mean_abs_error = torch.mean(torch.abs(test_y - test_function_predictions.mean))

    self.assertLess(mean_abs_error.squeeze().item(), 0.05)
def test_posterior_latent_gp_and_likelihood_without_optimization(self, cuda=False):
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    with gpytorch.settings.debug(False):
        # We're manually going to set the hyperparameters to be ridiculous
        likelihood = GaussianLikelihood(
            noise_prior=SmoothedBoxPrior(exp(-10), exp(10), sigma=0.25),
            noise_constraint=Positive(),
        )
        gp_model = ExactGPModel(train_x, train_y, likelihood)
        # Update lengthscale prior to accommodate extreme parameters
        gp_model.rbf_covar_module.register_prior(
            "lengthscale_prior", SmoothedBoxPrior(exp(-10), exp(10), sigma=0.5), "raw_lengthscale"
        )
        gp_model.rbf_covar_module.initialize(lengthscale=exp(-10))
        gp_model.mean_module.initialize(constant=0)
        likelihood.initialize(noise=exp(-10))

        if cuda:
            gp_model.cuda()
            likelihood.cuda()

        # Compute posterior distribution
        gp_model.eval()
        likelihood.eval()

        # Let's see how our model does, conditioned with weird hyperparams
        # The posterior should fit all the data
        function_predictions = likelihood(gp_model(train_x))

        self.assertLess(torch.norm(function_predictions.mean - train_y), 1e-3)
        self.assertLess(torch.norm(function_predictions.variance), 5e-3)

        # It shouldn't fit much else though
        test_function_predictions = gp_model(torch.tensor([1.1]).type_as(test_x))

        self.assertLess(torch.norm(test_function_predictions.mean - 0), 1e-4)
        self.assertLess(
            torch.norm(test_function_predictions.variance - gp_model.covar_module.outputscale), 1e-4
        )
def _train_gp_models(self, x, y2):
    X = torch.tensor(x)
    y2 = torch.tensor(y2)
    ll1 = GaussianLikelihood()
    ll2 = GaussianLikelihood(noise_constraint=Positive())

    Xgrid = convert_to_xgrid_torch(X, self.transform).double()
    y1_pred, y1_latent = self.aux_model(Xgrid, return_latent=True)
    train_y1 = y1_latent if self.use_latent else y1_pred
    train_y1 = (train_y1.data[..., self.slice] - self.y1_lower) / (self.y1_upper - self.y1_lower)

    warm_gp = GPWarm(train_y1, y2, ll1)
    train(train_y1, y2, warm_gp, self.train_cf1)

    transform_input_fn = tensor_x_to_tensor_grid(self.transform)
    cold_gp = GPCold(X, y2, ll2, transform_input_fn=transform_input_fn)
    train(X, y2, cold_gp, self.train_cf2)

    return warm_gp, cold_gp
def __init__(self, n_elements, n_dimensions, prior_mean=0, prior_variance=1, share_variational_variance=False):
    super().__init__()
    self.prior = Normal(prior_mean, prior_variance ** 0.5)
    mean = self.prior.sample([n_elements, n_dimensions])

    if share_variational_variance:
        raw_variance = torch.zeros((n_elements, 1))
    else:
        raw_variance = torch.zeros_like(mean)

    self.constraint = Positive()
    self.register_parameter("variational_mean", Parameter(mean))
    self.register_parameter("raw_variational_variance", Parameter(raw_variance))
    self.variational_variance = torch.ones_like(self.variational_mean)

    self.input_dims = 0
    self.output_dims = n_dimensions
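# The assignment `self.variational_variance = ...` above implies a class-level property
# with a setter. A plausible sketch, assuming the plain `self.constraint` attribute
# (Positive) links the raw and constrained parameterizations; this is not taken from
# the original implementation:
@property
def variational_variance(self):
    return self.constraint.transform(self.raw_variational_variance)

@variational_variance.setter
def variational_variance(self, value):
    with torch.no_grad():
        self.raw_variational_variance.copy_(self.constraint.inverse_transform(value))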
def __init__(self, power_law_prior=None, power_law_constraint=None, **kwargs):
    super(RationalQuadraticKernel, self).__init__(has_lengthscale=True, **kwargs)
    self.register_parameter(
        name="raw_power_law",
        parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1, 1)),
    )
    if power_law_constraint is None:
        power_law_constraint = Positive()
    if power_law_prior is not None:
        self.register_prior(
            "power_law_prior",
            power_law_prior,
            lambda: self.power_law,
            lambda v: self._set_power_law(v),
        )
    self.register_constraint("raw_power_law", power_law_constraint)
def __init__(self, active_dim, period_length_prior=None, period_length_constraint=None, **kwargs):
    super(MyCustomPeriodicKernel, self).__init__(**kwargs)
    if period_length_constraint is None:
        period_length_constraint = Positive()
    self._my_active_dim = active_dim
    self.register_parameter(
        name="raw_period_length",
        parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1, 1)),
    )
    if period_length_prior is not None:
        self.register_prior(
            "period_length_prior",
            period_length_prior,
            lambda: self.period_length,
            lambda v: self._set_period_length(v),
        )
    self.register_constraint("raw_period_length", period_length_constraint)
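# Illustrative usage only (the prior and constraint choices are assumptions): attach a
# period-length prior and a custom positive constraint when constructing the kernel.
periodic_kernel = MyCustomPeriodicKernel(
    active_dim=0,
    period_length_prior=GammaPrior(2.0, 4.0),
    period_length_constraint=GreaterThan(1e-4),
)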
def __init__(
    self,
    datapoints: Tensor,
    comparisons: Tensor,
    covar_module: Optional[Module] = None,
    noise_module: Optional[HomoskedasticNoise] = None,
    **kwargs,
) -> None:
    r"""A probit-likelihood GP with Laplace approximation model that learns via
    pairwise comparison data. By default it uses a scaled-RBF kernel.

    Args:
        datapoints: A `batch_shape x n x d` tensor of training features.
        comparisons: A `batch_shape x m x 2` tensor of training comparisons;
            comparisons[i] is a noisy indicator suggesting the utility value
            of comparisons[i, 0]-th is greater than comparisons[i, 1]-th.
        covar_module: Covariance module.
        noise_module: Noise module.
    """
    super().__init__()

    # Compatibility variables with fit_gpytorch_*: Dummy likelihood
    # Likelihood is tightly tied with this model and
    # it doesn't make much sense to keep it separate
    self.likelihood = None

    # TODO: remove these variables from `state_dict()` so that when calling
    # `load_state_dict()`, only the hyperparameters are copied over
    self.register_buffer("datapoints", None)
    self.register_buffer("comparisons", None)
    self.register_buffer("utility", None)
    self.register_buffer("covar_chol", None)
    self.register_buffer("likelihood_hess", None)
    self.register_buffer("hlcov_eye", None)
    self.register_buffer("covar", None)
    self.register_buffer("covar_inv", None)

    self.train_inputs = []
    self.train_targets = None

    self.pred_cov_fac_need_update = True
    self._input_batch_shape = torch.Size()
    self.dim = None

    # will be set to match datapoints' dtype and device;
    # since scipy.optimize.fsolve only works on cpu, it'd be the
    # fastest to fit the model on cpu and take samples on gpu to avoid
    # overhead of moving data back and forth during fitting time
    self.tkwargs = {}

    # See set_train_data for additional compatibility variables
    self.set_train_data(datapoints, comparisons, update_model=False)

    # Set optional parameters
    # jitter to add for numerical stability
    self._jitter = kwargs.get("jitter", 1e-6)
    # Clamping z lim for better numerical stability. See self._calc_z for detail
    # norm_cdf(z=3) ~= 0.999, top 0.1%
    self._zlim = kwargs.get("zlim", 3)
    # Stopping criteria in scipy.optimize.fsolve used to find f_map in _update()
    # If None, set to 1e-6 by default in _update
    self._xtol = kwargs.get("xtol")
    # The maximum number of calls to the function in scipy.optimize.fsolve
    # If None, set to 100 by default in _update
    # If zero, then 100*(N+1) is used by default by fsolve
    self._maxfev = kwargs.get("maxfev")

    # Set hyperparameters
    # Do not set the batch_shape explicitly so mean_module can operate in both modes;
    # once fsolve used in _update can run in batch mode, we should explicitly set
    # the batch shape here
    self.mean_module = ConstantMean()
    # Do not optimize constant mean prior
    for param in self.mean_module.parameters():
        param.requires_grad = False

    # set noise module
    if noise_module is None:
        noise_module = HomoskedasticNoise(
            noise_prior=SmoothedBoxPrior(-5, 5, 0.5, transform=torch.log),
            noise_constraint=GreaterThan(1e-4),  # if None, 1e-4 by default
            batch_shape=self._input_batch_shape,
        )
    self.noise_module = noise_module

    # set covariance module
    if covar_module is None:
        ls_prior = GammaPrior(1.2, 0.5)
        ls_prior_mode = (ls_prior.concentration - 1) / ls_prior.rate
        covar_module = RBFKernel(
            batch_shape=self._input_batch_shape,
            ard_num_dims=self.dim,
            lengthscale_prior=ls_prior,
            lengthscale_constraint=Positive(transform=None, initial_value=ls_prior_mode),
        )
    self.covar_module = covar_module

    self._x0 = None  # will store temporary results for warm-starting
    if self.datapoints is not None and self.comparisons is not None:
        self.to(dtype=self.datapoints.dtype, device=self.datapoints.device)
        self._update()  # Find f_map for initial parameters

    self.to(self.datapoints)
def __init__(
    self,
    decomposition: Dict[str, List[int]],
    batch_shape: torch.Size,
    train_embedding: bool = True,
    cat_feature_dict: Optional[Dict] = None,
    embs_feature_dict: Optional[Dict] = None,
    embs_dim_list: Optional[List[int]] = None,
    context_weight_dict: Optional[Dict] = None,
    device: Optional[torch.device] = None,
) -> None:
    super().__init__(batch_shape=batch_shape)
    self.decomposition = decomposition
    self.batch_shape = batch_shape
    self.train_embedding = train_embedding
    self.device = device

    num_param = len(next(iter(decomposition.values())))
    self.context_list = list(decomposition.keys())
    self.num_contexts = len(self.context_list)

    # get parameter space decomposition
    for active_parameters in decomposition.values():
        # check that the number of parameters is the same in each decomposition
        if len(active_parameters) != num_param:
            raise ValueError("num of parameters needs to be same across all contexts")
    self._indexers = {
        context: torch.tensor(active_params, device=self.device)
        for context, active_params in self.decomposition.items()
    }

    # get context features and set emb dim
    self.context_cat_feature = None
    self.context_emb_feature = None
    self.n_embs = 0
    self.emb_weight_matrix_list = None
    self.emb_dims = None
    self._set_context_features(
        cat_feature_dict=cat_feature_dict,
        embs_feature_dict=embs_feature_dict,
        embs_dim_list=embs_dim_list,
    )

    # construct embedding layer
    if train_embedding:
        self._set_emb_layers()

    # task covariance matrix
    self.task_covar_module = MaternKernel(
        nu=2.5,
        ard_num_dims=self.n_embs,
        batch_shape=batch_shape,
        lengthscale_prior=GammaPrior(3.0, 6.0),
    )

    # base kernel
    self.base_kernel = MaternKernel(
        nu=2.5,
        ard_num_dims=num_param,
        batch_shape=batch_shape,
        lengthscale_prior=GammaPrior(3.0, 6.0),
    )

    # outputscales for each context (note this is like sqrt of outputscale)
    self.context_weight = None
    if context_weight_dict is None:
        outputscale_list = torch.zeros(*batch_shape, self.num_contexts, device=self.device)
    else:
        outputscale_list = torch.zeros(*batch_shape, 1, device=self.device)
        self.context_weight = torch.tensor(
            [context_weight_dict[c] for c in self.context_list], device=self.device
        )
    self.register_parameter(name="raw_outputscale_list", parameter=torch.nn.Parameter(outputscale_list))
    self.register_prior(
        "outputscale_list_prior",
        GammaPrior(2.0, 15.0),
        lambda m: m.outputscale_list,
        lambda m, v: m._set_outputscale_list(v),
    )
    self.register_constraint("raw_outputscale_list", Positive())
def __init__(  # noqa C901
    self,
    fidelity_dims: List[int],
    dimension: Optional[int] = None,
    power_prior: Optional[Prior] = None,
    power_constraint: Optional[Interval] = None,
    nu: float = 2.5,
    lengthscale_prior_unbiased: Optional[Prior] = None,
    lengthscale_prior_biased: Optional[Prior] = None,
    lengthscale_constraint_unbiased: Optional[Interval] = None,
    lengthscale_constraint_biased: Optional[Interval] = None,
    covar_module_unbiased: Optional[Kernel] = None,
    covar_module_biased: Optional[Kernel] = None,
    **kwargs: Any,
) -> None:
    if dimension is None and kwargs.get("active_dims") is None:
        raise UnsupportedError("Must specify dimension when not specifying active_dims.")
    n_fidelity = len(fidelity_dims)
    if len(set(fidelity_dims)) != n_fidelity:
        raise ValueError("fidelity_dims must not have repeated elements")
    if n_fidelity not in {1, 2}:
        raise UnsupportedError(
            "LinearTruncatedFidelityKernel accepts either one or two "
            "fidelity parameters."
        )
    if nu not in {0.5, 1.5, 2.5}:
        raise ValueError("nu must be one of 0.5, 1.5, or 2.5")

    super().__init__(**kwargs)
    self.fidelity_dims = fidelity_dims
    if power_constraint is None:
        power_constraint = Positive()

    if lengthscale_prior_unbiased is None:
        lengthscale_prior_unbiased = GammaPrior(3, 6)
    if lengthscale_prior_biased is None:
        lengthscale_prior_biased = GammaPrior(6, 2)
    if lengthscale_constraint_unbiased is None:
        lengthscale_constraint_unbiased = Positive()
    if lengthscale_constraint_biased is None:
        lengthscale_constraint_biased = Positive()

    self.register_parameter(
        name="raw_power",
        parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1)),
    )
    self.register_constraint("raw_power", power_constraint)

    if power_prior is not None:
        self.register_prior(
            "power_prior",
            power_prior,
            lambda: self.power,
            lambda v: self._set_power(v),
        )

    if self.active_dims is not None:
        dimension = len(self.active_dims)

    if covar_module_unbiased is None:
        covar_module_unbiased = MaternKernel(
            nu=nu,
            batch_shape=self.batch_shape,
            lengthscale_prior=lengthscale_prior_unbiased,
            ard_num_dims=dimension - n_fidelity,
            lengthscale_constraint=lengthscale_constraint_unbiased,
        )
    if covar_module_biased is None:
        covar_module_biased = MaternKernel(
            nu=nu,
            batch_shape=self.batch_shape,
            lengthscale_prior=lengthscale_prior_biased,
            ard_num_dims=dimension - n_fidelity,
            lengthscale_constraint=lengthscale_constraint_biased,
        )
    self.covar_module_unbiased = covar_module_unbiased
    self.covar_module_biased = covar_module_biased
def __init__(
    self,
    dimension: int = 3,
    nu: float = 2.5,
    train_iteration_fidelity: bool = True,
    train_data_fidelity: bool = True,
    lengthscale_prior: Optional[Prior] = None,
    power_prior: Optional[Prior] = None,
    power_constraint: Optional[Interval] = None,
    lengthscale_2_prior: Optional[Prior] = None,
    lengthscale_2_constraint: Optional[Interval] = None,
    lengthscale_constraint: Optional[Interval] = None,
    covar_module_1: Optional[Kernel] = None,
    covar_module_2: Optional[Kernel] = None,
    **kwargs: Any,
):
    if not train_iteration_fidelity and not train_data_fidelity:
        raise UnsupportedError("You should have at least one fidelity parameter.")
    if nu not in {0.5, 1.5, 2.5}:
        raise ValueError("nu expected to be 0.5, 1.5, or 2.5")

    super().__init__(**kwargs)
    self.train_iteration_fidelity = train_iteration_fidelity
    self.train_data_fidelity = train_data_fidelity
    if power_constraint is None:
        power_constraint = Positive()

    if lengthscale_prior is None:
        lengthscale_prior = GammaPrior(3, 6)
    if lengthscale_2_prior is None:
        lengthscale_2_prior = GammaPrior(6, 2)
    if lengthscale_constraint is None:
        lengthscale_constraint = Positive()
    if lengthscale_2_constraint is None:
        lengthscale_2_constraint = Positive()

    self.register_parameter(
        name="raw_power",
        parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1)),
    )
    if power_prior is not None:
        self.register_prior(
            "power_prior",
            power_prior,
            lambda: self.power,
            lambda v: self._set_power(v),
        )
    self.register_constraint("raw_power", power_constraint)

    m = self.train_iteration_fidelity + self.train_data_fidelity
    if self.active_dims is not None:
        dimension = len(self.active_dims)

    if covar_module_1 is None:
        self.covar_module_1 = MaternKernel(
            nu=nu,
            batch_shape=self.batch_shape,
            lengthscale_prior=lengthscale_prior,
            ard_num_dims=dimension - m,
            lengthscale_constraint=lengthscale_constraint,
        )
    else:
        self.covar_module_1 = covar_module_1

    if covar_module_2 is None:
        self.covar_module_2 = MaternKernel(
            nu=nu,
            batch_shape=self.batch_shape,
            lengthscale_prior=lengthscale_2_prior,
            ard_num_dims=dimension - m,
            lengthscale_constraint=lengthscale_2_constraint,
        )
    else:
        self.covar_module_2 = covar_module_2
def __init__(
    self,
    datapoints: Tensor,
    comparisons: Tensor,
    covar_module: Optional[Module] = None,
    input_transform: Optional[InputTransform] = None,
    **kwargs,
) -> None:
    r"""A probit-likelihood GP with Laplace approximation model that learns via
    pairwise comparison data. By default it uses a scaled RBF kernel.

    Args:
        datapoints: A `batch_shape x n x d` tensor of training features.
        comparisons: A `batch_shape x m x 2` tensor of training comparisons;
            comparisons[i] is a noisy indicator suggesting the utility value
            of comparisons[i, 0]-th is greater than comparisons[i, 1]-th.
        covar_module: Covariance module.
        input_transform: An input transform that is applied in the model's
            forward pass.
    """
    super().__init__()

    if input_transform is not None:
        input_transform.to(datapoints)
    # input transformation is applied in set_train_data
    self.input_transform = input_transform

    # Compatibility variables with fit_gpytorch_*: Dummy likelihood
    # Likelihood is tightly tied with this model and
    # it doesn't make much sense to keep it separate
    self.likelihood = None

    # TODO: remove these variables from `state_dict()` so that when calling
    # `load_state_dict()`, only the hyperparameters are copied over
    self.register_buffer("datapoints", None)
    self.register_buffer("comparisons", None)
    self.register_buffer("D", None)
    self.register_buffer("DT", None)
    self.register_buffer("utility", None)
    self.register_buffer("covar_chol", None)
    self.register_buffer("likelihood_hess", None)
    self.register_buffer("hlcov_eye", None)
    self.register_buffer("covar", None)
    self.register_buffer("covar_inv", None)

    self.train_inputs = []
    self.train_targets = None

    self.pred_cov_fac_need_update = True
    self.dim = None

    # See set_train_data for additional compatibility variables.
    # Note that the datapoints here are not transformed even if input_transform
    # is not None, to avoid double transformation during model fitting;
    # self.transform_inputs is called in `forward`
    self.set_train_data(datapoints, comparisons, update_model=False)

    # Set optional parameters
    # jitter to add for numerical stability
    self._jitter = kwargs.get("jitter", 1e-6)
    # Clamping z lim for better numerical stability. See self._calc_z for detail
    # norm_cdf(z=3) ~= 0.999, top 0.1%
    self._zlim = kwargs.get("zlim", 3)
    # Stopping criteria in scipy.optimize.fsolve used to find f_map in _update()
    # If None, set to 1e-6 by default in _update
    self._xtol = kwargs.get("xtol")
    # The maximum number of calls to the function in scipy.optimize.fsolve
    # If None, set to 100 by default in _update
    # If zero, then 100*(N+1) is used by default by fsolve
    self._maxfev = kwargs.get("maxfev")

    # Set hyperparameters
    # Do not set the batch_shape explicitly so mean_module can operate in both modes;
    # once fsolve used in _update can run in batch mode, we should explicitly set
    # the batch shape here
    self.mean_module = ConstantMean()
    # Do not optimize constant mean prior
    for param in self.mean_module.parameters():
        param.requires_grad = False

    # set covariance module
    # the default outputscale here is only a rule of thumb, meant to keep
    # estimates away from scale values that would make Phi(f(x)) saturate
    # at 0 or 1
    if covar_module is None:
        ls_prior = GammaPrior(1.2, 0.5)
        ls_prior_mode = (ls_prior.concentration - 1) / ls_prior.rate
        covar_module = ScaleKernel(
            RBFKernel(
                batch_shape=self.batch_shape,
                ard_num_dims=self.dim,
                lengthscale_prior=ls_prior,
                lengthscale_constraint=Positive(transform=None, initial_value=ls_prior_mode),
            ),
            outputscale_prior=SmoothedBoxPrior(a=1, b=4),
        )
    self.covar_module = covar_module

    self._x0 = None  # will store temporary results for warm-starting
    if self.datapoints is not None and self.comparisons is not None:
        self.to(dtype=self.datapoints.dtype, device=self.datapoints.device)
        # Find f_map for initial parameters with transformed datapoints
        transformed_dp = self.transform_inputs(datapoints)
        self._update(transformed_dp)

    self.to(self.datapoints)