def test_initialize_covar_module(self):
    kernel = LinearTruncatedFidelityKernel()
    self.assertTrue(isinstance(kernel.covar_module_1, MaternKernel))
    self.assertTrue(isinstance(kernel.covar_module_2, MaternKernel))
    kernel.covar_module_1 = RBFKernel()
    kernel.covar_module_2 = RBFKernel()
    self.assertTrue(isinstance(kernel.covar_module_1, RBFKernel))
    self.assertTrue(isinstance(kernel.covar_module_2, RBFKernel))
    kernel2 = LinearTruncatedFidelityKernel(
        covar_module_1=RBFKernel(), covar_module_2=RBFKernel()
    )
    self.assertTrue(isinstance(kernel2.covar_module_1, RBFKernel))
    self.assertTrue(isinstance(kernel2.covar_module_2, RBFKernel))
def test_manual_bounds(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        # get a test module
        train_x = torch.tensor([[1.0, 2.0, 3.0]], device=device, dtype=dtype)
        train_y = torch.tensor([4.0], device=device, dtype=dtype)
        likelihood = GaussianLikelihood()
        model = ExactGP(train_x, train_y, likelihood)
        model.covar_module = RBFKernel(ard_num_dims=3)
        model.mean_module = ConstantMean()
        model.to(device=device, dtype=dtype)
        mll = ExactMarginalLogLikelihood(likelihood, model)
        # test the basic case
        x, pdict, bounds = module_to_array(
            module=mll, bounds={"model.covar_module.raw_lengthscale": (0.1, None)}
        )
        self.assertTrue(np.array_equal(x, np.zeros(5)))
        expected_sizes = {
            "likelihood.noise_covar.raw_noise": torch.Size([1]),
            "model.covar_module.raw_lengthscale": torch.Size([1, 3]),
            "model.mean_module.constant": torch.Size([1]),
        }
        self.assertEqual(set(pdict.keys()), set(expected_sizes.keys()))
        for pname, val in pdict.items():
            self.assertEqual(val.dtype, dtype)
            self.assertEqual(val.shape, expected_sizes[pname])
            self.assertEqual(val.device.type, device.type)
        lower_exp = np.full_like(x, 0.1)
        for p in ("likelihood.noise_covar.raw_noise", "model.mean_module.constant"):
            lower_exp[_get_index(pdict, p)] = -np.inf
        self.assertTrue(np.equal(bounds[0], lower_exp).all())
        self.assertTrue(np.equal(bounds[1], np.full_like(x, np.inf)).all())
def test_exclude(self):
    for dtype in (torch.float, torch.double):
        # get a test module
        train_x = torch.tensor([[1.0, 2.0, 3.0]], device=self.device, dtype=dtype)
        train_y = torch.tensor([4.0], device=self.device, dtype=dtype)
        likelihood = GaussianLikelihood()
        model = ExactGP(train_x, train_y, likelihood)
        model.covar_module = RBFKernel(ard_num_dims=3)
        model.mean_module = ConstantMean()
        model.to(device=self.device, dtype=dtype)
        mll = ExactMarginalLogLikelihood(likelihood, model)
        # test the basic case
        x, pdict, bounds = module_to_array(
            module=mll, exclude={"model.mean_module.constant"}
        )
        self.assertTrue(np.array_equal(x, np.zeros(4)))
        expected_sizes = {
            "likelihood.noise_covar.raw_noise": torch.Size([1]),
            "model.covar_module.raw_lengthscale": torch.Size([1, 3]),
        }
        self.assertEqual(set(pdict.keys()), set(expected_sizes.keys()))
        for pname, val in pdict.items():
            self.assertEqual(val.dtype, dtype)
            self.assertEqual(val.shape, expected_sizes[pname])
            self.assertEqual(val.device.type, self.device.type)
        self.assertIsNone(bounds)
def test_initialize_covar_module(self):
    kernel = LinearTruncatedFidelityKernel(fidelity_dims=[1, 2], dimension=3)
    self.assertTrue(isinstance(kernel.covar_module_unbiased, MaternKernel))
    self.assertTrue(isinstance(kernel.covar_module_biased, MaternKernel))
    kernel.covar_module_unbiased = RBFKernel()
    kernel.covar_module_biased = RBFKernel()
    self.assertTrue(isinstance(kernel.covar_module_unbiased, RBFKernel))
    self.assertTrue(isinstance(kernel.covar_module_biased, RBFKernel))
    kernel2 = LinearTruncatedFidelityKernel(
        fidelity_dims=[1, 2],
        dimension=3,
        covar_module_unbiased=RBFKernel(),
        covar_module_biased=RBFKernel(),
    )
    self.assertTrue(isinstance(kernel2.covar_module_unbiased, RBFKernel))
    self.assertTrue(isinstance(kernel2.covar_module_biased, RBFKernel))
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    train_iteration_fidelity: bool = True,
    train_data_fidelity: bool = True,
    likelihood: Optional[Likelihood] = None,
) -> None:
    train_X, train_Y, _ = self._set_dimensions(train_X=train_X, train_Y=train_Y)
    num_fidelity = train_iteration_fidelity + train_data_fidelity
    ard_num_dims = train_X.shape[-1] - num_fidelity
    active_dimsX = list(range(train_X.shape[-1] - num_fidelity))
    rbf_kernel = RBFKernel(
        ard_num_dims=ard_num_dims,
        batch_shape=self._aug_batch_shape,
        lengthscale_prior=GammaPrior(3.0, 6.0),
        active_dims=active_dimsX,
    )
    exp_kernel = ExpDecayKernel(
        batch_shape=self._aug_batch_shape,
        lengthscale_prior=GammaPrior(3.0, 6.0),
        offset_prior=GammaPrior(3.0, 6.0),
        power_prior=GammaPrior(3.0, 6.0),
    )
    ds_kernel = DownsamplingKernel(
        batch_shape=self._aug_batch_shape,
        offset_prior=GammaPrior(3.0, 6.0),
        power_prior=GammaPrior(3.0, 6.0),
    )
    if train_iteration_fidelity and train_data_fidelity:
        active_dimsS1 = [train_X.shape[-1] - 1]  # iteration fidelity (last column)
        active_dimsS2 = [train_X.shape[-1] - 2]  # data fidelity (second-to-last column)
        exp_kernel.active_dims = torch.tensor(active_dimsS1)
        ds_kernel.active_dims = torch.tensor(active_dimsS2)
        kernel = rbf_kernel * exp_kernel * ds_kernel
    elif train_iteration_fidelity or train_data_fidelity:
        active_dimsS = [train_X.shape[-1] - 1]
        if train_iteration_fidelity:
            exp_kernel.active_dims = torch.tensor(active_dimsS)
            kernel = rbf_kernel * exp_kernel
        else:
            ds_kernel.active_dims = torch.tensor(active_dimsS)
            kernel = rbf_kernel * ds_kernel
    else:
        raise UnsupportedError("You should have at least one fidelity parameter.")
    covar_module = ScaleKernel(
        kernel,
        batch_shape=self._aug_batch_shape,
        outputscale_prior=GammaPrior(2.0, 0.15),
    )
    super().__init__(
        train_X=train_X,
        train_Y=train_Y,
        likelihood=likelihood,
        covar_module=covar_module,
    )
    self.to(train_X)
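# Hedged usage sketch (not from the original source): constructing the
# multi-fidelity model defined by the constructor above. The class name
# SingleTaskMultiFidelityGP is an assumption based on the surrounding code,
# and the import is elided. With both fidelity flags on, the last two columns
# of train_X are treated as the fidelity parameters.
import torch

train_X = torch.rand(10, 4)  # 2 design columns + 2 trailing fidelity columns
train_Y = torch.rand(10, 1)
model = SingleTaskMultiFidelityGP(
    train_X=train_X,
    train_Y=train_Y,
    train_iteration_fidelity=True,
    train_data_fidelity=True,
)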
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    task_feature: int,
    context_cat_feature: Optional[Tensor] = None,
    context_emb_feature: Optional[Tensor] = None,
    embs_dim_list: Optional[List[int]] = None,
    output_tasks: Optional[List[int]] = None,
    input_transform: Optional[InputTransform] = None,
    outcome_transform: Optional[OutcomeTransform] = None,
) -> None:
    super().__init__(
        train_X=train_X,
        train_Y=train_Y,
        task_feature=task_feature,
        output_tasks=output_tasks,
        input_transform=input_transform,
        outcome_transform=outcome_transform,
    )
    self.device = train_X.device
    # context indices
    all_tasks = train_X[:, task_feature].unique()
    self.all_tasks = all_tasks.to(dtype=torch.long).tolist()
    self.all_tasks.sort()  # unique() already returns sorted values; sort again for safety

    if context_cat_feature is None:
        context_cat_feature = all_tasks.unsqueeze(-1).to(device=self.device)
    self.context_cat_feature = context_cat_feature  # row indices = context indices
    self.context_emb_feature = context_emb_feature

    # construct emb_dims based on categorical features
    if embs_dim_list is None:
        # set embedding_dim = 1 for each categorical variable
        embs_dim_list = [1 for _i in range(context_cat_feature.size(1))]
    n_embs = sum(embs_dim_list)
    self.emb_dims = [
        (len(context_cat_feature[:, i].unique()), embs_dim_list[i])
        for i in range(context_cat_feature.size(1))
    ]
    # construct the embedding layer; this needs to handle multiple categorical features
    self.emb_layers = ModuleList(
        [
            torch.nn.Embedding(num_embeddings=x, embedding_dim=y, max_norm=1.0)
            for x, y in self.emb_dims
        ]
    )
    self.task_covar_module = RBFKernel(
        ard_num_dims=n_embs,
        lengthscale_constraint=Interval(
            0.0, 2.0, transform=None, initial_value=1.0
        ),
    )
    self.to(train_X)
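# Hedged usage sketch (not from the original source): the constructor above
# matches BoTorch's LCEMGP (latent context embedding multi-output GP); the
# class name and import path are assumptions. The context/task index is read
# from column `task_feature` of train_X.
import torch
from botorch.models.contextual_multioutput import LCEMGP  # assumed import

num_contexts = 3
x = torch.rand(12, 2)
task_idx = torch.arange(12).remainder(num_contexts).unsqueeze(-1).to(x)
train_X = torch.cat([x, task_idx], dim=-1)  # context index in the last column
train_Y = torch.rand(12, 1)
model = LCEMGP(train_X=train_X, train_Y=train_Y, task_feature=2)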
def test_set_parameters(self):
    for dtype in (torch.float, torch.double):
        # get a test module
        train_x = torch.tensor([[1.0, 2.0, 3.0]], device=self.device, dtype=dtype)
        train_y = torch.tensor([4.0], device=self.device, dtype=dtype)
        likelihood = GaussianLikelihood()
        model = ExactGP(train_x, train_y, likelihood)
        model.covar_module = RBFKernel(ard_num_dims=3)
        model.mean_module = ConstantMean()
        model.to(device=self.device, dtype=dtype)
        mll = ExactMarginalLogLikelihood(likelihood, model)
        # get parameters
        x, pdict, bounds = module_to_array(module=mll)
        # set parameters
        mll = set_params_with_array(mll, np.array([1.0, 2.0, 3.0, 4.0, 5.0]), pdict)
        z = dict(mll.named_parameters())
        self.assertTrue(
            torch.equal(
                z["likelihood.noise_covar.raw_noise"],
                torch.tensor([1.0], device=self.device, dtype=dtype),
            )
        )
        self.assertTrue(
            torch.equal(
                z["model.covar_module.raw_lengthscale"],
                torch.tensor([[2.0, 3.0, 4.0]], device=self.device, dtype=dtype),
            )
        )
        self.assertTrue(
            torch.equal(
                z["model.mean_module.constant"],
                torch.tensor([5.0], device=self.device, dtype=dtype),
            )
        )
        # extract again
        x2, pdict2, bounds2 = module_to_array(module=mll)
        self.assertTrue(np.array_equal(x2, np.array([1.0, 2.0, 3.0, 4.0, 5.0])))
def __init__(self, train_X: Tensor, train_Y: Tensor, train_Yvar: Tensor) -> None:
    self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
    self._set_dimensions(train_X=train_X, train_Y=train_Y)
    train_X, train_Y, train_Yvar = self._transform_tensor_args(
        X=train_X, Y=train_Y, Yvar=train_Yvar
    )
    likelihood = FixedNoiseGaussianLikelihood(
        noise=train_Yvar, batch_shape=self._aug_batch_shape
    )
    ExactGP.__init__(
        self, train_inputs=train_X, train_targets=train_Y, likelihood=likelihood
    )
    self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)
    self.covar_module = ScaleKernel(
        base_kernel=RBFKernel(
            ard_num_dims=train_X.shape[-1],
            batch_shape=self._aug_batch_shape,
        ),
        batch_shape=self._aug_batch_shape,
    )
    self.to(train_X)
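# Hedged usage sketch (not from the original source): the constructor above
# follows the signature of BoTorch's FixedNoiseGP; the class name is an
# assumption since it is not shown. train_Yvar supplies known per-observation
# noise variances with the same shape as train_Y.
import torch

train_X = torch.rand(10, 3)
train_Y = torch.rand(10, 1)
train_Yvar = torch.full_like(train_Y, 0.01)  # known noise variance per point
model = FixedNoiseGP(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)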
def __init__(
    self,
    datapoints: Tensor,
    comparisons: Tensor,
    covar_module: Optional[Module] = None,
    noise_module: Optional[HomoskedasticNoise] = None,
    **kwargs,
) -> None:
    r"""A probit-likelihood GP with Laplace approximation model.

    A probit-likelihood GP with Laplace approximation that learns via
    pairwise comparison data. By default it uses a scaled RBF kernel.

    Args:
        datapoints: A `batch_shape x n x d` tensor of training features.
        comparisons: A `batch_shape x m x 2` tensor of training comparisons;
            comparisons[i] is a noisy indicator suggesting that the utility
            value of comparisons[i, 0]-th is greater than comparisons[i, 1]-th.
        covar_module: Covariance module.
        noise_module: Noise module.
    """
    super().__init__()
    # Compatibility variables with fit_gpytorch_*: dummy likelihood.
    # The likelihood is tightly tied to this model and
    # it doesn't make much sense to keep it separate.
    self.likelihood = None
    # TODO: remove these variables from `state_dict()` so that when calling
    # `load_state_dict()`, only the hyperparameters are copied over
    self.register_buffer("datapoints", None)
    self.register_buffer("comparisons", None)
    self.register_buffer("utility", None)
    self.register_buffer("covar_chol", None)
    self.register_buffer("likelihood_hess", None)
    self.register_buffer("hlcov_eye", None)
    self.register_buffer("covar", None)
    self.register_buffer("covar_inv", None)
    self.train_inputs = []
    self.train_targets = None
    self.pred_cov_fac_need_update = True
    self._input_batch_shape = torch.Size()
    self.dim = None
    # will be set to match the dtype and device of `datapoints`;
    # since scipy.optimize.fsolve only works on CPU, it is fastest to fit the
    # model on CPU and take samples on GPU, avoiding the overhead of moving
    # data back and forth during fitting
    self.tkwargs = {}
    # See set_train_data for additional compatibility variables
    self.set_train_data(datapoints, comparisons, update_model=False)

    # Set optional parameters.
    # Jitter to add for numerical stability.
    self._jitter = kwargs.get("jitter", 1e-6)
    # Clamp z for better numerical stability; see self._calc_z for details.
    # norm_cdf(z=3) ~= 0.999, i.e., the top 0.1%.
    self._zlim = kwargs.get("zlim", 3)
    # Stopping criterion for the scipy.optimize.fsolve call used to find f_map
    # in _update(). If None, set to 1e-6 by default in _update.
    self._xtol = kwargs.get("xtol")
    # The maximum number of calls to the function in scipy.optimize.fsolve.
    # If None, set to 100 by default in _update.
    # If zero, fsolve defaults to 100 * (N + 1).
    self._maxfev = kwargs.get("maxfev")

    # Set hyperparameters.
    # Do not set the batch_shape explicitly so mean_module can operate in both
    # modes; once the fsolve used in _update can run in batch mode, we should
    # explicitly set the batch shape here.
    self.mean_module = ConstantMean()
    # Do not optimize the constant mean prior
    for param in self.mean_module.parameters():
        param.requires_grad = False

    # Set the noise module
    if noise_module is None:
        noise_module = HomoskedasticNoise(
            noise_prior=SmoothedBoxPrior(-5, 5, 0.5, transform=torch.log),
            noise_constraint=GreaterThan(1e-4),  # if None, 1e-4 by default
            batch_shape=self._input_batch_shape,
        )
    self.noise_module = noise_module

    # Set the covariance module
    if covar_module is None:
        ls_prior = GammaPrior(1.2, 0.5)
        ls_prior_mode = (ls_prior.concentration - 1) / ls_prior.rate
        covar_module = RBFKernel(
            batch_shape=self._input_batch_shape,
            ard_num_dims=self.dim,
            lengthscale_prior=ls_prior,
            lengthscale_constraint=Positive(
                transform=None, initial_value=ls_prior_mode
            ),
        )
    self.covar_module = covar_module

    self._x0 = None  # will store temporary results for warm-starting
    if self.datapoints is not None and self.comparisons is not None:
        self.to(dtype=self.datapoints.dtype, device=self.datapoints.device)
        self._update()  # Find f_map for initial parameters

    self.to(self.datapoints)
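# Hedged usage sketch (not from the original source): the constructor above
# matches BoTorch's PairwiseGP; the class name is an assumption. Each row of
# `comparisons` holds (winner index, loser index) into the rows of
# `datapoints`.
import torch

datapoints = torch.rand(5, 2)
comparisons = torch.tensor([[0, 1], [2, 3], [1, 4]], dtype=torch.long)
model = PairwiseGP(datapoints, comparisons)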
def _setup_multifidelity_covar_module(
    dim: int,
    aug_batch_shape: torch.Size,
    iteration_fidelity: Optional[int],
    data_fidelity: Optional[int],
    linear_truncated: bool,
    nu: float,
) -> Tuple[ScaleKernel, Dict]:
    """Helper function to get the covariance module and associated
    subset_batch_dict for the multifidelity setting.

    Args:
        dim: The dimensionality of the training data.
        aug_batch_shape: The output-augmented batch shape as defined in
            `BatchedMultiOutputGPyTorchModel`.
        iteration_fidelity: The column index for the training iteration
            fidelity parameter (optional).
        data_fidelity: The column index for the downsampling fidelity
            parameter (optional).
        linear_truncated: If True, use a `LinearTruncatedFidelityKernel`
            instead of the default kernel.
        nu: The smoothness parameter for the Matern kernel: either 1/2, 3/2,
            or 5/2. Only used when `linear_truncated=True`.

    Returns:
        The covariance module and subset_batch_dict.
    """
    # normalize negative fidelity indices
    if iteration_fidelity is not None and iteration_fidelity < 0:
        iteration_fidelity = dim + iteration_fidelity
    if data_fidelity is not None and data_fidelity < 0:
        data_fidelity = dim + data_fidelity

    if linear_truncated:
        fidelity_dims = [
            i for i in (iteration_fidelity, data_fidelity) if i is not None
        ]
        kernel = LinearTruncatedFidelityKernel(
            fidelity_dims=fidelity_dims,
            dimension=dim,
            nu=nu,
            batch_shape=aug_batch_shape,
            power_prior=GammaPrior(3.0, 3.0),
        )
    else:
        active_dimsX = [
            i for i in range(dim) if i not in {iteration_fidelity, data_fidelity}
        ]
        kernel = RBFKernel(
            ard_num_dims=len(active_dimsX),
            batch_shape=aug_batch_shape,
            lengthscale_prior=GammaPrior(3.0, 6.0),
            active_dims=active_dimsX,
        )
        additional_kernels = []
        if iteration_fidelity is not None:
            exp_kernel = ExponentialDecayKernel(
                batch_shape=aug_batch_shape,
                lengthscale_prior=GammaPrior(3.0, 6.0),
                offset_prior=GammaPrior(3.0, 6.0),
                power_prior=GammaPrior(3.0, 6.0),
                active_dims=[iteration_fidelity],
            )
            additional_kernels.append(exp_kernel)
        if data_fidelity is not None:
            ds_kernel = DownsamplingKernel(
                batch_shape=aug_batch_shape,
                offset_prior=GammaPrior(3.0, 6.0),
                power_prior=GammaPrior(3.0, 6.0),
                active_dims=[data_fidelity],
            )
            additional_kernels.append(ds_kernel)
        kernel = ProductKernel(kernel, *additional_kernels)

    covar_module = ScaleKernel(
        kernel, batch_shape=aug_batch_shape, outputscale_prior=GammaPrior(2.0, 0.15)
    )

    if linear_truncated:
        subset_batch_dict = {
            "covar_module.base_kernel.raw_power": -2,
            "covar_module.base_kernel.covar_module_unbiased.raw_lengthscale": -3,
            "covar_module.base_kernel.covar_module_biased.raw_lengthscale": -3,
        }
    else:
        subset_batch_dict = {
            "covar_module.base_kernel.kernels.0.raw_lengthscale": -3,
            "covar_module.base_kernel.kernels.1.raw_power": -2,
            "covar_module.base_kernel.kernels.1.raw_offset": -2,
        }
        if iteration_fidelity is not None:
            subset_batch_dict = {
                "covar_module.base_kernel.kernels.1.raw_lengthscale": -3,
                **subset_batch_dict,
            }
            if data_fidelity is not None:
                # with both fidelities present, the downsampling kernel is
                # kernels.2 in the product kernel
                subset_batch_dict = {
                    "covar_module.base_kernel.kernels.2.raw_power": -2,
                    "covar_module.base_kernel.kernels.2.raw_offset": -2,
                    **subset_batch_dict,
                }

    return covar_module, subset_batch_dict
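# Hedged usage sketch (not from the original source): calling the helper above
# for a 4-column training matrix whose last column is a downsampling fidelity.
import torch

covar_module, subset_batch_dict = _setup_multifidelity_covar_module(
    dim=4,
    aug_batch_shape=torch.Size([]),
    iteration_fidelity=None,
    data_fidelity=3,
    linear_truncated=True,
    nu=2.5,
)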
def __init__(
    self,
    datapoints: Tensor,
    comparisons: Tensor,
    covar_module: Optional[Module] = None,
    input_transform: Optional[InputTransform] = None,
    **kwargs,
) -> None:
    r"""A probit-likelihood GP with Laplace approximation model that learns via
    pairwise comparison data. By default it uses a scaled RBF kernel.

    Args:
        datapoints: A `batch_shape x n x d` tensor of training features.
        comparisons: A `batch_shape x m x 2` tensor of training comparisons;
            comparisons[i] is a noisy indicator suggesting that the utility
            value of comparisons[i, 0]-th is greater than comparisons[i, 1]-th.
        covar_module: Covariance module.
        input_transform: An input transform that is applied in the model's
            forward pass.
    """
    super().__init__()
    if input_transform is not None:
        input_transform.to(datapoints)
        # input transformation is applied in set_train_data
        self.input_transform = input_transform

    # Compatibility variables with fit_gpytorch_*: dummy likelihood.
    # The likelihood is tightly tied to this model and
    # it doesn't make much sense to keep it separate.
    self.likelihood = None

    # TODO: remove these variables from `state_dict()` so that when calling
    # `load_state_dict()`, only the hyperparameters are copied over
    self.register_buffer("datapoints", None)
    self.register_buffer("comparisons", None)
    self.register_buffer("D", None)
    self.register_buffer("DT", None)
    self.register_buffer("utility", None)
    self.register_buffer("covar_chol", None)
    self.register_buffer("likelihood_hess", None)
    self.register_buffer("hlcov_eye", None)
    self.register_buffer("covar", None)
    self.register_buffer("covar_inv", None)

    self.train_inputs = []
    self.train_targets = None
    self.pred_cov_fac_need_update = True
    self.dim = None

    # See set_train_data for additional compatibility variables.
    # Note that the datapoints here are not transformed even if
    # input_transform is not None, to avoid double transformation during
    # model fitting; self.transform_inputs is called in `forward`.
    self.set_train_data(datapoints, comparisons, update_model=False)

    # Set optional parameters.
    # Jitter to add for numerical stability.
    self._jitter = kwargs.get("jitter", 1e-6)
    # Clamp z for better numerical stability; see self._calc_z for details.
    # norm_cdf(z=3) ~= 0.999, i.e., the top 0.1%.
    self._zlim = kwargs.get("zlim", 3)
    # Stopping criterion for the scipy.optimize.fsolve call used to find f_map
    # in _update(). If None, set to 1e-6 by default in _update.
    self._xtol = kwargs.get("xtol")
    # The maximum number of calls to the function in scipy.optimize.fsolve.
    # If None, set to 100 by default in _update.
    # If zero, fsolve defaults to 100 * (N + 1).
    self._maxfev = kwargs.get("maxfev")

    # Set hyperparameters.
    # Do not set the batch_shape explicitly so mean_module can operate in both
    # modes; once the fsolve used in _update can run in batch mode, we should
    # explicitly set the batch shape here.
    self.mean_module = ConstantMean()
    # Do not optimize the constant mean prior
    for param in self.mean_module.parameters():
        param.requires_grad = False

    # Set the covariance module.
    # The default outputscale here is only a rule of thumb, meant to keep
    # estimates away from scale values that would make Phi(f(x)) saturate
    # at 0 or 1.
    if covar_module is None:
        ls_prior = GammaPrior(1.2, 0.5)
        ls_prior_mode = (ls_prior.concentration - 1) / ls_prior.rate
        covar_module = ScaleKernel(
            RBFKernel(
                batch_shape=self.batch_shape,
                ard_num_dims=self.dim,
                lengthscale_prior=ls_prior,
                lengthscale_constraint=Positive(
                    transform=None, initial_value=ls_prior_mode
                ),
            ),
            outputscale_prior=SmoothedBoxPrior(a=1, b=4),
        )
    self.covar_module = covar_module

    self._x0 = None  # will store temporary results for warm-starting
    if self.datapoints is not None and self.comparisons is not None:
        self.to(dtype=self.datapoints.dtype, device=self.datapoints.device)
        # Find f_map for initial parameters with transformed datapoints
        transformed_dp = self.transform_inputs(datapoints)
        self._update(transformed_dp)

    self.to(self.datapoints)
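# Hedged usage sketch (not from the original source): same assumptions as the
# PairwiseGP sketch above, now with a Normalize input transform applied in the
# model's forward pass.
import torch
from botorch.models.transforms.input import Normalize  # assumed import

datapoints = torch.rand(6, 2)
comparisons = torch.tensor([[0, 1], [2, 3], [4, 5]], dtype=torch.long)
model = PairwiseGP(datapoints, comparisons, input_transform=Normalize(d=2))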
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    iteration_fidelity: Optional[int] = None,
    data_fidelity: Optional[int] = None,
    linear_truncated: bool = True,
    nu: float = 2.5,
    likelihood: Optional[Likelihood] = None,
    outcome_transform: Optional[OutcomeTransform] = None,
) -> None:
    self._init_args = {
        "iteration_fidelity": iteration_fidelity,
        "data_fidelity": data_fidelity,
        "linear_truncated": linear_truncated,
        "nu": nu,
        "outcome_transform": outcome_transform,
    }
    if iteration_fidelity is None and data_fidelity is None:
        raise UnsupportedError(
            "SingleTaskMultiFidelityGP requires at least one fidelity parameter."
        )
    # normalize negative fidelity indices
    if iteration_fidelity is not None and iteration_fidelity < 0:
        iteration_fidelity = train_X.size(-1) + iteration_fidelity
    if data_fidelity is not None and data_fidelity < 0:
        data_fidelity = train_X.size(-1) + data_fidelity
    self._set_dimensions(train_X=train_X, train_Y=train_Y)
    if linear_truncated:
        fidelity_dims = [
            i for i in (iteration_fidelity, data_fidelity) if i is not None
        ]
        kernel = LinearTruncatedFidelityKernel(
            fidelity_dims=fidelity_dims,
            dimension=train_X.size(-1),
            nu=nu,
            batch_shape=self._aug_batch_shape,
            power_prior=GammaPrior(3.0, 3.0),
        )
    else:
        active_dimsX = [
            i
            for i in range(train_X.size(-1))
            if i not in {iteration_fidelity, data_fidelity}
        ]
        kernel = RBFKernel(
            ard_num_dims=len(active_dimsX),
            batch_shape=self._aug_batch_shape,
            lengthscale_prior=GammaPrior(3.0, 6.0),
            active_dims=active_dimsX,
        )
        additional_kernels = []
        if iteration_fidelity is not None:
            exp_kernel = ExponentialDecayKernel(
                batch_shape=self._aug_batch_shape,
                lengthscale_prior=GammaPrior(3.0, 6.0),
                offset_prior=GammaPrior(3.0, 6.0),
                power_prior=GammaPrior(3.0, 6.0),
                active_dims=[iteration_fidelity],
            )
            additional_kernels.append(exp_kernel)
        if data_fidelity is not None:
            ds_kernel = DownsamplingKernel(
                batch_shape=self._aug_batch_shape,
                offset_prior=GammaPrior(3.0, 6.0),
                power_prior=GammaPrior(3.0, 6.0),
                active_dims=[data_fidelity],
            )
            additional_kernels.append(ds_kernel)
        kernel = ProductKernel(kernel, *additional_kernels)
    covar_module = ScaleKernel(
        kernel,
        batch_shape=self._aug_batch_shape,
        outputscale_prior=GammaPrior(2.0, 0.15),
    )
    super().__init__(
        train_X=train_X,
        train_Y=train_Y,
        likelihood=likelihood,
        covar_module=covar_module,
        outcome_transform=outcome_transform,
    )
    if linear_truncated:
        subset_batch_dict = {
            "covar_module.base_kernel.raw_power": -2,
            "covar_module.base_kernel.covar_module_unbiased.raw_lengthscale": -3,
            "covar_module.base_kernel.covar_module_biased.raw_lengthscale": -3,
        }
    else:
        subset_batch_dict = {
            "covar_module.base_kernel.kernels.0.raw_lengthscale": -3,
            "covar_module.base_kernel.kernels.1.raw_power": -2,
            "covar_module.base_kernel.kernels.1.raw_offset": -2,
        }
        if iteration_fidelity is not None:
            subset_batch_dict = {
                "covar_module.base_kernel.kernels.1.raw_lengthscale": -3,
                **subset_batch_dict,
            }
            if data_fidelity is not None:
                # with both fidelities present, the downsampling kernel is
                # kernels.2 in the product kernel
                subset_batch_dict = {
                    "covar_module.base_kernel.kernels.2.raw_power": -2,
                    "covar_module.base_kernel.kernels.2.raw_offset": -2,
                    **subset_batch_dict,
                }
    self._subset_batch_dict = {
        "likelihood.noise_covar.raw_noise": -2,
        "mean_module.constant": -2,
        "covar_module.raw_outputscale": -1,
        **subset_batch_dict,
    }
    self.to(train_X)
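# Hedged usage sketch (not from the original source): the error message above
# identifies this constructor as SingleTaskMultiFidelityGP. Here the last
# column of train_X is the data fidelity; a negative index such as
# data_fidelity=-1 would be wrapped to the same column.
import torch

train_X = torch.rand(10, 4)
train_Y = torch.rand(10, 1)
model = SingleTaskMultiFidelityGP(train_X=train_X, train_Y=train_Y, data_fidelity=3)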
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    iteration_fidelity: Optional[int] = None,
    data_fidelity: Optional[int] = None,
    linear_truncated: bool = True,
    nu: float = 2.5,
    likelihood: Optional[Likelihood] = None,
) -> None:
    if iteration_fidelity is None and data_fidelity is None:
        raise UnsupportedError(
            "SingleTaskMultiFidelityGP requires at least one fidelity parameter."
        )
    # normalize negative fidelity indices
    if iteration_fidelity is not None and iteration_fidelity < 0:
        iteration_fidelity = train_X.size(-1) + iteration_fidelity
    if data_fidelity is not None and data_fidelity < 0:
        data_fidelity = train_X.size(-1) + data_fidelity
    self._set_dimensions(train_X=train_X, train_Y=train_Y)
    if linear_truncated:
        fidelity_dims = [
            i for i in (iteration_fidelity, data_fidelity) if i is not None
        ]
        kernel = LinearTruncatedFidelityKernel(
            fidelity_dims=fidelity_dims,
            dimension=train_X.size(-1),
            nu=nu,
            batch_shape=self._aug_batch_shape,
            power_prior=GammaPrior(3.0, 3.0),
        )
    else:
        active_dimsX = [
            i
            for i in range(train_X.size(-1))
            if i not in {iteration_fidelity, data_fidelity}
        ]
        kernel = RBFKernel(
            ard_num_dims=len(active_dimsX),
            batch_shape=self._aug_batch_shape,
            lengthscale_prior=GammaPrior(3.0, 6.0),
            active_dims=active_dimsX,
        )
        additional_kernels = []
        if iteration_fidelity is not None:
            exp_kernel = ExponentialDecayKernel(
                batch_shape=self._aug_batch_shape,
                lengthscale_prior=GammaPrior(3.0, 6.0),
                offset_prior=GammaPrior(3.0, 6.0),
                power_prior=GammaPrior(3.0, 6.0),
                active_dims=[iteration_fidelity],
            )
            additional_kernels.append(exp_kernel)
        if data_fidelity is not None:
            ds_kernel = DownsamplingKernel(
                batch_shape=self._aug_batch_shape,
                offset_prior=GammaPrior(3.0, 6.0),
                power_prior=GammaPrior(3.0, 6.0),
                active_dims=[data_fidelity],
            )
            additional_kernels.append(ds_kernel)
        kernel = ProductKernel(kernel, *additional_kernels)
    covar_module = ScaleKernel(
        kernel,
        batch_shape=self._aug_batch_shape,
        outputscale_prior=GammaPrior(2.0, 0.15),
    )
    super().__init__(
        train_X=train_X,
        train_Y=train_Y,
        likelihood=likelihood,
        covar_module=covar_module,
    )
    self.to(train_X)