def __init__(self, train_X: Tensor, train_Y: Tensor, train_Yvar: Tensor) -> None:
    r"""Set up a single-task exact GP with a heteroskedastic noise model.

    The observation noise is not fixed: a second `SingleTaskGP` is fit to the
    log of the observed variances and wrapped in a `HeteroskedasticNoise`
    module, which then acts as the noise model of the main GP's likelihood.

    Args:
        train_X: A `n x d` or `batch_shape x n x d` (batch mode) tensor of
            training features.
        train_Y: A `n x (o)` or `batch_shape x n x (o)` (batch mode) tensor of
            training observations.
        train_Yvar: A `n x (o)` or `batch_shape x n x (o)` (batch mode) tensor
            of observed measurement noise. Its logarithm is taken, so entries
            are expected to be strictly positive.

    Example:
        >>> train_X = torch.rand(20, 2)
        >>> train_Y = torch.sin(train_X[:, 0]) + torch.cos(train_X[:, 1])
        >>> se = torch.norm(train_X - 0.5, dim=-1)
        >>> train_Yvar = 0.1 + se * torch.rand_like(train_Y)
        >>> model = HeteroskedasticSingleTaskGP(train_X, train_Y, train_Yvar)
    """
    self._set_dimensions(train_X=train_X, train_Y=train_Y)

    # The auxiliary noise GP is trained on log-variances.
    log_variances = torch.log(train_Yvar)

    log_space_prior = SmoothedBoxPrior(-3, 5, 0.5, transform=torch.log)
    noise_floor = GreaterThan(MIN_INFERRED_NOISE_LEVEL, transform=None)
    noise_likelihood = GaussianLikelihood(
        noise_prior=log_space_prior,
        batch_shape=self._aug_batch_shape,
        noise_constraint=noise_floor,
    )
    noise_model = SingleTaskGP(
        train_X=train_X,
        train_Y=log_variances,
        likelihood=noise_likelihood,
    )

    # The main GP's likelihood draws its noise from the auxiliary GP.
    heteroskedastic_noise = HeteroskedasticNoise(noise_model)
    likelihood = _GaussianLikelihoodBase(heteroskedastic_noise)
    super().__init__(train_X=train_X, train_Y=train_Y, likelihood=likelihood)

    # Match the device and dtype of the training inputs.
    self.to(train_X)
def load_mcmc_samples(self, mcmc_samples: Dict[str, Tensor]) -> None:
    r"""Load the MCMC hyperparameter samples into the model.

    This method will be called by `fit_fully_bayesian_model_nuts` when the
    model has been fitted in order to create a batched SingleTaskGP model.
    The mean, kernel and likelihood modules are rebuilt with one batch entry
    per MCMC sample, and the sampled values are then copied into them.

    Args:
        mcmc_samples: Mapping from hyperparameter name to sampled values.
            The keys `"mean"`, `"lengthscale"` and `"outputscale"` are read
            unconditionally; `"noise"` is read only when `self.train_Yvar`
            is `None` (i.e. the noise level is inferred).
    """
    tkwargs = {"device": self.train_X.device, "dtype": self.train_X.dtype}
    num_mcmc_samples = len(mcmc_samples["mean"])
    batch_shape = torch.Size([num_mcmc_samples])

    # Replicate the training inputs along a new leading batch dimension.
    num_train = self.train_X.shape[0]
    self.train_X = self.train_X.unsqueeze(0).expand(num_mcmc_samples, num_train, -1)

    mean_module = ConstantMean(batch_shape=batch_shape)
    self.mean_module = mean_module.to(**tkwargs)

    matern = MaternKernel(
        ard_num_dims=self.train_X.shape[-1],
        batch_shape=batch_shape,
    )
    scaled_matern = ScaleKernel(base_kernel=matern, batch_shape=batch_shape)
    self.covar_module = scaled_matern.to(**tkwargs)

    if self.train_Yvar is None:
        # Inferred noise: build a batched likelihood and load the samples,
        # clamped from below at the minimum admissible noise level.
        self.likelihood = GaussianLikelihood(
            batch_shape=batch_shape,
            noise_constraint=GreaterThan(MIN_INFERRED_NOISE_LEVEL),
        ).to(**tkwargs)
        noise_draws = mcmc_samples["noise"].detach().clone()
        noise_shape = self.likelihood.noise_covar.noise.shape
        self.likelihood.noise_covar.noise = (
            noise_draws.view(noise_shape)
            .clamp_min(MIN_INFERRED_NOISE_LEVEL)
            .to(**tkwargs)
        )
    else:
        # Known noise: nothing to load, just reuse the observed variances.
        self.likelihood = FixedNoiseGaussianLikelihood(
            noise=self.train_Yvar, batch_shape=batch_shape
        ).to(**tkwargs)

    lengthscale_draws = mcmc_samples["lengthscale"].detach().clone()
    lengthscale_shape = self.covar_module.base_kernel.lengthscale.shape
    self.covar_module.base_kernel.lengthscale = lengthscale_draws.view(
        lengthscale_shape
    ).to(**tkwargs)

    outputscale_draws = mcmc_samples["outputscale"].detach().clone()
    outputscale_shape = self.covar_module.outputscale.shape
    self.covar_module.outputscale = outputscale_draws.view(outputscale_shape).to(
        **tkwargs
    )

    # The constant mean is written through `.data` rather than a setter.
    mean_draws = mcmc_samples["mean"].detach().clone()
    mean_shape = self.mean_module.constant.shape
    self.mean_module.constant.data = mean_draws.view(mean_shape).to(**tkwargs)
def __init__(self, train_X: Tensor, train_Y: Tensor, train_Yvar: Tensor) -> None:
    r"""Set up a single-task exact GP with a heteroskedastic noise model.

    Inputs are checked first. A `SingleTaskGP` fit to the log of the observed
    variances then serves, via `HeteroskedasticNoise`, as the noise model of
    the main GP's likelihood.

    Args:
        train_X: A `n x d` or `batch_shape x n x d` (batch mode) tensor of
            training features.
        train_Y: A `n x m` or `batch_shape x n x m` (batch mode) tensor of
            training observations.
        train_Yvar: A `n x m` or `batch_shape x n x m` (batch mode) tensor of
            observed measurement noise. Its logarithm is taken, so entries
            are expected to be strictly positive.

    Example:
        >>> train_X = torch.rand(20, 2)
        >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
        >>> se = torch.norm(train_X, dim=1, keepdim=True)
        >>> train_Yvar = 0.1 + se * torch.rand_like(train_Y)
        >>> model = HeteroskedasticSingleTaskGP(train_X, train_Y, train_Yvar)
    """
    # Input checks come first, before any module is constructed.
    validate_input_scaling(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
    self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
    self._set_dimensions(train_X=train_X, train_Y=train_Y)

    box_prior = SmoothedBoxPrior(-3, 5, 0.5, transform=torch.log)
    lower_bound = GreaterThan(
        MIN_INFERRED_NOISE_LEVEL, transform=None, initial_value=1.0
    )
    noise_likelihood = GaussianLikelihood(
        noise_prior=box_prior,
        batch_shape=self._aug_batch_shape,
        noise_constraint=lower_bound,
    )

    # The noise GP regresses log-variance on the same inputs.
    noise_model = SingleTaskGP(
        train_X=train_X,
        train_Y=train_Yvar.log(),
        likelihood=noise_likelihood,
    )

    likelihood = _GaussianLikelihoodBase(HeteroskedasticNoise(noise_model))
    super().__init__(train_X=train_X, train_Y=train_Y, likelihood=likelihood)

    # Match the device and dtype of the training inputs.
    self.to(train_X)
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    likelihood: Optional[Likelihood] = None,
    covar_module: Optional[Module] = None,
) -> None:
    r"""Set up a single-task exact GP model.

    Args:
        train_X: A `n x d` or `batch_shape x n x d` (batch mode) tensor of
            training features.
        train_Y: A `n x m` or `batch_shape x n x m` (batch mode) tensor of
            training observations.
        likelihood: A likelihood. If omitted, a `GaussianLikelihood` with an
            inferred noise level (Gamma prior, initialized at the prior
            mode) is used. If provided, the model is flagged via
            `_is_custom_likelihood`.
        covar_module: The module computing the covariance (kernel) matrix.
            If omitted, a scaled Matern-5/2 ARD kernel with Gamma priors on
            lengthscale and outputscale is used.

    Example:
        >>> train_X = torch.rand(20, 2)
        >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
        >>> model = SingleTaskGP(train_X, train_Y)
    """
    validate_input_scaling(train_X=train_X, train_Y=train_Y)
    self._validate_tensor_args(X=train_X, Y=train_Y)
    self._set_dimensions(train_X=train_X, train_Y=train_Y)
    train_X, train_Y, _ = self._transform_tensor_args(X=train_X, Y=train_Y)

    if likelihood is not None:
        self._is_custom_likelihood = True
    else:
        noise_prior = GammaPrior(1.1, 0.05)
        # Start the noise at the mode of its Gamma prior.
        prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
        likelihood = GaussianLikelihood(
            noise_prior=noise_prior,
            batch_shape=self._aug_batch_shape,
            noise_constraint=GreaterThan(
                MIN_INFERRED_NOISE_LEVEL,
                transform=None,
                initial_value=prior_mode,
            ),
        )

    ExactGP.__init__(self, train_X, train_Y, likelihood)
    self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)

    if covar_module is None:
        matern = MaternKernel(
            nu=2.5,
            ard_num_dims=train_X.shape[-1],
            batch_shape=self._aug_batch_shape,
            lengthscale_prior=GammaPrior(3.0, 6.0),
        )
        covar_module = ScaleKernel(
            matern,
            batch_shape=self._aug_batch_shape,
            outputscale_prior=GammaPrior(2.0, 0.15),
        )
    self.covar_module = covar_module

    # Match the device and dtype of the (transformed) training inputs.
    self.to(train_X)
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    train_Yvar: Tensor,
    outcome_transform: Optional[OutcomeTransform] = None,
) -> None:
    r"""Set up a single-task exact GP with a heteroskedastic noise model.

    A `SingleTaskGP` with a `Log` outcome transform is fit to the observed
    variances and used, via `HeteroskedasticNoise`, as the noise model of the
    main GP's likelihood. The noise model is also registered as an added loss
    term under the name `"noise_added_loss"`.

    Args:
        train_X: A `batch_shape x n x d` tensor of training features.
        train_Y: A `batch_shape x n x m` tensor of training observations.
        train_Yvar: A `batch_shape x n x m` tensor of observed measurement
            noise.
        outcome_transform: An outcome transform that is applied to the
            training data during instantiation and to the posterior during
            inference (that is, the `Posterior` obtained by calling
            `.posterior` on the model will be on the original scale).
            Note that the noise model internally log-transforms the
            variances, which will happen after this transform is applied.

    Example:
        >>> train_X = torch.rand(20, 2)
        >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
        >>> se = torch.norm(train_X, dim=1, keepdim=True)
        >>> train_Yvar = 0.1 + se * torch.rand_like(train_Y)
        >>> model = HeteroskedasticSingleTaskGP(train_X, train_Y, train_Yvar)
    """
    has_outcome_transform = outcome_transform is not None
    if has_outcome_transform:
        # Observations and variances are transformed jointly.
        train_Y, train_Yvar = outcome_transform(train_Y, train_Yvar)

    validate_input_scaling(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
    self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
    self._set_dimensions(train_X=train_X, train_Y=train_Y)

    box_prior = SmoothedBoxPrior(-3, 5, 0.5, transform=torch.log)
    lower_bound = GreaterThan(
        MIN_INFERRED_NOISE_LEVEL, transform=None, initial_value=1.0
    )
    noise_likelihood = GaussianLikelihood(
        noise_prior=box_prior,
        batch_shape=self._aug_batch_shape,
        noise_constraint=lower_bound,
    )

    # The log transform lives inside the noise GP instead of being applied here.
    noise_model = SingleTaskGP(
        train_X=train_X,
        train_Y=train_Yvar,
        likelihood=noise_likelihood,
        outcome_transform=Log(),
    )

    likelihood = _GaussianLikelihoodBase(HeteroskedasticNoise(noise_model))
    super().__init__(train_X=train_X, train_Y=train_Y, likelihood=likelihood)

    # Register the noise model as an additional loss term of this model.
    loss_name = "noise_added_loss"
    self.register_added_loss_term(loss_name)
    self.update_added_loss_term(loss_name, NoiseModelAddedLossTerm(noise_model))

    if has_outcome_transform:
        self.outcome_transform = outcome_transform

    # Match the device and dtype of the training inputs.
    self.to(train_X)
def _train_gp_models(self, x, y2):
    """Fit the "warm" and "cold" GP models and return both.

    The warm GP is trained on rescaled outputs of `self.aux_model` (evaluated
    on the grid representation of `x`) against `y2`; the cold GP is trained
    on `x` directly against `y2`, with an input transform derived from
    `self.transform`.

    Args:
        x: Training inputs; converted with `torch.tensor`.
        y2: Training targets; converted with `torch.tensor`.

    Returns:
        A tuple `(warm_gp, cold_gp)` of the two trained models.
    """
    inputs = torch.tensor(x)
    y2 = torch.tensor(y2)
    warm_likelihood = GaussianLikelihood()
    cold_likelihood = GaussianLikelihood(noise_constraint=Positive())

    # Evaluate the auxiliary model on the grid form of the inputs.
    grid_inputs = convert_to_xgrid_torch(inputs, self.transform).double()
    y1_pred, y1_latent = self.aux_model(grid_inputs, return_latent=True)
    if self.use_latent:
        aux_output = y1_latent
    else:
        aux_output = y1_pred

    # Rescale the selected auxiliary outputs using the stored lower/upper
    # bounds; `.data` detaches them from the auxiliary model's graph.
    selected = aux_output.data[..., self.slice]
    warm_inputs = (selected - self.y1_lower) / (self.y1_upper - self.y1_lower)

    warm_gp = GPWarm(warm_inputs, y2, warm_likelihood)
    train(warm_inputs, y2, warm_gp, self.train_cf1)

    to_grid = tensor_x_to_tensor_grid(self.transform)
    cold_gp = GPCold(inputs, y2, cold_likelihood, transform_input_fn=to_grid)
    train(inputs, y2, cold_gp, self.train_cf2)

    return warm_gp, cold_gp
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    likelihood: Optional[Likelihood] = None,
) -> None:
    r"""Set up a single-task exact GP model.

    Multi-output training data is converted to batch mode before the
    underlying `ExactGP` is initialized.

    Args:
        train_X: A `n x d` or `batch_shape x n x d` (batch mode) tensor of
            training features.
        train_Y: A `n x (o)` or `batch_shape x n x (o)` (batch mode) tensor of
            training observations.
        likelihood: A likelihood. If omitted, a `GaussianLikelihood` with an
            inferred noise level (Gamma prior, initialized at the prior
            mode) is used. If provided, a deep copy of its state dict is
            kept in `_likelihood_state_dict`.

    Example:
        >>> train_X = torch.rand(20, 2)
        >>> train_Y = torch.sin(train_X[:, 0]) + torch.cos(train_X[:, 1])
        >>> model = SingleTaskGP(train_X, train_Y)
    """
    # Record the feature dimension before the tensors are reshaped below.
    ard_num_dims = train_X.shape[-1]
    train_X, train_Y, _ = self._set_dimensions(train_X=train_X, train_Y=train_Y)
    train_X, train_Y, _ = multioutput_to_batch_mode_transform(
        train_X=train_X,
        train_Y=train_Y,
        num_outputs=self._num_outputs,
    )

    if likelihood is not None:
        # Snapshot the caller's likelihood state.
        self._likelihood_state_dict = deepcopy(likelihood.state_dict())
    else:
        noise_prior = GammaPrior(1.1, 0.05)
        # Start the noise at the mode of its Gamma prior.
        prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
        likelihood = GaussianLikelihood(
            noise_prior=noise_prior,
            batch_shape=self._aug_batch_shape,
            noise_constraint=GreaterThan(
                MIN_INFERRED_NOISE_LEVEL,
                transform=None,
                initial_value=prior_mode,
            ),
        )

    ExactGP.__init__(self, train_X, train_Y, likelihood)
    self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)

    matern = MaternKernel(
        nu=2.5,
        ard_num_dims=ard_num_dims,
        batch_shape=self._aug_batch_shape,
        lengthscale_prior=GammaPrior(3.0, 6.0),
    )
    self.covar_module = ScaleKernel(
        matern,
        batch_shape=self._aug_batch_shape,
        outputscale_prior=GammaPrior(2.0, 0.15),
    )

    # Match the device and dtype of the (transformed) training inputs.
    self.to(train_X)
def initialize_model(self, train_X, train_Y, state_dict=None):
    """Build the GP surrogate and its marginal log-likelihood for BO.

    Args:
        train_X: Training inputs, passed to `SingleTaskGP` unchanged.
        train_Y: Training targets; standardized before the model is built.
        state_dict: Optional state dict loaded into the GP after it has been
            constructed (e.g. to warm-start from a previous fit).

    Returns:
        A tuple `(mll, gp)` with the `ExactMarginalLogLikelihood` and the
        `SingleTaskGP` it was built for.
    """
    # Likelihood set-up follows https://github.com/pytorch/botorch/issues/179
    gamma_prior = GammaPrior(1.1, 0.05)
    prior_mode = (gamma_prior.concentration - 1) / gamma_prior.rate
    min_noise = 1e-3
    noise_constraint = GreaterThan(
        min_noise,
        transform=None,
        initial_value=prior_mode,
    )
    likelihood = GaussianLikelihood(
        noise_prior=gamma_prior,
        noise_constraint=noise_constraint,
    )

    # NOTE: inputs are used as given; rescaling them to [0, 1] bounds was
    # disabled in this method.
    standardized_Y = standardize(train_Y)
    gp = SingleTaskGP(train_X, standardized_Y, likelihood=likelihood)
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)

    # Restore a previous state if one was supplied.
    if state_dict is not None:
        gp.load_state_dict(state_dict)

    return mll, gp
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    task_feature: int,
    covar_module: Optional[Module] = None,
    task_covar_prior: Optional[Prior] = None,
    output_tasks: Optional[List[int]] = None,
    rank: Optional[int] = None,
    input_transform: Optional[InputTransform] = None,
    outcome_transform: Optional[OutcomeTransform] = None,
) -> None:
    r"""Multi-Task GP model using an ICM kernel, inferring observation noise.

    Args:
        train_X: A `n x (d + 1)` or `b x n x (d + 1)` (batch mode) tensor
            of training data. One of the columns should contain the task
            features (see `task_feature` argument).
        train_Y: A `n x 1` or `b x n x 1` (batch mode) tensor of training
            observations.
        task_feature: The index of the task feature
            (`-d <= task_feature <= d`).
        covar_module: The kernel over the non-task features. If omitted, a
            scaled Matern-5/2 ARD kernel with Gamma priors on lengthscale
            and outputscale is used.
        task_covar_prior: A Prior on the task covariance matrix. Must
            operate on p.s.d. matrices. A common prior for this is the
            `LKJ` prior.
        output_tasks: A list of task indices for which to compute model
            outputs for. If omitted, return outputs for all task indices.
        rank: The rank to be used for the index kernel. If omitted, use a
            full rank (i.e. number of tasks) kernel.
        input_transform: An input transform that is applied in the model's
            forward pass.
        outcome_transform: An outcome transform applied to `train_Y` during
            instantiation and stored on the model as `outcome_transform`.

    Raises:
        RuntimeError: If `output_tasks` contains a task that does not occur
            in the training data.

    Example:
        >>> X1, X2 = torch.rand(10, 2), torch.rand(20, 2)
        >>> i1, i2 = torch.zeros(10, 1), torch.ones(20, 1)
        >>> train_X = torch.cat([
        >>>     torch.cat([X1, i1], -1), torch.cat([X2, i2], -1),
        >>> ])
        >>> train_Y = torch.cat([f1(X1), f2(X2)]).unsqueeze(-1)
        >>> model = MultiTaskGP(train_X, train_Y, task_feature=-1)
    """
    # Validation and task discovery run on the transformed inputs; no
    # gradients are needed for that.
    with torch.no_grad():
        transformed_X = self.transform_inputs(
            X=train_X, input_transform=input_transform
        )
    self._validate_tensor_args(X=transformed_X, Y=train_Y)
    all_tasks, task_feature, d = self.get_all_tasks(
        transformed_X, task_feature, output_tasks
    )

    if outcome_transform is not None:
        train_Y, _ = outcome_transform(train_Y)

    # squeeze output dim
    train_Y = train_Y.squeeze(-1)

    if output_tasks is None:
        output_tasks = all_tasks
    elif not set(output_tasks).issubset(set(all_tasks)):
        raise RuntimeError("All output tasks must be present in input data.")
    self._output_tasks = output_tasks
    self._num_outputs = len(output_tasks)

    # TODO (T41270962): Support task-specific noise levels in likelihood
    likelihood = GaussianLikelihood(noise_prior=GammaPrior(1.1, 0.05))

    # construct indexer to be used in forward
    self._task_feature = task_feature
    base_idxr = torch.arange(d)
    base_idxr[task_feature:] += 1  # exclude task feature
    self._base_idxr = base_idxr

    # The untransformed inputs are stored as training data.
    super().__init__(
        train_inputs=train_X,
        train_targets=train_Y,
        likelihood=likelihood,
    )
    self.mean_module = ConstantMean()

    if covar_module is None:
        matern = MaternKernel(
            nu=2.5,
            ard_num_dims=d,
            lengthscale_prior=GammaPrior(3.0, 6.0),
        )
        covar_module = ScaleKernel(
            base_kernel=matern,
            outputscale_prior=GammaPrior(2.0, 0.15),
        )
    self.covar_module = covar_module

    num_tasks = len(all_tasks)
    self._rank = num_tasks if rank is None else rank
    self.task_covar_module = IndexKernel(
        num_tasks=num_tasks,
        rank=self._rank,
        prior=task_covar_prior,
    )

    if input_transform is not None:
        self.input_transform = input_transform
    if outcome_transform is not None:
        self.outcome_transform = outcome_transform

    # Match the device and dtype of the training inputs.
    self.to(train_X)
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    cat_dims: List[int],
    cont_kernel_factory: Optional[
        Callable[[torch.Size, int, List[int]], Kernel]
    ] = None,
    likelihood: Optional[Likelihood] = None,
    outcome_transform: Optional[OutcomeTransform] = None,  # TODO
    input_transform: Optional[InputTransform] = None,  # TODO
) -> None:
    r"""A single-task exact GP model supporting categorical parameters.

    When there are both ordinal and categorical dimensions, the covariance
    is the sum of a scaled "sum" kernel (continuous + scaled categorical)
    and a scaled "product" kernel (continuous * categorical). When every
    dimension is categorical, a single scaled `CategoricalKernel` is used.

    Args:
        train_X: A `batch_shape x n x d` tensor of training features.
        train_Y: A `batch_shape x n x m` tensor of training observations.
        cat_dims: A list of indices corresponding to the columns of
            the input `X` that should be considered categorical features.
        cont_kernel_factory: A method that accepts `batch_shape`,
            `ard_num_dims` and `active_dims` arguments (passed by keyword)
            and returns an instantiated GPyTorch `Kernel` object to be used
            as the base kernel for the continuous dimensions. It is called
            twice, once for the sum kernel and once for the product kernel.
            If omitted, this model uses a Matern-2.5 kernel as the kernel
            for the ordinal parameters.
        likelihood: A likelihood. If omitted, use a standard
            GaussianLikelihood with inferred noise level.
        outcome_transform: Not yet supported; must be `None`.
        input_transform: Not yet supported; must be `None`.

    Raises:
        UnsupportedError: If `outcome_transform` or `input_transform` is
            provided.
        ValueError: If `cat_dims` is empty.

    Example:
        >>> train_X = torch.cat(
        ...     [torch.rand(20, 2), torch.randint(3, (20, 1))], dim=-1
        ... )
        >>> train_Y = (
        ...     torch.sin(train_X[..., :-1]).sum(dim=1, keepdim=True)
        ...     + train_X[..., -1:]
        ... )
        >>> model = MixedSingleTaskGP(train_X, train_Y, cat_dims=[-1])
    """
    if outcome_transform is not None:
        raise UnsupportedError("outcome transforms not yet supported")
    if input_transform is not None:
        raise UnsupportedError("input transforms not yet supported")
    if len(cat_dims) == 0:
        raise ValueError(
            "Must specify categorical dimensions for MixedSingleTaskGP"
        )
    input_batch_shape, aug_batch_shape = self.get_batch_dimensions(
        train_X=train_X, train_Y=train_Y
    )

    if cont_kernel_factory is None:

        def cont_kernel_factory(
            batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int]
        ) -> MaternKernel:
            return MaternKernel(
                nu=2.5,
                batch_shape=batch_shape,
                ard_num_dims=ard_num_dims,
                active_dims=active_dims,
            )

    if likelihood is None:
        # This Gamma prior is quite close to the Horseshoe prior
        min_noise = 1e-5 if train_X.dtype == torch.float else 1e-6
        likelihood = GaussianLikelihood(
            batch_shape=aug_batch_shape,
            noise_constraint=GreaterThan(
                min_noise, transform=None, initial_value=1e-3
            ),
            noise_prior=GammaPrior(0.9, 10.0),
        )

    # Resolve (possibly negative) categorical indices and treat every
    # remaining column as ordinal/continuous.
    d = train_X.shape[-1]
    cat_dims = normalize_indices(indices=cat_dims, d=d)
    ord_dims = sorted(set(range(d)) - set(cat_dims))

    if len(ord_dims) == 0:
        # Purely categorical inputs: a single scaled categorical kernel.
        covar_module = ScaleKernel(
            CategoricalKernel(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(cat_dims),
            )
        )
    else:
        # Each term gets its own kernel instances so that the sum and the
        # product parts do not share hyperparameters.
        sum_kernel = ScaleKernel(
            cont_kernel_factory(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(ord_dims),
                active_dims=ord_dims,
            )
            + ScaleKernel(
                CategoricalKernel(
                    batch_shape=aug_batch_shape,
                    ard_num_dims=len(cat_dims),
                    active_dims=cat_dims,
                )
            )
        )
        prod_kernel = ScaleKernel(
            cont_kernel_factory(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(ord_dims),
                active_dims=ord_dims,
            )
            * CategoricalKernel(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(cat_dims),
                active_dims=cat_dims,
            )
        )
        covar_module = sum_kernel + prod_kernel

    super().__init__(
        train_X=train_X,
        train_Y=train_Y,
        likelihood=likelihood,
        covar_module=covar_module,
        outcome_transform=outcome_transform,
        input_transform=input_transform,
    )
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    likelihood: Optional[Likelihood] = None,
    covar_module: Optional[Module] = None,
    outcome_transform: Optional[OutcomeTransform] = None,
) -> None:
    r"""Set up a single-task exact GP model.

    Args:
        train_X: A `batch_shape x n x d` tensor of training features.
        train_Y: A `batch_shape x n x m` tensor of training observations.
        likelihood: A likelihood. If omitted, a `GaussianLikelihood` with an
            inferred noise level (Gamma prior, initialized at the prior
            mode) is used. If provided, the model is flagged via
            `_is_custom_likelihood`.
        covar_module: The module computing the covariance (Kernel) matrix.
            If omitted, a scaled Matern-5/2 ARD kernel is used and
            `_subset_batch_dict` is populated for it.
        outcome_transform: An outcome transform that is applied to the
            training data during instantiation and to the posterior during
            inference (that is, the `Posterior` obtained by calling
            `.posterior` on the model will be on the original scale).

    Example:
        >>> train_X = torch.rand(20, 2)
        >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
        >>> model = SingleTaskGP(train_X, train_Y)
    """
    has_outcome_transform = outcome_transform is not None
    if has_outcome_transform:
        train_Y, _ = outcome_transform(train_Y)

    validate_input_scaling(train_X=train_X, train_Y=train_Y)
    self._validate_tensor_args(X=train_X, Y=train_Y)
    self._set_dimensions(train_X=train_X, train_Y=train_Y)
    train_X, train_Y, _ = self._transform_tensor_args(X=train_X, Y=train_Y)

    if likelihood is not None:
        self._is_custom_likelihood = True
    else:
        noise_prior = GammaPrior(1.1, 0.05)
        # Start the noise at the mode of its Gamma prior.
        prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
        likelihood = GaussianLikelihood(
            noise_prior=noise_prior,
            batch_shape=self._aug_batch_shape,
            noise_constraint=GreaterThan(
                MIN_INFERRED_NOISE_LEVEL,
                transform=None,
                initial_value=prior_mode,
            ),
        )

    ExactGP.__init__(self, train_X, train_Y, likelihood)
    self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)

    if covar_module is not None:
        # TODO: Allow subsetting of other covar modules
        self.covar_module = covar_module
    else:
        matern = MaternKernel(
            nu=2.5,
            ard_num_dims=train_X.shape[-1],
            batch_shape=self._aug_batch_shape,
            lengthscale_prior=GammaPrior(3.0, 6.0),
        )
        self.covar_module = ScaleKernel(
            matern,
            batch_shape=self._aug_batch_shape,
            outputscale_prior=GammaPrior(2.0, 0.15),
        )
        # Parameter name -> integer dimension index; only set for the
        # default kernel, whose parameter layout is known here.
        self._subset_batch_dict = {
            "likelihood.noise_covar.raw_noise": -2,
            "mean_module.constant": -2,
            "covar_module.raw_outputscale": -1,
            "covar_module.base_kernel.raw_lengthscale": -3,
        }

    if has_outcome_transform:
        self.outcome_transform = outcome_transform

    # Match the device and dtype of the (transformed) training inputs.
    self.to(train_X)
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    task_feature: int,
    output_tasks: Optional[List[int]] = None,
    rank: Optional[int] = None,
) -> None:
    r"""Multi-Task GP model using an ICM kernel, inferring observation noise.

    Args:
        train_X: A `n x (d + 1)` tensor of training data. One of the columns
            should contain the task features (see `task_feature` argument).
            Batch mode is currently not supported.
        train_Y: A `n`-dim tensor of training observations.
        task_feature: The index of the task feature
            (`-d <= task_feature <= d`). Negative indices are accepted and
            are normalized to the equivalent non-negative column index.
        output_tasks: A list of task indices for which to compute model
            outputs for. If omitted, return outputs for all task indices.
        rank: The rank to be used for the index kernel. If omitted, use a
            full rank (i.e. number of tasks) kernel.

    Raises:
        ValueError: If `train_X` is not 2-dimensional, or if `task_feature`
            is outside `[-d, d]`.
        RuntimeError: If `output_tasks` contains a task that does not occur
            in the training data.

    Example:
        >>> X1, X2 = torch.rand(10, 2), torch.rand(20, 2)
        >>> i1, i2 = torch.zeros(10, 1), torch.ones(20, 1)
        >>> train_X = torch.cat([
        >>>     torch.cat([X1, i1], -1), torch.cat([X2, i2], -1),
        >>> ])
        >>> train_Y = torch.cat([f1(X1), f2(X2)])
        >>> model = MultiTaskGP(train_X, train_Y, task_feature=-1)
    """
    if train_X.ndimension() != 2:
        # Currently, batch mode MTGPs are blocked upstream in GPyTorch
        raise ValueError(f"Unsupported shape {train_X.shape} for train_X.")
    d = train_X.shape[-1] - 1
    if not (-d <= task_feature <= d):
        raise ValueError(f"Must have that -{d} <= task_feature <= {d}")
    # Normalize to a non-negative column index. The indexer below is built
    # by slicing a length-`d` range, so a negative `task_feature` would be
    # interpreted relative to `d` rather than `d + 1` columns and would
    # shift the wrong entries (e.g. d=2, task_feature=-1 gave [0, 2]).
    task_feature = task_feature % (d + 1)

    all_tasks = train_X[:, task_feature].unique().to(dtype=torch.long).tolist()
    if output_tasks is None:
        output_tasks = all_tasks
    elif any(t not in all_tasks for t in output_tasks):
        raise RuntimeError("All output tasks must be present in input data.")
    self._output_tasks = output_tasks

    # TODO (T41270962): Support task-specific noise levels in likelihood
    likelihood = GaussianLikelihood(noise_prior=GammaPrior(1.1, 0.05))

    # construct indexer to be used in forward
    self._task_feature = task_feature
    self._base_idxr = torch.arange(d)
    self._base_idxr[task_feature:] += 1  # exclude task feature

    super().__init__(
        train_inputs=train_X, train_targets=train_Y, likelihood=likelihood
    )
    self.mean_module = ConstantMean()
    self.covar_module = ScaleKernel(
        base_kernel=MaternKernel(
            nu=2.5, ard_num_dims=d, lengthscale_prior=GammaPrior(3.0, 6.0)
        ),
        outputscale_prior=GammaPrior(2.0, 0.15),
    )
    num_tasks = len(all_tasks)
    self._rank = rank if rank is not None else num_tasks
    # TODO: Add LKJ prior for the index kernel
    self.task_covar_module = IndexKernel(num_tasks=num_tasks, rank=self._rank)

    # Match the device and dtype of the training inputs.
    self.to(train_X)