def _get_fixed_prior_model(**tkwargs):
    train_X, train_Y = _get_random_mt_data(**tkwargs)
    sd_prior = GammaPrior(2.0, 0.15)
    sd_prior._event_shape = torch.Size([2])
    model = MultiTaskGP(
        train_X,
        train_Y,
        task_feature=1,
        task_covar_prior=LKJCovariancePrior(2, 0.6, sd_prior),
    )
    return model.to(**tkwargs)
@classmethod
def construct_inputs(cls, training_data: TrainingData, **kwargs) -> Dict[str, Any]:
    r"""Construct kwargs for the `Model` from `TrainingData` and other options.

    Args:
        training_data: `TrainingData` container with data for single outcome
            or for multiple outcomes for batched multi-output case.
        **kwargs: Additional options for the model that pertain to the
            training data, including:

            - `task_features`: Indices of the input columns containing the
              task features (expected list of length 1),
            - `task_covar_prior`: A GPyTorch `Prior` object to use as prior on
              the cross-task covariance matrix,
            - `prior_config`: A dict representing a prior config; should only
              be used if `task_covar_prior` is not passed directly. Should
              contain: `use_LKJ_prior` (whether to use LKJ prior) and `eta`
              (eta value, float),
            - `rank`: The rank of the cross-task covariance matrix.
    """
    task_features = kwargs.pop("task_features", None)
    if task_features is None:
        raise ValueError(f"`task_features` required for {cls.__name__}.")
    task_feature = task_features[0]
    inputs = {
        "train_X": training_data.X,
        "train_Y": training_data.Y,
        "task_feature": task_feature,
        "rank": kwargs.get("rank"),
    }
    prior = kwargs.get("task_covar_prior")
    prior_config = kwargs.get("prior_config")
    if prior and prior_config:
        raise ValueError(
            "Only one of `task_covar_prior` and `prior_config` arguments expected."
        )
    if prior_config:
        if not prior_config.get("use_LKJ_prior"):
            raise ValueError("Currently only config for LKJ prior is supported.")
        all_tasks, _, _ = MultiTaskGP.get_all_tasks(training_data.X, task_feature)
        num_tasks = len(all_tasks)
        sd_prior = GammaPrior(1.0, 0.15)
        sd_prior._event_shape = torch.Size([num_tasks])
        eta = prior_config.get("eta", 0.5)
        if not isinstance(eta, (float, int)):
            raise ValueError(f"eta must be a real number, your eta was {eta}.")
        prior = LKJCovariancePrior(num_tasks, eta, sd_prior)
    inputs["task_covar_prior"] = prior
    return inputs
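# Hedged usage sketch (illustrative only, not part of the library): shows how
# `construct_inputs` above could be driven by a `prior_config` dict rather than
# a concrete `task_covar_prior`. It assumes `construct_inputs` is the
# classmethod on `MultiTaskGP` (as its `cls` parameter suggests) and that
# `TrainingData` accepts `X` and `Y` keyword tensors, matching the attribute
# access in the method body.
def _example_construct_inputs_with_prior_config() -> None:
    X = torch.rand(8, 2)
    X[:4, 1] = 0.0  # task 0 encoded in column 1
    X[4:, 1] = 1.0  # task 1
    Y = torch.rand(8, 1)
    training_data = TrainingData(X=X, Y=Y)  # assumed constructor signature
    inputs = MultiTaskGP.construct_inputs(
        training_data,
        task_features=[1],  # list of length 1, per the docstring
        rank=1,
        prior_config={"use_LKJ_prior": True, "eta": 1.0},
    )
    model = MultiTaskGP(**inputs)  # inputs carry the built LKJCovariancePrior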
def _get_fixed_noise_and_prior_model(**tkwargs):
    train_X, train_Y = _get_random_mt_data(**tkwargs)
    train_Yvar = torch.full_like(train_Y, 0.05)
    sd_prior = GammaPrior(2.0, 0.15)
    sd_prior._event_shape = torch.Size([2])
    model = FixedNoiseMultiTaskGP(
        train_X,
        train_Y,
        train_Yvar,
        task_feature=1,
        task_covar_prior=LKJCovariancePrior(2, 0.6, sd_prior),
    )
    return model.to(**tkwargs)
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    likelihood: Optional[MultitaskGaussianLikelihood] = None,
    data_covar_module: Optional[Module] = None,
    task_covar_prior: Optional[Prior] = None,
    rank: Optional[int] = None,
    input_transform: Optional[InputTransform] = None,
    outcome_transform: Optional[OutcomeTransform] = None,
    **kwargs: Any,
) -> None:
    r"""Multi-task GP with Kronecker structure, using a simple ICM kernel.

    Args:
        train_X: A `batch_shape x n x d` tensor of training features.
        train_Y: A `batch_shape x n x m` tensor of training observations.
        likelihood: A `MultitaskGaussianLikelihood`. If omitted, uses a
            `MultitaskGaussianLikelihood` with a `GammaPrior(1.1, 0.05)`
            noise prior.
        data_covar_module: The module computing the covariance (Kernel) matrix
            in data space. If omitted, uses a `MaternKernel`.
        task_covar_prior: A Prior on the task covariance matrix. Must operate
            on p.s.d. matrices. A common prior for this is the `LKJ` prior. If
            omitted, uses an `LKJCovariancePrior` with the `eta` parameter as
            specified in the keyword arguments (if not specified, uses
            `eta=1.5`).
        rank: The rank of the ICM kernel. If omitted, uses a full rank kernel.
        kwargs: Additional arguments to override default settings of priors,
            including:

            - eta: The eta parameter on the default LKJ task_covar_prior.
              A value of 1.0 is uninformative, values <1.0 favor stronger
              correlations (in magnitude), correlations vanish as eta -> inf.
            - sd_prior: A scalar prior over nonnegative numbers, which is used
              for the default LKJCovariancePrior task_covar_prior.
            - likelihood_rank: The rank of the task covariance matrix to fit.
              Defaults to 0 (which corresponds to a diagonal covariance
              matrix).

    Example:
        >>> train_X = torch.rand(10, 2)
        >>> train_Y = torch.cat([f_1(X), f_2(X)], dim=-1)
        >>> model = KroneckerMultiTaskGP(train_X, train_Y)
    """
    with torch.no_grad():
        transformed_X = self.transform_inputs(
            X=train_X, input_transform=input_transform
        )
    if outcome_transform is not None:
        train_Y, _ = outcome_transform(train_Y)

    self._validate_tensor_args(X=transformed_X, Y=train_Y)
    self._num_outputs = train_Y.shape[-1]
    batch_shape, ard_num_dims = train_X.shape[:-2], train_X.shape[-1]
    num_tasks = train_Y.shape[-1]

    if rank is None:
        rank = num_tasks
    if likelihood is None:
        noise_prior = GammaPrior(1.1, 0.05)
        noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
        likelihood = MultitaskGaussianLikelihood(
            num_tasks=num_tasks,
            batch_shape=batch_shape,
            noise_prior=noise_prior,
            noise_constraint=GreaterThan(
                MIN_INFERRED_NOISE_LEVEL,
                transform=None,
                initial_value=noise_prior_mode,
            ),
            rank=kwargs.get("likelihood_rank", 0),
        )
    if task_covar_prior is None:
        task_covar_prior = LKJCovariancePrior(
            n=num_tasks,
            eta=torch.tensor(kwargs.get("eta", 1.5)).to(train_X),
            sd_prior=kwargs.get(
                "sd_prior",
                SmoothedBoxPrior(math.exp(-6), math.exp(1.25), 0.05),
            ),
        )
    super().__init__(train_X, train_Y, likelihood)
    self.mean_module = MultitaskMean(
        base_means=ConstantMean(batch_shape=batch_shape), num_tasks=num_tasks
    )
    if data_covar_module is None:
        data_covar_module = MaternKernel(
            nu=2.5,
            ard_num_dims=ard_num_dims,
            lengthscale_prior=GammaPrior(3.0, 6.0),
            batch_shape=batch_shape,
        )
    self.covar_module = MultitaskKernel(
        data_covar_module=data_covar_module,
        num_tasks=num_tasks,
        rank=rank,
        batch_shape=batch_shape,
        task_covar_prior=task_covar_prior,
    )

    if outcome_transform is not None:
        self.outcome_transform = outcome_transform
    if input_transform is not None:
        self.input_transform = input_transform
    self.to(train_X)
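# Hedged construction sketch for the constructor above (illustrative values
# only): the `eta`, `sd_prior`, and `likelihood_rank` kwargs override the
# default priors as documented in the docstring.
def _example_kronecker_mtgp_with_custom_priors() -> None:
    train_X = torch.rand(10, 2)
    train_Y = torch.rand(10, 2)  # two outcome columns -> two tasks
    model = KroneckerMultiTaskGP(
        train_X,
        train_Y,
        eta=2.0,  # eta > 1 shrinks toward weaker inter-task correlations
        # scalar prior over task standard deviations; bounds chosen for
        # illustration only
        sd_prior=SmoothedBoxPrior(math.exp(-4), math.exp(2), 0.1),
        likelihood_rank=1,  # rank-1 (low-rank) task noise covariance
    )
    posterior = model.posterior(torch.rand(4, 2))  # joint over 4 points x 2 tasks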
def _get_model(
    X: Tensor,
    Y: Tensor,
    Yvar: Tensor,
    task_feature: Optional[int] = None,
    fidelity_features: Optional[List[int]] = None,
    use_input_warping: bool = False,
    **kwargs: Any,
) -> GPyTorchModel:
    """Instantiate a model of type depending on the input data.

    Args:
        X: A `n x d` tensor of input features.
        Y: A `n x m` tensor of input observations.
        Yvar: A `n x m` tensor of input variances (NaN if unobserved).
        task_feature: The index of the column pertaining to the task feature
            (if present).
        fidelity_features: List of columns of X that are fidelity parameters.
        use_input_warping: If True, apply an input-warping transform to X.
        kwargs: Additional model kwargs (e.g. `rank`, `prior`).

    Returns:
        A GPyTorchModel (unfitted).
    """
    Yvar = Yvar.clamp_min(MIN_OBSERVED_NOISE_LEVEL)  # pyre-ignore[16]
    is_nan = torch.isnan(Yvar)
    any_nan_Yvar = torch.any(is_nan)
    all_nan_Yvar = torch.all(is_nan)
    if any_nan_Yvar and not all_nan_Yvar:
        if task_feature is not None:
            # TODO (jej): Replace with inferred noise before making perf judgements.
            Yvar[Yvar != Yvar] = MIN_OBSERVED_NOISE_LEVEL
        else:
            raise ValueError(
                "Mix of known and unknown variances indicates evaluation function "
                "errors. Variances should all be specified, or none should be."
            )
    if use_input_warping:
        warp_tf = get_warping_transform(
            d=X.shape[-1],
            task_feature=task_feature,
            batch_shape=X.shape[:-2],  # pyre-ignore [6]
        )
    else:
        warp_tf = None
    if fidelity_features is None:
        fidelity_features = []
    if len(fidelity_features) == 0:
        # only pass linear_truncated arg if there are fidelities
        kwargs = {k: v for k, v in kwargs.items() if k != "linear_truncated"}
    if len(fidelity_features) > 0:
        if task_feature is not None:
            raise NotImplementedError(  # pragma: no cover
                "multi-task multi-fidelity models not yet available"
            )
        # at this point we can assume that there is only a single fidelity parameter
        gp = SingleTaskMultiFidelityGP(
            train_X=X,
            train_Y=Y,
            data_fidelity=fidelity_features[0],
            input_transform=warp_tf,
            **kwargs,
        )
    elif task_feature is None and all_nan_Yvar:
        gp = SingleTaskGP(train_X=X, train_Y=Y, input_transform=warp_tf, **kwargs)
    elif task_feature is None:
        gp = FixedNoiseGP(
            train_X=X, train_Y=Y, train_Yvar=Yvar, input_transform=warp_tf, **kwargs
        )
    else:
        # instantiate multitask GP
        all_tasks, _, _ = MultiTaskGP.get_all_tasks(X, task_feature)
        num_tasks = len(all_tasks)
        prior_dict = kwargs.get("prior")
        prior = None
        if prior_dict is not None:
            prior_type = prior_dict.get("type", None)
            if issubclass(prior_type, LKJCovariancePrior):
                sd_prior = prior_dict.get("sd_prior", GammaPrior(1.0, 0.15))
                sd_prior._event_shape = torch.Size([num_tasks])
                eta = prior_dict.get("eta", 0.5)
                if not isinstance(eta, (float, int)):
                    raise ValueError(f"eta must be a real number, your eta was {eta}")
                prior = LKJCovariancePrior(num_tasks, eta, sd_prior)
            else:
                raise NotImplementedError(
                    "Currently only LKJ prior is supported, "
                    f"your prior type was {prior_type}."
                )
        if all_nan_Yvar:
            gp = MultiTaskGP(
                train_X=X,
                train_Y=Y,
                task_feature=task_feature,
                rank=kwargs.get("rank"),
                task_covar_prior=prior,
                input_transform=warp_tf,
            )
        else:
            gp = FixedNoiseMultiTaskGP(
                train_X=X,
                train_Y=Y,
                train_Yvar=Yvar,
                task_feature=task_feature,
                rank=kwargs.get("rank"),
                task_covar_prior=prior,
                input_transform=warp_tf,
            )
    return gp
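# Hedged usage sketch (mirrors the `prior` dict parsing in `_get_model` above;
# the data here is made up for illustration): request a `MultiTaskGP` with an
# LKJ prior on the cross-task covariance by passing a `prior` dict via kwargs.
def _example_get_model_with_lkj_prior() -> None:
    X = torch.rand(12, 3)
    X[:6, 2] = 0.0  # task 0 indicator in the last column
    X[6:, 2] = 1.0  # task 1
    Y = torch.rand(12, 1)
    Yvar = torch.full_like(Y, float("nan"))  # all-NaN -> noise is inferred
    gp = _get_model(
        X=X,
        Y=Y,
        Yvar=Yvar,
        task_feature=2,
        rank=1,
        prior={
            "type": LKJCovariancePrior,  # only LKJ is supported above
            "sd_prior": GammaPrior(1.0, 0.15),
            "eta": 0.75,  # eta < 1 favors stronger correlations
        },
    )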
def test_KroneckerMultiTaskGP_custom(self):
    for batch_shape, dtype in itertools.product(
        (torch.Size(),),  # torch.Size([3])), TODO: Fix and test batch mode
        (torch.float, torch.double),
    ):
        tkwargs = {"device": self.device, "dtype": dtype}

        # initialization with custom settings
        likelihood = MultitaskGaussianLikelihood(
            num_tasks=2,
            rank=1,
            batch_shape=batch_shape,
        )
        data_covar_module = MaternKernel(
            nu=1.5,
            lengthscale_prior=GammaPrior(2.0, 4.0),
        )
        task_covar_prior = LKJCovariancePrior(
            n=2,
            eta=0.5,
            sd_prior=SmoothedBoxPrior(math.exp(-3), math.exp(2), 0.1),
        )
        model_kwargs = {
            "likelihood": likelihood,
            "data_covar_module": data_covar_module,
            "task_covar_prior": task_covar_prior,
            "rank": 1,
        }
        model, train_X, _ = _get_kronecker_model_and_training_data(
            model_kwargs=model_kwargs, batch_shape=batch_shape, **tkwargs
        )
        self.assertIsInstance(model, KroneckerMultiTaskGP)
        self.assertEqual(model.num_outputs, 2)
        self.assertIsInstance(model.likelihood, MultitaskGaussianLikelihood)
        self.assertEqual(model.likelihood.rank, 1)
        self.assertIsInstance(model.mean_module, MultitaskMean)
        self.assertIsInstance(model.covar_module, MultitaskKernel)
        base_kernel = model.covar_module
        self.assertIsInstance(base_kernel.data_covar_module, MaternKernel)
        self.assertIsInstance(base_kernel.task_covar_module, IndexKernel)
        task_covar_prior = base_kernel.task_covar_module.IndexKernelPrior
        self.assertIsInstance(task_covar_prior, LKJCovariancePrior)
        self.assertEqual(task_covar_prior.correlation_prior.eta, 0.5)
        lengthscale_prior = base_kernel.data_covar_module.lengthscale_prior
        self.assertIsInstance(lengthscale_prior, GammaPrior)
        self.assertEqual(lengthscale_prior.concentration, 2.0)
        self.assertEqual(lengthscale_prior.rate, 4.0)
        self.assertEqual(base_kernel.task_covar_module.covar_factor.shape[-1], 1)

        # test model fitting
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=OptimizationWarning)
            mll = fit_gpytorch_model(mll, options={"maxiter": 1}, max_retries=1)

        # test posterior
        test_x = torch.rand(2, 2, **tkwargs)
        posterior_f = model.posterior(test_x)
        self.assertIsInstance(posterior_f, GPyTorchPosterior)
        self.assertIsInstance(posterior_f.mvn, MultitaskMultivariateNormal)
        self.assertEqual(posterior_f.mean.shape, torch.Size([2, 2]))
        self.assertEqual(posterior_f.variance.shape, torch.Size([2, 2]))

        # test observation noise
        posterior_noisy = model.posterior(test_x, observation_noise=True)
        self.assertTrue(
            torch.allclose(
                posterior_noisy.variance, model.likelihood(posterior_f.mvn).variance
            )
        )

        # test posterior (batch eval)
        test_x = torch.rand(3, 2, 2, **tkwargs)
        posterior_f = model.posterior(test_x)
        self.assertIsInstance(posterior_f, GPyTorchPosterior)
        self.assertIsInstance(posterior_f.mvn, MultitaskMultivariateNormal)
        self.assertEqual(posterior_f.mean.shape, torch.Size([3, 2, 2]))
        self.assertEqual(posterior_f.variance.shape, torch.Size([3, 2, 2]))