def posterior(
    self,
    X: Tensor,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        observation_noise: If True, add the observation noise from the
            likelihood to the posterior. If a Tensor, use it directly as the
            observation noise (must be of shape `(batch_shape) x q`).

    Returns:
        A `GPyTorchPosterior` object, representing a batch of `b` joint
        distributions over `q` points. Includes observation noise if
        specified.
    """
    self.eval()  # make sure model is in eval mode
    with gpt_posterior_settings():
        mvn = self(X)
        if observation_noise is not False:
            if torch.is_tensor(observation_noise):
                # TODO: Make sure observation noise is transformed correctly
                self._validate_tensor_args(X=X, Y=observation_noise)
                if observation_noise.size(-1) == 1:
                    observation_noise = observation_noise.squeeze(-1)
                mvn = self.likelihood(mvn, X, noise=observation_noise)
            else:
                mvn = self.likelihood(mvn, X)
    posterior = GPyTorchPosterior(mvn=mvn)
    if hasattr(self, "outcome_transform"):
        posterior = self.outcome_transform.untransform_posterior(posterior)
    return posterior
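
# --- Usage sketch (illustrative, not part of the source above). A minimal
# example of calling this `posterior` implementation, assuming BoTorch's
# `SingleTaskGP`, which inherits it via `GPyTorchModel`; the training data
# below is made up for illustration ---
import torch
from botorch.models import SingleTaskGP

train_X = torch.rand(10, 2)
train_Y = torch.sin(train_X).sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)

test_X = torch.rand(5, 2)
post = model.posterior(test_X)  # noiseless latent-function posterior
post_noisy = model.posterior(test_X, observation_noise=True)  # adds likelihood noise
print(post.mean.shape, post.variance.shape)  # torch.Size([5, 1]) for both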
def test_gpt_posterior_settings(self):
    for propagate_grads in (False, True):
        with settings.propagate_grads(propagate_grads):
            with gpt_posterior_settings():
                self.assertTrue(gpt_settings.debug.off())
                self.assertTrue(gpt_settings.fast_pred_var.on())
                if settings.propagate_grads.off():
                    self.assertTrue(gpt_settings.detach_test_caches.on())
                else:
                    self.assertTrue(gpt_settings.detach_test_caches.off())
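
# --- For reference: a reconstruction of `gpt_posterior_settings` consistent
# with the assertions in the test above (the actual implementation lives in
# `botorch.models.utils`); treat this as a sketch, not the source ---
from contextlib import ExitStack, contextmanager

from botorch import settings
from gpytorch import settings as gpt_settings


@contextmanager
def gpt_posterior_settings():
    r"""Context manager for settings used when computing model posteriors."""
    with ExitStack() as es:
        es.enter_context(gpt_settings.debug(False))
        es.enter_context(gpt_settings.fast_pred_var())
        # detach test caches unless gradients should propagate through them
        es.enter_context(
            gpt_settings.detach_test_caches(settings.propagate_grads.off())
        )
        yield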
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points."""
    self.eval()  # make sure model is in eval mode
    no_pred_variance = skip_posterior_variances._state

    with ExitStack() as es:
        es.enter_context(gpt_posterior_settings())
        es.enter_context(fast_pred_var(True))
        # we need to skip posterior variances here
        es.enter_context(skip_posterior_variances(True))
        mvn = self(X)
        if observation_noise is not False:
            # TODO: implement Kronecker + diagonal solves so that this is possible.
            # if torch.is_tensor(observation_noise):
            #     # TODO: Validate noise shape
            #     # make observation_noise `batch_shape x q x n`
            #     obs_noise = observation_noise.transpose(-1, -2)
            #     mvn = self.likelihood(mvn, X, noise=obs_noise)
            # elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
            #     noise = self.likelihood.noise.mean().expand(X.shape[:-1])
            #     mvn = self.likelihood(mvn, X, noise=noise)
            # else:
            mvn = self.likelihood(mvn, X)

        # the lazy covariance matrix includes the interpolated version of the
        # full covariance matrix, so we can grab that instead
        if X.ndimension() > self.train_inputs[0].ndimension():
            X_batch_shape = X.shape[:-2]
            train_inputs = self.train_inputs[0].reshape(
                *[1] * len(X_batch_shape), *self.train_inputs[0].shape
            )
            train_inputs = train_inputs.repeat(
                *X_batch_shape, *[1] * self.train_inputs[0].ndimension()
            )
        else:
            train_inputs = self.train_inputs[0]
        full_covar = self.covar_modules[0](torch.cat((train_inputs, X), dim=-2))

        if no_pred_variance:
            pred_variance = mvn.variance
        else:
            joint_covar = self._get_joint_covariance([X])
            pred_variance = self.make_posterior_variances(joint_covar)

            full_covar = KroneckerProductLazyTensor(
                full_covar, *joint_covar.lazy_tensors[1:]
            )

        joint_covar_list = [self.covar_modules[0](X, train_inputs)]
        batch_shape = joint_covar_list[0].batch_shape
        for cm, param in zip(self.covar_modules[1:], self.latent_parameters):
            covar = cm(param)
            if covar.batch_shape != batch_shape:
                covar = BatchRepeatLazyTensor(covar, batch_shape)
            joint_covar_list.append(covar)

        test_train_covar = KroneckerProductLazyTensor(*joint_covar_list)

        # mean and variance get reshaped into the target shape
        new_mean = mvn.mean.reshape(*X.shape[:-1], *self.target_shape)
        if not no_pred_variance:
            new_variance = pred_variance.reshape(*X.shape[:-1], *self.target_shape)
            new_variance = DiagLazyTensor(new_variance)
        else:
            new_variance = ZeroLazyTensor(
                *X.shape[:-1], *self.target_shape, self.target_shape[-1]
            )

        mvn = MultivariateNormal(new_mean, new_variance)

        # return a specialized Posterior to allow for sampling
        posterior = HigherOrderGPPosterior(
            mvn=mvn,
            train_targets=self.train_targets.unsqueeze(-1),
            train_train_covar=self.prediction_strategy.lik_train_train_covar,
            test_train_covar=test_train_covar,
            joint_covariance_matrix=full_covar,
            output_shape=Size((*X.shape[:-1], *self.target_shape)),
            num_outputs=self._num_outputs,
        )
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        return posterior
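
# --- Illustration of the Kronecker structure exploited above: a standalone
# sketch of `KroneckerProductLazyTensor` from `gpytorch.lazy`, with made-up
# matrices, showing why the joint covariance never needs to be materialized ---
import torch
from gpytorch.lazy import KroneckerProductLazyTensor, lazify

A = lazify(torch.tensor([[1.0, 0.5], [0.5, 1.0]]))
B = lazify(torch.tensor([[2.0, 0.1], [0.1, 2.0]]))
K = KroneckerProductLazyTensor(A, B)  # represents A ⊗ B without forming it
print(K.shape)       # torch.Size([4, 4])
print(K.evaluate())  # dense 4 x 4 Kronecker product, only if explicitly needed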
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `q x d` or `batch_shape x q x d` (batch mode) tensor, where `d`
            is the dimension of the feature space (not including task
            indices) and `q` is the number of points considered jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise from the respective
            likelihoods. If a Tensor, specifies the observation noise levels
            to add.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices`. Includes measurement noise if `observation_noise`
        is specified.
    """
    if output_indices is None:
        output_indices = self._output_tasks
    num_outputs = len(output_indices)
    if any(i not in self._output_tasks for i in output_indices):
        raise ValueError(
            "output_indices must be a subset of the model's output tasks"
        )
    cls_name = self.__class__.__name__
    if hasattr(self, "outcome_transform"):
        raise NotImplementedError(
            f"Outcome transforms currently not supported by {cls_name}"
        )
    # raise early, before the posterior is computed
    if observation_noise is not False:
        raise NotImplementedError(
            f"Specifying observation noise is not yet supported by {cls_name}"
        )
    # construct evaluation X
    X_full = _make_X_full(X=X, output_indices=output_indices, tf=self._task_feature)

    self.eval()  # make sure model is in eval mode
    with gpt_posterior_settings():
        mvn = self(X_full)
    # If single-output, return the posterior of a single-output model
    if num_outputs == 1:
        return GPyTorchPosterior(mvn=mvn)
    # Otherwise, make a MultitaskMultivariateNormal out of this
    mtmvn = MultitaskMultivariateNormal(
        mean=mvn.mean.view(*X.shape[:-2], num_outputs, -1).transpose(-1, -2),
        covariance_matrix=mvn.lazy_covariance_matrix,
        interleaved=False,
    )
    return GPyTorchPosterior(mvn=mtmvn)
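
# --- Usage sketch (illustrative), assuming BoTorch's `MultiTaskGP`, which
# follows the pattern above: the task index lives in a designated column of
# the training inputs, and test points are expanded over the requested tasks ---
import torch
from botorch.models import MultiTaskGP

# last input column holds the task index (two tasks: 0 and 1)
X0 = torch.cat([torch.rand(8, 2), torch.zeros(8, 1)], dim=-1)
X1 = torch.cat([torch.rand(8, 2), torch.ones(8, 1)], dim=-1)
train_X = torch.cat([X0, X1], dim=0)
train_Y = torch.sin(train_X[:, :2]).sum(dim=-1, keepdim=True)

model = MultiTaskGP(train_X, train_Y, task_feature=-1)
test_X = torch.rand(4, 2)  # no task column; tasks are selected below
post_all = model.posterior(test_X)                     # posterior over all tasks
post_t0 = model.posterior(test_X, output_indices=[0])  # posterior over task 0 only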
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
            feature space, `q` is the number of points considered jointly,
            and `b` is the batch dimension.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add the observation noise from the
            respective likelihoods to the posterior. If a Tensor of shape
            `(batch_shape) x q x m`, use it directly as the observation
            noise (with `observation_noise[..., i]` added to the posterior
            of the `i`-th model).

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes measurement noise if
        `observation_noise` is specified.
    """
    self.eval()  # make sure model is in eval mode
    mvn_gen: Iterator
    with gpt_posterior_settings():
        # only compute what's necessary
        if output_indices is not None:
            mvns = [self.forward_i(i, X) for i in output_indices]
            if observation_noise is not False:
                # align noise / likelihood kwargs with the requested outputs
                if torch.is_tensor(observation_noise):
                    lh_kwargs = [
                        {"noise": observation_noise[..., i]}
                        for i in output_indices
                    ]
                else:
                    lh_kwargs = [
                        {"noise": lh.noise.mean().expand(X.shape[:-1])}
                        if isinstance(lh, FixedNoiseGaussianLikelihood)
                        else {}
                        for lh in (
                            self.likelihood.likelihoods[i] for i in output_indices
                        )
                    ]
                mvns = [
                    self.likelihood_i(i, mvn, X, **lkws)
                    for i, mvn, lkws in zip(output_indices, mvns, lh_kwargs)
                ]
            mvn_gen = zip(output_indices, mvns)
        else:
            mvns = self(*[X for _ in range(self.num_outputs)])
            if observation_noise is not False:
                if torch.is_tensor(observation_noise):
                    mvns = self.likelihood(
                        *[(mvn, X) for mvn in mvns], noise=observation_noise
                    )
                else:
                    mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
            mvn_gen = enumerate(mvns)
    # apply outcome transforms of individual models if present
    mvns = []
    for i, mvn in mvn_gen:
        try:
            octf = self.models[i].outcome_transform
            tf_mvn = octf.untransform_posterior(GPyTorchPosterior(mvn)).mvn
        except AttributeError:
            tf_mvn = mvn
        mvns.append(tf_mvn)
    # return the result as a GPyTorchPosterior
    if len(mvns) == 1:
        return GPyTorchPosterior(mvn=mvns[0])
    return GPyTorchPosterior(
        mvn=MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    )
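
# --- Usage sketch (illustrative), assuming BoTorch's `ModelListGP`, which
# uses this `posterior` to combine independent per-model posteriors ---
import torch
from botorch.models import ModelListGP, SingleTaskGP

train_X = torch.rand(10, 2)
m1 = SingleTaskGP(train_X, torch.sin(train_X).sum(dim=-1, keepdim=True))
m2 = SingleTaskGP(train_X, torch.cos(train_X).sum(dim=-1, keepdim=True))
model = ModelListGP(m1, m2)

post = model.posterior(torch.rand(4, 2))
print(post.mean.shape)  # torch.Size([4, 2]), one column per sub-model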
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add the observation noise from the
            likelihood to the posterior. If a Tensor, use it directly as the
            observation noise (must be of shape `(batch_shape) x q x m`).

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes observation noise if specified.
    """
    self.eval()  # make sure model is in eval mode
    with gpt_posterior_settings():
        # insert a dimension for the output dimension
        if self._num_outputs > 1:
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        if observation_noise is not False:
            if torch.is_tensor(observation_noise):
                # TODO: Validate noise shape
                # make observation_noise `batch_shape x q x n`
                obs_noise = observation_noise.transpose(-1, -2)
                mvn = self.likelihood(mvn, X, noise=obs_noise)
            elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                # Use the mean of the previous noise values (TODO: be smarter here).
                noise = self.likelihood.noise.mean().expand(X.shape[:-1])
                mvn = self.likelihood(mvn, X, noise=noise)
            else:
                mvn = self.likelihood(mvn, X)
        if self._num_outputs > 1:
            mean_x = mvn.mean
            covar_x = mvn.covariance_matrix
            output_indices = output_indices or range(self._num_outputs)
            mvns = [
                MultivariateNormal(
                    mean_x.select(dim=output_dim_idx, index=t),
                    lazify(covar_x.select(dim=output_dim_idx, index=t)),
                )
                for t in output_indices
            ]
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)

    posterior = GPyTorchPosterior(mvn=mvn)
    if hasattr(self, "outcome_transform"):
        posterior = self.outcome_transform.untransform_posterior(posterior)
    return posterior
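
# --- Usage sketch (illustrative) of the batched multi-output path above,
# assuming BoTorch's `SingleTaskGP`: passing multi-column training targets
# yields a batched model whose batch dimension indexes the outputs ---
import torch
from botorch.models import SingleTaskGP

train_X = torch.rand(10, 2)
train_Y = torch.stack(
    [torch.sin(train_X).sum(dim=-1), torch.cos(train_X).sum(dim=-1)], dim=-1
)  # shape 10 x 2: two outputs, modeled internally as a batch of two GPs
model = SingleTaskGP(train_X, train_Y)

post = model.posterior(torch.rand(4, 2), output_indices=[1])  # second output only
print(post.mean.shape)  # torch.Size([4, 1])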
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points."""
    self.eval()  # make sure model is in eval mode
    # input transforms are applied at `posterior` in `eval` mode, and at
    # `model.forward()` at training time
    X = self.transform_inputs(X)
    no_pred_variance = skip_posterior_variances._state

    with ExitStack() as es:
        es.enter_context(gpt_posterior_settings())
        es.enter_context(fast_pred_var(True))
        # we need to skip posterior variances here
        es.enter_context(skip_posterior_variances(True))
        mvn = self(X)
        if observation_noise is not False:
            # TODO: ensure that this still works for structured noise solves.
            mvn = self.likelihood(mvn, X)

        # the lazy covariance matrix includes the interpolated version of the
        # full covariance matrix, so we can grab that instead
        if X.ndimension() > self.train_inputs[0].ndimension():
            X_batch_shape = X.shape[:-2]
            train_inputs = self.train_inputs[0].reshape(
                *[1] * len(X_batch_shape), *self.train_inputs[0].shape
            )
            train_inputs = train_inputs.repeat(
                *X_batch_shape, *[1] * self.train_inputs[0].ndimension()
            )
        else:
            train_inputs = self.train_inputs[0]

        # we now compute the data covariances for the training data, the
        # testing data, the joint covariances, and the test-train
        # cross-covariance
        train_train_covar = self.prediction_strategy.lik_train_train_covar.detach()
        base_train_train_covar = train_train_covar.lazy_tensor

        data_train_covar = base_train_train_covar.lazy_tensors[0]
        data_covar = self.covar_modules[0]
        data_train_test_covar = data_covar(X, train_inputs)
        data_test_test_covar = data_covar(X)
        data_joint_covar = data_train_covar.cat_rows(
            cross_mat=data_train_test_covar,
            new_mat=data_test_test_covar,
        )

        # we detach the latents so that they don't cause gradient errors
        # TODO: Can we enable backprop through the latent covariances?
        batch_shape = data_train_test_covar.batch_shape
        latent_covar_list = []
        for latent_covar in base_train_train_covar.lazy_tensors[1:]:
            if latent_covar.batch_shape != batch_shape:
                latent_covar = BatchRepeatLazyTensor(latent_covar, batch_shape)
            latent_covar_list.append(latent_covar.detach())

        joint_covar = KroneckerProductLazyTensor(
            data_joint_covar, *latent_covar_list
        )
        test_train_covar = KroneckerProductLazyTensor(
            data_train_test_covar, *latent_covar_list
        )

        # compute the posterior variance if necessary
        if no_pred_variance:
            pred_variance = mvn.variance
        else:
            pred_variance = self.make_posterior_variances(joint_covar)

        # mean and variance get reshaped into the target shape
        new_mean = mvn.mean.reshape(*X.shape[:-1], *self.target_shape)
        if not no_pred_variance:
            new_variance = pred_variance.reshape(*X.shape[:-1], *self.target_shape)
            new_variance = DiagLazyTensor(new_variance)
        else:
            new_variance = ZeroLazyTensor(
                *X.shape[:-1], *self.target_shape, self.target_shape[-1]
            )

        mvn = MultivariateNormal(new_mean, new_variance)

        # return a specialized Posterior to allow for sampling;
        # cloning the full covar allows backpropagation through it
        posterior = HigherOrderGPPosterior(
            mvn=mvn,
            train_targets=self.train_targets.unsqueeze(-1),
            train_train_covar=train_train_covar,
            test_train_covar=test_train_covar,
            joint_covariance_matrix=joint_covar.clone(),
            output_shape=X.shape[:-1] + self.target_shape,
            num_outputs=self._num_outputs,
        )
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        return posterior
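
# --- Usage sketch (illustrative), assuming BoTorch's `HigherOrderGP` for
# tensor-valued targets; `posterior` returns the specialized
# `HigherOrderGPPosterior` constructed above, which supports sampling ---
import torch
from botorch.models import HigherOrderGP

train_X = torch.rand(10, 2)
train_Y = torch.rand(10, 3, 4)  # tensor-valued targets; target_shape is (3, 4)
model = HigherOrderGP(train_X, train_Y)

post = model.posterior(torch.rand(5, 2))
samples = post.rsample(torch.Size([16]))  # shape 16 x 5 x 3 x 4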