def __call__(self, x, full_cov=False):
    # Training mode
    if self.training:
        if self.train_inputs is None:
            raise RuntimeError(
                "train_inputs, train_targets cannot be None in training mode. "
                "Call .eval() for prior predictions, or call .set_train_data() to add training data."
            )
        if settings.debug.on():
            if not torch.equal(self.X, x):
                raise RuntimeError("You must train on the training inputs!")
        return self.forward(x)
    # Prior mode
    elif settings.prior_mode.on() or self.train_inputs is None or self.train_targets is None:
        full_output = self.forward(x)
        if settings.debug.on():
            if not isinstance(full_output, gpytorch.distributions.MultivariateNormal):
                raise RuntimeError("ExactGP.forward must return a MultivariateNormal")
        return full_output
    # Posterior mode
    else:
        cov_data_query = self.covar_module(self.X, x).evaluate()
        prior_pred = self.forward(x)
        pred_mean = prior_pred.mean.view(-1, 1) + cov_data_query.t() @ self.y_weights
        cov_weights = torch.cholesky_solve(cov_data_query, self.chol_cov_data)
        if full_cov:
            pred_cov = prior_pred.covariance_matrix - cov_data_query.t() @ cov_weights
        else:
            # Evaluate only the diagonal (variances) as a diagonal lazy matrix
            diag_k = gpytorch.lazy.DiagLazyTensor(prior_pred.lazy_covariance_matrix.diag())
            pred_cov = diag_k.add_diag(-cov_data_query.t().matmul(cov_weights).diag())
        return gpytorch.distributions.MultivariateNormal(pred_mean.view_as(prior_pred.mean), pred_cov)

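# A minimal sketch (assumptions, not part of the snippet above) of how the cached
# training-data terms this __call__ relies on could be precomputed once after
# training: `self.chol_cov_data`, the Cholesky factor of the noisy training
# covariance, and `self.y_weights`, i.e. (K + sigma^2 I)^{-1} y. The helper name
# and the noise level are hypothetical.
import torch

def precompute_train_caches(K_train: torch.Tensor, y: torch.Tensor, noise: float = 1e-2):
    """Hypothetical helper: K_train is the n x n training covariance, y the targets."""
    K_noisy = K_train + noise * torch.eye(K_train.shape[-1], dtype=K_train.dtype)
    chol_cov_data = torch.linalg.cholesky(K_noisy)  # lower-triangular L with L @ L.T == K_noisy
    y_weights = torch.cholesky_solve(y.view(-1, 1), chol_cov_data)  # solves K_noisy @ w = y
    return chol_cov_data, y_weights
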
def posterior(self, X: Tensor, observation_noise: bool = False, **kwargs: Any) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        observation_noise: If True, add observation noise to the posterior.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    with ExitStack() as es:
        es.enter_context(gpt_settings.debug(False))
        es.enter_context(gpt_settings.fast_pred_var())
        es.enter_context(
            gpt_settings.detach_test_caches(settings.propagate_grads.off())
        )
        mvn = self(X)
        if observation_noise:
            # TODO: Allow passing in observation noise via kwarg
            mvn = self.likelihood(mvn, X)
    return GPyTorchPosterior(mvn=mvn)

def posterior(
    self, X: Tensor, observation_noise: bool = False, **kwargs: Any
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        observation_noise: If True, add observation noise to the posterior.
        detach_test_caches: If True, detach GPyTorch test caches during
            computation of the posterior. Set to `False` if derivatives with
            respect to the training inputs are needed at test time (used
            e.g. by qNoisyExpectedImprovement). Defaults to `True`.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    detach_test_caches = kwargs.get("detach_test_caches", True)
    with ExitStack() as es:
        es.enter_context(settings.debug(False))
        es.enter_context(settings.fast_pred_var())
        es.enter_context(settings.detach_test_caches(detach_test_caches))
        mvn = self(X)
        if observation_noise:
            # TODO: Allow passing in observation noise via kwarg
            mvn = self.likelihood(mvn, X)
    return GPyTorchPosterior(mvn=mvn)

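# A hedged usage sketch for posterior() above, assuming a current BoTorch
# install; SingleTaskGP stands in for any model exposing this method, and the
# `detach_test_caches` kwarg is forwarded through **kwargs as documented.
import torch
from botorch.models import SingleTaskGP

train_X = torch.rand(8, 3, dtype=torch.double)
model = SingleTaskGP(train_X, train_X.sum(-1, keepdim=True))
X_test = torch.rand(4, 3, dtype=torch.double)  # q = 4 points, d = 3 features
post = model.posterior(X_test, observation_noise=True, detach_test_caches=True)
mean, variance = post.mean, post.variance  # marginal moments, each 4 x 1
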
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `q x d` or `batch_shape x q x d` (batch mode) tensor, where `d`
            is the dimension of the feature space (not including task
            indices) and `q` is the number of points considered jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.
        detach_test_caches: If True, detach GPyTorch test caches during
            computation of the posterior. Set to `False` if derivatives with
            respect to the training inputs are needed at test time (used
            e.g. by qNoisyExpectedImprovement).

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices`. Includes observation noise if
        `observation_noise=True`.
    """
    if output_indices is None:
        output_indices = self._output_tasks
    if any(i not in self._output_tasks for i in output_indices):
        raise ValueError("output_indices must be a subset of the model's output tasks")
    # construct evaluation X
    X_full = _make_X_full(X=X, output_indices=output_indices, tf=self._task_feature)
    self.eval()  # make sure model is in eval mode
    detach_test_caches = kwargs.get("detach_test_caches", True)
    with ExitStack() as es:
        es.enter_context(settings.debug(False))
        es.enter_context(settings.fast_pred_var())
        es.enter_context(settings.detach_test_caches(detach_test_caches))
        mvn = self(X_full)
        if observation_noise:
            # TODO: Allow passing in observation noise via kwarg
            mvn = self.likelihood(mvn, X_full)
    # If single-output, return the posterior of a single-output model
    if len(output_indices) == 1:
        return GPyTorchPosterior(mvn=mvn)
    # Otherwise, make a MultitaskMultivariateNormal out of this
    mtmvn = MultitaskMultivariateNormal(
        mean=mvn.mean.view(*X.shape[:-1], len(output_indices)),
        covariance_matrix=mvn.lazy_covariance_matrix,
        interleaved=False,
    )
    return GPyTorchPosterior(mvn=mtmvn)

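# A hedged sketch of what a helper like `_make_X_full` plausibly does
# (hypothetical reimplementation, for illustration only): replicate X once per
# requested output task and insert the task index as feature column `tf`.
import torch

def make_X_full(X: torch.Tensor, output_indices, tf: int) -> torch.Tensor:
    index_shape = X.shape[:-1] + torch.Size([1])
    indexers = (
        torch.full(index_shape, fill_value=i, dtype=X.dtype, device=X.device)
        for i in output_indices
    )
    X_l, X_r = X[..., :tf], X[..., tf:]  # split around the task-feature column
    return torch.cat(
        [torch.cat([X_l, indexer, X_r], dim=-1) for indexer in indexers], dim=-2
    )

X = torch.rand(4, 2)
X_full = make_X_full(X, output_indices=[0, 1], tf=2)  # shape 8 x 3, task column appended
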
# Note: requires `from contextlib import ExitStack, contextmanager`.
@contextmanager
def gpt_posterior_settings():
    r"""Context manager for settings used for computing model posteriors."""
    with ExitStack() as es:
        es.enter_context(gpt_settings.debug(False))
        es.enter_context(gpt_settings.fast_pred_var())
        es.enter_context(
            gpt_settings.detach_test_caches(settings.propagate_grads.off())
        )
        yield

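# Usage sketch: with the @contextmanager decorator above, the generator can
# wrap posterior computations, e.g. (hypothetical `model` and `X`):
#
#     with gpt_posterior_settings():
#         mvn = model(X)  # debug checks off, fast predictive variances on
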
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices`. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    with ExitStack() as es:
        es.enter_context(gpt_settings.debug(False))
        es.enter_context(gpt_settings.fast_pred_var())
        es.enter_context(
            gpt_settings.detach_test_caches(settings.propagate_grads.off())
        )
        # insert a dimension for the output dimension
        if self._num_outputs > 1:
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        if observation_noise:
            if isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                # Use the mean of the previous noise values (TODO: be smarter here).
                noise = self.likelihood.noise.mean().expand(X.shape[:-1])
                mvn = self.likelihood(mvn, X, noise=noise)
            else:
                mvn = self.likelihood(mvn, X)
        if self._num_outputs > 1:
            mean_x = mvn.mean
            covar_x = mvn.covariance_matrix
            output_indices = output_indices or range(self._num_outputs)
            mvns = [
                MultivariateNormal(
                    mean_x.select(dim=output_dim_idx, index=t),
                    lazify(covar_x.select(dim=output_dim_idx, index=t)),
                )
                for t in output_indices
            ]
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    return GPyTorchPosterior(mvn=mvn)

def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
            feature space, `q` is the number of points considered jointly,
            and `b` is the batch dimension.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.

    Returns:
        A `GPyTorchPosterior` object, representing a batch of `b` joint
        distributions over `q` points and the outputs selected by
        `output_indices`. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    with ExitStack() as es:
        es.enter_context(gpt_settings.debug(False))
        es.enter_context(gpt_settings.fast_pred_var())
        es.enter_context(
            gpt_settings.detach_test_caches(settings.propagate_grads.off())
        )
        if output_indices is not None:
            mvns = [self.forward_i(i, X) for i in output_indices]
            if observation_noise:
                # Select the likelihoods of the requested outputs so the noise
                # kwargs line up with the mvns being transformed below.
                lh_kwargs = [
                    {"noise": lh.noise.mean().expand(X.shape[:-1])}
                    if isinstance(lh, FixedNoiseGaussianLikelihood)
                    else {}
                    for lh in (self.likelihood.likelihoods[i] for i in output_indices)
                ]
                mvns = [
                    self.likelihood_i(i, mvn, X, **lkws)
                    for i, mvn, lkws in zip(output_indices, mvns, lh_kwargs)
                ]
        else:
            mvns = self(*[X for _ in range(self.num_outputs)])
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
    if len(mvns) == 1:
        return GPyTorchPosterior(mvn=mvns[0])
    else:
        return GPyTorchPosterior(
            mvn=MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
        )

def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.
        propagate_grads: If True, do not detach GPyTorch's test caches when
            computing the posterior. Required for being able to compute
            derivatives with respect to training inputs at test time (used
            e.g. by qNoisyExpectedImprovement). Defaults to `False`.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices`. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    detach_test_caches = not kwargs.get("propagate_grads", False)
    with ExitStack() as es:
        es.enter_context(settings.debug(False))
        es.enter_context(settings.fast_pred_var())
        es.enter_context(settings.detach_test_caches(detach_test_caches))
        # insert a dimension for the output dimension
        if self._num_outputs > 1:
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        if observation_noise:
            mvn = self.likelihood(mvn, X)
        if self._num_outputs > 1:
            mean_x = mvn.mean
            covar_x = mvn.covariance_matrix
            output_indices = output_indices or range(self._num_outputs)
            mvns = [
                MultivariateNormal(
                    mean_x.select(dim=output_dim_idx, index=t),
                    lazify(covar_x.select(dim=output_dim_idx, index=t)),
                )
                for t in output_indices
            ]
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    return GPyTorchPosterior(mvn=mvn)

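# Hedged usage sketch for the `propagate_grads` kwarg documented above
# (hypothetical fitted `model` and query `X_test`): leaving the test caches
# attached keeps the posterior differentiable w.r.t. quantities derived from
# the training data, which acquisition functions like qNoisyExpectedImprovement
# rely on.
#
#     post = model.posterior(X_test, propagate_grads=True)
#     post.mean.sum().backward()  # gradients flow back through the caches
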
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
            feature space, `q` is the number of points considered jointly,
            and `b` is the batch dimension.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.
        detach_test_caches: If True, detach GPyTorch test caches during
            computation of the posterior. Set to `False` if derivatives with
            respect to the training inputs are needed at test time (used
            e.g. by qNoisyExpectedImprovement).

    Returns:
        A `GPyTorchPosterior` object, representing a batch of `b` joint
        distributions over `q` points and the outputs selected by
        `output_indices`. Includes observation noise if
        `observation_noise=True`.
    """
    detach_test_caches = kwargs.get("detach_test_caches", True)
    self.eval()  # make sure model is in eval mode
    with ExitStack() as es:
        es.enter_context(settings.debug(False))
        es.enter_context(settings.fast_pred_var())
        es.enter_context(settings.detach_test_caches(detach_test_caches))
        if output_indices is not None:
            mvns = [self.forward_i(i, X) for i in output_indices]
            if observation_noise:
                mvns = [
                    self.likelihood_i(i, mvn, X)
                    for i, mvn in zip(output_indices, mvns)
                ]
        else:
            mvns = self(*[X for _ in range(self.num_outputs)])
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
    if len(mvns) == 1:
        return GPyTorchPosterior(mvn=mvns[0])
    else:
        return GPyTorchPosterior(
            mvn=MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
        )

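# A runnable sketch (assuming a current BoTorch install) of the output_indices
# fast path above: restricting the posterior to one output only evaluates the
# corresponding sub-model.
import torch
from botorch.models import ModelListGP, SingleTaskGP

train_X = torch.rand(8, 2, dtype=torch.double)
m0 = SingleTaskGP(train_X, torch.sin(train_X).sum(-1, keepdim=True))
m1 = SingleTaskGP(train_X, torch.cos(train_X).sum(-1, keepdim=True))
model = ModelListGP(m0, m1)

X_test = torch.rand(4, 2, dtype=torch.double)
post_all = model.posterior(X_test)                    # joint over both outputs
post_0 = model.posterior(X_test, output_indices=[0])  # only sub-model 0
print(post_all.mean.shape, post_0.mean.shape)         # (4, 2) and (4, 1)
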
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.
        detach_test_caches: If True, detach GPyTorch test caches during
            computation of the posterior. Set to `False` if derivatives with
            respect to the training inputs are needed at test time (used
            e.g. by qNoisyExpectedImprovement). Defaults to `True`.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices`. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    detach_test_caches = kwargs.get("detach_test_caches", True)
    with ExitStack() as es:
        es.enter_context(settings.debug(False))
        es.enter_context(settings.fast_pred_var())
        es.enter_context(settings.detach_test_caches(detach_test_caches))
        # insert a dimension for the output dimension
        if self._num_outputs > 1:
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        if observation_noise:
            # add observation noise, as documented above
            mvn = self.likelihood(mvn, X)
        if self._num_outputs > 1:
            mean_x = mvn.mean
            covar_x = mvn.covariance_matrix
            output_indices = output_indices or range(self._num_outputs)
            mvns = [
                MultivariateNormal(
                    mean_x.select(dim=output_dim_idx, index=t),
                    lazify(covar_x.select(dim=output_dim_idx, index=t)),
                )
                for t in output_indices
            ]
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    return GPyTorchPosterior(mvn=mvn)

def test_dynamics_model_kernel():
    chosen_seed = np.random.randint(10000)
    print(chosen_seed)
    np.random.seed(chosen_seed)  # seed the generator so a failing run can be reproduced
    D = 5
    n = 1
    m = 2
    U = np.random.rand(D, m)
    X = np.random.rand(D, n)
    Xtest = np.random.rand(1, n)
    A = rand_psd_matrix(n)
    B = rand_psd_matrix(1 + m)
    UH = np.concatenate((np.ones((D, 1)), U), axis=1)
    H = block_diag(*UH[:, None, :])
    Kexp = kernel_train(H, A, B, X, DataKernel().forward)
    decoder, MXU = encode_from_XU_numpy(X, U)
    MXUtorch = torch.from_numpy(MXU).float()
    HMVKer = HetergeneousMatrixVariateKernel(
        task_covar_module=MatrixVariateIndexKernel(
            ConstantIndexKernel(torch.from_numpy(A).float()),
            ConstantIndexKernel(torch.from_numpy(B).float()),
        ),
        data_covar_module=DataKernel(),
        decoder=decoder,
    )
    with gpsettings.debug(True):
        Kgot = HMVKer(MXUtorch, MXUtorch)
        Kgot_np = Kgot.evaluate().detach().cpu().numpy()
    assert Kgot_np == pytest.approx(Kexp)

    Kexp_test = kernel_test(H, A, B, Xtest, DataKernel().forward)
    _, MXUtest = encode_from_XU_numpy(Xtest, U, M=0)
    MXUtest_torch = torch.from_numpy(MXUtest).float()
    Kgot_test = HMVKer(MXUtest_torch, MXUtest_torch).evaluate().detach().cpu().numpy()
    assert Kgot_test == pytest.approx(Kexp_test)

    Kexp_train_test = kernel_train_test(H, A, B, X, Xtest, DataKernel().forward)
    MXUTrainTest = torch.cat((MXUtorch, MXUtest_torch), dim=0)
    Kgot_train_test = HMVKer(MXUTrainTest, MXUTrainTest).evaluate().detach().cpu().numpy()
    assert Kgot_train_test == pytest.approx(Kexp_train_test)

def test_random_restart_optimization(self):
    for double in (True, False):
        self._setUp(double=double)
        with gpt_settings.debug(False):
            best_f = self.model(self.train_x).mean.max().item()
        qEI = qExpectedImprovement(self.model, best_f=best_f)
        bounds = torch.tensor([[0.0], [1.0]]).type_as(self.train_x)
        batch_ics = torch.rand(2, 1).type_as(self.train_x)
        batch_candidates, batch_acq_values = gen_candidates_scipy(
            initial_conditions=batch_ics,
            acquisition_function=qEI,
            lower_bounds=bounds[0],
            upper_bounds=bounds[1],
            options={"maxiter": 3},
        )
        candidates = get_best_candidates(
            batch_candidates=batch_candidates, batch_values=batch_acq_values
        )
        self.assertTrue(-EPS <= candidates <= 1 + EPS)

def test_random_restart_optimization(self, cuda=False):
    for double in (True, False):
        self._setUp(double=double, cuda=cuda)
        with settings.debug(False):
            best_f = self.model(self.train_x).mean.max().item()
        qEI = qExpectedImprovement(self.model, best_f=best_f)
        bounds = torch.tensor([[0.0], [1.0]]).type_as(self.train_x)
        batch_ics = torch.rand(2, 1).type_as(self.train_x)
        batch_candidates, batch_acq_values = gen_candidates_scipy(
            initial_conditions=batch_ics,
            acquisition_function=qEI,
            lower_bounds=bounds[0],
            upper_bounds=bounds[1],
            options={"maxiter": 3},
        )
        candidates = get_best_candidates(
            batch_candidates=batch_candidates, batch_values=batch_acq_values
        )
        self.assertTrue(-EPS <= candidates <= 1 + EPS)

def __call__(self, u: torch.Tensor, f: Callable[[torch.Tensor], torch.Tensor], full_cov: bool = False):
    """Conditional mean prediction.

    :param u: control points (N-by-D array)
    :param f: callable implementing the function whose expected value we want
        to compute.
    :param full_cov: whether or not to compute the full predictive covariance
        matrix, according to the control kernel
    :return: a multivariate normal with the predictions
    """
    # Training mode
    if self.training:
        if self.train_inputs is None:
            raise RuntimeError(
                "train_inputs, train_targets cannot be None in training mode. "
                "Call .eval() for prior predictions, or call .set_train_data() to add training data."
            )
        if settings.debug.on():
            if not torch.equal(self.U, u):
                raise RuntimeError("You must train on the training inputs!")
        return self.forward(u)
    # Prior mode
    elif settings.prior_mode.on() or self.train_inputs is None or self.train_targets is None:
        full_output = self.forward(u)
        if settings.debug.on():
            if not isinstance(full_output, gpytorch.distributions.MultivariateNormal):
                raise RuntimeError("ExactGP.forward must return a MultivariateNormal")
        return full_output
    # Posterior mode
    else:
        cov_data_query = self.covar_module(self.U, u)
        prior_pred = self.forward(u)
        f_weights = torch.cholesky_solve(f(self.X).view(-1, 1), self.chol_cov_data)
        pred_mean = prior_pred.mean.view(-1, 1) + cov_data_query.t() @ f_weights
        cov_weights = torch.cholesky_solve(cov_data_query.evaluate(), self.chol_cov_data)
        if full_cov:
            pred_cov = prior_pred.covariance_matrix - cov_data_query.t().evaluate() @ cov_weights
        else:
            # Evaluate only the diagonal (variances) as a diagonal lazy matrix
            diag_k = gpytorch.lazy.DiagLazyTensor(prior_pred.lazy_covariance_matrix.diag())
            pred_cov = diag_k.add_diag(-cov_data_query.t().matmul(cov_weights).diag())
        return gpytorch.distributions.MultivariateNormal(pred_mean.view(-1), pred_cov)

def __call__(self, *args, **kwargs):
    train_inputs = list(self.train_inputs) if self.train_inputs is not None else []
    inputs = [i.unsqueeze(-1) if i.ndimension() == 1 else i for i in args]

    # Training mode: optimizing
    if self.training:
        if self.train_inputs is None:
            raise RuntimeError(
                "train_inputs, train_targets cannot be None in training mode. "
                "Call .eval() for prior predictions, or call .set_train_data() to add training data."
            )
        res = super(ExactGP, self).__call__(*inputs, **kwargs)
        return res

    # Prior mode
    elif settings.prior_mode.on() or self.train_inputs is None or self.train_targets is None:
        full_inputs = args
        full_output = super(ExactGP, self).__call__(*full_inputs, **kwargs)
        if settings.debug.on():
            if not isinstance(full_output, MultivariateNormal):
                raise RuntimeError("ExactGP.forward must return a MultivariateNormal")
        return full_output

    # Posterior mode
    else:
        # Get the terms that only depend on training data
        if self.prediction_strategy is None:
            train_output = super(ExactGP, self).__call__(*train_inputs, **kwargs)

            # Create the prediction strategy
            self.prediction_strategy = prediction_strategy(
                train_inputs=train_inputs,
                train_prior_dist=train_output,
                train_labels=self.train_targets,
                likelihood=self.likelihood,
            )

        # Concatenate the input to the training inputs
        full_inputs = []
        batch_shape = train_inputs[0].shape[:-2]
        for train_input, input in zip(train_inputs, inputs):
            # Make sure the batch shapes agree for training/test data
            if batch_shape != train_input.shape[:-2]:
                batch_shape = _mul_broadcast_shape(batch_shape, train_input.shape[:-2])
                train_input = train_input.expand(*batch_shape, *train_input.shape[-2:])
            if batch_shape != input.shape[:-2]:
                batch_shape = _mul_broadcast_shape(batch_shape, input.shape[:-2])
                train_input = train_input.expand(*batch_shape, *train_input.shape[-2:])
                input = input.expand(*batch_shape, *input.shape[-2:])
            full_inputs.append(torch.cat([train_input, input], dim=-2))

        # Get the joint distribution for training/test data
        full_output = super(ExactGP, self).__call__(*full_inputs, **kwargs)
        if settings.debug.on():
            if not isinstance(full_output, MultivariateNormal):
                raise RuntimeError("ExactGP.forward must return a MultivariateNormal")
        full_mean, full_covar = full_output.loc, full_output.lazy_covariance_matrix

        # Determine the shape of the joint distribution
        batch_shape = full_output.batch_shape
        joint_shape = full_output.event_shape
        tasks_shape = joint_shape[1:]  # For multitask learning
        test_shape = torch.Size(
            [joint_shape[0] - self.prediction_strategy.train_shape[0], *tasks_shape]
        )

        # Make the prediction
        with settings._use_eval_tolerance():
            predictive_mean, predictive_covar = self.prediction_strategy.exact_prediction(
                full_mean, full_covar
            )

        # Reshape predictive mean to match the appropriate event shape
        predictive_mean = predictive_mean.view(*batch_shape, *test_shape).contiguous()
        return full_output.__class__(predictive_mean, predictive_covar)

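# A self-contained sketch of the conditioning math that the posterior branch
# above delegates to prediction_strategy.exact_prediction: form the joint
# Gaussian over [train, test] points and condition on the training targets.
# The RBF kernel and noise level here are illustrative assumptions, not
# gpytorch's implementation.
import torch

def exact_gp_predict(x_train, y_train, x_test, lengthscale=1.0, noise=1e-2):
    def rbf(a, b):
        sq_dist = (a.unsqueeze(-2) - b.unsqueeze(-3)).pow(2).sum(-1)
        return torch.exp(-0.5 * sq_dist / lengthscale**2)

    K = rbf(x_train, x_train) + noise * torch.eye(x_train.shape[0])
    K_s = rbf(x_train, x_test)   # train-test cross-covariance
    K_ss = rbf(x_test, x_test)   # test prior covariance
    L = torch.linalg.cholesky(K)
    alpha = torch.cholesky_solve(y_train.unsqueeze(-1), L)  # K^{-1} y
    mean = (K_s.transpose(-2, -1) @ alpha).squeeze(-1)      # k_*^T K^{-1} y
    v = torch.cholesky_solve(K_s, L)                        # K^{-1} k_*
    cov = K_ss - K_s.transpose(-2, -1) @ v                  # Schur complement
    return mean, cov

x_tr = torch.linspace(0.0, 1.0, 10).unsqueeze(-1)
mean, cov = exact_gp_predict(x_tr, torch.sin(6.0 * x_tr).squeeze(-1), torch.rand(5, 1))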