def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `q x d` or `batch_shape x q x d` (batch mode) tensor, where `d`
            is the dimension of the feature space (not including task
            indices) and `q` is the number of points considered jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise from the respective
            likelihoods. If a Tensor, specifies the observation noise levels
            to add.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices`. Includes measurement noise if `observation_noise`
        is specified.
    """
    if output_indices is None:
        output_indices = self._output_tasks
    if any(i not in self._output_tasks for i in output_indices):
        raise ValueError(
            "output_indices must be a subset of the model's output tasks."
        )
    cls_name = self.__class__.__name__
    if hasattr(self, "outcome_transform"):
        raise NotImplementedError(
            f"Outcome transforms currently not supported by {cls_name}"
        )
    # construct evaluation X
    X_full = _make_X_full(X=X, output_indices=output_indices, tf=self._task_feature)
    self.eval()  # make sure model is in eval mode
    with gpt_posterior_settings():
        mvn = self(X_full)
    if observation_noise is not False:
        raise NotImplementedError(
            f"Specifying observation noise is not yet supported by {cls_name}"
        )
    # If single-output, return the posterior of a single-output model
    if len(output_indices) == 1:
        return GPyTorchPosterior(mvn=mvn)
    # Otherwise, make a MultitaskMultivariateNormal out of this
    mtmvn = MultitaskMultivariateNormal(
        mean=mvn.mean.view(*X.shape[:-1], len(output_indices)),
        covariance_matrix=mvn.lazy_covariance_matrix,
        interleaved=False,
    )
    return GPyTorchPosterior(mvn=mtmvn)
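A minimal usage sketch for the multi-task posterior above (not from the original source): it assumes the BoTorch `MultiTaskGP` convention of a task-index column in the training inputs (here the last column) and uses made-up data.

import torch
from botorch.models import MultiTaskGP

X = torch.rand(20, 2, dtype=torch.double)
task = torch.arange(20).remainder(2).unsqueeze(-1).to(X)  # alternate tasks 0/1
train_X = torch.cat([X, task], dim=-1)  # task feature in the last column
train_Y = torch.sin(X.sum(dim=-1, keepdim=True)) + 0.1 * task
model = MultiTaskGP(train_X, train_Y, task_feature=-1, output_tasks=[0, 1])
# posterior over task 0 only, at 4 new points (no task column in the test X)
post = model.posterior(torch.rand(4, 2, dtype=torch.double), output_indices=[0])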
def test_transformed_posterior(self):
    for dtype in (torch.float, torch.double):
        for m in (1, 2):
            shape = torch.Size([3, m])
            mean = torch.rand(shape, dtype=dtype, device=self.device)
            variance = 1 + torch.rand(shape, dtype=dtype, device=self.device)
            if m == 1:
                covar = torch.diag_embed(variance.squeeze(-1))
                mvn = MultivariateNormal(mean.squeeze(-1), lazify(covar))
            else:
                covar = torch.diag_embed(variance.view(*variance.shape[:-2], -1))
                mvn = MultitaskMultivariateNormal(mean, lazify(covar))
            p_base = GPyTorchPosterior(mvn=mvn)
            p_tf = TransformedPosterior(  # dummy transforms
                posterior=p_base,
                sample_transform=lambda s: s + 2,
                mean_transform=lambda m, v: 2 * m + v,
                variance_transform=lambda m, v: m + 2 * v,
            )
            # mean, variance
            self.assertEqual(p_tf.device.type, self.device.type)
            self.assertTrue(p_tf.dtype == dtype)
            self.assertEqual(p_tf.event_shape, shape)
            self.assertEqual(p_tf.base_sample_shape, shape)
            self.assertTrue(torch.equal(p_tf.mean, 2 * mean + variance))
            self.assertTrue(torch.equal(p_tf.variance, mean + 2 * variance))
            # rsample
            samples = p_tf.rsample()
            self.assertEqual(samples.shape, torch.Size([1]) + shape)
            samples = p_tf.rsample(sample_shape=torch.Size([4]))
            self.assertEqual(samples.shape, torch.Size([4]) + shape)
            samples2 = p_tf.rsample(sample_shape=torch.Size([4, 2]))
            self.assertEqual(samples2.shape, torch.Size([4, 2]) + shape)
            # rsample w/ base samples
            base_samples = torch.randn(4, *shape, device=self.device, dtype=dtype)
            # incompatible shapes
            with self.assertRaises(RuntimeError):
                p_tf.rsample(
                    sample_shape=torch.Size([3]), base_samples=base_samples
                )
            # make sure sample transform is applied correctly
            samples_base = p_base.rsample(
                sample_shape=torch.Size([4]), base_samples=base_samples
            )
            samples_tf = p_tf.rsample(
                sample_shape=torch.Size([4]), base_samples=base_samples
            )
            self.assertTrue(torch.equal(samples_tf, samples_base + 2))
            # check error handling
            p_tf_2 = TransformedPosterior(
                posterior=p_base, sample_transform=lambda s: s + 2
            )
            with self.assertRaises(NotImplementedError):
                p_tf_2.mean
            with self.assertRaises(NotImplementedError):
                p_tf_2.variance
def test_degenerate_GPyTorchPosterior(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        # singular covariance matrix
        degenerate_covar = torch.tensor(
            [[1, 1, 0], [1, 1, 0], [0, 0, 2]], dtype=dtype, device=device
        )
        mean = torch.rand(3, dtype=dtype, device=device)
        mvn = MultivariateNormal(mean, lazify(degenerate_covar))
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 1]))
        self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
        variance_exp = degenerate_covar.diag().unsqueeze(-1)
        self.assertTrue(torch.equal(posterior.variance, variance_exp))
        # rsample
        with warnings.catch_warnings(record=True) as w:
            # we check that the p.d. warning is emitted - this only
            # happens once per posterior, so we need to check only once
            samples = posterior.rsample(sample_shape=torch.Size([4]))
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
            self.assertTrue("not p.d." in str(w[-1].message))
        self.assertEqual(samples.shape, torch.Size([4, 3, 1]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 1]))
        # rsample w/ base samples
        base_samples = torch.randn(4, 3, 1, device=device, dtype=dtype)
        samples_b1 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        samples_b2 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        self.assertTrue(torch.allclose(samples_b1, samples_b2))
        base_samples2 = torch.randn(4, 2, 3, 1, device=device, dtype=dtype)
        samples2_b1 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        samples2_b2 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
        # collapse_batch_dims
        b_mean = torch.rand(2, 3, dtype=dtype, device=device)
        b_degenerate_covar = degenerate_covar.expand(2, *degenerate_covar.shape)
        b_mvn = MultivariateNormal(b_mean, lazify(b_degenerate_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 2, 3, 1, device=device, dtype=dtype)
        with warnings.catch_warnings(record=True) as w:
            b_samples = b_posterior.rsample(
                sample_shape=torch.Size([4]), base_samples=b_base_samples
            )
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
            self.assertTrue("not p.d." in str(w[-1].message))
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
def posterior(
    self, X: Tensor, observation_noise: Union[bool, Tensor] = False, **kwargs: Any
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        observation_noise: If True, add the observation noise from the
            likelihood to the posterior. If a Tensor, use it directly as the
            observation noise (must be of shape `(batch_shape) x q`).

    Returns:
        A `GPyTorchPosterior` object, representing a batch of `b` joint
        distributions over `q` points. Includes observation noise if
        specified.
    """
    self.eval()  # make sure model is in eval mode
    with gpt_posterior_settings():
        mvn = self(X)
        if observation_noise is not False:
            if torch.is_tensor(observation_noise):
                # TODO: Make sure observation noise is transformed correctly
                self._validate_tensor_args(X=X, Y=observation_noise)
                if observation_noise.size(-1) == 1:
                    observation_noise = observation_noise.squeeze(-1)
                mvn = self.likelihood(mvn, X, noise=observation_noise)
            else:
                mvn = self.likelihood(mvn, X)
    posterior = GPyTorchPosterior(mvn=mvn)
    if hasattr(self, "outcome_transform"):
        posterior = self.outcome_transform.untransform_posterior(posterior)
    return posterior
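A minimal sketch of calling a posterior like the one above, assuming a standard single-output `SingleTaskGP` and made-up training data; `observation_noise=True` routes through the likelihood branch shown above.

import torch
from botorch.models import SingleTaskGP

train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = torch.sin(train_X).sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)
post = model.posterior(torch.rand(5, 2, dtype=torch.double), observation_noise=True)
samples = post.rsample(sample_shape=torch.Size([16]))  # shape: 16 x 5 x 1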
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> Posterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `batch_shape x q x d`-dim Tensor, where `d` is the dimension of
            the feature space and `q` is the number of points considered
            jointly.
        output_indices: As defined in parent Model class, not used for this
            model.
        observation_noise: If True, add observation noise to the posterior.

    Returns:
        A `Posterior` object, representing joint distributions over `q`
        points. Includes observation noise if specified.
    """
    self.eval()  # make sure model is in eval mode
    if output_indices is not None:
        raise RuntimeError(
            "output_indices is not None. PairwiseGP should not be a "
            "multi-output model."
        )
    post = self(X)
    if observation_noise:
        noise_module = self.noise_module(shape=post.mean.shape).evaluate()
        post = MultivariateNormal(post.mean, post.covariance_matrix + noise_module)
    return GPyTorchPosterior(post)
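A usage sketch for the pairwise-comparison posterior above, under the assumption that `PairwiseGP` is constructed from datapoints plus (winner, loser) index pairs; the comparisons here are invented for illustration.

import torch
from botorch.models.pairwise_gp import PairwiseGP

train_X = torch.rand(6, 2, dtype=torch.double)
# each row is a (winner index, loser index) pair into train_X
comparisons = torch.tensor([[0, 1], [2, 3], [4, 5]])
model = PairwiseGP(train_X, comparisons)
post = model.posterior(torch.rand(4, 2, dtype=torch.double))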
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> Posterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `batch_shape x q x d`-dim Tensor, where `d` is the dimension of
            the feature space and `q` is the number of points considered
            jointly.
        output_indices: As defined in parent Model class, not used for this
            model.
        observation_noise: Ignored (since noise is not identifiable from
            scale in probit models).

    Returns:
        A `Posterior` object, representing joint distributions over `q`
        points.
    """
    self.eval()  # make sure model is in eval mode
    if output_indices is not None:
        raise RuntimeError(
            "output_indices is not None. PairwiseGP should not be a "
            "multi-output model."
        )
    post = self(X)
    return GPyTorchPosterior(post)
def posterior(
    self, X, output_indices=None, observation_noise=False, *args, **kwargs
) -> GPyTorchPosterior:
    self.eval()  # make sure model is in eval mode
    # input transforms are applied at `posterior` in `eval` mode, and at
    # `model.forward()` at the training time
    X = self.transform_inputs(X)
    # check for the multi-batch case for multi-outputs b/c this will throw
    # warnings
    X_ndim = X.ndim
    if self.num_outputs > 1 and X_ndim > 2:
        X = X.unsqueeze(-3).repeat(*[1] * (X_ndim - 2), self.num_outputs, 1, 1)
    dist = self.model(X)
    if observation_noise:
        dist = self.likelihood(dist, *args, **kwargs)
    posterior = GPyTorchPosterior(mvn=dist)
    if hasattr(self, "outcome_transform"):
        posterior = self.outcome_transform.untransform_posterior(posterior)
    return posterior
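A sketch of the input-transform behavior noted in the comment above: in eval mode the transform is applied inside `posterior` via `transform_inputs`, while during training it is applied in `model.forward()`. Assumes the standard BoTorch `Normalize` input transform; the data is made up.

import torch
from botorch.models import SingleTaskGP
from botorch.models.transforms import Normalize

train_X = 5 * torch.rand(10, 2, dtype=torch.double)  # un-normalized inputs
train_Y = train_X.sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y, input_transform=Normalize(d=2))
# test points are normalized internally by `posterior` in eval mode
post = model.posterior(5 * torch.rand(4, 2, dtype=torch.double))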
def _get_test_posterior(batch_shape, q=1, m=1, **tkwargs):
    mean = torch.rand(*batch_shape, q, m, **tkwargs)
    a = torch.rand(*batch_shape, q * m, q * m, **tkwargs)
    covar = a @ a.transpose(-1, -2)
    diag = torch.diagonal(covar, dim1=-2, dim2=-1)
    diag += torch.rand(*batch_shape, q * m, **tkwargs)  # in-place
    mvn = MultitaskMultivariateNormal(mean, covar)
    return GPyTorchPosterior(mvn)
def _get_test_posterior(batch_shape, device, dtype, q=1, o=1):
    mean = torch.rand(*batch_shape, q, o, device=device, dtype=dtype)
    a = torch.rand(*batch_shape, q * o, q * o, device=device, dtype=dtype)
    covar = a @ a.transpose(-1, -2)
    diag = torch.diagonal(covar, dim1=-2, dim2=-1)
    diag += torch.rand(*batch_shape, q * o, device=device, dtype=dtype)  # in-place
    mvn = MultitaskMultivariateNormal(mean, covar)
    return GPyTorchPosterior(mvn)
def test_GPyTorchPosterior_Multitask(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.rand(3, 2, dtype=dtype, device=device)
        variance = 1 + torch.rand(3, 2, dtype=dtype, device=device)
        covar = variance.view(-1).diag()
        mvn = MultitaskMultivariateNormal(mean, lazify(covar))
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 2]))
        self.assertTrue(torch.equal(posterior.mean, mean))
        self.assertTrue(torch.equal(posterior.variance, variance))
        # rsample
        samples = posterior.rsample(sample_shape=torch.Size([4]))
        self.assertEqual(samples.shape, torch.Size([4, 3, 2]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 2]))
        # rsample w/ base samples
        base_samples = torch.randn(4, 3, 2, device=device, dtype=dtype)
        samples_b1 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        samples_b2 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        self.assertTrue(torch.allclose(samples_b1, samples_b2))
        base_samples2 = torch.randn(4, 2, 3, 2, device=device, dtype=dtype)
        samples2_b1 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        samples2_b2 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
        # collapse_batch_dims
        b_mean = torch.rand(2, 3, 2, dtype=dtype, device=device)
        b_variance = 1 + torch.rand(2, 3, 2, dtype=dtype, device=device)
        b_covar = b_variance.view(2, 6, 1) * torch.eye(6).type_as(b_variance)
        b_mvn = MultitaskMultivariateNormal(b_mean, lazify(b_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 1, 3, 2, device=device, dtype=dtype)
        b_samples = b_posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=b_base_samples
        )
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 2]))
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    assert output_indices is None
    assert not observation_noise
    mvn = self(X)
    return GPyTorchPosterior(mvn=mvn)
def posterior(self, X, output_indices=None, observation_noise=False, *args, **kwargs):
    self.model.eval()
    self.likelihood.eval()
    dist = self.model(X)
    if observation_noise:
        dist = self.likelihood(dist, *args, **kwargs)
    return GPyTorchPosterior(mvn=dist)
def _get_test_posterior(
    batch_shape: torch.Size,
    q: int = 1,
    m: int = 1,
    interleaved: bool = True,
    lazy: bool = False,
    independent: bool = False,
    **tkwargs,
) -> GPyTorchPosterior:
    r"""Generate a Posterior for testing purposes.

    Args:
        batch_shape: The batch shape of the data.
        q: The number of candidates.
        m: The number of outputs.
        interleaved: A boolean indicating the format of the
            MultitaskMultivariateNormal.
        lazy: A boolean indicating if the posterior should be lazy.
        independent: A boolean indicating whether the outputs are independent.
        tkwargs: `device` and `dtype` tensor constructor kwargs.
    """
    if independent:
        mvns = []
        for _ in range(m):
            mean = torch.rand(*batch_shape, q, **tkwargs)
            a = torch.rand(*batch_shape, q, q, **tkwargs)
            covar = a @ a.transpose(-1, -2)
            flat_diag = torch.rand(*batch_shape, q, **tkwargs)
            covar = covar + torch.diag_embed(flat_diag)
            mvns.append(MultivariateNormal(mean, covar))
        mtmvn = MultitaskMultivariateNormal.from_independent_mvns(mvns)
    else:
        mean = torch.rand(*batch_shape, q, m, **tkwargs)
        a = torch.rand(*batch_shape, q * m, q * m, **tkwargs)
        covar = a @ a.transpose(-1, -2)
        flat_diag = torch.rand(*batch_shape, q * m, **tkwargs)
        if lazy:
            covar = AddedDiagLazyTensor(covar, DiagLazyTensor(flat_diag))
        else:
            covar = covar + torch.diag_embed(flat_diag)
        mtmvn = MultitaskMultivariateNormal(mean, covar, interleaved=interleaved)
    return GPyTorchPosterior(mtmvn)
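A quick example of the helper above (a sketch, assuming it is in scope): a lazy, non-interleaved two-output posterior with batch shape `[2]` and `q=3`; the event shape is `batch_shape x q x m`.

import torch

post = _get_test_posterior(
    batch_shape=torch.Size([2]),
    q=3,
    m=2,
    interleaved=False,
    lazy=True,
    device=torch.device("cpu"),
    dtype=torch.double,
)
assert post.event_shape == torch.Size([2, 3, 2])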
def test_evaluate_q_knowledge_gradient(self):
    for dtype in (torch.float, torch.double):
        # basic test
        n_f = 4
        mean = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
        variance = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
        mfm = MockModel(MockPosterior(mean=mean, variance=variance))
        with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
            with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 1
                mm = MockModel(None)
                qKG = qKnowledgeGradient(model=mm, num_fantasies=n_f)
                X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
                val = qKG(X)
                patch_f.assert_called_once()
                cargs, ckwargs = patch_f.call_args
                self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
        self.assertTrue(torch.allclose(val, mean.mean(), atol=1e-4))
        self.assertTrue(torch.equal(qKG.extract_candidates(X), X[..., :-n_f, :]))
        # batched evaluation
        b = 2
        mean = torch.rand(n_f, b, 1, device=self.device, dtype=dtype)
        variance = torch.rand(n_f, b, 1, device=self.device, dtype=dtype)
        mfm = MockModel(MockPosterior(mean=mean, variance=variance))
        X = torch.rand(b, n_f + 1, 1, device=self.device, dtype=dtype)
        with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
            with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 1
                mm = MockModel(None)
                qKG = qKnowledgeGradient(model=mm, num_fantasies=n_f)
                val = qKG(X)
                patch_f.assert_called_once()
                cargs, ckwargs = patch_f.call_args
                self.assertEqual(ckwargs["X"].shape, torch.Size([b, 1, 1]))
        self.assertTrue(
            torch.allclose(val, mean.mean(dim=0).squeeze(-1), atol=1e-4)
        )
        self.assertTrue(torch.equal(qKG.extract_candidates(X), X[..., :-n_f, :]))
        # pending points and current value
        X_pending = torch.rand(2, 1, device=self.device, dtype=dtype)
        mean = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
        variance = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
        mfm = MockModel(MockPosterior(mean=mean, variance=variance))
        current_value = torch.rand(1, device=self.device, dtype=dtype)
        X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
        with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
            with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 1
                mm = MockModel(None)
                qKG = qKnowledgeGradient(
                    model=mm,
                    num_fantasies=n_f,
                    X_pending=X_pending,
                    current_value=current_value,
                )
                val = qKG(X)
                patch_f.assert_called_once()
                cargs, ckwargs = patch_f.call_args
                self.assertEqual(ckwargs["X"].shape, torch.Size([1, 3, 1]))
        self.assertTrue(torch.allclose(val, mean.mean() - current_value, atol=1e-4))
        self.assertTrue(torch.equal(qKG.extract_candidates(X), X[..., :-n_f, :]))
        # test objective (inner MC sampling)
        objective = GenericMCObjective(objective=lambda Y, X: Y.norm(dim=-1))
        samples = torch.randn(3, 1, 1, device=self.device, dtype=dtype)
        mfm = MockModel(MockPosterior(samples=samples))
        X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
        with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
            with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 1
                mm = MockModel(None)
                qKG = qKnowledgeGradient(
                    model=mm, num_fantasies=n_f, objective=objective
                )
                val = qKG(X)
                patch_f.assert_called_once()
                cargs, ckwargs = patch_f.call_args
                self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
        self.assertTrue(torch.allclose(val, objective(samples).mean(), atol=1e-4))
        self.assertTrue(torch.equal(qKG.extract_candidates(X), X[..., :-n_f, :]))
        # test non-MC objective (ScalarizedObjective)
        weights = torch.rand(2, device=self.device, dtype=dtype)
        objective = ScalarizedObjective(weights=weights)
        mean = torch.tensor([1.0, 0.5], device=self.device, dtype=dtype).expand(
            n_f, 1, 2
        )
        cov = torch.tensor(
            [[1.0, 0.1], [0.1, 0.5]], device=self.device, dtype=dtype
        ).expand(n_f, 2, 2)
        posterior = GPyTorchPosterior(MultitaskMultivariateNormal(mean, cov))
        mfm = MockModel(posterior)
        with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
            with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 2
                mm = MockModel(None)
                qKG = qKnowledgeGradient(
                    model=mm, num_fantasies=n_f, objective=objective
                )
                val = qKG(X)
                patch_f.assert_called_once()
                cargs, ckwargs = patch_f.call_args
                self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
        val_expected = (mean * weights).sum(-1).mean(0)
        self.assertTrue(torch.allclose(val, val_expected))
def test_construct_base_samples_from_posterior(self):  # noqa: C901
    for dtype in (torch.float, torch.double):
        # single-output
        mean = torch.zeros(2, device=self.device, dtype=dtype)
        cov = torch.eye(2, device=self.device, dtype=dtype)
        mvn = MultivariateNormal(mean=mean, covariance_matrix=cov)
        posterior = GPyTorchPosterior(mvn=mvn)
        for sample_shape, qmc, seed in itertools.product(
            (torch.Size([5]), torch.Size([5, 3])), (False, True), (None, 1234)
        ):
            expected_shape = sample_shape + torch.Size([2, 1])
            samples = construct_base_samples_from_posterior(
                posterior=posterior, sample_shape=sample_shape, qmc=qmc, seed=seed
            )
            self.assertEqual(samples.shape, expected_shape)
            self.assertEqual(samples.device.type, self.device.type)
            self.assertEqual(samples.dtype, dtype)
        # single-output, batch mode
        mean = torch.zeros(2, 2, device=self.device, dtype=dtype)
        cov = torch.eye(2, device=self.device, dtype=dtype).expand(2, 2, 2)
        mvn = MultivariateNormal(mean=mean, covariance_matrix=cov)
        posterior = GPyTorchPosterior(mvn=mvn)
        for sample_shape, qmc, seed, collapse_batch_dims in itertools.product(
            (torch.Size([5]), torch.Size([5, 3])),
            (False, True),
            (None, 1234),
            (False, True),
        ):
            if collapse_batch_dims:
                expected_shape = sample_shape + torch.Size([1, 2, 1])
            else:
                expected_shape = sample_shape + torch.Size([2, 2, 1])
            samples = construct_base_samples_from_posterior(
                posterior=posterior,
                sample_shape=sample_shape,
                qmc=qmc,
                collapse_batch_dims=collapse_batch_dims,
                seed=seed,
            )
            self.assertEqual(samples.shape, expected_shape)
            self.assertEqual(samples.device.type, self.device.type)
            self.assertEqual(samples.dtype, dtype)
        # multi-output
        mean = torch.zeros(2, 2, device=self.device, dtype=dtype)
        cov = torch.eye(4, device=self.device, dtype=dtype)
        mtmvn = MultitaskMultivariateNormal(mean=mean, covariance_matrix=cov)
        posterior = GPyTorchPosterior(mvn=mtmvn)
        for sample_shape, qmc, seed in itertools.product(
            (torch.Size([5]), torch.Size([5, 3])), (False, True), (None, 1234)
        ):
            expected_shape = sample_shape + torch.Size([2, 2])
            samples = construct_base_samples_from_posterior(
                posterior=posterior, sample_shape=sample_shape, qmc=qmc, seed=seed
            )
            self.assertEqual(samples.shape, expected_shape)
            self.assertEqual(samples.device.type, self.device.type)
            self.assertEqual(samples.dtype, dtype)
        # multi-output, batch mode
        mean = torch.zeros(2, 2, 2, device=self.device, dtype=dtype)
        cov = torch.eye(4, device=self.device, dtype=dtype).expand(2, 4, 4)
        mtmvn = MultitaskMultivariateNormal(mean=mean, covariance_matrix=cov)
        posterior = GPyTorchPosterior(mvn=mtmvn)
        for sample_shape, qmc, seed, collapse_batch_dims in itertools.product(
            (torch.Size([5]), torch.Size([5, 3])),
            (False, True),
            (None, 1234),
            (False, True),
        ):
            if collapse_batch_dims:
                expected_shape = sample_shape + torch.Size([1, 2, 2])
            else:
                expected_shape = sample_shape + torch.Size([2, 2, 2])
            samples = construct_base_samples_from_posterior(
                posterior=posterior,
                sample_shape=sample_shape,
                qmc=qmc,
                collapse_batch_dims=collapse_batch_dims,
                seed=seed,
            )
            self.assertEqual(samples.shape, expected_shape)
            self.assertEqual(samples.device.type, self.device.type)
            self.assertEqual(samples.dtype, dtype)
def test_GPyTorchPosterior(self):
    for dtype in (torch.float, torch.double):
        n = 3
        mean = torch.rand(n, dtype=dtype, device=self.device)
        variance = 1 + torch.rand(n, dtype=dtype, device=self.device)
        covar = variance.diag()
        mvn = MultivariateNormal(mean, lazify(covar))
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, self.device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([n, 1]))
        self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
        self.assertTrue(torch.equal(posterior.variance, variance.unsqueeze(-1)))
        # rsample
        samples = posterior.rsample()
        self.assertEqual(samples.shape, torch.Size([1, n, 1]))
        for sample_shape in ([4], [4, 2]):
            samples = posterior.rsample(sample_shape=torch.Size(sample_shape))
            self.assertEqual(samples.shape, torch.Size(sample_shape + [n, 1]))
        # check enabling of approximate root decomposition
        with ExitStack() as es:
            mock_func = es.enter_context(
                mock.patch(ROOT_DECOMP_PATH, return_value=torch.cholesky(covar))
            )
            es.enter_context(gpt_settings.max_cholesky_size(0))
            es.enter_context(
                gpt_settings.fast_computations(covar_root_decomposition=True)
            )
            # need to clear cache, cannot re-use previous objects
            mvn = MultivariateNormal(mean, lazify(covar))
            posterior = GPyTorchPosterior(mvn=mvn)
            posterior.rsample(sample_shape=torch.Size([4]))
            mock_func.assert_called_once()
        # rsample w/ base samples
        base_samples = torch.randn(4, 3, 1, device=self.device, dtype=dtype)
        # incompatible shapes
        with self.assertRaises(RuntimeError):
            posterior.rsample(
                sample_shape=torch.Size([3]), base_samples=base_samples
            )
        # ensure consistent result
        for sample_shape in ([4], [4, 2]):
            base_samples = torch.randn(
                *sample_shape, 3, 1, device=self.device, dtype=dtype
            )
            samples = [
                posterior.rsample(
                    sample_shape=torch.Size(sample_shape), base_samples=base_samples
                )
                for _ in range(2)
            ]
            self.assertTrue(torch.allclose(*samples))
        # collapse_batch_dims
        b_mean = torch.rand(2, 3, dtype=dtype, device=self.device)
        b_variance = 1 + torch.rand(2, 3, dtype=dtype, device=self.device)
        b_covar = torch.diag_embed(b_variance)
        b_mvn = MultivariateNormal(b_mean, lazify(b_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 1, 3, 1, device=self.device, dtype=dtype)
        b_samples = b_posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=b_base_samples
        )
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
def test_GPyTorchPosterior(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.rand(3, dtype=dtype, device=device)
        variance = 1 + torch.rand(3, dtype=dtype, device=device)
        covar = variance.diag()
        mvn = MultivariateNormal(mean, lazify(covar))
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 1]))
        self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
        self.assertTrue(torch.equal(posterior.variance, variance.unsqueeze(-1)))
        # rsample
        samples = posterior.rsample()
        self.assertEqual(samples.shape, torch.Size([1, 3, 1]))
        samples = posterior.rsample(sample_shape=torch.Size([4]))
        self.assertEqual(samples.shape, torch.Size([4, 3, 1]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 1]))
        # rsample w/ base samples
        base_samples = torch.randn(4, 3, 1, device=device, dtype=dtype)
        # incompatible shapes
        with self.assertRaises(RuntimeError):
            posterior.rsample(
                sample_shape=torch.Size([3]), base_samples=base_samples
            )
        samples_b1 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        samples_b2 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        self.assertTrue(torch.allclose(samples_b1, samples_b2))
        base_samples2 = torch.randn(4, 2, 3, 1, device=device, dtype=dtype)
        samples2_b1 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        samples2_b2 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
        # collapse_batch_dims
        b_mean = torch.rand(2, 3, dtype=dtype, device=device)
        b_variance = 1 + torch.rand(2, 3, dtype=dtype, device=device)
        b_covar = b_variance.unsqueeze(-1) * torch.eye(3).type_as(b_variance)
        b_mvn = MultivariateNormal(b_mean, lazify(b_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 1, 3, 1, device=device, dtype=dtype)
        b_samples = b_posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=b_base_samples
        )
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
def test_GPyTorchPosterior_Multitask(self):
    for dtype in (torch.float, torch.double):
        mean = torch.rand(3, 2, dtype=dtype, device=self.device)
        variance = 1 + torch.rand(3, 2, dtype=dtype, device=self.device)
        covar = variance.view(-1).diag()
        mvn = MultitaskMultivariateNormal(mean, lazify(covar))
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, self.device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 2]))
        self.assertTrue(torch.equal(posterior.mean, mean))
        self.assertTrue(torch.equal(posterior.variance, variance))
        # rsample
        samples = posterior.rsample(sample_shape=torch.Size([4]))
        self.assertEqual(samples.shape, torch.Size([4, 3, 2]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 2]))
        # rsample w/ base samples
        base_samples = torch.randn(4, 3, 2, device=self.device, dtype=dtype)
        samples_b1 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        samples_b2 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        self.assertTrue(torch.allclose(samples_b1, samples_b2))
        base_samples2 = torch.randn(4, 2, 3, 2, device=self.device, dtype=dtype)
        samples2_b1 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        samples2_b2 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
        # collapse_batch_dims
        b_mean = torch.rand(2, 3, 2, dtype=dtype, device=self.device)
        b_variance = 1 + torch.rand(2, 3, 2, dtype=dtype, device=self.device)
        b_covar = torch.diag_embed(b_variance.view(2, 6))
        b_mvn = MultitaskMultivariateNormal(b_mean, lazify(b_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 1, 3, 2, device=self.device, dtype=dtype)
        b_samples = b_posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=b_base_samples
        )
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 2]))
def test_degenerate_GPyTorchPosterior_Multitask(self):
    for dtype in (torch.float, torch.double):
        # singular covariance matrix
        degenerate_covar = torch.tensor(
            [[1, 1, 0], [1, 1, 0], [0, 0, 2]], dtype=dtype, device=self.device
        )
        mean = torch.rand(3, dtype=dtype, device=self.device)
        mvn = MultivariateNormal(mean, lazify(degenerate_covar))
        mvn = MultitaskMultivariateNormal.from_independent_mvns([mvn, mvn])
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, self.device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 2]))
        mean_exp = mean.unsqueeze(-1).repeat(1, 2)
        self.assertTrue(torch.equal(posterior.mean, mean_exp))
        variance_exp = degenerate_covar.diag().unsqueeze(-1).repeat(1, 2)
        self.assertTrue(torch.equal(posterior.variance, variance_exp))
        # rsample
        with warnings.catch_warnings(record=True) as w:
            # we check that the p.d. warning is emitted - this only
            # happens once per posterior, so we need to check only once
            samples = posterior.rsample(sample_shape=torch.Size([4]))
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
            self.assertTrue("not p.d." in str(w[-1].message))
        self.assertEqual(samples.shape, torch.Size([4, 3, 2]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 2]))
        # rsample w/ base samples
        base_samples = torch.randn(4, 3, 2, device=self.device, dtype=dtype)
        samples_b1 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        samples_b2 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        self.assertTrue(torch.allclose(samples_b1, samples_b2))
        base_samples2 = torch.randn(4, 2, 3, 2, device=self.device, dtype=dtype)
        samples2_b1 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        samples2_b2 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
        # collapse_batch_dims
        b_mean = torch.rand(2, 3, dtype=dtype, device=self.device)
        b_degenerate_covar = degenerate_covar.expand(2, *degenerate_covar.shape)
        b_mvn = MultivariateNormal(b_mean, lazify(b_degenerate_covar))
        b_mvn = MultitaskMultivariateNormal.from_independent_mvns([b_mvn, b_mvn])
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 1, 3, 2, device=self.device, dtype=dtype)
        with warnings.catch_warnings(record=True) as w:
            b_samples = b_posterior.rsample(
                sample_shape=torch.Size([4]), base_samples=b_base_samples
            )
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
            self.assertTrue("not p.d." in str(w[-1].message))
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 2]))
def test_q_neg_int_post_variance(self):
    no = "botorch.utils.testing.MockModel.num_outputs"
    for dtype in (torch.float, torch.double):
        # basic test
        mean = torch.zeros(4, 1, device=self.device, dtype=dtype)
        variance = torch.rand(4, 1, device=self.device, dtype=dtype)
        mc_points = torch.rand(10, 1, device=self.device, dtype=dtype)
        mfm = MockModel(MockPosterior(mean=mean, variance=variance))
        with mock.patch.object(MockModel, "fantasize", return_value=mfm):
            with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 1
                # TODO: Make this work with arbitrary models
                mm = MockModel(None)
                qNIPV = qNegIntegratedPosteriorVariance(
                    model=mm, mc_points=mc_points
                )
                X = torch.empty(1, 1, device=self.device, dtype=dtype)  # dummy
                val = qNIPV(X)
                self.assertTrue(torch.allclose(val, -(variance.mean()), atol=1e-4))
        # batched model
        mean = torch.zeros(2, 4, 1, device=self.device, dtype=dtype)
        variance = torch.rand(2, 4, 1, device=self.device, dtype=dtype)
        mc_points = torch.rand(2, 10, 1, device=self.device, dtype=dtype)
        mfm = MockModel(MockPosterior(mean=mean, variance=variance))
        with mock.patch.object(MockModel, "fantasize", return_value=mfm):
            with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 1
                # TODO: Make this work with arbitrary models
                mm = MockModel(None)
                qNIPV = qNegIntegratedPosteriorVariance(
                    model=mm, mc_points=mc_points
                )
                # TODO: Allow broadcasting for batch evaluation
                X = torch.empty(2, 1, 1, device=self.device, dtype=dtype)  # dummy
                val = qNIPV(X)
                val_exp = -variance.mean(dim=-2).squeeze(-1)
                self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
        # multi-output model
        mean = torch.zeros(4, 2, device=self.device, dtype=dtype)
        variance = torch.rand(4, 2, device=self.device, dtype=dtype)
        cov = torch.diag_embed(variance.view(-1))
        f_posterior = GPyTorchPosterior(MultitaskMultivariateNormal(mean, cov))
        mc_points = torch.rand(10, 1, device=self.device, dtype=dtype)
        mfm = MockModel(f_posterior)
        with mock.patch.object(MockModel, "fantasize", return_value=mfm):
            with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 2
                mm = MockModel(None)
                # check error if objective is not ScalarizedObjective
                with self.assertRaises(UnsupportedError):
                    qNegIntegratedPosteriorVariance(
                        model=mm,
                        mc_points=mc_points,
                        objective=IdentityMCObjective(),
                    )
                weights = torch.tensor([0.5, 0.5], device=self.device, dtype=dtype)
                qNIPV = qNegIntegratedPosteriorVariance(
                    model=mm,
                    mc_points=mc_points,
                    objective=ScalarizedObjective(weights=weights),
                )
                X = torch.empty(1, 1, device=self.device, dtype=dtype)  # dummy
                val = qNIPV(X)
                self.assertTrue(
                    torch.allclose(val, -0.5 * variance.mean(), atol=1e-4)
                )
        # batched multi-output model
        mean = torch.zeros(4, 3, 1, 2, device=self.device, dtype=dtype)
        variance = torch.rand(4, 3, 1, 2, device=self.device, dtype=dtype)
        cov = torch.diag_embed(variance.view(4, 3, -1))
        f_posterior = GPyTorchPosterior(MultitaskMultivariateNormal(mean, cov))
        mc_points = torch.rand(4, 1, device=self.device, dtype=dtype)
        mfm = MockModel(f_posterior)
        with mock.patch.object(MockModel, "fantasize", return_value=mfm):
            with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 2
                mm = MockModel(None)
                weights = torch.tensor([0.5, 0.5], device=self.device, dtype=dtype)
                qNIPV = qNegIntegratedPosteriorVariance(
                    model=mm,
                    mc_points=mc_points,
                    objective=ScalarizedObjective(weights=weights),
                )
                X = torch.empty(3, 1, 1, device=self.device, dtype=dtype)  # dummy
                val = qNIPV(X)
                val_exp = -0.5 * variance.mean(dim=0).view(3, -1).mean(dim=-1)
                self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
def test_GPyTorchPosterior(self):
    for dtype in (torch.float, torch.double):
        mean = torch.rand(3, dtype=dtype, device=self.device)
        variance = 1 + torch.rand(3, dtype=dtype, device=self.device)
        covar = variance.diag()
        mvn = MultivariateNormal(mean, lazify(covar))
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, self.device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 1]))
        self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
        self.assertTrue(torch.equal(posterior.variance, variance.unsqueeze(-1)))
        # rsample
        samples = posterior.rsample()
        self.assertEqual(samples.shape, torch.Size([1, 3, 1]))
        samples = posterior.rsample(sample_shape=torch.Size([4]))
        self.assertEqual(samples.shape, torch.Size([4, 3, 1]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 1]))
        # rsample w/ base samples
        base_samples = torch.randn(4, 3, 1, device=self.device, dtype=dtype)
        # incompatible shapes
        with self.assertRaises(RuntimeError):
            posterior.rsample(
                sample_shape=torch.Size([3]), base_samples=base_samples
            )
        samples_b1 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        samples_b2 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        self.assertTrue(torch.allclose(samples_b1, samples_b2))
        base_samples2 = torch.randn(4, 2, 3, 1, device=self.device, dtype=dtype)
        samples2_b1 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        samples2_b2 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
        # collapse_batch_dims
        b_mean = torch.rand(2, 3, dtype=dtype, device=self.device)
        b_variance = 1 + torch.rand(2, 3, dtype=dtype, device=self.device)
        b_covar = torch.diag_embed(b_variance)
        b_mvn = MultivariateNormal(b_mean, lazify(b_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 1, 3, 1, device=self.device, dtype=dtype)
        b_samples = b_posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=b_base_samples
        )
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add the observation noise from the
            likelihood to the posterior. If a Tensor, use it directly as the
            observation noise (must be of shape `(batch_shape) x q x m`).

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes observation noise if specified.
    """
    self.eval()  # make sure model is in eval mode
    with gpt_posterior_settings():
        # insert a dimension for the output dimension
        if self._num_outputs > 1:
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        if observation_noise is not False:
            if torch.is_tensor(observation_noise):
                # TODO: Validate noise shape
                # make observation_noise `batch_shape x q x n`
                obs_noise = observation_noise.transpose(-1, -2)
                mvn = self.likelihood(mvn, X, noise=obs_noise)
            elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                # Use the mean of the previous noise values (TODO: be smarter here).
                noise = self.likelihood.noise.mean().expand(X.shape[:-1])
                mvn = self.likelihood(mvn, X, noise=noise)
            else:
                mvn = self.likelihood(mvn, X)
        if self._num_outputs > 1:
            mean_x = mvn.mean
            covar_x = mvn.covariance_matrix
            output_indices = output_indices or range(self._num_outputs)
            mvns = [
                MultivariateNormal(
                    mean_x.select(dim=output_dim_idx, index=t),
                    lazify(covar_x.select(dim=output_dim_idx, index=t)),
                )
                for t in output_indices
            ]
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    posterior = GPyTorchPosterior(mvn=mvn)
    if hasattr(self, "outcome_transform"):
        posterior = self.outcome_transform.untransform_posterior(posterior)
    return posterior
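A sketch of the batched multi-output path above, assuming a `SingleTaskGP` trained on two outcomes (BoTorch represents multiple outcomes internally as a batch dimension, which the `add_output_dim` / `from_independent_mvns` logic above unpacks); the data is invented.

import torch
from botorch.models import SingleTaskGP

train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = torch.stack([train_X.sum(-1), train_X.prod(-1)], dim=-1)  # two outcomes
model = SingleTaskGP(train_X, train_Y)
# posterior over only the second output at 4 test points
post = model.posterior(torch.rand(4, 2, dtype=torch.double), output_indices=[1])
print(post.event_shape)  # torch.Size([4, 1])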
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
            feature space, `q` is the number of points considered jointly,
            and `b` is the batch dimension.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add the observation noise from the
            respective likelihoods to the posterior. If a Tensor of shape
            `(batch_shape) x q x m`, use it directly as the observation
            noise (with `observation_noise[..., i]` added to the posterior
            of the `i`-th model).

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes measurement noise if
        `observation_noise` is specified.
    """
    self.eval()  # make sure model is in eval mode
    mvn_gen: Iterator
    with gpt_posterior_settings():
        # only compute what's necessary
        if output_indices is not None:
            mvns = [self.forward_i(i, X) for i in output_indices]
            if observation_noise is not False:
                if torch.is_tensor(observation_noise):
                    lh_kwargs = [
                        {"noise": observation_noise[..., i]}
                        for i, lh in enumerate(self.likelihood.likelihoods)
                    ]
                else:
                    lh_kwargs = [
                        {"noise": lh.noise.mean().expand(X.shape[:-1])}
                        if isinstance(lh, FixedNoiseGaussianLikelihood)
                        else {}
                        for lh in self.likelihood.likelihoods
                    ]
                mvns = [
                    self.likelihood_i(i, mvn, X, **lkws)
                    for i, mvn, lkws in zip(output_indices, mvns, lh_kwargs)
                ]
            mvn_gen = zip(output_indices, mvns)
        else:
            mvns = self(*[X for _ in range(self.num_outputs)])
            if observation_noise is not False:
                if torch.is_tensor(observation_noise):
                    mvns = self.likelihood(
                        *[(mvn, X) for mvn in mvns], noise=observation_noise
                    )
                else:
                    mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
            mvn_gen = enumerate(mvns)
    # apply output transforms of individual models if present
    mvns = []
    for i, mvn in mvn_gen:
        try:
            oct = self.models[i].outcome_transform
            tf_mvn = oct.untransform_posterior(GPyTorchPosterior(mvn)).mvn
        except AttributeError:
            tf_mvn = mvn
        mvns.append(tf_mvn)
    # return the result as a GPyTorchPosterior
    if len(mvns) == 1:
        return GPyTorchPosterior(mvn=mvns[0])
    return GPyTorchPosterior(
        mvn=MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    )
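A usage sketch for the model-list posterior above, assuming a `ModelListGP` of two independent single-output GPs on made-up data; restricting `output_indices` exercises the "only compute what's necessary" branch.

import torch
from botorch.models import ModelListGP, SingleTaskGP

train_X = torch.rand(8, 2, dtype=torch.double)
m1 = SingleTaskGP(train_X, train_X.sum(-1, keepdim=True))
m2 = SingleTaskGP(train_X, train_X.prod(-1, keepdim=True))
model = ModelListGP(m1, m2)
# posterior of the first sub-model only
post = model.posterior(torch.rand(3, 2, dtype=torch.double), output_indices=[0])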
def posterior(self, x, **kwargs):
    mvn = self.forward(x, **kwargs)
    return GPyTorchPosterior(mvn)