Example #1
    def __call__(self, inputs, are_samples=False, **kwargs):
        """
        Forward data through this hidden GP layer. The output is a MultitaskMultivariateNormal distribution
        (or a MultivariateNormal distribution if output_dims=None).

        If the input is a >=2 dimensional Tensor (e.g. `n x d`), we pass the input through each hidden GP,
        resulting in a `n x h` multitask Gaussian distribution (where all of the `h` tasks represent an
        output dimension and are independent from one another).  We then draw `s` samples from these Gaussians,
        resulting in a `s x n x h` MultitaskMultivariateNormal distribution.

        If the input is a >=3 dimensional Tensor, and the `are_samples=True` kwarg is set, then we assume that
        the outermost batch dimension is a samples dimension. The output will have the same number of samples.
        For example, a `s x b x n x d` input will result in a `s x b x n x h` MultitaskMultivariateNormal distribution.

        The goal of these last two points is that if you have a tensor `x` that is `n x d`, then:
            >>> hidden_gp2(hidden_gp(x))

        will just work, and return an `s x n x h2` MultitaskMultivariateNormal, where `h2` is the output
        dimensionality of hidden_gp2. In this way, hidden GP layers are easily composable.
        """
        deterministic_inputs = not are_samples
        if isinstance(inputs, MultitaskMultivariateNormal):
            inputs = torch.distributions.Normal(
                loc=inputs.mean, scale=inputs.variance.sqrt()).rsample()
            deterministic_inputs = False

        if settings.debug.on():
            if not torch.is_tensor(inputs):
                raise ValueError(
                    "`inputs` should either be a MultitaskMultivariateNormal or a Tensor, got "
                    f"{inputs.__class__.__Name__}")

            if inputs.size(-1) != self.input_dims:
                raise RuntimeError(
                    f"Input shape did not match self.input_dims. Got total feature dims [{inputs.size(-1)}],"
                    f" expected [{self.input_dims}]")

        # Repeat the input for all possible outputs
        if self.output_dims is not None:
            inputs = inputs.unsqueeze(-3)
            inputs = inputs.expand(*inputs.shape[:-3], self.output_dims,
                                   *inputs.shape[-2:])

        # Now run samples through the GP
        output = ApproximateGP.__call__(self, inputs)
        if self.output_dims is not None:
            mean = output.loc.transpose(-1, -2)
            covar = BlockDiagLazyTensor(output.lazy_covariance_matrix,
                                        block_dim=-3)
            output = MultitaskMultivariateNormal(mean,
                                                 covar,
                                                 interleaved=False)

        # Maybe expand inputs?
        if deterministic_inputs:
            output = output.expand(
                torch.Size([settings.num_likelihood_samples.value()]) +
                output.batch_shape)

        return output
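The shape semantics described in the docstring can be sketched directly with the distribution classes. A minimal sketch, assuming only `torch` and `gpytorch`; the sampling step mirrors what `__call__` does for distribution inputs:

import torch
from gpytorch.distributions import MultitaskMultivariateNormal

n, h, s = 5, 3, 8  # n points, h hidden output dims, s samples
mean = torch.randn(n, h)
covar = torch.eye(n * h)
dist = MultitaskMultivariateNormal(mean, covar)

# Reparameterized sampling from the independent marginals, as done above
samples = torch.distributions.Normal(
    loc=dist.mean, scale=dist.variance.sqrt()).rsample(torch.Size([s]))
print(samples.shape)  # torch.Size([8, 5, 3]) -- s x n x h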
Example #2
 def test_from_independent_mvns(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            # Test non-batch mode mvns
            n_tasks = 2
            n = 4
            mvns = [
                MultivariateNormal(
                    mean=torch.randn(n, device=device, dtype=dtype),
                    covariance_matrix=DiagLazyTensor(
                        torch.randn(n, device=device, dtype=dtype).abs_()),
                ) for i in range(n_tasks)
            ]
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
            expected_mean_shape = [n, n_tasks]
            expected_covar_shape = [n * n_tasks] * 2
            self.assertEqual(list(mvn.mean.shape), expected_mean_shape)
            self.assertEqual(list(mvn.covariance_matrix.shape),
                             expected_covar_shape)

            # Test batch mode mvns
            b = 3
            mvns = [
                MultivariateNormal(
                    mean=torch.randn(b, n, device=device, dtype=dtype),
                    covariance_matrix=DiagLazyTensor(
                        torch.randn(b, n, device=device, dtype=dtype).abs_()),
                ) for i in range(n_tasks)
            ]
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
            self.assertEqual(list(mvn.mean.shape), [b] + expected_mean_shape)
            self.assertEqual(list(mvn.covariance_matrix.shape),
                             [b] + expected_covar_shape)
Example #3
 def test_degenerate_GPyTorchPosterior_Multitask(self):
     for dtype in (torch.float, torch.double):
         # singular covariance matrix
         degenerate_covar = torch.tensor(
             [[1, 1, 0], [1, 1, 0], [0, 0, 2]], dtype=dtype, device=self.device
         )
         mean = torch.rand(3, dtype=dtype, device=self.device)
         mvn = MultivariateNormal(mean, lazify(degenerate_covar))
         mvn = MultitaskMultivariateNormal.from_independent_mvns([mvn, mvn])
         posterior = GPyTorchPosterior(mvn=mvn)
         # basics
         self.assertEqual(posterior.device.type, self.device.type)
         self.assertTrue(posterior.dtype == dtype)
         self.assertEqual(posterior.event_shape, torch.Size([3, 2]))
         mean_exp = mean.unsqueeze(-1).repeat(1, 2)
         self.assertTrue(torch.equal(posterior.mean, mean_exp))
         variance_exp = degenerate_covar.diag().unsqueeze(-1).repeat(1, 2)
         self.assertTrue(torch.equal(posterior.variance, variance_exp))
         # rsample
         with warnings.catch_warnings(record=True) as ws:
             # we check that the p.d. warning is emitted - this only
             # happens once per posterior, so we need to check only once
             samples = posterior.rsample(sample_shape=torch.Size([4]))
             self.assertTrue(any(issubclass(w.category, RuntimeWarning) for w in ws))
             self.assertTrue(any("not p.d" in str(w.message) for w in ws))
         self.assertEqual(samples.shape, torch.Size([4, 3, 2]))
         samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
         self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 2]))
         # rsample w/ base samples
         base_samples = torch.randn(4, 3, 2, device=self.device, dtype=dtype)
         samples_b1 = posterior.rsample(
             sample_shape=torch.Size([4]), base_samples=base_samples
         )
         samples_b2 = posterior.rsample(
             sample_shape=torch.Size([4]), base_samples=base_samples
         )
         self.assertTrue(torch.allclose(samples_b1, samples_b2))
         base_samples2 = torch.randn(4, 2, 3, 2, device=self.device, dtype=dtype)
         samples2_b1 = posterior.rsample(
             sample_shape=torch.Size([4, 2]), base_samples=base_samples2
         )
         samples2_b2 = posterior.rsample(
             sample_shape=torch.Size([4, 2]), base_samples=base_samples2
         )
         self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
         # collapse_batch_dims
         b_mean = torch.rand(2, 3, dtype=dtype, device=self.device)
         b_degenerate_covar = degenerate_covar.expand(2, *degenerate_covar.shape)
         b_mvn = MultivariateNormal(b_mean, lazify(b_degenerate_covar))
         b_mvn = MultitaskMultivariateNormal.from_independent_mvns([b_mvn, b_mvn])
         b_posterior = GPyTorchPosterior(mvn=b_mvn)
         b_base_samples = torch.randn(4, 1, 3, 2, device=self.device, dtype=dtype)
         with warnings.catch_warnings(record=True) as ws:
             b_samples = b_posterior.rsample(
                 sample_shape=torch.Size([4]), base_samples=b_base_samples
             )
             self.assertTrue(any(issubclass(w.category, RuntimeWarning) for w in ws))
             self.assertTrue(any("not p.d" in str(w.message) for w in ws))
         self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 2]))
Example #4
 def test_GPyTorchPosterior_Multitask(self):
     for dtype in (torch.float, torch.double):
         mean = torch.rand(3, 2, dtype=dtype, device=self.device)
         variance = 1 + torch.rand(3, 2, dtype=dtype, device=self.device)
         covar = variance.view(-1).diag()
         mvn = MultitaskMultivariateNormal(mean, lazify(covar))
         posterior = GPyTorchPosterior(mvn=mvn)
         # basics
         self.assertEqual(posterior.device.type, self.device.type)
         self.assertTrue(posterior.dtype == dtype)
         self.assertEqual(posterior.event_shape, torch.Size([3, 2]))
         self.assertTrue(torch.equal(posterior.mean, mean))
         self.assertTrue(torch.equal(posterior.variance, variance))
         # rsample
         samples = posterior.rsample(sample_shape=torch.Size([4]))
         self.assertEqual(samples.shape, torch.Size([4, 3, 2]))
         samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
         self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 2]))
         # rsample w/ base samples
          base_samples = torch.randn(4, 3, 2, device=self.device, dtype=dtype)
         samples_b1 = posterior.rsample(sample_shape=torch.Size([4]),
                                        base_samples=base_samples)
         samples_b2 = posterior.rsample(sample_shape=torch.Size([4]),
                                        base_samples=base_samples)
         self.assertTrue(torch.allclose(samples_b1, samples_b2))
          base_samples2 = torch.randn(4, 2, 3, 2, device=self.device, dtype=dtype)
         samples2_b1 = posterior.rsample(sample_shape=torch.Size([4, 2]),
                                         base_samples=base_samples2)
         samples2_b2 = posterior.rsample(sample_shape=torch.Size([4, 2]),
                                         base_samples=base_samples2)
         self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
         # collapse_batch_dims
         b_mean = torch.rand(2, 3, 2, dtype=dtype, device=self.device)
         b_variance = 1 + torch.rand(
             2, 3, 2, dtype=dtype, device=self.device)
          b_covar = b_variance.view(2, 6, 1) * torch.eye(6).type_as(b_variance)
         b_mvn = MultitaskMultivariateNormal(b_mean, lazify(b_covar))
         b_posterior = GPyTorchPosterior(mvn=b_mvn)
          b_base_samples = torch.randn(4, 1, 3, 2, device=self.device, dtype=dtype)
         b_samples = b_posterior.rsample(sample_shape=torch.Size([4]),
                                         base_samples=b_base_samples)
         self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 2]))
Example #5
 def test_multitask_multivariate_normal_batch(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     mean = torch.tensor([[0, 1], [2, 3]], dtype=torch.float, device=device).repeat(2, 1, 1)
     variance = 1 + torch.arange(4, dtype=torch.float, device=device)
     covmat = torch.diag(variance).repeat(2, 1, 1)
     mtmvn = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covmat)
     self.assertTrue(torch.equal(mtmvn.mean, mean))
     self.assertTrue(approx_equal(mtmvn.variance, variance.repeat(2, 1).view(2, 2, 2)))
     self.assertTrue(torch.equal(mtmvn.scale_tril, covmat.sqrt()))
     mvn_plus1 = mtmvn + 1
     self.assertTrue(torch.equal(mvn_plus1.mean, mtmvn.mean + 1))
     self.assertTrue(torch.equal(mvn_plus1.covariance_matrix, mtmvn.covariance_matrix))
     mvn_times2 = mtmvn * 2
     self.assertTrue(torch.equal(mvn_times2.mean, mtmvn.mean * 2))
     self.assertTrue(torch.equal(mvn_times2.covariance_matrix, mtmvn.covariance_matrix * 4))
     mvn_divby2 = mtmvn / 2
     self.assertTrue(torch.equal(mvn_divby2.mean, mtmvn.mean / 2))
     self.assertTrue(torch.equal(mvn_divby2.covariance_matrix, mtmvn.covariance_matrix / 4))
     self.assertTrue(approx_equal(mtmvn.entropy(), 7.2648 * torch.ones(2, device=device)))
     logprob = mtmvn.log_prob(torch.zeros(2, 2, 2, device=device))
     logprob_expected = -7.3064 * torch.ones(2, device=device)
     self.assertTrue(approx_equal(logprob, logprob_expected))
     logprob = mtmvn.log_prob(torch.zeros(3, 2, 2, 2, device=device))
     logprob_expected = -7.3064 * torch.ones(3, 2, device=device)
     self.assertTrue(approx_equal(logprob, logprob_expected))
     conf_lower, conf_upper = mtmvn.confidence_region()
     self.assertTrue(approx_equal(conf_lower, mtmvn.mean - 2 * mtmvn.stddev))
     self.assertTrue(approx_equal(conf_upper, mtmvn.mean + 2 * mtmvn.stddev))
     self.assertTrue(mtmvn.sample().shape == torch.Size([2, 2, 2]))
     self.assertTrue(mtmvn.sample(torch.Size([3])).shape == torch.Size([3, 2, 2, 2]))
     self.assertTrue(mtmvn.sample(torch.Size([3, 4])).shape == torch.Size([3, 4, 2, 2, 2]))
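The entropy constant asserted above can be verified by hand: for a k-dimensional Gaussian, H = 0.5 * logdet(2*pi*e*Sigma), and here Sigma = diag(1, 2, 3, 4) with k = 4. A quick check:

import math

# H = 0.5 * (k * log(2*pi*e) + sum(log(var_i))) for a k-dim Gaussian
H = 0.5 * (4 * math.log(2 * math.pi * math.e)
           + sum(math.log(v) for v in (1.0, 2.0, 3.0, 4.0)))
print(round(H, 4))  # 7.2648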
Example #6
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `q x d` or `batch_shape x q x d` (batch mode) tensor, where `d` is the
                dimension of the feature space (not including task indices) and
                `q` is the number of points considered jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.
            detach_test_caches: If True, detach GPyTorch test caches during
                computation of the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement).

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices`. Includes measurement noise if
            `observation_noise=True`.
        """
        if output_indices is None:
            output_indices = self._output_tasks
        if any(i not in self._output_tasks for i in output_indices):
            raise ValueError("Too many output indices")

        # construct evaluation X
        X_full = _make_X_full(X=X,
                              output_indices=output_indices,
                              tf=self._task_feature)

        self.eval()  # make sure model is in eval mode
        detach_test_caches = kwargs.get("detach_test_caches", True)
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            mvn = self(X_full)
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvn = self.likelihood(mvn, X_full)
        # If single-output, return the posterior of a single-output model
        if len(output_indices) == 1:
            return GPyTorchPosterior(mvn=mvn)
        # Otherwise, make a MultitaskMultivariateNormal out of this
        mtmvn = MultitaskMultivariateNormal(
            mean=mvn.mean.view(*X.shape[:-1], len(output_indices)),
            covariance_matrix=mvn.lazy_covariance_matrix,
            interleaved=False,
        )
        return GPyTorchPosterior(mvn=mtmvn)
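A hedged usage sketch for this `posterior` (not from the source): `mtgp` stands in for a fitted multi-task model exposing the method above, with task 0 among its `_output_tasks`.

X = torch.rand(8, 2)  # q=8 points, d=2 features (task indices excluded)
post_all = mtgp.posterior(X)                      # joint posterior over all tasks
post_one = mtgp.posterior(X, output_indices=[0])  # single task -> plain MVN posterior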
Example #7
 def _create_marginal_input(self, batch_shape=torch.Size([])):
     mat = torch.randn(*batch_shape, 5, 5)
     mat2 = torch.randn(*batch_shape, 4, 4)
     covar = KroneckerProductLazyTensor(RootLazyTensor(mat),
                                        RootLazyTensor(mat2))
     return MultitaskMultivariateNormal(torch.randn(*batch_shape, 5, 4),
                                        covar)
Example #8
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: Union[bool, Tensor] = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
                of the feature space and `q` is the number of points considered
                jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add the observation noise from the
                likelihood to the posterior. If a Tensor, use it directly as the
                observation noise (must be of shape `(batch_shape) x q x m`).

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes observation noise if specified.
        """
        self.eval()  # make sure model is in eval mode
        with gpt_posterior_settings():
            # insert a dimension for the output dimension
            if self._num_outputs > 1:
                X, output_dim_idx = add_output_dim(
                    X=X, original_batch_shape=self._input_batch_shape)
            mvn = self(X)
            if observation_noise is not False:
                if torch.is_tensor(observation_noise):
                    # TODO: Validate noise shape
                    # make observation_noise `batch_shape x q x n`
                    obs_noise = observation_noise.transpose(-1, -2)
                    mvn = self.likelihood(mvn, X, noise=obs_noise)
                elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                    # Use the mean of the previous noise values (TODO: be smarter here).
                    noise = self.likelihood.noise.mean().expand(X.shape[:-1])
                    mvn = self.likelihood(mvn, X, noise=noise)
                else:
                    mvn = self.likelihood(mvn, X)
            if self._num_outputs > 1:
                mean_x = mvn.mean
                covar_x = mvn.covariance_matrix
                output_indices = output_indices or range(self._num_outputs)
                mvns = [
                    MultivariateNormal(
                        mean_x.select(dim=output_dim_idx, index=t),
                        lazify(covar_x.select(dim=output_dim_idx, index=t)),
                    ) for t in output_indices
                ]
                mvn = MultitaskMultivariateNormal.from_independent_mvns(
                    mvns=mvns)
        return GPyTorchPosterior(mvn=mvn)
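A hedged sketch of the three `observation_noise` modes handled above; `model` is an assumed single-output model (m=1) exposing this method, and the noise shape follows the docstring:

X = torch.rand(4, 2, 3)                     # batch_shape=4, q=2, d=3
post = model.posterior(X)                   # posterior without noise
post_noisy = model.posterior(X, observation_noise=True)
noise = torch.full((4, 2, 1), 0.1)          # `(batch_shape) x q x m` noise levels
post_fixed = model.posterior(X, observation_noise=noise)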
Example #9
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: Union[bool, Tensor] = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `q x d` or `batch_shape x q x d` (batch mode) tensor, where `d` is the
                dimension of the feature space (not including task indices) and
                `q` is the number of points considered jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise from the respective
                likelihoods. If a Tensor, specifies the observation noise levels
                to add.

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices`. Includes measurement noise if
            `observation_noise` is specified.
        """
        if output_indices is None:
            output_indices = self._output_tasks
        if any(i not in self._output_tasks for i in output_indices):
            raise ValueError("Too many output indices")
        cls_name = self.__class__.__name__
        if hasattr(self, "outcome_transform"):
            raise NotImplementedError(
                f"Outcome transforms currently not supported by {cls_name}")

        # construct evaluation X
        X_full = _make_X_full(X=X,
                              output_indices=output_indices,
                              tf=self._task_feature)

        self.eval()  # make sure model is in eval mode
        with gpt_posterior_settings():
            mvn = self(X_full)
            if observation_noise is not False:
                raise NotImplementedError(
                    f"Specifying observation noise is not yet supported by {cls_name}"
                )
        # If single-output, return the posterior of a single-output model
        if len(output_indices) == 1:
            return GPyTorchPosterior(mvn=mvn)
        # Otherwise, make a MultitaskMultivariateNormal out of this
        mtmvn = MultitaskMultivariateNormal(
            mean=mvn.mean.view(*X.shape[:-1], len(output_indices)),
            covariance_matrix=mvn.lazy_covariance_matrix,
            interleaved=False,
        )
        return GPyTorchPosterior(mvn=mtmvn)
Example #10
def _get_test_posterior(batch_shape, q=1, m=1, **tkwargs):
    mean = torch.rand(*batch_shape, q, m, **tkwargs)
    a = torch.rand(*batch_shape, q * m, q * m, **tkwargs)
    covar = a @ a.transpose(-1, -2)
    diag = torch.diagonal(covar, dim1=-2, dim2=-1)
    diag += torch.rand(*batch_shape, q * m, **tkwargs)  # in-place
    mvn = MultitaskMultivariateNormal(mean, covar)
    return GPyTorchPosterior(mvn)
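Hypothetical usage of the helper above; the expected shapes follow the `event_shape` convention used in these tests (`batch_shape x q x m`):

post = _get_test_posterior(torch.Size([2]), q=3, m=2, dtype=torch.double)
print(post.mean.shape)   # torch.Size([2, 3, 2])
print(post.event_shape)  # torch.Size([2, 3, 2])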
Example #11
 def test_multitask_multivariate_normal_exceptions(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         mean = torch.tensor([0, 1], device=device, dtype=dtype)
         covmat = torch.eye(2, device=device, dtype=dtype)
         with self.assertRaises(RuntimeError):
             MultitaskMultivariateNormal(mean=mean,
                                         covariance_matrix=covmat)
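For contrast, a minimal sketch (not part of the test) of a construction that passes validation: the mean must be at least two-dimensional, `n x t`, matched by an `(n*t) x (n*t)` covariance:

import torch
from gpytorch.distributions import MultitaskMultivariateNormal

mean = torch.zeros(2, 2)  # n=2 points, t=2 tasks
covmat = torch.eye(4)     # (n*t) x (n*t)
mtmvn = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covmat)
print(mtmvn.event_shape)  # torch.Size([2, 2])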
Example #12
 def test_transformed_posterior(self):
     for dtype in (torch.float, torch.double):
         for m in (1, 2):
             shape = torch.Size([3, m])
             mean = torch.rand(shape, dtype=dtype, device=self.device)
             variance = 1 + torch.rand(
                 shape, dtype=dtype, device=self.device)
             if m == 1:
                 covar = torch.diag_embed(variance.squeeze(-1))
                 mvn = MultivariateNormal(mean.squeeze(-1), lazify(covar))
             else:
                 covar = torch.diag_embed(
                     variance.view(*variance.shape[:-2], -1))
                 mvn = MultitaskMultivariateNormal(mean, lazify(covar))
             p_base = GPyTorchPosterior(mvn=mvn)
             p_tf = TransformedPosterior(  # dummy transforms
                 posterior=p_base,
                 sample_transform=lambda s: s + 2,
                 mean_transform=lambda m, v: 2 * m + v,
                 variance_transform=lambda m, v: m + 2 * v,
             )
             # mean, variance
             self.assertEqual(p_tf.device.type, self.device.type)
             self.assertTrue(p_tf.dtype == dtype)
             self.assertEqual(p_tf.event_shape, shape)
             self.assertEqual(p_tf.base_sample_shape, shape)
             self.assertTrue(torch.equal(p_tf.mean, 2 * mean + variance))
             self.assertTrue(torch.equal(p_tf.variance,
                                         mean + 2 * variance))
             # rsample
             samples = p_tf.rsample()
             self.assertEqual(samples.shape, torch.Size([1]) + shape)
             samples = p_tf.rsample(sample_shape=torch.Size([4]))
             self.assertEqual(samples.shape, torch.Size([4]) + shape)
             samples2 = p_tf.rsample(sample_shape=torch.Size([4, 2]))
             self.assertEqual(samples2.shape, torch.Size([4, 2]) + shape)
             # rsample w/ base samples
              base_samples = torch.randn(4, *shape, device=self.device, dtype=dtype)
             # incompatible shapes
             with self.assertRaises(RuntimeError):
                 p_tf.rsample(sample_shape=torch.Size([3]),
                              base_samples=base_samples)
             # make sure sample transform is applied correctly
             samples_base = p_base.rsample(sample_shape=torch.Size([4]),
                                           base_samples=base_samples)
             samples_tf = p_tf.rsample(sample_shape=torch.Size([4]),
                                       base_samples=base_samples)
             self.assertTrue(torch.equal(samples_tf, samples_base + 2))
             # check error handling
             p_tf_2 = TransformedPosterior(posterior=p_base,
                                           sample_transform=lambda s: s + 2)
             with self.assertRaises(NotImplementedError):
                 p_tf_2.mean
             with self.assertRaises(NotImplementedError):
                 p_tf_2.variance
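The dummy transforms above only check plumbing. A practically useful instance, sketched under the same `TransformedPosterior` kwargs, is an exponential (log-normal) transform with the standard closed-form moment maps:

p_exp = TransformedPosterior(
    posterior=p_base,
    sample_transform=torch.exp,
    # if X ~ N(m, v), then E[exp(X)] = exp(m + v/2)
    mean_transform=lambda m, v: torch.exp(m + v / 2),
    # and Var[exp(X)] = (exp(v) - 1) * exp(2m + v)
    variance_transform=lambda m, v: (torch.exp(v) - 1) * torch.exp(2 * m + v),
)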
Example #13
    def test_expected_improvement_batch(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            mean = torch.tensor([-0.5, 0.0, 0.5], device=device, dtype=dtype).view(
                3, 1, 1
            )
            variance = torch.ones(3, 1, 1, device=device, dtype=dtype)
            mm = MockModel(MockPosterior(mean=mean, variance=variance))
            module = ExpectedImprovement(model=mm, best_f=0.0)
            X = torch.empty(3, 1, 1, device=device, dtype=dtype)  # dummy
            ei = module(X)
            ei_expected = torch.tensor(
                [0.19780, 0.39894, 0.69780], device=device, dtype=dtype
            )
            self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
            # check for proper error if multi-output model
            mean2 = torch.rand(3, 1, 2, device=device, dtype=dtype)
            variance2 = torch.rand(3, 1, 2, device=device, dtype=dtype)
            mm2 = MockModel(MockPosterior(mean=mean2, variance=variance2))
            module2 = ExpectedImprovement(model=mm2, best_f=0.0)
            with self.assertRaises(UnsupportedError):
                module2(X)

            # test objective (single-output)
            mean = torch.tensor([[[0.5]], [[0.25]]], device=device, dtype=dtype)
            covar = torch.tensor([[[[0.16]]], [[[0.125]]]], device=device, dtype=dtype)
            mvn = MultivariateNormal(mean, covar)
            p = GPyTorchPosterior(mvn)
            mm = MockModel(p)
            weights = torch.tensor([0.5], device=device, dtype=dtype)
            obj = ScalarizedObjective(weights)
            ei = ExpectedImprovement(model=mm, best_f=0.0, objective=obj)
            X = torch.rand(2, 1, 2, device=device, dtype=dtype)
            ei_expected = torch.tensor([[0.2601], [0.1500]], device=device, dtype=dtype)
            self.assertTrue(torch.allclose(ei(X), ei_expected, atol=1e-4))

            # test objective (multi-output)
            mean = torch.tensor(
                [[[-0.25, 0.5]], [[0.2, -0.1]]], device=device, dtype=dtype
            )
            covar = torch.tensor(
                [[[0.5, 0.125], [0.125, 0.5]], [[0.25, -0.1], [-0.1, 0.25]]],
                device=device,
                dtype=dtype,
            )
            mvn = MultitaskMultivariateNormal(mean, covar)
            p = GPyTorchPosterior(mvn)
            mm = MockModel(p)
            weights = torch.tensor([2.0, 1.0], device=device, dtype=dtype)
            obj = ScalarizedObjective(weights)
            ei = ExpectedImprovement(model=mm, best_f=0.0, objective=obj)
            X = torch.rand(2, 1, 2, device=device, dtype=dtype)
            ei_expected = torch.tensor([0.6910, 0.5371], device=device, dtype=dtype)
            self.assertTrue(torch.allclose(ei(X), ei_expected, atol=1e-4))

        # test bad objective class
        with self.assertRaises(UnsupportedError):
            ExpectedImprovement(model=mm, best_f=0.0, objective=IdentityMCObjective())
Example #14
    def test_expected_improvement(self):
        for dtype in (torch.float, torch.double):
            mean = torch.tensor([[-0.5]], device=self.device, dtype=dtype)
            variance = torch.ones(1, 1, device=self.device, dtype=dtype)
            mm = MockModel(MockPosterior(mean=mean, variance=variance))

            # basic test
            module = ExpectedImprovement(model=mm, best_f=0.0)
            X = torch.empty(1, 1, device=self.device, dtype=dtype)  # dummy
            ei = module(X)
            ei_expected = torch.tensor(0.19780, device=self.device, dtype=dtype)
            self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))

            # test maximize
            module = ExpectedImprovement(model=mm, best_f=0.0, maximize=False)
            X = torch.empty(1, 1, device=self.device, dtype=dtype)  # dummy
            ei = module(X)
            ei_expected = torch.tensor(0.6978, device=self.device, dtype=dtype)
            self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
            with self.assertRaises(UnsupportedError):
                module.set_X_pending(None)

            # test posterior transform (single-output)
            mean = torch.tensor([0.5], device=self.device, dtype=dtype)
            covar = torch.tensor([[0.16]], device=self.device, dtype=dtype)
            mvn = MultivariateNormal(mean, covar)
            p = GPyTorchPosterior(mvn)
            mm = MockModel(p)
            weights = torch.tensor([0.5], device=self.device, dtype=dtype)
            transform = ScalarizedPosteriorTransform(weights)
            ei = ExpectedImprovement(model=mm,
                                     best_f=0.0,
                                     posterior_transform=transform)
            X = torch.rand(1, 2, device=self.device, dtype=dtype)
            ei_expected = torch.tensor(0.2601, device=self.device, dtype=dtype)
            self.assertTrue(torch.allclose(ei(X), ei_expected, atol=1e-4))

            # test posterior transform (multi-output)
            mean = torch.tensor([[-0.25, 0.5]],
                                device=self.device,
                                dtype=dtype)
            covar = torch.tensor([[[0.5, 0.125], [0.125, 0.5]]],
                                 device=self.device,
                                 dtype=dtype)
            mvn = MultitaskMultivariateNormal(mean, covar)
            p = GPyTorchPosterior(mvn)
            mm = MockModel(p)
            weights = torch.tensor([2.0, 1.0], device=self.device, dtype=dtype)
            transform = ScalarizedPosteriorTransform(weights)
            ei = ExpectedImprovement(model=mm,
                                     best_f=0.0,
                                     posterior_transform=transform)
            X = torch.rand(1, 2, device=self.device, dtype=dtype)
            ei_expected = torch.tensor(0.6910, device=self.device, dtype=dtype)
            self.assertTrue(torch.allclose(ei(X), ei_expected, atol=1e-4))
Example #15
    def test_log_prob(self):
        mean = torch.randn(4, 3)
        var = torch.randn(12).abs_()
        values = mean + 0.5
        diffs = (values - mean).view(-1)

        res = MultitaskMultivariateNormal(mean, DiagLazyTensor(var)).log_prob(values)
        actual = -0.5 * (math.log(math.pi * 2) * 12 + var.log().sum() + (diffs / var * diffs).sum())
        self.assertLess((res - actual).div(res).abs().item(), 1e-2)

        mean = torch.randn(3, 4, 3)
        var = torch.randn(3, 12).abs_()
        values = mean + 0.5
        diffs = (values - mean).view(3, -1)

        res = MultitaskMultivariateNormal(mean, DiagLazyTensor(var)).log_prob(values)
        actual = -0.5 * (math.log(math.pi * 2) * 12 + var.log().sum(-1) + (diffs / var * diffs).sum(-1))
        self.assertLess((res - actual).div(res).abs().norm(), 1e-2)
Example #16
def _get_test_posterior(batch_shape, device, dtype, q=1, o=1):
    mean = torch.rand(*batch_shape, q, o, device=device, dtype=dtype)
    a = torch.rand(*batch_shape, q * o, q * o, device=device, dtype=dtype)
    covar = a @ a.transpose(-1, -2)
    diag = torch.diagonal(covar, dim1=-2, dim2=-1)
    diag += torch.rand(*batch_shape, q * o, device=device,
                       dtype=dtype)  # in-place
    mvn = MultitaskMultivariateNormal(mean, covar)
    return GPyTorchPosterior(mvn)
Example #17
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension of the
                feature space and `q` is the number of points considered jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes observation noise if
            `observation_noise=True`.
        """
        self.eval()  # make sure model is in eval mode
        with ExitStack() as es:
            es.enter_context(gpt_settings.debug(False))
            es.enter_context(gpt_settings.fast_pred_var())
            es.enter_context(
                gpt_settings.detach_test_caches(
                    settings.propagate_grads.off()))
            # insert a dimension for the output dimension
            if self._num_outputs > 1:
                X, output_dim_idx = add_output_dim(
                    X=X, original_batch_shape=self._input_batch_shape)
            mvn = self(X)
            if observation_noise:
                if isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                    # Use the mean of the previous noise values (TODO: be smarter here).
                    noise = self.likelihood.noise.mean().expand(X.shape[:-1])
                    mvn = self.likelihood(mvn, X, noise=noise)
                else:
                    mvn = self.likelihood(mvn, X)
            if self._num_outputs > 1:
                mean_x = mvn.mean
                covar_x = mvn.covariance_matrix
                output_indices = output_indices or range(self._num_outputs)
                mvns = [
                    MultivariateNormal(
                        mean_x.select(dim=output_dim_idx, index=t),
                        lazify(covar_x.select(dim=output_dim_idx, index=t)),
                    ) for t in output_indices
                ]
                mvn = MultitaskMultivariateNormal.from_independent_mvns(
                    mvns=mvns)
        return GPyTorchPosterior(mvn=mvn)
Example #18
def _get_test_posterior(batch_shape: torch.Size,
                        q: int = 1,
                        m: int = 1,
                        interleaved: bool = True,
                        lazy: bool = False,
                        independent: bool = False,
                        **tkwargs) -> GPyTorchPosterior:
    r"""Generate a Posterior for testing purposes.

    Args:
        batch_shape: The batch shape of the data.
        q: The number of candidates.
        m: The number of outputs.
        interleaved: A boolean indicating the format of the
            MultitaskMultivariateNormal.
        lazy: A boolean indicating if the posterior should be lazy.
        independent: A boolean indicating whether the outputs are independent.
        tkwargs: `device` and `dtype` tensor constructor kwargs.

    Returns:
        A `GPyTorchPosterior` for testing purposes.
    """
    if independent:
        mvns = []
        for _ in range(m):
            mean = torch.rand(*batch_shape, q, **tkwargs)
            a = torch.rand(*batch_shape, q, q, **tkwargs)
            covar = a @ a.transpose(-1, -2)
            flat_diag = torch.rand(*batch_shape, q, **tkwargs)
            covar = covar + torch.diag_embed(flat_diag)
            mvns.append(MultivariateNormal(mean, covar))
        mtmvn = MultitaskMultivariateNormal.from_independent_mvns(mvns)
    else:
        mean = torch.rand(*batch_shape, q, m, **tkwargs)
        a = torch.rand(*batch_shape, q * m, q * m, **tkwargs)
        covar = a @ a.transpose(-1, -2)
        flat_diag = torch.rand(*batch_shape, q * m, **tkwargs)
        if lazy:
            covar = AddedDiagLazyTensor(covar, DiagLazyTensor(flat_diag))
        else:
            covar = covar + torch.diag_embed(flat_diag)
        mtmvn = MultitaskMultivariateNormal(mean,
                                            covar,
                                            interleaved=interleaved)
    return GPyTorchPosterior(mtmvn)
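Hypothetical calls to the helper above, exercising the independent and lazy branches:

post_ind = _get_test_posterior(torch.Size([]), q=4, m=2, independent=True,
                               dtype=torch.double)   # via from_independent_mvns
post_lazy = _get_test_posterior(torch.Size([3]), q=4, m=2, lazy=True,
                                dtype=torch.double)  # AddedDiagLazyTensor covariance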
Example #19
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
                feature space, `q` is the number of points considered jointly,
                and `b` is the batch dimension.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes measurement noise if
            `observation_noise=True`.
        """
        self.eval()  # make sure model is in eval mode
        with ExitStack() as es:
            es.enter_context(gpt_settings.debug(False))
            es.enter_context(gpt_settings.fast_pred_var())
            es.enter_context(
                gpt_settings.detach_test_caches(
                    settings.propagate_grads.off()))
            if output_indices is not None:
                mvns = [self.forward_i(i, X) for i in output_indices]
                if observation_noise:
                    lh_kwargs = [{
                        "noise": lh.noise.mean().expand(X.shape[:-1])
                    } if isinstance(lh, FixedNoiseGaussianLikelihood) else {}
                                 for lh in self.likelihood.likelihoods]
                    mvns = [
                        self.likelihood_i(i, mvn, X,
                                          **lkws) for i, mvn, lkws in zip(
                                              output_indices, mvns, lh_kwargs)
                    ]
            else:
                mvns = self(*[X for _ in range(self.num_outputs)])
                if observation_noise:
                    # TODO: Allow passing in observation noise via kwarg
                    mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
        if len(mvns) == 1:
            return GPyTorchPosterior(mvn=mvns[0])
        else:
            return GPyTorchPosterior(
                mvn=MultitaskMultivariateNormal.from_independent_mvns(
                    mvns=mvns))
Example #20
    def __call__(self,
                 inputs,
                 are_samples=False,
                 expand_for_quadgrid=True,
                 **kwargs):
        if isinstance(inputs, MultitaskMultivariateNormal):
            # `inputs` comes from a previous layer, so its mean is n x t
            mus, sigmas = inputs.mean, inputs.variance.sqrt()

            if expand_for_quadgrid:
                xi_mus = mus.unsqueeze(0)  # 1 x n x t
                xi_sigmas = sigmas.unsqueeze(0)  # 1 x n x t
            else:
                xi_mus = mus
                xi_sigmas = sigmas

            # Unsqueeze sigmas to 1 x n x t and reshape quad locations from [q] to q x 1 x t.
            # The broadcasted result will be q x n x t
            qg = self.quad_sites.view([self.num_quad_sites] + [1] *
                                      (xi_mus.dim() - 2) + [self.input_dims])
            xi_sigmas = xi_sigmas * qg

            inputs = xi_mus + xi_sigmas  # q^t x n x t
        if settings.debug.on():
            if not torch.is_tensor(inputs):
                raise ValueError(
                    "`inputs` should either be a MultitaskMultivariateNormal or a Tensor, got "
                    f"{inputs.__class__.__Name__}")

            if inputs.size(-1) != self.input_dims:
                raise RuntimeError(
                    f"Input shape did not match self.input_dims. Got total feature dims [{inputs.size(-1)}],"
                    f" expected [{self.input_dims}]")

        # Repeat the input for all possible outputs
        if self.output_dims is not None:
            inputs = inputs.unsqueeze(-3)
            inputs = inputs.expand(*inputs.shape[:-3], self.output_dims,
                                   *inputs.shape[-2:])
        # Now run samples through the GP
        output = ApproximateGP.__call__(self, inputs, **kwargs)

        if self.num_quad_sites > 0:
            if self.output_dims is not None and not isinstance(
                    output, MultitaskMultivariateNormal):
                mean = output.loc.transpose(-1, -2)
                covar = BlockDiagLazyTensor(output.lazy_covariance_matrix,
                                            block_dim=-3)
                output = MultitaskMultivariateNormal(mean,
                                                     covar,
                                                     interleaved=False)
        else:
            output = output.loc.transpose(
                -1, -2)  # this layer provides noiseless kernel interpolation

        return output
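A shape-only sketch of the quadrature broadcast above (illustrative values; assumes `quad_sites` holds `num_quad_sites x input_dims` locations, as implied by the `view` in the code):

import torch

num_quad_sites, n, t = 8, 10, 3
quad_sites = torch.randn(num_quad_sites, t)
xi_mus = torch.randn(1, n, t)    # mus unsqueezed for the quad grid
xi_sigmas = torch.rand(1, n, t)
qg = quad_sites.view([num_quad_sites] + [1] * (xi_mus.dim() - 2) + [t])
print((xi_mus + xi_sigmas * qg).shape)  # torch.Size([8, 10, 3]) -- q x n x t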
Example #21
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension of the
                feature space and `q` is the number of points considered jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.
            propagate_grads: If True, do not detach GPyTorch's test caches when
                computing of the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement). Defaults to `False`.

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes observation noise if
            `observation_noise=True`.
        """
        self.eval()  # make sure model is in eval mode
        detach_test_caches = not kwargs.get("propagate_grads", False)
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            # insert a dimension for the output dimension
            if self._num_outputs > 1:
                X, output_dim_idx = add_output_dim(
                    X=X, original_batch_shape=self._input_batch_shape
                )
            mvn = self(X)
            if observation_noise:
                mvn = self.likelihood(mvn, X)
            if self._num_outputs > 1:
                mean_x = mvn.mean
                covar_x = mvn.covariance_matrix
                output_indices = output_indices or range(self._num_outputs)
                mvns = [
                    MultivariateNormal(
                        mean_x.select(dim=output_dim_idx, index=t),
                        lazify(covar_x.select(dim=output_dim_idx, index=t)),
                    )
                    for t in output_indices
                ]
                mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
        return GPyTorchPosterior(mvn=mvn)
Example #22
 def __call__(self, function, *params, **kwargs):
     if isinstance(function, Distribution) and not isinstance(
             function, MultitaskMultivariateNormal):
         warnings.warn(
             "The input to DeepGaussianLikelihood should be a MultitaskMultivariateNormal (num_data x num_tasks). "
             "Batch MultivariateNormal inputs (num_tasks x num_data) will be deprectated.",
             DeprecationWarning,
         )
         function = MultitaskMultivariateNormal.from_batch_mvn(function)
     return super().__call__(function, *params, **kwargs)
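A minimal sketch of the conversion applied above: `from_batch_mvn` reinterprets a batch of `t` independent MVNs over `n` points as one multitask distribution.

import torch
from gpytorch.distributions import MultivariateNormal, MultitaskMultivariateNormal

batch_mvn = MultivariateNormal(torch.randn(3, 5),
                               torch.eye(5).repeat(3, 1, 1))  # t=3 tasks, n=5
mtmvn = MultitaskMultivariateNormal.from_batch_mvn(batch_mvn)  # default task_dim=-1
print(mtmvn.event_shape)  # torch.Size([5, 3]) -- n x t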
Example #23
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
                feature space, `q` is the number of points considered jointly,
                and `b` is the batch dimension.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.
            detach_test_caches: If True, detach GPyTorch test caches during
                computation of the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement).

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes measurement noise if
            `observation_noise=True`.
        """
        detach_test_caches = kwargs.get("detach_test_caches", True)
        self.eval()  # make sure model is in eval mode
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            if output_indices is not None:
                mvns = [self.forward_i(i, X) for i in output_indices]
                if observation_noise:
                    mvns = [
                        self.likelihood_i(i, mvn, X)
                        for i, mvn in zip(output_indices, mvns)
                    ]
            else:
                mvns = self(*[X for _ in range(self.num_outputs)])
                if observation_noise:
                    # TODO: Allow passing in observation noise via kwarg
                    mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
        if len(mvns) == 1:
            return GPyTorchPosterior(mvn=mvns[0])
        else:
            return GPyTorchPosterior(
                mvn=MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
            )
Example #24
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension of the
                feature space and `q` is the number of points considered jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.
            detach_test_caches: If True, detach GPyTorch test caches during
                computation of the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement). Defaults to `True`.

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes observation noise if
            `observation_noise=True`.
        """
        self.eval()  # make sure model is in eval mode
        detach_test_caches = kwargs.get("detach_test_caches", True)
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            # insert a dimension for the output dimension
            if self._num_outputs > 1:
                X, output_dim_idx = add_output_dim(
                    X=X, original_batch_shape=self._input_batch_shape
                )
            mvn = self(X)
            mean_x = mvn.mean
            covar_x = mvn.covariance_matrix
            if self._num_outputs > 1:
                output_indices = output_indices or range(self._num_outputs)
                mvns = [
                    MultivariateNormal(
                        mean_x.select(dim=output_dim_idx, index=t),
                        lazify(covar_x.select(dim=output_dim_idx, index=t)),
                    )
                    for t in output_indices
                ]
                mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
        return GPyTorchPosterior(mvn=mvn)
Example #25
0
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
                feature space, `q` is the number of points considered jointly,
                and `b` is the batch dimension.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.
            detach_test_caches: If True, detach GPyTorch test caches during
                computation of the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement).

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes measurement noise if
            `observation_noise=True`.
        """
        detach_test_caches = kwargs.get("detach_test_caches", True)
        self.eval()  # make sure model is in eval mode
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            if output_indices is not None:
                mvns = [self.forward_i(i, X) for i in output_indices]
                if observation_noise:
                    mvns = [
                        self.likelihood_i(i, mvn, X)
                        for i, mvn in zip(output_indices, mvns)
                    ]
            else:
                mvns = self(*[X for _ in range(self.num_outputs)])
                if observation_noise:
                    # TODO: Allow passing in observation noise via kwarg
                    mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
        if len(mvns) == 1:
            return GPyTorchPosterior(mvn=mvns[0])
        else:
            return GPyTorchPosterior(
                mvn=MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
            )
Example #26
0
    def __call__(self, inputs, are_samples=False, **kwargs):
        deterministic_inputs = not are_samples
        if isinstance(inputs, MultitaskMultivariateNormal):
            inputs = torch.distributions.Normal(
                loc=inputs.mean, scale=inputs.variance.sqrt()).rsample()
            deterministic_inputs = False

        if settings.debug.on():
            if not torch.is_tensor(inputs):
                raise ValueError(
                    "`inputs` should either be a MultitaskMultivariateNormal or a Tensor, got "
                    f"{inputs.__class__.__Name__}")

            if inputs.size(-1) != self.input_dims:
                raise RuntimeError(
                    f"Input shape did not match self.input_dims. Got total feature dims [{inputs.size(-1)}],"
                    f" expected [{self.input_dims}]")

        # Repeat the input for all possible outputs
        if self.output_dims is not None:
            inputs = inputs.unsqueeze(-3)
            inputs = inputs.expand(*inputs.shape[:-3], self.output_dims,
                                   *inputs.shape[-2:])

        # Now run samples through the GP
        output = ApproximateGP.__call__(self, inputs)
        if self.output_dims is not None:
            mean = output.loc.transpose(-1, -2)
            covar = BlockDiagLazyTensor(output.lazy_covariance_matrix,
                                        block_dim=-3)
            output = MultitaskMultivariateNormal(mean,
                                                 covar,
                                                 interleaved=False)

        # If the inputs were deterministic, expand the output with a leading samples dimension
        if deterministic_inputs:
            output = output.expand(
                torch.Size([settings.num_likelihood_samples.value()]) +
                output.batch_shape)

        return output
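The unsqueeze(-3)/expand step above is pure shape bookkeeping: the same inputs are broadcast across an added output-dimension batch so each output GP sees an identical copy. A standalone sketch of just that step, with illustrative sizes:

import torch

# Standalone sketch of the batch-expansion step above (illustrative only).
inputs = torch.randn(8, 2)     # n x d
output_dims = 3
inputs = inputs.unsqueeze(-3)  # 1 x n x d
inputs = inputs.expand(*inputs.shape[:-3], output_dims, *inputs.shape[-2:])
print(inputs.shape)            # torch.Size([3, 8, 2])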
Example #27
0
    def __call__(self, inputs, **kwargs):
        if isinstance(inputs, MultitaskMultivariateNormal):
            # This is for subsequent layers. We apply quadrature here
            # Mean, stdv are q x ... x n x t
            mus, sigmas = inputs.mean, inputs.variance.sqrt()
            qg = self.quad_sites.view([self.num_quad_sites] + [1] *
                                      (mus.dim() - 2) + [self.input_dims])
            sigmas = sigmas * qg
            inputs = mus + sigmas  # q^t x n x t
            deterministic_inputs = False
        else:
            deterministic_inputs = True

        if settings.debug.on():
            if not torch.is_tensor(inputs):
                raise ValueError(
                    "`inputs` should either be a MultitaskMultivariateNormal or a Tensor, got "
                    f"{inputs.__class__.__name__}")

            if inputs.size(-1) != self.input_dims:
                raise RuntimeError(
                    f"Input shape did not match self.input_dims. Got total feature dims [{inputs.size(-1)}],"
                    f" expected [{self.input_dims}]")

        # Repeat the input for all possible outputs
        if self.output_dims is not None:
            inputs = inputs.unsqueeze(-3)
            inputs = inputs.expand(*inputs.shape[:-3], self.output_dims,
                                   *inputs.shape[-2:])

        # Now run samples through the GP
        output = ApproximateGP.__call__(self, inputs, **kwargs)

        # If this is the first layer (deterministic inputs), expand the output
        # This allows quadrature to be applied to future layers
        if deterministic_inputs:
            output = output.expand(
                torch.Size([self.num_quad_sites]) + output.batch_shape)

        if self.num_quad_sites > 0:
            if self.output_dims is not None and not isinstance(
                    output, MultitaskMultivariateNormal):
                mean = output.loc.transpose(-1, -2)
                covar = BlockDiagLazyTensor(output.lazy_covariance_matrix,
                                            block_dim=-3)
                output = MultitaskMultivariateNormal(mean,
                                                     covar,
                                                     interleaved=False)
        else:
            output = output.loc.transpose(
                -1, -2)  # this layer provides noiseless kernel interpolation

        return output
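The quadrature branch above shifts the input means by scaled quadrature sites via broadcasting. A shape-level sketch with hypothetical sizes (num_quad_sites=8, n=5, input_dims=2); for a subsequent layer the means and stdvs already carry a leading quadrature dimension:

import torch

# Shape-level sketch of the quadrature-site broadcasting above (illustrative).
num_quad_sites, n, input_dims = 8, 5, 2
mus = torch.randn(num_quad_sites, n, input_dims)     # q x n x t
sigmas = torch.randn(num_quad_sites, n, input_dims).abs()
quad_sites = torch.randn(num_quad_sites, input_dims)
qg = quad_sites.view([num_quad_sites] + [1] * (mus.dim() - 2) + [input_dims])
inputs = mus + sigmas * qg   # q x n x t: one shifted copy per quadrature site
print(inputs.shape)          # torch.Size([8, 5, 2])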
Example #28
0
    def test_multitask_from_repeat(self):
        mean = torch.randn(2, 3)
        variance = torch.randn(2, 3).clamp_min(1e-6)
        mvn = MultivariateNormal(mean, DiagLazyTensor(variance))
        mmvn = MultitaskMultivariateNormal.from_repeated_mvn(mvn, num_tasks=4)
        self.assertTrue(isinstance(mmvn, MultitaskMultivariateNormal))
        self.assertEqual(mmvn.batch_shape, torch.Size([2]))
        self.assertEqual(mmvn.event_shape, torch.Size([3, 4]))
        self.assertEqual(mmvn.covariance_matrix.shape, torch.Size([2, 12, 12]))
        for i in range(4):
            self.assertTrue(torch.equal(mmvn.mean[..., i], mean))
            self.assertTrue(torch.equal(mmvn.variance[..., i], variance))
Example #29
0
    def forward(self, x, xe):
        m = self.mean(x)
        if x.shape[1] > 0:
            # Kernel on the continuous inputs; multiply in the kernel on the
            # embedded categorical inputs if any are present.
            K = self.kern(x)
            if xe.shape[1] > 0:
                x_emb = self.emb_trans(xe)
                K *= self.kern_emb(x_emb)
        else:
            # No continuous inputs: use the embedded-input kernel alone.
            K = self.kern_emb(self.emb_trans(xe))
        if self.multi_task:
            return MultitaskMultivariateNormal(m, K)
        return MultivariateNormal(m, K)
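The branch structure above forms an elementwise (Hadamard) product of a kernel on the continuous features and a kernel on the embedded categorical features. A minimal sketch of that product, with hypothetical RBF kernels standing in for self.kern and self.kern_emb:

import torch
from gpytorch.kernels import RBFKernel

# Minimal sketch of the kernel product above (illustrative only).
kern, kern_emb = RBFKernel(), RBFKernel()
x = torch.randn(5, 3)      # continuous features
x_emb = torch.randn(5, 2)  # embedded categorical features
K = kern(x) * kern_emb(x_emb)  # elementwise product of the two 5 x 5 kernels
print(K.evaluate().shape)      # torch.Size([5, 5])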
Example #30
0
    def test_multitask_from_batch(self):
        mean = torch.randn(2, 3)
        variance = torch.randn(2, 3).clamp_min(1e-6)
        mvn = MultivariateNormal(mean, DiagLazyTensor(variance))
        mmvn = MultitaskMultivariateNormal.from_batch_mvn(mvn, task_dim=-1)
        self.assertTrue(isinstance(mmvn, MultitaskMultivariateNormal))
        self.assertEqual(mmvn.batch_shape, torch.Size([]))
        self.assertEqual(mmvn.event_shape, torch.Size([3, 2]))
        self.assertEqual(mmvn.covariance_matrix.shape, torch.Size([6, 6]))
        self.assertTrue(torch.equal(mmvn.mean, mean.transpose(-1, -2)))
        self.assertTrue(torch.equal(mmvn.variance, variance.transpose(-1, -2)))

        mean = torch.randn(2, 4, 3)
        variance = torch.randn(2, 4, 3).clamp_min(1e-6)
        mvn = MultivariateNormal(mean, DiagLazyTensor(variance))
        mmvn = MultitaskMultivariateNormal.from_batch_mvn(mvn, task_dim=0)
        self.assertTrue(isinstance(mmvn, MultitaskMultivariateNormal))
        self.assertEqual(mmvn.batch_shape, torch.Size([4]))
        self.assertEqual(mmvn.event_shape, torch.Size([3, 2]))
        self.assertEqual(mmvn.covariance_matrix.shape, torch.Size([4, 6, 6]))
        self.assertTrue(torch.equal(mmvn.mean, mean.permute(1, 2, 0)))
        self.assertTrue(torch.equal(mmvn.variance, variance.permute(1, 2, 0)))
Example #31
0
def _get_test_posterior(shape, device, dtype, interleaved=True, lazy=False):
    mean = torch.rand(shape, device=device, dtype=dtype)
    n_covar = shape[-2:].numel()
    diag = torch.rand(shape, device=device, dtype=dtype)
    diag = diag.view(*diag.shape[:-2], n_covar)
    a = torch.rand(*shape[:-2], n_covar, n_covar, device=device, dtype=dtype)
    covar = a @ a.transpose(-1, -2) + torch.diag_embed(diag)
    if lazy:
        covar = NonLazyTensor(covar)
    if shape[-1] == 1:
        mvn = MultivariateNormal(mean.squeeze(-1), covar)
    else:
        mvn = MultitaskMultivariateNormal(mean, covar, interleaved=interleaved)
    return GPyTorchPosterior(mvn)
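Note the covariance construction above: a @ a.transpose(-1, -2) is positive semi-definite by construction, and adding diag_embed of an (almost surely positive) random diagonal makes it strictly positive definite. A hypothetical usage of the helper:

import torch

# Hypothetical usage of the helper above (illustrative shapes).
shape = torch.Size([2, 4, 3])  # batch 2, q = 4 points, 3 outputs
posterior = _get_test_posterior(shape, device=torch.device("cpu"), dtype=torch.double)
samples = posterior.rsample(sample_shape=torch.Size([16]))
print(samples.shape)  # torch.Size([16, 2, 4, 3])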
Example #32
0
    def test_log_prob(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            mean = torch.randn(4, 3, device=device, dtype=dtype)
            var = torch.randn(12, device=device, dtype=dtype).abs_()
            values = mean + 0.5
            diffs = (values - mean).view(-1)

            res = MultitaskMultivariateNormal(
                mean, DiagLazyTensor(var)).log_prob(values)
            actual = -0.5 * (math.log(math.pi * 2) * 12 + var.log().sum() +
                             (diffs / var * diffs).sum())
            self.assertLess((res - actual).div(res).abs().item(), 1e-2)

            mean = torch.randn(3, 4, 3, device=device, dtype=dtype)
            var = torch.randn(3, 12, device=device, dtype=dtype).abs_()
            values = mean + 0.5
            diffs = (values - mean).view(3, -1)

            res = MultitaskMultivariateNormal(
                mean, DiagLazyTensor(var)).log_prob(values)
            actual = -0.5 * (math.log(math.pi * 2) * 12 + var.log().sum(-1) +
                             (diffs / var * diffs).sum(-1))
            self.assertLess((res - actual).div(res).abs().norm(), 1e-2)
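The closed form used in the test is the standard diagonal-Gaussian log density, log p(v) = -1/2 (d log 2pi + sum_i log var_i + sum_i (v_i - mu_i)^2 / var_i) with d = 12. A hypothetical cross-check against torch.distributions, which should agree up to numerical precision:

import math
import torch

# Hypothetical cross-check of the closed form above (not part of the test).
mean = torch.randn(4, 3)
var = torch.randn(12).abs_()
values = mean + 0.5
ref = torch.distributions.MultivariateNormal(
    loc=mean.view(-1), covariance_matrix=torch.diag(var)
).log_prob(values.view(-1))
manual = -0.5 * (math.log(math.pi * 2) * 12 + var.log().sum()
                 + ((values - mean).view(-1).pow(2) / var).sum())
print(torch.allclose(ref, manual))  # True (up to floating-point tolerance)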
Example #33
0
    def test_degenerate_GPyTorchPosterior_Multitask(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            # singular covariance matrix
            degenerate_covar = torch.tensor(
                [[1, 1, 0], [1, 1, 0], [0, 0, 2]], dtype=dtype, device=device
            )
            mean = torch.rand(3, dtype=dtype, device=device)
            mvn = MultivariateNormal(mean, lazify(degenerate_covar))
            mvn = MultitaskMultivariateNormal.from_independent_mvns([mvn, mvn])
            posterior = GPyTorchPosterior(mvn=mvn)
            # basics
            self.assertEqual(posterior.device.type, device.type)
            self.assertTrue(posterior.dtype == dtype)
            self.assertEqual(posterior.event_shape, torch.Size([3, 2]))
            mean_exp = mean.unsqueeze(-1).repeat(1, 2)
            self.assertTrue(torch.equal(posterior.mean, mean_exp))
            variance_exp = degenerate_covar.diag().unsqueeze(-1).repeat(1, 2)
            self.assertTrue(torch.equal(posterior.variance, variance_exp))
            # rsample
            with warnings.catch_warnings(record=True) as w:
                # we check that the p.d. warning is emitted - this only
                # happens once per posterior, so we need to check only once
                samples = posterior.rsample(sample_shape=torch.Size([4]))
                self.assertEqual(len(w), 1)
                self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
                self.assertTrue("not p.d." in str(w[-1].message))
            self.assertEqual(samples.shape, torch.Size([4, 3, 2]))
            samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
            self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 2]))
            # rsample w/ base samples
            base_samples = torch.randn(4, 3, 2, device=device, dtype=dtype)
            samples_b1 = posterior.rsample(
                sample_shape=torch.Size([4]), base_samples=base_samples
            )
            samples_b2 = posterior.rsample(
                sample_shape=torch.Size([4]), base_samples=base_samples
            )
            self.assertTrue(torch.allclose(samples_b1, samples_b2))
            base_samples2 = torch.randn(4, 2, 3, 2, device=device, dtype=dtype)
            samples2_b1 = posterior.rsample(
                sample_shape=torch.Size([4, 2]), base_samples=base_samples2
            )
            samples2_b2 = posterior.rsample(
                sample_shape=torch.Size([4, 2]), base_samples=base_samples2
            )
            self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
            # collapse_batch_dims
            b_mean = torch.rand(2, 3, dtype=dtype, device=device)
            b_degenerate_covar = degenerate_covar.expand(2, *degenerate_covar.shape)
            b_mvn = MultivariateNormal(b_mean, lazify(b_degenerate_covar))
            b_mvn = MultitaskMultivariateNormal.from_independent_mvns([b_mvn, b_mvn])
            b_posterior = GPyTorchPosterior(mvn=b_mvn)
            b_base_samples = torch.randn(4, 1, 3, 2, device=device, dtype=dtype)
            with warnings.catch_warnings(record=True) as w:
                b_samples = b_posterior.rsample(
                    sample_shape=torch.Size([4]), base_samples=b_base_samples
                )
                self.assertEqual(len(w), 1)
                self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
                self.assertTrue("not p.d." in str(w[-1].message))
            self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 2]))