Example #1
0
def test_logpdf(x, w):
    """Check `GPAR.logpdf` on a two-layer model against per-layer logpdfs.

    Exercises the full computation, the `only_last_layer` option, resuming a
    computation from the inputs returned for a subset of outputs, and the
    `sample_missing` option.

    Args:
        x (tensor): Inputs.
        w (tensor): Weights; column `i` is used for layer `i`.
    """
    measure = Measure()
    f1 = GP(EQ(), measure=measure)
    e1 = GP(2e-1 * Delta(), measure=measure)
    f2 = GP(Linear(), measure=measure)
    e2 = GP(1e-1 * Delta(), measure=measure)

    gpar = GPAR()
    gpar = gpar.add_layer(lambda: (f1, e1))
    gpar = gpar.add_layer(lambda: (f2, e2))

    # Draw data from the model itself.
    y = gpar.sample(x, w, latent=True)

    # Reference values: evaluate each layer by hand, feeding the first output
    # in as an extra input column of the second layer.
    first_inputs = WeightedUnique(x, w[:, 0])
    second_inputs = WeightedUnique(B.concat(x, y[:, 0:1], axis=1), w[:, 1])
    expected_first = (f1 + e1)(first_inputs).logpdf(y[:, 0])
    expected_second = (f2 + e2)(second_inputs).logpdf(y[:, 1])

    # The full logpdf is the sum over layers.
    full = gpar.logpdf(x, y, w)
    assert full == expected_first + expected_second

    # Restricting to the last layer gives only the second term.
    last_only = gpar.logpdf(x, y, w, only_last_layer=True)
    assert last_only == expected_second

    # Resume: process output 0 first and keep the updated inputs, then
    # continue with output 1 from there.
    resume_state = gpar.logpdf(x, y, w, return_inputs=True, outputs=[0])
    x_partial, x_ind_partial = resume_state
    resumed = gpar.logpdf(x_partial, y, w, x_ind=x_ind_partial, outputs=[1])
    assert resumed == expected_second

    # With a missing value that is sampled, two evaluations should give
    # different (stochastic) estimates.
    y[1, 0] = np.nan
    first_estimate = gpar.logpdf(x, y, w, sample_missing=True)
    second_estimate = gpar.logpdf(x, y, w, sample_missing=True)
    all_different(first_estimate, second_estimate)
Example #2
0
def test_obs(x):
    """Check that `GPAR._obs` builds dense and sparse observations correctly.

    Every other observation is set to NaN. In both the dense and the sparse
    case, the logpdf of the resulting observation object is compared against
    the logpdf of the noisy process at the remaining, weighted inputs.

    Args:
        x (tensor): Inputs.
    """
    measure = Measure()
    f = GP(EQ(), measure=measure)
    e = GP(1e-1 * Delta(), measure=measure)

    # Random positive weights and a sample of the latent function.
    n = B.shape(x)[0]
    w = B.rand(n) + 1e-2
    y = f(x).sample()

    # Mark every even-indexed observation as missing.
    y_missing = y.copy()
    y_missing[::2] = np.nan

    # The odd-indexed points are the ones that remain available.
    observed = slice(1, None, 2)

    # Dense case: no inducing points.
    dense_model = GPAR()
    dense_obs = dense_model._obs(x, None, y_missing, w, f, e)
    assert isinstance(dense_obs, Obs)
    dense_logpdf = measure.logpdf(dense_obs)
    dense_reference = (f + e)(WeightedUnique(x[observed], w[observed])).logpdf(
        y[observed]
    )
    approx(dense_logpdf, dense_reference, atol=1e-6)

    # Sparse case: the inducing inputs coincide with the inputs.
    sparse_model = GPAR(x_ind=x)
    sparse_obs = sparse_model._obs(x, x, y_missing, w, f, e)
    assert isinstance(sparse_obs, SparseObs)
    sparse_logpdf = measure.logpdf(sparse_obs)
    sparse_reference = (f + e)(WeightedUnique(x[observed], w[observed])).logpdf(
        y[observed]
    )
    approx(sparse_logpdf, sparse_reference, atol=1e-6)
Example #3
0
    def sample(self, x, w, latent=False):
        """Draw a sample from the model, one layer at a time.

        Each layer is sampled at the current inputs, weighted by the matching
        column of `w`. Unless the layer is the last one, the inputs are then
        updated through `_update_inputs` using the noisy sample of that layer.

        Args:
            x (tensor): Inputs to sample at.
            w (tensor): Weights. Column `i` is used for layer `i`.
            latent (bool, optional): Return samples of the latent functions
                rather than of the noisy observations. The inputs of later
                layers are still updated with the noisy sample. Defaults to
                `False`.

        Returns:
            tensor: Sample, with one column per layer.
        """
        # Start with zero columns and append one column per layer.
        draws = B.zeros(B.dtype(x), B.shape(x)[0], 0)
        x_ind = self.x_ind

        for layer_index, (final_layer, build) in enumerate(last(self.layers)):
            f, e = build()  # Instantiate the latent and noise processes.
            weighted = WeightedUnique(x, w[:, layer_index])

            if latent:
                # Ancestral sampling: draw the latent function, then add a
                # separate noise draw to obtain the observed value.
                f_draw = f(weighted).sample()
                y_draw = f_draw + e(weighted).sample()
                column = f_draw
            else:
                # Draw the noisy observation directly.
                y_draw = (f + e)(weighted).sample()
                column = y_draw

            draws = B.concat(draws, column, axis=1)

            if final_layer:
                # No further layer consumes the inputs.
                continue

            # Pass the noisy sample on to the inputs of the next layer.
            x, x_ind = self._update_inputs(x, x_ind, y_draw, f, None)

        return draws
Example #4
0
def test_logpdf(x, w):
    """Check `GPARRegressor.logpdf` under the prior and the posterior.

    Also checks that requesting the posterior logpdf before conditioning
    raises a `RuntimeError`, and exercises the `sample_missing` option.

    Args:
        x (tensor): Inputs.
        w (tensor or None): Weights; column `i` is used for output `i`. If
            `None`, the reference computation uses unweighted inputs.
    """
    # Sample some data from a "sensitive" GPAR.
    settings = {
        "replace": False,
        "impute": False,
        "nonlinear": True,
        "nonlinear_scale": 0.1,
        "linear": True,
        "linear_scale": 10.0,
        "noise": 1e-2,
        "normalise_y": False,
    }
    reg = GPARRegressor(**settings)
    y = reg.sample(x, w, p=2, latent=True)

    # Rebuild the underlying two-output model to get at the layers.
    n_features = B.shape(B.uprank(x))[1]
    gpar = _construct_gpar(reg, reg.vs, n_features, 2)
    first_layer, second_layer = gpar.layers[0], gpar.layers[1]
    f1, e1 = first_layer()
    f2, e2 = second_layer()

    # Reference inputs per layer; the second layer also sees the first output.
    inputs1 = x
    inputs2 = B.concat(B.uprank(x), y[:, 0:1], axis=1)
    if w is not None:
        inputs1 = WeightedUnique(inputs1, w[:, 0])
        inputs2 = WeightedUnique(inputs2, w[:, 1])

    # Prior: the logpdf is the sum of the per-layer logpdfs.
    prior_first = (f1 + e1)(inputs1).logpdf(y[:, 0])
    prior_second = (f2 + e2)(inputs2).logpdf(y[:, 1])
    approx(reg.logpdf(x, y, w), prior_first + prior_second, atol=1e-6)

    # Posterior: condition each layer's measure on its own observations and
    # attach a fresh noise process to the conditioned measure.
    post1 = f1.measure | ((f1 + e1)(inputs1), y[:, 0])
    post2 = f2.measure | ((f2 + e2)(inputs2), y[:, 1])
    e1_post = GP(e1.mean, e1.kernel, measure=post1)
    e2_post = GP(e2.mean, e2.kernel, measure=post2)
    posterior_first = (post1(f1) + e1_post)(inputs1).logpdf(y[:, 0])
    posterior_second = (post2(f2) + e2_post)(inputs2).logpdf(y[:, 1])

    # Requesting the posterior before conditioning must fail.
    with pytest.raises(RuntimeError):
        reg.logpdf(x, y, w, posterior=True)

    reg.condition(x, y, w)
    approx(
        reg.logpdf(x, y, w, posterior=True),
        posterior_first + posterior_second,
        atol=1e-6,
    )

    # With missing values that are sampled, two evaluations should give
    # different (stochastic) estimates.
    y[::2, 0] = np.nan
    first_estimate = reg.logpdf(x, y, w, sample_missing=True)
    second_estimate = reg.logpdf(x, y, w, sample_missing=True)
    all_different(first_estimate, second_estimate)
Example #5
0
    def _obs(self, x, x_ind, y, w, f, e):
        """Construct the observation object for a single layer.

        Rows whose first output column is NaN are dropped, and the remaining
        inputs are wrapped in a `WeightedUnique` with the matching weights.

        Args:
            x (tensor): Inputs.
            x_ind (tensor): Inputs for the inducing points. Only used if
                `self.sparse` is truthy.
            y (tensor): Outputs; column 0 determines which rows are available.
            w (tensor): Weights, one per row of `x`.
            f: Latent process of the layer.
            e: Noise process of the layer.

        Returns:
            `SparseObs` if `self.sparse` is truthy, otherwise `Obs`.
        """
        # Keep only the rows that have an observed value.
        keep = ~B.isnan(y[:, 0])
        x_kept = x[keep]
        y_kept = y[keep]
        w_kept = w[keep]

        # Attach the weights to the remaining inputs.
        x_weighted = WeightedUnique(x_kept, w=w_kept)

        if not self.sparse:
            return Obs((f + e)(x_weighted), y_kept)

        return SparseObs(f(x_ind), e, f(x_weighted), y_kept)
Example #6
0
    def logpdf(
        self,
        x,
        y,
        w,
        only_last_layer=False,
        sample_missing=False,
        return_inputs=False,
        x_ind=None,
        outputs=None,
    ):
        """Compute the logpdf by accumulating it layer by layer.

        For every selected output, the inputs are filtered with that output's
        mask, the layer is constructed, and the observations are built. Unless
        the layer is the last one, the inputs are then updated through
        `_update_inputs` before moving on.

        Args:
            x (tensor): Inputs.
            y (tensor): Outputs.
            w (tensor): Weights.
            only_last_layer (:obj:`bool`, optional): Only accumulate the logpdf
                of the last layer. Defaults to `False`.
            sample_missing (:obj:`bool`, optional): Sample missing data to
                compute an unbiased estimate of the pdf, *not* logpdf. Defaults
                to `False`.
            return_inputs (:obj:`bool`, optional): Instead of the logpdf, return
                the inputs and the inputs for the inducing points with previous
                outputs concatenated. This can be used to perform
                precomputation. Defaults to `False`.
            x_ind (tensor, optional): Inputs for the inducing points. This can
                be used to resume a computation. Defaults to
                :attr:`.model.GPAR.x_ind`.
            outputs (:obj:`list[int]`, optional): Indices of the outputs to
                compute the logpdf for. Defaults to all outputs.

        Returns:
            scalar: Logpdf. If `return_inputs` is set to `True`, instead a
                tuple containing the inputs and the inputs for the inducing
                points with previous outputs concatenated.
        """
        total = B.cast(B.dtype(x), 0)
        if x_ind is None:
            x_ind = self.x_ind

        # Missing values are kept if they are imputed or sampled later on.
        keep_missing = self.impute or sample_missing
        data_and_layers = zip(per_output(y, w, keep=keep_missing), self.layers)

        for final_layer, ((y_i, w_i, mask), build) in last(
            data_and_layers, select=outputs
        ):
            x = x[mask]  # Restrict the inputs to this output's mask.
            f, e = build()  # Instantiate the latent and noise processes.
            obs = self._obs(x, x_ind, y_i, w_i, f, e)

            # Add this layer's contribution, unless only the last layer counts.
            if final_layer or not only_last_layer:
                total = total + f.measure.logpdf(obs)

            if final_layer:
                # No further layer consumes the inputs.
                continue

            missing = B.isnan(y_i[:, 0])
            if sample_missing and B.any(missing):
                # Fill the gaps with a draw from the posterior of the noisy
                # process, which gives an unbiased sample of the pdf.
                post = f.measure | obs
                x_missing_weighted = WeightedUnique(x[missing], w_i[missing])
                imputed = post(f + e)(x_missing_weighted).sample()
                y_i = merge(y_i, imputed, missing)

            # Update the inputs for the next layer.
            x, x_ind = self._update_inputs(x, x_ind, y_i, f, obs)

        if return_inputs:
            return x, x_ind
        return total