def test_logpdf(x, w):
    prior = Measure()
    f1, e1 = GP(EQ(), measure=prior), GP(2e-1 * Delta(), measure=prior)
    f2, e2 = GP(Linear(), measure=prior), GP(1e-1 * Delta(), measure=prior)
    gpar = GPAR().add_layer(lambda: (f1, e1)).add_layer(lambda: (f2, e2))

    # Generate some data.
    y = gpar.sample(x, w, latent=True)

    # Compute logpdf.
    x1 = WeightedUnique(x, w[:, 0])
    x2 = WeightedUnique(B.concat(x, y[:, 0:1], axis=1), w[:, 1])
    logpdf1 = (f1 + e1)(x1).logpdf(y[:, 0])
    logpdf2 = (f2 + e2)(x2).logpdf(y[:, 1])

    # Test computation of GPAR.
    assert gpar.logpdf(x, y, w) == logpdf1 + logpdf2
    assert gpar.logpdf(x, y, w, only_last_layer=True) == logpdf2

    # Test resuming computation.
    x_partial, x_ind_partial = gpar.logpdf(x, y, w, return_inputs=True, outputs=[0])
    assert gpar.logpdf(x_partial, y, w, x_ind=x_ind_partial, outputs=[1]) == logpdf2

    # Test that sampling missing gives a stochastic estimate.
    y[1, 0] = np.nan
    all_different(
        gpar.logpdf(x, y, w, sample_missing=True),
        gpar.logpdf(x, y, w, sample_missing=True),
    )
def test_obs(x):
    prior = Measure()
    f = GP(EQ(), measure=prior)
    e = GP(1e-1 * Delta(), measure=prior)

    # Generate some data.
    w = B.rand(B.shape(x)[0]) + 1e-2
    y = f(x).sample()

    # Set some observations to be missing.
    y_missing = y.copy()
    y_missing[::2] = np.nan

    # Check dense case.
    gpar = GPAR()
    obs = gpar._obs(x, None, y_missing, w, f, e)
    assert isinstance(obs, Obs)
    approx(
        prior.logpdf(obs),
        (f + e)(WeightedUnique(x[1::2], w[1::2])).logpdf(y[1::2]),
        atol=1e-6,
    )

    # Check sparse case.
    gpar = GPAR(x_ind=x)
    obs = gpar._obs(x, x, y_missing, w, f, e)
    assert isinstance(obs, SparseObs)
    approx(
        prior.logpdf(obs),
        (f + e)(WeightedUnique(x[1::2], w[1::2])).logpdf(y[1::2]),
        atol=1e-6,
    )
def sample(self, x, w, latent=False):
    """Sample.

    Args:
        x (tensor): Inputs to sample at.
        w (tensor): Weights.
        latent (bool, optional): Sample latent function. Defaults to `False`.

    Returns:
        tensor: Sample.
    """
    sample = B.zeros(B.dtype(x), B.shape(x)[0], 0)
    x_ind = self.x_ind

    for i, (is_last, model) in enumerate(last(self.layers)):
        f, e = model()  # Construct model.
        x_weighted = WeightedUnique(x, w[:, i])

        if latent:
            # Sample latent function: use ancestral sampling.
            f_sample = f(x_weighted).sample()
            y_sample = f_sample + e(x_weighted).sample()
            sample = B.concat(sample, f_sample, axis=1)
        else:
            # Sample observed function.
            y_sample = (f + e)(x_weighted).sample()
            sample = B.concat(sample, y_sample, axis=1)

        # Update inputs.
        if not is_last:
            x, x_ind = self._update_inputs(x, x_ind, y_sample, f, None)

    return sample
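# A minimal usage sketch of ancestral sampling from a two-layer GPAR, mirroring the
# constructions in the tests above. The import path of `GPAR` and the shapes of `x`
# and `w` are assumptions for illustration, not taken from the source.
import lab as B
from stheno import GP, EQ, Linear, Delta, Measure

from gpar.model import GPAR  # Assumed import path.

# Each layer is a thunk returning a (latent process, noise process) pair.
prior = Measure()
f1, e1 = GP(EQ(), measure=prior), GP(2e-1 * Delta(), measure=prior)
f2, e2 = GP(Linear(), measure=prior), GP(1e-1 * Delta(), measure=prior)
gpar = GPAR().add_layer(lambda: (f1, e1)).add_layer(lambda: (f2, e2))

# Inputs and strictly positive per-output weights: column `i` of `w` weights the
# data points of output `i`.
x = B.rand(50, 1)
w = B.rand(50, 2) + 1e-2

y_latent = gpar.sample(x, w, latent=True)  # Noise-free (latent) sample, shape (50, 2).
y_observed = gpar.sample(x, w)             # Sample with observation noise added.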
def test_logpdf(x, w):
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(
        replace=False,
        impute=False,
        nonlinear=True,
        nonlinear_scale=0.1,
        linear=True,
        linear_scale=10.0,
        noise=1e-2,
        normalise_y=False,
    )
    y = reg.sample(x, w, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, B.shape(B.uprank(x))[1], 2)
    f1, e1 = gpar.layers[0]()
    f2, e2 = gpar.layers[1]()

    # Test computation under prior.
    x1 = x
    x2 = B.concat(B.uprank(x), y[:, 0:1], axis=1)
    if w is not None:
        x1 = WeightedUnique(x1, w[:, 0])
        x2 = WeightedUnique(x2, w[:, 1])
    logpdf1 = (f1 + e1)(x1).logpdf(y[:, 0])
    logpdf2 = (f2 + e2)(x2).logpdf(y[:, 1])
    approx(reg.logpdf(x, y, w), logpdf1 + logpdf2, atol=1e-6)

    # Test computation under posterior.
    post1 = f1.measure | ((f1 + e1)(x1), y[:, 0])
    post2 = f2.measure | ((f2 + e2)(x2), y[:, 1])
    e1_post = GP(e1.mean, e1.kernel, measure=post1)
    e2_post = GP(e2.mean, e2.kernel, measure=post2)
    logpdf1 = (post1(f1) + e1_post)(x1).logpdf(y[:, 0])
    logpdf2 = (post2(f2) + e2_post)(x2).logpdf(y[:, 1])
    with pytest.raises(RuntimeError):
        reg.logpdf(x, y, w, posterior=True)
    reg.condition(x, y, w)
    approx(reg.logpdf(x, y, w, posterior=True), logpdf1 + logpdf2, atol=1e-6)

    # Test that sampling missing gives a stochastic estimate.
    y[::2, 0] = np.nan
    all_different(
        reg.logpdf(x, y, w, sample_missing=True),
        reg.logpdf(x, y, w, sample_missing=True),
    )
def _obs(self, x, x_ind, y, w, f, e):
    """Construct the observations for a single layer, dropping missing data
    points and applying the weights."""
    # Filter available data points.
    available = ~B.isnan(y[:, 0])
    x = x[available]
    y = y[available]
    w = w[available]

    # Perform weighting.
    x = WeightedUnique(x, w=w)

    if self.sparse:
        return SparseObs(f(x_ind), e, f(x), y)
    else:
        return Obs((f + e)(x), y)
def logpdf(
    self,
    x,
    y,
    w,
    only_last_layer=False,
    sample_missing=False,
    return_inputs=False,
    x_ind=None,
    outputs=None,
):
    """Compute the logpdf.

    Args:
        x (tensor): Inputs.
        y (tensor): Outputs.
        w (tensor): Weights.
        only_last_layer (:obj:`bool`, optional): Compute the logpdf for only
            the last layer. Defaults to `False`.
        sample_missing (:obj:`bool`, optional): Sample missing data to compute
            an unbiased estimate of the pdf, *not* the logpdf. Defaults to
            `False`.
        return_inputs (:obj:`bool`, optional): Instead return the inputs and
            the inputs for the inducing points with the previous outputs
            concatenated. This can be used to perform precomputation. Defaults
            to `False`.
        x_ind (tensor, optional): Inputs for the inducing points. This can be
            used to resume a computation. Defaults to
            :attr:`.model.GPAR.x_ind`.
        outputs (:obj:`list[int]`, optional): Only compute the logpdf for a
            subset of outputs. The list specifies the indices of the outputs.
            Defaults to computing the logpdf for all outputs.

    Returns:
        scalar: Logpdf. If `return_inputs` is set to `True`, instead returns a
            tuple containing the inputs and the inputs for the inducing points
            with the previous outputs concatenated.
    """
    logpdf = B.cast(B.dtype(x), 0)
    x_ind = self.x_ind if x_ind is None else x_ind

    y_per_output = per_output(y, w, keep=self.impute or sample_missing)
    for is_last, ((y, w, mask), model) in last(
        zip(y_per_output, self.layers), select=outputs
    ):
        x = x[mask]  # Filter according to mask.
        f, e = model()  # Construct model.
        obs = self._obs(x, x_ind, y, w, f, e)  # Construct observations.

        # Accumulate logpdf.
        if not only_last_layer or (is_last and only_last_layer):
            logpdf = logpdf + f.measure.logpdf(obs)

        if not is_last:
            missing = B.isnan(y[:, 0])

            # Sample missing data for an unbiased sample of the pdf.
            if sample_missing and B.any(missing):
                post = f.measure | obs
                x_missing_weighted = WeightedUnique(x[missing], w[missing])
                y = merge(y, post(f + e)(x_missing_weighted).sample(), missing)

            # Update inputs.
            x, x_ind = self._update_inputs(x, x_ind, y, f, obs)

    # Return inputs if asked for.
    return (x, x_ind) if return_inputs else logpdf
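# A minimal usage sketch for `logpdf`, following the calls exercised in the tests
# above and continuing the sampling sketch (same hypothetical `gpar`, `x`, `w`).
import numpy as np

y = gpar.sample(x, w, latent=True)

# Full logpdf and the contribution of the last layer only.
lp_all = gpar.logpdf(x, y, w)
lp_last = gpar.logpdf(x, y, w, only_last_layer=True)

# Precompute the inputs using the first output, then resume the computation for
# the second output only.
x_partial, x_ind_partial = gpar.logpdf(x, y, w, return_inputs=True, outputs=[0])
lp_second = gpar.logpdf(x_partial, y, w, x_ind=x_ind_partial, outputs=[1])

# With missing data, `sample_missing=True` yields a stochastic estimate that is
# unbiased for the pdf (not the logpdf), so repeated calls generally differ.
y[1, 0] = np.nan
estimate = gpar.logpdf(x, y, w, sample_missing=True)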