def test_conditional_whiten(Xnew, X, kernel, f_loc, f_scale_tril, loc, cov):
    if f_scale_tril is None:
        return
    loc0, cov0 = conditional(Xnew, X, kernel, f_loc, f_scale_tril,
                             full_cov=True, whiten=False)
    # these tests assume X has 3 rows, hence the hard-coded identity size
    Kff = kernel(X) + torch.eye(3) * 1e-6
    Lff = Kff.potrf(upper=False)
    # map the variational parameters into the whitened coordinate system
    whiten_f_loc = Lff.inverse().matmul(f_loc)
    whiten_f_scale_tril = Lff.inverse().matmul(f_scale_tril)
    loc1, cov1 = conditional(Xnew, X, kernel, whiten_f_loc, whiten_f_scale_tril,
                             full_cov=True, whiten=True)
    assert_equal(loc0, loc1)
    assert_equal(cov0, cov1)

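# The test above relies on the whitening identity: if v has location
# Lff^{-1} f_loc and scale Lff^{-1} f_scale_tril, then f = Lff @ v has
# location f_loc and scale f_scale_tril, so both parameterizations yield the
# same predictive distribution. A minimal standalone check of the location
# part, using modern torch.linalg in place of the deprecated Tensor.potrf
# (toy values; not part of the test suite):
import torch

Kff = torch.tensor([[2.0, 0.5], [0.5, 1.0]])
Lff = torch.linalg.cholesky(Kff)
f_loc = torch.tensor([0.3, -0.7])
whiten_f_loc = torch.linalg.solve_triangular(Lff, f_loc.unsqueeze(-1), upper=False).squeeze(-1)
# mapping back through Lff recovers the unwhitened location
assert torch.allclose(Lff @ whiten_f_loc, f_loc)
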
def test_conditional(Xnew, X, kernel, f_loc, f_scale_tril, loc, cov):
    loc0, cov0 = conditional(Xnew, X, kernel, f_loc, f_scale_tril, full_cov=True)
    loc1, var1 = conditional(Xnew, X, kernel, f_loc, f_scale_tril, full_cov=False)
    if loc is not None:
        assert_equal(loc0, loc)
        assert_equal(loc1, loc)
    # the diagonal of the full covariance must match the marginal variances
    n = cov0.shape[-1]
    var0 = torch.stack([mat.diag() for mat in cov0.view(-1, n, n)]).reshape(cov0.shape[:-1])
    assert_equal(var0, var1)
    if cov is not None:
        assert_equal(cov0, cov)

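# The stack-of-diagonals loop above has a loop-free equivalent via
# torch.diagonal over the last two dimensions; a standalone sketch with a
# toy diagonal covariance (illustrative values only):
import torch

cov0 = torch.eye(3) * torch.tensor([1.0, 2.0, 3.0])
n = cov0.shape[-1]
var_loop = torch.stack([mat.diag() for mat in cov0.view(-1, n, n)]).reshape(cov0.shape[:-1])
var_diag = torch.diagonal(cov0, dim1=-2, dim2=-1)
assert torch.equal(var_loop, var_diag)
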
def forward(self, Xnew, full_cov=False):
    r"""
    Computes the mean and covariance matrix (or variance) of the Gaussian
    Process posterior at the test input data :math:`X_{new}`:

    .. math:: p(f^* \mid X_{new}, X, y, k, f_{loc}, f_{scale\_tril})
        = \mathcal{N}(loc, cov).

    .. note:: The variational parameters ``f_loc`` and ``f_scale_tril``,
        together with the kernel's parameters, have been learned from a
        training procedure (MCMC or SVI).

    :param torch.Tensor Xnew: Input data for testing. Note that
        ``Xnew.shape[1:]`` must be the same as ``self.X.shape[1:]``.
    :param bool full_cov: A flag to decide if we want to predict the full
        covariance matrix or just the variance.
    :returns: loc and covariance matrix (or variance) of :math:`p(f^*(X_{new}))`
    :rtype: tuple(torch.Tensor, torch.Tensor)
    """
    self._check_Xnew_shape(Xnew)
    self.set_mode("guide")

    loc, cov = conditional(Xnew, self.X, self.kernel, self.f_loc, self.f_scale_tril,
                           full_cov=full_cov, whiten=self.whiten, jitter=self.jitter)
    return loc + self.mean_function(Xnew), cov

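# A sketch of how the forward method above is typically called for
# prediction (model construction and data here are illustrative, not from
# the source; assumes pyro.contrib.gp is available):
import torch
import pyro.contrib.gp as gp

X = torch.linspace(-1.0, 1.0, 10).unsqueeze(-1)
y = torch.sin(3 * X.squeeze(-1))
gpmodel = gp.models.VariationalGP(X, y, gp.kernels.RBF(input_dim=1),
                                  likelihood=gp.likelihoods.Gaussian())
Xtest = torch.linspace(-1.0, 1.0, 5).unsqueeze(-1)
with torch.no_grad():
    loc, var = gpmodel(Xtest, full_cov=False)   # pointwise variances
    loc, cov = gpmodel(Xtest, full_cov=True)    # full covariance matrix
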
def forward(self, Xnew, full_cov=False):
    r"""
    Computes the mean and covariance matrix (or variance) of the Gaussian
    Process posterior at the test input data :math:`X_{new}`:

    .. math:: p(f^* \mid X_{new}, X, y, k, f_{loc}, f_{scale\_tril})
        = \mathcal{N}(loc, cov).

    .. note:: The variational parameters ``f_loc`` and ``f_scale_tril``,
        together with the kernel's parameters, have been learned from a
        training procedure (MCMC or SVI).

    :param torch.Tensor Xnew: Input data for testing. Note that
        ``Xnew.shape[1:]`` must be the same as ``self.X.shape[1:]``.
    :param bool full_cov: A flag to decide if we want to predict the full
        covariance matrix or just the variance.
    :returns: loc and covariance matrix (or variance) of :math:`p(f^*(X_{new}))`
    :rtype: tuple(torch.Tensor, torch.Tensor)
    """
    self._check_Xnew_shape(Xnew)

    # avoid sampling the unnecessary latent f
    self._sample_latent = False
    f_loc, f_scale_tril = self.guide()
    self._sample_latent = True
    loc, cov = conditional(Xnew, self.X, self.kernel, f_loc, f_scale_tril,
                           full_cov=full_cov, whiten=self.whiten, jitter=self.jitter)
    return loc + self.mean_function(Xnew), cov

def model(self):
    self.set_mode("model")

    Xu = self.get_param("Xu")
    u_loc = self.get_param("u_loc")
    u_scale_tril = self.get_param("u_scale_tril")

    M = Xu.shape[0]
    Kuu = self.kernel(Xu) + torch.eye(M, out=Xu.new_empty(M, M)) * self.jitter
    Luu = Kuu.potrf(upper=False)

    # prior over the inducing outputs u: N(0, I) if whitened, else N(0, Kuu)
    zero_loc = Xu.new_zeros(u_loc.shape)
    u_name = param_with_module_name(self.name, "u")
    if self.whiten:
        Id = torch.eye(M, out=Xu.new_empty(M, M))
        pyro.sample(u_name,
                    dist.MultivariateNormal(zero_loc, scale_tril=Id)
                        .independent(zero_loc.dim() - 1))
    else:
        pyro.sample(u_name,
                    dist.MultivariateNormal(zero_loc, scale_tril=Luu)
                        .independent(zero_loc.dim() - 1))

    f_loc, f_var = conditional(self.X, Xu, self.kernel, u_loc, u_scale_tril, Luu,
                               full_cov=False, whiten=self.whiten, jitter=self.jitter)
    f_loc = f_loc + self.mean_function(self.X)
    if self.y is None:
        return f_loc, f_var
    else:
        # rescale the likelihood so minibatches match the full dataset size
        with poutine.scale(None, self.num_data / self.X.shape[0]):
            return self.likelihood(f_loc, f_var, self.y)

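# In the whitened branch above, the prior on the inducing outputs is a
# standard normal: u = Luu @ v for v ~ N(0, I) has covariance
# Luu @ Luu.T = Kuu, so the two branches encode the same prior. A standalone
# Monte Carlo check of that covariance identity (toy SPD matrix; values are
# illustrative):
import torch

Kuu = torch.tensor([[1.0, 0.3], [0.3, 2.0]])
Luu = torch.linalg.cholesky(Kuu)
v = torch.randn(2, 100000)      # draws of v ~ N(0, I)
u = Luu @ v                     # implied draws of u
emp_cov = u @ u.T / v.shape[1]  # empirical covariance of u
assert torch.allclose(emp_cov, Kuu, atol=0.1)
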
def model(self):
    self.set_mode("model")

    M = self.Xu.size(0)
    Kuu = self.kernel(self.Xu).contiguous()
    Kuu.view(-1)[::M + 1] += self.jitter  # add jitter to the diagonal
    Luu = Kuu.cholesky()

    zero_loc = self.Xu.new_zeros(self.u_loc.shape)
    if self.whiten:
        identity = eye_like(self.Xu, M)
        pyro.sample(self._pyro_get_fullname("u"),
                    dist.MultivariateNormal(zero_loc, scale_tril=identity)
                        .to_event(zero_loc.dim() - 1))
    else:
        pyro.sample(self._pyro_get_fullname("u"),
                    dist.MultivariateNormal(zero_loc, scale_tril=Luu)
                        .to_event(zero_loc.dim() - 1))

    f_loc, f_var = conditional(self.X, self.Xu, self.kernel, self.u_loc,
                               self.u_scale_tril, Luu, full_cov=False,
                               whiten=self.whiten, jitter=self.jitter)
    f_loc = f_loc + self.mean_function(self.X)
    if self.y is None:
        return f_loc, f_var
    else:
        # we would like to load likelihood's parameters outside poutine.scale context
        self.likelihood._load_pyro_samples()
        with poutine.scale(scale=self.num_data / self.X.size(0)):
            return self.likelihood(f_loc, f_var, self.y)

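# The strided-view line above adds jitter to the diagonal in place without
# materializing an identity matrix: flattening an (M, M) matrix makes the
# diagonal entries land every M + 1 elements. A standalone check:
import torch

M, jitter = 4, 1e-6
K = torch.rand(M, M)
K_ref = K + torch.eye(M) * jitter
K.view(-1)[::M + 1] += jitter
assert torch.allclose(K, K_ref)
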
def forward(self, Xnew, full_cov=False, noiseless=True):
    r"""
    Computes the mean and covariance matrix (or variance) of the Gaussian
    Process posterior at the test input data :math:`X_{new}`:

    .. math:: p(f^* \mid X_{new}, X, y, k, \epsilon) = \mathcal{N}(loc, cov).

    .. note:: The noise parameter ``noise`` (:math:`\epsilon`), together with
        the kernel's parameters, has been learned from a training procedure
        (MCMC or SVI).

    :param torch.Tensor Xnew: Input data for testing. Note that
        ``Xnew.shape[1:]`` must be the same as ``self.X.shape[1:]``.
    :param bool full_cov: A flag to decide if we want to predict the full
        covariance matrix or just the variance.
    :param bool noiseless: A flag to decide if we want to include noise in the
        prediction output or not.
    :returns: loc and covariance matrix (or variance) of :math:`p(f^*(X_{new}))`
    :rtype: tuple(torch.Tensor, torch.Tensor)
    """
    self._check_Xnew_shape(Xnew)
    self.set_mode("guide")

    N = self.X.size(0)
    Kff = self.kernel(self.X).contiguous()
    Kff.view(-1)[::N + 1] += self.jitter + self.noise  # add noise to the diagonal
    Lff = torch.linalg.cholesky(Kff)

    y_residual = self.y - self.mean_function(self.X)
    loc, cov = conditional(Xnew, self.X, self.kernel, y_residual, None, Lff,
                           full_cov, jitter=self.jitter)

    if full_cov and not noiseless:
        M = Xnew.size(0)
        cov = cov.contiguous()
        cov.view(-1, M * M)[:, ::M + 1] += self.noise  # add noise to the diagonal
    if not full_cov and not noiseless:
        cov = cov + self.noise

    return loc + self.mean_function(Xnew), cov

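# Usage sketch for the noiseless flag above: with noiseless=True the
# returned uncertainty covers the latent function only; with noiseless=False
# the observation noise is added back in (model and data here are
# illustrative, not from the source):
import torch
import pyro.contrib.gp as gp

X = torch.linspace(-1.0, 1.0, 10).unsqueeze(-1)
y = torch.sin(3 * X.squeeze(-1))
gpr = gp.models.GPRegression(X, y, gp.kernels.RBF(input_dim=1))
Xtest = torch.linspace(-1.0, 1.0, 5).unsqueeze(-1)
with torch.no_grad():
    loc, var_f = gpr(Xtest, full_cov=False, noiseless=True)
    loc, var_y = gpr(Xtest, full_cov=False, noiseless=False)
assert (var_y >= var_f).all()   # noise only inflates the variance
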
def sample_next(xnew, outside_vars):
    """Repeatedly samples from the Gaussian process posterior,
    conditioning on previously sampled values.
    """
    warn_if_nan(xnew)

    # Variables from outer scope
    X, y, Kff = outside_vars["X"], outside_vars["y"], outside_vars["Kff"]

    # Compute Cholesky decomposition of kernel matrix
    Lff = Kff.cholesky()
    y_residual = y - self.mean_function(X)

    # Compute conditional mean and variance
    loc, cov = conditional(xnew, X, self.kernel, y_residual, None, Lff,
                           False, jitter=self.jitter)
    if not noiseless:
        cov = cov + noise

    ynew = torchdist.Normal(loc + self.mean_function(xnew), cov.sqrt()).rsample()

    # Update kernel matrix
    N = outside_vars["N"]
    Kffnew = Kff.new_empty(N + 1, N + 1)
    Kffnew[:N, :N] = Kff
    cross = self.kernel(X, xnew).squeeze()
    end = self.kernel(xnew, xnew).squeeze()
    Kffnew[N, :N] = cross
    Kffnew[:N, N] = cross
    # No noise, just jitter for numerical stability
    Kffnew[N, N] = end + self.jitter

    # Heuristic to avoid adding degenerate points
    if Kffnew.logdet() > -15.:
        outside_vars["Kff"] = Kffnew
        outside_vars["N"] += 1
        outside_vars["X"] = torch.cat((X, xnew))
        outside_vars["y"] = torch.cat((y, ynew))
    return ynew

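# A reading of the logdet heuristic above: if xnew (nearly) duplicates an
# existing input, the extended kernel matrix is close to singular and its
# log-determinant drops sharply, so the point is skipped rather than
# destabilizing later Cholesky factorizations. A standalone illustration
# with toy kernel values (illustrative only):
import torch

Kff = torch.eye(2)
Kffnew = torch.empty(3, 3)
Kffnew[:2, :2] = Kff
cross = torch.tensor([1.0, 0.0])  # xnew duplicates training point 0
Kffnew[2, :2] = cross
Kffnew[:2, 2] = cross
Kffnew[2, 2] = 1.0 + 1e-6         # jitter keeps the matrix barely invertible
print(Kff.logdet(), Kffnew.logdet())  # 0.0 vs roughly -13.8
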
def forward(self, Xnew, full_cov=False, noiseless=True):
    r"""
    Computes the mean and covariance matrix (or variance) of the Gaussian
    Process posterior at the test input data :math:`X_{new}`:

    .. math:: p(f^* \mid X_{new}, X, y, k, \epsilon) = \mathcal{N}(loc, cov).

    .. note:: The noise parameter ``noise`` (:math:`\epsilon`), together with
        the kernel's parameters, has been learned from a training procedure
        (MCMC or SVI).

    :param torch.Tensor Xnew: Input data for testing. Note that
        ``Xnew.shape[1:]`` must be the same as ``self.X.shape[1:]``.
    :param bool full_cov: A flag to decide if we want to predict the full
        covariance matrix or just the variance.
    :param bool noiseless: A flag to decide if we want to include noise in the
        prediction output or not.
    :returns: loc and covariance matrix (or variance) of :math:`p(f^*(X_{new}))`
    :rtype: tuple(torch.Tensor, torch.Tensor)
    """
    self._check_Xnew_shape(Xnew)

    noise = self.guide()

    Kff = self.kernel(self.X) + noise.expand(self.X.shape[0]).diag()
    Lff = Kff.potrf(upper=False)

    y_residual = self.y - self.mean_function(self.X)
    loc, cov = conditional(Xnew, self.X, self.kernel, y_residual, None, Lff,
                           full_cov, jitter=self.jitter)

    if full_cov and not noiseless:
        cov = cov + noise.expand(Xnew.shape[0]).diag()
    if not full_cov and not noiseless:
        cov = cov + noise.expand(Xnew.shape[0])

    return loc + self.mean_function(Xnew), cov