def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
    """
    Build the symbolic log marginal likelihood of `y` under the
    inducing-point approximation selected by `self.approx`.

    Parameters
    ----------
    y : array-like
        Observed data; the mean function evaluated at `X` is subtracted
        from it.
    X : array-like
        Function input values.
    Xu : array-like
        Inducing point locations.
    sigma : scalar or Variable
        Noise standard deviation; it is squared before use.

    Returns
    -------
    The negated sum of the constant, log-determinant, quadratic and trace
    terms.
    """
    noise_var = tt.square(sigma)
    chol_uu = cholesky(stabilize(self.cov_func(Xu)))
    A = solve_lower(chol_uu, self.cov_func(Xu, X))
    q_diag = tt.sum(A * A, 0)

    # Only VFE contributes a trace correction; FITC and DTC leave it at zero.
    trace_term = 0.0
    if self.approx == "FITC":
        k_diag = self.cov_func(X, diag=True)
        lam = tt.clip(k_diag - q_diag, 0.0, np.inf) + noise_var
    else:
        # VFE and DTC share the same constant diagonal.
        lam = tt.ones_like(q_diag) * noise_var
        if self.approx == "VFE":
            residual_trace = tt.sum(self.cov_func(X, diag=True)) - tt.sum(tt.sum(A * A, 0))
            trace_term = (1.0 / (2.0 * noise_var)) * residual_trace

    A_scaled = A / lam
    chol_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_scaled, tt.transpose(A)))
    resid = y - self.mean_func(X)
    resid_scaled = resid / lam
    c = solve_lower(chol_B, tt.dot(A, resid_scaled))

    norm_term = 0.5 * X.shape[0] * tt.log(2.0 * np.pi)
    logdet_term = 0.5 * tt.sum(tt.log(lam)) + tt.sum(tt.log(tt.diag(chol_B)))
    quad_term = 0.5 * (tt.dot(resid, resid_scaled) - tt.dot(c, c))
    return -1.0 * (norm_term + logdet_term + quad_term + trace_term)
def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total):
    """
    Build the symbolic mean and (co)variance of the sparse approximate
    conditional at `Xnew`.

    Parameters
    ----------
    Xnew : array-like
        New input locations.
    pred_noise : bool
        If true, the squared `sigma` is added to the returned variance
        (or to the diagonal of the returned covariance).
    diag : bool
        If true, return only the variance vector instead of the full
        covariance matrix.
    X, Xu, y, sigma, cov_total, mean_total
        Conditioning inputs, inducing points, data, noise standard
        deviation, and the covariance / mean functions used for the
        conditioning terms.

    Returns
    -------
    mu, var : when `diag` is true.
    mu, cov : otherwise, with `cov` passed through `stabilize`.
    """
    noise_var = tt.square(sigma)
    chol_uu = cholesky(stabilize(cov_total(Xu)))
    A = solve_lower(chol_uu, cov_total(Xu, X))
    q_diag = tt.sum(A * A, 0)

    if self.approx == "FITC":
        lam = tt.clip(cov_total(X, diag=True) - q_diag, 0.0, np.inf) + noise_var
    else:
        # VFE or DTC
        lam = tt.ones_like(q_diag) * noise_var

    chol_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A / lam, tt.transpose(A)))
    resid_scaled = (y - mean_total(X)) / lam
    c = solve_lower(chol_B, tt.dot(A, resid_scaled))

    As = solve_lower(chol_uu, self.cov_func(Xu, Xnew))
    back_solved = solve_upper(tt.transpose(chol_B), c)
    mu = self.mean_func(Xnew) + tt.dot(tt.transpose(As), back_solved)
    C = solve_lower(chol_B, As)

    if diag:
        var = self.cov_func(Xnew, diag=True) - tt.sum(tt.square(As), 0) + tt.sum(tt.square(C), 0)
        if pred_noise:
            var += noise_var
        return mu, var

    cov = self.cov_func(Xnew) - tt.dot(tt.transpose(As), As) + tt.dot(tt.transpose(C), C)
    if pred_noise:
        cov += noise_var * tt.identity_like(cov)
    return mu, stabilize(cov)
def conditional(self, name, Xnew, **kwargs):
    R"""
    Returns the conditional distribution evaluated over new input
    locations `Xnew`.

    Given the function values `f` that the TP prior was placed over, the
    conditional over the new points `f_*` is a multivariate Student-T
    whose degrees of freedom, mean and scale are produced by
    `_build_conditional`.

    Parameters
    ----------
    name : string
        Name of the random variable
    Xnew : array-like
        Function input values.
    **kwargs
        Extra keyword arguments that are passed to `MvStudentT`
        distribution constructor.
    """
    dof, mean, scaled_cov = self._build_conditional(Xnew, self.X, self.f)
    chol_factor = cholesky(stabilize(scaled_cov))
    # An explicit `shape` keyword is popped so it is not forwarded twice.
    requested_shape = kwargs.pop("shape", None)
    shape = infer_shape(Xnew, requested_shape)
    return pm.MvStudentT(name, nu=dof, mu=mean, chol=chol_factor, shape=shape, **kwargs)
def conditional(self, name, Xnew, given=None, **kwargs):
    R"""
    Returns the conditional distribution evaluated over new input
    locations `Xnew`.

    Given a set of function values `f` that the GP prior was over, the
    conditional distribution over a set of new points, `f_*` is

    .. math::

       f_* \mid f, X, X_* \sim \mathcal{GP}\left(
           K(X_*, X) K(X, X)^{-1} f \,,
           K(X_*, X_*) - K(X_*, X) K(X, X)^{-1} K(X, X_*) \right)

    Parameters
    ----------
    name : string
        Name of the random variable
    Xnew : array-like
        Function input values.
    given : dict
        Can optionally take as key value pairs: `X`, `y`, `noise`,
        and `gp`.  See the section in the documentation on additive GP
        models in PyMC3 for more information.
    **kwargs
        Extra keyword arguments that are passed to `MvNormal` distribution
        constructor.
    """
    conditioning = self._get_given_vals(given)
    mean, covariance = self._build_conditional(Xnew, *conditioning)
    chol_factor = cholesky(stabilize(covariance))
    # An explicit `shape` keyword is popped so it is not forwarded twice.
    requested_shape = kwargs.pop("shape", None)
    shape = infer_shape(Xnew, requested_shape)
    return pm.MvNormal(name, mu=mean, chol=chol_factor, shape=shape, **kwargs)
def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs):
    R"""
    Returns the approximate conditional distribution of the GP evaluated
    over new input locations `Xnew`.

    Parameters
    ----------
    name : string
        Name of the random variable
    Xnew : array-like
        Function input values.  If one-dimensional, must be a column
        vector with shape `(n, 1)`.
    pred_noise : bool
        Whether or not observation noise is included in the conditional.
        Default is `False`.
    given : dict
        Can optionally take as key value pairs: `X`, `Xu`, `y`, `noise`,
        and `gp`.  See the section in the documentation on additive GP
        models in PyMC3 for more information.
    **kwargs
        Extra keyword arguments that are passed to `MvNormal` distribution
        constructor.
    """
    conditioning = self._get_given_vals(given)
    # diag=False: the full covariance matrix is requested.
    mean, covariance = self._build_conditional(Xnew, pred_noise, False, *conditioning)
    chol_factor = cholesky(covariance)
    requested_shape = kwargs.pop("shape", None)
    shape = infer_shape(Xnew, requested_shape)
    return pm.MvNormal(name, mu=mean, chol=chol_factor, shape=shape, **kwargs)
def _build_prior(self, name, Xs, **kwargs):
    """
    Build the prior over the cartesian product of the inputs in `Xs`.

    Stores the product of the input lengths on `self.N`, draws a standard
    normal vector named `name + "_rotated_"` of that size, and registers
    the deterministic `name` as the mean plus the flattened `kron_dot` of
    the per-input Cholesky factors with that vector.

    Parameters
    ----------
    name : string
        Name of the deterministic variable that is returned.
    Xs : list of array-like
        One input array per covariance function in `self.cov_funcs`.
    **kwargs
        Extra keyword arguments forwarded to the `Normal` constructor.
    """
    lengths = [len(Xi) for Xi in Xs]
    self.N = np.prod(lengths)
    mean = self.mean_func(cartesian(*Xs))

    factors = []
    for cov_fn, Xi in zip(self.cov_funcs, Xs):
        factors.append(cholesky(stabilize(cov_fn(Xi))))

    # remove reparameterization option
    rotated = pm.Normal(name + "_rotated_", mu=0.0, sd=1.0, shape=self.N, **kwargs)
    return pm.Deterministic(name, mean + tt.flatten(kron_dot(factors, rotated)))
def _build_conditional(self, Xnew, X, f, cov_total, mean_total):
    """
    Build the symbolic mean and covariance of the conditional at `Xnew`.

    Parameters
    ----------
    Xnew : array-like
        New input locations.
    X : array-like
        Input locations of the conditioning function values.
    f : array-like
        Function values being conditioned on.
    cov_total, mean_total : callable
        Covariance and mean functions evaluated at `X` for the
        conditioning terms; `self.cov_func` and `self.mean_func` are used
        for the terms that involve `Xnew`.

    Returns
    -------
    mu, cov
        The covariance is returned without being passed through
        `stabilize`.
    """
    chol_xx = cholesky(stabilize(cov_total(X)))
    A = solve_lower(chol_xx, self.cov_func(X, Xnew))
    whitened_resid = solve_lower(chol_xx, f - mean_total(X))
    A_t = tt.transpose(A)
    mu = self.mean_func(Xnew) + tt.dot(A_t, whitened_resid)
    cov = self.cov_func(Xnew) - tt.dot(A_t, A)
    return mu, cov
def _build_prior(self, name, X, reparameterize=True, **kwargs):
    """
    Build the GP prior random variable over the inputs `X`.

    Parameters
    ----------
    name : string
        Name of the returned random variable.
    X : array-like
        Function input values.
    reparameterize : bool
        If true, draw a standard normal vector named `name + "_rotated_"`
        and return the deterministic `mu + cholesky(cov).dot(v)`.
        Otherwise return an `MvNormal` parameterized by `cov`.
    **kwargs
        Extra keyword arguments forwarded to the `Normal` or `MvNormal`
        constructor; a `shape` entry is popped and used for shape
        inference.
    """
    mean = self.mean_func(X)
    covariance = stabilize(self.cov_func(X))
    shape = infer_shape(X, kwargs.pop("shape", None))

    if not reparameterize:
        return pm.MvNormal(name, mu=mean, cov=covariance, shape=shape, **kwargs)

    rotated = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, shape=shape, **kwargs)
    return pm.Deterministic(name, mean + cholesky(covariance).dot(rotated))
def _build_prior(self, name, X, reparameterize=True, **kwargs):
    """
    Build the GP prior random variable over the inputs `X`.

    Parameters
    ----------
    name : string
        Name of the returned random variable.
    X : array-like
        Function input values.
    reparameterize : bool
        If true, draw a standard normal vector named `name + "_rotated_"`
        and return the deterministic `mu + dot(chol, v)`.  Otherwise
        return an `MvNormal` parameterized by the Cholesky factor.
    **kwargs
        Extra keyword arguments forwarded to the `Normal` or `MvNormal`
        constructor; a `shape` entry is popped and used for shape
        inference.
    """
    mean = self.mean_func(X)
    chol_factor = cholesky(stabilize(self.cov_func(X)))
    shape = infer_shape(X, kwargs.pop("shape", None))

    if not reparameterize:
        return pm.MvNormal(name, mu=mean, chol=chol_factor, shape=shape, **kwargs)

    rotated = pm.Normal(name + "_rotated_", mu=0.0, sd=1.0, shape=shape, **kwargs)
    return pm.Deterministic(name, mean + tt.dot(chol_factor, rotated))
def _build_prior(self, name, X, reparameterize=True, **kwargs):
    """
    Build the Student-T process prior random variable over the inputs `X`.

    Parameters
    ----------
    name : string
        Name of the returned random variable.
    X : array-like
        Function input values.
    reparameterize : bool
        If true, build the prior as a Gaussian scale mixture: a standard
        normal vector `v` named `name + "_rotated_"` and a chi-squared
        variable named `name + "_chi2_"` with `self.nu` degrees of
        freedom are combined as `mu + sqrt(nu / chi2) * dot(chol, v)`.
        Otherwise return an `MvStudentT` with `nu=self.nu`.
    **kwargs
        Extra keyword arguments forwarded to the `Normal` or `MvStudentT`
        constructor; a `shape` entry is popped and used for shape
        inference.
    """
    mu = self.mean_func(X)
    chol = cholesky(stabilize(self.cov_func(X)))
    shape = infer_shape(X, kwargs.pop("shape", None))
    if reparameterize:
        # The mixing variable is named after `name`, like `_rotated_`, so
        # that two priors built in one model do not reuse the same name.
        chi2 = pm.ChiSquared(name + "_chi2_", self.nu)
        v = pm.Normal(name + "_rotated_", mu=0.0, sd=1.0, shape=shape, **kwargs)
        # Scale-mixture form of the multivariate Student-T: only the
        # zero-mean Gaussian part is rescaled, by sqrt(nu / chi2); the mean
        # is added afterwards.  This matches the MvStudentT branch below.
        f = pm.Deterministic(name, mu + tt.sqrt(self.nu / chi2) * tt.dot(chol, v))
    else:
        f = pm.MvStudentT(name, nu=self.nu, mu=mu, chol=chol, shape=shape, **kwargs)
    return f
def _build_conditional(self, Xnew, X, f):
    """
    Build the symbolic parameters of the conditional at `Xnew`.

    Parameters
    ----------
    Xnew : array-like
        New input locations.
    X : array-like
        Input locations of the conditioning function values.
    f : array-like
        Function values being conditioned on.

    Returns
    -------
    nu2, mu, covT
        `nu2` is `self.nu + X.shape[0]`, `mu` is the conditional mean,
        and `covT` is the conditional covariance rescaled by
        `(self.nu + beta - 2) / (nu2 - 2)`, where `beta` is the squared
        norm of the whitened residual.
    """
    chol_xx = cholesky(stabilize(self.cov_func(X)))
    A = solve_lower(chol_xx, self.cov_func(X, Xnew))
    whitened_resid = solve_lower(chol_xx, f - self.mean_func(X))

    mu = self.mean_func(Xnew) + tt.dot(tt.transpose(A), whitened_resid)
    cov = self.cov_func(Xnew) - tt.dot(tt.transpose(A), A)

    nu2 = self.nu + X.shape[0]
    beta = tt.dot(whitened_resid, whitened_resid)
    covT = (self.nu + beta - 2) / (nu2 - 2) * cov
    return nu2, mu, covT
def _build_conditional(self, Xnew, X, f):
    """
    Build the degrees of freedom, mean and rescaled covariance of the
    conditional at `Xnew`, given function values `f` at `X`.

    Returns
    -------
    nu2, mu, covT
        `nu2 = self.nu + X.shape[0]`; `covT` is the conditional
        covariance multiplied by `(self.nu + beta - 2) / (nu2 - 2)`, with
        `beta = dot(v, v)` for the whitened residual `v`.
    """
    mean_at_X = self.mean_func(X)
    mean_at_new = self.mean_func(Xnew)
    lower = cholesky(stabilize(self.cov_func(X)))

    cross = solve_lower(lower, self.cov_func(X, Xnew))
    cross_t = tt.transpose(cross)
    v = solve_lower(lower, f - mean_at_X)

    posterior_cov = self.cov_func(Xnew) - tt.dot(cross_t, cross)
    posterior_mean = mean_at_new + tt.dot(cross_t, v)

    beta = tt.dot(v, v)
    dof = self.nu + X.shape[0]
    scaled_cov = (self.nu + beta - 2) / (dof - 2) * posterior_cov
    return dof, posterior_mean, scaled_cov
def _build_prior(self, name, Xs, **kwargs):
    """
    Build the prior over the cartesian product of the inputs in `Xs`.

    Sets `self.N` to the product of the input lengths, draws a standard
    normal vector named `name + "_rotated_"` with that many entries, and
    returns the deterministic `name` equal to the mean plus the flattened
    `kron_dot` of the per-input Cholesky factors and that vector.

    Parameters
    ----------
    name : string
        Name of the deterministic variable that is returned.
    Xs : list of array-like
        One input array per covariance function in `self.cov_funcs`.
    **kwargs
        Extra keyword arguments forwarded to the `Normal` constructor.
    """
    sizes = [len(Xi) for Xi in Xs]
    self.N = np.prod(sizes)
    grid_mean = self.mean_func(cartesian(*Xs))

    chol_factors = []
    for kernel, Xi in zip(self.cov_funcs, Xs):
        stabilized = stabilize(kernel(Xi))
        chol_factors.append(cholesky(stabilized))

    # remove reparameterization option
    white = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, shape=self.N, **kwargs)
    rotated = tt.flatten(kron_dot(chol_factors, white))
    return pm.Deterministic(name, grid_mean + rotated)
def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs):
    R"""
    Returns the marginal likelihood distribution, given the input
    locations `X` and the data `y`.

    This is integral over the product of the GP prior and a normal
    likelihood.

    .. math::

       y \mid X,\theta \sim \int p(y \mid f,\, X,\, \theta) \, p(f \mid X,\, \theta) \, df

    Parameters
    ----------
    name : string
        Name of the random variable
    X : array-like
        Function input values.  If one-dimensional, must be a column
        vector with shape `(n, 1)`.
    y : array-like
        Data that is the sum of the function with the GP prior and Gaussian
        noise.  Must have shape `(n, )`.
    noise : scalar, Variable, or Covariance
        Standard deviation of the Gaussian noise.  Can also be a Covariance
        for non-white noise.
    is_observed : bool
        Whether to set `y` as an `observed` variable in the `model`.
        Default is `True`.
    **kwargs
        Extra keyword arguments that are passed to `MvNormal` distribution
        constructor.
    """
    # Anything that is not already a Covariance is wrapped as white noise.
    noise_cov = noise if isinstance(noise, Covariance) else pm.gp.cov.WhiteNoise(noise)
    mean, covariance = self._build_marginal_likelihood(X, noise_cov)
    chol_factor = cholesky(stabilize(covariance))

    # Stored on the instance; the wrapped noise is what gets stored.
    self.X, self.y, self.noise = X, y, noise_cov

    if is_observed:
        return pm.MvNormal(name, mu=mean, chol=chol_factor, observed=y, **kwargs)

    shape = infer_shape(X, kwargs.pop("shape", None))
    return pm.MvNormal(name, mu=mean, chol=chol_factor, shape=shape, **kwargs)
def _build_conditional(self, Xnew):
    """
    Build the symbolic mean and covariance of the conditional at `Xnew`,
    conditioning on `self.f` at the cartesian product of `self.Xs`.

    The solves against the training covariance go through
    `kron_solve_lower` / `kron_solve_upper` using one Cholesky factor per
    entry of `self.cov_funcs`.

    Returns
    -------
    mu, cov
        `cov` is passed through `stabilize` before being returned.
    """
    X = cartesian(*self.Xs)
    resid = self.f - self.mean_func(X)

    chols = [
        cholesky(stabilize(kernel(Xi)))
        for kernel, Xi in zip(self.cov_funcs, self.Xs)
    ]
    chol_transposes = [tt.transpose(lower) for lower in chols]

    Kss = self.cov_func(Xnew)
    Kxs = self.cov_func(X, Xnew)

    # Lower solve followed by upper solve on the residual.
    alpha = kron_solve_upper(chol_transposes, kron_solve_lower(chols, resid))
    mu = tt.dot(tt.transpose(Kxs), alpha).ravel() + self.mean_func(Xnew)

    A = kron_solve_lower(chols, Kxs)
    return mu, stabilize(Kss - tt.dot(tt.transpose(A), A))
def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total):
    """
    Build the symbolic mean and (co)variance of the conditional at `Xnew`
    given noisy observations `y` at `X`.

    Parameters
    ----------
    Xnew : array-like
        New input locations.
    pred_noise : bool
        If true, `noise` evaluated at `Xnew` is added to the result.
    diag : bool
        If true, return only the variance vector.
    X, y, noise, cov_total, mean_total
        Conditioning inputs, data, noise covariance function, and the
        covariance / mean functions used for the conditioning terms.

    Returns
    -------
    mu, var : when `diag` is true.
    mu, cov : otherwise.  `cov` is passed through `stabilize` only when
        `pred_noise` is false.
    """
    chol_xx = cholesky(stabilize(cov_total(X)) + noise(X))
    A = solve_lower(chol_xx, self.cov_func(X, Xnew))
    whitened_resid = solve_lower(chol_xx, y - mean_total(X))
    mu = self.mean_func(Xnew) + tt.dot(tt.transpose(A), whitened_resid)

    if diag:
        var = self.cov_func(Xnew, diag=True) - tt.sum(tt.square(A), 0)
        if pred_noise:
            var += noise(Xnew, diag=True)
        return mu, var

    cov = self.cov_func(Xnew) - tt.dot(tt.transpose(A), A)
    if pred_noise:
        cov += noise(Xnew)
        return mu, cov
    return mu, stabilize(cov)
def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total):
    """
    Build the symbolic mean and (co)variance of the conditional at `Xnew`
    given noisy observations `y` at `X`.

    Parameters
    ----------
    Xnew : array-like
        New input locations.
    pred_noise : bool
        If true, `noise` evaluated at `Xnew` is added to the result.
    diag : bool
        If true, return only the variance vector.
    X, y, noise, cov_total, mean_total
        Conditioning inputs, data, noise covariance function, and the
        covariance / mean functions used for the conditioning terms.

    Returns
    -------
    mu, var : when `diag` is true.
    mu, cov : otherwise, with `cov` always passed through `stabilize`.
    """
    train_cov = stabilize(cov_total(X)) + noise(X)
    lower = cholesky(train_cov)
    cross = solve_lower(lower, self.cov_func(X, Xnew))
    resid = solve_lower(lower, y - mean_total(X))
    mean = self.mean_func(Xnew) + tt.dot(tt.transpose(cross), resid)

    if diag:
        variance = self.cov_func(Xnew, diag=True) - tt.sum(tt.square(cross), 0)
        if pred_noise:
            variance += noise(Xnew, diag=True)
        return mean, variance

    covariance = self.cov_func(Xnew) - tt.dot(tt.transpose(cross), cross)
    if pred_noise:
        covariance += noise(Xnew)
    return mean, stabilize(covariance)
def conditional(self, name, Xnew, pred_noise=False, **kwargs):
    """
    Returns the conditional distribution evaluated over new input
    locations `Xnew`, just as in `Marginal`.

    `Xnew` will be split by columns and fed to the relevant
    covariance functions based on their `input_dim`. For example, if
    `cov_func1`, `cov_func2`, and `cov_func3` have `input_dim` of 2,
    1, and 4, respectively, then `Xnew` must have 7 columns and a
    covariance between the prediction points

    .. code:: python

        cov_func(Xnew) = cov_func1(Xnew[:, :2]) * cov_func2(Xnew[:, 2:3]) * cov_func3(Xnew[:, 3:])

    This `cov_func` does not have a Kronecker structure without a full
    grid, but the conditional distribution does not have a Kronecker
    structure regardless. Thus, the conditional method must fall back to
    using `MvNormal` rather than `KroneckerNormal` in either case.

    Parameters
    ----------
    name : string
        Name of the random variable
    Xnew : array-like
        Function input values.  If one-dimensional, must be a column
        vector with shape `(n, 1)`.
    pred_noise : bool
        Whether or not observation noise is included in the conditional.
        Default is `False`.
    **kwargs
        Extra keyword arguments that are passed to `MvNormal` distribution
        constructor.
    """
    # diag=False: the full covariance matrix is requested.
    mean, covariance = self._build_conditional(Xnew, pred_noise, False)
    chol_factor = cholesky(stabilize(covariance))
    requested_shape = kwargs.pop("shape", None)
    shape = infer_shape(Xnew, requested_shape)
    return pm.MvNormal(name, mu=mean, chol=chol_factor, shape=shape, **kwargs)
def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs):
    R"""
    Returns the conditional distribution evaluated over new input
    locations `Xnew`.

    Given a set of function values `f` that the GP prior was over, the
    conditional distribution over a set of new points, `f_*` is:

    .. math::

       f_* \mid f, X, X_* \sim \mathcal{GP}\left(
           K(X_*, X) [K(X, X) + K_{n}(X, X)]^{-1} f \,,
           K(X_*, X_*) - K(X_*, X) [K(X, X) + K_{n}(X, X)]^{-1} K(X, X_*) \right)

    Parameters
    ----------
    name : string
        Name of the random variable
    Xnew : array-like
        Function input values.  If one-dimensional, must be a column
        vector with shape `(n, 1)`.
    pred_noise : bool
        Whether or not observation noise is included in the conditional.
        Default is `False`.
    given : dict
        Can optionally take as key value pairs: `X`, `y`, `noise`,
        and `gp`.  See the section in the documentation on additive GP
        models in PyMC3 for more information.
    **kwargs
        Extra keyword arguments that are passed to `MvNormal` distribution
        constructor.
    """
    conditioning = self._get_given_vals(given)
    # diag=False: the full covariance matrix is requested.
    mean, covariance = self._build_conditional(Xnew, pred_noise, False, *conditioning)
    chol_factor = cholesky(covariance)
    requested_shape = kwargs.pop("shape", None)
    shape = infer_shape(Xnew, requested_shape)
    return pm.MvNormal(name, mu=mean, chol=chol_factor, shape=shape, **kwargs)
def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs):
    R"""
    Returns the conditional distribution evaluated over new input
    locations `Xnew`.

    Given a set of function values `f` that the GP prior was over, the
    conditional distribution over a set of new points, `f_*` is:

    .. math::

       f_* \mid f, X, X_* \sim \mathcal{GP}\left(
           K(X_*, X) [K(X, X) + K_{n}(X, X)]^{-1} f \,,
           K(X_*, X_*) - K(X_*, X) [K(X, X) + K_{n}(X, X)]^{-1} K(X, X_*) \right)

    Parameters
    ----------
    name : string
        Name of the random variable
    Xnew : array-like
        Function input values.  If one-dimensional, must be a column
        vector with shape `(n, 1)`.
    pred_noise : bool
        Whether or not observation noise is included in the conditional.
        Default is `False`.
    given : dict, optional
        Can optionally take as key value pairs: `X`, `y`, `noise`,
        and `gp`.  See the section in the documentation on additive GP
        models in PyMC3 for more information.  When omitted (or `None`),
        an empty dict is used.
    **kwargs
        Extra keyword arguments that are passed to `MvNormal` distribution
        constructor.
    """
    # A fresh dict is created per call; a `{}` default in the signature
    # would be a single object shared by every call.
    givens = self._get_given_vals({} if given is None else given)
    # diag=False: the full covariance matrix is requested.
    mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens)
    chol = cholesky(cov)
    shape = infer_shape(Xnew, kwargs.pop("shape", None))
    return pm.MvNormal(name, mu=mu, chol=chol, shape=shape, **kwargs)