def dlogp(inputs, gradients):
    (g_logp,) = gradients
    cov, delta = inputs

    g_logp.tag.test_value = floatX(1.0)
    n, k = delta.shape

    chol_cov = cholesky(cov)
    diag = at.diag(chol_cov)
    ok = at.all(diag > 0)
    chol_cov = at.switch(ok, chol_cov, at.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    inner = n * at.eye(k) - at.dot(delta_trans.T, delta_trans)
    g_cov = solve_upper(chol_cov.T, inner)
    g_cov = solve_upper(chol_cov.T, g_cov.T)

    tau_delta = solve_upper(chol_cov.T, delta_trans.T)
    g_delta = tau_delta.T

    g_cov = at.switch(ok, g_cov, -np.nan)
    g_delta = at.switch(ok, g_delta, -np.nan)

    return [-0.5 * g_cov * g_logp, -g_delta * g_logp]
def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
    sigma2 = at.square(sigma)
    Kuu = self.cov_func(Xu)
    Kuf = self.cov_func(Xu, X)
    Luu = cholesky(stabilize(Kuu))
    A = solve_lower(Luu, Kuf)
    Qffd = at.sum(A * A, 0)
    if self.approx == "FITC":
        Kffd = self.cov_func(X, diag=True)
        Lamd = at.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
        trace = 0.0
    elif self.approx == "VFE":
        Lamd = at.ones_like(Qffd) * sigma2
        trace = (1.0 / (2.0 * sigma2)) * (
            at.sum(self.cov_func(X, diag=True)) - at.sum(at.sum(A * A, 0))
        )
    else:  # DTC
        Lamd = at.ones_like(Qffd) * sigma2
        trace = 0.0
    A_l = A / Lamd
    L_B = cholesky(at.eye(Xu.shape[0]) + at.dot(A_l, at.transpose(A)))
    r = y - self.mean_func(X)
    r_l = r / Lamd
    c = solve_lower(L_B, at.dot(A, r_l))
    constant = 0.5 * X.shape[0] * at.log(2.0 * np.pi)
    logdet = 0.5 * at.sum(at.log(Lamd)) + at.sum(at.log(at.diag(L_B)))
    quadratic = 0.5 * (at.dot(r, r_l) - at.dot(c, c))
    return -1.0 * (constant + logdet + quadratic + trace)
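# A hedged, self-contained NumPy sketch (toy RBF kernel, data, and noise level are
# assumptions) showing what the low-rank evaluation above computes: for the DTC
# branch (Lamd == sigma2), the Woodbury-style log marginal likelihood matches a
# dense multivariate-normal log density with covariance Qff + sigma2 * I.
import numpy as np
from scipy import stats

def rbf(A, B):
    d = A[:, None, 0] - B[None, :, 0]
    return np.exp(-0.5 * d ** 2)

rng = np.random.default_rng(0)
X = rng.uniform(0, 10, (50, 1))
Xu = np.linspace(0, 10, 8)[:, None]            # inducing points
y = rng.normal(size=50)
sigma2 = 0.1

Kuu = rbf(Xu, Xu) + 1e-6 * np.eye(8)           # jitter plays the role of stabilize()
Kuf = rbf(Xu, X)
Qff = Kuf.T @ np.linalg.solve(Kuu, Kuf)        # Nystrom approximation of Kff

# Dense reference: y ~ N(0, Qff + sigma2 * I)
dense_logp = stats.multivariate_normal(mean=np.zeros(50), cov=Qff + sigma2 * np.eye(50)).logpdf(y)

# Low-rank evaluation mirroring the code above, with Lamd = sigma2 (DTC branch)
Luu = np.linalg.cholesky(Kuu)
A = np.linalg.solve(Luu, Kuf)                  # A = Luu^{-1} Kuf, so Qff = A.T @ A
Lamd = np.full(50, sigma2)
L_B = np.linalg.cholesky(np.eye(8) + (A / Lamd) @ A.T)
r = y                                          # zero mean function in this sketch
c = np.linalg.solve(L_B, A @ (r / Lamd))
constant = 0.5 * 50 * np.log(2.0 * np.pi)
logdet = 0.5 * np.sum(np.log(Lamd)) + np.sum(np.log(np.diag(L_B)))
quadratic = 0.5 * (r @ (r / Lamd) - c @ c)
lowrank_logp = -(constant + logdet + quadratic)

np.testing.assert_allclose(lowrank_logp, dense_logp, rtol=1e-7)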
def merge_factors(self, X, Xs=None, diag=False):
    factor_list = []
    for factor in self.factor_list:
        # make sure diag=True is handled properly
        if isinstance(factor, Covariance):
            factor_list.append(factor(X, Xs, diag))
        elif isinstance(factor, np.ndarray):
            if np.ndim(factor) == 2 and diag:
                factor_list.append(np.diag(factor))
            else:
                factor_list.append(factor)
        elif isinstance(factor, (TensorConstant, TensorVariable, TensorSharedVariable)):
            if factor.ndim == 2 and diag:
                factor_list.append(at.diag(factor))
            else:
                factor_list.append(factor)
        else:
            factor_list.append(factor)
    return factor_list
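# A hedged usage sketch of the ndarray branch above (assumes a PyMC build with the
# aesara backend, imported as `pymc`; the grid and fixed factor are toy values):
# multiplying a kernel by a fixed 2-D array yields a product covariance, and
# merge_factors takes np.diag of that array when only the diagonal is requested.
import numpy as np
import pymc as pm

X = np.linspace(0, 1, 5)[:, None]
fixed = np.full((5, 5), 2.0)                    # fixed 2-D factor
cov = pm.gp.cov.ExpQuad(1, ls=0.2) * fixed

K_full = cov(X).eval()                          # elementwise product, 5x5
K_diag = cov(X, diag=True).eval()               # uses np.diag(fixed) internally
np.testing.assert_allclose(np.diag(K_full), K_diag)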
def check_jacobian_det(
    transform,
    domain,
    constructor=at.dscalar,
    test=0,
    make_comparable=None,
    elemwise=False,
    rv_var=None,
):
    y = constructor("y")
    y.tag.test_value = test

    if rv_var is None:
        rv_var = y

    rv_inputs = rv_var.owner.inputs if rv_var.owner else []

    x = transform.backward(y, *rv_inputs)
    if make_comparable:
        x = make_comparable(x)

    if not elemwise:
        jac = at.log(at.nlinalg.det(jacobian(x, [y])))
    else:
        jac = at.log(at.abs_(at.diag(jacobian(x, [y]))))

    # ljd = log jacobian det
    actual_ljd = aesara.function([y], jac)

    computed_ljd = aesara.function(
        [y], at.as_tensor_variable(transform.log_jac_det(y, *rv_inputs)), on_unused_input="ignore"
    )

    for yval in domain.vals:
        close_to(actual_ljd(yval), computed_ljd(yval), tol)
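# A hedged, self-contained sketch of the property this helper checks, using the log
# transform as a concrete case: its backward map is exp, so the log-Jacobian
# determinant at y is log(d exp(y)/dy) = y. The toy values below are assumptions.
import numpy as np
import aesara
import aesara.tensor as at

y = at.dscalar("y")
x = at.exp(y)                                   # backward map of the log transform
ljd = aesara.function([y], at.log(aesara.grad(x, y)))

for yval in [-2.0, 0.0, 1.5]:
    np.testing.assert_allclose(ljd(yval), yval)  # log-Jacobian of exp is y itself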
def make_model(cls):
    with pm.Model() as model:
        sd_mu = np.array([1, 2, 3, 4, 5])
        sd_dist = pm.LogNormal.dist(mu=sd_mu, sigma=sd_mu / 10.0, size=5)
        chol_packed = pm.LKJCholeskyCov("chol_packed", eta=3, n=5, sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(5, chol_packed, lower=True)
        cov = at.dot(chol, chol.T)
        stds = at.sqrt(at.diag(cov))
        pm.Deterministic("log_stds", at.log(stds))
        corr = cov / stds[None, :] / stds[:, None]
        corr_entries_unit = (corr[np.tril_indices(5, -1)] + 1) / 2
        pm.Deterministic("corr_entries_unit", corr_entries_unit)
    return model
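# A hedged usage sketch (`LKJCase` stands in for whichever class defines this
# classmethod; the draws/tune values are arbitrary):
model = LKJCase.make_model()
with model:
    idata = pm.sample(draws=500, tune=500, chains=2)
# the "log_stds" and "corr_entries_unit" deterministics are then in idata.posterior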
def predict_mean_i(i, x_star, s_star, X, beta, h):
    n, D = X.shape
    # rescale every dimension by the corresponding inverse lengthscale
    iL = at.diag(h[i, :D])
    inp = (X - x_star).dot(iL)
    # compute the mean
    B = iL.dot(s_star).dot(iL)
    t = inp.dot(B)
    lb = (inp * t).sum() + beta.sum()
    Mi = at.sum(lb) * h[i, D]
    return Mi
def L_op(self, inputs, outputs, gradients):
    # Modified from aesara/tensor/slinalg.py
    # No handling for on_error = 'nan'
    dz = gradients[0]
    chol_x = outputs[0]

    # this is for nan mode
    #
    # ok = ~tensor.any(tensor.isnan(chol_x))
    # chol_x = tensor.switch(ok, chol_x, 1)
    # dz = tensor.switch(ok, dz, 1)

    # deal with upper triangular by converting to lower triangular
    if not self.lower:
        chol_x = chol_x.T
        dz = dz.T

    def tril_and_halve_diagonal(mtx):
        """Extracts lower triangle of square matrix and halves diagonal."""
        return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.0)

    def conjugate_solve_triangular(outer, inner):
        """Computes L^{-T} P L^{-1} for lower-triangular L."""
        return gpu_solve_upper_triangular(
            outer.T, gpu_solve_upper_triangular(outer.T, inner.T).T
        )

    s = conjugate_solve_triangular(chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

    if self.lower:
        grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
    else:
        grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))
    return [grad]
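# A hedged sketch of how a gradient override like this is typically validated, via
# aesara's finite-difference checker. `chol_op` is an assumption: an instance of
# the Cholesky-like Op that owns this L_op.
import numpy as np
from aesara.gradient import verify_grad

rng = np.random.RandomState(42)
a = rng.standard_normal((4, 4))
spd = a @ a.T + 4.0 * np.eye(4)          # symmetric positive-definite input
verify_grad(chol_op, [spd], rng=rng)     # compares L_op against numeric gradients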
def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None):
    super().__init__(input_dim, active_dims)
    if len(self.active_dims) != 1:
        raise ValueError("Coregion requires exactly one dimension to be active")
    make_B = W is not None or kappa is not None
    if make_B and B is not None:
        raise ValueError("Exactly one of (W, kappa) and B must be provided to Coregion")
    if make_B:
        self.W = at.as_tensor_variable(W)
        self.kappa = at.as_tensor_variable(kappa)
        self.B = at.dot(self.W, self.W.T) + at.diag(self.kappa)
    elif B is not None:
        self.B = at.as_tensor_variable(B)
    else:
        raise ValueError("Exactly one of (W, kappa) and B must be provided to Coregion")
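# A hedged NumPy sketch of the B matrix this constructor builds (the toy W and
# kappa values are assumptions): with W of shape (outputs, rank) and kappa a
# per-output variance vector, the coregionalization matrix is B = W W^T + diag(kappa),
# and the covariance between integer-coded outputs i and j is B[i, j].
import numpy as np

W = np.array([[0.5], [1.0], [-0.3]])     # 3 outputs, rank-1 coupling
kappa = np.array([0.1, 0.2, 0.3])        # independent per-output variance
B = W @ W.T + np.diag(kappa)

i, j = 0, 2
print(B[i, j])                           # cross-covariance of outputs 0 and 2: 0.5 * (-0.3)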
def cov(self):
    var = rho2sigma(self.rho) ** 2
    if self.batched:
        return batched_diag(var)
    else:
        return at.diag(var)
def std(self):
    return at.sqrt(at.diag(self.cov))
def std(self):
    if self.batched:
        return at.sqrt(batched_diag(self.cov))
    else:
        return at.sqrt(at.diag(self.cov))
def MvNormalLogp():
    """Compute the log pdf of a multivariate normal distribution.

    This should be used in MvNormal.logp once Theano#5908 is released.

    Parameters
    ----------
    cov: at.matrix
        The covariance matrix.
    delta: at.matrix
        Array of deviations from the mean.
    """
    cov = at.matrix("cov")
    cov.tag.test_value = floatX(np.eye(3))
    delta = at.matrix("delta")
    delta.tag.test_value = floatX(np.zeros((2, 3)))

    solve_lower = Solve(A_structure="lower_triangular")
    solve_upper = Solve(A_structure="upper_triangular")
    cholesky = Cholesky(lower=True, on_error="nan")

    n, k = delta.shape
    n, k = f(n), f(k)
    chol_cov = cholesky(cov)
    diag = at.diag(chol_cov)
    ok = at.all(diag > 0)

    chol_cov = at.switch(ok, chol_cov, at.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    result = n * k * at.log(f(2) * np.pi)
    result += f(2) * n * at.sum(at.log(diag))
    result += (delta_trans ** f(2)).sum()
    result = f(-0.5) * result
    logp = at.switch(ok, result, -np.inf)

    def dlogp(inputs, gradients):
        (g_logp,) = gradients
        cov, delta = inputs

        g_logp.tag.test_value = floatX(1.0)
        n, k = delta.shape

        chol_cov = cholesky(cov)
        diag = at.diag(chol_cov)
        ok = at.all(diag > 0)
        chol_cov = at.switch(ok, chol_cov, at.fill(chol_cov, 1))
        delta_trans = solve_lower(chol_cov, delta.T).T

        inner = n * at.eye(k) - at.dot(delta_trans.T, delta_trans)
        g_cov = solve_upper(chol_cov.T, inner)
        g_cov = solve_upper(chol_cov.T, g_cov.T)

        tau_delta = solve_upper(chol_cov.T, delta_trans.T)
        g_delta = tau_delta.T

        g_cov = at.switch(ok, g_cov, -np.nan)
        g_delta = at.switch(ok, g_delta, -np.nan)

        return [-0.5 * g_cov * g_logp, -g_delta * g_logp]

    return OpFromGraph([cov, delta], [logp], grad_overrides=dlogp, inline=True)
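# A hedged usage sketch (the toy covariance and deviations are assumptions): compile
# the OpFromGraph and compare it against scipy's multivariate-normal log density.
import numpy as np
import aesara
import aesara.tensor as at
from scipy import stats

chol_val = np.array([[1.0, 0.0], [0.9, 2.0]])
cov_val = chol_val @ chol_val.T                   # a valid 2x2 covariance
delta_val = np.array([[0.1, -0.2], [0.3, 0.4]])   # two deviations from the mean

cov = at.matrix("cov")
delta = at.matrix("delta")
logp = MvNormalLogp()(cov, delta)
logp_fn = aesara.function([cov, delta], logp)

expected = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val).logpdf(delta_val).sum()
np.testing.assert_allclose(logp_fn(cov_val, delta_val), expected)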
def diag(self, X):
    X, _ = self._slice(X, None)
    index = at.cast(X, "int32")
    return at.diag(self.B)[index.ravel()]
def full(self, X, Xs=None):
    if Xs is None:
        return at.diag(self.diag(X))
    else:
        return at.alloc(0.0, X.shape[0], Xs.shape[0])
def tril_and_halve_diagonal(mtx):
    """Extracts lower triangle of square matrix and halves diagonal."""
    return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.0)
def cov(self):
    var = rho2sigma(self.rho) ** 2
    return at.diag(var)