def _CalculateNecessaryMatrices(self, scales, nuggets):
    # Kernel matrix with the nugget (noise) term added to the diagonal
    K = self.covariance(self.X, self.X, scales)
    K = K + nuggets * nuggets * np.eye(K.shape[0])
    L = np.linalg.cholesky(K)
    # alpha = K^{-1} Y via two triangular solves with L and L^T
    self.alpha = solve_triangular(L.transpose(),
                                  solve_triangular(L, self.Y, lower=True))
    self.cholesky = L
    self.cov_matrix = K
def solve_via_cholesky(k_chol, y):
    """Solves a positive definite linear system via a Cholesky decomposition.

    Args:
        k_chol: The Cholesky factor of the matrix to solve. A lower triangular
            matrix, perhaps more commonly known as L.
        y: The right-hand-side vector to solve for.
    """
    # Solve L s = y
    s = spl.solve_triangular(k_chol, y, lower=True)
    # Solve L^T b = s
    b = spl.solve_triangular(k_chol.T, s, lower=False)
    return b
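# Hedged usage sketch (standalone, not from the original source): the same
# two-triangular-solve pattern as solve_via_cholesky above, checked against a
# dense solve on a small made-up positive definite system.
import numpy as np
from scipy import linalg as spl

rng = np.random.default_rng(0)
A = rng.standard_normal((5, 5))
K = A @ A.T + 5.0 * np.eye(5)                      # symmetric positive definite
y = rng.standard_normal(5)

L = np.linalg.cholesky(K)                          # K = L L^T
s = spl.solve_triangular(L, y, lower=True)         # solve L s = y
b = spl.solve_triangular(L.T, s, lower=False)      # solve L^T b = s
assert np.allclose(b, np.linalg.solve(K, y))       # b = K^{-1} y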
def _testfunc_mahal(a, b, use_my):
    # Squared Mahalanobis norm of b w.r.t. a, using the (lower triangular)
    # Cholesky factor of a.
    if use_my:
        l = cholesky_factorization(a)
    else:
        l = anp.linalg.cholesky(a)
    x = aspl.solve_triangular(l, b, lower=True)
    return anp.sum(anp.square(x))
def Predict(self, X):
    # Cross-covariance between test inputs and training inputs
    kstar = self.covariance(X, self.X, self.scales)
    # Predictive mean: k_*^T alpha, with alpha = K^{-1} Y from training
    predictive_mean = np.matmul(np.transpose(kstar), self.alpha)
    # Predictive covariance: k(X, X) - v^T v with v = L^{-1} k_*
    v = solve_triangular(self.cholesky, kstar, lower=True)
    predictive_variance = self.covariance(X, X, self.scales) - np.matmul(
        np.transpose(v), v)
    return (predictive_mean.reshape(-1, 1),
            np.diag(predictive_variance).reshape(-1, 1, 1))
def predict(self, coords=None):
    """Return posterior mean and variance."""
    S = self.eval_S(self.kappa, self.sigma_f)
    K_chol = self.eval_K_chol(S, self.sigma_n, self.sigma_f)
    woodbury_vector = solve_triangular(K_chol, self.Y, lower=True)
    phi_S_phistar = (self.eigenfunctions[self.X] * S[None, :]) @ \
        self.eigenfunctions.T
    W = solve_triangular(K_chol, phi_S_phistar, lower=True)
    mean = np.einsum('nt,n->t', W, woodbury_vector)
    # Reuse W instead of repeating the same triangular solve for the variance
    variance = np.sum((self.eigenfunctions * S[None, :]).T *
                      self.eigenfunctions.T, axis=0) - np.sum(W**2, axis=0)
    return mean, variance
def cholesky_computations(features, targets, mean, kernel, noise_variance,
                          debug_log=False, test_intermediates=None):
    """
    Given input matrix X (features), target matrix Y (targets), mean and kernel
    function, compute posterior state {L, P}, where L is the Cholesky factor of

        k(X, X) + sigsq_final * I

    and

        L P = Y - mean(X)

    Here, sigsq_final >= noise_variance is minimal such that the Cholesky
    factorization does not fail.

    :param features: Input matrix X (n, d)
    :param targets: Target matrix Y (n, m)
    :param mean: Mean function
    :param kernel: Kernel function
    :param noise_variance: Noise variance (may be increased)
    :param debug_log: Debug output during the add_jitter CustomOp?
    :param test_intermediates: If given, all intermediates are written into
        this dict
    :return: L, P
    """
    kernel_mat = kernel(features, features)
    # Add jitter to noise_variance (if needed) in order to guarantee that the
    # Cholesky factorization works
    sys_mat = AddJitterOp(
        flatten_and_concat(kernel_mat, noise_variance),
        initial_jitter_factor=NOISE_VARIANCE_LOWER_BOUND,
        debug_log='true' if debug_log else 'false')
    chol_fact = cholesky_factorization(sys_mat)
    centered_y = targets - anp.reshape(mean(features), (-1, 1))
    pred_mat = aspl.solve_triangular(chol_fact, centered_y, lower=True)

    if test_intermediates is not None:
        assert isinstance(test_intermediates, dict)
        test_intermediates.update({
            'features': features,
            'targets': targets,
            'noise_variance': noise_variance,
            'kernel_mat': kernel_mat,
            'sys_mat': sys_mat,
            'chol_fact': chol_fact,
            'pred_mat': pred_mat,
            'centered_y': centered_y
        })
        test_intermediates.update(kernel.get_params())
        test_intermediates.update(mean.get_params())
    return chol_fact, pred_mat
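# Hedged sketch (plain numpy/scipy with a made-up RBF kernel and zero mean; not
# part of the original module): the posterior state {L, P} described in the
# docstring above, checking that L @ P recovers the centered targets and that a
# second triangular solve gives alpha = (K + sigma^2 I)^{-1} (Y - mean(X)).
import numpy as np
from scipy.linalg import solve_triangular as tri_solve

def _demo_rbf(x1, x2, lengthscale=1.0):
    return np.exp(-0.5 * (x1[:, None] - x2[None, :]) ** 2 / lengthscale ** 2)

rng = np.random.default_rng(0)
x = rng.uniform(-1.0, 1.0, 8)
y = np.sin(3.0 * x).reshape(-1, 1)
noise_variance = 1e-2

K = _demo_rbf(x, x) + noise_variance * np.eye(len(x))
L = np.linalg.cholesky(K)                  # "chol_fact"
P = tri_solve(L, y, lower=True)            # "pred_mat": L @ P = Y - mean(X)
assert np.allclose(L @ P, y)

alpha = tri_solve(L.T, P, lower=False)     # (K + sigma^2 I)^{-1} (Y - mean(X))
assert np.allclose(K @ alpha, y)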
def objective(self, params):
    kern_variance, kern_lengthscale, noise_variance, Z = self.to_params(params)
    sigma2 = noise_variance
    N = self.num_data

    Saa = self.Su_old
    ma = self.mu_old

    # a is old inducing points, b is new
    # f is training points
    Kfdiag = self.Kdiag(self.X, kern_variance)
    (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD, Lbinv_Kba,
     LDinv_Lbinv_c, err, Qff) = self._build_objective_terms(
         Z, kern_variance, kern_lengthscale, noise_variance)

    LSa = anp.linalg.cholesky(Saa)
    Lainv_ma = solve_triangular(LSa, ma, lower=True)

    # constant term
    bound = -0.5 * N * anp.log(2 * anp.pi)
    # quadratic term
    bound += -0.5 * anp.sum(anp.square(err)) / sigma2
    bound += -0.5 * anp.sum(anp.square(Lainv_ma))
    bound += 0.5 * anp.sum(anp.square(LDinv_Lbinv_c))
    # log det term
    bound += -0.5 * N * anp.sum(anp.log(sigma2))
    bound += -anp.sum(anp.log(anp.diag(LD)))
    # bound += -0.5 * N * anp.sum(anp.log(anp.where(sigma2, sigma2, 1.)))
    # bound += -anp.sum(anp.log(anp.where(anp.diag(LD), anp.diag(LD), 1.)))
    # delta 1: trace term
    bound += -0.5 * anp.sum(Kfdiag) / sigma2
    bound += 0.5 * anp.sum(anp.diag(Qff))
    # delta 2: a and b difference
    bound += anp.sum(anp.log(anp.diag(La)))
    bound += -anp.sum(anp.log(anp.diag(LSa)))

    Kaadiff = Kaa_cur - anp.matmul(anp.transpose(Lbinv_Kba), Lbinv_Kba)
    Sainv_Kaadiff = anp.linalg.solve(Saa, Kaadiff)
    Kainv_Kaadiff = anp.linalg.solve(Kaa, Kaadiff)
    bound += -0.5 * anp.sum(
        anp.diag(Sainv_Kaadiff) - anp.diag(Kainv_Kaadiff))

    return bound
def chol_inv(W):
    # Invert a symmetric positive definite matrix via its Cholesky factor;
    # returns False if the factorization fails.
    EYEN = np.eye(W.shape[0])
    try:
        tri_W = np.linalg.cholesky(W)
        tri_W_inv = splg.solve_triangular(tri_W, EYEN, lower=True)
        # Alternative: tri_W, lower = splg.cho_factor(W, lower=True)
        #              W_inv = splg.cho_solve((tri_W, True), EYEN)
        W_inv = np.matmul(tri_W_inv.T, tri_W_inv)
        # Symmetrize to remove round-off asymmetry
        W_inv = (W_inv + W_inv.T) / 2
        return W_inv
    except Exception:
        return False
def sample_invwishart(S, nu):
    # Draw a sample from an inverse-Wishart distribution with scale matrix S
    # and nu degrees of freedom (Bartlett-style construction).
    n = S.shape[0]
    chol = np.linalg.cholesky(S)
    if (nu <= 81 + n) and (nu == np.round(nu)):
        x = npr.randn(nu, n)
    else:
        x = np.diag(np.sqrt(np.atleast_1d(chi2.rvs(nu - np.arange(n)))))
        x[np.triu_indices_from(x, 1)] = npr.randn(n * (n - 1) // 2)
    R = np.linalg.qr(x, 'r')
    T = solve_triangular(R.T, chol.T, lower=True).T
    return np.dot(T, T.T)
def fit(self, kernel, y, jitter):
    """Solve (kernel + diag(jitter)) alpha = y; requires len(y) == len(jitter)."""
    reg = jitter
    if self.memory_eff is True:
        # In-place Cholesky factorization to save memory
        kernel[np.diag_indices_from(kernel)] += reg
        kernel, lower = cho_factor(kernel, lower=False, overwrite_a=True,
                                   check_finite=False)
        L = kernel
        alpha = cho_solve((L, lower), y, overwrite_b=False).reshape((1, -1))
    elif self.memory_eff == 'autograd':
        alpha = np.linalg.solve(kernel + np.diag(reg), y).reshape((1, -1))
    else:
        L = np.linalg.cholesky(kernel + np.diag(reg))
        z = solve_triangular(L, y, lower=True)
        alpha = solve_triangular(L.T, z, lower=False,
                                 overwrite_b=True).reshape((1, -1))
    return alpha
def _neg_log_likelihood_alt(self, sigma_f, kappa, sigma_n):
    if self.Y is None:
        return 0.0
    S = self.eval_S(kappa, sigma_f)
    K_chol = self.eval_K_chol(S, sigma_n, sigma_f)
    K_chol_inv_Y = solve_triangular(K_chol, self.Y, lower=True)
    # Negative log marginal likelihood: data fit + log-determinant + constant
    data_fit = 0.5 * np.sum(K_chol_inv_Y * K_chol_inv_Y)
    penalty = np.sum(np.log(np.diag(K_chol)))
    objective = data_fit + penalty + 0.5 * len(self.Y) * np.log(2 * np.pi)
    return objective
def posterior_samples(self, nsamples):
    """Compute `nsamples` posterior samples with Matheron's rule."""
    prior = self.prior_samples(nsamples)
    S = self.eval_S(self.kappa, self.sigma_f)
    K_chol = self.eval_K_chol(S, self.sigma_n, self.sigma_f)
    residue = (self.Y - prior[..., self.X]).T  # shape (n, s)
    K_chol_inv_R = solve_triangular(K_chol, residue, lower=True)  # shape (n, s)
    phi_S_phistar = (self.eigenfunctions[self.X] * S[None, :]) @ \
        self.eigenfunctions.T  # shape (n, t)
    K_chol_inv_phi_S_phistar = solve_triangular(K_chol, phi_S_phistar,
                                                lower=True)  # shape (n, t)
    update_term = np.einsum('nt,ns->st', K_chol_inv_phi_S_phistar, K_chol_inv_R)
    return prior + update_term  # shape (s, t)
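# Hedged illustration (made-up kernel, data, and noise level; not the original
# class, and noise handling is simplified): Matheron's rule for a GP with an
# explicit covariance matrix, mirroring the structure of posterior_samples
# above, i.e. each prior sample is shifted by a pathwise update
#   K_*X (K_XX + sigma^2 I)^{-1} (y - prior_X).
import numpy as np
from scipy.linalg import solve_triangular as tri_solve

def _demo_rbf(x1, x2, lengthscale=0.5):
    return np.exp(-0.5 * (x1[:, None] - x2[None, :]) ** 2 / lengthscale ** 2)

rng = np.random.default_rng(1)
x_train = np.linspace(-1.0, 1.0, 6)
x_test = np.linspace(-1.5, 1.5, 20)
x_all = np.concatenate([x_train, x_test])   # t = n_train + n_test locations
y = np.sin(3.0 * x_train)
noise, nsamples = 1e-2, 4

# Joint prior samples over all locations, shape (s, t)
L_all = np.linalg.cholesky(_demo_rbf(x_all, x_all) + 1e-6 * np.eye(len(x_all)))
prior = (L_all @ rng.standard_normal((len(x_all), nsamples))).T

# Pathwise update: smooth the residual at the training locations
L = np.linalg.cholesky(_demo_rbf(x_train, x_train) + noise * np.eye(len(x_train)))
residue = (y[None, :] - prior[:, :len(x_train)]).T                     # (n, s)
smoothed = tri_solve(L.T, tri_solve(L, residue, lower=True), lower=False)
posterior = prior + (_demo_rbf(x_all, x_train) @ smoothed).T           # (s, t)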
def sample_posterior_joint(features, mean, kernel, chol_fact, pred_mat,
                           test_features, num_samples=1):
    """
    Draws num_samples samples from the joint posterior distribution over the
    inputs test_features. This is done by computing mean and covariance matrix
    of this posterior, and using the Cholesky decomposition of the latter. If
    pred_mat is a matrix with m columns, the samples returned have shape
    (n_test, m, num_samples).

    :param features: Training inputs
    :param mean: Mean function
    :param kernel: Kernel function
    :param chol_fact: Part L of posterior state
    :param pred_mat: Part P of posterior state
    :param test_features: Test inputs
    :param num_samples: Number of samples to draw
    :return: Samples, shape (n_test, num_samples) or (n_test, m, num_samples)
    """
    k_tr_te = kernel(features, test_features)
    linv_k_tr_te = aspl.solve_triangular(chol_fact, k_tr_te, lower=True)
    posterior_mean = anp.matmul(anp.transpose(linv_k_tr_te), pred_mat) + \
        anp.reshape(mean(test_features), (-1, 1))
    posterior_cov = kernel(test_features, test_features) - anp.dot(
        anp.transpose(linv_k_tr_te), linv_k_tr_te)
    jitter_init = anp.ones((1,)) * (1e-5)
    sys_mat = AddJitterOp(
        flatten_and_concat(posterior_cov, jitter_init),
        initial_jitter_factor=NOISE_VARIANCE_LOWER_BOUND)
    lfact = cholesky_factorization(sys_mat)

    # Draw samples
    # posterior_mean.shape = (n_test, m), where m is the number of columns of
    # pred_mat. Reshape to (n_test, m, 1).
    n_test = getval(posterior_mean.shape)[0]
    posterior_mean = anp.expand_dims(posterior_mean, axis=-1)
    n01_vecs = [
        anp.random.normal(size=getval(posterior_mean.shape))
        for _ in range(num_samples)
    ]
    n01_mat = anp.reshape(anp.concatenate(n01_vecs, axis=-1), (n_test, -1))
    samples = anp.reshape(anp.dot(lfact, n01_mat), (n_test, -1, num_samples))
    samples = samples + posterior_mean
    if samples.shape[1] == 1:
        samples = anp.reshape(samples, (n_test, -1))  # (n_test, num_samples)
    return samples
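# Hedged sketch (plain numpy, made-up RBF kernel and data; not the module's own
# API): the same pattern as sample_posterior_joint above, i.e. form the joint
# posterior mean and covariance from the posterior state (L, P), jitter the
# covariance, and multiply its Cholesky factor with standard-normal draws.
import numpy as np
from scipy.linalg import solve_triangular as tri_solve

def _demo_rbf(x1, x2, lengthscale=0.5):
    return np.exp(-0.5 * (x1[:, None] - x2[None, :]) ** 2 / lengthscale ** 2)

rng = np.random.default_rng(2)
x_train = np.linspace(-1.0, 1.0, 6)
x_test = np.linspace(-1.2, 1.2, 10)
y = np.sin(3.0 * x_train).reshape(-1, 1)
noise, jitter, num_samples = 1e-2, 1e-6, 3

chol_fact = np.linalg.cholesky(
    _demo_rbf(x_train, x_train) + noise * np.eye(len(x_train)))      # L
pred_mat = tri_solve(chol_fact, y, lower=True)                        # P
linv_k_tr_te = tri_solve(chol_fact, _demo_rbf(x_train, x_test), lower=True)

posterior_mean = linv_k_tr_te.T @ pred_mat                            # (n_test, 1)
posterior_cov = _demo_rbf(x_test, x_test) - linv_k_tr_te.T @ linv_k_tr_te
lfact = np.linalg.cholesky(posterior_cov + jitter * np.eye(len(x_test)))
samples = posterior_mean + lfact @ rng.standard_normal((len(x_test), num_samples))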
def _build_objective_terms(self, Z, kern_variance, kern_lengthscale,
                           noise_variance):
    Mb = anp.shape(Z)[0]
    Ma = self.M_old
    jitter = 1e-3
    sigma2 = noise_variance
    sigma = anp.sqrt(sigma2)

    Saa = self.Su_old
    ma = self.mu_old

    # a is old inducing points, b is new
    # f is training points
    # s is test points
    Kbf = self.K(kern_variance, kern_lengthscale, Z, self.X)
    Kbb = self.K(kern_variance, kern_lengthscale, Z) + \
        anp.eye(Mb, dtype=float_type) * jitter
    Kba = self.K(kern_variance, kern_lengthscale, Z, self.Z_old)
    Kaa_cur = self.K(kern_variance, kern_lengthscale, self.Z_old) + \
        anp.eye(Ma, dtype=float_type) * jitter
    Kaa = self.Kaa_old + anp.eye(Ma, dtype=float_type) * jitter

    err = self.Y

    Sainv_ma = anp.linalg.solve(Saa, ma)
    Sinv_y = self.Y / sigma2
    c1 = anp.matmul(Kbf, Sinv_y)
    c2 = anp.matmul(Kba, Sainv_ma)
    c = c1 + c2

    Lb = anp.linalg.cholesky(Kbb)
    Lbinv_c = solve_triangular(Lb, c, lower=True)
    Lbinv_Kba = solve_triangular(Lb, Kba, lower=True)
    Lbinv_Kbf = solve_triangular(Lb, Kbf, lower=True) / sigma
    d1 = anp.matmul(Lbinv_Kbf, anp.transpose(Lbinv_Kbf))

    LSa = anp.linalg.cholesky(Saa)
    Kab_Lbinv = anp.transpose(Lbinv_Kba)
    LSainv_Kab_Lbinv = solve_triangular(LSa, Kab_Lbinv, lower=True)
    d2 = anp.matmul(anp.transpose(LSainv_Kab_Lbinv), LSainv_Kab_Lbinv)

    La = anp.linalg.cholesky(Kaa)
    Lainv_Kab_Lbinv = solve_triangular(La, Kab_Lbinv, lower=True)
    d3 = anp.matmul(anp.transpose(Lainv_Kab_Lbinv), Lainv_Kab_Lbinv)

    D = anp.eye(Mb, dtype=float_type) + d1 + d2 - d3
    D = D + anp.eye(Mb, dtype=float_type) * jitter
    LD = anp.linalg.cholesky(D)
    LDinv_Lbinv_c = solve_triangular(LD, Lbinv_c, lower=True)

    return (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD,
            Lbinv_Kba, LDinv_Lbinv_c, err, d1)
def compute_iKy(self, params, yknt, xt):
    k, n, t = yknt.shape
    Idt = np.eye(self.d * t)
    It = np.eye(t)
    kern_params, C, r, mu = self.unpack_params(params)

    KXX = self.kernel.build_Kxx(xt, xt, kern_params)
    KXX = KXX + np.eye(KXX.shape[0]) * self.fudge
    L = np.linalg.cholesky(KXX)

    iR12 = 1 / np.sqrt(r)
    A = L.T @ (np.kron(C.T @ np.diag(iR12), It))  # NT by NT
    B = Idt + L.T @ (np.kron(C.T @ np.diag(1 / r) @ C, It)) @ L
    M = np.linalg.cholesky(B)

    R_yk_nt = (yknt * iR12[None, :, None]).reshape([k, -1])
    Ay = (A @ R_yk_nt.T).T  # k by ...
    x = solve_triangular(M, Ay.T, lower=True)
    iBARy = solve_triangular(M.T, x, lower=False).T
    AiBARy = (A.T @ iBARy.T).T
    Idt_AiBARy = R_yk_nt - AiBARy  # k by nt
    x = (Idt_AiBARy.reshape([k, n, t]) * iR12[None, :, None]).reshape([k, -1])
    return x, M, KXX
def predict_posterior_marginals(features, mean, kernel, chol_fact, pred_mat,
                                test_features, test_intermediates=None):
    """
    Computes posterior means and variances for test_features. If pred_mat is a
    matrix, so will be posterior_means, but not posterior_variances. This
    reflects the fact that for GP regression with fixed hyperparameters, the
    posterior mean depends on the targets y, but the posterior covariance does
    not.

    :param features: Training inputs
    :param mean: Mean function
    :param kernel: Kernel function
    :param chol_fact: Part L of posterior state
    :param pred_mat: Part P of posterior state
    :param test_features: Test inputs
    :return: posterior_means, posterior_variances
    """
    k_tr_te = kernel(features, test_features)
    linv_k_tr_te = aspl.solve_triangular(chol_fact, k_tr_te, lower=True)
    posterior_means = anp.matmul(anp.transpose(linv_k_tr_te), pred_mat) + \
        anp.reshape(mean(test_features), (-1, 1))
    posterior_variances = kernel.diagonal(test_features) - anp.sum(
        anp.square(linv_k_tr_te), axis=0)
    if test_intermediates is not None:
        assert isinstance(test_intermediates, dict)
        test_intermediates.update({
            'k_tr_te': k_tr_te,
            'linv_k_tr_te': linv_k_tr_te,
            'test_features': test_features,
            'pred_means': posterior_means,
            'pred_vars': anp.reshape(
                anp.maximum(posterior_variances, MIN_POSTERIOR_VARIANCE),
                (-1,))
        })
    return posterior_means, anp.reshape(
        anp.maximum(posterior_variances, MIN_POSTERIOR_VARIANCE), (-1,))
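# Hedged check (illustrative, made-up RBF kernel and inputs): the marginal
# variance computation used above, diag(K_**) minus the column sums of
# (L^{-1} K_*)^2, compared against the direct expression
# k_** - k_*^T (K + sigma^2 I)^{-1} k_*.
import numpy as np
from scipy.linalg import solve_triangular as tri_solve

def _demo_rbf(x1, x2, lengthscale=0.7):
    return np.exp(-0.5 * (x1[:, None] - x2[None, :]) ** 2 / lengthscale ** 2)

rng = np.random.default_rng(3)
x_train = rng.uniform(-1.0, 1.0, 7)
x_test = rng.uniform(-1.0, 1.0, 5)
noise = 1e-2

K = _demo_rbf(x_train, x_train) + noise * np.eye(len(x_train))
L = np.linalg.cholesky(K)
k_tr_te = _demo_rbf(x_train, x_test)
linv_k_tr_te = tri_solve(L, k_tr_te, lower=True)

# Unit-variance RBF, so diag(K_**) is all ones
variances = 1.0 - np.sum(linv_k_tr_te ** 2, axis=0)
direct = 1.0 - np.einsum('nt,nt->t', k_tr_te, np.linalg.solve(K, k_tr_te))
assert np.allclose(variances, direct)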
def batch_mahalanobis(L, x):
    """
    Compute the squared Mahalanobis distance :math:`x^T M^{-1} x` for a
    factored :math:`M = LL^T`. Copied from PyTorch
    torch.distributions.multivariate_normal.

    Parameters
    ----------
    L : array_like (..., D, D)
        Cholesky factorization(s) of covariance matrix
    x : array_like (..., D)
        Points at which to evaluate the quadratic term

    Returns
    -------
    y : array_like (...,)
        squared Mahalanobis distance :math:`x^T (LL^T)^{-1} x`, using
        :math:`x^T (LL^T)^{-1} x = x^T L^{-T} L^{-1} x`
    """
    # The most common shapes are x: (T, D) and L: (D, D).
    # Special-case that one.
    if x.ndim == 2 and L.ndim == 2:
        xs = solve_triangular(L, x.T, lower=True)
        return np.sum(xs**2, axis=0)

    # Flatten the Cholesky into a (-1, D, D) array
    flat_L = flatten_to_dim(L, 2)
    # Invert each of the K arrays and reshape like L
    L_inv = np.reshape(
        np.array([np.linalg.inv(Li.T) for Li in flat_L]), L.shape)
    # Dot with L_inv^T; square and sum.
    xs = np.einsum('...i,...ij->...j', x, L_inv)
    return np.sum(xs**2, axis=-1)
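# Hedged check (made-up SPD matrix; not from the original module): the
# triangular-solve fast path of batch_mahalanobis above, verified against a
# direct x^T M^{-1} x with M = L L^T.
import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(4)
A = rng.standard_normal((4, 4))
M = A @ A.T + 4.0 * np.eye(4)
L = np.linalg.cholesky(M)
x = rng.standard_normal((10, 4))                  # T=10 points, D=4 dims

xs = solve_triangular(L, x.T, lower=True)         # L^{-1} x^T, shape (D, T)
fast = np.sum(xs ** 2, axis=0)
direct = np.einsum('ti,ij,tj->t', x, np.linalg.inv(M), x)
assert np.allclose(fast, direct)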
def cholesky_factorization_backward(l, lbar):
    # Reverse-mode (VJP) rule for the Cholesky factorization: maps the gradient
    # w.r.t. the factor L (lbar) to the gradient w.r.t. the input matrix (abar).
    abar = copyltu(anp.matmul(anp.transpose(l), lbar))
    abar = anp.transpose(aspl.solve_triangular(l, abar, lower=True, trans='T'))
    abar = aspl.solve_triangular(l, abar, lower=True, trans='T')
    return 0.5 * abar
def fun(B):
    return to_scalar(spla.solve_triangular(A, B, trans=trans, lower=lower))
def solve_triangular(L, x, trans='N'):
    # Thin wrapper around scipy's solve_triangular for a lower-triangular L
    return spla.solve_triangular(L, x, lower=True, trans=trans)
def _triangular_solve(a_, b_):
    return asla.solve_triangular(a_, b_, trans="N", lower=lower_a,
                                 check_finite=False)
def _compute_lvec(features, chol_fact, kernel, feature):
    kvec = anp.reshape(kernel(features, feature), (-1, 1))
    return anp.reshape(
        aspl.solve_triangular(chol_fact, kvec, lower=True), (1, -1))