def predict(params, x, y, xstar):
    """Returns the predictive mean and covariance at locations xstar,
    of the latent function value f (without observation noise)."""
    mean, cov_params, noise_scale = unpack_kernel_params(params)
    cov_f_f = cov_func(cov_params, xstar, xstar)
    cov_y_f = cov_func(cov_params, x, xstar)
    cov_y_y = cov_func(cov_params, x, x) + noise_scale * np.eye(len(y))
    pred_mean = mean + np.dot(solve(cov_y_y, cov_y_f).T, y - mean)
    pred_cov = cov_f_f - np.dot(solve(cov_y_y, cov_y_f).T, cov_y_f)
    return pred_mean, pred_cov
def conditional(x, y, xstar):
    """Returns the predictive mean and covariance at locations xstar,
    of the latent function value f (without observation noise).

    Assumes the prior mean is zero; y is the observed data."""
    cov_f_f = RBF(xstar, xstar)
    cov_y_f = RBF(x, xstar)
    cov_y_y = RBF(x, x) + (noise_scale + tol) * np.eye(len(y))
    pred_mean = np.dot(solve(cov_y_y, cov_y_f).T, y)
    pred_cov = cov_f_f - np.dot(solve(cov_y_y, cov_y_f).T, cov_y_f) \
        + tol * np.eye(len(xstar))
    return pred_mean, pred_cov
def predict(params, x, y, xstar):
    """Returns the predictive mean and covariance of f at locations xstar."""
    mean, cov_params, noise_scale = unpack_kernel_params(params)
    K_ff = covariance(cov_params, xstar, xstar)
    K_yf = covariance(cov_params, x, xstar)
    K_yy = covariance(cov_params, x, x) + noise_scale * np.eye(len(y))
    pred_mean = mean + np.dot(solve(K_yy, K_yf).T, y - mean)
    pred_cov = K_ff - np.dot(solve(K_yy, K_yf).T, K_yf)
    return pred_mean, pred_cov
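# A minimal usage sketch for the predict() variant directly above
# (hypothetical, not from the source): it assumes an RBF covariance() and an
# unpack_kernel_params() that splits a flat parameter vector into
# (mean, kernel params, noise scale). Both helpers below are illustrative
# stand-ins, not the source's definitions.
import numpy as np
from numpy.linalg import solve

def covariance(cov_params, x, xp):
    # Squared-exponential kernel; cov_params = [log output scale, log lengthscale].
    diffs = (x[:, None] - xp[None, :]) / np.exp(cov_params[1])
    return np.exp(cov_params[0]) * np.exp(-0.5 * diffs**2)

def unpack_kernel_params(params):
    # params = [mean, log noise, kernel params...]
    return params[0], params[2:], np.exp(params[1])

x = np.linspace(-3, 3, 20)
y = np.sin(x) + 0.1 * np.random.randn(20)
xstar = np.linspace(-4, 4, 50)
mu, cov = predict(np.zeros(4), x, y, xstar)
std = np.sqrt(np.diag(cov))  # pointwise predictive uncertainty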
def mvnlogpdf(x, mu, L):
    """Not really a logpdf: we rely on the weights to keep track of
    normalizing factors that differ across clusters.

    L is the Cholesky decomposition of the covariance matrix."""
    D = L.shape[0]
    logdet = 2 * np.sum(np.log(np.diagonal(L)))
    quad = np.inner(x - mu, solve(L.T, solve(L, x - mu)))
    return -0.5 * (D * np.log(2 * np.pi) + logdet + quad)
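# Hypothetical sanity check for mvnlogpdf() (not from the source): despite
# the docstring's caveat, the expression matches the standard multivariate
# normal log-density, so it can be verified against scipy.
import numpy as np
from numpy.linalg import cholesky, solve
from scipy.stats import multivariate_normal

D = 3
A = np.random.randn(D, D)
cov = A @ A.T + D * np.eye(D)   # a well-conditioned SPD covariance
x, mu = np.random.randn(D), np.zeros(D)
L = cholesky(cov)
assert np.allclose(mvnlogpdf(x, mu, L),
                   multivariate_normal.logpdf(x, mu, cov))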
def cache(self):
    assert hasattr(self, "inputs")
    assert hasattr(self, "targets")
    x = np.atleast_2d(self.inputs)
    y = np.atleast_2d(self.targets)
    assert len(x) == len(y)
    n, D = x.shape
    n, E = y.shape
    self.K = self.kernel(self.hyp, x)  # [E, n, n], one kernel matrix per target dim
    self.iK = np.stack([solve(self.K[i], np.eye(n)) for i in range(E)])
    self.alpha = np.vstack([solve(self.K[i], y[:, i]) for i in range(E)]).T
def predict(params, xstar, with_noise=False, FITC=False):
    """Returns the predictive mean and covariance at locations xstar,
    of the latent function value f (without observation noise)."""
    mean, cov_params, noise_scale, x0, y0 = unpack_gp_params(params)
    cov_f_f = cov_func(cov_params, xstar, xstar)
    cov_y_f = cov_func(cov_params, x0, xstar)
    cov_y_y = cov_func(cov_params, x0, x0) + noise_scale * np.eye(len(y0))
    pred_mean = mean + np.dot(solve(cov_y_y, cov_y_f).T, y0 - mean)
    pred_cov = cov_f_f - np.dot(solve(cov_y_y, cov_y_f).T, cov_y_f)
    if FITC:
        pred_cov = np.diag(np.diag(pred_cov))  # keep only marginal variances
    if with_noise:
        pred_cov = pred_cov + noise_scale * np.eye(len(xstar))
    return pred_mean, pred_cov
def gp2(self, m, s):
    assert hasattr(self, "hyp")
    self.cache()
    x = np.atleast_2d(self.inputs)
    y = np.atleast_2d(self.targets)
    n, D = x.shape
    n, E = y.shape
    X = self.hyp
    beta = self.alpha

    m = np.atleast_2d(m)
    inp = x - m

    # Compute the predicted mean and IO covariance.
    iL = np.stack([np.diag(exp(-X[i, :D])) for i in range(E)])
    iN = np.matmul(inp, iL)
    B = iL @ s @ iL + np.eye(D)
    t = np.stack([solve(B[i].T, iN[i].T).T for i in range(E)])
    q = exp(-np.sum(iN * t, 2) / 2)
    qb = q * beta.T
    tiL = np.matmul(t, iL)
    c = exp(2 * X[:, D]) / sqrt(det(B))
    M = np.sum(qb, 1) * c
    V = (np.transpose(tiL, [0, 2, 1]) @ np.expand_dims(qb, 2)).reshape(E, D).T * c
    k = 2 * X[:, D].reshape(E, 1) - np.sum(iN**2, 2) / 2

    # Compute the predicted covariance.
    inp = np.expand_dims(inp, 0) / np.expand_dims(exp(2 * X[:, :D]), 1)
    ii = np.repeat(inp[:, newaxis, :, :], E, 1)
    ij = np.repeat(inp[newaxis, :, :, :], E, 0)
    iL = np.stack([np.diag(exp(-2 * X[i, :D])) for i in range(E)])
    siL = np.expand_dims(iL, 0) + np.expand_dims(iL, 1)
    R = np.matmul(s, siL) + np.eye(D)
    t = 1 / sqrt(det(R))
    iRs = np.stack([solve(R.reshape(-1, D, D)[i], s) for i in range(E * E)])
    iRs = iRs.reshape(E, E, D, D)
    Q = exp(k[:, newaxis, :, newaxis] + k[newaxis, :, newaxis, :]
            + maha(ii, -ij, iRs / 2))
    S = t * np.einsum('ji,iljk,kl->il', beta, Q, beta) + 1e-6 * np.eye(E)
    S = S - np.matmul(M[:, newaxis], M[newaxis, :])
    return M, S, V
def gain(P, A, B, Q):
    n, m = B.shape
    AB = np.hstack([A, B])
    H = np.dot(AB.T, np.dot(P, AB)) + Q
    Hux = H[n:n + m, 0:n]
    Huu = H[n:n + m, n:n + m]
    K = -la.solve(Huu, Hux)
    return K
def ricc(P, A, B, Q):
    n, m = B.shape
    AB = np.hstack([A, B])
    H = np.dot(AB.T, np.dot(P, AB)) + Q
    Hxx = H[0:n, 0:n]
    Hxu = H[0:n, n:n + m]
    Hux = H[n:n + m, 0:n]
    Huu = H[n:n + m, n:n + m]
    return Hxx - np.dot(Hxu, la.solve(Huu, Hux))
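# Hypothetical usage of ricc()/gain() above (not from the source): iterate
# the Riccati map to a fixed point and read off the LQR feedback gain. Here
# Q is the joint (n+m)x(n+m) state/input cost matrix, assumed PSD with a
# positive-definite input block.
import numpy as np
import numpy.linalg as la

n, m = 2, 1
A = np.array([[1.0, 0.1], [0.0, 1.0]])
B = np.array([[0.0], [0.1]])
Q = np.diag([1.0, 1.0, 0.1])  # block-diagonal state cost and input cost

P = np.eye(n)
for _ in range(500):  # fixed-point (value) iteration
    P = ricc(P, A, B, Q)
K = gain(P, A, B, Q)
assert np.max(np.abs(la.eigvals(A + B @ K))) < 1  # closed loop is stable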
def calc_step_direction(self, x, obj, state_aux):
    method = self.setting.step_method
    if method == 'gradient':
        return -obj.gradient(x)
    elif method == 'newton':
        H = obj.hessian(x)
        B = posdefify(H, self.setting.pos_hess_eps)
        return -la.solve(B, obj.gradient(x))
    else:
        raise ValueError('Invalid step method!')
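# The snippet above and calc_step() further below both assume posdefify()
# and quadratic_formula() helpers that are not shown. A minimal sketch of
# plausible implementations (assumptions, not the source's definitions):
import numpy as np
import numpy.linalg as la

def posdefify(H, eps):
    # Clip eigenvalues from below so the Hessian is safely positive definite.
    w, V = la.eigh((H + H.T) / 2)
    return V @ np.diag(np.maximum(w, eps)) @ V.T

def quadratic_formula(a, b, c):
    # Both real roots of a*t**2 + b*t + c = 0.
    disc = np.sqrt(b**2 - 4 * a * c)
    return np.array([(-b + disc) / (2 * a), (-b - disc) / (2 * a)])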
def loss_sat(self, m, s):
    D = len(m)
    W = self.W if hasattr(self, 'W') else np.eye(D)
    z = self.z if hasattr(self, 'z') else np.zeros(D)
    m, z = np.atleast_2d(m), np.atleast_2d(z)

    sW = np.dot(s, W)
    ispW = solve((np.eye(D) + sW).T, W.T).T
    L = -exp(-(m - z) @ ispW @ (m - z).T / 2) / sqrt(det(np.eye(D) + sW))

    i2spW = solve((np.eye(D) + 2 * sW).T, W.T).T
    r2 = exp(-(m - z) @ i2spW @ (m - z).T) / sqrt(det(np.eye(D) + 2 * sW))
    S = r2 - L**2

    t = np.dot(W, z.T) - ispW @ (np.dot(sW, z.T) + m.T)
    C = L * t
    return L + 1, S, C
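# Hypothetical usage of loss_sat() (not from the source): it computes the
# mean, variance, and input-output covariance of the saturating cost
# 1 - exp(-(x - z)' W (x - z) / 2) under x ~ N(m, s). A bare object carrying
# W and z attributes is enough to call it as a free function.
import numpy as np
from numpy import exp, sqrt
from numpy.linalg import det, solve
from types import SimpleNamespace

cfg = SimpleNamespace(W=np.eye(2), z=np.zeros(2))
m, s = np.zeros(2), 0.01 * np.eye(2)
L_mean, L_var, L_cov = loss_sat(cfg, m, s)
# With the state concentrated at the target, the expected loss is near 0.
assert L_mean < 0.05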
def predict_full(params, x, y, xstar, weights):
    """Returns the predictive mean and covariance at locations xstar,
    of the latent function value f (without observation noise)."""
    mean, cov_params, noise_variance = unpack_kernel_params(params)
    cov_f_f = cov_func(cov_params, xstar, xstar)
    cov_y_f = cov_func(cov_params, x, xstar)
    cov_y_y = cov_func(cov_params, x, x) + np.diag(noise_variance / weights)
    z = solve(cov_y_y, cov_y_f).T
    pred_mean = mean + np.dot(z, y - mean)
    pred_cov = cov_f_f - np.dot(z, cov_y_f)
    return pred_mean, pred_cov
def elbo(y, phi, lam, pi, psi, sigma2s, mus, Sigmas, kernel_params):
    """
    phi      [N, K]    sample membership (cell line cluster)
    lam      [G, L]    feature membership (expression cluster)
    pi       [K]       sample mixture weights
    psi      [L]       feature mixture weights
    y        [N, G, T] data
    mus      [K, L, T] means
    """
    # Vectorized alternative, kept for reference:
    # conditional = np.array([list(map(
    #     lambda f, s: norm.logpdf(y, f, s).sum(axis=-1),
    #     Q[:, :-1], Q[:, -1]))
    #     for Q in np.concatenate([mus, sigma2s[:, :, np.newaxis]], 2)])
    # conditional = conditional + np.log(mix)[:, :, np.newaxis, np.newaxis]
    # assignments = np.einsum('nk, gl->klng', phi, lam)
    # likelihood = np.sum(conditional * assignments)

    likelihood = 0
    # data likelihood
    for l in range(L):
        for k in range(K):
            ll = np.sum(np.nan_to_num(norm.logpdf(
                y, mus[k, l], np.sqrt(sigma2s[k, l]))), axis=-1)
            ll = ll - 0.5 * np.trace(Sigmas[k, l] / sigma2s[k, l])
            ll = ll * phi[:, k][:, np.newaxis]
            ll = ll * lam[:, l]
            likelihood = likelihood + np.sum(ll)

    # assignment likelihood
    likelihood = likelihood + np.sum(np.log(pi) * phi)
    likelihood = likelihood + np.sum(np.log(psi) * lam)

    # function likelihood
    for k in range(K):
        for l in range(L):
            Ker = cov_func(kernel_params[k, l], inputs, inputs)
            likelihood = likelihood \
                + mvn.logpdf(mus[k, l], np.zeros(T), Ker) \
                - 0.5 * np.trace(solve(Ker, Sigmas[k, l]))

    entropy = np.sum(list(map(multinomial_entropy, phi))
                     + list(map(multinomial_entropy, lam)))
    for k in range(K):
        for l in range(L):
            entropy = entropy + mvn.entropy(mus[k, l], Sigmas[k, l])

    return likelihood + entropy
def log_gp_prior(y_bnn, x):
    """Computes the expected log GP prior, E[log p_gp(f)] for f ~ p_BNN(f),
    where p_gp(f) = N(f | 0, K):
        E[log p_gp(f)] = -0.5 * E[(L^-1 f)^T (L^-1 f)] + const;  K = LL^T
    (Cholesky decomposition; we ignore constants for now, as we are not
    optimizing the covariance hyperparameters)

    bnn_weights | dim = [N_weights_samples, N_weights]
    K = covariance/kernel matrix | dim = [N_data, N_data]; dim L = dim K
    y_bnn output of a BNN | dim = [N_data, N_weights_samples]

    returns: E[log p_gp(y)] | dim = [N_function_samples]
    """
    K = covariance(x, x) + noise_var * np.eye(len(x))  # [N_data, N_data]
    L = cholesky(K)                        # K = LL^T; shape L = shape K
    a = solve(L, y_bnn)                    # a = L^-1 y_bnn; shape a = shape y_bnn
    log_gp = -0.5 * np.mean(a**2, axis=0)  # compute E[a^2]
    return log_gp
def log_gp_prior(f_bnn, x, t):
    """Computes E_{X~p(X)}[log p_gp(f)] for f ~ p_BNN(f),
    where p_gp(f) = N(f | 0, K):
        E[log p_gp(f)] = -0.5 * E_{X~p(X)}[(L^-1 f)^T (L^-1 f)] + const;  K = LL^T
    (Cholesky decomposition; we ignore constants for now, as we are not
    optimizing the covariance hyperparameters)

    bnn_weights | dim = [N_weights_samples, N_weights]
    K = covariance/kernel matrix | dim = [N_data, N_data]; dim L = dim K
    f_bnn output of a BNN | dim = [N_data, N_weights_samples]

    returns: E[log p_gp(f)] | dim = [N_function_samples]
    """
    s = 1e-6 * np.eye(len(x))              # jitter for a stable Cholesky
    K = covariance(x, x) + s               # [N_data, N_data]
    L = cholesky(K)                        # K = LL^T (jitter already in K)
    a = solve(L, f_bnn)                    # L^-1 f_bnn; shape = shape f_bnn
    log_gp = -0.5 * np.mean(a**2, axis=0)  # compute E_{X~p(X)}[a^2]
    return log_gp
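# A hedged usage sketch for the log_gp_prior() variants (hypothetical, not
# from the source): with covariance() an RBF kernel and the columns of f_bnn
# being sampled functions, the result scores each column under the GP prior
# (up to constants, and averaged rather than summed over data points).
import numpy as np
from numpy.linalg import cholesky, solve

def covariance(x, xp):
    # Unit-scale RBF kernel on 1-D inputs (an illustrative assumption).
    return np.exp(-0.5 * (x[:, None] - xp[None, :])**2)

x = np.linspace(-2, 2, 30)
f_bnn = np.random.randn(30, 5)           # 5 candidate functions
scores = log_gp_prior(f_bnn, x, None)    # [5]; higher = more GP-prior-like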
def log_pdf(self, hyp):
    x = np.atleast_2d(self.inputs)
    y = np.atleast_2d(self.targets)
    n, D = x.shape
    n, E = y.shape
    hyp = hyp.reshape(E, -1)
    K = self.kernel(hyp, x)  # [E, n, n]
    L = cholesky(K)
    alpha = np.hstack([solve(K[i], y[:, i]) for i in range(E)])
    y = y.flatten(order='F')
    # Note: this is the negative log marginal likelihood, despite the name.
    logp = 0.5 * n * E * log(2 * np.pi) + 0.5 * np.dot(y, alpha) \
        + np.sum([log(np.diag(L[i])) for i in range(E)])
    return logp
def predict(params, x, y, xstar, weights=None, condense=True,
            prediction_noise=True):
    """Returns the predictive mean and covariance at locations xstar of the
    latent function value f (observation noise is added to the covariance
    only when prediction_noise is True)."""
    n, t = y.shape
    if weights is None:
        weights = np.ones(n)
    if not condense:
        return predict_full(params, np.tile(x, n), y.flatten(), xstar,
                            np.tile(weights, (x.size, 1)).T.flatten())
    mean, cov_params, noise_variance = unpack_kernel_params(params)
    if n == 0:  # no data, return the prior
        prior_mean = mean * np.ones(xstar.size)
        prior_covariance = cov_func(cov_params, xstar, xstar)
        return prior_mean, prior_covariance
    y_bar = np.dot(weights, y)
    weights_full = (np.logical_not(np.isnan(y))
                    * weights[:, np.newaxis]).sum(axis=0)
    cov_f_f = cov_func(cov_params, xstar, xstar)
    cov_y_f = weights_full[:, np.newaxis] * cov_func(cov_params, x, xstar)
    cov_y_y = np.outer(weights_full, weights_full) \
        * cov_func(cov_params, x, x) \
        + noise_variance * np.diag(weights_full)
    z = solve(cov_y_y, cov_y_f).T
    pred_mean = mean + np.dot(z, y_bar - mean).flatten()
    pred_cov = cov_f_f - np.dot(z, cov_y_f)
    if prediction_noise:
        pred_cov = pred_cov + noise_variance * np.eye(xstar.size)
    return pred_mean, pred_cov
def check_are(K, A, B, Q, verbose=True):
    n, m = B.shape
    AB = np.hstack([A, B])
    PK = mat(calc_vPK(K, A, B, Q))
    H = np.dot(AB.T, np.dot(PK, AB)) + Q
    Hxx = H[0:n, 0:n]
    Huu = H[n:n + m, n:n + m]
    Hux = H[n:n + m, 0:n]
    LHS = PK
    RHS = Hxx - np.dot(Hux.T, la.solve(Huu, Hux))
    diff = la.norm(LHS - RHS)
    if verbose:
        print(' Left-hand side of the ARE: Positive definite = %s'
              % is_pos_def(LHS))
        print(LHS)
        print('')
        print('Right-hand side of the ARE: Positive definite = %s'
              % is_pos_def(RHS))
        print(RHS)
        print('')
        print('Difference')
        print(LHS - RHS)
        print('\n')
    return diff
def calc_vPK(K, A, B, Q):
    # Solves the discrete Lyapunov equation P = AK^T P AK + QK in
    # vectorized (Kronecker) form, returning vec(P).
    n, m = B.shape
    AK = calc_AK(K, A, B)
    QK = calc_QK(K, Q)
    vQK = vec(QK)
    return la.solve(np.eye(n * n) - np.kron(AK.T, AK.T), vQK)
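# Hypothetical check for calc_vPK() (not from the source): the Kronecker
# solve above is a vectorized discrete Lyapunov equation P = AK' P AK + QK,
# so it can be compared against scipy. calc_AK, calc_QK, vec, and mat are
# not shown in the source; plausible definitions are sketched here as
# assumptions.
import numpy as np
import numpy.linalg as la
from scipy.linalg import solve_discrete_lyapunov

def calc_AK(K, A, B):  # closed-loop dynamics under u = K x (assumed)
    return A + B @ K

def calc_QK(K, Q):     # closed-loop stage cost for joint (n+m) cost Q (assumed)
    n = K.shape[1]
    IK = np.vstack([np.eye(n), K])
    return IK.T @ Q @ IK

vec = lambda X: X.reshape(-1)  # stacking convention (assumed)
mat = lambda v: v.reshape(int(np.sqrt(v.size)), -1)

A = np.array([[1.0, 0.1], [0.0, 1.0]])
B = np.array([[0.0], [0.1]])
Q = np.diag([1.0, 1.0, 0.1])
K = np.array([[-1.0, -2.0]])   # a stabilizing gain for this system
P = mat(calc_vPK(K, A, B, Q))
assert np.allclose(P, solve_discrete_lyapunov(calc_AK(K, A, B).T,
                                              calc_QK(K, Q)))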
def plot_gp_posterior(x, xtest, y, s=1e-4, samples=10, title="", plot='gp'):
    N = len(x)
    n = len(xtest)
    K = covariance(x, x) + s * np.eye(N)
    L = cholesky(K)

    # compute the mean at our test points.
    Lk = solve(L, covariance(x, xtest))
    mu = np.dot(Lk.T, solve(L, y))

    # compute the variance at our test points.
    K_ = covariance(xtest, xtest)
    var = np.diag(K_) - np.sum(Lk**2, axis=0)
    std = np.sqrt(var)

    # draw samples from the prior and posterior at our test points.
    L = cholesky(K_ + s * np.eye(n))
    f_prior = np.dot(L, np.random.normal(size=(n, samples)))
    L = cholesky(K_ + s * np.eye(n) - np.dot(Lk.T, Lk))
    f_post = mu + np.dot(L, np.random.normal(size=(n, samples)))

    # -------------------------- PLOTTING --------------------------
    # PLOT PRIOR
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(x, y, 'ko', ms=4)

    # Get critical values for the deciles
    lvls = 0.1 * np.linspace(1, 9, 9)
    alphas = 1 - 0.5 * lvls
    zs = norm.ppf(alphas)
    pal = pal_col[plot]
    cols = colors[plot]

    # plot samples, mean and deciles
    mean = np.mean(f_prior, axis=1)
    std = np.std(f_prior, axis=1)
    ax.plot(xtest, f_prior, sns.xkcd_rgb[sample_col[plot]], lw=1)
    ax.plot(xtest, mean, sns.xkcd_rgb[cols[0]], lw=1)
    for z, col in zip(zs, pal):
        ax.fill_between(xtest.ravel(), mean - z * std, mean + z * std,
                        color=col)
    plt.tick_params(labelbottom='off')
    plt.xlim([-8, 8])
    plt.legend()
    plt.savefig(title + "GP prior_draws.pdf", bbox_inches='tight')

    # PLOT POSTERIOR
    plt.clf()
    std = np.sqrt(var)
    fig = plt.figure()
    bx = fig.add_subplot(111)
    bx.plot(x, y, 'ko', ms=4)

    # plot samples, mean and deciles
    bx.plot(xtest, f_post, sns.xkcd_rgb[sample_col[plot]], lw=1)
    # bx.plot(xtest, mu, sns.xkcd_rgb[cols[0]], lw=1)
    mu = mu.ravel()
    # for z, col in zip(zs, pal):
    #     bx.fill_between(xtest.ravel(), mu - z * std, mu + z * std, color=col)
    plt.tick_params(labelbottom='off')
    plt.xlim([-8, 8])
    plt.ylim([-2, 3])
    plt.legend()
    plt.savefig(title + "GP post_draws.pdf", bbox_inches='tight')
def check_forward(L, x, trans, lower):
    ans1 = solve(T(L) if trans in (1, 'T') else L, x)
    ans2 = solve_triangular(L, x, lower=lower, trans=trans)
    assert np.allclose(ans1, ans2)
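# Hypothetical driver for check_forward() (not from the source): compare a
# dense solve against scipy's triangular solve on a random lower-triangular
# system. T is assumed to be a transpose helper defined elsewhere; a
# stand-in is given here.
import numpy as np
from numpy.linalg import solve
from scipy.linalg import solve_triangular

T = lambda A: np.swapaxes(A, -1, -2)
L = np.tril(np.random.randn(5, 5)) + 5 * np.eye(5)  # well-conditioned
x = np.random.randn(5)
for trans in (0, 1, 'N', 'T'):
    check_forward(L, x, trans=trans, lower=True)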
def calc_step(self, x, trust_radius, obj):
    tags = []
    method = self.setting.step_method
    if method == 'dogleg':
        n = x.size
        g = obj.gradient(x)
        H = obj.hessian(x)
        B = posdefify(H, self.setting.pos_hess_eps)
        # Find the minimizing tau along the dogleg path
        pU = -(np.dot(g, g) / np.dot(g, np.dot(B, g))) * g
        pB = -la.solve(B, g)
        dp = pB - pU
        if la.norm(pB) <= trust_radius:
            # Minimum of model lies inside the trust region
            p = np.copy(pB)
        else:
            # Minimum of model lies outside the trust region
            tau_U = trust_radius / la.norm(pU)
            if tau_U <= 1:
                # First dogleg segment intersects trust region boundary
                p = tau_U * pU
            else:
                # Second dogleg segment intersects trust region boundary
                aa = np.dot(dp, dp)
                ab = 2 * np.dot(dp, pU)
                ac = np.dot(pU, pU) - trust_radius**2
                alphas = quadratic_formula(aa, ab, ac)
                alpha = np.max(alphas)
                p = pU + alpha * dp
        return p, tags
    elif method == '2d_subspace':
        g = obj.gradient(x)
        H = obj.hessian(x)
        B = posdefify(H, self.setting.pos_hess_eps)
        # Project g and B onto the 2D subspace spanned by (normalized
        # versions of) -g and -B^-1 g
        s1 = -g
        s2 = -la.solve(B, g)
        Sorig = np.vstack([s1, s2]).T
        # Orthonormalizing is necessary for us to use the same trust_radius
        # before/after transforming
        S, Rtran = la.qr(Sorig)
        g2 = np.dot(S.T, g)
        B2 = np.dot(S.T, np.dot(B, S))
        # Solve the 2D trust-region subproblem
        try:
            R, lower = cho_factor(B2)
            p2 = -cho_solve((R, lower), g2)
            if np.dot(p2, p2) <= trust_radius**2:
                p = np.dot(S, p2)
                return p, tags
        except LinAlgError:
            pass
        a = B2[0, 0] * trust_radius**2
        b = B2[0, 1] * trust_radius**2
        c = B2[1, 1] * trust_radius**2
        d = g2[0] * trust_radius
        f = g2[1] * trust_radius
        coeffs = np.array(
            [-b + d, 2 * (a - c + f), 6 * b, 2 * (-a + c + f), -b - d])
        t = np.roots(coeffs)  # Can handle leading zeros
        t = np.real(t[np.isreal(t)])
        p2 = trust_radius * np.vstack(
            (2 * t / (1 + t**2), (1 - t**2) / (1 + t**2)))
        value = 0.5 * np.sum(p2 * np.dot(B2, p2), axis=0) + np.dot(g2, p2)
        i = np.argmin(value)
        p2 = p2[:, i]
        # Project back into the original n-dim space
        p = np.dot(S, p2)
        return p, tags
    elif method == 'cg_steihaug':
        # Settings
        max_iters = 100000  # TODO put in settings
        # Init
        n = x.size
        g = obj.gradient(x)
        B = obj.hessian(x)
        z = np.zeros(n)
        r = np.copy(g)
        d = -np.copy(g)
        # Choose eps according to Algorithm 7.1
        grad_norm = la.norm(g)
        eps = min(0.5, grad_norm**0.5) * grad_norm
        if la.norm(r) < eps:
            p = np.zeros(n)
            tags.append('Stopping tolerance reached!')
            return p, tags
        j = 0
        while j + 1 < max_iters:
            # Check if 'd' is a direction of non-positive curvature
            dBd = np.dot(d, np.dot(B, d))
            rr = np.dot(r, r)
            if dBd <= 0:
                ta = np.dot(d, d)
                tb = 2 * np.dot(d, z)
                tc = np.dot(z, z) - trust_radius**2
                taus = quadratic_formula(ta, tb, tc)
                tau = np.max(taus)
                p = z + tau * d
                tags.append('Negative curvature encountered!')
                return p, tags
            alpha = rr / dBd
            z_new = z + alpha * d
            # Check if trust region bound violated
            if la.norm(z_new) >= trust_radius:
                ta = np.dot(d, d)
                tb = 2 * np.dot(d, z)
                tc = np.dot(z, z) - trust_radius**2
                taus = quadratic_formula(ta, tb, tc)
                tau = np.max(taus)
                p = z + tau * d
                tags.append('Trust region boundary reached!')
                return p, tags
            z = np.copy(z_new)
            r = r + alpha * np.dot(B, d)
            rr_new = np.dot(r, r)
            if la.norm(r) < eps:
                p = np.copy(z)
                tags.append('Stopping tolerance reached!')
                return p, tags
            beta = rr_new / rr
            d = -r + beta * d
            j += 1
        p = np.zeros(n)
        tags.append('ALERT! CG-Steihaug failed to solve trust-region '
                    'subproblem within max_iters')
        return p, tags
    else:
        raise ValueError('Invalid step method!')
def get_gain(A, B, Q, R, S):
    P = care(A, B, Q, R, S)
    K = -la.solve(R, B.T.dot(P) + S.T)
    return K
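# Hypothetical check for get_gain() (not from the source): assuming care()
# solves the continuous-time ARE with cross term S, the same gain can be
# recovered from scipy's solver and should stabilize the closed loop.
import numpy as np
import numpy.linalg as la
from scipy.linalg import solve_continuous_are

A = np.array([[0.0, 1.0], [0.0, 0.0]])  # double integrator
B = np.array([[0.0], [1.0]])
Q, R, S = np.eye(2), np.eye(1), np.zeros((2, 1))
P = solve_continuous_are(A, B, Q, R, s=S)
K = -la.solve(R, B.T @ P + S.T)
assert (np.real(la.eigvals(A + B @ K)) < 0).all()  # stable closed loop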
def log_gp_prior(y_bnn, K):  # y_bnn: [nf, nd], K: [nd, nd]
    """Computes log p_gp(f) (up to constants) for f ~ p_BNN(f)."""
    L = cholesky(K)
    a = solve(L, y_bnn.T)                # a = L^-1 y_bnn^T, [nd, nf]
    return -0.5 * np.mean(a**2, axis=0)  # [nf]
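# Hypothetical usage of this compact log_gp_prior() (not from the source):
# score a batch of sampled functions under a GP prior with a fixed kernel
# matrix. The RBF kernel and jitter below are illustrative assumptions.
import numpy as np
from numpy.linalg import cholesky, solve

nd, nf = 30, 8
xs = np.linspace(-2, 2, nd)
K = np.exp(-0.5 * (xs[:, None] - xs[None, :])**2) + 1e-6 * np.eye(nd)
y_bnn = np.random.randn(nf, nd)   # nf candidate functions
scores = log_gp_prior(y_bnn, K)   # shape [nf]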