def log_gaussian_pdf(x, mu=None, Sigma=None, is_cholesky=False,
                     compute_grad=False, cov_scaling=1.):
    D = len(x)
    if mu is None:
        mu = np.zeros(D)

    assert len(mu) == D
    if Sigma is not None:
        assert D == Sigma.shape[0]
        assert D == Sigma.shape[1]

        if is_cholesky is False:
            L = np.linalg.cholesky(Sigma)
        else:
            L = Sigma

        # solve y = K^(-1)x = L^(-T) L^(-1) x
        x = np.array(x - mu)
        y = solve_triangular(L, x.T, lower=True)
        y = solve_triangular(L.T, y, lower=False) / cov_scaling
        cov_L_diag = np.diag(L)
    else:
        # assume isotropic covariance, solve y = K^(-1)x;
        # centre x here as well so the quadratic form below uses (x - mu)
        x = np.array(x - mu)
        y = x / cov_scaling
        cov_L_diag = np.ones(D)

    if not compute_grad:
        log_determinant_part = -np.sum(np.log(np.sqrt(cov_scaling) * cov_L_diag))
        quadratic_part = -0.5 * x.dot(y)
        const_part = -0.5 * D * np.log(2 * np.pi)
        return const_part + log_determinant_part + quadratic_part
    else:
        return -y
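# A quick sanity check of log_gaussian_pdf against scipy's reference
# implementation; a minimal sketch, assuming numpy/scipy and the function
# above are in scope (multivariate_normal is used only for the comparison).
import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.RandomState(0)
A = rng.randn(3, 3)
Sigma = A.dot(A.T) + 3 * np.eye(3)  # random SPD covariance
mu = rng.randn(3)
x = rng.randn(3)

# the two triangular solves should reproduce scipy's logpdf exactly
print(np.allclose(log_gaussian_pdf(x, mu, Sigma),
                  multivariate_normal.logpdf(x, mean=mu, cov=Sigma)))  # True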
def find_mode_newton(self, return_full=False):
    """
    Newton search for the mode of p(y|f)p(f)

    from GP book, algorithm 3.1, added step size
    """
    K = self.gp.K
    if self.newton_start is None:
        f = zeros(len(K))
    else:
        f = self.newton_start

    if return_full:
        steps = [f]

    iteration = 0
    norm_difference = inf
    objective_value = -inf

    while iteration < self.newton_max_iterations and \
            norm_difference > self.newton_epsilon:
        # from GP book, algorithm 3.1, added step size
        # scale log_lik_grad_vector and K^-1 f = a
        w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f)
        w_sqrt = sqrt(w)

        # diag(w_sqrt).dot(K.dot(diag(w_sqrt))) == (K.T * w_sqrt).T * w_sqrt
        L = cholesky(eye(len(K)) + (K.T * w_sqrt).T * w_sqrt)
        b = f * w + self.newton_step * \
            self.gp.likelihood.log_lik_grad_vector(self.gp.y, f)

        # a = b - diag(w_sqrt).dot(inv(eye(len(K)) + (K.T*w_sqrt).T*w_sqrt)
        #                          .dot(diag(w_sqrt).dot(K.dot(b))))
        a = w_sqrt * K.dot(b)
        a = solve_triangular(L, a, lower=True)
        a = solve_triangular(L.T, a, lower=False)
        a = w_sqrt * a
        a = b - a
        f_new = K.dot(self.newton_step * a)

        # convergence check and next iteration; the objective (GP book,
        # eq. 3.12) is evaluated at the proposed point f_new, consistent
        # with a, which plays the role of K^-1 f_new
        objective_value_new = -0.5 * a.T.dot(f_new) + \
            sum(self.gp.likelihood.log_lik_vector(self.gp.y, f_new))
        norm_difference = norm(f - f_new)

        if objective_value_new > objective_value:
            f = f_new
            if return_full:
                steps.append(f)
        else:
            self.newton_step /= 2

        iteration += 1
        objective_value = objective_value_new

    self.computed = True

    if return_full:
        return f, L, asarray(steps)
    else:
        return f
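# The comment inside find_mode_newton uses a broadcasting trick:
# (K.T * w_sqrt).T * w_sqrt forms diag(w_sqrt).dot(K).dot(diag(w_sqrt))
# without materialising any diagonal matrix. A standalone numpy check of
# that identity (pure illustration, no GP objects needed):
import numpy as np

rng = np.random.RandomState(1)
K = rng.randn(4, 4)
K = K.dot(K.T) + 4 * np.eye(4)  # SPD stand-in for a kernel matrix
w_sqrt = np.sqrt(rng.rand(4))   # element-wise sqrt of the Hessian diagonal

lhs = (K.T * w_sqrt).T * w_sqrt                    # row, then column scaling
rhs = np.diag(w_sqrt).dot(K).dot(np.diag(w_sqrt))  # explicit diagonal matrices
print(np.allclose(lhs, rhs))  # True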
def cholesky_solve(L, x):
    """
    Solves X^-1 x = (LL^T)^-1 x = L^-T L^-1 x for a given Cholesky X = LL^T
    """
    x = solve_triangular(L, x.T, lower=True)
    x = solve_triangular(L.T, x, lower=False)
    return x
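# cholesky_solve replaces one dense solve with two cheap triangular solves;
# a minimal check against numpy's general-purpose solver, assuming the
# function above and scipy's solve_triangular are in scope:
import numpy as np

rng = np.random.RandomState(2)
A = rng.randn(5, 5)
X = A.dot(A.T) + 5 * np.eye(5)  # SPD system matrix
b = rng.randn(5)

L = np.linalg.cholesky(X)
print(np.allclose(cholesky_solve(L, b), np.linalg.solve(X, b)))  # True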
def _solve_P_Q(U, V, structure=None):
    """
    A helper function for expm_2009.

    Parameters
    ----------
    U : ndarray
        Pade numerator.
    V : ndarray
        Pade denominator.
    structure : str, optional
        A string describing the structure of both matrices `U` and `V`.
        Only `upper_triangular` is currently supported.

    Notes
    -----
    The `structure` argument is inspired by similar args
    for theano and cvxopt functions.

    """
    P = U + V
    Q = -U + V
    if isspmatrix(U):
        return spsolve(Q, P)
    elif structure is None:
        return solve(Q, P)
    elif structure == UPPER_TRIANGULAR:
        return solve_triangular(Q, P)
    else:
        raise ValueError('unsupported matrix structure: ' + str(structure))
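# A sketch of _solve_P_Q's dense path, assuming its module-level imports
# (solve, spsolve, isspmatrix) are available. With the degree-1 Pade
# numerator U = A/2 and denominator contribution V = I, the returned
# Q^-1 P = (I - A/2)^-1 (I + A/2) is the [1/1] Pade approximation of expm(A):
import numpy as np

A = 0.01 * np.eye(3)
R = _solve_P_Q(A / 2.0, np.eye(3))
print(np.allclose(R, np.exp(0.01) * np.eye(3), atol=1e-6))  # True, error is O(A^3)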
def log_pdf_multiple_points(self, X):
    assert len(shape(X)) == 2
    assert shape(X)[1] == self.dimension

    log_determinant_part = -sum(log(diag(self.L)))

    quadratic_parts = zeros(len(X))
    for i in range(len(X)):
        x = X[i] - self.mu

        # solve y = K^(-1)x = L^(-T) L^(-1) x
        y = solve_triangular(self.L, x.T, lower=True)
        y = solve_triangular(self.L.T, y, lower=False)
        quadratic_parts[i] = -0.5 * x.dot(y)

    const_part = -0.5 * len(self.L) * log(2 * pi)
    return const_part + log_determinant_part + quadratic_parts
def get_gaussian(self, f=None, L=None):
    if f is None or L is None:
        f, L, _ = self.find_mode_newton(return_full=True)

    w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f)
    w_sqrt = sqrt(w)
    K = self.gp.K

    # GP book 3.27, matrix inversion lemma on
    # (K^-1 + W)^-1 = K - K W^0.5 B^-1 W^0.5 K
    C = (K.T * w_sqrt).T
    C = solve_triangular(L, C, lower=True)
    C = solve_triangular(L.T, C, lower=False)
    C = (C.T * w_sqrt).T
    C = K.dot(C)
    C = K - C

    return Gaussian(f, C, is_cholesky=False)
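# get_gaussian relies on the matrix inversion lemma quoted in its comment,
# (K^-1 + W)^-1 = K - K W^0.5 B^-1 W^0.5 K with B = I + W^0.5 K W^0.5.
# The identity can be verified directly with explicit inverses, which the
# method itself avoids via Cholesky solves (illustration only):
import numpy as np

rng = np.random.RandomState(3)
K = rng.randn(4, 4)
K = K.dot(K.T) + 4 * np.eye(4)  # SPD kernel matrix
w = rng.rand(4)                 # diagonal of W
W_sqrt = np.diag(np.sqrt(w))

B = np.eye(4) + W_sqrt.dot(K).dot(W_sqrt)
lhs = np.linalg.inv(np.linalg.inv(K) + np.diag(w))
rhs = K - K.dot(W_sqrt).dot(np.linalg.inv(B)).dot(W_sqrt).dot(K)
print(np.allclose(lhs, rhs))  # True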
def log_pdf(self, X):
    assert len(shape(X)) == 2
    assert shape(X)[1] == self.dimension

    log_determinant_part = -sum(log(diag(self.L)))

    quadratic_parts = zeros(len(X))
    for i in range(len(X)):
        x = X[i] - self.mu

        # solve y = K^(-1)x = L^(-T) L^(-1) x
        y = solve_triangular(self.L, x.T, lower=True)
        y = solve_triangular(self.L.T, y, lower=False)
        quadratic_parts[i] = -0.5 * x.dot(y)

    const_part = -0.5 * len(self.L) * log(2 * pi)
    return const_part + log_determinant_part + quadratic_parts
def log_gaussian_pdf_multiple(X, mu=None, Sigma=None, is_cholesky=False,
                              compute_grad=False, cov_scaling=1.):
    D = X.shape[1]
    if mu is None:
        mu = np.zeros(D)

    assert len(mu) == D
    if Sigma is not None:
        assert D == Sigma.shape[0]
        assert D == Sigma.shape[1]

        if is_cholesky is False:
            L = np.linalg.cholesky(Sigma)
        else:
            L = Sigma

        # solve Y = K^(-1)(X-mu) = L^(-T) L^(-1) (X-mu)
        X = np.array(X - mu)
        Y = solve_triangular(L, X.T, lower=True)
        Y = solve_triangular(L.T, Y, lower=False) / cov_scaling
        Y = Y.T
        cov_L_diag = np.diag(L)
    else:
        # assume isotropic covariance, solve Y = K^(-1)(X-mu);
        # centre X here as well so the quadratic form below uses (X - mu)
        X = np.array(X - mu)
        Y = X / cov_scaling
        cov_L_diag = np.ones(D)

    if not compute_grad:
        log_determinant_part = -np.sum(np.log(np.sqrt(cov_scaling) * cov_L_diag))
        quadratic_part = -0.5 * np.sum(X * Y, axis=1)
        const_part = -0.5 * D * np.log(2 * np.pi)
        return const_part + log_determinant_part + quadratic_part
    else:
        return -Y
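# The batched version should agree with per-point evaluation; a small
# consistency check, assuming log_gaussian_pdf and log_gaussian_pdf_multiple
# from above are both in scope:
import numpy as np

rng = np.random.RandomState(4)
A = rng.randn(3, 3)
Sigma = A.dot(A.T) + 3 * np.eye(3)  # random SPD covariance
mu = rng.randn(3)
X = rng.randn(10, 3)

batch = log_gaussian_pdf_multiple(X, mu, Sigma)
single = np.array([log_gaussian_pdf(x, mu, Sigma) for x in X])
print(np.allclose(batch, single))  # True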
def predict(self, X_test, f_mode=None):
    """
    Predictions for GP with Laplace approximation.

    from GP book, algorithm 3.2,
    """
    if f_mode is None:
        f_mode = self.find_mode_newton()

    predictions = zeros(len(X_test))

    K = self.gp.K
    K_train_test = self.gp.covariance.compute(self.gp.X, X_test)

    w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f_mode)
    w_sqrt = sqrt(w)

    # diag(w_sqrt).dot(K.dot(diag(w_sqrt))) == (K.T * w_sqrt).T * w_sqrt
    L = cholesky(eye(len(K)) + (K.T * w_sqrt).T * w_sqrt)

    # iterate over all test points
    for i in range(len(X_test)):
        k = K_train_test[:, i]
        k_self = self.gp.covariance.compute([X_test[i]], [X_test[i]])[0]

        f_mean = k.dot(self.gp.likelihood.log_lik_grad_vector(self.gp.y, f_mode))
        v = solve_triangular(L, w_sqrt * k, lower=True)
        f_var = k_self - v.T.dot(v)

        # GP book eq. 3.25: average the likelihood p(y=1|f) over the Gaussian
        # posterior N(f_mean, f_var), using the likelihood interface from the
        # Monte Carlo variant below; note that norm.pdf takes the standard
        # deviation sqrt(f_var), not the variance
        predictions[i] = integrate.quad(
            lambda x: exp(self.gp.likelihood.log_lik_vector(1.0, asarray([x]))[0])
            * norm.pdf(x, f_mean, sqrt(f_var)),
            -inf, inf)[0]

        # # integrate over Gaussian using some crude numerical integration
        # samples = randn(1000) * sqrt(f_var) + f_mean
        # log_liks = self.gp.likelihood.log_lik_vector(1.0, samples)
        # predictions[i] = 1.0 / len(samples) * GPTools.log_sum_exp(log_liks)

    return predictions
experiment_dir_base = str(sys.argv[1])
n = int(str(sys.argv[2]))

# loop over parameters here
experiment_dir = experiment_dir_base + \
    str(os.path.abspath(sys.argv[0])).split(os.sep)[-1].split(".")[0] + os.sep
print "running experiments", n, "times at base", experiment_dir

# load data
data, labels = GPData.get_glass_data()

# normalise and whiten dataset
data -= mean(data, 0)
L = cholesky(cov(data.T))
data = solve_triangular(L, data.T, lower=True).T
dim = shape(data)[1]

# prior on theta and posterior target estimate
theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
distribution = PseudoMarginalHyperparameterDistribution(data, labels,
                                                        n_importance=100,
                                                        prior=theta_prior,
                                                        ridge=1e-3)

sigma = 23.0
print "using sigma", sigma
kernel = GaussianKernel(sigma=sigma)

for i in range(n):
    mcmc_samplers = []
if __name__ == '__main__':
    # load data
    data, labels = GPData.get_glass_data()

    # throw away some data
    n = 250
    seed(1)
    idx = permutation(len(data))
    idx = idx[:n]
    data = data[idx]
    labels = labels[idx]

    # normalise and whiten dataset
    data -= mean(data, 0)
    L = cholesky(cov(data.T))
    data = solve_triangular(L, data.T, lower=True).T
    dim = shape(data)[1]

    # prior on theta and posterior target estimate
    theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
    target = PseudoMarginalHyperparameterDistribution(data, labels,
                                                      n_importance=100,
                                                      prior=theta_prior,
                                                      ridge=1e-3)

    # create sampler
    burnin = 10000
    num_iterations = burnin + 300000
    kernel = GaussianKernel(sigma=23.0)
    sampler = KameleonWindowLearnScale(target, kernel, stop_adapt=burnin)
    # sampler = AdaptiveMetropolisLearnScale(target)
    # sampler = StandardMetropolis(target)
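# Both scripts above whiten the data with a single triangular solve: since
# cov(data.T) = L L^T, applying L^-1 leaves a sample covariance of exactly
# the identity. A self-contained check on correlated toy data:
import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.RandomState(5)
toy = rng.randn(500, 3).dot(rng.randn(3, 3))  # correlated toy dataset

toy -= np.mean(toy, 0)
L = np.linalg.cholesky(np.cov(toy.T))
toy = solve_triangular(L, toy.T, lower=True).T

print(np.allclose(np.cov(toy.T), np.eye(3)))  # True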
def solve_system(self, A, b):
    # solve A x = b for a unit-diagonal, upper-triangular A; with
    # overwrite_b=True the solution also replaces b in place
    return solve_triangular(A, b, unit_diagonal=True,
                            overwrite_b=True, lower=False)
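# With unit_diagonal=True, solve_triangular never reads the stored diagonal
# of A and treats it as ones; a small demonstration of that behaviour:
import numpy as np
from scipy.linalg import solve_triangular

A = np.array([[9., 2., 3.],
              [0., 9., 4.],
              [0., 0., 9.]])  # the stored diagonal (9s) is ignored
b = np.array([1., 2., 3.])

x = solve_triangular(A, b, unit_diagonal=True, lower=False)
A_unit = np.triu(A, 1) + np.eye(3)  # the system actually solved
print(np.allclose(A_unit.dot(x), b))  # True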