# imports assumed by the methods below (jst is the triangular-solve
# shorthand used throughout):
import numpy as np
import jax.numpy as jnp
from jax.scipy.linalg import solve_triangular as jst


def posterior(self, X, full_covar):
    """Posterior prediction at points X.

    If not returning the full covariance matrix, then the mean and stdev
    are returned instead.
    """
    if X.ndim == 1:
        X = X[:, None]
    if X.shape[1] != self.x.shape[1]:
        print("[ERROR]: input features do not match training data.")
        return

    # load precalculated quantities
    H, y, beta, s2 = self.H, self.y, self.beta, self.sigma**2
    L, LQ = self.L, self.LQ
    L_T, L_H = self.L_T, self.L_H

    # new quantities
    A_cross = self.kernel.A_matrix(self.HP, X, self.x)
    H_pred = self.basis(X)

    L_A_cross = jst(L, A_cross.T, lower=True)
    R = H_pred - jnp.dot(L_A_cross.T, L_H)
    LQ_R = jst(LQ, R.T, lower=True)

    # posterior mean
    mean = jnp.dot(H_pred, beta) + jnp.dot(L_A_cross.T, L_T)

    if full_covar:
        # full posterior covariance
        A_pred = self.kernel.A_matrix(self.HP, X, X)
        tmp_1 = jnp.dot(L_A_cross.T, L_A_cross)
        tmp_2 = jnp.dot(LQ_R.T, LQ_R)
        var = s2 * (A_pred - tmp_1 + tmp_2)
        return self.unscale(mean, stdev=False), self.unscale(var, stdev=True)
    else:
        # pointwise posterior variance: the einsums form only the diagonal
        A_pred = 1.0
        tmp_1 = jnp.einsum("ij, ji -> i", L_A_cross.T, L_A_cross)
        tmp_2 = jnp.einsum("ij, ji -> i", LQ_R.T, LQ_R)
        var = s2 * (A_pred - tmp_1 + tmp_2)
        return self.unscale(mean, stdev=False), self.unscale(jnp.sqrt(var), stdev=True)
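
# A minimal standalone sketch (not part of the class): the einsum calls in the
# pointwise branch of posterior() compute only the diagonal of M @ M.T, which
# is all the pointwise variance needs. `_sketch_diag_einsum` is an illustrative
# helper name, not existing library code.
def _sketch_diag_einsum():
    import jax
    import jax.numpy as jnp

    M = jax.random.normal(jax.random.PRNGKey(0), (5, 3))  # stand-in for L_A_cross.T

    full_diag = jnp.diag(jnp.dot(M, M.T))          # full matrix, then diagonal
    fast_diag = jnp.einsum("ij, ji -> i", M, M.T)  # diagonal only, never forms M M.T

    assert jnp.allclose(full_diag, fast_diag)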
def store_values(self, guess, fixed_nugget):
    """Calculate and save some important values."""
    # save results of the optimization
    # FIXME: save different things based on whether using MUCM or not
    self.HP = self.kernel.HP_untransform(guess[0:self.kernel.dim])

    if self.train_nugget == False:
        self.nugget = fixed_nugget
        gn = self.kernel.dim
    else:
        self.nugget = self.nugget_untransform(guess[-1])
        gn = self.kernel.dim + 1

    self.A = self.kernel.A_matrix(self.HP, self.x, self.x) \
        + self.nugget**2 * jnp.eye(self.x.shape[0])

    y, H = self.y, self.H
    n, m = self.x.shape[0], self.H.shape[1]

    L = jnp.linalg.cholesky(self.A)
    L_y = jst(L, y, lower=True)
    L_H = jst(L, H, lower=True)
    Q = jnp.dot(L_H.T, L_H)               # Q = H.T A^-1 H
    LQ = jnp.linalg.cholesky(Q)
    tmp = jnp.dot(L_H.T, L_y)             # H.T A^-1 y
    tmp_2 = jst(LQ, tmp, lower=True)
    B = jnp.dot(tmp_2.T, tmp_2)           # B = y.T A^-1 H (H.T A^-1 H)^-1 H.T A^-1 y
    beta = jst(LQ.T, tmp_2, lower=False)  # beta = (H.T A^-1 H)^-1 H.T A^-1 y

    if guess.shape[0] > gn:
        # non-MUCM: sigma^2 is an explicit hyperparameter
        s2 = self.nugget_untransform(guess[self.kernel.dim])**2
    else:
        # MUCM: analytic estimate of sigma^2
        s2 = (1.0 / (n - m)) * (jnp.dot(L_y.T, L_y) - B)

    # save important values
    self.L, self.LQ = L, LQ
    self.L_H = L_H
    self.L_T = jst(L, y - jnp.dot(H, beta), lower=True)
    self.beta = beta
    self.sigma = np.sqrt(s2)
    return
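
# A standalone sketch of the generalized-least-squares recipe used in
# store_values(): beta = (H.T A^-1 H)^-1 H.T A^-1 y computed with two Cholesky
# factorizations and triangular solves, never forming A^-1 explicitly.
# All names are local to this illustrative example.
def _sketch_gls_beta():
    import jax
    import jax.numpy as jnp
    from jax.scipy.linalg import solve_triangular

    key = jax.random.PRNGKey(1)
    n, m = 20, 3
    x = jax.random.normal(key, (n, 2))
    H = jnp.column_stack([jnp.ones(n), x])  # linear mean basis, shape (n, m)
    y = jnp.sin(x[:, 0])

    # squared-exponential Gram matrix plus jitter (stand-in for A)
    sq = jnp.sum((x[:, None, :] - x[None, :, :])**2, axis=-1)
    A = jnp.exp(-0.5 * sq) + 1e-4 * jnp.eye(n)

    L = jnp.linalg.cholesky(A)
    L_y = solve_triangular(L, y, lower=True)
    L_H = solve_triangular(L, H, lower=True)
    Q = jnp.dot(L_H.T, L_H)                  # Q = H.T A^-1 H
    LQ = jnp.linalg.cholesky(Q)
    tmp = solve_triangular(LQ, jnp.dot(L_H.T, L_y), lower=True)
    beta = solve_triangular(LQ.T, tmp, lower=False)

    # dense reference solve
    Ainv_H = jnp.linalg.solve(A, H)
    beta_ref = jnp.linalg.solve(jnp.dot(H.T, Ainv_H), jnp.dot(Ainv_H.T, y))
    assert jnp.allclose(beta, beta_ref, atol=1e-3)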
def LLH(self, guess, fixed_nugget):
    """Return the negative log-likelihood.

    See Gaussian Processes for Machine Learning, page 29, eq. 2.45.
    K: the covariance matrix between training data
    A: H K^-1 H.T
    C: K^-1 H.T A^-1 H K^-1
    (GPML stores basis functions in rows; here H is n x m, so the code
    forms Q = H.T K^-1 H.)
    """
    HP = self.kernel.HP_untransform(guess[0:self.kernel.dim])

    # FIXME: fixed_nugget is never None. Settings used to be inferred from
    # the guess length, but that no longer works because the guess length
    # cannot distinguish the nugget from s2.
    if self.train_nugget == False:
        # fixed nugget
        nugget = fixed_nugget
        gn = self.kernel.dim
    else:
        # optimized nugget
        nugget = self.nugget_untransform(guess[-1])
        gn = self.kernel.dim + 1

    y, H = self.y, self.H
    n, m = self.x.shape[0], self.H.shape[1]

    # calculate the LLH
    K = self.kernel.A_matrix(HP, self.x, self.x) \
        + nugget**2 * jnp.eye(self.x.shape[0])
    L = jnp.linalg.cholesky(K)
    L_y = jst(L, y, lower=True)

    # Q = H.T A^-1 H
    L_H = jst(L, H, lower=True)
    Q = jnp.dot(L_H.T, L_H)
    LQ = jnp.linalg.cholesky(Q)

    logdetA = 2.0 * jnp.sum(jnp.log(jnp.diag(L)))   # log|A|
    logdetQ = 2.0 * jnp.sum(jnp.log(jnp.diag(LQ)))  # log|Q|

    # B = y.T A^-1 H (H.T A^-1 H)^-1 H.T A^-1 y
    tmp = jnp.dot(L_H.T, L_y)  # H.T A^-1 y
    tmp_2 = jst(LQ, tmp, lower=True)
    B = jnp.dot(tmp_2.T, tmp_2)

    if guess.shape[0] > gn:
        # non-MUCM: s2 is an explicit hyperparameter
        s2 = self.nugget_untransform(guess[self.kernel.dim])**2
    else:
        # MUCM: analytic estimate of s2; the general formula below then
        # reduces (up to constants) to the concentrated form
        # 0.5 * (-(n - m) * jnp.log(s2) - logdetA - logdetQ)
        s2 = (1.0 / (n - m)) * (jnp.dot(L_y.T, L_y) - B)

    llh = 0.5 * (-jnp.dot(L_y.T, L_y) / s2 + B / s2 - logdetA - logdetQ
                 - (n - m) * jnp.log(2 * np.pi) - (n - m) * jnp.log(s2))
    return -llh
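
# Quick standalone check of the identity used for logdetA and logdetQ above:
# for the Cholesky factor L of a positive-definite A, log|A| = 2 * sum(log(diag(L))).
# Illustrative only.
def _sketch_cholesky_logdet():
    import jax.numpy as jnp

    A = jnp.array([[4.0, 2.0], [2.0, 3.0]])  # symmetric positive definite
    L = jnp.linalg.cholesky(A)

    logdet_chol = 2.0 * jnp.sum(jnp.log(jnp.diag(L)))
    _, logdet_ref = jnp.linalg.slogdet(A)

    assert jnp.allclose(logdet_chol, logdet_ref)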
def LLH(self, guess, fixed_nugget):
    """Return the negative log-likelihood.

    Arguments:
    guess -- log(hyperparameter values) for training
    fixed_nugget -- value for the fixed nugget
    """
    # set the hyperparameters
    if guess.shape[0] > 2:
        # training on the nugget
        HP = jnp.exp(guess[0:-1])
        nugget = jnp.exp(guess[-1])
    else:
        # fixed nugget
        HP = jnp.exp(guess)
        nugget = fixed_nugget

    # spectral density
    SD = self.spectralDensity(HP[0])
    SD = HP[1] * SD

    # set outputs and inputs
    y = self.y
    # NOTE: absorb SD(eigenvalues) into the eigenvectors
    V = jnp.sqrt(SD) * self.V[self.vertex]

    # define Q := phi phi^T + D, where D is the diagonal matrix of
    # observation variances + nugget, stored as a vector for convenience
    D = self.yerr**2 + nugget
    invD = 1.0 / D

    # form Z (different from the Solin paper)
    ones = jnp.eye(SD.shape[0])
    # invD is diagonal, so row-scaling is faster than diag(invD).dot(V)
    Z = ones + (V.T).dot(invD[:, None] * V)

    try:
        # attempt Cholesky factorization
        L = jnp.linalg.cholesky(Z)

        # log|Q| = log|Z| + log|D|  (SD absorbed into the eigenvectors)
        logDetZ = 2.0 * jnp.sum(jnp.log(jnp.diag(L)))
        log_Q = logDetZ + jnp.sum(jnp.log(D))

        # y^T Q^-1 y via the Woodbury identity
        tmp = V.T.dot(invD * y)
        tmp = jst(L, tmp, lower=True)
        tmp = tmp.T.dot(tmp)
        yQy = jnp.dot(y, invD * y) - tmp

        # negative LLH = 1/2 log|Q| + 1/2 y^T Q^-1 y + n/2 log(2 pi)
        n_log_2pi = y.shape[0] * jnp.log(2 * jnp.pi)
        llh = 0.5 * (log_Q + yQy + n_log_2pi)
        return llh
    except Exception:
        return np.nan
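
# A standalone sketch of the Woodbury-style identities behind LLH() above, for
# Q = V V.T + D with diagonal D: log|Q| = log|Z| + log|D| and
# y.T Q^-1 y = y.T D^-1 y - ||L^-1 V.T D^-1 y||^2, where Z = I + V.T D^-1 V
# and L is the Cholesky factor of Z. All names are local to this example.
def _sketch_woodbury_llh_terms():
    import jax
    import jax.numpy as jnp
    from jax.scipy.linalg import solve_triangular

    n, k = 8, 3
    V = jax.random.normal(jax.random.PRNGKey(2), (n, k))  # low-rank factor
    D = jnp.linspace(0.5, 1.5, n)                         # diagonal noise as a vector
    y = jnp.ones(n)
    invD = 1.0 / D

    Z = jnp.eye(k) + jnp.dot(V.T, invD[:, None] * V)      # k x k, cheap to factorize
    L = jnp.linalg.cholesky(Z)

    log_Q = 2.0 * jnp.sum(jnp.log(jnp.diag(L))) + jnp.sum(jnp.log(D))

    tmp = solve_triangular(L, jnp.dot(V.T, invD * y), lower=True)
    yQy = jnp.dot(y, invD * y) - jnp.dot(tmp, tmp)

    # dense reference
    Q = jnp.dot(V, V.T) + jnp.diag(D)
    assert jnp.allclose(log_Q, jnp.linalg.slogdet(Q)[1], atol=1e-3)
    assert jnp.allclose(yQy, jnp.dot(y, jnp.linalg.solve(Q, y)), atol=1e-3)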