import json
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
# The kernel classes referenced by the saved kernel expression must be in
# scope for eval() below, e.g.:
from sklearn.gaussian_process.kernels import ConstantKernel, RBF, WhiteKernel


def load_gp(fname_base):
    """Rebuild a fitted GaussianProcessRegressor from files written at save time."""
    with open(fname_base + ".json", 'r') as f:
        my_json = json.load(f)
    my_X = np.loadtxt(fname_base + "_X.dat")
    my_y = np.loadtxt(fname_base + "_y.dat")
    my_alpha = np.loadtxt(fname_base + "_alpha.dat")
    dict_params = my_json['kernel_params']
    # eval() only accepts expressions, so evaluate the saved kernel expression
    # and bind the result (eval("kernel = ...") raises a SyntaxError).
    kernel = eval(my_json['kernel'])
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=0)
    gp.kernel_ = kernel
    dict_params_eval = {}
    for name in dict_params:
        if not ('length' in name or 'constant' in name):
            continue
        if name == "k2__k2__length_scale":
            # The anisotropic length scale was saved with numpy's array
            # formatting; normalize whitespace and turn it into a
            # comma-separated literal before eval().
            one_space = ' '.join(dict_params[name].split())
            dict_params_eval[name] = eval(one_space.replace(' ', ','))
        else:
            dict_params_eval[name] = eval(dict_params[name])
    # set_params expects keyword arguments, not a dict.
    gp.kernel_.set_params(**dict_params_eval)
    gp.X_train_ = my_X
    gp.y_train_ = my_y
    gp.alpha_ = my_alpha
    gp._y_train_std = float(my_json['y_train_std'])
    gp._y_train_mean = float(my_json['y_train_mean'])
    return gp
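# load_gp implies a save-side counterpart that writes the JSON metadata and
# the three .dat files it reads back. A minimal sketch of such a writer
# follows; save_gp is a hypothetical name and the field layout is inferred
# from load_gp above, so treat this as an assumption, not the project's
# actual serializer.
def save_gp(gp, fname_base):
    np.savetxt(fname_base + "_X.dat", gp.X_train_)
    np.savetxt(fname_base + "_y.dat", gp.y_train_)
    np.savetxt(fname_base + "_alpha.dat", gp.alpha_)
    meta = {
        # repr() of a fitted sklearn kernel is an eval()-able expression,
        # provided the kernel classes are imported on the loading side.
        'kernel': repr(gp.kernel_),
        # Mirror the loader's filter; non-numeric values (e.g. "fixed"
        # bounds strings) would need extra care.
        'kernel_params': {name: str(value)
                          for name, value in gp.kernel_.get_params().items()
                          if 'length' in name or 'constant' in name},
        'y_train_std': float(gp._y_train_std),
        'y_train_mean': float(gp._y_train_mean),
    }
    with open(fname_base + ".json", 'w') as f:
        json.dump(meta, f)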
# Requires: from scipy.linalg import cholesky, cho_solve, solve_triangular
def __init__(self, gamma, beta, nugget, kernelName, k_lambda, xTrain, yTrain):
    """
    Create a new GaussianProcess object.

    gamma:      Hyperparameter
    beta:       Hyperparameter
    k_lambda:   Hyperparameter
    nugget:     The noise hyperparameter
    kernelName: The name of the covariance kernel
    xTrain:     Numpy array containing x training values
    yTrain:     Numpy array containing y training values
    """
    self.xTrain = xTrain
    self.yTrain = yTrain
    self.k_lambda = k_lambda
    self.beta = beta
    self.gamma = gamma
    self.nugget = nugget
    self.kernelName = kernelName

    # Set up the regressor as if gp.fit had been called.
    # See https://github.com/scikit-learn/scikit-learn/master/sklearn/gaussian_process/gpr.py
    kernel = self._getKernel()
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=0)
    gp.K = kernel(xTrain)
    gp.X_train_ = xTrain
    gp.y_train_ = yTrain
    gp.L_ = cholesky(gp.K, lower=True)
    gp.alpha_ = cho_solve((gp.L_, True), yTrain)
    # Note: fit() recomputes L_ and alpha_; kernel_ is then pinned back to
    # the fixed kernel built above.
    gp.fit(xTrain, yTrain)
    gp.kernel_ = kernel
    self.gp = gp
    self.kernel = kernel

    # Calculate the matrix inverses once to save time later. These are only
    # used by our own implementation of the scoring engine.
    self.L_inv = solve_triangular(gp.L_.T, np.eye(gp.L_.shape[0]))
    self.K_inv = self.L_inv.dot(self.L_inv.T)
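# The cached L_inv / K_inv pay off when scoring many test points: the GP
# predictive variance is var(x*) = k(x*, x*) - k_*^T K^{-1} k_*, and with
# K_inv precomputed no new factorization is needed. A minimal sketch of such
# a scoring routine follows; the method name predictive_variance is
# hypothetical, not part of the original class.
def predictive_variance(self, xTest):
    k_star = self.kernel(self.xTrain, xTest)  # cross-covariance, shape (n, m)
    k_ss = self.kernel(xTest)                 # test covariance, shape (m, m)
    var = k_ss - k_star.T.dot(self.K_inv).dot(k_star)
    return np.diag(var)                       # pointwise predictive variances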
# Requires: import numpy as np; import matplotlib.pyplot as plt;
# from scipy.linalg import cholesky, cho_solve
index_y2 = 7  # valid only while index_y2 < len_x1
print("Radius is " + str(inputs_x_array[:, 0][index_y2 * len_x2:index_y2 * len_x2 + len_x2][0]) + "m")
plt.figure()
plt.scatter(inputs_x_array[:, 1][index_y2 * len_x2:index_y2 * len_x2 + len_x2],
            y_pred[index_y2 * len_x2:index_y2 * len_x2 + len_x2])  # from x2_min to x2_max
plt.xlabel('Y Label (time)')
plt.ylabel('Z Label (density)')
plt.show()

print(gp.kernel_)  # optimized hyperparameters
print(gp.log_marginal_likelihood(gp.kernel_.theta))  # log-likelihood

# Recompute the log marginal likelihood by hand, following sklearn's
# internals.
alpha = 1e-10
input_prediction = gp.predict(X, return_std=True)
K, K_gradient = gp.kernel_(X, eval_gradient=True)
K[np.diag_indices_from(K)] += alpha
L = cholesky(K, lower=True)
y = gp.y_train_  # training targets (assumed; y was undefined in the original)
# Support multi-dimensional output of self.y_train_
if y.ndim == 1:
    y = y[:, np.newaxis]
alpha = cho_solve((L, True), y)
log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y, alpha)
log_likelihood_dims -= np.log(np.diag(L)).sum()
log_likelihood_dims -= (K.shape[0] / 2.) * np.log(2 * np.pi)
log_likelihood = log_likelihood_dims.sum(-1)
print(log_likelihood)

mean_sq_rel_err = ((input_prediction[0][:, 0] - y[:, 0])**2. / y[:, 0]**2.)  # mean square relative error
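# Sanity check (an assumption: it only holds if X above is the training
# input gp.X_train_). The manual computation mirrors
#   log p(y|X) = -1/2 y^T alpha - sum_i log L_ii - (n/2) log(2 pi)
# and should then agree with sklearn's own value to numerical precision.
assert np.allclose(log_likelihood, gp.log_marginal_likelihood(gp.kernel_.theta))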
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern


class GPR(keras.Model):
    r"""
    Implements a Gaussian process with a squared-exponential kernel.

    The Gaussian process kernel has three hyperparameters controlling the
    magnitude, length scale, and noise of the GP. If there is known input
    noise, that noise is included in the model. The hyperparameters are
    optimized via maximum likelihood over a set number of time steps (this
    optimization is not meant to be perfect, just good enough).

    Input arguments
        - x_l, locations of inputs, shape (input_dim, 2); the columns hold
          (lon, lat) coordinates respectively.
        - x, values at locations.
        - noise, input noise, if known. Same shape as x.

    Output arguments
        - m, mean of the Gaussian process.
        - V, covariance of the Gaussian process.

    Subroutines
        - sample, draw a sample from the Gaussian process.
        - log_prob, estimate the log likelihood of the data given the
          Gaussian process.
        - reanalysis, produce the best update of mld given a model estimate
          and observed data.

    To do: implement the haversine distance in a custom scikit-learn kernel.
    """

    def __init__(self, x_l, dtype='float64', **kwargs):
        super(GPR, self).__init__(name='gaussian_process', dtype='float64',
                                  **kwargs)
        self.x_l = tf.cast(x_l, dtype='float64')
        self.x_l = tf.reshape(self.x_l, (-1, 2))
        self.input_dim = self.x_l.shape[0]
        self.kernel = 1.0 * Matern(
            length_scale=3.0, nu=2.5, length_scale_bounds=(.25, 10))
        self.kernel_noise = 1.0 * Matern(
            length_scale=3.0, nu=2.5, length_scale_bounds=(.25, 10))

    def fit(self, x, noise=None):
        x = tf.reshape(x, (-1, 1)).numpy()
        self.gpr = GaussianProcessRegressor(kernel=self.kernel,
                                            n_restarts_optimizer=0,
                                            random_state=0)
        self.gpr.fit(self.x_l, x)
        self.Kxx = self.gpr.kernel_(self.x_l)
        self.Kxx_chol = tf.linalg.cholesky(self.Kxx)
        self.gpr_noise = GaussianProcessRegressor(kernel=self.kernel_noise,
                                                  n_restarts_optimizer=0,
                                                  random_state=0)
        if noise is not None:
            noise = tf.reshape(noise, (-1, 1)).numpy()
            self.gpr_noise.fit(self.x_l, noise)
            self.Kxx_noise = self.gpr_noise.kernel_(self.x_l)
            # Factor the noise covariance (the original factored self.Kxx
            # here, which looks like a copy-paste bug).
            self.Kxx_noise_chol = tf.linalg.cholesky(self.Kxx_noise)

    def call(self, x, y_l, noise=None, batch_size=1, training=True):
        if not training:
            # Slower Gaussian process regression. To do: speed up?
            self.Kyx = self.gpr.kernel_(y_l, self.x_l)
            self.Kyy = self.gpr.kernel_(y_l)
            self.m = tf.linalg.matmul(
                self.Kyx, tf.linalg.cholesky_solve(self.Kxx_chol, x))
            self.V = self.Kyy - tf.linalg.matmul(
                self.Kyx, tf.linalg.cholesky_solve(self.Kxx_chol, self.Kyx.T))
            if noise is not None:
                noise = tf.reshape(noise, (-1, ))
                self.Kyx_noise = self.gpr_noise.kernel_(y_l, self.x_l)
                self.Kyy_noise = self.gpr_noise.kernel_(y_l)
                self.V += tf.linalg.matmul(
                    tf.linalg.matmul(
                        self.Kyx_noise,
                        tf.linalg.cholesky_solve(self.Kxx_noise_chol,
                                                 tf.linalg.diag(noise))),
                    self.Kyx_noise.T)
        if training:
            # Old, fast, linear interpolation weighted by haversine distance.
            x_dim = self.x_l.shape[0]
            y_dim = y_l.shape[0]
            l = []
            for i in range(y_dim):
                d = np.sin((self.x_l[:, 1] - y_l[i, 1]) / 180 * np.pi)**2
                d += np.cos(self.x_l[:, 1] * np.pi / 180.) * np.cos(
                    y_l[i, 1] * np.pi / 180.) * np.sin(
                        (self.x_l[:, 0] - y_l[i, 0]) / 180. * np.pi)**2
                d = 2 * 6.378 * np.arcsin(np.sqrt(d))
                ind_temp = tf.where(d < 0.25)
                d = tf.gather(d, ind_temp)
                l_temp = tf.zeros((x_dim, 1), dtype='float64')
                l_temp = tf.tensor_scatter_nd_update(l_temp, ind_temp,
                                                     d / tf.reduce_sum(d))
                l.append(l_temp)
            self.L = tf.reshape(tf.stack(l), (y_dim, x_dim))
            self.m = tf.linalg.matmul(self.L, x)
            self.m = tf.squeeze(self.m)
            self.V = 1e-3 * tf.eye(y_dim, dtype='float64')
            if noise is not None:
                self.V += tf.linalg.matmul(
                    self.L,
                    tf.linalg.matmul(tf.linalg.diag(noise), self.L,
                                     transpose_b=True))
            # The original called tf.linalg.diag here, which builds a matrix;
            # diag_part extracts the diagonal as intended.
            if tf.reduce_min(tf.linalg.diag_part(self.V)) < 1e-3:
                self.V += 1e-3 * tf.eye(y_dim, dtype='float64')
        self.V_chol = tf.linalg.cholesky(self.V)
        return self.m, tf.linalg.diag_part(self.V)

    def sample(self, num_samples=1):
        noise = tf.random.normal(shape=(int(self.m.shape[0]),
                                        int(num_samples)),
                                 dtype='float64')
        sample = tf.linalg.matmul(self.V_chol, noise)
        sample += tf.reshape(self.m, (-1, 1))
        return tf.reshape(sample, (-1, num_samples))

    def log_prob(self, x, batch_size=1):
        z = (self.m - tf.reshape(x, (batch_size, -1, 1)))
        l = tf.reduce_mean(
            tf.matmul(z, tf.linalg.cholesky_solve(self.V_chol, z),
                      transpose_a=True))
        l += tf.reduce_mean(tf.math.log(tf.linalg.diag_part(self.V_chol)))
        return -l

    def reanalysis(self, x, y_d, noise):
        # Note: self.indexes is assumed to be set elsewhere; it is not
        # defined anywhere in this class as written.
        u = x
        S = tf.linalg.cholesky_solve(
            self.V_chol,
            tf.reshape(y_d, (-1, 1)) - tf.linalg.matmul(
                self.L, tf.reshape(tf.gather(x, self.indexes), (-1, 1))))
        updates = tf.reshape(tf.gather(
            x, self.indexes), (-1, 1)) + tf.linalg.matmul(
                tf.linalg.diag(tf.gather(noise, self.indexes)),
                tf.linalg.matmul(self.L, S, transpose_a=True))
        updates = tf.reshape(updates, (-1, ))
        u = tf.tensor_scatter_nd_update(u, tf.reshape(self.indexes, (-1, 1)),
                                        updates)
        return u
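# A minimal usage sketch with synthetic data (the random locations, sizes,
# and values here are illustrative assumptions, not from the original). The
# domain is kept small so every target location has source points within the
# 0.25 Mm interpolation radius used in call().
rng = np.random.default_rng(0)
x_l = rng.uniform(low=[0.0, 0.0], high=[0.5, 0.5], size=(50, 2))  # (lon, lat)
x_vals = rng.normal(size=(50, 1))                                 # values at x_l

model = GPR(x_l)
model.fit(x_vals)  # fits the sklearn GPR on the source locations

# Interpolate onto new locations (training=True takes the fast path).
y_l = rng.uniform(low=[0.0, 0.0], high=[0.5, 0.5], size=(20, 2))
m, v = model(tf.constant(x_vals, dtype='float64'),
             tf.constant(y_l, dtype='float64'), training=True)
draws = model.sample(num_samples=5)  # shape (20, 5)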
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (ConstantKernel, DotProduct,
                                              Matern, RBF, WhiteKernel)


def gp(x, y, degree1, degree2, k1, k2, operator):
    """Fit a GaussianProcessRegressor whose kernel is built from k1/k2.

    operator selects how the two base kernels are combined: "prod" (k1 * k2),
    "sum" (k1 + k2), or "single" (k1 alone). Every combination is wrapped as
    ConstantKernel() * (...) + WhiteKernel().
    """
    def base(name, degree):
        if name == "poly":
            return DotProduct() ** degree
        if name == "rbf":
            return RBF()
        if name == "matern1.5":
            return Matern(nu=1.5)
        if name == "matern2.5":
            return Matern(nu=2.5)
        raise ValueError("unknown kernel name: %r" % name)

    if operator == "single":
        inner = base(k1, degree1)
    elif operator in ("prod", "sum"):
        if k1 == "poly" and k2 == "poly":
            first, second = base("poly", degree1), base("poly", degree2)
        elif "poly" in (k1, k2):
            # As in the original if-chains, a mixed pair always applies
            # degree1 to the polynomial factor, whichever side it arrives on.
            other = k2 if k1 == "poly" else k1
            first, second = base("poly", degree1), base(other, degree2)
        else:
            first, second = base(k1, degree1), base(k2, degree2)
        inner = first * second if operator == "prod" else first + second
    else:
        # Combinations the original left unhandled crashed later with a
        # NameError on gpr; fail fast instead.
        raise ValueError("unknown operator: %r" % operator)

    k = ConstantKernel() * inner + WhiteKernel()
    gpr = GaussianProcessRegressor(kernel=k).fit(x, y)

    # Gram matrix of the fitted (optimized) kernel on the training inputs.
    GramM = gpr.kernel_(x)

    # Return the fitted GaussianProcessRegressor and the Gram matrix.
    return {'gpr': gpr, 'GramMatrix': GramM}
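# Hedged usage sketch with synthetic 1-D data (the data and parameter choices
# are illustrative assumptions, not from the original).
import numpy as np

rng = np.random.default_rng(42)
x_demo = rng.uniform(-3, 3, size=(40, 1))
y_demo = np.sin(x_demo).ravel() + 0.1 * rng.normal(size=40)

# Quadratic polynomial multiplied by an RBF, plus a white-noise term.
result = gp(x_demo, y_demo, degree1=2, degree2=0, k1="poly", k2="rbf",
            operator="prod")
print(result['gpr'].kernel_)       # optimized kernel hyperparameters
print(result['GramMatrix'].shape)  # (40, 40)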