def gp_lc_fit(t, m, e, npoints=10000, plot=True, theta=0.15):
    '''
    Use Gaussian process regression to fit a functional form to a supernova lightcurve.

    t, m, e: 1D arrays for time, mag, and error
    theta: the autocorrelation timescale parameter. If the model needs more
           flexibility, increase theta; if it needs less, decrease theta.

    returns: t, mag, err arrays of size npoints
    '''
    edge = 1.0  # predict beyond the data by this amount on either side
    X = np.atleast_2d(t).T  # need a 2D array for skl procedures
    y = np.array(m)
    e = np.array(e)
    x = np.atleast_2d(np.linspace(t[0] - edge, t[-1] + edge, npoints)).T
    xf = x.flatten()

    gp = GaussianProcess(regr='linear', nugget=(e / y) ** 2, theta0=theta)
    gp.fit(X, y)
    y_pred, MSE = gp.predict(x, eval_MSE=True)
    ep = MSE ** .5  # prediction error estimate

    if plot:
        plt.figure()
        plt.errorbar(t, m, yerr=e, fmt='b.')
        plt.plot(xf, y_pred, 'g', lw=2)
        plt.fill_between(xf, y_pred + ep, y_pred - ep, alpha=0.5, color='g')
        plt.gca().invert_yaxis()
        plt.title('Photometry, GP model, and errors')
        plt.show()
    return xf, y_pred, ep
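# A minimal usage sketch for gp_lc_fit above, on synthetic photometry (the
# data and values here are illustrative only; assumes numpy as np,
# matplotlib.pyplot as plt, and the legacy
# sklearn.gaussian_process.GaussianProcess import used by the snippet):
import numpy as np
t = np.linspace(0., 60., 40)             # days
m = 18. + (t - 20.) ** 2 / 800.          # toy magnitude lightcurve
e = np.full_like(t, 0.05)                # 0.05 mag uncertainties
tf, mf, ef = gp_lc_fit(t, m, e, npoints=2000, plot=False)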
class GaussianProcessRegression(object):
    def __init__(self, data_frame, test_df):
        self.df = data_frame
        self.test = test_df

    def fitModel(self, predictors, output_var):
        self.predictors = predictors
        self.output_var = output_var
        self.gp = GaussianProcess(corr='cubic', theta0=1e-2, thetaL=1e-4,
                                  thetaU=1e-1, random_start=100)
        self.gp.fit(self.df[predictors].values, self.df[output_var].values)
        self.y_pred, self.MSE = self.gp.predict(self.test[predictors],
                                                eval_MSE=True)
        self.sigma = np.sqrt(self.MSE)

    # Works only with a single predictor column: with multiple predictors
    # it makes no sense to draw a 2D curve.
    def givePlot(self, xlabel='$x$', ylabel='$f(x)$'):
        fig = pl.figure()
        pl.plot(self.test[self.predictors], self.test[self.output_var],
                'r:', label=u'Actual curve')
        # pl.plot(X, y, 'r.', markersize=10, label=u'Observations')
        pl.plot(self.test[self.predictors], self.y_pred, 'b-',
                label=u'Prediction')
        # The confidence band polygon runs over x followed by x reversed
        # (the original concatenated x with y, which draws a broken shape).
        pl.fill(np.concatenate([self.test[self.predictors],
                                self.test[self.predictors][::-1]]),
                np.concatenate([self.y_pred - 1.9600 * self.sigma,
                                (self.y_pred + 1.9600 * self.sigma)[::-1]]),
                alpha=.5, fc='b', ec='None', label='95% confidence interval')
        pl.xlabel(xlabel)
        pl.ylabel(ylabel)
        pl.ylim(-10, 20)
        pl.legend(loc='upper left')
        pl.show()
def crossValidation(self, n):
    kf = KFold(len(self.trainX), n_folds=n)
    total_error = 0
    predictions = {}
    if self.algorithm != 'gp':
        for train, test in kf:
            this_x = []
            this_y = []
            for i in train:
                this_x.append(self.trainX[i])
                this_y.append(self.trainY[i])
            reg = self.model_for_algorithm()
            reg.fit(this_x, this_y)
            for test_i in test:
                predicted = reg.predict(self.trainX[test_i])
                predictions[test_i] = predicted
                squared_error = (predicted - self.trainY[test_i]) ** 2
                total_error += squared_error
        self.count_accuracy(predictions)
        return total_error / len(self.trainX), predictions
    else:
        for train_idx, test_idx in kf:
            X_train = self.trainX[train_idx]
            y_train = self.trainY[train_idx]
            gp = GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1)
            gp.fit(X_train, y_train)
            for test_i in test_idx:
                predicted, sigma2 = gp.predict(self.trainX[test_i],
                                               eval_MSE=True)
                predictions[test_i] = (predicted, sigma2, self.trainY[test_i])
                sigma = np.sqrt(sigma2)
                if (self.trainY[test_i] > predicted + 1.96 * sigma or
                        self.trainY[test_i] < predicted - 1.96 * sigma):
                    total_error += 1
        return total_error / float(len(self.trainX)), predictions
def gaussian_process(nx, ny, x, y, x_min, y_min, dx, dy):
    """
    Gaussian process method, to replace kriging.

    Description:
    http://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcess.html#sklearn.gaussian_process.GaussianProcess.predict

    The scikit-learn python library should be installed.
    Should be tested.
    """
    # Prediction is very sensitive to the parameters below; use with care!
    gp = GaussianProcess(regr='quadratic', corr='cubic', theta0=0.1,
                         thetaL=.001, thetaU=1., nugget=0.01)
    gp.fit(x, y)
    x_grid_x = np.linspace(x_min, x_min + dx * nx, nx)
    x_grid_y = np.linspace(y_min, y_min + dy * ny, ny)
    xv, yv = np.meshgrid(x_grid_x, x_grid_y)
    x_grid = np.dstack((xv.flatten(), yv.flatten()))[0]
    grid = np.reshape(gp.predict(x_grid, eval_MSE=False, batch_size=None),
                      (ny, nx))
    return grid
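# Usage sketch for gaussian_process() above: toy scattered samples
# interpolated onto a 40x50 grid (sample data is illustrative; assumes
# numpy as np and the legacy sklearn GaussianProcess import):
import numpy as np
obs = np.random.uniform(0., 10., size=(30, 2))   # (x, y) sample sites
vals = np.sin(obs[:, 0]) + np.cos(obs[:, 1])     # toy field values
grid = gaussian_process(nx=50, ny=40, x=obs, y=vals,
                        x_min=0., y_min=0., dx=0.2, dy=0.25)
assert grid.shape == (40, 50)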
def nugget_kungfu(day=0, fx_id=0, hour=3, theta0=(0.4, 1.0)):
    G = X.nm[day, fx_id, :, hour]
    G_m = G.mean(axis=0)
    G_s = G.std(axis=0)

    from sklearn.gaussian_process.gaussian_process import MACHINE_EPSILON
    nugget = (G_s / G_m) ** 2.0
    mask = ~np.isfinite(nugget)
    nugget[mask] = 10. * MACHINE_EPSILON
    nugget = nugget.ravel()

    est = GaussianProcess(corr='squared_exponential',
                          theta0=theta0,
                          #thetaL=(.5, 1.0), thetaU=(5.0, 10.0),
                          #random_start=100,
                          nugget=nugget,
                          )
    est.fit(x, G_m.ravel())
    print('est.theta_: %s' % str(est.theta_))
    pred, sigma = est.predict(np.c_[new_lats.ravel(), new_lons.ravel()],
                              eval_MSE=True)
    pred = pred.reshape((10 * lon.shape[0], 10 * lat.shape[0])).T
    sigma = sigma.reshape((10 * lon.shape[0], 10 * lat.shape[0])).T

    fig, ([ax1, ax2, ax3, ax4]) = plt.subplots(4, 1)
    ax1.imshow(G_m, interpolation='none')
    ax1.set_ylabel('Ens mean')
    ax2.imshow(G_s, interpolation='none')
    ax2.set_ylabel('Ens std')
    ax3.imshow(pred, interpolation='none')
    ax3.set_ylabel('GP mean')
    ax4.imshow(sigma, interpolation='none')
    ax4.set_ylabel('GP sigma')
def test_2d(regr=regression.constant, corr=correlation.squared_exponential,
            random_start=10, beta0=None):
    # MLE estimation of a two-dimensional Gaussian Process model accounting for
    # anisotropy. Check random start optimization.
    # Test the interpolating property.
    b, kappa, e = 5., .5, .1
    g = lambda x: b - x[:, 1] - kappa * (x[:, 0] - e) ** 2.
    X = np.array([[-4.61611719, -6.00099547],
                  [4.10469096, 5.32782448],
                  [0.00000000, -0.50000000],
                  [-6.17289014, -4.6984743],
                  [1.3109306, -6.93271427],
                  [-5.03823144, 3.10584743],
                  [-2.87600388, 6.74310541],
                  [5.21301203, 4.26386883]])
    y = g(X).ravel()

    thetaL = [1e-4] * 2
    thetaU = [1e-1] * 2
    gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0,
                         theta0=[1e-2] * 2, thetaL=thetaL, thetaU=thetaU,
                         random_start=random_start, verbose=False)
    gp.fit(X, y)
    y_pred, MSE = gp.predict(X, eval_MSE=True)

    assert_true(np.allclose(y_pred, y) and np.allclose(MSE, 0.))

    eps = np.finfo(gp.theta_.dtype).eps
    # cast the bounds to arrays so the scalar eps offset broadcasts
    assert_true(np.all(gp.theta_ >= np.asarray(thetaL) - eps))  # Lower bounds of hyperparameters
    assert_true(np.all(gp.theta_ <= np.asarray(thetaU) + eps))  # Upper bounds of hyperparameters
def ensemble(value, noise):
    #mixture = GMM(C = 100)
    mixture = GMM()
    newdataset = addNoise(noise)
    temp = np.copy(newdataset[:, -1])
    newdataset[:, -1] = newdataset[:, 2]
    newdataset[:, 2] = temp
    #print newdataset[:,-1], newdataset[:,3]
    for ensemble in range(0, value):
        np.random.shuffle(newdataset)
        train = np.copy(newdataset[0:-10, :])
        test = np.copy(newdataset[-10:-1, :])
        test_pred = np.copy(test)
        mixture.fit(newdataset[0:-10, 0:-2], newdataset[0:-10, -1])
        preds = mixture.predict(newdataset[-10:-1, 0:-2])
        test_pred[:, -2:-1] = preds
        errorabs = abs(dataset[-10:-1, -1] - preds) / (dataset[-10:-1, -1])
        meanerrorabs = np.mean(errorabs)
        stderrorabs = np.std(errorabs)
        print(preds)
        #print meanerrorabs, stderrorabs
        #plt.plot(abs(dataset[-10:-1,-1]-preds))
        #plt.ylim(-5e-12,5e-12)
        #plt.scatter(dataset[-10:-1,0],dataset[-10:-1,-1])
        #plt.plot(preds)
        #plt.show()
        np.savetxt('data/new_train_%d_snr_%d.csv' % (ensemble, noise),
                   train, delimiter=',')
        np.savetxt('data/new_test_%d_snr_%d.csv' % (ensemble, noise),
                   test, delimiter=',')
        np.savetxt('data/new_test_predict_%d_snr_%d.csv' % (ensemble, noise),
                   test_pred, delimiter=',')
def update_GP(self, meas_locations):
    # define grid to evaluate stuff on
    #res = 100
    sensornoise = .000001
    noise = sensornoise * np.random.randn(meas_locations.shape[0])
    measurements = self.simulatedsurface(meas_locations) + noise
    print(meas_locations)
    print(measurements)
    [self.meas_locations, self.new_measurements] = \
        self.averageduplicates(meas_locations, measurements)

    # Instantiate and fit Gaussian Process Model
    gp = GaussianProcess(corr='squared_exponential',
                         #theta0=10e-1,
                         #thetaL=10e-1,
                         ##thetaU=1e-1,
                         nugget=(sensornoise / self.new_measurements) ** 2)
    # Observations
    print(noise)
    # Don't perform MLE or you'll get a perfect prediction for this simple example!
    gp.fit(self.meas_locations, self.new_measurements)

    # evaluate the prediction and its MSE on a grid
    y_pred, MSE = gp.predict(self.xgrid, eval_MSE=True)
    sigma = np.sqrt(MSE)
    y_pred = y_pred.reshape((self.res, self.res))
    sigma = sigma.reshape((self.res, self.res))
    self.model = y_pred
    self.uncertainty = sigma
    return [y_pred, sigma]
def test(filename, reg):
    # initialize the parameters
    #(X, y) = data_read('./data/eta_reg_605603_p0.data')
    # (X, y) = load_svmlight_file('./data/eta_reg_605603_p0.data')
    (X, y) = load_svmlight_file(filename)
    print("Data imported!")

    # divide into training and test_pyGPs datasets
    ratio = 0.8
    num = len(y)
    idx = list(range(num))
    random.shuffle(idx)
    tn = int(np.floor(num * ratio))
    tr_idx = idx[:tn]
    te_idx = idx[tn:]
    # X_train = X[tr_idx]
    # y_train = y[tr_idx]
    tnum = int(np.floor(0.2 * num))
    X_train = X[tr_idx[:tnum]]
    y_train = y[tr_idx[:tnum]]
    X_test = X[te_idx]
    y_test = y[te_idx]

    # train a Gaussian process regression model
    print("Model training!")
    p1 = float(reg)
    clf = GaussianProcess(corr='squared_exponential')
    # clf.fit(list(X_train), y_train)
    clf.fit(X_train.toarray(), y_train)
    abe = np.mean(abs(clf.predict(X_test.toarray()) - y_test) / y_test)
    #np.mean((clf.predict(X_test) - y_test) ** 2))
    print("Absolute error is: %.2f" % abe)
def kriging(self, X, y, X_pred):
    """Interpolate using Gaussian Process Regression (kriging).

    Uses the GP package from 'sklearn' to interpolate spatial data
    points (2d). The interpolation assumes the noiseless case, i.e.,
    "almost" no uncertainty in the observations. Bounds are defined
    assuming anisotropy.
    """
    # instantiate a Gaussian Process model
    gp = GaussianProcess(regr=self.regr, corr=self.corr,
                         theta0=self.theta0, thetaL=self.thetaL,
                         thetaU=self.thetaU, random_start=self.rand_start,
                         nugget=self.nugget, verbose=True)
    # fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y)
    # evaluate the prediction points (ask for MSE as well)
    y_pred, MSE = gp.predict(X_pred, eval_MSE=True)
    return [y_pred, np.sqrt(MSE)]
class GaussianProcessInterpolator:
    def __init__(self, observations):
        self.observations = observations
        self.gaussian_process = GaussianProcess(corr='cubic', theta0=1e-2,
                                                thetaL=1e-4, thetaU=1e-1,
                                                random_start=100)
        self._compute_model()

    def _compute_model(self):
        observation_points = []
        observation_results = []
        for entry in self.observations:
            observation_points.append(entry[0])
            observation_results.append(entry[1])
        observation_points_array = np.atleast_2d(observation_points)
        observation_results_array = np.array(observation_results).T
        self.gaussian_process.fit(observation_points_array,
                                  observation_results_array)

    def compute_prediction(self, observation_points):
        observation_points_array = np.atleast_2d(observation_points)
        predicted_observation_results, MSE = self.gaussian_process.predict(
            observation_points_array, eval_MSE=True)
        return predicted_observation_results, MSE
def gaussian_process(x_train, y_train, x_test):
    def vector_2d(array):
        return np.array(array).reshape((-1, 1))

    import warnings
    # silence the deprecation warnings raised by the legacy GaussianProcess
    # (the original pasted the warnings-suppression boilerplate from the
    # Python docs here, which had no effect on the code below)
    warnings.simplefilter("ignore", DeprecationWarning)

    x_train = vector_2d(x_train)
    y_train = vector_2d(y_train)
    x_test = vector_2d(x_test)

    # Train gaussian process
    gp = GaussianProcess(corr='squared_exponential',
                         theta0=1e-1, thetaL=1e-3, thetaU=1)
    gp.fit(x_train, y_train)

    # Get mean and standard deviation for each possible
    # number of hidden units
    y_mean, y_var = gp.predict(x_test, eval_MSE=True)
    y_std = np.sqrt(vector_2d(y_var))

    return y_mean, y_std
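# Usage sketch for the hyperparameter-tuning variant of gaussian_process()
# above (toy numbers, purely illustrative):
x_train = [1, 2, 3, 4, 5]                # e.g. hidden-unit counts tried
y_train = [0.9, 0.7, 0.6, 0.65, 0.8]     # e.g. validation errors observed
x_test = [1, 2, 3, 4, 5, 6, 7, 8]
y_mean, y_std = gaussian_process(x_train, y_train, x_test)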
def optimize(self, f, beta=1, init_num=10, restarts=100,
             opt_fx=float('-inf'), tolerance=1e-8, iteration=1000):
    X = self.generate_random_variables(init_num)
    Fx = [f(x) for x in X]
    x_best = self.lower
    fx_best = min(Fx)
    acq_values = []
    num_evals = 0

    for i in range(iteration):
        gp = GaussianProcess(theta0=[0.1] * self.dim)
        gp.fit(X, Fx)
        x_new, acq_value = self.max_acquisition(gp, beta=beta,
                                                restarts=restarts)
        fx_new = f(x_new)
        num_evals += 1
        if fx_new < fx_best:
            x_best = x_new
            fx_best = fx_new
        print("%d \t %s \t %s" % (i, fx_best, x_best))
        if np.abs(fx_best - opt_fx) < tolerance:
            break
        #X = np.vstack((X, x_new))
        #Fx = np.hstack((Fx, fx_new))
        #acq_values.append(acq_value)

    print("#Evals=%d \t\t f(x*)=%s" % (num_evals, str(fx_best)))
    return (x_best, fx_best)
def train_gaussian(path):
    X, Y, weight = load_data(path, get_avg(path))
    np.save("train_X", X)
    np.save("train_Y", Y)
    gp = GaussianProcess()
    gp.fit(X, Y)
    return gp
def test_2d_2d(regr=regression.constant,
               corr=correlation.squared_exponential,
               random_start=10, beta0=None):
    """
    MLE estimation of a two-dimensional Gaussian Process model accounting for
    anisotropy. Check random start optimization.
    Test the GP interpolation for 2D output
    """
    b, kappa, e = 5., .5, .1
    g = lambda x: b - x[:, 1] - kappa * (x[:, 0] - e) ** 2.
    f = lambda x: np.vstack((g(x), g(x))).T
    X = np.array([[-4.61611719, -6.00099547],
                  [4.10469096, 5.32782448],
                  [0.00000000, -0.50000000],
                  [-6.17289014, -4.6984743],
                  [1.3109306, -6.93271427],
                  [-5.03823144, 3.10584743],
                  [-2.87600388, 6.74310541],
                  [5.21301203, 4.26386883]])
    y = f(X)
    gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0,
                         theta0=[1e-2] * 2, thetaL=[1e-4] * 2,
                         thetaU=[1e-1] * 2,
                         random_start=random_start, verbose=False)
    gp.fit(X, y)
    y_pred, MSE = gp.predict(X, eval_MSE=True)

    assert_true(np.allclose(y_pred, y) and np.allclose(MSE, 0.))
def next(grid, candidates, pending, complete, completed_values):
    gp = GaussianProcess(random_start=10, nugget=1e-6)
    gp.fit(_encode_categorical_df(complete, grid), completed_values)

    if pending.shape[0]:
        # Generate fantasies for pending
        mean, variance = gp.predict(_encode_categorical_df(pending, grid),
                                    eval_MSE=True)
        pending_value_estimation = pd.Series(
            mean + np.sqrt(variance) * npr.randn(mean.shape[0]))
        gp.fit(_encode_categorical_df(complete.append(pending), grid),
               completed_values.append(pending_value_estimation))

    # Predict the marginal means and variances at candidates.
    mean, variance = gp.predict(_encode_categorical_df(candidates, grid),
                                eval_MSE=True)

    best = np.min(completed_values)
    func_s = np.sqrt(variance) + 0.0001
    Z = (best - mean) / func_s
    ncdf = sps.norm.cdf(Z)
    npdf = sps.norm.pdf(Z)
    ei = func_s * (Z * ncdf + npdf)

    best_cand = np.argmax(ei)
    return (best_cand, grid)
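# The acquisition rule in next() above is expected improvement for
# minimization; extracted as a standalone helper it reads (a sketch, with
# the same 0.0001 jitter on the predictive std):
import numpy as np
import scipy.stats as sps

def expected_improvement(mean, variance, best):
    s = np.sqrt(variance) + 0.0001       # jitter avoids division by zero
    Z = (best - mean) / s
    return s * (Z * sps.norm.cdf(Z) + sps.norm.pdf(Z))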
def noiseless():
    X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T  # observations
    y = f(X).ravel()  # reshape to 1-D

    # mesh the input space to cover all points to predict f(x) and the MSE
    x = np.atleast_2d(np.linspace(0, 10, 1000)).T  # and flatten

    # instantiate gauss process
    gp = GaussianProcess(corr='cubic', theta0=1e-2, thetaL=1e-4,
                         thetaU=1e-1, random_start=100)

    # fit to data using maximum likelihood estimation of params
    gp.fit(X, y)

    # make predictions on meshed x-axis, also return MSE
    y_pred, MSE = gp.predict(x, eval_MSE=True)
    sigma = np.sqrt(MSE)

    # plot
    fig = plt.figure()
    plt.plot(x, f(x), 'r:', label=u'$f(x) = x\,\sin(x)$')
    plt.plot(X, y, 'r.', markersize=10, label=u'Observations')
    plt.plot(x, y_pred, 'b-', label=u'Prediction')
    # fill the space between the +/- 1.96 sigma bounds
    plt.fill(np.concatenate([x, x[::-1]]),  # reverse order of x
             np.concatenate([y_pred - 1.9600 * sigma,
                             (y_pred + 1.9600 * sigma)[::-1]]),
             alpha=0.5, fc='b', ec='None',
             label='95% confidence interval')  # shade, fill color, edge color
    plt.title('Noiseless case')
    plt.xlabel('$x$')
    plt.ylabel('$f(x)$')
    plt.ylim(-10, 20)
    plt.legend(loc='upper left')
    return
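# noiseless() above (and noisy() further down) assume a module-level target
# function f; from the plot labels it is f(x) = x*sin(x):
import numpy as np

def f(x):
    return x * np.sin(x)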
def initGP():
    """Do simulations with random pi,z and create GP, X, y"""
    poolsize = 68
    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), poolsize, std=10)
    X = []
    for i, org in enumerate(pool):
        org.mutate()
        genome = org.genome
        w = genome.weights
        z = [np.random.uniform(0, 0.3)]
        reward = cliff(genome, z)
        while reward <= 0 and len(X) < poolsize / 2:
            # Train input policies to reach the goal.
            org.mutate()
            genome = org.genome
            w = genome.weights
            reward = cliff(genome, z)
        if not len(X):
            X = np.atleast_2d(w + z)
            y = np.atleast_2d([reward])
        else:
            X = np.append(X, [w + z], axis=0)
            y = np.append(y, [reward])
    # Initialize GP with kernel parameters.
    GP = GaussianProcess(theta0=0.1, thetaL=.001, thetaU=1.)
    GP.fit(X, y)
    return GP, X, y
def fitGP(df, station, test_cut='6/1/2013', makePlot=True, showTest=True):
    '''do Gaussian process fit'''
    from sklearn.gaussian_process import GaussianProcess
    fitCols = [station, station + '_filtered']
    gp = GaussianProcess(regr='constant', corr='cubic')
    train = df[station + '_filtered'].truncate(after=test_cut)
    test = df[station + '_filtered'].truncate(before=test_cut)
    X = np.matrix(mpl.dates.date2num(train.index.to_pydatetime())).T
    y = train.values
    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y)
    xtest = np.matrix(mpl.dates.date2num(test.index.to_pydatetime())).T
    # Make the prediction on the meshed x-axis (ask for MSE as well)
    y_test, MSE = gp.predict(xtest, eval_MSE=True)
    sigma = np.sqrt(MSE)
    error = sigma

    y_train = gp.predict(X, eval_MSE=False)

    trainFull = df[station + '_filtered']
    X = np.matrix(mpl.dates.date2num(trainFull.index.to_pydatetime())).T
    y = trainFull.values
    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y)
    ix = pd.date_range('8/1/2014', '8/1/2020', freq='M')
    predx = np.matrix(mpl.dates.date2num(ix.to_pydatetime())).T
    y_pred, MSE_pred = gp.predict(predx, eval_MSE=True)

    trainDf = pd.DataFrame({'train': train, 'fit': y_train})
    testDf = pd.DataFrame({'test': test, 'y_test': y_test,
                           'sigma_test': sigma})
    test_score = metrics.r2_score(test.values, y_test)
    sigma_pred = np.sqrt(MSE_pred)
    predDf = pd.DataFrame({'y_pred': y_pred, 'sigma_pred': sigma_pred},
                          index=ix)
    result = pd.concat([trainDf, testDf, predDf], axis=1)
    result = pd.concat([result, df[fitCols]], axis=1)

    if makePlot:
        fig, ax = plt.subplots(1)
        ax.plot_date(X, y, 'g-', markersize=10, label=u'Observations')
        ax.plot_date(predx, y_pred, 'b-', label=u'Prediction')
        if showTest:
            ax.plot_date(test.index, y_test, 'r-')
            ax.plot_date(test.index, y_test - 1.96 * sigma, 'r--')
            ax.plot_date(test.index, y_test + 1.96 * sigma, 'r--')
        ax.plot_date(predx, y_pred - 1.96 * sigma_pred, 'b--')
        ax.plot_date(predx, y_pred + 1.96 * sigma_pred, 'b--')
        plt.xlabel('Date')
        plt.ylabel('Price per square foot')
    return result, test_score
def interpolate_measurements(measurements, interptype='griddata'):
    pts, z = measurements_to_cartesian_points(measurements)
    gridx, gridy = make_mesh_grid(pts)
    if interptype == 'griddata':
        grid = interpolate.griddata(pts, z, (gridx, gridy),
                                    method='linear', fill_value=-3e30)
    elif interptype == 'rbf':
        ptx, pty = list(zip(*pts))
        f = interpolate.Rbf(ptx, pty, z, function='linear')
        grid = f(gridy, gridx)
    elif interptype == 'gauss':
        from sklearn.gaussian_process import GaussianProcess
        ptx, pty = list(zip(*pts))
        ptx = np.array(ptx)
        pty = np.array(pty)
        z = np.array(z)
        print(math.sqrt(np.var(z)))
        gp = GaussianProcess(regr='quadratic', corr='cubic',
                             theta0=np.min(z), thetaL=min(z),
                             thetaU=max(z), nugget=0.05)
        gp.fit(X=np.column_stack([pty, ptx]), y=z)
        rr_cc_as_cols = np.column_stack([gridy.flatten(), gridx.flatten()])
        # ncol, nrow are assumed to be module-level grid dimensions
        grid = gp.predict(rr_cc_as_cols).reshape((ncol, nrow))
    return gridx, gridy, grid
def check2d(X, regr=regression.constant, corr=correlation.squared_exponential,
            random_start=10, beta0=None):
    """
    MLE estimation of a two-dimensional Gaussian Process model accounting for
    anisotropy. Check random start optimization.
    Test the interpolating property.
    """
    b, kappa, e = 5., .5, .1
    g = lambda x: b - x[:, 1] - kappa * (x[:, 0] - e) ** 2.
    y = g(X).ravel()
    thetaL = [1e-4] * 2
    thetaU = [1e-1] * 2
    gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0,
                         theta0=[1e-2] * 2, thetaL=thetaL, thetaU=thetaU,
                         random_start=random_start, verbose=False)
    gp.fit(X, y)
    y_pred, MSE = gp.predict(X, eval_MSE=True)
    assert_true(np.allclose(y_pred, y) and np.allclose(MSE, 0.))
    assert_true(np.all(gp.theta_ >= thetaL))  # Lower bounds of hyperparameters
    assert_true(np.all(gp.theta_ <= thetaU))  # Upper bounds of hyperparameters
def gaussian_fit_likelihood(x, y):
    # Remove duplicates
    n_dupl = 0
    d = dict()
    for i in range(len(x)):
        try:
            if d[x[i]] != y[i]:
                n_dupl += 1
                d.pop(x[i], None)
        except KeyError:
            d[x[i]] = y[i]
    ret = [n_dupl]
    try:
        newX = np.atleast_2d(list(d.keys())).T
        newY = np.array(list(d.values())).ravel()
        g = GaussianProcess(theta0=1e5, thetaL=1e-4, thetaU=1e-1)
        #g = GaussianProcess()
        g.fit(newX, newY)
        err = newY - g.predict(newX)
        p = pearsonr(err, newX)
        ret += [g.reduced_likelihood_function_value_]
        ret += p
    except Exception:
        #fp = open("bad_pt.txt", "a")
        #fp.write("1")
        #fp.close()
        ret += [0.0, 0.0, 0.0]
    print(ret)
    return ret
def GaussianProcess_interp(points, values, xi, theta0=0.1, thetaL=0.001,
                           thetaU=1.0, nugget=0.01):
    x_points = np.array(points)[:, 0]
    y_points = np.array(points)[:, 1]
    gp = GaussianProcess(theta0=theta0, thetaL=thetaL, thetaU=thetaU,
                         nugget=nugget)
    gp.fit(X=np.column_stack([x_points, y_points]), y=np.array(values))
    xi_as_cols = np.column_stack([xi[0].flatten(), xi[1].flatten()])
    z = gp.predict(xi_as_cols).reshape(xi[0].shape)
    return z
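# Usage sketch for GaussianProcess_interp() above, with xi built by
# np.meshgrid (toy data; assumes numpy as np and the legacy sklearn
# GaussianProcess import):
import numpy as np
pts = np.random.uniform(0., 1., size=(25, 2))
vals = pts[:, 0] ** 2 + pts[:, 1]
xi = np.meshgrid(np.linspace(0., 1., 50), np.linspace(0., 1., 40))
z = GaussianProcess_interp(pts, vals, xi)    # z.shape == (40, 50)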
def _predict_coordinate(segment, coord, times, t1, sigma=10., **kwargs):
    times = np.atleast_2d(times).T
    prev = segment[coord]
    nugget = (sigma / (prev + sigma)) ** 2
    gp = GaussianProcess(nugget=nugget, **kwargs)
    gp.fit(times, prev)
    return gp.predict(t1, eval_MSE=True)
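# Usage sketch for _predict_coordinate() above (hypothetical segment data;
# assumes numpy as np and the legacy sklearn GaussianProcess import):
import numpy as np
segment = {'lat': np.array([52.01, 52.02, 52.03])}
times = np.array([0., 1., 2.])
t1 = np.atleast_2d([3.])                 # time to extrapolate to
mean, mse = _predict_coordinate(segment, 'lat', times, t1, theta0=1e-1)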
def test_batch_size():
    # TypeError when using batch_size on Python 3, see
    # https://github.com/scikit-learn/scikit-learn/issues/7329 for more
    # details
    gp = GaussianProcess()
    gp.fit(X, y)
    gp.predict(X, batch_size=1)
    gp.predict(X, batch_size=1, eval_MSE=True)
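# Note: every snippet in this collection uses the legacy
# sklearn.gaussian_process.GaussianProcess estimator, deprecated in
# scikit-learn 0.18 and removed in 0.20. A rough modern equivalent (a
# sketch, not a drop-in replacement: hyperparameter conventions differ)
# is GaussianProcessRegressor:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T
y = (X * np.sin(X)).ravel()
kernel = RBF(length_scale=1.0) + WhiteKernel(noise_level=1e-6)
gpr = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
gpr.fit(X, y)
y_pred, y_std = gpr.predict(X, return_std=True)   # std instead of MSE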
def __init__(self, data, theta0=None, thetaL_r=None, thetaU_r=None, **ka):
    self.data = data
    gp = GaussianProcess(theta0=theta0, thetaL=thetaL_r * theta0,
                         thetaU=thetaU_r * theta0, **ka)
    gp.fit(data.x[:, newaxis], data.y)

    def q(x, regr=gp.get_params()['regr'],
          beta=gp.reduced_likelihood_function()[1]['beta']):
        return dot(regr(x), beta)

    self.q = q
    self.predict = gp.predict
def gaussReg(metrics, densities):
    # metrics = [[0, 0], [2, 2]]  # List of lists of metrics calculated
    # densities = [0.5, 2.5]  # The corresponding densities
    gp = GaussianProcess(corr='absolute_exponential', theta0=1e-1,
                         thetaL=1e-3, thetaU=1, random_start=100)
    # Change these parameters to get a better fit...
    gp.fit(metrics, densities)
    return gp
def learning(regression, correlation, x_train, y_train):
    print("Learning")
    X_train, Y_train = numpy.asarray(x_train), numpy.asarray(y_train)
    gp = GaussianProcess(corr=correlation, normalize=True, regr=regression,
                         thetaL=1e-2, thetaU=1.0)
    gp.fit(X_train, Y_train)
    return gp
def gpTest1():
    xx = np.array([[10.]])
    X = np.array([[1., 3., 5., 6., 7., 8., 9.]]).T
    y = (X * np.sin(X)).ravel()
    gp = GaussianProcess(theta0=0.1, thetaL=.001, thetaU=1.)
    gp.fit(X, y)
    p = gp.predict(xx)
    print(X, y)
    print(p, xx * np.sin(xx))
def Gaussian_Process_Regression(features, target):
    print("===== GaussianProcess =====")
    print("[INFO] Training the Regressor")
    regressor = GaussianProcess(theta0=0.1, thetaL=0.001, thetaU=1.0)
    regressor.fit(features, target)
    print("Saving the regressor")
    data_io.save_model(regressor)
def get_best_params(model, training, testing, non_relevant_count=100,
                    **kwargs):
    """
    Search for the best set of parameters in the model

    use ParameterTuning().getBestParameters(model,parA=(0,10,0.1,3)...)
    (min,max,step,default)
    """
    # Create a grid of parameters
    kwargs = kwargs or model.param_details()
    grid = list(zip(*(x.flat for x in
                      np.mgrid[[slice(*row[:3]) for row in kwargs.values()]])))
    m_instance = model()
    values = {
        k: ParameterTuning.tune(m_instance, training, testing,
                                non_relevant_count,
                                **dict(zip(list(kwargs.keys())[:2], k)))
        for k in zip(*(v[:2] for v in kwargs.values()))
    }
    gp = GaussianProcess(theta0=.1, thetaL=.001, thetaU=5.)
    # To make it reasonable we limit the number of iterations
    for i in range(0, ParameterTuning.__max_iterations):
        # Get a list of parameters and the correspondent result
        param, response = zip(*values.items())
        # Fit the GaussianProcess model with the parameters and results
        gp.fit(np.array(param), np.array(response).T)
        # Get prediction
        y_predicted, mse = gp.predict(grid, eval_MSE=True)
        # get upper confidence interval. 2.576 z-score corresponds to 99th
        # percentile
        ucb_u = y_predicted + np.sqrt(mse) * ParameterTuning.__z_score
        next_list = sorted(zip(ucb_u, grid), reverse=True)
        new_x = next_list[0][1]
        if new_x not in values:
            values[new_x] = ParameterTuning.tune(
                m_instance, training, testing,
                **{k: v for k, v in zip(kwargs, new_x)})
        else:
            break
    sv = sorted(values.items(), key=lambda kv: kv[1], reverse=True)
    assert sv[0][1] > sv[-1][1], "Sorted from highest to lowest"
    return {k: v for k, v in zip(kwargs, sv[0][0])}
def run_test(training_size, prediction_size, function_name,
             corr_kernel, n_clusters, prior='GCP'):
    scoring_function = functions[function_name]
    parameter_bounds = all_parameter_bounds[function_name]

    x_training = []
    y_training = []
    for i in range(training_size):
        x = [np.random.uniform(parameter_bounds[j][0], parameter_bounds[j][1])
             for j in range(parameter_bounds.shape[0])]
        x_training.append(x)
        y_training.append(scoring_function(x)[0])
    if isInt[function_name]:
        x_training, y_training = compute_unique2(
            np.asarray(x_training, dtype=np.int32), np.asarray(y_training))

    candidates = []
    real_y = []
    for i in range(prediction_size):
        x = [np.random.uniform(parameter_bounds[j][0], parameter_bounds[j][1])
             for j in range(parameter_bounds.shape[0])]
        candidates.append(x)
        real_y.append(scoring_function(x)[0])
    real_y = np.asarray(real_y)
    if isInt[function_name]:
        candidates = np.asarray(candidates, dtype=np.int32)

    if prior == 'GP':
        gp = GaussianProcess(theta0=.1 * np.ones(parameter_bounds.shape[0]),
                             thetaL=0.001 * np.ones(parameter_bounds.shape[0]),
                             thetaU=10. * np.ones(parameter_bounds.shape[0]),
                             random_start=5,
                             nugget=nugget)
        gp.fit(x_training, y_training)
        pred = gp.predict(candidates)
        likelihood = gp.reduced_likelihood_function_value_
    else:
        gcp = GaussianCopulaProcess(nugget=nugget,
                                    corr=corr_kernel,
                                    random_start=5,
                                    normalize=True,
                                    coef_latent_mapping=coef_latent_mapping,
                                    n_clusters=n_clusters)
        gcp.fit(x_training, y_training)
        likelihood = gcp.reduced_likelihood_function_value_
        if not integratedPrediction:
            pred = gcp.predict(candidates)
        else:
            pred, _, _, _ = gcp.predict(candidates,
                                        eval_MSE=True,
                                        eval_confidence_bounds=True,
                                        integratedPrediction=True)

    mse = np.mean((pred - real_y) ** 2.)
    # Normalize
    mse = mse / (np.std(real_y) ** 2.)
    likelihood = np.exp(likelihood)
    return [mse, likelihood]
def gaussReg(metrics, densities):
    """Runs a Gaussian regression algorithm on metrics, an array of metrics,
    and densities, the corresponding densities."""
    # metrics = [[0, 0], [2, 2]]  # List of lists of metrics calculated
    # densities = [0.5, 2.5]  # The corresponding densities
    gp = GaussianProcess(regr='linear', corr='absolute_exponential',
                         theta0=1, thetaL=1, thetaU=10)
    # Change these parameters to get a better fit...
    gp.fit(metrics, densities)
    return gp
def getAllData():
    global allData, costModel, costModelInputScaler, costModelOutputScaler
    if not allData or not costModel:
        ## COST MODEL
        spamReader = csv.reader(open('time_results.csv', 'rb'),
                                delimiter=';', quotechar='"')
        x = []
        y = []
        for row in spamReader:
            x.append([float(row[1]), float(row[2])])
            y.append([float(row[3]) + random.random()])
        x = array(x)
        y = array(y)
        input_scaler = preprocessing.StandardScaler().fit(x)
        scaled_training_set = input_scaler.transform(x)  # Scale training data
        output_scaler = preprocessing.StandardScaler(with_std=False).fit(y)
        adjusted_training_fitness = output_scaler.transform(y)
        regr = GaussianProcess(corr='squared_exponential', theta0=1e-1,
                               thetaL=1e-5, thetaU=3, random_start=400)
        regr.fit(scaled_training_set, adjusted_training_fitness)
        costModel = regr
        costModelInputScaler = input_scaler
        costModelOutputScaler = output_scaler

        ## cores, accuracy, execution time
        spamReader = csv.reader(open('AnsonCores.csv', 'rb'),
                                delimiter=',', quotechar='"')
        cores = {11: {}}
        for row in spamReader:
            cores[11][int(row[1])] = int(row[0])
        maxcores = cores
        spamReader = csv.reader(open('AnsonExec.csv', 'rb'),
                                delimiter=';', quotechar='"')
        allData = {}
        for row in spamReader:
            row_0 = int(row[0])
            row_1 = int(row[1])
            row_2 = int(row[2])
            row_3 = float(row[3])
            row_4 = float(row[4])
            data = [cores[row_0][row_1], row_3, row_4]
            try:
                try:
                    allData[row_0][row_1][row_2] = data
                except:
                    allData[row_0][row_1] = {row_2: data}
            except:
                allData[row_0] = {row_1: {row_2: data}}
        #spamReader.close()
        #print allData
    return allData
def make_a_perfect_model(pairNo, x, X, y):
    """Make a GaussianProcess model for data without noise
    (it complains for the TDC dataset though!)"""
    gp = GaussianProcess(theta0=1e-3, thetaL=1e-3, thetaU=1,
                         random_start=500)
    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y)
    # Make the prediction on the meshed x-axis (ask for MSE as well)
    y_pred, MSE = gp.predict(x, eval_MSE=True)
    sigma = np.sqrt(MSE)
    return gp, y_pred, sigma
def compareStats(rawData, theta):
    data = [[rawData["lon"][i], rawData["lat"][i]]
            for i in range(len(rawData["lon"]))]
    labels = rawData["classif"]
    data, labels = mapping.cleanDoubles(data, labels)

    rfc = GaussianProcess(regr="linear", theta0=theta)
    rfc.fit(data, labels)
    scores = cross_val_score(rfc, data, labels, cv=5)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
def train_gaussian(X_train, y_train, X_test, y_test):
    '''
    Creates a Gaussian process regressor and returns it along with its
    r2_score
    '''
    clf_gaussian = GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-0)
    clf_gaussian.fit(X_train[:500], y_train[:500])
    r2_gaussian = metrics.r2_score(
        y_test, clf_gaussian.predict(X_test)), metrics.r2_score(
        y_train, clf_gaussian.predict(X_train))
    coef_gaussian = {'theta': clf_gaussian.theta_.tolist()}
    return clf_gaussian, r2_gaussian, coef_gaussian
def find_best_candidate_with_GP(X, Y, args, rand_candidates, verbose,
                                acquisition_function='Simple'):
    nugget = args[5]
    gp = GaussianProcess(theta0=1. * np.ones(X.shape[1]),
                         thetaL=0.001 * np.ones(X.shape[1]),
                         thetaU=10. * np.ones(X.shape[1]),
                         nugget=nugget)
    gp.fit(X, Y)
    if verbose == 2:
        print('GP theta :' + str(gp.theta_))

    if acquisition_function == 'Simple':
        predictions = gp.predict(rand_candidates, eval_MSE=False)
        best_candidate_idx = np.argmax(predictions)
        best_candidate = rand_candidates[best_candidate_idx]
        if verbose == 2:
            print('GP Hopefully :', best_candidate,
                  predictions[best_candidate_idx])

    elif acquisition_function == 'UCB':
        predictions, MSE = gp.predict(rand_candidates, eval_MSE=True)
        upperBound = predictions + 1.96 * np.sqrt(MSE)
        best_candidate_idx = np.argmax(upperBound)
        best_candidate = rand_candidates[best_candidate_idx]
        if verbose == 2:
            print('GP Hopefully :', best_candidate,
                  predictions[best_candidate_idx],
                  upperBound[best_candidate_idx])

    elif acquisition_function == 'EI':
        predictions, MSE = gp.predict(rand_candidates, eval_MSE=True)
        y_best = np.max(Y)
        sigma = np.sqrt(MSE)
        ei = [gp_compute_ei(predictions[i], sigma[i], y_best)
              for i in range(rand_candidates.shape[0])]
        best_candidate_idx = np.argmax(ei)
        best_candidate = rand_candidates[best_candidate_idx]
        if verbose == 2:
            # report the EI value (the original referenced the UCB bound
            # here, which is undefined in this branch)
            print('GP Hopefully :', best_candidate,
                  predictions[best_candidate_idx], ei[best_candidate_idx])

    elif acquisition_function == 'MaxLowerBound':
        predictions, MSE = gp.predict(rand_candidates, eval_MSE=True)
        lowerBound = predictions - 1.96 * np.sqrt(MSE)
        best_candidate_idx = np.argmax(lowerBound)
        best_candidate = rand_candidates[best_candidate_idx]
        if verbose == 2:
            print('GP Hopefully :', best_candidate,
                  predictions[best_candidate_idx],
                  lowerBound[best_candidate_idx])

    else:
        print('Acquisition function not handled...')

    return best_candidate
def gpo1d(objective):
    besto = 0.0
    bestp = None
    D = 1
    X = []
    y = []
    params = abs(rand(D)) * 10.0
    X.append(params)
    y.append(objective([params, params]))
    params = abs(rand(D)) * 10.0
    X.append(params)
    y.append(objective([params, params]))
    print("X = ", X)
    print("y = ", y)
    while True:
        gp = GaussianProcess(corr='cubic', theta0=1e-2, thetaL=1e-4,
                             thetaU=1e-1, random_start=100)
        gp.fit(X, y)
        #XX, YY = np.meshgrid(np.linspace(0, 10, 20), np.linspace(0, 10, 20))
        #print XX
        XX = numpy.linspace(0, 10, 100)
        y_pred, mse = gp.predict(np.c_[XX], eval_MSE=True)
        sigma = np.sqrt(mse)
        #Z = np.array(Z)
        #Z = Z.reshape(XX.shape)
        pl.plot(X, y, 'xk')
        pl.plot(XX, y_pred, 'b-', label=u'Prediction')
        pl.fill(np.concatenate([XX, XX[::-1]]),
                np.concatenate([y_pred - 1.9600 * sigma,
                                (y_pred + 1.9600 * sigma)[::-1]]),
                alpha=.5, fc='b', ec='None', label='95% confidence interval')
        #CS = pl.contour(XX, YY, Z, 20, colours='k')
        pl.show()
        # Find next point to evaluate
        # Evaluate and append to X and y
        for k in range(2):
            params = abs(rand(D)) * 10.0
            X.append(params)
            y.append(objective([params, params]))
    # note: the loop above never breaks, so this return is unreachable
    return bestp
class GaussianProcess(object):
    def __init__(self, nugget=0.1):
        self.nugget = nugget

    def fit(self, chips):
        # Import under an alias: this wrapper class shadows the sklearn
        # estimator of the same name, so a bare GaussianProcess(...) here
        # would instantiate the wrapper itself.
        from sklearn.gaussian_process import GaussianProcess as SKGaussianProcess
        X = pandas.DataFrame([[chip.X, chip.Y] for chip in chips])
        y = [chip.gnd for chip in chips]
        self.gp = SKGaussianProcess(nugget=self.nugget)
        self.gp.fit(X, y)

    def predict(self, chip):
        return self.gp.predict([chip.X, chip.Y])
def create_gp_and_fit(x, y, max_try=100):
    # This part is a bit dubious: keep raising theta0 until the fit succeeds
    theta0 = 0.1
    for i in range(max_try + 1):
        try:
            gp = GaussianProcess(theta0=theta0)
            gp.fit(x, y)
            return gp
        except Exception as e:
            theta0 *= 10
            if i == max_try:
                print(theta0)
                raise e
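# Usage sketch for create_gp_and_fit() above (toy 1-D data; assumes numpy
# as np and the legacy sklearn GaussianProcess import):
import numpy as np
x = np.atleast_2d(np.linspace(0., 1., 10)).T
y = np.sin(3. * x).ravel()
gp = create_gp_and_fit(x, y)     # retries with theta0 *= 10 on failure
y_hat = gp.predict(x)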
def noisy():
    X = np.linspace(0.1, 9.9, 20)
    X = np.atleast_2d(X).T

    # observations and noise
    y = f(X).ravel()
    dy = 0.5 + 1.0 * np.random.random(y.shape)
    noise = np.random.normal(0, dy)
    y += noise

    # mesh the input space for evaluation of the function & its prediction and MSE
    x = np.atleast_2d(np.linspace(0, 10, 1000)).T

    # instantiate a gauss process
    gp = GaussianProcess(corr='squared_exponential', theta0=1e-1,
                         thetaL=1e-3, thetaU=1, nugget=(dy / y) ** 2,
                         random_start=100)
    # the nugget specifies the noise variance; this Tikhonov regularization
    # allows robust recovery of the underlying function from noisy data

    # fit the GP using maximum likelihood estimation of params
    gp.fit(X, y)

    # make predictions on meshed x-axis
    y_pred, MSE = gp.predict(x, eval_MSE=True)
    sigma = np.sqrt(MSE)

    # plot
    fig = plt.figure()
    plt.plot(x, f(x), 'r:', label=u'$f(x) = x\,\sin(x)$')
    plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10,
                 label=u'Observations')
    plt.plot(x, y_pred, 'b-', label=u'Prediction')
    plt.fill(np.concatenate([x, x[::-1]]),  # reverse order of x
             np.concatenate([y_pred - 1.9600 * sigma,
                             (y_pred + 1.9600 * sigma)[::-1]]),
             alpha=0.5, fc='b', ec='None',
             label='95% confidence interval')  # shade, fill color, edge color
    plt.title('Noisy case')
    plt.xlabel('$x$')
    plt.ylabel('$f(x)$')
    plt.ylim(-10, 20)
    plt.legend(loc='upper left')
    return
def test_mse_solving():
    # test the MSE estimate to be sane.
    # non-regression test for ignoring off-diagonals of feature covariance,
    # testing with nugget that renders covariance useless, only
    # using the mean function, with low effective rank of data
    gp = GaussianProcess(corr='absolute_exponential', theta0=1e-4,
                         thetaL=1e-12, thetaU=1e-2, nugget=1e-2,
                         optimizer='Welch', regr="linear", random_state=0)

    X, y = make_regression(n_informative=3, n_features=60, noise=50,
                           random_state=0, effective_rank=1)

    gp.fit(X, y)
    assert_greater(1000, gp.predict(X, eval_MSE=True)[1].mean())
class COGP(object):
    def __init__(self, func, initial, minimize=True,
                 storeAllEvaluations=True, storeAllEvaluated=True,
                 maxEvaluations=10):
        self.func = func
        self.X = np.array(initial)
        self.y = np.array([func(el) for el in initial])
        LEFT = np.min(initial)
        RIGHT = np.max(initial)
        self.x = [i for i in itertools.product(
            np.arange(LEFT, RIGHT, 0.1), repeat=len(initial[0]))]
        self.fmin = np.min(self.y)
        self.argmin = self.X[np.argmin(self.y)]
        self.gp = GaussianProcess(corr='cubic', theta0=1e-2,
                                  thetaL=1e-4, thetaU=1e-1)
        self.storeAllEvaluations = storeAllEvaluations
        self.storeAllEvaluated = storeAllEvaluated
        if storeAllEvaluations:
            self._allEvaluations = []
        if storeAllEvaluated:
            self._allEvaluated = []
        self.max_evaluations = maxEvaluations
        self.time = 0

    def learn(self):
        wall_time = time.time()
        for step in range(self.max_evaluations):
            try:
                self.gp.fit(self.X, self.y)
            except Exception:
                break
            y_pred, MSE = self.gp.predict(self.x, eval_MSE=True)
            # expected improvement over the current best
            s = (self.fmin - y_pred) / np.sqrt(MSE)
            argm = np.argmax(MSE * (s * norm.cdf(s) + norm.pdf(s)))
            self.X = np.vstack([self.X, self.x[argm]])
            f = self.func(self.x[argm])
            if self.storeAllEvaluations:
                self._allEvaluations.append(f)
            if self.storeAllEvaluated:
                self._allEvaluated.append(self.x[argm])
            self.y = np.hstack([self.y, f])
            if f < self.fmin:
                self.fmin = f
                self.argmin = self.x[argm]
        self.time = time.time() - wall_time
        return (np.array(self.argmin), self.fmin)
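# Usage sketch for COGP above (toy 1-D objective; assumes numpy as np,
# itertools, time, scipy.stats.norm, and the legacy sklearn
# GaussianProcess import):
opt = COGP(func=lambda v: (v[0] - 2.) ** 2,
           initial=[[0.0], [1.0], [4.0]],
           maxEvaluations=5)
x_best, f_best = opt.learn()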
def decompose_model(timeseries, fcst_window):
    composed_df = pd.DataFrame()
    res_df = pd.DataFrame()
    res_test = sm.seasonal_decompose(timeseries.dropna(), two_sided=False)
    composed_df['trend'] = res_test.trend.dropna()
    composed_df['seasonal'] = res_test.seasonal.dropna()
    composed_df['residual'] = res_test.resid.dropna()

    # create date index for the output data frame
    date_rng = pd.date_range(composed_df.index[len(composed_df) - 1]
                             + DateOffset(months=1),
                             periods=fcst_window, freq='MS')
    res_df['Date'] = pd.to_datetime(date_rng, errors='coerce')
    res_df = res_df.sort_values(by='Date')
    res_df = res_df.set_index('Date')

    # predict the residual component
    resid_mean = composed_df['residual'].mean()
    res_df['Residual'] = resid_mean

    # predict the seasonal component
    last_year = date_rng[0].year - 1
    last_year_rng = pd.date_range(date(last_year, 1, 1), periods=12,
                                  freq='MS')
    seas_data = composed_df.loc[composed_df.index.isin(last_year_rng)].seasonal
    seas_val = list()
    for i in range(fcst_window):
        seas_val.append(seas_data[res_df.index[i].month - 1])
    res_df['Seasonal'] = seas_val

    # predict the trend component (Gaussian Process)
    x_fit = (composed_df.index - composed_df.index[0]).days.tolist()
    x_test = (res_df.index - composed_df.index[0]).days.tolist()
    x_fit_np = np.asarray(x_fit).reshape((-1, 1))
    x_test_np = np.asarray(x_test).reshape((-1, 1))
    y_fit = composed_df['trend'].values
    y_fit_np = np.asarray(y_fit).reshape((-1, 1))
    gpr = GaussianProcess(corr='cubic', regr='linear', theta0=1e-2,
                          thetaL=1e-4, thetaU=1e-1, random_start=100)
    gpr.fit(x_fit_np, y_fit_np)
    y_gpr = gpr.predict(x_test_np)
    res_df['Trend'] = y_gpr
    res_df['Total'] = res_df.sum(axis=1)
    res_df.loc[res_df['Total'] < 0, 'Total'] = 0
    return res_df
def buildInterpolator(obs_arr, cosmo_params, function='multiquadric',
                      smooth=0.0):  #, regr='linear', corr='cubic'):
    '''Build an interpolator:
    input:
        obs_arr = (points, Nbin), where # of points = # of models
        cosmo_params = (points, Nparams), currently Nparams is hard-coded
        to be 3 (om, w, si8)
    output: spline_interps
    Usage: spline_interps[ibin](im, wm, sm)
    '''
    if function == 'GP':
        gp = GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1,
                             random_start=100)
        gp.fit(cosmo_params, obs_arr)
        out = lambda x: (gp.predict(x.reshape(1, -1))).flatten()
        return out
    else:
        m, w, s = cosmo_params.T
        spline_interps = list()
        for ibin in range(obs_arr.shape[-1]):
            model = obs_arr[:, ibin]
            iinterp = interpolate.Rbf(m, w, s, model,
                                      function=function, smooth=smooth)
            spline_interps.append(iinterp)
        #return spline_interps

        def interp_cosmo(params):
            '''Interpolate the powspec for certain param.
            Params: list of 3 parameters = (om, w, si8)
            Method: "multiquadric" for spline (default), and "GP" for
            Gaussian process.
            '''
            mm, wm, sm = params
            gen_ps = lambda ibin: spline_interps[ibin](mm, wm, sm)
            ps_interp = array(list(map(gen_ps, range(obs_arr.shape[-1]))))
            ps_interp = ps_interp.reshape(-1, 1).squeeze()
            return ps_interp

        return interp_cosmo
def GaussianProcesses(data, label, pred_data, pred_last):
    '''memory not enough
    '''
    data = np.array(data)
    pred_data = np.array(pred_data)
    label = np.array(label)
    pred_last = np.array(pred_last)

    from sklearn.gaussian_process import GaussianProcess
    gp = GaussianProcess(theta0=5e-1)
    print(data)
    print(data.shape)
    print(label)
    print(label.shape)
    gp.fit(data[:, 8:14], label)
    return data
class robot_speed_gp():
    def __init__(self, num_in_samples=2,
                 sampling_function=uniform_random_sampling):
        self.gp = GaussianProcess(corr='squared_exponential', theta0=1e-2,
                                  thetaL=1e-4, thetaU=1e-1, random_start=100)
        self.results_hash = {}
        self.observations = []
        self.samples = []
        self.num_in_samples = num_in_samples
        self.sampling_function = sampling_function
        self.model_robot_results = {}
        self.generateX_AND_getY_for_training()

    def generateX_AND_getY_for_training(self):
        for i in range(self.num_in_samples):
            # generated_parameters = uniform_random_sampling()
            generated_parameters = self.sampling_function()
            self.results_hash[",".join(generated_parameters)] = 0
        # Run the tests on the simulator in parallel; this will use the max
        # number of processes available
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        self.results_hash = dict(
            pool.map(robot_problem, self.results_hash.keys()))
        for item in self.results_hash:
            self.samples.append(
                np.array([float(v) for v in item.split(",")]))
            self.observations.append(self.results_hash[item])

    def fit_gp(self):
        assert len(self.samples) > 0
        assert len(self.observations) > 0
        self.gp.fit(self.samples, self.observations)

    def test_gp(self, test_samples):
        y_pred, MSE = self.gp.predict(test_samples, eval_MSE=True)
        sigma = np.sqrt(MSE)
        for i in range(len(test_samples)):
            self.model_robot_results[",".join(map(str, test_samples[i]))] = y_pred[i]
        return self.model_robot_results