def fitGP(cls, X, Y, **kwargs):
    """Build a surrogate model with one Gaussian process per output variable.

    Each column of ``Y`` (i.e. each row of ``Y.T``) is regressed on ``X`` by
    its own ``GaussianProcess`` constructed with ``**kwargs``.  The fitted
    processes are collected in column order and passed to ``cls``; the
    resulting instance (a SurrogateModel when called on that class) is
    returned.
    """

    def _fit_single(target):
        # One new estimator is created and fitted for each output column.
        process = GaussianProcess(**kwargs)
        process.fit(X, target)
        return process

    return cls([_fit_single(target) for target in Y.T])
# The function to predict f = lambda x: x * np.sin(x) # The design of experiments X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T # Observations y = f(X).ravel() # Mesh the input space for evaluations of the real function, the prediction and # its MSE x = np.atleast_2d(np.linspace(0, 10, 1000)).T # Instanciate a Gaussian Process model gp = GaussianProcess(corr='cubic', theta0=1e-2, thetaL=1e-4, thetaU=1e-1, \ random_start=100) # Fit to data using Maximum Likelihood Estimation of the parameters gp.fit(X, y) # Make the prediction on the meshed x-axis (ask for MSE as well) y_pred, MSE = gp.predict(x, eval_MSE=True) sigma = np.sqrt(MSE) # Plot the function, the prediction and the 95% confidence interval based on # the MSE fig = pl.figure() pl.plot(x, f(x), 'r:', label=u'$f(x) = x\,\sin(x)$') pl.plot(X, y, 'r.', markersize=10, label=u'Observations') pl.plot(x, y_pred, 'b-', label=u'Prediction') pl.fill(np.concatenate([x, x[::-1]]), \
import numpy as np
from scikits.learn import datasets
from scikits.learn.gaussian_process import GaussianProcess
from scikits.learn.cross_val import cross_val_score, KFold
from scikits.learn.metrics import r2_score

# Diabetes regression data shipped with scikits' data sets
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

# Gaussian process with a constant regression term and an
# absolute-exponential correlation; the starting value and both bounds of
# the correlation parameters are given as lists of 10 entries
gp = GaussianProcess(
    regr='constant',
    corr='absolute_exponential',
    theta0=[1e-4] * 10,
    thetaL=[1e-12] * 10,
    thetaU=[1e-2] * 10,
    nugget=1e-2,
    optimizer='Welch',
)

# First fit on the whole dataset: maximum likelihood estimation is performed
gp.fit(X, y)

# Freeze the model for the cross-validation loop: the correlation parameter
# is set to its MLE value and both bounds are dropped (None bounds
# deactivate MLE)
gp.theta0 = gp.theta
gp.thetaL = None
gp.thetaU = None

# Cross-validation estimate of the coefficient of determination, averaged
# over the folds and computed on all CPUs available on the machine
K = 20  # folds
R2 = cross_val_score(
    gp, X, y=y, cv=KFold(y.size, K), n_jobs=-1
).mean()
# License: BSD style

from scikits.learn import datasets, cross_val, metrics
from scikits.learn.gaussian_process import GaussianProcess
from matplotlib import pyplot as pl

# Print the docstring.  The call form prints exactly what the former
# ``print __doc__`` statement printed on Python 2 and is also valid syntax
# on Python 3.
print(__doc__)

# Load the dataset from scikits' data sets
diabetes = datasets.load_diabetes()
X, y = diabetes['data'], diabetes['target']

# Instantiate a GP model
gp = GaussianProcess(regr='constant', corr='absolute_exponential',
                     theta0=[1e-4] * 10, thetaL=[1e-12] * 10,
                     thetaU=[1e-2] * 10, nugget=1e-2, optimizer='Welch',
                     verbose=False)

# Fit the GP model to the data
gp.fit(X, y)

# Reuse the fitted correlation parameters as the starting value and clear
# both bounds before the cross-validation loop.
# NOTE(review): clearing the bounds presumably prevents theta from being
# re-estimated on every leave-one-out fit -- confirm against GaussianProcess.
gp.theta0 = gp.theta
gp.thetaL = None
gp.thetaU = None
gp.verbose = False

# Estimate the leave-one-out predictions using the cross_val module
n_jobs = 2  # the distributing capacity available on the machine
# NOTE(review): adding the scores to y assumes each leave-one-out score is a
# signed residual (prediction minus observation) -- confirm against the
# estimator's score method.
y_pred = y + cross_val.cross_val_score(gp, X, y=y,
                                       cv=cross_val.LeaveOneOut(y.size),
                                       n_jobs=n_jobs).ravel()
"""The function to predict (classification will then consist in predicting whether g(x) <= 0 or not)""" return 5. - x[:, 1] - .5 * x[:, 0]**2. # Design of experiments X = np.array([[-4.61611719, -6.00099547], [4.10469096, 5.32782448], [0.00000000, -0.50000000], [-6.17289014, -4.6984743], [1.3109306, -6.93271427], [-5.03823144, 3.10584743], [-2.87600388, 6.74310541], [5.21301203, 4.26386883]]) # Observations y = g(X) # Instanciate and fit Gaussian Process Model gp = GaussianProcess(theta0=5e-1) # Don't perform MLE or you'll get a perfect prediction for this simple example! gp.fit(X, y) # Evaluate real function, the prediction and its MSE on a grid res = 50 x1, x2 = np.meshgrid(np.linspace(- lim, lim, res), \ np.linspace(- lim, lim, res)) xx = np.vstack([x1.reshape(x1.size), x2.reshape(x2.size)]).T y_true = g(xx) y_pred, MSE = gp.predict(xx, eval_MSE=True) sigma = np.sqrt(MSE) y_true = y_true.reshape((res, res)) y_pred = y_pred.reshape((res, res))
# License: BSD style

from scikits.learn import datasets
from scikits.learn.gaussian_process import GaussianProcess
from matplotlib import pyplot as pl

# Print the docstring.  The call form prints exactly what the former
# ``print __doc__`` statement printed on Python 2 and is also valid syntax
# on Python 3.
print(__doc__)

# Load the dataset from scikits' data sets
diabetes = datasets.load_diabetes()
X, y = diabetes['data'], diabetes['target']

# Instantiate a GP model
gp = GaussianProcess(regr='constant', corr='absolute_exponential',
                     theta0=[1e-4] * 10, thetaL=[1e-12] * 10,
                     thetaU=[1e-2] * 10, nugget=1e-2, optimizer='Welch',
                     verbose=True)

# Fit the GP model to the data
gp.fit(X, y)

# Estimate the leave-one-out coefficient of determination score; the
# leave-one-out predictions are returned alongside it
Q2, y_pred = gp.score(return_predictions=True)

# Goodness-of-fit plot: predictions against observations, with the dashed
# diagonal marking where predictions equal observations
pl.figure()
pl.title('Goodness-of-fit plot (Q2 = %1.2e)' % Q2)
pl.plot(y, y_pred, 'r.', label='Leave-one-out')
pl.plot(y, gp.predict(X), 'k.', label='Whole dataset (nugget=1e-2)')
pl.plot([y.min(), y.max()], [y.min(), y.max()], 'k--')
pl.xlabel('Observations')
"""The function to predict.""" return x * np.sin(x) # The design of experiments X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T # Observations y = f(X).ravel() # Mesh the input space for evaluations of the real function, the prediction and # its MSE x = np.atleast_2d(np.linspace(0, 10, 1000)).T # Instanciate a Gaussian Process model gp = GaussianProcess(corr='cubic', theta0=1e-2, thetaL=1e-4, thetaU=1e-1, \ random_start=100) # Fit to data using Maximum Likelihood Estimation of the parameters gp.fit(X, y) # Make the prediction on the meshed x-axis (ask for MSE as well) y_pred, MSE = gp.predict(x, eval_MSE=True) sigma = np.sqrt(MSE) # Plot the function, the prediction and the 95% confidence interval based on # the MSE fig = pl.figure() pl.plot(x, f(x), 'r:', label=u'$f(x) = x\,\sin(x)$') pl.plot(X, y, 'r.', markersize=10, label=u'Observations') pl.plot(x, y_pred, 'b-', label=u'Prediction') pl.fill(np.concatenate([x, x[::-1]]), \
# Print the docstring.  The call form prints exactly what the former
# ``print __doc__`` statement printed on Python 2, is also valid syntax on
# Python 3, and matches the call-style print used at the end of this script.
print(__doc__)

# Author: Vincent Dubourg <*****@*****.**>
# License: BSD style

from scikits.learn import datasets
from scikits.learn.gaussian_process import GaussianProcess
from scikits.learn.cross_val import cross_val_score, KFold

# Load the dataset from scikits' data sets
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target

# Instantiate a GP model
gp = GaussianProcess(regr='constant', corr='absolute_exponential',
                     theta0=[1e-4] * 10, thetaL=[1e-12] * 10,
                     thetaU=[1e-2] * 10, nugget=1e-2, optimizer='Welch')

# Fit the GP model to the data performing maximum likelihood estimation
gp.fit(X, y)

# Deactivate maximum likelihood estimation for the cross-validation loop
gp.theta0 = gp.theta  # Given correlation parameter = MLE
gp.thetaL, gp.thetaU = None, None  # None bounds deactivate MLE

# Perform a cross-validation estimate of the coefficient of determination
# using the cross_val module using all CPUs available on the machine
K = 20  # folds
R2 = cross_val_score(gp, X, y=y, cv=KFold(y.size, K), n_jobs=-1).mean()
print("The %d-Folds estimate of the coefficient of determination is R2 = %s"
      % (K, R2))