import numpy as np
from scipy.special import gamma, kv


def __call__(self, theta, X=None):
    """ Compute correlation for given correlation parameter(s) theta.

    Parameters
    ----------
    theta : array_like
        An array giving the autocorrelation parameter(s).

    X : array_like, shape=(n_eval, n_features)
        An array containing the n_eval query points whose correlation with
        the training datapoints shall be computed. If None, the
        autocorrelation of the training datapoints is computed instead.

    Returns
    -------
    r : array_like, shape=(n_eval, n_samples) if X is not None,
                          (n_samples, n_samples) if X is None
        An array containing the values of the correlation model.
    """
    # Parse theta into its components
    theta_gp, theta_l, length_scales, nu = self._parse_theta(theta)

    # Train the length-scale Gaussian Process on log10 length-scales
    from skgp.estimators import GaussianProcess
    self.gp_l = GaussianProcess(corr="matern_1.5",
                                theta0=theta_l).fit(self.X,
                                                    np.log10(length_scales))
    l_train = 10 ** self.gp_l.predict(self.X)

    # Prepare distance and length-scale information for all pairs of
    # datapoints whose correlation shall be computed
    if X is not None:
        # Get pairwise componentwise L1-differences to the input training
        # set
        d = X[:, np.newaxis, :] - self.X[np.newaxis, :, :]
        d = d.reshape((-1, X.shape[1]))
        # Predict length scales for query datapoints
        l_query = 10 ** self.gp_l.predict(X)
        l = np.transpose([np.tile(l_train, len(l_query)),
                          np.repeat(l_query, len(l_train))])
    else:
        # No external datapoints given; auto-correlation of the training
        # set is used instead
        d = self.D
        l = l_train[self.ij]

    # Compute the general (non-stationary) Matern kernel
    if d.ndim > 1 and theta_gp.size == d.shape[1]:
        # Anisotropic case: one activation parameter per feature
        activation = np.sum(theta_gp.reshape(1, d.shape[1]) * d ** 2, axis=1)
    else:
        # Isotropic case: a single activation parameter for all features
        activation = theta_gp[0] * np.sum(d ** 2, axis=1)
    tmp = 0.5 * (l ** 2).sum(1)
    tmp2 = np.maximum(2 * np.sqrt(nu * activation / tmp), 1e-5)
    r = np.sqrt(l[:, 0]) * np.sqrt(l[:, 1]) / (gamma(nu) * 2 ** (nu - 1))
    r /= np.sqrt(tmp)
    r *= tmp2 ** nu * kv(nu, tmp2)

    # Convert correlations to a 2d matrix
    if X is not None:
        return r.reshape(-1, self.n_samples)
    else:  # exploit symmetry of the auto-correlation matrix
        R = np.eye(self.n_samples) * (1. + self.nugget)
        R[self.ij[:, 0], self.ij[:, 1]] = r
        R[self.ij[:, 1], self.ij[:, 0]] = r
        return R
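# A minimal, self-contained sanity check of the kernel above (this helper is
# hypothetical and not part of skgp): for equal length-scales l1 == l2 == l
# and theta == 1, the non-stationary Matern reduces to a stationary Matern.
# For nu = 1.5 the stationary kernel has the closed form (1 + z) * exp(-z)
# with z = 2 * sqrt(nu) * d / l = sqrt(6) * d / l.
import numpy as np
from scipy.special import gamma, kv


def nonstationary_matern(d2, l1, l2, nu=1.5, theta=1.0):
    """Mirror of the pairwise computation in __call__ above: d2 is the
    squared distance, l1/l2 are the local length-scales of the two points."""
    tmp = 0.5 * (l1 ** 2 + l2 ** 2)
    tmp2 = np.maximum(2 * np.sqrt(nu * theta * d2 / tmp), 1e-5)
    r = np.sqrt(l1) * np.sqrt(l2) / (gamma(nu) * 2 ** (nu - 1))
    r /= np.sqrt(tmp)
    return r * tmp2 ** nu * kv(nu, tmp2)


d = np.linspace(0.1, 2.0, 5)
l = 0.7
z = np.sqrt(6) * d / l
assert np.allclose(nonstationary_matern(d ** 2, l, l), (1 + z) * np.exp(-z))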
    by ARD. Furthermore, the values x in R^3 and
    x + \alpha (1, 2, 0) + \beta (1, 0, 2) have the same value for all x
    and all alpha and beta. This can be exploited by FAD.
    """
    return np.tanh(2 * X[:, 0] - X[:, 1] - X[:, 2])


Xtrain = np.random.random((100, 6)) * 2 - 1
ytrain = f(Xtrain)

plt.figure()
colors = ['r', 'g', 'b', 'c', 'm']
labels = {True: "Bayesian GP", False: "Standard GP"}
for i, bayesian in enumerate(labels.keys()):
    model = GaussianProcess(corr='squared_exponential', theta0=[1.0] * 12,
                            thetaL=[1e-4] * 12, thetaU=[1e2] * 12)
    if bayesian:
        model = BayesianGaussianProcess(model, n_posterior_samples=25,
                                        n_burnin=250, n_sampling_steps=25)

    train_sizes, train_scores, test_scores = \
        learning_curve(model, Xtrain, ytrain,
                       scoring="mean_squared_error", cv=10, n_jobs=1)
    test_scores = -test_scores  # scores correspond to negative MSE
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_min = np.min(test_scores, axis=1)
    test_scores_max = np.max(test_scores, axis=1)
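    # Plausible completion of the loop body (assumed, mirroring the plotting
    # idiom of the ARD/Factor-Analysis example below): draw the mean learning
    # curve per model and shade the min/max band across CV folds.
    plt.plot(train_sizes, test_scores_mean, label=labels[bayesian],
             color=colors[i])
    plt.fill_between(train_sizes, test_scores_min, test_scores_max,
                     alpha=0.2, color=colors[i])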
Xtrain = np.random.random((200, 4)) * 2 - 1
ytrain = f(Xtrain)

plt.figure()
colors = ['r', 'g', 'b', 'c', 'm']
labels = {1: "Isotropic",
          4: "Automatic Relevance Determination",
          8: "Factor Analysis"}
for i, n in enumerate(labels.keys()):
    train_sizes, train_scores, test_scores = \
        learning_curve(GaussianProcess(corr='squared_exponential',
                                       theta0=[1.0] * n, thetaL=[1e-4] * n,
                                       thetaU=[1e2] * n),
                       Xtrain, ytrain, scoring="mean_squared_error",
                       cv=10, n_jobs=4)
    test_scores = -test_scores  # scores correspond to negative MSE
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_min = np.min(test_scores, axis=1)
    test_scores_max = np.max(test_scores, axis=1)
    plt.plot(train_sizes, test_scores_mean, label=labels[n], color=colors[i])
    plt.fill_between(train_sizes, test_scores_min, test_scores_max,
                     alpha=0.2, color=colors[i])
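# Typical figure finishing for the learning-curve plot (assumed; the axis
# labels are chosen to match the quantities computed above and are not taken
# verbatim from the source):
plt.xlabel("Training set size")
plt.ylabel("Mean squared error (10-fold CV)")
plt.legend(loc="best")
plt.show()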
#----------------------------------------------------------------------
# Actual test data
X = np.random.random(50)[:, None] * 4 - 2

# Observations
y = f(X).ravel()

# Mesh the input space for evaluations of the real function, the prediction,
# and its MSE
x = np.atleast_2d(np.linspace(-2, 2, 1000)).T

# Instantiate one Gaussian Process model for the stationary Matern kernel and
# one for the non-stationary one
gp_stationary = \
    GaussianProcess(corr='matern_1.5', theta0=1e0, thetaL=1e-2, thetaU=1e+2,
                    random_start=100)
gp_non_stationary = \
    GaussianProcess(corr=NonStationaryCorrelation(), theta0=1e0, thetaL=1e-2,
                    thetaU=1e+2, random_start=100)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp_stationary.fit(X, y)
gp_non_stationary.fit(X, y)

print("Theta:\n\tStationary: {:.3f} \t Non-stationary: {:.3f}".format(
    gp_stationary.theta_[0], gp_non_stationary.theta_[0]))
print("Posterior probability (negative, average, log):\n\t"
      "Stationary: {:.5f} \t Non-stationary: {:.5f}".format(
          gp_stationary.posterior_function_value_,
          gp_non_stationary.posterior_function_value_))
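# A plausible continuation (assumed; skgp keeps the legacy
# sklearn.gaussian_process API, in which predict(x, eval_MSE=True) returns
# the prediction together with its pointwise MSE), comparing both fitted
# models on the mesh defined above:
y_stat, mse_stat = gp_stationary.predict(x, eval_MSE=True)
y_nonstat, mse_nonstat = gp_non_stationary.predict(x, eval_MSE=True)

plt.figure()
plt.plot(x, f(x), 'r:', label='true function f(x)')
plt.plot(X, y, 'r.', markersize=10, label='observations')
plt.plot(x, y_stat, 'b-', label='stationary prediction')
plt.plot(x, y_nonstat, 'g-', label='non-stationary prediction')
plt.legend(loc='best')
plt.show()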