def test_lml_without_cloning_kernel(kernel): # Test that lml of optimized kernel is stored correctly. gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) input_theta = np.ones(gpr.kernel_.theta.shape, dtype=np.float64) gpr.log_marginal_likelihood(input_theta, clone_kernel=False) assert_almost_equal(gpr.kernel_.theta, input_theta, 7)
def test_lml_gradient(kernel): # Compare analytic and numeric gradient of log marginal likelihood. gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) lml, lml_gradient = gpr.log_marginal_likelihood(kernel.theta, True) lml_gradient_approx = \ approx_fprime(kernel.theta, lambda theta: gpr.log_marginal_likelihood(theta, False), 1e-10) assert_almost_equal(lml_gradient, lml_gradient_approx, 3)
def test_converged_to_local_maximum(kernel): # Test that we are in local maximum after hyperparameter-optimization. gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) lml, lml_gradient = \ gpr.log_marginal_likelihood(gpr.kernel_.theta, True) assert np.all((np.abs(lml_gradient) < 1e-4) | (gpr.kernel_.theta == gpr.kernel_.bounds[:, 0]) | (gpr.kernel_.theta == gpr.kernel_.bounds[:, 1]))
def test_custom_optimizer(kernel): # Test that GPR can use externally defined optimizers. # Define a dummy optimizer that simply tests 50 random hyperparameters def optimizer(obj_func, initial_theta, bounds): rng = np.random.RandomState(0) theta_opt, func_min = \ initial_theta, obj_func(initial_theta, eval_gradient=False) for _ in range(50): theta = np.atleast_1d( rng.uniform(np.maximum(-2, bounds[:, 0]), np.minimum(1, bounds[:, 1]))) f = obj_func(theta, eval_gradient=False) if f < func_min: theta_opt, func_min = theta, f return theta_opt, func_min gpr = GaussianProcessRegressor(kernel=kernel, optimizer=optimizer) gpr.fit(X, y) # Checks that optimizer improved marginal likelihood assert (gpr.log_marginal_likelihood(gpr.kernel_.theta) > gpr.log_marginal_likelihood(gpr.kernel.theta))
def test_random_starts(): # Test that an increasing number of random-starts of GP fitting only # increases the log marginal likelihood of the chosen theta. n_samples, n_features = 25, 2 rng = np.random.RandomState(0) X = rng.randn(n_samples, n_features) * 2 - 1 y = np.sin(X).sum(axis=1) + np.sin(3 * X).sum(axis=1) \ + rng.normal(scale=0.1, size=n_samples) kernel = C(1.0, (1e-2, 1e2)) \ * RBF(length_scale=[1.0] * n_features, length_scale_bounds=[(1e-4, 1e+2)] * n_features) \ + WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-5, 1e1)) last_lml = -np.inf for n_restarts_optimizer in range(5): gp = GaussianProcessRegressor( kernel=kernel, n_restarts_optimizer=n_restarts_optimizer, random_state=0, ).fit(X, y) lml = gp.log_marginal_likelihood(gp.kernel_.theta) assert lml > last_lml - np.finfo(np.float32).eps last_lml = lml
# medium term irregularity k3 = 0.66**2 \ * RationalQuadratic(length_scale=1.2, alpha=0.78) k4 = 0.18**2 * RBF(length_scale=0.134) \ + WhiteKernel(noise_level=0.19**2) # noise terms kernel_gpml = k1 + k2 + k3 + k4 gp = GaussianProcessRegressor(kernel=kernel_gpml, alpha=0, optimizer=None, normalize_y=True) gp.fit(X, y) print("GPML kernel: %s" % gp.kernel_) print("Log-marginal-likelihood: %.3f" % gp.log_marginal_likelihood(gp.kernel_.theta)) # Kernel with optimized parameters k1 = 50.0**2 * RBF(length_scale=50.0) # long term smooth rising trend k2 = 2.0**2 * RBF(length_scale=100.0) \ * ExpSineSquared(length_scale=1.0, periodicity=1.0, periodicity_bounds="fixed") # seasonal component # medium term irregularities k3 = 0.5**2 * RationalQuadratic(length_scale=1.0, alpha=1.0) k4 = 0.1**2 * RBF(length_scale=0.1) \ + WhiteKernel(noise_level=0.1**2, noise_level_bounds=(1e-3, np.inf)) # noise terms kernel = k1 + k2 + k3 + k4 gp = GaussianProcessRegressor(kernel=kernel, alpha=0, normalize_y=True) gp.fit(X, y)
def test_lml_precomputed(kernel): # Test that lml of optimized kernel is stored correctly. gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) assert (gpr.log_marginal_likelihood( gpr.kernel_.theta) == gpr.log_marginal_likelihood())
def test_lml_improving(kernel): # Test that hyperparameter-tuning improves log-marginal likelihood. gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) assert (gpr.log_marginal_likelihood(gpr.kernel_.theta) > gpr.log_marginal_likelihood(kernel.theta))
plt.figure() kernel = 1.0 * RBF(length_scale=100.0, length_scale_bounds=(1e-2, 1e3)) \ + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1)) gp = GaussianProcessRegressor(kernel=kernel, alpha=0.0).fit(X, y) X_ = np.linspace(0, 5, 100) y_mean, y_cov = gp.predict(X_[:, np.newaxis], return_cov=True) plt.plot(X_, y_mean, 'k', lw=3, zorder=9) plt.fill_between(X_, y_mean - np.sqrt(np.diag(y_cov)), y_mean + np.sqrt(np.diag(y_cov)), alpha=0.5, color='k') plt.plot(X_, 0.5 * np.sin(3 * X_), 'r', lw=3, zorder=9) plt.scatter(X[:, 0], y, c='r', s=50, zorder=10, edgecolors=(0, 0, 0)) plt.title("Initial: %s\nOptimum: %s\nLog-Marginal-Likelihood: %s" % (kernel, gp.kernel_, gp.log_marginal_likelihood(gp.kernel_.theta))) plt.tight_layout() # Second run plt.figure() kernel = 1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3)) \ + WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-10, 1e+1)) gp = GaussianProcessRegressor(kernel=kernel, alpha=0.0).fit(X, y) X_ = np.linspace(0, 5, 100) y_mean, y_cov = gp.predict(X_[:, np.newaxis], return_cov=True) plt.plot(X_, y_mean, 'k', lw=3, zorder=9) plt.fill_between(X_, y_mean - np.sqrt(np.diag(y_cov)), y_mean + np.sqrt(np.diag(y_cov)), alpha=0.5, color='k')