def test_cmaes(self):
    X_lower = np.array([0, 0])
    X_upper = np.array([1, 1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X[:, 0])[:, np.newaxis]
    # One length scale per input dimension (ndim=2)
    kernel = george.kernels.Matern52Kernel(np.ones([2]), ndim=2)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(X, Y)

    rec = PosteriorMeanOptimization(model, X_lower, X_upper, method="cmaes")
    startpoints = init_random_uniform(X_lower, X_upper, 10)
    inc, inc_val = rec.estimate_incumbent(startpoints)

    # Check shapes
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_lower.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    # Check that the incumbent lies within the bounds
    assert not np.any([np.any(inc[:, i] < X_lower[i])
                       for i in range(X_lower.shape[0])])
    assert not np.any([np.any(inc[:, i] > X_upper[i])
                       for i in range(X_upper.shape[0])])
def test_cmaes(self):
    X_lower = np.array([0, 0])
    X_upper = np.array([1, 1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X[:, 0])[:, np.newaxis]
    # One length scale per input dimension (ndim=2)
    kernel = george.kernels.Matern52Kernel(np.ones([2]), ndim=2)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(X, Y)

    rec = PosteriorMeanAndStdOptimization(model, X_lower, X_upper,
                                          method="cmaes")
    startpoints = init_random_uniform(X_lower, X_upper, 10)
    inc, inc_val = rec.estimate_incumbent(startpoints)

    # Check shapes
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_lower.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    # Check that the incumbent lies within the bounds
    assert not np.any([np.any(inc[:, i] < X_lower[i])
                       for i in range(X_lower.shape[0])])
    assert not np.any([np.any(inc[:, i] > X_upper[i])
                       for i in range(X_upper.shape[0])])
def setUp(self):
    self.task = TestTask()

    kernel = george.kernels.Matern52Kernel(
        np.ones([self.task.n_dims]) * 0.01, ndim=self.task.n_dims)
    noise_kernel = george.kernels.WhiteKernel(1e-9, ndim=self.task.n_dims)
    kernel = 3000 * (kernel + noise_kernel)
    prior = default_priors.TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)

    cost_kernel = george.kernels.Matern52Kernel(
        np.ones([self.task.n_dims]) * 0.01, ndim=self.task.n_dims)
    cost_noise_kernel = george.kernels.WhiteKernel(1e-9,
                                                   ndim=self.task.n_dims)
    cost_kernel = 3000 * (cost_kernel + cost_noise_kernel)
    prior = default_priors.TophatPrior(-2, 2)
    cost_model = GaussianProcess(cost_kernel, prior=prior)

    X = init_random_uniform(self.task.X_lower, self.task.X_upper, 3)
    Y, C = self.task.evaluate(X)

    model.train(X, Y, do_optimize=False)
    cost_model.train(X, C, do_optimize=False)

    self.acquisition_func = InformationGainPerUnitCost(model,
                                                       cost_model,
                                                       self.task.X_lower,
                                                       self.task.X_upper,
                                                       self.task.is_env)
    self.acquisition_func.update(model, cost_model)
class WithinModelComparison(BaseTask):

    def __init__(self, seed=42):
        X_lower = np.array([0, 0])
        X_upper = np.array([1, 1])
        rng = np.random.RandomState(seed)

        cov_amp = 1.0
        mat_kernel = george.kernels.Matern52Kernel(np.ones([2]) * 0.1,
                                                   ndim=2)
        kernel = cov_amp * mat_kernel

        # Draw a random function from the GP prior: with K = L L^T and
        # sigma ~ N(0, I), f = L sigma has covariance K
        self.xstar = rng.rand(1000, 2)
        K = kernel.value(self.xstar)
        L = sla.cholesky(K, lower=True)
        sigma = rng.randn(1000)
        self.f = np.dot(L, sigma)

        self.gp = GaussianProcess(kernel, yerr=0.0)
        self.gp.train(self.xstar, self.f[:, np.newaxis], do_optimize=False)

        best = np.argmin(self.f)
        fopt = self.f[best]
        opt = self.xstar[best]

        super(WithinModelComparison, self).__init__(X_lower, X_upper,
                                                    opt, fopt)

    def objective_function(self, x):
        noise = 1e-3 * np.random.randn()
        mu, _ = self.gp.predict(x)
        return mu + noise

    def evaluate_test(self, x):
        return self.objective_function(x)
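# Aside: a minimal, self-contained sketch of the sampling trick used in
# WithinModelComparison above. If K = L L^T with L lower-triangular, then
# f = L sigma with sigma ~ N(0, I) has covariance E[f f^T] = L L^T = K,
# i.e. f is a draw from the GP prior. The squared-exponential kernel and
# all names below are local to this sketch, not part of the library.
import numpy as np
import scipy.linalg as sla

rng = np.random.RandomState(0)
x = rng.rand(50, 1)
# Toy covariance matrix, with jitter on the diagonal for numerical stability
K = np.exp(-0.5 * ((x - x.T) / 0.2) ** 2) + 1e-10 * np.eye(50)
L = sla.cholesky(K, lower=True)  # lower factor, so that K = L @ L.T
f = L.dot(rng.randn(50))         # one sample from N(0, K)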
def test(self):
    X_lower = np.array([0])
    X_upper = np.array([1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X)
    kernel = george.kernels.Matern52Kernel(np.ones([1]), ndim=1)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(X, Y)

    rec = BestObservation(model, X_lower, X_upper)
    startpoints = init_random_uniform(X_lower, X_upper, 10)
    inc, inc_val = rec.estimate_incumbent(startpoints)

    # Check shapes
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_lower.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    # Check that the incumbent lies within the bounds
    assert not np.any([np.any(inc[:, i] < X_lower[i])
                       for i in range(X_lower.shape[0])])
    assert not np.any([np.any(inc[:, i] > X_upper[i])
                       for i in range(X_upper.shape[0])])
class Test(unittest.TestCase):

    def setUp(self):
        self.X = np.random.rand(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        self.c = np.exp(self.X[:, 1])
        self.n_dims = 2
        self.lower = np.zeros(self.n_dims)
        self.upper = np.ones(self.n_dims)
        self.is_env = np.array([0, 1])

        kernel = george.kernels.Matern52Kernel(np.array([0.1, 0.1]), ndim=2)
        self.model = GaussianProcess(kernel)
        self.model.train(self.X, self.y)

        kernel = george.kernels.Matern52Kernel(np.ones([self.n_dims]) * 0.01,
                                               ndim=self.n_dims)
        kernel = 3000 * kernel
        prior = default_priors.TophatPrior(-2, 2)
        model = GaussianProcess(kernel, prior=prior)

        cost_kernel = george.kernels.Matern52Kernel(
            np.ones([self.n_dims]) * 0.01, ndim=self.n_dims)
        cost_kernel = 3000 * cost_kernel
        prior = default_priors.TophatPrior(-2, 2)
        cost_model = GaussianProcess(cost_kernel, prior=prior)

        model.train(self.X, self.y, do_optimize=False)
        cost_model.train(self.X, self.c, do_optimize=False)

        self.acquisition_func = InformationGainPerUnitCost(model,
                                                           cost_model,
                                                           self.lower,
                                                           self.upper,
                                                           self.is_env)
        self.acquisition_func.update(model, cost_model)

    def test_sampling_representer_points(self):
        # All representer points must lie in the configuration subspace,
        # i.e. the environment dimension is fixed to its upper bound
        assert np.all(self.acquisition_func.zb[:, self.is_env == 1] ==
                      self.acquisition_func.upper[self.is_env == 1])

    def test_compute(self):
        X_test = np.random.rand(5, 2)
        a = self.acquisition_func.compute(X_test, derivative=False)
        assert a.shape[0] == X_test.shape[0]
        assert len(a.shape) == 1
def setUp(self):
    self.task = SinFunction()

    kernel = george.kernels.Matern52Kernel(
        np.ones([self.task.n_dims]) * 0.01, ndim=self.task.n_dims)
    noise_kernel = george.kernels.WhiteKernel(1e-9, ndim=self.task.n_dims)
    kernel = 3000 * (kernel + noise_kernel)
    prior = default_priors.TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)

    X = init_random_uniform(self.task.X_lower, self.task.X_upper, 3)
    Y = self.task.evaluate(X)
    model.train(X, Y, do_optimize=False)

    self.acquisition_func = InformationGainMC(model,
                                              X_upper=self.task.X_upper,
                                              X_lower=self.task.X_lower)
    self.acquisition_func.update(model)
def test(self):
    X_lower = np.array([0, 0])
    X_upper = np.array([1, 1])
    is_env = np.array([0, 1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X[:, 0])[:, np.newaxis]
    kernel = george.kernels.Matern52Kernel(np.ones([2]), ndim=2)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(X, Y)

    rec = BestProjectedObservation(model, X_lower, X_upper, is_env)
    inc, inc_val = rec.estimate_incumbent()

    # Check shapes
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_lower.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    # Check that the incumbent lies within the bounds
    assert not np.any([np.any(inc[:, i] < X_lower[i])
                       for i in range(X_lower.shape[0])])
    assert not np.any([np.any(inc[:, i] > X_upper[i])
                       for i in range(X_upper.shape[0])])

    # Check that the incumbent matches the best observation on the
    # non-environment dimensions
    b = np.argmin(Y)
    x_best = X[b, None, :]
    assert np.all(inc[:, is_env == 0] == x_best[:, is_env == 0])
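# Aside (a sketch of the projection idea this test exercises, under the
# assumption that BestProjectedObservation fixes environment dimensions to
# their upper bound; the helper name is ours, not the library's):
import numpy as np

def project_to_subspace(x, X_upper, is_env):
    # Keep the configuration dimensions, set the environment dimensions
    # (is_env == 1) to their upper bound
    x_proj = np.array(x, dtype=float, copy=True)
    x_proj[is_env == 1] = X_upper[is_env == 1]
    return x_proj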
class TestInformationGain(unittest.TestCase):

    def setUp(self):
        self.X = np.random.rand(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        kernel = george.kernels.Matern52Kernel(np.array([0.1, 0.1]), ndim=2)
        self.model = GaussianProcess(kernel)
        self.model.train(self.X, self.y)
        self.acquisition_func = InformationGain(self.model,
                                                np.zeros([2]),
                                                np.ones([2]))
        self.acquisition_func.update(self.model)

    def test_compute(self):
        X_test = np.random.rand(5, 2)
        a = self.acquisition_func.compute(X_test, derivative=False)
        assert a.shape[0] == X_test.shape[0]
        assert len(a.shape) == 1

    def test_sampling_representer_points(self):
        # All representer points must lie inside the bounds
        assert np.all(self.acquisition_func.zb >= self.acquisition_func.lower)
        assert np.all(self.acquisition_func.zb <= self.acquisition_func.upper)

    def test_compute_pmin(self):
        # Uniform case: identical means, so every point should be (roughly)
        # equally likely to be the minimum
        m = np.ones([self.acquisition_func.Nb])
        v = np.eye(self.acquisition_func.Nb)
        pmin = np.exp(epmgp.joint_min(m, v))
        uprob = 1. / self.acquisition_func.Nb
        assert pmin.shape[0] == self.acquisition_func.Nb
        assert np.all(pmin < uprob + 0.03)
        assert np.all(pmin > uprob - 0.01)

        # Dirac delta: the first point has a much lower mean, so all the
        # probability mass should concentrate on it
        m = np.ones([self.acquisition_func.Nb]) * 1000
        m[0] = 1
        v = np.eye(self.acquisition_func.Nb)
        pmin = np.exp(epmgp.joint_min(m, v))
        assert pmin[0] == 1.0
        assert np.all(pmin[1:] < 1e-10)

    def test_innovations(self):
        # Case 1: a test point far away should barely influence the
        # representer points
        rep = np.array([[1.0]])
        x = np.array([[0.0]])
        dm, dv = self.acquisition_func.innovations(x, rep)
        assert np.any(np.abs(dm) < 1e-3)
        assert np.any(np.abs(dv) < 1e-3)

        # Case 2: a test point close to the representer points should
        # change them noticeably
        rep = np.array([[1.0]])
        x = np.array([[0.99]])
        dm, dv = self.acquisition_func.innovations(x, rep)
        assert np.any(np.abs(dm) > 1e-3)
        assert np.any(np.abs(dv) > 1e-3)
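# Aside (an extra sanity check, assuming only what the test above already
# relies on: epmgp.joint_min(m, v) returns the *log* of p_min over the
# representer points): exponentiating should give a probability distribution
# that sums to approximately one.
m = np.random.randn(10)
v = np.eye(10)
pmin = np.exp(epmgp.joint_min(m, v))
assert np.isclose(pmin.sum(), 1.0, atol=1e-2)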
class TestGaussianProcess(unittest.TestCase):

    def setUp(self):
        self.X = np.random.rand(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        self.kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                                    ndim=self.X.shape[1])
        prior = TophatPrior(-2, 2)
        self.model = GaussianProcess(self.kernel,
                                     prior=prior,
                                     normalize_input=False,
                                     normalize_output=False)
        self.model.train(self.X, self.y, do_optimize=False)

    def test_predict(self):
        X_test = np.random.rand(10, 2)

        m, v = self.model.predict(X_test)
        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 1
        assert v.shape[0] == X_test.shape[0]

        m, v = self.model.predict(X_test, full_cov=True)
        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 2
        assert v.shape[0] == X_test.shape[0]
        assert v.shape[1] == X_test.shape[0]

        # Compare against the analytic posterior covariance
        # K_zz|x = K_zz - K_zx K_xx^{-1} K_xz
        K_zz = self.kernel.value(X_test)
        K_zx = self.kernel.value(X_test, self.X)
        K_nz = (self.kernel.value(self.X)
                + self.model.noise * np.eye(self.X.shape[0]))
        inv = spla.inv(K_nz)
        K_zz_x = K_zz - np.dot(K_zx, np.inner(inv, K_zx))
        assert np.mean((K_zz_x - v) ** 2) < 1e-4

    def test_sample_function(self):
        X_test = np.random.rand(8, 2)
        n_funcs = 3
        funcs = self.model.sample_functions(X_test, n_funcs=n_funcs)
        assert len(funcs.shape) == 2
        assert funcs.shape[0] == n_funcs
        assert funcs.shape[1] == X_test.shape[0]

    def test_predict_variance(self):
        x_test1 = np.random.rand(1, 2)
        x_test2 = np.random.rand(10, 2)
        var = self.model.predict_variance(x_test1, x_test2)
        assert len(var.shape) == 2
        assert var.shape[0] == x_test2.shape[0]
        assert var.shape[1] == x_test1.shape[0]

    def test_nll(self):
        # Smoke test: the negative log-likelihood must be computable
        theta = np.array([0.2, 0.2, 0.001])
        nll = self.model.nll(theta)

    def test_optimize(self):
        theta = self.model.optimize()
        # Hyperparameters are 2 length scales + noise
        assert theta.shape[0] == 3

    def test_get_incumbent(self):
        inc, inc_val = self.model.get_incumbent()
        b = np.argmin(self.y)
        np.testing.assert_almost_equal(inc, self.X[b], decimal=5)
        assert inc_val == self.y[b]
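# Aside (a sketch, not the library's API): the explicit spla.inv() above is
# fine for a 10-point test, but the same posterior covariance
# K_zz - K_zx K_xx^{-1} K_xz is computed more stably with a Cholesky solve.
# All matrices here are stand-ins for the kernel matrices in the test.
import numpy as np
import scipy.linalg as spla

rng = np.random.RandomState(1)
A = rng.rand(10, 10)
K_xx = A.dot(A.T) + 1e-6 * np.eye(10)  # SPD stand-in for K_nz
K_zx = rng.rand(5, 10)                 # stand-in cross-covariance
K_zz = np.eye(5)                       # stand-in test covariance

cf = spla.cho_factor(K_xx)
K_post = K_zz - K_zx.dot(spla.cho_solve(cf, K_zx.T))

# Agrees with the inverse-based version up to round-off
K_post_inv = K_zz - K_zx.dot(np.inner(spla.inv(K_xx), K_zx))
assert np.allclose(K_post, K_post_inv)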
def train(self, X, y, do_optimize=True, **kwargs):
    """
    Performs MCMC sampling to sample hyperparameter configurations from the
    likelihood and trains for each sample a GP on X and y

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D is the number of features.
    y: np.ndarray (N,)
        The corresponding target values.
    do_optimize: boolean
        If set to true we perform MCMC sampling, otherwise we just use the
        hyperparameters specified in the kernel.
    """

    if self.normalize_input:
        # Normalize input to be in [0, 1]
        self.X, self.lower, self.upper = \
            normalization.zero_one_normalization(X, self.lower, self.upper)
    else:
        self.X = X

    if self.normalize_output:
        # Normalize output to have zero mean and unit standard deviation
        self.y, self.y_mean, self.y_std = \
            normalization.zero_mean_unit_var_normalization(y)
        if self.y_std == 0:
            raise ValueError("Cannot normalize output. "
                             "All targets have the same value")
    else:
        self.y = y

    # Use the mean of the data as mean for the GP
    self.mean = np.mean(self.y, axis=0)
    self.gp = george.GP(self.kernel, mean=self.mean)

    if do_optimize:
        # We have one walker for each hyperparameter configuration
        sampler = emcee.EnsembleSampler(self.n_hypers,
                                        len(self.kernel.pars) + 1,
                                        self.loglikelihood)
        sampler.random_state = self.rng.get_state()

        # Do a burn-in in the first iteration
        if not self.burned:
            # Initialize the walkers by sampling from the prior
            if self.prior is None:
                self.p0 = self.rng.rand(self.n_hypers,
                                        len(self.kernel.pars) + 1)
            else:
                self.p0 = self.prior.sample_from_prior(self.n_hypers)

            # Run MCMC sampling
            self.p0, _, _ = sampler.run_mcmc(self.p0,
                                             self.burnin_steps,
                                             rstate0=self.rng)
            self.burned = True

        # Start sampling
        pos, _, _ = sampler.run_mcmc(self.p0,
                                     self.chain_length,
                                     rstate0=self.rng)

        # Save the current position, it will be the start point
        # in the next iteration
        self.p0 = pos

        # Take the last samples from each walker
        self.hypers = sampler.chain[:, -1]
    else:
        self.hypers = self.gp.kernel[:].tolist()
        self.hypers.append(self.noise)
        self.hypers = [self.hypers]

    self.models = []
    for sample in self.hypers:
        # Instantiate a GP for each hyperparameter configuration
        kernel = deepcopy(self.kernel)
        kernel.pars = np.exp(sample[:-1])
        noise = np.exp(sample[-1])
        model = GaussianProcess(kernel,
                                normalize_output=self.normalize_output,
                                normalize_input=self.normalize_input,
                                noise=noise,
                                lower=self.lower,
                                upper=self.upper,
                                rng=self.rng)
        model.train(X, y, do_optimize=False)
        self.models.append(model)

    self.is_trained = True
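# Aside: a minimal, self-contained demo of the emcee pattern used above
# (v2-style API, matching this codebase's use of run_mcmc returning a tuple
# and the .chain attribute): burn in, resume from the saved walker
# positions, then take the last sample of each walker. The Gaussian
# log-probability below is a stand-in for self.loglikelihood.
import numpy as np
import emcee

n_walkers, ndim = 8, 3
log_prob = lambda theta: -0.5 * np.sum(theta ** 2)

sampler = emcee.EnsembleSampler(n_walkers, ndim, log_prob)
p0 = np.random.rand(n_walkers, ndim)   # initialize walkers (prior sample)
p0, _, _ = sampler.run_mcmc(p0, 100)   # burn-in
pos, _, _ = sampler.run_mcmc(p0, 200)  # sampling phase
hypers = sampler.chain[:, -1]          # last sample of each walker
assert hypers.shape == (n_walkers, ndim)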
def test(self):
    X_lower = np.array([0])
    X_upper = np.array([1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X)
    kernel = george.kernels.Matern52Kernel(np.ones([1]), ndim=1)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(X, Y)

    x_test = init_random_uniform(X_lower, X_upper, 3)

    # Shape matching predict
    m, v = model.predict(x_test)
    assert len(m.shape) == 2
    assert m.shape[0] == x_test.shape[0]
    assert m.shape[1] == 1
    assert len(v.shape) == 2
    assert v.shape[0] == x_test.shape[0]
    assert v.shape[1] == x_test.shape[0]

    # TODO: check gradients

    # Shape matching function sampling
    x_ = np.linspace(X_lower, X_upper, 10)
    x_ = x_[:, np.newaxis]
    funcs = model.sample_functions(x_, n_funcs=2)
    assert len(funcs.shape) == 2
    assert funcs.shape[0] == 2
    assert funcs.shape[1] == x_.shape[0]

    # Shape matching predict variance
    x_test1 = np.array([np.random.rand(1)])
    x_test2 = np.random.rand(10)[:, np.newaxis]
    var = model.predict_variance(x_test1, x_test2)
    assert len(var.shape) == 2
    assert var.shape[0] == x_test2.shape[0]
    assert var.shape[1] == 1

    # Check compatibility with all acquisition functions
    acq_func = EI(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    acq_func = PI(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    acq_func = LCB(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    acq_func = InformationGain(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    # Check compatibility with all incumbent estimation methods
    rec = BestObservation(model, X_lower, X_upper)
    inc, inc_val = rec.estimate_incumbent(None)
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_upper.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    rec = PosteriorMeanOptimization(model, X_lower, X_upper)
    startpoints = init_random_uniform(X_lower, X_upper, 4)
    inc, inc_val = rec.estimate_incumbent(startpoints)
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_upper.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    rec = PosteriorMeanAndStdOptimization(model, X_lower, X_upper)
    startpoints = init_random_uniform(X_lower, X_upper, 4)
    inc, inc_val = rec.estimate_incumbent(startpoints)
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_upper.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1
def train(self, X, Y, do_optimize=True, **kwargs):
    """
    Performs MCMC sampling to sample hyperparameter configurations from the
    likelihood and trains for each sample a GP on X and Y

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D is the number of features.
    Y: np.ndarray (N, 1)
        The corresponding target values.
    do_optimize: boolean
        If set to true we perform MCMC sampling, otherwise we just use the
        hyperparameters specified in the kernel.
    """
    self.X = X
    # For EnvES we transform s to (1 - s)^2
    if self.basis_func is not None:
        self.X = deepcopy(X)
        self.X[:, self.dim] = self.basis_func(self.X[:, self.dim])
    self.Y = Y

    # Use the mean of the data as mean for the GP
    mean = np.mean(Y, axis=0)
    self.gp = george.GP(self.kernel, mean=mean)

    # Precompute the covariance; escalate the jitter until the Cholesky
    # decomposition succeeds
    yerr = 1e-25
    while True:
        try:
            self.gp.compute(self.X, yerr=yerr)
            break
        except np.linalg.LinAlgError:
            yerr *= 10
            logging.error("Cholesky decomposition for the covariance "
                          "matrix of the GP failed. "
                          "Added %s noise on the diagonal." % yerr)

    if do_optimize:
        # We have one walker for each hyperparameter configuration
        self.sampler = emcee.EnsembleSampler(self.n_hypers,
                                             len(self.kernel.pars),
                                             self.loglikelihood)

        # Do a burn-in in the first iteration
        if not self.burned:
            # Initialize the walkers by sampling from the prior
            self.p0 = self.prior.sample_from_prior(self.n_hypers)
            # Run MCMC sampling
            self.p0, _, _ = self.sampler.run_mcmc(self.p0,
                                                  self.burnin_steps)
            self.burned = True

        # Start sampling
        pos, _, _ = self.sampler.run_mcmc(self.p0, self.chain_length)

        # Save the current position, it will be the start point
        # in the next iteration
        self.p0 = pos

        # Take the last samples from each walker
        self.hypers = self.sampler.chain[:, -1]

        self.models = []
        logging.info("Hypers: %s" % self.hypers)
        for sample in self.hypers:
            # Instantiate a model for each hyperparameter configuration
            kernel = deepcopy(self.kernel)
            kernel.pars = np.exp(sample)
            model = GaussianProcess(kernel,
                                    basis_func=self.basis_func,
                                    dim=self.dim)
            model.train(self.X, self.Y, do_optimize=False)
            self.models.append(model)
    else:
        self.hypers = self.gp.kernel[:]
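# Aside: the retry loop above is the usual "jitter escalation" idiom: grow
# the diagonal noise until the Cholesky factorization succeeds. A
# self-contained version of the same pattern (function name is ours):
import numpy as np

def robust_cholesky(K, jitter=1e-25, max_tries=30):
    for _ in range(max_tries):
        try:
            return np.linalg.cholesky(K + jitter * np.eye(K.shape[0])), jitter
        except np.linalg.LinAlgError:
            jitter *= 10
    raise np.linalg.LinAlgError("matrix not positive definite, even with jitter")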
def train(self, X, Y, do_optimize=True, **kwargs):
    """
    Performs MCMC sampling to sample hyperparameter configurations from the
    likelihood and trains for each sample a GP on X and Y

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D is the number of features.
    Y: np.ndarray (N, 1)
        The corresponding target values.
    do_optimize: boolean
        If set to true we perform MCMC sampling, otherwise we just use the
        hyperparameters specified in the kernel.
    """
    self.X = X
    # For Fabolas we transform s to (1 - s)^2
    if self.basis_func is not None:
        self.X = deepcopy(X)
        self.X[:, self.dim] = self.basis_func(self.X[:, self.dim])
    self.Y = Y

    if self.normalize_output:
        self.Y_mean = np.mean(Y)
        self.Y_std = np.std(Y)
        self.Y = (Y - self.Y_mean) / self.Y_std

    # Use the mean of the data as mean for the GP
    mean = np.mean(self.Y, axis=0)
    self.gp = george.GP(self.kernel, mean=mean)

    if do_optimize:
        # We have one walker for each hyperparameter configuration
        self.sampler = emcee.EnsembleSampler(self.n_hypers,
                                             len(self.kernel.pars) + 1,
                                             self.loglikelihood)

        # Do a burn-in in the first iteration
        if not self.burned:
            # Initialize the walkers by sampling from the prior
            self.p0 = self.prior.sample_from_prior(self.n_hypers)
            # Run MCMC sampling
            self.p0, _, _ = self.sampler.run_mcmc(self.p0,
                                                  self.burnin_steps)
            self.burned = True

        # Start sampling
        pos, _, _ = self.sampler.run_mcmc(self.p0, self.chain_length)

        # Save the current position, it will be the start point
        # in the next iteration
        self.p0 = pos

        # Take the last samples from each walker
        self.hypers = self.sampler.chain[:, -1]
    else:
        self.hypers = [self.gp.kernel[:]]

    self.models = []
    for sample in self.hypers:
        # Instantiate a model for each hyperparameter configuration
        kernel = deepcopy(self.kernel)
        kernel.pars = np.exp(sample[:-1])
        noise = np.exp(sample[-1])
        model = GaussianProcess(kernel,
                                basis_func=self.basis_func,
                                dim=self.dim,
                                normalize_output=self.normalize_output,
                                noise=noise)
        model.train(X, Y, do_optimize=False)
        self.models.append(model)
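# Aside (assumption, labelled): the comment above says Fabolas transforms
# the dataset-size fraction s to (1 - s)^2 before fitting the GP. A sketch
# of such a basis function (the name is ours, not the library's):
def quadratic_basis(s):
    # s in [0, 1]: fraction of the full training set; (1 - s)^2 shrinks
    # to 0 as s approaches the full data set
    return (1 - s) ** 2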