def test_matern52_encoding():
    """Check the encoding classes and dimensions of an ARD Matern52 kernel.

    With no ``encoding_type`` given, the kernel and its squared distance are
    both expected to use ``LogarithmScalarEncoding``; with
    ``encoding_type="positive"`` both are expected to use
    ``PositiveScalarEncoding``. In either case the kernel encoding has
    dimension 1 and the squared-distance encoding has dimension 2.
    """
    cases = (
        ({}, LogarithmScalarEncoding),
        ({"encoding_type": "positive"}, PositiveScalarEncoding),
    )
    for extra_kwargs, expected_encoding in cases:
        kernel = Matern52(dimension=2, ARD=True, **extra_kwargs)
        assert isinstance(kernel.encoding, expected_encoding)
        assert isinstance(kernel.squared_distance.encoding, expected_encoding)
        assert kernel.encoding.dimension == 1
        assert kernel.squared_distance.encoding.dimension == 2
def test_gp_regression_with_warping():
    """Compare GP regression with and without input warping.

    The target ``sin(3 * log(x))`` is evaluated on inputs drawn as ``2**u``
    with ``u`` uniform in ``[L, U]``. Two models are fitted on the same data:
    one with a plain ``Matern52`` kernel and one with that kernel wrapped in a
    ``WarpedKernel``. The test asserts that

    * the mean absolute training errors of both models agree to 4 decimals;
    * the warped model's mean absolute test error is below 10% of the plain
      model's test error.
    """

    def f(x):
        return np.sin(3*np.log(x))

    np.random.seed(7)

    L, U = -5., 12.
    input_range = (2.**L, 2.**U)

    x_train = np.sort(2.**np.random.uniform(L, U, 250))
    x_test = np.sort(2.**np.random.uniform(L, U, 500))
    y_train = f(x_train)
    y_test = f(x_test)

    # to mx.nd
    y_train_mx_nd = mx.nd.array(y_train)
    x_train_mx_nd = mx.nd.array(x_train)
    x_test_mx_nd = mx.nd.array(x_test)

    kernels = [
        Matern52(dimension=1),
        WarpedKernel(
            kernel=Matern52(dimension=1),
            warping=Warping(dimension=1, index_to_range={0: input_range}),
        ),
    ]
    models = [GaussianProcessRegression(kernel=k, random_seed=0) for k in kernels]

    train_errors, test_errors = [], []
    for model in models:
        model.fit(x_train_mx_nd, y_train_mx_nd)

        # Only the first element of each returned pair is compared below;
        # the second element is not needed by this test.
        mu_train, _ = model.predict(x_train_mx_nd)[0]
        mu_test, _ = model.predict(x_test_mx_nd)[0]

        # back to np.array
        mu_train = mu_train.asnumpy()
        mu_test = mu_test.asnumpy()

        train_errors.append(np.mean(np.abs(mu_train - y_train)))
        test_errors.append(np.mean(np.abs(mu_test - y_test)))

    # The two models have similar performance on training points
    np.testing.assert_almost_equal(train_errors[0], train_errors[1], decimal=4)

    # As expected, the model with warping largely outperforms the model without
    assert test_errors[1] < 0.1 * test_errors[0]
def test_likelihood_encoding():
    """Check which encoding class ``MarginalLikelihood`` exposes.

    Without ``encoding_type`` the likelihood's encoding is expected to be a
    ``LogarithmScalarEncoding``; with ``encoding_type="positive"`` it is
    expected to be a ``PositiveScalarEncoding``.
    """
    shared_kwargs = dict(mean=ScalarMeanFunction(), kernel=Matern52(dimension=1))

    default_likelihood = MarginalLikelihood(**shared_kwargs)
    assert isinstance(default_likelihood.encoding, LogarithmScalarEncoding)

    positive_likelihood = MarginalLikelihood(
        encoding_type="positive", **shared_kwargs)
    assert isinstance(positive_likelihood.encoding, PositiveScalarEncoding)
def test_matern52_unit_scale():
    """Check a freshly initialized non-ARD Matern52 kernel on two 2-D points.

    The kernel matrix of ``[[1, 0], [0, 1]]`` against itself is compared with
    the reference obtained by applying ``mater52`` to the values 0 (diagonal)
    and 2 (off-diagonal).
    """
    points = mx.nd.array([[1, 0], [0, 1]], dtype=DATA_TYPE)
    kernel = Matern52(dimension=2)
    assert kernel.ARD == False
    kernel.collect_params().initialize()

    actual = kernel(points, points).asnumpy()

    # Reference entries: mater52 applied element by element
    reference_args = ((0.0, 2.0), (2.0, 0.0))
    expected = np.array(
        [[mater52(value) for value in row] for row in reference_args])
    np.testing.assert_almost_equal(expected, actual)
def test_set_gp_hps():
    """Check that values written by ``_set_gp_hps`` are read back unchanged.

    A marginal likelihood over a warped Matern52 kernel is initialized and
    hybridized, six hyperparameter values are written with ``_set_gp_hps``,
    and ``_get_gp_hps`` must return (almost) the same array.
    """
    mean = ScalarMeanFunction()
    warped_kernel = WarpedKernel(
        kernel=Matern52(dimension=1),
        warping=Warping(dimension=1, index_to_range={0: (-4., 4.)}),
    )
    likelihood = MarginalLikelihood(
        kernel=warped_kernel, mean=mean, initial_noise_variance=1e-6)
    likelihood.initialize(ctx=mx.cpu(), force_reinit=True)
    likelihood.hybridize()

    written = np.array([1e-2, 1.0, 0.5, 0.3, 0.2, 1.1])
    _set_gp_hps(written, likelihood)
    read_back = _get_gp_hps(likelihood)
    np.testing.assert_array_almost_equal(written, read_back)
def test_get_gp_hps():
    """Check the hyperparameter values of a freshly initialized likelihood.

    A marginal likelihood over a warped Matern52 kernel is built with
    ``initial_noise_variance=1e-6``, initialized and hybridized; the array
    returned by ``_get_gp_hps`` is compared with the expected initial values.
    """
    mean = ScalarMeanFunction()
    warped_kernel = WarpedKernel(
        kernel=Matern52(dimension=1),
        warping=Warping(dimension=1, index_to_range={0: (-4., 4.)}),
    )
    likelihood = MarginalLikelihood(
        kernel=warped_kernel, mean=mean, initial_noise_variance=1e-6)
    likelihood.initialize(ctx=mx.cpu(), force_reinit=True)
    likelihood.hybridize()

    # The order of the hps is: noise, mean, covariance scale, bandwidth,
    # warping a, warping b
    expected = np.array([1e-6, 0.0, 1.0, 1.0, 1.0, 1.0])
    np.testing.assert_array_almost_equal(_get_gp_hps(likelihood), expected)
def test_matern52_wrongshape():
    """Check that a Matern52 kernel of dimension 3 rejects 2-column inputs.

    Each call below is expected to raise ``mx.base.MXNetError``:

    * the kernel matrix of a ``(5, 2)`` input against itself;
    * the kernel diagonal of that ``(5, 2)`` input;
    * the kernel matrix of a ``(3, 3)`` input against the ``(5, 2)`` input.

    The call results are deliberately not bound to names: the calls are
    expected to raise, so an assignment would never take place.
    """
    kernel = Matern52(dimension=3)
    kernel.collect_params().initialize()

    X1 = mx.nd.random_normal(0.0, 1.0, shape=(5, 2))
    with pytest.raises(mx.base.MXNetError):
        kernel(X1, X1)
    with pytest.raises(mx.base.MXNetError):
        kernel.diagonal(mx.nd, X1)

    X2 = mx.nd.random_normal(0.0, 1.0, shape=(3, 3))
    with pytest.raises(mx.base.MXNetError):
        kernel(X2, X1)
def test_gp_regression_2d_with_ard():
    """Fit an ARD Matern52 GP on a target that uses only the first input column.

    The test asserts that, after fitting on noise-free data:

    * the learned noise variance is (almost) ``NOISE_VARIANCE_LOWER_BOUND``;
    * the inverse bandwidth of column 0 exceeds those of columns 1 and 2,
      which are both (almost) ``INVERSE_BANDWIDTHS_LOWER_BOUND``;
    * predicted means match the targets to 2 decimals on the training points
      and to 1 decimal on the test points.
    """

    def f(x):
        # The target ignores every column except the first one
        first_column = x[:, 0]
        return np.sin(first_column) / first_column

    np.random.seed(7)

    dimension = 3
    num_points = 30

    # Train and test inputs: num_points points each, drawn uniformly in [-5, 5]^dimension
    x_train = np.random.uniform(-5, 5, size=(num_points, dimension))
    x_test = np.random.uniform(-5, 5, size=(num_points, dimension))
    y_train, y_test = f(x_train), f(x_test)

    # Convert to mx.nd
    y_train_mx_nd = mx.nd.array(y_train)
    x_train_mx_nd = mx.nd.array(x_train)
    x_test_mx_nd = mx.nd.array(x_test)

    ard_kernel = Matern52(dimension=dimension, ARD=True)
    model = GaussianProcessRegression(kernel=ard_kernel)
    model.fit(x_train_mx_nd, y_train_mx_nd)

    # The data carry no noise, so the noise variance found by empirical Bayes
    # should sit at the smallest allowed value
    np.testing.assert_almost_equal(
        model.likelihood.get_noise_variance(), NOISE_VARIANCE_LOWER_BOUND)

    # Only column 0 is informative, so its inverse bandwidth should dominate,
    # while the inverse bandwidths of the uninformative columns 1 and 2 should
    # be close to INVERSE_BANDWIDTHS_LOWER_BOUND (i.e. their bandwidths close
    # to the highest allowed values)
    sqd = model.likelihood.kernel.squared_distance
    internal_values = sqd.inverse_bandwidths_internal.data()
    inverse_bandwidths = sqd.encoding.get(mx.nd, internal_values).asnumpy()

    assert inverse_bandwidths[0] > inverse_bandwidths[1]
    assert inverse_bandwidths[0] > inverse_bandwidths[2]
    for uninformative in (1, 2):
        np.testing.assert_almost_equal(
            inverse_bandwidths[uninformative], INVERSE_BANDWIDTHS_LOWER_BOUND)

    mu_train, _ = model.predict(x_train_mx_nd)[0]
    mu_test, _ = model.predict(x_test_mx_nd)[0]

    # Convert back to np.array
    mu_train = mu_train.asnumpy()
    mu_test = mu_test.asnumpy()

    np.testing.assert_almost_equal(mu_train, y_train, decimal=2)
    # The test points get a looser tolerance than the training points
    np.testing.assert_almost_equal(mu_test, y_test, decimal=1)
def test_incremental_update():
    """Compare incremental posterior-state updates with direct computation.

    For 10 random repetitions, a GP is fitted on a small noisy training set.
    An ``IncrementalUpdateGPPosteriorState`` built from the training data is
    then updated one point at a time with the extra points, and compared with
    a ``GaussProcPosteriorState`` built directly from all points. The noise
    variance must be unchanged by the updates, and ``chol_fact`` and
    ``pred_mat`` of both states must agree to 2 decimals.
    """

    def f(x):
        return np.sin(x) / x

    np.random.seed(298424)
    std_noise = 0.01

    for _ in range(10):
        model = GaussianProcessRegression(kernel=Matern52(dimension=1))

        # Sample data: first the two set sizes, then features and noisy
        # targets for the training set and for the incremental set
        num_train = np.random.randint(low=5, high=15)
        num_incr = np.random.randint(low=1, high=7)
        features, targets = [], []
        for size in (num_train, num_incr):
            feats = np.random.uniform(
                low=-1.0, high=1.0, size=size).reshape((-1, 1))
            clean = f(feats)
            noisy = clean + np.random.normal(0.0, std_noise, size=clean.shape)
            features.append(feats)
            targets.append(noisy)

        # Posterior state obtained by incremental updating
        train_features = to_nd(features[0])
        train_targets = to_nd(targets[0])
        model.fit(train_features, train_targets)
        likelihood = model.likelihood
        noise_variance_1 = likelihood.get_noise_variance()
        state_incr = IncrementalUpdateGPPosteriorState(
            features=train_features,
            targets=train_targets,
            mean=likelihood.mean,
            kernel=likelihood.kernel,
            noise_variance=likelihood.get_noise_variance(as_ndarray=True))
        for feat_row, targ_row in zip(features[1], targets[1]):
            state_incr = state_incr.update(
                to_nd(feat_row.reshape((1, -1))),
                to_nd(targ_row.reshape((1, -1))))
        noise_variance_2 = state_incr.noise_variance.asscalar()

        # Posterior state obtained by direct computation on all points
        state_comp = GaussProcPosteriorState(
            features=to_nd(np.concatenate(features, axis=0)),
            targets=to_nd(np.concatenate(targets, axis=0)),
            mean=likelihood.mean,
            kernel=likelihood.kernel,
            noise_variance=state_incr.noise_variance)

        # Compare the two states
        assert noise_variance_1 == noise_variance_2, \
            "noise_variance_1 = {} != {} = noise_variance_2".format(
                noise_variance_1, noise_variance_2)
        for attribute in ("chol_fact", "pred_mat"):
            from_incr = getattr(state_incr, attribute).asnumpy()
            from_comp = getattr(state_comp, attribute).asnumpy()
            np.testing.assert_almost_equal(from_incr, from_comp, decimal=2)
def test_matern52_ard():
    """Check an ARD Matern52 kernel with explicitly set inverse bandwidths.

    The inverse bandwidths are set to ``[1 / sqrt(2), 1]`` through the
    squared-distance encoding; the kernel matrix of three 2-D points is then
    compared with ``mater52`` applied to a reference distance matrix.
    """
    points = mx.nd.array([[2., 1.], [1., 2.], [0., 1.]], dtype=DATA_TYPE)

    kernel = Matern52(dimension=2, ARD=True)
    kernel.collect_params().initialize()
    sqd = kernel.squared_distance
    assert kernel.ARD == True
    assert sqd.ARD == True

    new_inverse_bandwidths = [1. / np.sqrt(2.), 1.]
    sqd.encoding.set(sqd.inverse_bandwidths_internal, new_inverse_bandwidths)

    actual = kernel(points, points).asnumpy()

    # Reference matrix passed to mater52 (the original note says it comes from
    # a test about squared distances, which is not visible in this section)
    expected_D = np.array([
        [0., 3. / 2., 2.],
        [3. / 2., 0., 3. / 2.],
        [2.0, 3. / 2., 0.],
    ])
    np.testing.assert_almost_equal(mater52(expected_D), actual)
def test_product_wrongshape():
    """Check that a product kernel rejects inputs of unsupported shape.

    The first factor is a ``Matern52`` of dimension 2; the second factor is
    either a ``Matern52`` of dimension 1 or a ``FabolasKernelFunction``. For
    each resulting ``ProductKernelFunction``, every call below is expected to
    raise ``mx.base.MXNetError``:

    * kernel matrix and diagonal of a ``(5, 4)`` input;
    * kernel matrix of a ``(3, 3)`` input against the ``(5, 4)`` input;
    * kernel matrix of a ``(5, 2)`` input against itself.

    The call results are deliberately not bound to names: the calls are
    expected to raise, so an assignment would never take place.
    """
    kernel1 = Matern52(dimension=2)
    kernel1.collect_params().initialize()

    # A better way to do this is using the `pytest.mark.parametrize`
    # decorator. Keeping it simple for now.
    kernels = [Matern52(dimension=1), FabolasKernelFunction()]
    for kernel2 in kernels:
        kernel2.collect_params().initialize()
        kernel = ProductKernelFunction(kernel1, kernel2)

        X1 = mx.nd.random_normal(0.0, 1.0, shape=(5, 4))
        with pytest.raises(mx.base.MXNetError):
            kernel(X1, X1)
        with pytest.raises(mx.base.MXNetError):
            kernel.diagonal(mx.nd, X1)

        X2 = mx.nd.random_normal(0.0, 1.0, shape=(3, 3))
        with pytest.raises(mx.base.MXNetError):
            kernel(X2, X1)

        X1 = mx.nd.random_normal(0.0, 1.0, shape=(5, 2))
        with pytest.raises(mx.base.MXNetError):
            kernel(X1, X1)
def test_gp_regression_with_noise():
    """Fit a Matern52 GP on noisy samples of ``sin(x) / x``.

    Gaussian noise with standard deviation ``std_noise`` is added to the
    training targets. The test asserts that the learned noise variance matches
    ``std_noise**2`` to 4 decimals, and that the predicted means match the
    noise-free targets to 2 decimals on both training and test points.
    """

    def f(x):
        return np.sin(x) / x

    np.random.seed(7)

    # Training grid is [-5, -4.8, -4.6, ..., 4.8]; the test grid
    # [-4.9, -4.7, -4.5, ..., 4.9] is shifted so the two do not overlap
    x_train = np.arange(-5, 5, 0.2)
    x_test = np.arange(-4.9, 5, 0.2)
    y_train, y_test = f(x_train), f(x_test)

    std_noise = 0.01
    noise_train = np.random.normal(0.0, std_noise, size=y_train.shape)

    # Convert to mx.nd
    y_train_mx_nd = mx.nd.array(y_train)
    noise_train_mx_nd = mx.nd.array(noise_train)
    x_train_mx_nd = mx.nd.array(x_train)
    x_test_mx_nd = mx.nd.array(x_test)

    model = GaussianProcessRegression(kernel=Matern52(dimension=1))
    noisy_targets = y_train_mx_nd + noise_train_mx_nd
    model.fit(x_train_mx_nd, noisy_targets)

    # The noise variance found by empirical Bayes should be of the same order
    # as std_noise^2
    learned_noise_variance = model.likelihood.get_noise_variance()
    np.testing.assert_almost_equal(
        learned_noise_variance, std_noise**2, decimal=4)

    mu_train, _ = model.predict(x_train_mx_nd)[0]
    mu_test, _ = model.predict(x_test_mx_nd)[0]

    # Convert back to np.array
    mu_train = mu_train.asnumpy()
    mu_test = mu_test.asnumpy()

    np.testing.assert_almost_equal(mu_train, y_train, decimal=2)
    np.testing.assert_almost_equal(mu_test, y_test, decimal=2)
def test_gp_regression_no_noise():
    """Fit a Matern52 GP on noise-free samples of ``sin(x) / x``.

    The test asserts that

    * the learned noise variance is (almost) ``NOISE_VARIANCE_LOWER_BOUND``;
    * on the training points, predicted means match the targets and predicted
      variances match zero, both to 4 decimals;
    * on the test points, predicted means match the targets to 3 decimals.
    """

    def f(x):
        return np.sin(x) / x

    x_train = np.arange(-5, 5, 0.2)  # [-5, -4.8, -4.6, ..., 4.8]
    # [-4.9, -4.7, -4.5, ..., 4.9]; train and test points do not overlap
    x_test = np.arange(-4.9, 5, 0.2)
    y_train = f(x_train)
    y_test = f(x_test)

    # to mx.nd
    y_train_mx_nd = mx.nd.array(y_train)
    x_train_mx_nd = mx.nd.array(x_train)
    x_test_mx_nd = mx.nd.array(x_test)

    model = GaussianProcessRegression(kernel=Matern52(dimension=1))
    model.fit(x_train_mx_nd, y_train_mx_nd)

    # Check that the value of the residual noise variance learned by empirical
    # Bayes is in the same order as the smallest allowed value (since there is
    # no noise)
    noise_variance = model.likelihood.get_noise_variance()
    np.testing.assert_almost_equal(noise_variance, NOISE_VARIANCE_LOWER_BOUND)

    mu_train, var_train = model.predict(x_train_mx_nd)[0]
    # The second element for the test points is never checked, so it is
    # discarded
    mu_test, _ = model.predict(x_test_mx_nd)[0]

    # back to np.array
    mu_train = mu_train.asnumpy()
    mu_test = mu_test.asnumpy()
    var_train = var_train.asnumpy()

    np.testing.assert_almost_equal(mu_train, y_train, decimal=4)
    np.testing.assert_almost_equal(
        var_train, np.zeros_like(var_train), decimal=4)
    # Fewer decimals imposed for the test points
    np.testing.assert_almost_equal(mu_test, y_test, decimal=3)
def build_kernel():
    """Return a one-dimensional Matern52 kernel wrapped in a ``WarpedKernel``.

    The warping is built with ``dimension=1`` and
    ``index_to_range={0: (-4., 4.)}``.
    """
    base_kernel = Matern52(dimension=1)
    input_warping = Warping(dimension=1, index_to_range={0: (-4., 4.)})
    return WarpedKernel(kernel=base_kernel, warping=input_warping)