def temporal_kernel(self):
    kernel = White(variance=self.model_config['noise_inner'])
    m_inds = list(range(self.m))

    # Initialize a non-linear kernel over the inputs
    if self.model_config['input_nonlinear']:
        scales = (
            [self.model_config['scale']] * self.m
            if self.model_config['scale_tie']
            else self.model_config['scale']
        )
        if self.model_config['rq']:
            kernel += RationalQuadratic(
                active_dims=m_inds, variance=1.0, lengthscales=scales, alpha=1e-2
            )
        else:
            kernel += SquaredExponential(
                active_dims=m_inds, variance=1.0, lengthscales=scales
            )

    # Add a periodic kernel over the inputs
    # TODO: consider whether a decay term is needed here
    if self.model_config['per']:
        scales = [self.model_config['per_scale']] * self.m
        periods = [self.model_config['per_period']] * self.m
        base_kernel = SquaredExponential(
            active_dims=m_inds, variance=1.0, lengthscales=scales
        )
        kernel += Periodic(base_kernel, period=periods)

    # Add a linear kernel over the inputs
    if self.model_config['input_linear']:
        variances = [self.model_config['input_linear_scale']] * self.m
        kernel += LinearKernel(active_dims=m_inds, variance=variances)

    return kernel
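# Illustrative sketch (not from the original code): a hypothetical model_config dict with
# every key read by temporal_kernel above, so the branching is easier to follow. All of
# the concrete values here are made up for the example.
example_model_config = {
    'noise_inner': 0.01,        # variance of the White noise kernel
    'input_nonlinear': True,    # add an RQ/SE kernel over the inputs
    'rq': False,                # use SquaredExponential rather than RationalQuadratic
    'scale_tie': True,          # share a single lengthscale across all m input dimensions
    'scale': 1.0,               # lengthscale (scalar if tied, else a list of length m)
    'per': True,                # add a periodic kernel over the inputs
    'per_scale': 0.5,           # lengthscale of the periodic base kernel
    'per_period': 1.0,          # period of the periodic kernel
    'input_linear': False,      # add a linear kernel over the inputs
    'input_linear_scale': 1.0,  # variance of the linear kernel
}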
def test_sample_conditional_mixedkernel():
    q_mu = tf.random.uniform((Data.M, Data.L), dtype=tf.float64)  # M x L
    q_sqrt = tf.convert_to_tensor(
        [np.tril(tf.random.uniform((Data.M, Data.M), dtype=tf.float64)) for _ in range(Data.L)]
    )  # L x M x M
    Z = Data.X[: Data.M, ...]  # M x D
    N = int(10e5)
    Xs = np.ones((N, Data.D), dtype=float_type)

    # Path 1: mixed kernel: most efficient route
    W = np.random.randn(Data.P, Data.L)
    mixed_kernel = mk.LinearCoregionalization([SquaredExponential() for _ in range(Data.L)], W)
    optimal_inducing_variable = mf.SharedIndependentInducingVariables(InducingPoints(Z))
    value, mean, var = sample_conditional(
        Xs, optimal_inducing_variable, mixed_kernel, q_mu, q_sqrt=q_sqrt, white=True
    )

    # Path 2: independent kernels, mixed later
    separate_kernel = mk.SeparateIndependent([SquaredExponential() for _ in range(Data.L)])
    fallback_inducing_variable = mf.SharedIndependentInducingVariables(InducingPoints(Z))
    value2, mean2, var2 = sample_conditional(
        Xs, fallback_inducing_variable, separate_kernel, q_mu, q_sqrt=q_sqrt, white=True
    )
    value2 = np.matmul(value2, W.T)

    # check if mean and covariance of samples are similar
    np.testing.assert_array_almost_equal(np.mean(value, axis=0), np.mean(value2, axis=0), decimal=1)
    np.testing.assert_array_almost_equal(
        np.cov(value, rowvar=False), np.cov(value2, rowvar=False), decimal=1
    )
def test_multioutput_with_diag_q_sqrt():
    data = DataMixedKernel

    q_sqrt_diag = np.ones((data.M, data.L)) * 2
    q_sqrt = np.repeat(np.eye(data.M)[None, ...], data.L, axis=0) * 2  # L x M x M

    kern_list = [SquaredExponential() for _ in range(data.L)]
    k1 = mk.LinearCoregionalization(kern_list, W=data.W)
    f1 = mf.SharedIndependentInducingVariables(InducingPoints(data.X[: data.M, ...]))
    model_1 = SVGP(
        k1, Gaussian(), inducing_variable=f1, q_mu=data.mu_data, q_sqrt=q_sqrt_diag, q_diag=True
    )

    kern_list = [SquaredExponential() for _ in range(data.L)]
    k2 = mk.LinearCoregionalization(kern_list, W=data.W)
    f2 = mf.SharedIndependentInducingVariables(InducingPoints(data.X[: data.M, ...]))
    model_2 = SVGP(
        k2, Gaussian(), inducing_variable=f2, q_mu=data.mu_data, q_sqrt=q_sqrt, q_diag=False
    )

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2])
def test_cglb_predict():
    """
    Test that:
    1) The predict method returns the same variance estimate as SGPR.
    2) The predict method returns the same mean as SGPR for v=0.
    3) The predict method returns a mean very similar to GPR when CG is run to a low tolerance.
    """
    rng: np.random.RandomState = np.random.RandomState(999)
    train, z, xs = data(rng)
    noise = 0.2

    gpr = GPR(train, kernel=SquaredExponential(), noise_variance=noise)
    sgpr = SGPR(train, kernel=SquaredExponential(), inducing_variable=z, noise_variance=noise)
    cglb = CGLB(
        train,
        kernel=SquaredExponential(),
        inducing_variable=z,
        noise_variance=noise,
    )

    gpr_mean, _ = gpr.predict_y(xs, full_cov=False)
    sgpr_mean, sgpr_cov = sgpr.predict_y(xs, full_cov=False)
    cglb_mean, cglb_cov = cglb.predict_y(
        xs, full_cov=False, cg_tolerance=1e6
    )  # set tolerance high so v stays at 0

    assert np.allclose(sgpr_cov, cglb_cov)
    assert np.allclose(sgpr_mean, cglb_mean)

    cglb_mean, _ = cglb.predict_y(xs, full_cov=False, cg_tolerance=1e-12)

    assert np.allclose(gpr_mean, cglb_mean)
def test_mixed_mok_with_Id_vs_independent_mok():
    data = DataMixedKernelWithEye

    # Independent model
    k1 = mk.SharedIndependent(SquaredExponential(variance=0.5, lengthscales=1.2), data.L)
    f1 = InducingPoints(data.X[: data.M, ...])
    model_1 = SVGP(k1, Gaussian(), f1, q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    set_trainable(model_1, False)
    set_trainable(model_1.q_sqrt, True)

    gpflow.optimizers.Scipy().minimize(
        model_1.training_loss_closure(Data.data),
        variables=model_1.trainable_variables,
        method="BFGS",
        compile=True,
    )

    # Mixed model
    kern_list = [SquaredExponential(variance=0.5, lengthscales=1.2) for _ in range(data.L)]
    k2 = mk.LinearCoregionalization(kern_list, data.W)
    f2 = InducingPoints(data.X[: data.M, ...])
    model_2 = SVGP(k2, Gaussian(), f2, q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    set_trainable(model_2, False)
    set_trainable(model_2.q_sqrt, True)

    gpflow.optimizers.Scipy().minimize(
        model_2.training_loss_closure(Data.data),
        variables=model_2.trainable_variables,
        method="BFGS",
        compile=True,
    )

    check_equality_predictions(Data.data, [model_1, model_2])
def test_separate_independent_mok():
    """
    We use different independent kernels for each of the output dimensions.
    We can achieve this in two ways:
    1) efficient: SeparateIndependentMok with Shared/SeparateIndependentMof
    2) inefficient: SeparateIndependentMok with InducingPoints
    However, both methods should return the same conditional,
    and after optimization return the same log likelihood.
    """
    # Model 1 (inefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP
    kern_list_1 = [SquaredExponential(variance=0.5, lengthscales=1.2) for _ in range(Data.P)]
    kernel_1 = mk.SeparateIndependent(kern_list_1)
    inducing_variable_1 = InducingPoints(Data.X[: Data.M, ...])
    model_1 = SVGP(
        kernel_1,
        Gaussian(),
        inducing_variable_1,
        num_latent_gps=1,
        q_mu=q_mu_1,
        q_sqrt=q_sqrt_1,
    )
    set_trainable(model_1, False)
    set_trainable(model_1.q_sqrt, True)
    set_trainable(model_1.q_mu, True)

    gpflow.optimizers.Scipy().minimize(
        model_1.training_loss_closure(Data.data),
        variables=model_1.trainable_variables,
        method="BFGS",
        compile=True,
    )

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array(
        [np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)]
    )  # P x M x M
    kern_list_2 = [SquaredExponential(variance=0.5, lengthscales=1.2) for _ in range(Data.P)]
    kernel_2 = mk.SeparateIndependent(kern_list_2)
    inducing_variable_2 = mf.SharedIndependentInducingVariables(
        InducingPoints(Data.X[: Data.M, ...])
    )
    model_2 = SVGP(
        kernel_2,
        Gaussian(),
        inducing_variable_2,
        num_latent_gps=Data.P,
        q_mu=q_mu_2,
        q_sqrt=q_sqrt_2,
    )
    set_trainable(model_2, False)
    set_trainable(model_2.q_sqrt, True)
    set_trainable(model_2.q_mu, True)

    gpflow.optimizers.Scipy().minimize(
        model_2.training_loss_closure(Data.data),
        variables=model_2.trainable_variables,
        method="BFGS",
        compile=True,
    )

    check_equality_predictions(Data.data, [model_1, model_2])
def test_MixedKernelSeparateMof():
    data = DataMixedKernel

    kern_list = [SquaredExponential() for _ in range(data.L)]
    inducing_variable_list = [InducingPoints(data.X[: data.M, ...]) for _ in range(data.L)]
    k1 = mk.LinearCoregionalization(kern_list, W=data.W)
    f1 = mf.SeparateIndependentInducingVariables(inducing_variable_list)
    model_1 = SVGP(k1, Gaussian(), inducing_variable=f1, q_mu=data.mu_data, q_sqrt=data.sqrt_data)

    kern_list = [SquaredExponential() for _ in range(data.L)]
    inducing_variable_list = [InducingPoints(data.X[: data.M, ...]) for _ in range(data.L)]
    k2 = mk.LinearCoregionalization(kern_list, W=data.W)
    f2 = mf.SeparateIndependentInducingVariables(inducing_variable_list)
    model_2 = SVGP(k2, Gaussian(), inducing_variable=f2, q_mu=data.mu_data, q_sqrt=data.sqrt_data)

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2])
def test_latent_kernels():
    kernel_list = [SquaredExponential(), White(), White() + Linear()]

    multioutput_kernel_list = [
        SharedIndependent(SquaredExponential(), 3),
        SeparateIndependent(kernel_list),
        LinearCoregionalization(kernel_list, np.random.random((5, 3))),
    ]
    assert len(multioutput_kernel_list[0].latent_kernels) == 1
    assert multioutput_kernel_list[1].latent_kernels == tuple(kernel_list)
    assert multioutput_kernel_list[2].latent_kernels == tuple(kernel_list)
def gen_gp_data(X, lengthscale=0.5, variance=0.2, obs_noise=0.1):
    # Draw a latent function from a GP prior with a squared exponential kernel,
    # then add i.i.d. Gaussian observation noise.
    seed = tfp.util.SeedStream(123, salt="MVN")
    n = X.shape[0]
    kern = SquaredExponential(lengthscales=lengthscale, variance=variance)
    K = stabilise(kern.K(X, X))
    generator = tfp.distributions.MultivariateNormalFullCovariance(
        tf.reshape(tf.zeros_like(X), [-1]), K
    )
    y = tf.reshape(generator.sample(seed=seed()), (n, 1)).numpy()
    return tf.reshape(
        y
        + tf.cast(
            tfp.distributions.Normal(0, obs_noise).sample(y.shape, seed=seed()),
            tf.float64,
        ),
        (n, 1),
    ).numpy()
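# Usage sketch (illustration only): draw noisy GP observations on a 1-D grid with the
# helper above. Assumes the surrounding module's `stabilise` helper and its imports
# (np, tf, tfp) are available; the grid and hyperparameters are made up for the example.
X_demo = tf.convert_to_tensor(np.linspace(0.0, 5.0, 50)[:, None])
y_demo = gen_gp_data(X_demo, lengthscale=0.5, variance=0.2, obs_noise=0.1)  # shape (50, 1)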
def test_shapes_of_mok():
    data = DataMixedKernel

    kern_list = [SquaredExponential() for _ in range(data.L)]

    k1 = mk.LinearCoregionalization(kern_list, W=data.W)
    assert k1.num_latent_gps == data.L

    k2 = mk.SeparateIndependent(kern_list)
    assert k2.num_latent_gps == data.L

    dims = 5
    k3 = mk.SharedIndependent(SquaredExponential(), dims)
    assert k3.num_latent_gps == dims
def test_combination_LMC_kernels():
    N, D, P = 100, 3, 2
    kernel_list1 = [Linear(active_dims=[1]), SquaredExponential()]
    L1 = len(kernel_list1)
    kernel_list2 = [SquaredExponential(), Linear(), Linear()]
    L2 = len(kernel_list2)
    k1 = LinearCoregionalization(kernel_list1, np.random.randn(P, L1))
    k2 = LinearCoregionalization(kernel_list2, np.random.randn(P, L2))
    kernel = k1 + k2
    X = np.random.randn(N, D)
    K1 = k1(X, full_cov=True)
    K2 = k2(X, full_cov=True)
    K = kernel(X, full_cov=True)
    assert K.shape == [N, P, N, P]
    np.testing.assert_allclose(K, K1 + K2)
def test_conjugate_gradient_convergence():
    """
    Check that the implemented conjugate gradient method can solve a linear system of equations.
    """
    rng: np.random.RandomState = np.random.RandomState(999)
    noise = 1e-3
    train, z, _ = data(rng)
    x, y = train
    n = x.shape[0]
    b = tf.transpose(y)
    k = SquaredExponential()
    K = k(x) + noise * tf.eye(n, dtype=default_float())
    Kinv_y = tf.linalg.solve(K, y)  # We could solve by Cholesky instead

    model = CGLB((x, y), kernel=k, inducing_variable=z, noise_variance=noise)
    common = model._common_calculation()

    initial = tf.zeros_like(b)
    A = common.A
    LB = common.LB
    max_error = 0.01
    max_steps = 200
    restart_cg_step = 200
    preconditioner = NystromPreconditioner(A, LB, noise)

    v = cglb_conjugate_gradient(
        K, b, initial, preconditioner, max_error, max_steps, restart_cg_step
    )

    # NOTE: with a smaller `max_error` we could reduce the `rtol`
    np.testing.assert_allclose(Kinv_y, tf.transpose(v), rtol=0.1)
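# Illustrative sketch (not GPflow code): a bare-bones conjugate gradient loop on a small
# SPD system, showing the quantity the test above checks, namely that the CG solution
# matches the direct solve K^{-1} y. The matrix and right-hand side are made up.
import numpy as np

rng_demo = np.random.default_rng(0)
A_ = rng_demo.standard_normal((5, 5))
K_ = A_ @ A_.T + 5.0 * np.eye(5)   # SPD matrix standing in for K + noise * I
b_ = rng_demo.standard_normal(5)

x_cg = np.zeros(5)
r = b_ - K_ @ x_cg
p = r.copy()
for _ in range(50):
    Kp = K_ @ p
    alpha = (r @ r) / (p @ Kp)     # step length along the search direction
    x_cg = x_cg + alpha * p
    r_new = r - alpha * Kp
    if np.linalg.norm(r_new) < 1e-12:
        break
    p = r_new + ((r_new @ r_new) / (r @ r)) * p  # conjugate search direction update
    r = r_new

np.testing.assert_allclose(x_cg, np.linalg.solve(K_, b_), rtol=1e-8, atol=1e-10)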
def test_construct_kernel_separate_independent_custom_list():
    kernel_list = [SquaredExponential(), Matern52()]
    mok = construct_basic_kernel(kernel_list)

    assert isinstance(mok, MultioutputKernel)
    assert isinstance(mok, SeparateIndependent)
    assert mok.kernels == kernel_list
def get_covariance_function():
    gp_dtype = gpf.config.default_float()

    # Matern 3/2
    m32_cov = Matern32(variance=1, lengthscales=100.)
    m32_cov.variance.prior = Normal(gp_dtype(1.), gp_dtype(0.1))
    m32_cov.lengthscales.prior = Normal(gp_dtype(100.), gp_dtype(50.))

    # Periodic base kernel
    periodic_base_cov = SquaredExponential(variance=5., lengthscales=1.)
    set_trainable(periodic_base_cov.variance, False)
    periodic_base_cov.lengthscales.prior = Normal(gp_dtype(5.), gp_dtype(1.))

    # Periodic kernel
    periodic_cov = Periodic(periodic_base_cov, period=1., order=FLAGS.qp_order)
    set_trainable(periodic_cov.period, False)

    # Periodic damping
    periodic_damping_cov = Matern32(variance=1e-1, lengthscales=50)
    periodic_damping_cov.variance.prior = Normal(gp_dtype(1e-1), gp_dtype(1e-3))
    periodic_damping_cov.lengthscales.prior = Normal(gp_dtype(50), gp_dtype(10.))

    # Final covariance
    co2_cov = periodic_cov * periodic_damping_cov + m32_cov
    return co2_cov
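# Usage sketch (illustration only): evaluate the composite covariance on a short grid of
# decimal-year inputs. It assumes FLAGS.qp_order is defined by the surrounding script, as
# get_covariance_function above requires; the grid values are made up for the example.
t_demo = np.linspace(1960.0, 1970.0, 120)[:, None]
co2_cov_demo = get_covariance_function()
K_demo = co2_cov_demo(t_demo)  # [120, 120] covariance matrix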
def _init_backwards_layers(self, X, Y, Z, mean_function=Zero(),
                           optimize_inducing_location=True, Layer=SVGPLayer, white=False):
    backlayers = []
    num_inputs = X.shape[1]
    num_outputs = Y.shape[1]
    num_inducing = Z.shape[0]

    for i in range(num_outputs):
        if i == 0:
            inducing_points = Z[:, :num_inputs]
        else:
            inducing_points = Z[:, num_inputs + num_outputs - i][:, None]
        layer = Layer(
            SquaredExponential(),
            inducing_points,
            Z[:, num_inputs + num_outputs - i - 1],
            [default_jitter()] * num_inducing,
            mean_function,
            optimize_inducing_location=optimize_inducing_location,
            white=white,
        )
        backlayers.append(layer)

    return backlayers
def test_variational_univariate_conditionals(diag, whiten):
    q_mu = np.ones((1, Datum.num_latent)) * Datum.posterior_mean
    ones = np.ones((1, Datum.num_latent)) if diag else np.ones((1, 1, Datum.num_latent))
    q_sqrt = ones * Datum.posterior_std
    model = gpflow.models.SVGP(
        kernel=SquaredExponential(variance=Datum.K),
        likelihood=Gaussian(),
        inducing_variable=Datum.Z,
        num_latent=Datum.num_latent,
        q_diag=diag,
        whiten=whiten,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
    )

    fmean_func, fvar_func = gpflow.conditionals.conditional(
        Datum.X, Datum.Z, model.kernel, model.q_mu, q_sqrt=model.q_sqrt, white=whiten
    )
    mean_value, var_value = fmean_func[0, 0], fvar_func[0, 0]

    assert_allclose(mean_value - Datum.posterior_mean, 0, atol=4)
    assert_allclose(var_value - Datum.posterior_var, 0, atol=4)
def test_separate_independent_conditional_with_q_sqrt_none():
    """
    In response to bug #1523, this test checks that separate_independent_conditional
    does not fail when q_sqrt=None.
    """
    q_sqrt = None
    data = DataMixedKernel

    kern_list = [SquaredExponential() for _ in range(data.L)]
    kernel = gpflow.kernels.SeparateIndependent(kern_list)
    inducing_variable_list = [InducingPoints(data.X[: data.M, ...]) for _ in range(data.L)]
    inducing_variable = mf.SeparateIndependentInducingVariables(inducing_variable_list)

    mu_1, var_1 = gpflow.conditionals.conditional(
        data.X,
        inducing_variable,
        kernel,
        data.mu_data,
        full_cov=False,
        full_output_cov=False,
        q_sqrt=q_sqrt,
        white=True,
    )
def test_mixed_mok_with_Id_vs_independent_mok():
    data = DataMixedKernelWithEye

    # Independent model
    k1 = mk.SharedIndependent(SquaredExponential(variance=0.5, lengthscale=1.2), data.L)
    f1 = InducingPoints(data.X[: data.M, ...])
    model_1 = SVGP(k1, Gaussian(), f1, q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    set_trainable(model_1, False)
    model_1.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(
        closure1, variables=model_1.trainable_variables, method='BFGS'
    )

    # Mixed model
    kern_list = [SquaredExponential(variance=0.5, lengthscale=1.2) for _ in range(data.L)]
    k2 = mk.LinearCoregionalization(kern_list, data.W)
    f2 = InducingPoints(data.X[: data.M, ...])
    model_2 = SVGP(k2, Gaussian(), f2, q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(
        closure2, variables=model_2.trainable_variables, method='BFGS'
    )

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2])
def test_sample_conditional(whiten, full_cov, full_output_cov):
    if full_cov and full_output_cov:
        return

    q_mu = tf.random.uniform((Data.M, Data.P), dtype=tf.float64)  # [M, P]
    q_sqrt = tf.convert_to_tensor(
        [np.tril(tf.random.uniform((Data.M, Data.M), dtype=tf.float64)) for _ in range(Data.P)]
    )  # [P, M, M]
    Z = Data.X[: Data.M, ...]  # [M, D]
    Xs = np.ones((Data.N, Data.D), dtype=float_type)

    inducing_variable = InducingPoints(Z)
    kernel = SquaredExponential()

    # Path 1
    value_f, mean_f, var_f = sample_conditional(
        Xs,
        inducing_variable,
        kernel,
        q_mu,
        q_sqrt=q_sqrt,
        white=whiten,
        full_cov=full_cov,
        full_output_cov=full_output_cov,
        num_samples=int(1e5),
    )
    value_f = value_f.numpy().reshape((-1,) + value_f.numpy().shape[2:])

    # Path 2
    if full_output_cov:
        pytest.skip(
            "sample_conditional with X instead of inducing_variable does not support full_output_cov"
        )

    value_x, mean_x, var_x = sample_conditional(
        Xs,
        Z,
        kernel,
        q_mu,
        q_sqrt=q_sqrt,
        white=whiten,
        full_cov=full_cov,
        full_output_cov=full_output_cov,
        num_samples=int(1e5),
    )
    value_x = value_x.numpy().reshape((-1,) + value_x.numpy().shape[2:])

    # check if mean and covariance of samples are similar
    np.testing.assert_array_almost_equal(
        np.mean(value_x, axis=0), np.mean(value_f, axis=0), decimal=1
    )
    np.testing.assert_array_almost_equal(
        np.cov(value_x, rowvar=False), np.cov(value_f, rowvar=False), decimal=1
    )
    np.testing.assert_allclose(mean_x, mean_f)
    np.testing.assert_allclose(var_x, var_f)
def test_seed_reproducibility(init_method):
    k = SquaredExponential()
    X = np.random.randn(100, 2)
    Z1, idx1 = init_method(X, 30, k)
    Z2, idx2 = init_method(X, 30, k)
    assert np.all(Z1 == Z2), str(init_method)
    assert np.all(idx1 == idx2), str(init_method)
def test_cglb_check_basics():
    """
    * The quadratic term of CGLB with v=0 is equivalent to the quadratic term of SGPR.
    * The log-determinant term of CGLB is less than or equal to the SGPR log determinant.
      In this test the `logdet_term` method returns the negative half of the logdet bound,
      so the inequality is checked with the opposite sign.
    """
    rng: np.random.RandomState = np.random.RandomState(999)
    train, z, _ = data(rng)
    noise = 0.2

    sgpr = SGPR(train, kernel=SquaredExponential(), inducing_variable=z, noise_variance=noise)

    # `v_grad_optimization=True` turns off the CG in the quadratic term
    cglb = CGLB(
        train,
        kernel=SquaredExponential(),
        inducing_variable=z,
        noise_variance=noise,
        v_grad_optimization=True,
    )

    sgpr_common = sgpr._common_calculation()
    cglb_common = cglb._common_calculation()

    sgpr_quad_term = sgpr.quad_term(sgpr_common)
    cglb_quad_term = cglb.quad_term(cglb_common)
    np.testing.assert_almost_equal(sgpr_quad_term, cglb_quad_term)

    sgpr_logdet = sgpr.logdet_term(sgpr_common)
    cglb_logdet = cglb.logdet_term(cglb_common)
    assert cglb_logdet >= sgpr_logdet

    x = train[0]
    K = SquaredExponential()(x) + noise * tf.eye(x.shape[0], dtype=default_float())
    gpr_logdet = -0.5 * tf.linalg.logdet(K)
    assert cglb_logdet <= gpr_logdet
def test_incremental_ConditionalVariance():
    init_method = ConditionalVariance(sample=True)
    k = SquaredExponential()
    X = np.random.randn(100, 2)
    Z1, idx1 = init_method(X, 20, k)
    Z2, idx2 = init_method(X, 30, k)
    assert np.all(Z1 == Z2[:20])
    assert np.all(idx1 == idx2[:20])
def test_MixedMok_Kgg():
    data = DataMixedKernel
    kern_list = [SquaredExponential() for _ in range(data.L)]
    kernel = mk.LinearCoregionalization(kern_list, W=data.W)

    Kgg = kernel.Kgg(Data.X, Data.X)  # L x N x N
    Kff = kernel.K(Data.X, Data.X)  # N x P x N x P

    # Kff = W @ Kgg @ W^T
    Kff_inferred = np.einsum("lnm,pl,ql->npmq", Kgg, data.W, data.W)

    np.testing.assert_array_almost_equal(Kff, Kff_inferred, decimal=5)
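# Illustrative sketch (not part of the test): for a single pair of inputs (n, m), the LMC
# relation Kff[n, :, m, :] = W @ diag(Kgg[:, n, m]) @ W^T can be checked directly with
# plain NumPy. The shapes below (L=2 latent GPs, P=3 outputs) are made up for the example.
import numpy as np

L_demo, P_demo = 2, 3
W_demo = np.random.randn(P_demo, L_demo)
kgg_nm = np.random.rand(L_demo)  # Kgg[:, n, m] for one pair of inputs
kff_nm = np.einsum("l,pl,ql->pq", kgg_nm, W_demo, W_demo)  # same contraction as in the test
np.testing.assert_allclose(kff_nm, W_demo @ np.diag(kgg_nm) @ W_demo.T)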
def __init__(self, data, Z=None, kernel=SquaredExponential(), likelihood=Gaussian(),
             mean_function=None, maxiter=1000):
    # Use a full Gaussian process regression model for now. Could switch to
    # SVGP in the future if the dataset gets too big.
    if Z is None:
        m = gpflow.models.GPR(data, kernel=kernel, mean_function=mean_function)

        # Use the L-BFGS-B algorithm to optimise the hyperparameters
        opt = gpflow.optimizers.Scipy()

        def objective_closure():
            return -m.log_marginal_likelihood()

        opt_logs = opt.minimize(objective_closure, m.trainable_variables,
                                options=dict(maxiter=maxiter))
    else:
        # Sparse variational Gaussian process for big data (see Hensman)
        m = gpflow.models.SVGP(kernel, likelihood, Z, num_data=data[0].shape[0])

        @tf.function
        def optimization_step(optimizer, m, batch):
            with tf.GradientTape() as tape:
                tape.watch(m.trainable_variables)
                objective = -m.elbo(batch)
            grads = tape.gradient(objective, m.trainable_variables)
            optimizer.apply_gradients(zip(grads, m.trainable_variables))
            return objective

        adam = tf.optimizers.Adam()
        for i in range(maxiter):
            elbo = -optimization_step(adam, m, data)
            if i % 100 == 0:
                print('Iteration: {} ELBO: {:.3f}'.format(i, elbo.numpy()))

    print_summary(m)

    # Cannot simply set self.gp_model = m as we need to sample from the prior,
    # not the posterior.
    self.kernel = m.kernel
    self.likelihood = m.likelihood
def get_simple_covariance_function(covariance_enum, **kwargs):
    if not isinstance(covariance_enum, CovarianceEnum):
        covariance_enum = CovarianceEnum(covariance_enum)
    if covariance_enum == CovarianceEnum.Matern12:
        return Matern12(**kwargs)
    if covariance_enum == CovarianceEnum.Matern32:
        return Matern32(**kwargs)
    if covariance_enum == CovarianceEnum.Matern52:
        return Matern52(**kwargs)
    if covariance_enum == CovarianceEnum.RBF:
        return RBF(**kwargs)
    if covariance_enum == CovarianceEnum.QP:
        base_kernel = SquaredExponential(kwargs.pop("variance", 1.),
                                         kwargs.pop("lengthscales", 1.))
        return Periodic(base_kernel, **kwargs)
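# Usage sketch (illustration only): build a Matern 3/2 kernel through the dispatcher above.
# CovarianceEnum is assumed to be importable from the surrounding module, and the
# hyperparameter values are made up for the example.
m32_demo = get_simple_covariance_function(CovarianceEnum.Matern32, variance=2.0, lengthscales=0.5)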
def test_data():
    x_dim, y_dim, w_dim = 2, 1, 2
    num_data = 31
    x_data = np.random.random((num_data, x_dim)) * 5
    w_data = np.random.random((num_data, w_dim))
    w_data[: (num_data // 2), :] = 0.2 * w_data[: (num_data // 2), :] + 5

    input_data = np.concatenate([x_data, w_data], axis=1)
    assert input_data.shape == (num_data, x_dim + w_dim)

    y_data = np.random.multivariate_normal(
        mean=np.zeros(num_data),
        cov=SquaredExponential(variance=0.1)(input_data),
        size=y_dim,
    ).T
    assert y_data.shape == (num_data, y_dim)
    return x_data, y_data
def _construct_kernel(input_dim: int, is_last_layer: bool) -> SquaredExponential:
    """
    Return a :class:`gpflow.kernels.SquaredExponential` kernel with ARD lengthscales set
    to 2 and a small kernel variance of 1e-6 if the kernel is part of a hidden layer;
    otherwise, the kernel variance is set to 1.0.

    :param input_dim: The input dimensionality of the layer.
    :param is_last_layer: Whether the kernel is part of the last layer in the Deep GP.
    """
    variance = 1e-6 if not is_last_layer else 1.0

    # TODO: Looking at this, initializing to 2 (assuming N(0, 1) or U[0, 1] normalized
    # data) seems a bit weird - that's really long lengthscales? And I remember seeing
    # something where the value scaled with the number of dimensions before.
    lengthscales = [2.0] * input_dim

    return SquaredExponential(lengthscales=lengthscales, variance=variance)
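# Usage sketch (illustration only): construct kernels for a hypothetical three-layer Deep GP
# whose layers each take 5-dimensional inputs; only the last layer receives unit variance.
num_layers_demo = 3
layer_kernels_demo = [
    _construct_kernel(input_dim=5, is_last_layer=(i == num_layers_demo - 1))
    for i in range(num_layers_demo)
]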
def setUp(self):
    seed = 31415926
    self.T = 2000
    self.K = 800
    self.t = np.sort(np.random.rand(self.T))
    self.ft = sinu(self.t)
    self.y = obs_noise(self.ft, 0.01, seed)
    self.data = (tf.constant(self.t[:, None]), tf.constant(self.y[:, None]))

    periodic_order = 2
    periodic_base_kernel = SquaredExponential(variance=1., lengthscales=0.1)
    self.cov = Periodic(periodic_base_kernel, period=1., order=periodic_order)
def test_smoke():
    domain = np.array([[0., 10.]])
    kernel = SquaredExponential()
    events = rng.uniform(0, 10, size=20)[:, None]
    feature = InducingPoints(np.linspace(0, 10, 20)[:, None])
    M = len(feature)
    m = VBPP(feature, kernel, domain, np.zeros(M), np.eye(M))

    Kuu = m.compute_Kuu()
    m.q_sqrt.assign(np.linalg.cholesky(Kuu))
    assert np.allclose(m.prior_kl(tf.identity(Kuu)).numpy(), 0.0)

    def objective_closure():
        return -m.elbo(events)

    opt = gpflow.optimizers.Scipy()
    opt.minimize(objective_closure, m.trainable_variables, options=dict(maxiter=2))
def test_variational_univariate_prior_KL(diag, whiten):
    reference_kl = univariate_prior_KL(
        Datum.posterior_mean, Datum.zero_mean, Datum.posterior_var, Datum.K
    )
    q_mu = np.ones((1, Datum.num_latent)) * Datum.posterior_mean
    ones = np.ones((1, Datum.num_latent)) if diag else np.ones((1, 1, Datum.num_latent))
    q_sqrt = ones * Datum.posterior_std
    model = gpflow.models.SVGP(
        kernel=SquaredExponential(variance=Datum.K),
        likelihood=Gaussian(),
        inducing_variable=Datum.Z,
        num_latent=Datum.num_latent,
        q_diag=diag,
        whiten=whiten,
        q_mu=q_mu,
        q_sqrt=q_sqrt,
    )
    test_prior_KL = model.prior_kl()
    assert_allclose(reference_kl - test_prior_KL, 0, atol=4)