def test_separate_independent_mok():
    """
    We use different independent kernels for each of the output dimensions.
    We can achieve this in two ways:
        1) efficient: SeparateIndependentMok with Shared/SeparateIndependentMof
        2) inefficient: SeparateIndependentMok with InducingPoints
    However, both methods should return the same conditional,
    and after optimization return the same log likelihood.
    """
    # Model 1 (Inefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP

    kern_list_1 = [SquaredExponential(variance=0.5, lengthscales=1.2) for _ in range(Data.P)]
    kernel_1 = mk.SeparateIndependent(kern_list_1)
    inducing_variable_1 = InducingPoints(Data.X[: Data.M, ...])
    model_1 = SVGP(
        kernel_1,
        Gaussian(),
        inducing_variable_1,
        num_latent_gps=1,
        q_mu=q_mu_1,
        q_sqrt=q_sqrt_1,
    )
    set_trainable(model_1, False)
    set_trainable(model_1.q_sqrt, True)
    set_trainable(model_1.q_mu, True)

    gpflow.optimizers.Scipy().minimize(
        model_1.training_loss_closure(Data.data),
        variables=model_1.trainable_variables,
        method="BFGS",
        compile=True,
    )

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array(
        [np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)]
    )  # P x M x M

    kern_list_2 = [SquaredExponential(variance=0.5, lengthscales=1.2) for _ in range(Data.P)]
    kernel_2 = mk.SeparateIndependent(kern_list_2)
    inducing_variable_2 = mf.SharedIndependentInducingVariables(
        InducingPoints(Data.X[: Data.M, ...])
    )
    model_2 = SVGP(
        kernel_2,
        Gaussian(),
        inducing_variable_2,
        num_latent_gps=Data.P,
        q_mu=q_mu_2,
        q_sqrt=q_sqrt_2,
    )
    set_trainable(model_2, False)
    set_trainable(model_2.q_sqrt, True)
    set_trainable(model_2.q_mu, True)

    gpflow.optimizers.Scipy().minimize(
        model_2.training_loss_closure(Data.data),
        variables=model_2.trainable_variables,
        method="BFGS",
        compile=True,
    )

    check_equality_predictions(Data.data, [model_1, model_2])
def test_sample_conditional_mixedkernel():
    q_mu = tf.random.uniform((Data.M, Data.L), dtype=tf.float64)  # M x L
    q_sqrt = tf.convert_to_tensor(
        [np.tril(tf.random.uniform((Data.M, Data.M), dtype=tf.float64)) for _ in range(Data.L)]
    )  # L x M x M

    Z = Data.X[: Data.M, ...]  # M x D
    N = int(10e5)
    Xs = np.ones((N, Data.D), dtype=float_type)

    # Path 1: mixed kernel: most efficient route
    W = np.random.randn(Data.P, Data.L)
    mixed_kernel = mk.LinearCoregionalization([SquaredExponential() for _ in range(Data.L)], W)
    optimal_inducing_variable = mf.SharedIndependentInducingVariables(InducingPoints(Z))
    value, mean, var = sample_conditional(
        Xs, optimal_inducing_variable, mixed_kernel, q_mu, q_sqrt=q_sqrt, white=True
    )

    # Path 2: independent kernels, mixed later
    separate_kernel = mk.SeparateIndependent([SquaredExponential() for _ in range(Data.L)])
    fallback_inducing_variable = mf.SharedIndependentInducingVariables(InducingPoints(Z))
    value2, mean2, var2 = sample_conditional(
        Xs, fallback_inducing_variable, separate_kernel, q_mu, q_sqrt=q_sqrt, white=True
    )
    value2 = np.matmul(value2, W.T)

    # check if mean and covariance of samples are similar
    np.testing.assert_array_almost_equal(
        np.mean(value, axis=0), np.mean(value2, axis=0), decimal=1
    )
    np.testing.assert_array_almost_equal(
        np.cov(value, rowvar=False), np.cov(value2, rowvar=False), decimal=1
    )
def test_shapes_of_mok():
    data = DataMixedKernel

    kern_list = [SquaredExponential() for _ in range(data.L)]

    k1 = mk.LinearCoregionalization(kern_list, W=data.W)
    assert k1.num_latent_gps == data.L

    k2 = mk.SeparateIndependent(kern_list)
    assert k2.num_latent_gps == data.L

    dims = 5
    k3 = mk.SharedIndependent(SquaredExponential(), dims)
    assert k3.num_latent_gps == dims
def test_separate_independent_mof():
    """
    Same test as above, but we use different (i.e. separate) inducing variables
    for each of the output dimensions.
    """
    np.random.seed(0)

    # Model 1 (Inefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP

    kernel_1 = mk.SharedIndependent(SquaredExponential(variance=0.5, lengthscales=1.2), Data.P)
    inducing_variable_1 = InducingPoints(Data.X[: Data.M, ...])
    model_1 = SVGP(kernel_1, Gaussian(), inducing_variable_1, q_mu=q_mu_1, q_sqrt=q_sqrt_1)
    set_trainable(model_1, False)
    set_trainable(model_1.q_sqrt, True)
    set_trainable(model_1.q_mu, True)

    gpflow.optimizers.Scipy().minimize(
        model_1.training_loss_closure(Data.data),
        variables=model_1.trainable_variables,
        method="BFGS",
        compile=True,
    )

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array(
        [np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)]
    )  # P x M x M

    kernel_2 = mk.SharedIndependent(SquaredExponential(variance=0.5, lengthscales=1.2), Data.P)
    inducing_variable_list_2 = [InducingPoints(Data.X[: Data.M, ...]) for _ in range(Data.P)]
    inducing_variable_2 = mf.SeparateIndependentInducingVariables(inducing_variable_list_2)
    model_2 = SVGP(kernel_2, Gaussian(), inducing_variable_2, q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    set_trainable(model_2, False)
    set_trainable(model_2.q_sqrt, True)
    set_trainable(model_2.q_mu, True)

    gpflow.optimizers.Scipy().minimize(
        model_2.training_loss_closure(Data.data),
        variables=model_2.trainable_variables,
        method="BFGS",
        compile=True,
    )

    # Model 3 (Inefficient): an identical inducing variable is used P times,
    # and treated as a separate one.
    q_mu_3 = np.random.randn(Data.M, Data.P)
    q_sqrt_3 = np.array(
        [np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)]
    )  # P x M x M

    kern_list = [SquaredExponential(variance=0.5, lengthscales=1.2) for _ in range(Data.P)]
    kernel_3 = mk.SeparateIndependent(kern_list)
    inducing_variable_list_3 = [InducingPoints(Data.X[: Data.M, ...]) for _ in range(Data.P)]
    inducing_variable_3 = mf.SeparateIndependentInducingVariables(inducing_variable_list_3)
    model_3 = SVGP(kernel_3, Gaussian(), inducing_variable_3, q_mu=q_mu_3, q_sqrt=q_sqrt_3)
    set_trainable(model_3, False)
    set_trainable(model_3.q_sqrt, True)
    set_trainable(model_3.q_mu, True)

    gpflow.optimizers.Scipy().minimize(
        model_3.training_loss_closure(Data.data),
        variables=model_3.trainable_variables,
        method="BFGS",
        compile=True,
    )

    check_equality_predictions(Data.data, [model_1, model_2, model_3])
Xnew = rng.randn(N)[:, None]

multioutput_inducing_variable_list = [
    mf.SharedIndependentInducingVariables(make_ip()),
    mf.SeparateIndependentInducingVariables(make_ips(Datum.P)),
]

multioutput_fallback_inducing_variable_list = [
    mf.FallbackSharedIndependentInducingVariables(make_ip()),
    mf.FallbackSeparateIndependentInducingVariables(make_ips(Datum.P)),
]

multioutput_kernel_list = [
    mk.SharedIndependent(make_kernel(), Datum.P),
    mk.SeparateIndependent(make_kernels(Datum.L)),
    mk.LinearCoregionalization(make_kernels(Datum.L), Datum.W),
]


@pytest.mark.parametrize("inducing_variable", multioutput_inducing_variable_list)
@pytest.mark.parametrize("kernel", multioutput_kernel_list)
def test_kuu_shape(inducing_variable, kernel):
    Kuu = mo_kuus.Kuu(inducing_variable, kernel, jitter=1e-9)
    t = tf.linalg.cholesky(Kuu)
    if isinstance(kernel, mk.SharedIndependent):
        if isinstance(inducing_variable, mf.SeparateIndependentInducingVariables):
            assert t.shape == (3, 10, 10)