Example #1
def test_multioutput_with_diag_q_sqrt():
    data = DataMixedKernel

    q_sqrt_diag = np.ones((data.M, data.L)) * 2
    q_sqrt = np.repeat(np.eye(data.M)[None, ...], data.L,
                       axis=0) * 2  # L x M x M

    kern_list = [SquaredExponential() for _ in range(data.L)]
    k1 = mk.LinearCoregionalization(kern_list, W=data.W)
    f1 = mf.SharedIndependentInducingVariables(
        InducingPoints(data.X[:data.M, ...]))
    model_1 = SVGP(k1,
                   Gaussian(),
                   inducing_variable=f1,
                   q_mu=data.mu_data,
                   q_sqrt=q_sqrt_diag,
                   q_diag=True)

    kern_list = [SquaredExponential() for _ in range(data.L)]
    k2 = mk.LinearCoregionalization(kern_list, W=data.W)
    f2 = mf.SharedIndependentInducingVariables(
        InducingPoints(data.X[:data.M, ...]))
    model_2 = SVGP(k2,
                   Gaussian(),
                   inducing_variable=f2,
                   q_mu=data.mu_data,
                   q_sqrt=q_sqrt,
                   q_diag=False)

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2])
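
Models 1 and 2 above parameterise the same posterior covariance factor in two ways: a [M, L] diagonal (q_diag=True) versus a full [L, M, M] stack of scaled identity matrices. A minimal standalone numpy sketch of that equivalence (the M and L values here are illustrative, not taken from DataMixedKernel):

import numpy as np

M, L = 5, 2
q_sqrt_diag = np.ones((M, L)) * 2                             # [M, L]
q_sqrt_full = np.repeat(np.eye(M)[None, ...], L, axis=0) * 2  # [L, M, M]
# expanding each column of the diagonal parameterisation into a diagonal
# matrix recovers the corresponding [M, M] slice of the full parameterisation
expanded = np.stack([np.diag(q_sqrt_diag[:, l]) for l in range(L)])
np.testing.assert_allclose(expanded, q_sqrt_full)
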
def test_mixed_shared(fun):
    inducing_variable = mf.SharedIndependentInducingVariables(make_ip())
    kernel = mk.LinearCoregionalization(make_kernels(Datum.L), Datum.W)
    if fun is mo_kuus.Kuu:
        # Kuu only needs the inducing variables and the kernel; add jitter and factorise
        t = tf.linalg.cholesky(fun(inducing_variable, kernel, jitter=1e-9))
    else:
        # the cross-covariance case additionally takes the new input locations
        t = fun(inducing_variable, kernel, Datum.Xnew)
        print(t.shape)
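
In the Kuu branch above, a small jitter is added before the Cholesky. A standalone numpy sketch (sizes and values illustrative) of why this helps: the jitter shifts the eigenvalues away from zero so the factorisation stays numerically stable even when the covariance is (close to) singular.

import numpy as np

M = 4
A = np.random.randn(M, 2)
K = A @ A.T                                          # rank-deficient PSD matrix; Cholesky may fail
jitter = 1e-9
L_chol = np.linalg.cholesky(K + jitter * np.eye(M))  # succeeds once jitter is added
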
Example #3
def test_sample_conditional_mixedkernel():
    q_mu = tf.random.uniform((Data.M, Data.L), dtype=tf.float64)  # M x L
    q_sqrt = tf.convert_to_tensor([
        np.tril(tf.random.uniform((Data.M, Data.M), dtype=tf.float64))
        for _ in range(Data.L)
    ])  # L x M x M

    Z = Data.X[:Data.M, ...]  # M x D
    N = int(1e6)  # number of identical test inputs, i.e. Monte Carlo samples at one point
    Xs = np.ones((N, Data.D), dtype=float_type)

    # Path 1: mixed kernel: most efficient route
    W = np.random.randn(Data.P, Data.L)
    mixed_kernel = mk.LinearCoregionalization(
        [SquaredExponential() for _ in range(Data.L)], W)
    optimal_inducing_variable = mf.SharedIndependentInducingVariables(
        InducingPoints(Z))

    value, mean, var = sample_conditional(Xs,
                                          optimal_inducing_variable,
                                          mixed_kernel,
                                          q_mu,
                                          q_sqrt=q_sqrt,
                                          white=True)

    # Path 2: independent kernels, mixed later
    separate_kernel = mk.SeparateIndependent(
        [SquaredExponential() for _ in range(Data.L)])
    fallback_inducing_variable = mf.SharedIndependentInducingVariables(
        InducingPoints(Z))

    value2, mean2, var2 = sample_conditional(Xs,
                                             fallback_inducing_variable,
                                             separate_kernel,
                                             q_mu,
                                             q_sqrt=q_sqrt,
                                             white=True)
    value2 = np.matmul(value2, W.T)
    # check that the mean and covariance of the samples from both paths agree
    np.testing.assert_array_almost_equal(np.mean(value, axis=0),
                                         np.mean(value2, axis=0),
                                         decimal=1)
    np.testing.assert_array_almost_equal(np.cov(value, rowvar=False),
                                         np.cov(value2, rowvar=False),
                                         decimal=1)
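
The fallback path above relies on the linearity of the coregionalization mixing: if f = W g, the sample mean and covariance of f are exactly W E[g] and W Cov[g] W^T, which is why value2 is post-multiplied by W.T before the comparison. A standalone numpy sketch of that identity (shapes here are illustrative):

import numpy as np

rng = np.random.RandomState(0)
L, P, S = 2, 3, 10_000
W = rng.randn(P, L)
g = rng.randn(S, L)    # S draws of the L latent processes
f = g @ W.T            # mixed draws, shape [S, P]
# sample statistics transform exactly under the linear mixing
np.testing.assert_allclose(f.mean(axis=0), W @ g.mean(axis=0), atol=1e-10)
np.testing.assert_allclose(np.cov(f, rowvar=False),
                           W @ np.cov(g, rowvar=False) @ W.T,
                           atol=1e-10)
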
Example #4
def test_compare_mixed_kernel():
    data = DataMixedKernel

    kern_list = [SquaredExponential() for _ in range(data.L)]
    k1 = mk.LinearCoregionalization(kern_list, W=data.W)
    f1 = mf.SharedIndependentInducingVariables(
        InducingPoints(data.X[:data.M, ...]))
    model_1 = SVGP(k1,
                   Gaussian(),
                   inducing_variable=f1,
                   q_mu=data.mu_data,
                   q_sqrt=data.sqrt_data)

    kern_list = [SquaredExponential() for _ in range(data.L)]
    k2 = mk.LinearCoregionalization(kern_list, W=data.W)
    f2 = mf.SharedIndependentInducingVariables(
        InducingPoints(data.X[:data.M, ...]))
    model_2 = SVGP(k2,
                   Gaussian(),
                   inducing_variable=f2,
                   q_mu=data.mu_data,
                   q_sqrt=data.sqrt_data)

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2])
Example #5
def test_separate_independent_mok():
    """
    We use different independent kernels for each of the output dimensions.
    We can achieve this in two ways:
        1) efficient: SeparateIndependentMok with Shared/SeparateIndependentMof
        2) inefficient: SeparateIndependentMok with InducingPoints
    However, both methods should return the same conditional,
    and after optimization return the same log likelihood.
    """
    # Model 1 (Inefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P,
                                       Data.M * Data.P))[None,
                                                         ...]  # 1 x MP x MP

    kern_list_1 = [
        SquaredExponential(variance=0.5, lengthscale=1.2)
        for _ in range(Data.P)
    ]
    kernel_1 = mk.SeparateIndependent(kern_list_1)
    inducing_variable_1 = InducingPoints(Data.X[:Data.M, ...])
    model_1 = SVGP(kernel_1,
                   Gaussian(),
                   inducing_variable_1,
                   num_latent=1,
                   q_mu=q_mu_1,
                   q_sqrt=q_sqrt_1)
    set_trainable(model_1, False)
    model_1.q_sqrt.trainable = True
    model_1.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure1,
                                       variables=model_1.trainable_variables,
                                       method='BFGS')

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kern_list_2 = [
        SquaredExponential(variance=0.5, lengthscale=1.2)
        for _ in range(Data.P)
    ]
    kernel_2 = mk.SeparateIndependent(kern_list_2)
    inducing_variable_2 = mf.SharedIndependentInducingVariables(
        InducingPoints(Data.X[:Data.M, ...]))
    model_2 = SVGP(kernel_2,
                   Gaussian(),
                   inducing_variable_2,
                   num_latent=Data.P,
                   q_mu=q_mu_2,
                   q_sqrt=q_sqrt_2)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True
    model_2.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure2,
                                       variables=model_2.trainable_variables,
                                       method='BFGS')

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2])
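
The "inefficient" and "efficient" routes above differ mainly in how the Cholesky factor of the variational covariance is stored: one fully-coupled [1, MP, MP] triangle versus P independent [M, M] triangles. A quick illustrative parameter count (values not taken from the test's Data class):

M, P = 3, 2
full_params = (M * P) * (M * P + 1) // 2  # lower triangle of one MP x MP matrix -> 21
block_params = P * M * (M + 1) // 2       # P lower triangles of M x M matrices -> 12
print(full_params, block_params)
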
Example #6
def test_shared_independent_mok():
    """
    In this test we use the same kernel and the same inducing inducing
    for each of the outputs. The outputs are considered to be uncorrelated.
    This is how GPflow handled multiple outputs before the multioutput framework was added.
    We compare three models here:
        1) an ineffient one, where we use a SharedIndepedentMok with InducingPoints.
           This combination will uses a Kff of size N x P x N x P, Kfu if size N x P x M x P
           which is extremely inefficient as most of the elements are zero.
        2) efficient: SharedIndependentMok and SharedIndependentMof
           This combinations uses the most efficient form of matrices
        3) the old way, efficient way: using Kernel and InducingPoints
        Model 2) and 3) follow more or less the same code path.
    """
    np.random.seed(0)
    # Model 1
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)  # MP x 1
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P,
                                       Data.M * Data.P))[None,
                                                         ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable = InducingPoints(Data.X[:Data.M, ...])
    model_1 = SVGP(kernel_1,
                   Gaussian(),
                   inducing_variable,
                   q_mu=q_mu_1,
                   q_sqrt=q_sqrt_1,
                   num_latent=Data.Y.shape[-1])
    set_trainable(model_1, False)
    model_1.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure1,
                                       variables=model_1.trainable_variables,
                                       options=dict(maxiter=500),
                                       method='BFGS')

    # Model 2
    q_mu_2 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_2 = SquaredExponential(variance=0.5, lengthscale=1.2)
    inducing_variable_2 = InducingPoints(Data.X[:Data.M, ...])
    model_2 = SVGP(kernel_2,
                   Gaussian(),
                   inducing_variable_2,
                   num_latent=Data.P,
                   q_mu=q_mu_2,
                   q_sqrt=q_sqrt_2)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure2,
                                       variables=model_2.trainable_variables,
                                       options=dict(maxiter=500),
                                       method='BFGS')

    # Model 3
    q_mu_3 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_3 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_3 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable_3 = mf.SharedIndependentInducingVariables(
        InducingPoints(Data.X[:Data.M, ...]))
    model_3 = SVGP(kernel_3,
                   Gaussian(),
                   inducing_variable_3,
                   num_latent=Data.P,
                   q_mu=q_mu_3,
                   q_sqrt=q_sqrt_3)
    set_trainable(model_3, False)
    model_3.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure3():
        return -model_3.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure3,
                                       variables=model_3.trainable_variables,
                                       options=dict(maxiter=500),
                                       method='BFGS')

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2, model_3])
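
Models 2 and 3 above reuse q_mu_1 via np.reshape(q_mu_1, [M, P]). With numpy's default C-order, flat entry m * P + p of the [MP, 1] vector ends up at position [m, p] of the [M, P] matrix; a tiny sketch of that mapping (sizes illustrative):

import numpy as np

M, P = 3, 2
q_mu_flat = np.arange(M * P, dtype=float)[:, None]  # [MP, 1]
q_mu_mat = np.reshape(q_mu_flat, [M, P])            # [M, P]
assert q_mu_mat[1, 0] == q_mu_flat[1 * P + 0, 0]
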
def test_conditional_broadcasting(full_cov, white, conditional_type):
    """
    Test that the `conditional` and `sample_conditional` broadcasts correctly
    over leading dimensions of Xnew. Xnew can be shape [..., N, D],
    and conditional should broadcast over the [...].
    """
    q_mu = np.random.randn(Data.M, Data.Dy)
    q_sqrt = np.tril(np.random.randn(Data.Dy, Data.M, Data.M), -1)

    if conditional_type == "Z":
        inducing_variable = Data.Z
        kernel = gpflow.kernels.Matern52(lengthscale=0.5)
    elif conditional_type == "inducing_points":
        inducing_variable = gpflow.inducing_variables.InducingPoints(Data.Z)
        kernel = gpflow.kernels.Matern52(lengthscale=0.5)
    elif conditional_type == "mixing":
        # variational params have different output dim in this case
        q_mu = np.random.randn(Data.M, Data.L)
        q_sqrt = np.tril(np.random.randn(Data.L, Data.M, Data.M), -1)
        inducing_variable = mf.SharedIndependentInducingVariables(gpflow.inducing_variables.InducingPoints(Data.Z))
        kernel = mk.LinearCoregionalization(
            kernels=[gpflow.kernels.Matern52(lengthscale=0.5) for _ in range(Data.L)],
            W=Data.W
        )
    else:
        raise NotImplementedError

    if conditional_type == "mixing" and full_cov:
        pytest.skip("combination is not implemented")

    num_samples = 5

    def sample_conditional_fn(X):
        return sample_conditional(X,
                                  inducing_variable,
                                  kernel,
                                  tf.convert_to_tensor(q_mu),
                                  q_sqrt=tf.convert_to_tensor(q_sqrt),
                                  white=white,
                                  full_cov=full_cov,
                                  num_samples=num_samples)

    samples = np.array([sample_conditional_fn(X)[0] for X in Data.SX])
    means = np.array([sample_conditional_fn(X)[1] for X in Data.SX])
    variables = np.array([sample_conditional_fn(X)[2] for X in Data.SX])

    samples_S12, means_S12, vars_S12 = \
        sample_conditional(Data.SX, inducing_variable, kernel,
                           tf.convert_to_tensor(q_mu),
                           q_sqrt=tf.convert_to_tensor(q_sqrt),
                           white=white, full_cov=full_cov,
                           num_samples=num_samples)

    samples_S1_S2, means_S1_S2, vars_S1_S2 = \
        sample_conditional(Data.S1_S2_X, inducing_variable, kernel,
                           tf.convert_to_tensor(q_mu),
                           q_sqrt=tf.convert_to_tensor(q_sqrt),
                           white=white, full_cov=full_cov,
                           num_samples=num_samples)

    assert_allclose(samples_S12.shape, samples.shape)
    assert_allclose(samples_S1_S2.shape, [Data.S1, Data.S2, num_samples, Data.N, Data.Dy])
    assert_allclose(means_S12, means)
    assert_allclose(vars_S12, variables)
    assert_allclose(means_S1_S2.numpy().reshape(Data.S1 * Data.S2, Data.N, Data.Dy), means)
    if full_cov:
        vars_s1_s2 = vars_S1_S2.numpy().reshape(Data.S1 * Data.S2, Data.Dy, Data.N, Data.N)
        assert_allclose(vars_s1_s2, variables)
    else:
        vars_s1_s2 = vars_S1_S2.numpy().reshape(Data.S1 * Data.S2, Data.N, Data.Dy)
        assert_allclose(vars_s1_s2, variables)
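
The broadcasting check above compares per-batch calls against a single call with leading dimensions. Data.SX and Data.S1_S2_X are defined in the test's data class (not shown here); a hypothetical sketch of how such inputs could be constructed, assuming they are related by a simple reshape:

import numpy as np

S1, S2, N, D = 2, 3, 4, 1
SX = np.random.randn(S1 * S2, N, D)   # a stack of S1 * S2 input batches, each [N, D]
S1_S2_X = SX.reshape(S1, S2, N, D)    # the same data with [S1, S2] leading dimensions
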
# ------------------------------------------


class Datum:
    D = 1
    L = 2
    P = 3
    M = 10
    N = 100
    W = rng.randn(P, L)
    X = rng.randn(N)[:, None]
    Xnew = rng.randn(N)[:, None]


multioutput_inducing_variable_list = [
    mf.SharedIndependentInducingVariables(make_ip()),
    mf.SeparateIndependentInducingVariables(make_ips(Datum.P))
]

multioutput_kernel_list = [
    mk.SharedIndependent(make_kernel(), Datum.P),
    mk.SeparateIndependent(make_kernels(Datum.L)),
    mk.LinearCoregionalization(make_kernels(Datum.L), Datum.W)
]


@pytest.mark.parametrize('inducing_variable',
                         multioutput_inducing_variable_list)
@pytest.mark.parametrize('kernel', multioutput_kernel_list)
def test_kuu(inducing_variable, kernel):
    Kuu = mo_kuus.Kuu(inducing_variable, kernel, jitter=1e-9)