def test_optimize(self):
        """Adam optimization should never decrease the model's log likelihood."""
        with defer_build():
            # Two stacked single-output layers, each with its own kernel instance.
            layers = [
                InputLayer(input_dim=1, output_dim=1, num_inducing=self.M,
                           kernel=RBF(1) + White(1), multitask=True),
                OutputLayer(input_dim=1, output_dim=1, num_inducing=self.M,
                            kernel=RBF(1) + White(1), multitask=True),
            ]
            seq = MultitaskSequential(layers)
            model = MultitaskDSDGP(X=self.X, Y=self.Y, Z=self.Z,
                                   layers=seq,
                                   likelihood=SwitchedLikelihood(
                                       [Gaussian(), Gaussian()]),
                                   num_latent=1)
        model.compile()
        log_lik_before = model.compute_log_likelihood()
        optimizer = gpflow.train.AdamOptimizer(0.01)
        optimizer.minimize(model, maxiter=100)
        log_lik_after = model.compute_log_likelihood()
        self.assertGreaterEqual(log_lik_after, log_lik_before)
    def setUp(self):
        """Build multi-task input fixtures and a deferred SwitchedKernel.

        The last column of X1/X2 is an integer task index in {0, 1, 2};
        the remaining three columns are random features.
        """
        self.test_graph = tf.Graph()
        self.rng = np.random.RandomState(42)

        self.X1_ind = np.array([0, 1, 2, 2, 1, 0, 1, 0, 2, 1])[:, None]
        # BUG FIX: draw from the seeded self.rng (previously np.random.randn),
        # so the fixture is reproducible across test runs.
        self.X1 = np.hstack([self.rng.randn(10, 3),
                             self.X1_ind]).astype(gpflow.settings.float_type)

        self.X2_ind = np.array([0, 1, 2, 2, 1])[:, None]
        self.X2 = np.hstack([self.rng.randn(5, 3),
                             self.X2_ind]).astype(gpflow.settings.float_type)

        with defer_build():
            # One dummy kernel per task; SwitchedKernel dispatches on the
            # task-index column.
            kern_list = [DummyKernel(3, 1.0),
                         DummyKernel(3, 2.0),
                         DummyKernel(3, 3.0)]
            self.kernel = SwitchedKernel(kern_list, 3)
    def prepare(self):
        """Assemble a compiled two-layer multitask DSDGP and test inputs.

        Returns a (model, Xs) tuple; Xs carries binary task indices in its
        last column, matching the layout of the training data.
        """
        num_data, num_inducing = 100, 10
        rng = np.random.RandomState(42)

        X = rng.randn(num_data, 2)
        Y = rng.randn(num_data, 1)
        Z = rng.randn(num_inducing, 2)

        # Binary task indicators are appended as the final column.
        X_ind = rng.randint(0, 2, (num_data, 1))
        Z_ind = rng.randint(0, 2, (num_inducing, 1))
        X = np.hstack([X, X_ind])
        Y = np.hstack([Y, X_ind])
        Z = np.hstack([Z, Z_ind])

        # Held-out prediction inputs with their own task indices.
        Xs = np.hstack([rng.randn(num_inducing, 2),
                        rng.randint(0, 2, (num_inducing, 1))])

        with defer_build():
            lik = SwitchedLikelihood([Gaussian(), Gaussian()])
            seq = MultitaskSequential([
                InputLayer(input_dim=2, output_dim=1,
                           num_inducing=num_inducing,
                           kernel=RBF(2) + White(2),
                           mean_function=Linear(A=np.ones((3, 1))),
                           multitask=True),
                OutputLayer(input_dim=1, output_dim=1,
                            num_inducing=num_inducing,
                            kernel=RBF(1) + White(1),
                            multitask=True),
            ])
            model = MultitaskDSDGP(X=X, Y=Y, Z=Z, layers=seq,
                                   likelihood=lik, num_latent=1)
        model.compile()
        return model, Xs
def test_pos_def():
    """Compare VI and IWVI deep-GP bounds after joint NatGrad/Adam training.

    Trains both models for a few steps — natural gradients on the final
    layer's variational parameters, Adam on everything else — then prints
    Monte Carlo estimates of each bound.
    """
    from bayesian_benchmarks.data import get_regression_data
    from scipy.cluster.vq import kmeans2

    # NOTE(review): the downloaded dataset is only used for its shapes and
    # for k-means inducing points — X and Y themselves are replaced with
    # random draws below. Confirm whether the (large) download is needed.
    data = get_regression_data("wilson_3droad")
    X = data.X_train
    Y = data.Y_train
    M = 128
    Z = kmeans2(X, M, minit="points")[0]

    N, Dx = X.shape
    Dy = Y.shape[1]
    K = 1  # Monte Carlo samples per bound evaluation

    lik = gpflow.likelihoods.Gaussian(variance=0.1)
    kern = gpflow.kernels.RBF(Dx, lengthscales=0.1)

    # Synthetic data of matching shape replaces the benchmark data.
    X = np.random.randn(N, Dx)
    Y = np.random.randn(N, Dy)

    layers_vi = [LatentVariableLayer(Dx, XY_dim=Dx + Dy), GPLayer(kern, Z, Dy)]
    layers_iw = [LatentVariableLayer(Dx, XY_dim=Dx + Dy), GPLayer(kern, Z, Dy)]

    m_dgp_vi = DGP_VI(X, Y, layers_vi, lik, num_samples=K, minibatch_size=512)
    with defer_build():
        m_dgp_iw = DGP_IWVI(
            X,
            Y,
            encoder_minibatch_size=None,
            layers=layers_iw,
            likelihood=lik,
            num_samples=K,
            minibatch_size=512,
        )
    m_dgp_iw.compile()

    for model in [m_dgp_vi, m_dgp_iw]:
        # The last layer's variational parameters are updated by natural
        # gradients, so exclude them from the Adam step.
        model.layers[-1].q_mu.set_trainable(False)
        model.layers[-1].q_sqrt.set_trainable(False)

        optimizer_adam = gpflow.train.AdamOptimizer(0.005)
        adam_op = optimizer_adam.make_optimize_tensor(model)

        optimizer_ng = gpflow.train.NatGradOptimizer(gamma=0.01)
        ng_op = optimizer_ng.make_optimize_tensor(
            model, var_list=[[model.layers[-1].q_mu, model.layers[-1].q_sqrt]])

        sess = model.enquire_session()
        # FIX: name the loop variable instead of reusing `_` as a value.
        for step in range(10):
            print("{} {:.2f}".format(step, sess.run(model.likelihood_tensor)))
            sess.run(ng_op)
            sess.run(adam_op)

    # Average the stochastic bounds over repeated evaluations.
    L_vi = np.average([m_dgp_vi.compute_log_likelihood() for _ in range(100)])
    L_iw = np.average([m_dgp_iw.compute_log_likelihood() for _ in range(100)])

    print(L_vi, L_iw)