Example 1
def test_hypers_SVGP_vs_SGPR_with_excluded_vars(session_tf, svgp, sgpr):
    """
    Test SVGP vs SGPR. Excluded variables.

    This test is the same as test_hypers_SVGP_vs_SGPR, but uses a different approach to partitioning
    the variables. Here the variational parameters remain trainable, but the ordinary gradient
    optimizer has them removed from its var_list, so it does not update them.
    """
    anchor = False

    # combination: nat grad step, then an ordinary GD step on the hyperparameters
    # (with the variational parameters excluded), then another nat grad step
    p = [(svgp.q_mu, svgp.q_sqrt)]
    o1 = [NatGradOptimizer(Datum.gamma), {'var_list': p}]
    o2 = [
        ExcludedGradientDescentOptimizer(Datum.learning_rate,
                                         excluded_params=p[0]), {}
    ]
    o3 = [NatGradOptimizer(Datum.gamma), {'var_list': p}]

    nag_grads_with_gd_optimizer = CombinationOptimizer([o1, o2, o3])
    nag_grads_with_gd_optimizer.minimize(svgp, maxiter=1, anchor=anchor)

    GradientDescentOptimizer(Datum.learning_rate).minimize(sgpr,
                                                           maxiter=1,
                                                           anchor=anchor)

    sgpr_likelihood = sgpr.compute_log_likelihood()
    svgp_likelihood = svgp.compute_log_likelihood()
    assert_allclose(sgpr_likelihood, svgp_likelihood, atol=1e-5)
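The snippets in this listing reference names defined elsewhere in the original test module: the
imports, the Datum constants, and the svgp/sgpr pytest fixtures (Example 1 additionally uses
ExcludedGradientDescentOptimizer and CombinationOptimizer, helpers presumably defined in the same
module and not reconstructed here). Below is a minimal sketch of what that shared setup might look
like for GPflow 1.x; the import paths, constant values, and fixture bodies are assumptions inferred
from the names used in the snippets, not the original source.

# Hedged reconstruction of the module-level setup these tests assume (GPflow 1.x).
# Import paths, constants, and fixture bodies are guesses based on the names used below.
import numpy as np
import pytest
import gpflow
from numpy.testing import assert_allclose
from gpflow.training import GradientDescentOptimizer, NatGradOptimizer
from gpflow.training.natgrad_optimizer import XiSqrtMeanVar
from gpflow.actions import Loop


class Datum:
    # Shared constants; the values used in the original module may differ.
    learning_rate = 0.01
    gamma = 1.0  # gamma=1 is what makes the single-step equivalences below hold
    N, M, D = 4, 3, 2
    X = np.random.randn(N, D)
    Y = np.random.randn(N, 1)
    Z = np.random.randn(M, D)
    lik_var = 0.1


@pytest.fixture
def svgp(session_tf):
    # Sparse variational GP sharing data, kernel and likelihood settings with sgpr.
    model = gpflow.models.SVGP(Datum.X, Datum.Y, gpflow.kernels.RBF(Datum.D),
                               gpflow.likelihoods.Gaussian(), Z=Datum.Z)
    model.likelihood.variance = Datum.lik_var
    return model


@pytest.fixture
def sgpr(session_tf):
    # Collapsed sparse GP regression model with matching settings.
    model = gpflow.models.SGPR(Datum.X, Datum.Y, gpflow.kernels.RBF(Datum.D), Z=Datum.Z)
    model.likelihood.variance = Datum.lik_var
    return model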
Example 2
def test_hypers_SVGP_vs_SGPR_tensors(session_tf, svgp, sgpr):
    """
    Test SVGP vs SGPR. Runs the optimization by executing the optimize tensors directly,
    without the GPflow minimize() wrapper.
    """
    anchor = False
    variationals = [(svgp.q_mu, svgp.q_sqrt)]

    svgp.q_mu.trainable = False
    svgp.q_sqrt.trainable = False

    o1 = NatGradOptimizer(Datum.gamma)
    o1_tensor = o1.make_optimize_tensor(svgp, var_list=variationals)

    o2 = GradientDescentOptimizer(Datum.learning_rate)
    o2_tensor = o2.make_optimize_tensor(svgp)

    o3 = NatGradOptimizer(Datum.gamma)
    o3_tensor = o3.make_optimize_tensor(svgp, var_list=variationals)

    session_tf.run(o1_tensor)

    sgpr_likelihood = sgpr.compute_log_likelihood()
    svgp_likelihood = svgp.compute_log_likelihood()
    assert_allclose(sgpr_likelihood, svgp_likelihood, atol=1e-5)

    session_tf.run(o2_tensor)
    session_tf.run(o3_tensor)

    GradientDescentOptimizer(Datum.learning_rate).minimize(sgpr, maxiter=1, anchor=anchor)

    sgpr_likelihood = sgpr.compute_log_likelihood()
    svgp_likelihood = svgp.compute_log_likelihood()
    assert_allclose(sgpr_likelihood, svgp_likelihood, atol=1e-5)
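As a usage note (not part of the original test): the tensors built with make_optimize_tensor can be
run repeatedly in the session, which is how one would turn the pattern above into a full training
loop. A minimal sketch, with a placeholder iteration count:

# Alternate ordinary-gradient and nat-grad updates by re-running the same tensors.
for _ in range(100):  # iteration count is a placeholder
    session_tf.run(o2_tensor)  # gradient step on the hyperparameters (and Z)
    session_tf.run(o3_tensor)  # nat grad step on the variational parameters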
Example 3
def test_SVGP_vs_SGPR(session_tf):
    """
    With a Gaussian likelihood, the sparse variational GP model (SVGP) should be equivalent to the
    analytically optimal sparse regression model (SGPR) after a single nat grad step of size 1.
    """
    N, M, D = 4, 3, 2
    X = np.random.randn(N, D)
    Z = np.random.randn(M, D)
    Y = np.random.randn(N, 1)
    kern = gpflow.kernels.RBF(D)
    lik_var = 0.1
    lik = gpflow.likelihoods.Gaussian()
    lik.variance = lik_var

    m_svgp = gpflow.models.SVGP(X, Y, kern, lik, Z=Z)
    m_sgpr = gpflow.models.SGPR(X, Y, kern, Z=Z)
    m_sgpr.likelihood.variance = lik_var

    m_svgp.set_trainable(False)
    m_svgp.q_mu.set_trainable(True)
    m_svgp.q_sqrt.set_trainable(True)
    NatGradOptimizer(1.).minimize(m_svgp, [[m_svgp.q_mu, m_svgp.q_sqrt]],
                                  maxiter=1)

    assert_allclose(m_sgpr.compute_log_likelihood(),
                    m_svgp.compute_log_likelihood(),
                    atol=1e-5)
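Why a single step of size 1 is enough here (a sketch of the standard natural-gradient argument, not
text from the original test module): writing the Gaussian variational distribution in terms of its
natural parameters \theta and expectation parameters \eta, the natural-gradient update on the
objective \mathcal{L} (the negative ELBO) is

    \theta \leftarrow \theta - \gamma \, \partial \mathcal{L} / \partial \eta

and for a conjugate (Gaussian) likelihood the step with \gamma = 1 lands exactly on the analytically
optimal Gaussian posterior, so the SVGP bound matches the collapsed SGPR likelihood after one step.
With a non-natural parameterisation, such as the XiSqrtMeanVar transform in the next example, this
single-step property no longer holds, which is why that test takes many smaller steps instead.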
Example 4
def test_other_XiTransform_VGP_vs_GPR(session_tf,
                                      xi_transform=XiSqrtMeanVar()):
    """
    With other transforms the optimum is not reached in a single step, but the optimizer should
    still give the same answer after a number of smaller steps.
    """
    N, D = 3, 2
    X = np.random.randn(N, D)
    Y = np.random.randn(N, 1)
    kern = gpflow.kernels.RBF(D)
    lik_var = 0.1
    lik = gpflow.likelihoods.Gaussian()
    lik.variance = lik_var

    m_vgp = gpflow.models.VGP(X, Y, kern, lik)
    m_gpr = gpflow.models.GPR(X, Y, kern)
    m_gpr.likelihood.variance = lik_var

    m_vgp.set_trainable(False)
    m_vgp.q_mu.set_trainable(True)
    m_vgp.q_sqrt.set_trainable(True)
    NatGradOptimizer(0.01).minimize(m_vgp,
                                    [[m_vgp.q_mu, m_vgp.q_sqrt, xi_transform]],
                                    maxiter=500)

    assert_allclose(m_gpr.compute_log_likelihood(),
                    m_vgp.compute_log_likelihood(),
                    atol=1e-4)
Example 5
def test_VGP_vs_GPR(session_tf):
    """
    With a Gaussian likelihood, the Gaussian variational model (VGP) should be equivalent to the
    exact regression model (GPR) after a single nat grad step of size 1.
    """
    N, D = 3, 2
    X = np.random.randn(N, D)
    Y = np.random.randn(N, 1)
    kern = gpflow.kernels.RBF(D)
    lik_var = 0.1
    lik = gpflow.likelihoods.Gaussian()
    lik.variance = lik_var

    m_vgp = gpflow.models.VGP(X, Y, kern, lik)
    m_gpr = gpflow.models.GPR(X, Y, kern)
    m_gpr.likelihood.variance = lik_var

    m_vgp.set_trainable(False)
    m_vgp.q_mu.set_trainable(True)
    m_vgp.q_sqrt.set_trainable(True)
    NatGradOptimizer(1.).minimize(m_vgp, [(m_vgp.q_mu, m_vgp.q_sqrt)],
                                  maxiter=1)

    assert_allclose(m_gpr.compute_log_likelihood(),
                    m_vgp.compute_log_likelihood(),
                    atol=1e-4)
Example 6
def test_small_q_sqrt_handeled_correctly(session_tf):
    """
    This is an extra test to make sure things still work when q_sqrt is small; this was breaking (#767).
    """
    N, D = 3, 2
    X = np.random.randn(N, D)
    Y = np.random.randn(N, 1)
    kern = gpflow.kernels.RBF(D)
    lik_var = 0.1
    lik = gpflow.likelihoods.Gaussian()
    lik.variance = lik_var

    m_vgp = gpflow.models.VGP(X, Y, kern, lik)
    m_gpr = gpflow.models.GPR(X, Y, kern)
    m_gpr.likelihood.variance = lik_var

    m_vgp.set_trainable(False)
    m_vgp.q_mu.set_trainable(True)
    m_vgp.q_sqrt.set_trainable(True)
    m_vgp.q_mu = np.random.randn(N, 1)
    m_vgp.q_sqrt = np.eye(N)[None, :, :] * 1e-3
    NatGradOptimizer(1.).minimize(m_vgp, [(m_vgp.q_mu, m_vgp.q_sqrt)],
                                  maxiter=1)

    assert_allclose(m_gpr.compute_log_likelihood(),
                    m_vgp.compute_log_likelihood(),
                    atol=1e-4)
Example 7
def test_hypers_SVGP_vs_SGPR(session_tf, svgp, sgpr):
    """
    Test SVGP vs SGPR. Combined optimization.

    The logic is as follows:

    SVGP is given one nat grad step with gamma=1. Now it is identical to SGPR (which has the
    analytically optimal variational distribution).

    We then take an ordinary gradient step on the hyperparameters (and inducing locations Z).

    Finally we update the variational parameters to their optimal values with another nat grad
    step with gamma=1.

    These three steps are equivalent to an ordinary gradient step on the parameters of SGPR.

    In this test we simply make the variational parameters trainable=False, so they are not
    updated by the ordinary gradient step.
    """
    anchor = False
    variationals = [(svgp.q_mu, svgp.q_sqrt)]

    svgp.q_mu.trainable = False
    svgp.q_sqrt.trainable = False

    opt = NatGradOptimizer(Datum.gamma)
    opt.minimize(svgp, var_list=variationals, maxiter=1, anchor=anchor)

    sgpr_likelihood = sgpr.compute_log_likelihood()
    svgp_likelihood = svgp.compute_log_likelihood()
    assert_allclose(sgpr_likelihood, svgp_likelihood, atol=1e-5)

    # combination (doing GD first, as we've already done the nat grad step)
    a1 = GradientDescentOptimizer(
        Datum.learning_rate).make_optimize_action(svgp)
    a2 = NatGradOptimizer(Datum.gamma).make_optimize_action(
        svgp, var_list=variationals)
    Loop([a1, a2]).with_settings(stop=1)()

    GradientDescentOptimizer(Datum.learning_rate).minimize(sgpr,
                                                           maxiter=1,
                                                           anchor=anchor)

    sgpr_likelihood = sgpr.compute_log_likelihood()
    svgp_likelihood = svgp.compute_log_likelihood()
    assert_allclose(sgpr_likelihood, svgp_likelihood, atol=1e-5)
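For reference, the alternating scheme verified above can be written the same way outside a test. The
sketch below only uses calls that already appear in these snippets (set_trainable,
make_optimize_action, Loop); the model construction and the iteration count are placeholders.

# Placeholder model; any SVGP-style model with q_mu/q_sqrt works the same way.
model = gpflow.models.SVGP(Datum.X, Datum.Y, gpflow.kernels.RBF(Datum.D),
                           gpflow.likelihoods.Gaussian(), Z=Datum.Z)

# Keep the variational parameters out of the ordinary-gradient step...
model.q_mu.set_trainable(False)
model.q_sqrt.set_trainable(False)

# ...and hand them to the natural-gradient optimizer instead.
variational_params = [(model.q_mu, model.q_sqrt)]
natgrad_step = NatGradOptimizer(Datum.gamma).make_optimize_action(
    model, var_list=variational_params)
hyper_step = GradientDescentOptimizer(Datum.learning_rate).make_optimize_action(model)

# Alternate the two actions for a fixed number of iterations.
Loop([natgrad_step, hyper_step]).with_settings(stop=100)()  # 100 is a placeholder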
Example 8
def test_hypers_SVGP_vs_SGPR_tensors(session_tf, svgp, sgpr):
    """
    Test SVGP vs SGPR. Runs the optimization by executing the optimize tensors directly,
    without the GPflow minimize() wrapper.
    """
    anchor = False
    variationals = [(svgp.q_mu, svgp.q_sqrt)]

    svgp.q_mu.trainable = False
    svgp.q_sqrt.trainable = False

    o1 = NatGradOptimizer(Datum.gamma)
    o1.minimize(svgp, var_list=variationals, maxiter=0, anchor=anchor)
    o1_tensor = o1.minimize_operation

    o2 = GradientDescentOptimizer(Datum.learning_rate)
    o2.minimize(svgp, maxiter=0, anchor=anchor)
    o2_tensor = o2.minimize_operation

    o3 = NatGradOptimizer(Datum.gamma)
    o3.minimize(svgp, var_list=variationals, maxiter=0, anchor=anchor)
    o3_tensor = o3.minimize_operation

    session_tf.run(o1_tensor)

    sgpr_likelihood = sgpr.compute_log_likelihood()
    svgp_likelihood = svgp.compute_log_likelihood()
    assert_allclose(sgpr_likelihood, svgp_likelihood, atol=1e-5)

    session_tf.run(o2_tensor)
    session_tf.run(o3_tensor)

    GradientDescentOptimizer(Datum.learning_rate).minimize(sgpr,
                                                           maxiter=1,
                                                           anchor=anchor)

    sgpr_likelihood = sgpr.compute_log_likelihood()
    svgp_likelihood = svgp.compute_log_likelihood()
    assert_allclose(sgpr_likelihood, svgp_likelihood, atol=1e-5)