Example #1
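These snippets target the GPflow 1.x action/optimizer API on TensorFlow 1.x. A minimal set of imports they assume (a sketch; exact module paths may differ between GPflow 1.x releases):

import logging

import tensorflow as tf
from numpy.testing import assert_allclose

from gpflow.actions import Loop
from gpflow.train import (AdamOptimizer, GradientDescentOptimizer,
                          NatGradOptimizer, ScipyOptimizer)
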
def run_with_adam_and_nat(model, lr, iterations, callback=None, gamma=0.001):
    if gamma == 0:
        adam = AdamOptimizer(lr).make_optimize_action(model)
        actions = [adam]
        actions = actions if callback is None else actions + [callback]

        Loop(actions, stop=iterations)()
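        # anchor copies the optimised values from the TF session back into the gpflow Parameters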
        model.anchor(model.enquire_session())
        return

    var_list = [(model.f_latent.q_mu, model.f_latent.q_sqrt)]

    # we don't want adam optimizing these
    model.f_latent.q_mu.set_trainable(False)
    model.f_latent.q_sqrt.set_trainable(False)

    adam = AdamOptimizer(lr).make_optimize_action(model)
    natgrad = NatGradOptimizer(gamma).make_optimize_action(model,
                                                           var_list=var_list)

    actions = [adam, natgrad]
    actions = actions if callback is None else actions + [callback]

    Loop(actions, stop=iterations)()
    model.anchor(model.enquire_session())
Exemple #2
0
def train_with_adam(model, iterations, callback=None, **kwargs):
    # hard-coded schedule: initial_learning_rate=0.03, learning_rate_steps=2,
    # learning_rate_decay=1.5 (see train_with_nat_and_adam for the parameterised version)

    with tf.variable_scope("learning_rate"):
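        # each evaluation of learning_rate increments global_step via tf.assign_add,
        # so the decay schedule advances by one step per optimiser iteration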
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 0.03
        decay_steps = int(iterations / 2.)
        decay_rate = 1. / 1.5
        learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                   tf.assign_add(
                                                       global_step, 1),
                                                   decay_steps,
                                                   decay_rate,
                                                   staircase=True)
    tf.summary.scalar("optimisation/learning_rate", learning_rate)
    sess = model.enquire_session()
    tf_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                scope='learning_rate')
    sess.run(tf.variables_initializer(var_list=tf_vars))

    adam = AdamOptimizer(learning_rate).make_optimize_action(model)
    actions = [adam]
    if callback is not None:
        assert isinstance(callback, (tuple, list))
        actions = actions + list(callback)
        for c in callback:
            try:
                c.init()
            except AttributeError:
                # not every callback action defines init()
                pass

    Loop(actions, stop=iterations)()
    model.anchor(model.enquire_session())
Example #3
def run_with_adam(model, lr, iterations, callback=None):

    adam = AdamOptimizer(lr).make_optimize_action(model)

    actions = [adam]
    actions = actions if callback is None else actions + [callback]

    Loop(actions, stop=iterations)()
    model.anchor(model.enquire_session())
Example #4
    def train_model(self, dgp_model):


        ng_vars = [[dgp_model.layers[-1].q_mu, dgp_model.layers[-1].q_sqrt]]
        for v in ng_vars[0]:
            v.set_trainable(False)
        ng_action = NatGradOptimizer(gamma=0.1).make_optimize_action(dgp_model, var_list=ng_vars)
        adam_action = AdamOptimizer(0.01).make_optimize_action(dgp_model)

        iterations = 10000
        try:
            Loop([ng_action, adam_action], stop=iterations)()
        except tf.errors.InvalidArgumentError:
            # a Cholesky failure inside the natural gradient step raises this error
            print('Failure of Cholesky in Nat Gradient')

        # sess = dgp_model.enquire_session()
        #
        # gamma_start = 1e-2
        # gamma_max = 1e-1
        # gamma_step = 1e-2
        #
        # gamma = tf.Variable(gamma_start, dtype=tf.float64)
        # gamma_incremented = tf.where(tf.less(gamma, gamma_max), gamma + gamma_step, gamma_max)
        #
        # op_ng = NatGradOptimizer(gamma).make_optimize_tensor(dgp_model, var_list=[[dgp_model.layers[-1].q_mu,
        #                                                                            dgp_model.layers[-1].q_sqrt]])
        # op_adam = AdamOptimizer(0.001).make_optimize_tensor(dgp_model)
        # op_increment_gamma = tf.assign(gamma, gamma_incremented)
        #
        # gamma_fallback = 1e-1  # we'll reduce by this factor if there's a cholesky failure
        # op_fallback_gamma = tf.assign(gamma, gamma * gamma_fallback)
        #
        # sess.run(tf.variables_initializer([gamma]))
        #
        # iterations = 10000
        # for it in range(iterations):
        #     try:
        #         sess.run(op_ng)
        #         sess.run(op_increment_gamma)
        #     except tf.errors.InvalidArgumentError:
        #         g = sess.run(gamma)
        #         print('gamma = {} on iteration {} is too big! Falling back to {}'.format(g, it, g * gamma_fallback))
        #         sess.run(op_fallback_gamma)
        #
        #     sess.run(op_adam)
        #
        #     if it % 1000 == 0:
        #         print('{} gamma={:.4f} ELBO={:.4f}'.format(it, *sess.run([gamma, dgp_model.likelihood_tensor])))
        #
        # dgp_model.anchor(sess)
        # # print(len(tf.all_variables()))
        # # print(len(tf.get_default_graph().get_operations()))
        sess = dgp_model.enquire_session()
        dgp_model.anchor(sess)
        print('ELBO={:.4f}'.format(*sess.run([dgp_model.likelihood_tensor])))
        return dgp_model
Example #5
    def _optimize(self, retry=0, error=None):
        numiter = self.flags.test_every
        max_retries = 5
        if retry > max_retries:
            raise error
        try:
            Loop(self.loop, stop=numiter)()
        except tf.errors.InvalidArgumentError as exception:
            if self.flags.optimizer != "NatGrad":
                raise exception
            # gamma was too large for the natural gradient step: back off and retry
            self.step_back_gamma()
            self._optimize(retry=retry + 1, error=exception)
Example #6
def train_with_bfgs(model, learning_rate, iterations, callback=None):

    sess = model.enquire_session()

    # ScipyOptimizer runs L-BFGS-B, so the learning_rate argument is unused
    bfgs = ScipyOptimizer().make_optimize_action(model)
    actions = [bfgs]
    if callback is not None:
        assert isinstance(callback, (tuple, list))
        for c in callback:
            c.init()
        actions = actions + list(callback)

    Loop(actions, stop=iterations)()
    model.anchor(model.enquire_session())
Example #7
def test_hypers_SVGP_vs_SGPR(session_tf, svgp, sgpr):
    """
    Test SVGP vs SGPR. Combined optimization.

    The logic is as follows:

    SVGP is given one nat grad step with gamma=1. Now it is identical to SGPR (which
    has an analytic optimal variational distribution).

    We then take an ordinary gradient step on the hyperparameters (and inducing locations Z)

    Finally we update the variational parameters to their optimal values with another nat grad
    step with gamma=1.

    These three steps are equivalent to an ordinary gradient step on the parameters of SGPR.

    In this test we simply make the variational parameters trainable=False, so they are not
    updated by the ordinary gradient step.
    """
    anchor = False
    variationals = [(svgp.q_mu, svgp.q_sqrt)]

    svgp.q_mu.trainable = False
    svgp.q_sqrt.trainable = False

    opt = NatGradOptimizer(Datum.gamma)
    opt.minimize(svgp, var_list=variationals, maxiter=1, anchor=anchor)

    sgpr_likelihood = sgpr.compute_log_likelihood()
    svgp_likelihood = svgp.compute_log_likelihood()
    assert_allclose(sgpr_likelihood, svgp_likelihood, atol=1e-5)

    # combination (doing GD first, as we've already done the nat grad step)
    a1 = GradientDescentOptimizer(
        Datum.learning_rate).make_optimize_action(svgp)
    a2 = NatGradOptimizer(Datum.gamma).make_optimize_action(
        svgp, var_list=variationals)
    Loop([a1, a2]).with_settings(stop=1)()

    GradientDescentOptimizer(Datum.learning_rate).minimize(sgpr,
                                                           maxiter=1,
                                                           anchor=anchor)

    sgpr_likelihood = sgpr.compute_log_likelihood()
    svgp_likelihood = svgp.compute_log_likelihood()
    assert_allclose(sgpr_likelihood, svgp_likelihood, atol=1e-5)
Example #8
    def run_adam(self, lr, iterations):
        adam = AdamOptimizer(lr).make_optimize_action(self)
        actions = [adam, PrintAction(self, "MF-DGP with Adam")]
        Loop(actions, stop=iterations)()
        self.anchor(self.enquire_session())
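PrintAction is not defined in these snippets; a minimal sketch of such a logging action, assuming the GPflow 1.x gpflow.actions.Action interface (run(self, ctx) receives a context exposing ctx.session and ctx.iteration) and the model's likelihood_tensor attribute:

from gpflow.actions import Action

class PrintAction(Action):
    """Logs the model's likelihood at each optimisation step."""

    def __init__(self, model, text):
        self.model = model
        self.text = text

    def run(self, ctx):
        likelihood = ctx.session.run(self.model.likelihood_tensor)
        print('{}: iteration {} likelihood {:.4f}'.format(
            self.text, ctx.iteration, likelihood))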
Example #9
def train_with_nat(model,
                   gamma_start=1e-5,
                   gamma_add=1e-3,
                   gamma_mul=1.04,
                   gamma_max=0.1,
                   gamma_fallback=1e-1,
                   iterations=500,
                   var_list=None,
                   callback=None,
                   **kwargs):
    # we'll make use of this later when we use a XiTransform
    if var_list is None:
        var_list = [[model.q_mu, model.q_sqrt]]

    with tf.variable_scope("gamma"):

        gamma_start = tf.cast(gamma_start, tf.float64)
        gamma_max = tf.cast(gamma_max, tf.float64)
        mul_step = tf.cast(gamma_mul, tf.float64)
        add_step = tf.cast(gamma_add, tf.float64)
        gamma = tf.Variable(gamma_start, dtype=tf.float64, trainable=False)

        gamma_ref = tf.identity(gamma)

        gamma_fallback = tf.cast(
            gamma_fallback, tf.float64
        )  # we'll reduce by this factor if there's a cholesky failure
        op_fallback_gamma = tf.assign(gamma, gamma_ref * gamma_fallback)
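        # increment gamma by min(gamma * mul_step, add_step): geometric growth
        # while gamma is small, then linear, capped at gamma_max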
        diff = tf.where(gamma_ref * mul_step < add_step, gamma_ref * mul_step,
                        add_step)
        op_gamma_inc = tf.assign(
            gamma,
            tf.where(gamma_ref + diff > gamma_max, gamma_max,
                     gamma_ref + diff))

    tf.summary.scalar("optimisation/gamma", gamma)
    sess = model.enquire_session()
    tf_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='gamma')
    sess.run(tf.variables_initializer(var_list=tf_vars))

    natgrad = NatGradOptimizer(gamma_ref).make_optimize_action(
        model, var_list=var_list)

    actions = [natgrad, GammaSchedule(op_gamma_inc)]
    if callback is not None:
        actions = actions + list(callback)
        for c in callback:
            try:
                c.init()
            except AttributeError:
                # not every callback action defines init()
                pass

    sess = model.enquire_session()
    it = 0
    while it < iterations:
        try:
            looper = Loop(actions, start=it, stop=iterations)
            looper()
            it = looper.iteration
        except tf.errors.InvalidArgumentError:
            it = looper.iteration
            g, gf = sess.run([gamma_ref, op_fallback_gamma])
            logging.info(
                'gamma = {} on iteration {} is too big! Falling back to {}'.
                format(g, it, gf))

    model.anchor(model.enquire_session())
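GammaSchedule above is likewise user-defined rather than part of GPflow; a minimal sketch under the same assumed Action interface, which simply runs the increment op once per optimisation step:

class GammaSchedule(Action):
    """Runs the gamma-increment op once per optimisation step."""

    def __init__(self, op_increment_gamma):
        self.op_increment_gamma = op_increment_gamma

    def run(self, ctx):
        ctx.session.run(self.op_increment_gamma)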
Example #10
def train_with_nat_and_adam(model,
                            initial_learning_rate=0.03,
                            learning_rate_steps=2,
                            learning_rate_decay=1.5,
                            gamma_start=1e-5,
                            gamma_add=1e-3,
                            gamma_mul=1.1,
                            gamma_max=0.1,
                            gamma_fallback=1e-1,
                            iterations=500,
                            var_list=None,
                            callback=None,
                            **kwargs):
    # we'll make use of this later when we use a XiTransform
    if var_list is None:
        var_list = [[model.q_mu, model.q_sqrt]]

    # we don't want adam optimizing these
    model.q_mu.set_trainable(False)
    model.q_sqrt.set_trainable(False)

    with tf.variable_scope("learning_rate"):
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = initial_learning_rate
        decay_steps = int(iterations / learning_rate_steps)
        decay_rate = 1. / learning_rate_decay
        learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                   tf.assign_add(
                                                       global_step, 1),
                                                   decay_steps,
                                                   decay_rate,
                                                   staircase=True)
    tf.summary.scalar("optimisation/learning_rate", learning_rate)
    sess = model.enquire_session()
    tf_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                scope='learning_rate')
    sess.run(tf.variables_initializer(var_list=tf_vars))

    with tf.variable_scope("gamma"):

        gamma_start = tf.cast(gamma_start, tf.float64)
        gamma_max = tf.cast(gamma_max, tf.float64)
        mul_step = tf.cast(gamma_mul, tf.float64)
        add_step = tf.cast(gamma_add, tf.float64)
        gamma = tf.Variable(gamma_start, dtype=tf.float64)

        gamma_ref = tf.identity(gamma)

        gamma_fallback = tf.cast(
            gamma_fallback, tf.float64
        )  # we'll reduce by this factor if there's a cholesky failure
        op_fallback_gamma = tf.assign(gamma, gamma * gamma_fallback)
        diff = tf.where(gamma_ref * mul_step < add_step, gamma_ref * mul_step,
                        add_step)
        op_gamma_inc = tf.assign(
            gamma,
            tf.where(gamma_ref + diff > gamma_max, gamma_max,
                     gamma_ref + diff))

    tf.summary.scalar("optimisation/gamma", gamma)
    sess = model.enquire_session()
    tf_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='gamma')
    sess.run(tf.variables_initializer(var_list=tf_vars))

    natgrad = NatGradOptimizer(gamma_ref).make_optimize_action(
        model, var_list=var_list)
    adam = AdamOptimizer(learning_rate).make_optimize_action(model)

    actions = [adam, natgrad, GammaSchedule(op_gamma_inc)]
    if callback is not None:
        actions = actions + list(callback)
        for c in callback:
            try:
                c.init()
            except AttributeError:
                # not every callback action defines init()
                pass

    sess = model.enquire_session()
    it = 0
    while it < iterations:
        try:
            looper = Loop(actions, start=it, stop=iterations)
            looper()
            it = looper.iteration
        except tf.errors.InvalidArgumentError:
            it = looper.iteration
            g, gf = sess.run([gamma_ref, op_fallback_gamma])
            logging.info(
                'gamma = {} on iteration {} is too big! Falling back to {}'.
                format(g, it, gf))

    model.anchor(model.enquire_session())