Example #1
import tensorflow as tf
import far_ho as far


def _optimizers(lr, mlr0, mlr_decay, learn_lr=True):
    io_optim = far.GradientDescentOptimizer(far.get_hyperparameter('lr', lr) if learn_lr else
                                            tf.constant(lr, name='lr'))
    gs = tf.get_variable('global_step', initializer=0, trainable=False)
    meta_lr = tf.train.inverse_time_decay(mlr0, gs, 1., mlr_decay)
    oo_optim = tf.train.AdamOptimizer(meta_lr)
    farho = far.HyperOptimizer()
    return io_optim, gs, meta_lr, oo_optim, farho
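
A minimal usage sketch (not part of the original file): `tr_loss` and `val_loss` are assumed training and validation loss tensors defined elsewhere, and the `global_step` keyword is assumed to be forwarded to the outer problem so that `meta_lr` actually decays as outer steps are taken.

io_optim, gs, meta_lr, oo_optim, farho = _optimizers(
    lr=0.1, mlr0=0.001, mlr_decay=1.e-5, learn_lr=True)

# outer (validation) objective and optimizer first, then the inner pair
run = farho.minimize(val_loss, oo_optim,
                     tr_loss, io_optim,
                     global_step=gs)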
Example #2
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(out, 1)), tf.float32))

# optimizers
# get a hyperparameter for the learning rate
lr = far.get_hyperparameter('lr', 0.01)
# the inner problem (training error minimization) needs an optimizer from far_ho
io_optim = far.GradientDescentOptimizer(lr)
# for the outer objective any optimizer from tf.train can be used
oo_optim = tf.train.AdamOptimizer()

print('hyperparameters to optimize')
for h in far.hyperparameters():
    print(h)

# build hyperparameter optimizer
farho = far.HyperOptimizer()
run = farho.minimize(val_loss,
                     oo_optim,
                     tr_loss,
                     io_optim,
                     init_dynamics_dict={
                         v: h
                         for v, h in zip(tf.model_variables(),
                                         far.utils.hyperparameters()[:4])
                     })

print(
    'Variables (or tensors) that will store the values of the hypergradients')
print(*far.hypergradients(), sep='\n')

# run hyperparameter optimization
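To actually run the hyperparameter optimization announced by the comment above, the `run` callable returned by `farho.minimize` is driven for a number of hyper-iterations. A minimal sketch, assuming a default session is active and that `feed_tr` / `feed_val` are feed dictionaries for the placeholders behind `tr_loss` and `val_loss`:

T = 100  # inner (gradient-descent) iterations per hypergradient step

tf.global_variables_initializer().run()
for hyper_it in range(50):
    run(T,
        inner_objective_feed_dicts=feed_tr,
        outer_objective_feed_dicts=feed_val)
    print('lr:', lr.eval(), '- validation accuracy:', accuracy.eval(feed_val))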
Example #3
        mb_dict['y'].append(y)
        hyper_repr = build_hyper_representation(x, auto_reuse=True)
        logits = classifier(hyper_repr, y)
        ce = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            labels=y, logits=logits))
        mb_dict['err'].append(ce)
        mb_dict['acc'].append(accuracy(y, logits))

    L = tf.add_n(mb_dict['err'])
    E = L / meta_batch_size
    mean_acc = tf.add_n(mb_dict['acc'])/meta_batch_size

    inner_opt = far.GradientDescentOptimizer(learning_rate=lr)
    outer_opt = tf.train.AdamOptimizer()

    hyper_step = far.HyperOptimizer().minimize(
        E, outer_opt, L, inner_opt)

    T = 10

    tf.global_variables_initializer().run()

    acc = []
    val_acc = []

    next = 0

    for ii in range(10):
        for i in range(0, numData, batch*meta_batch_size):
            meta_batch = []
            for m in range(meta_batch_size):
                if next == numSet:
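
Inside the loop above, each assembled meta-batch is fed to `hyper_step`, which plays the same role as `run` in the previous example. A minimal sketch, assuming hypothetical feed dictionaries `feed_train` and `feed_valid` built from the current meta-batch:

hyper_step(T,
           inner_objective_feed_dicts=feed_train,
           outer_objective_feed_dicts=feed_valid)
acc.append(mean_acc.eval(feed_train))
val_acc.append(mean_acc.eval(feed_valid))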
Example #4
w = tf.get_variable('w', initializer=tf.zeros_initializer, shape=(1, ))
b = tf.get_variable('b', initializer=tf.ones_initializer, shape=(2, ))

outer_obj = (w - 2.)**2 / 2. + lmbd**2


# the inner objective is written as a callable: the backtracking optimizer
# below takes it as such, while plain gradient descent gets the evaluated tensor
def inner_obj(var_list):
    w = var_list[0]
    obj = (w - L)**2 / 2. + lmbd * (w)**2 / 2 + tf.reduce_sum(var_list[1]**2)
    return obj[0]


io_lip = 1. + lmbd

farho = far.HyperOptimizer(far.ReverseHg())
if run_gd:
    inner_obj = inner_obj([w, b])
    if right_step:
        gd = far.GradientDescentOptimizer(2 * kappa / io_lip)
    else:
        gd = far.GradientDescentOptimizer(1.)
else:
    gd = far.BackTrackingGradientDescentOptimizer(tf.constant(1.))

run = farho.minimize(outer_obj,
                     tf.train.GradientDescentOptimizer(0.01),
                     inner_obj,
                     gd,
                     var_list=[w, b],
                     hyper_list=[lmbd])
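
A minimal sketch of running this toy bilevel problem; the iteration counts and the use of a default session are assumptions, and each call to `run` is expected to perform the inner descent on `w` and `b` followed by one hypergradient update of `lmbd`:

with tf.Session():
    tf.global_variables_initializer().run()
    for _ in range(10):   # outer (hypergradient) iterations
        run(20)           # inner gradient-descent steps per outer iteration
    print('lambda:', lmbd.eval())
    print('w:', w.eval(), 'b:', b.eval())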
Example #5
def build(metasets,
          hyper_model_builder,
          learn_lr,
          lr0,
          MBS,
          mlr0,
          mlr_decay,
          batch_norm_before_classifier,
          weights_initializer,
          process_fn=None):
    exs = [em.SLExperiment(metasets) for _ in range(MBS)]

    hyper_repr_model = hyper_model_builder(exs[0].x, 'HyperRepr')

    if learn_lr:
        lr = far.get_hyperparameter('lr', lr0)
    else:
        lr = tf.constant(lr0, name='lr')

    gs = tf.get_variable('global_step', initializer=0, trainable=False)
    meta_lr = tf.train.inverse_time_decay(mlr0,
                                          gs,
                                          decay_steps=1.,
                                          decay_rate=mlr_decay)

    io_opt = far.GradientDescentOptimizer(lr)
    oo_opt = tf.train.AdamOptimizer(meta_lr)
    far_ho = far.HyperOptimizer()

    for k, ex in enumerate(exs):
        # print(k)  # DEBUG
        with tf.device(available_devices[k % len(available_devices)]):
            repr_out = hyper_repr_model.for_input(ex.x).out

            other_train_vars = []
            if batch_norm_before_classifier:
                batch_mean, batch_var = tf.nn.moments(repr_out, [0])
                scale = tf.Variable(tf.ones_like(repr_out[0]))
                beta = tf.Variable(tf.zeros_like(repr_out[0]))
                other_train_vars.append(scale)
                other_train_vars.append(beta)
                repr_out = tf.nn.batch_normalization(repr_out, batch_mean,
                                                     batch_var, beta, scale,
                                                     1e-3)

            ex.model = em.models.FeedForwardNet(
                repr_out,
                metasets.train.dim_target,
                output_weight_initializer=weights_initializer,
                name='Classifier_%s' % k)

            ex.errors['training'] = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=ex.y,
                                                        logits=ex.model.out))
            ex.errors['validation'] = ex.errors['training']
            ex.scores['accuracy'] = tf.reduce_mean(tf.cast(
                tf.equal(tf.argmax(ex.y, 1), tf.argmax(ex.model.out, 1)),
                tf.float32),
                                                   name='accuracy')

            # simple training step, used only for testing the classifier
            ex.optimizers['ts'] = tf.train.GradientDescentOptimizer(
                lr).minimize(ex.errors['training'], var_list=ex.model.var_list)

            optim_dict = far_ho.inner_problem(ex.errors['training'],
                                              io_opt,
                                              var_list=ex.model.var_list +
                                              other_train_vars)
            far_ho.outer_problem(ex.errors['validation'],
                                 optim_dict,
                                 oo_opt,
                                 hyper_list=tf.get_collection(
                                     far.GraphKeys.HYPERPARAMETERS),
                                 global_step=gs)

    far_ho.finalize(process_fn=process_fn)
    saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES),
                           max_to_keep=240)
    return exs, far_ho, saver
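
A hypothetical meta-training driver for the objects returned by `build`; `metasets` and `hyper_model_builder` come from the surrounding experiment setup, `feed_tr` / `feed_val` are assumed feed dictionaries covering every `ex.x` / `ex.y` placeholder of the meta-batch, and `far_ho.run` is assumed to behave like the `run` callable of the earlier examples:

exs, far_ho, saver = build(metasets, hyper_model_builder,
                           learn_lr=True, lr0=0.1, MBS=4,
                           mlr0=0.001, mlr_decay=1.e-5,
                           batch_norm_before_classifier=False,
                           weights_initializer=tf.zeros_initializer)

T = 5  # inner iterations per meta-step
with tf.Session():
    tf.global_variables_initializer().run()
    for meta_step in range(1000):
        far_ho.run(T,
                   inner_objective_feed_dicts=feed_tr,
                   outer_objective_feed_dicts=feed_val)
        if meta_step % 100 == 0:
            saver.save(tf.get_default_session(), './hyper_repr',
                       global_step=meta_step)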