def _optimizers(lr, mlr0, mlr_decay, learn_lr=True):
    io_optim = far.GradientDescentOptimizer(
        far.get_hyperparameter('lr', lr) if learn_lr else tf.constant(lr, name='lr'))
    gs = tf.get_variable('global_step', initializer=0, trainable=False)
    meta_lr = tf.train.inverse_time_decay(mlr0, gs, 1., mlr_decay)
    oo_optim = tf.train.AdamOptimizer(meta_lr)
    farho = far.HyperOptimizer()
    return io_optim, gs, meta_lr, oo_optim, farho
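# Minimal usage sketch (not from the original source). It assumes the usual
# imports `import tensorflow as tf` and `import far_ho as far`, and the numeric
# values below are hypothetical; it only shows how a caller might unpack what
# _optimizers returns.
io_optim, gs, meta_lr, oo_optim, farho = _optimizers(
    lr=0.1,          # initial inner learning rate (hypothetical)
    mlr0=0.001,      # initial outer (meta) learning rate (hypothetical)
    mlr_decay=1e-5,  # inverse-time decay rate of the meta learning rate
    learn_lr=True)   # treat the inner learning rate itself as a hyperparameter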
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(out, 1)), tf.float32))

# optimizers
# get a hyperparameter for the learning rate
lr = far.get_hyperparameter('lr', 0.01)
# for training-error minimization an optimizer from far_ho is needed
io_optim = far.GradientDescentOptimizer(lr)
# for the outer objective, all optimizers from tf are valid
oo_optim = tf.train.AdamOptimizer()

print('hyperparameters to optimize')
for h in far.hyperparameters():
    print(h)

# build the hyperparameter optimizer
farho = far.HyperOptimizer()
run = farho.minimize(
    val_loss, oo_optim, tr_loss, io_optim,
    init_dynamics_dict={v: h for v, h in zip(tf.model_variables(),
                                             far.utils.hyperparameters()[:4])})

print('Variables (or tensors) that will store the values of the hypergradients')
print(*far.hypergradients(), sep='\n')

# run hyperparameter optimization
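# Hedged sketch of the optimization loop announced by the comment above. The
# placeholders x, y, the dataset object `data` and the iteration counts are
# assumptions; `run` is the callable returned by farho.minimize and is expected
# to take the number of inner iterations plus feed dicts for the inner
# (training) and outer (validation) objectives.
T = 50  # hypothetical number of inner iterations per hyper-step
tf.global_variables_initializer().run()  # assumes a default session is active
for _ in range(100):  # hypothetical number of hyper-iterations
    run(T,
        inner_objective_feed_dicts={x: data.train.x, y: data.train.y},
        outer_objective_feed_dicts={x: data.validation.x, y: data.validation.y})
    print('training accuracy:', accuracy.eval({x: data.train.x, y: data.train.y}))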
mb_dict['y'].append(y)
hyper_repr = build_hyper_representation(x, auto_reuse=True)
logits = classifier(hyper_repr, y)
ce = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    labels=y, logits=logits))
mb_dict['err'].append(ce)
mb_dict['acc'].append(accuracy(y, logits))

L = tf.add_n(mb_dict['err'])
E = L / meta_batch_size
mean_acc = tf.add_n(mb_dict['acc']) / meta_batch_size

inner_opt = far.GradientDescentOptimizer(learning_rate=lr)
outer_opt = tf.train.AdamOptimizer()
hyper_step = far.HyperOptimizer().minimize(E, outer_opt, L, inner_opt)

T = 10
tf.global_variables_initializer().run()
acc = []
val_acc = []
next = 0
for ii in range(10):
    for i in range(0, numData, batch * meta_batch_size):
        meta_batch = []
        for m in range(meta_batch_size):
            if next == numSet:
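# Separate hedged sketch, not a continuation of the truncated loop above: a
# plausible definition of the `accuracy` helper used when filling mb_dict,
# i.e. the fraction of examples whose arg-max prediction matches the one-hot
# label. The name and exact form are assumptions.
def accuracy(y_true, logits):
    correct = tf.equal(tf.argmax(y_true, 1), tf.argmax(logits, 1))
    return tf.reduce_mean(tf.cast(correct, tf.float32))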
w = tf.get_variable('w', initializer=tf.zeros_initializer, shape=(1,))
b = tf.get_variable('b', initializer=tf.ones_initializer, shape=(2,))

outer_obj = (w - 2.)**2 / 2. + lmbd**2


# the inner objective should be passed as a callable of the variable list
def inner_obj(var_list):
    w = var_list[0]
    obj = (w - L)**2 / 2. + lmbd * w**2 / 2. + tf.reduce_sum(var_list[1]**2)
    return obj[0]


io_lip = 1. + lmbd  # Lipschitz constant of the inner gradient w.r.t. w
farho = far.HyperOptimizer(far.ReverseHg())

if run_gd:
    inner_obj = inner_obj([w, b])
    if right_step:
        gd = far.GradientDescentOptimizer(2 * kappa / io_lip)
    else:
        gd = far.GradientDescentOptimizer(1.)
else:
    gd = far.BackTrackingGradientDescentOptimizer(tf.constant(1.))

run = farho.minimize(outer_obj, tf.train.GradientDescentOptimizer(0.01),
                     inner_obj, gd, var_list=[w, b], hyper_list=[lmbd])
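# Hedged sketch of the definitions the fragment above relies on (assumptions,
# not part of the original snippet): `lmbd` is the regularization
# hyperparameter being tuned, `L` a target scalar, `kappa` a step-size factor,
# and `run_gd` / `right_step` are flags choosing plain vs. backtracking
# gradient descent and a correct vs. deliberately large step size.
lmbd = far.get_hyperparameter('lmbd', 1.)
L = tf.constant([1.], name='L')
kappa = 0.5
run_gd, right_step = True, True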
def build(metasets, hyper_model_builder, learn_lr, lr0, MBS, mlr0, mlr_decay,
          batch_norm_before_classifier, weights_initializer, process_fn=None):
    exs = [em.SLExperiment(metasets) for _ in range(MBS)]

    hyper_repr_model = hyper_model_builder(exs[0].x, 'HyperRepr')

    if learn_lr:
        lr = far.get_hyperparameter('lr', lr0)
    else:
        lr = tf.constant(lr0, name='lr')

    gs = tf.get_variable('global_step', initializer=0, trainable=False)
    meta_lr = tf.train.inverse_time_decay(mlr0, gs, decay_steps=1.,
                                          decay_rate=mlr_decay)

    io_opt = far.GradientDescentOptimizer(lr)
    oo_opt = tf.train.AdamOptimizer(meta_lr)
    far_ho = far.HyperOptimizer()

    for k, ex in enumerate(exs):
        # print(k)  # DEBUG
        with tf.device(available_devices[k % len(available_devices)]):
            repr_out = hyper_repr_model.for_input(ex.x).out

            other_train_vars = []
            if batch_norm_before_classifier:
                batch_mean, batch_var = tf.nn.moments(repr_out, [0])
                scale = tf.Variable(tf.ones_like(repr_out[0]))
                beta = tf.Variable(tf.zeros_like(repr_out[0]))
                other_train_vars.append(scale)
                other_train_vars.append(beta)
                repr_out = tf.nn.batch_normalization(repr_out, batch_mean,
                                                     batch_var, beta, scale,
                                                     1e-3)

            ex.model = em.models.FeedForwardNet(
                repr_out, metasets.train.dim_target,
                output_weight_initializer=weights_initializer,
                name='Classifier_%s' % k)

            ex.errors['training'] = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=ex.y,
                                                        logits=ex.model.out))
            ex.errors['validation'] = ex.errors['training']
            ex.scores['accuracy'] = tf.reduce_mean(tf.cast(
                tf.equal(tf.argmax(ex.y, 1), tf.argmax(ex.model.out, 1)),
                tf.float32), name='accuracy')

            # simple training step used for testing
            ex.optimizers['ts'] = tf.train.GradientDescentOptimizer(
                lr).minimize(ex.errors['training'], var_list=ex.model.var_list)

            optim_dict = far_ho.inner_problem(
                ex.errors['training'], io_opt,
                var_list=ex.model.var_list + other_train_vars)
            far_ho.outer_problem(
                ex.errors['validation'], optim_dict, oo_opt,
                hyper_list=tf.get_collection(far.GraphKeys.HYPERPARAMETERS),
                global_step=gs)

    far_ho.finalize(process_fn=process_fn)
    saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES),
                           max_to_keep=240)
    return exs, far_ho, saver
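# Hedged usage sketch (names, values and the meta-training loop below are
# assumptions, not from the original source). `build` wires one inner/outer
# problem per experiment into a single HyperOptimizer, so meta-training would
# amount to repeatedly calling its run method with per-experiment feed dicts.
exs, far_ho, saver = build(
    metasets, hyper_model_builder=make_hyper_repr_model,  # hypothetical builder
    learn_lr=True, lr0=0.1, MBS=4, mlr0=0.001, mlr_decay=1e-5,
    batch_norm_before_classifier=False,
    weights_initializer=tf.zeros_initializer, process_fn=None)

with tf.Session().as_default():
    tf.global_variables_initializer().run()
    for meta_it in range(1000):                # hypothetical meta-iterations
        far_ho.run(T,                          # T inner steps per meta-iteration
                   inner_objective_feed_dicts=train_feed_dicts,   # hypothetical
                   outer_objective_feed_dicts=valid_feed_dicts)   # hypothetical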