def _optimizers(lr, mlr0, mlr_decay, learn_lr=True):
    """Build the inner (SGD) and outer (Adam) optimizers and the HyperOptimizer."""
    # inner optimizer: gradient descent, with the learning rate optionally treated
    # as a hyperparameter to be optimized
    io_optim = far.GradientDescentOptimizer(
        far.get_hyperparameter('lr', lr) if learn_lr else tf.constant(lr, name='lr'))
    gs = tf.get_variable('global_step', initializer=0, trainable=False)
    # outer optimizer: Adam with an inverse-time-decayed meta learning rate
    meta_lr = tf.train.inverse_time_decay(mlr0, gs, 1., mlr_decay)
    oo_optim = tf.train.AdamOptimizer(meta_lr)
    farho = far.HyperOptimizer()
    return io_optim, gs, meta_lr, oo_optim, farho
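# A minimal usage sketch (an assumption, not part of the original script): the toy
# objectives and the names `w`, `rho`, `tr_loss`, `val_loss` below are placeholders,
# introduced only to show how the objects returned by _optimizers are typically
# wired into FAR-HO's inner/outer problem.
import tensorflow as tf
import far_ho as far

w = tf.Variable(tf.zeros([10]), name='w')
rho = far.get_hyperparameter('rho', 0.1)       # toy hyperparameter
tr_loss = tf.reduce_sum((w - rho) ** 2)        # toy inner (training) objective
val_loss = tf.reduce_sum(w ** 2)               # toy outer (validation) objective

io_optim, gs, meta_lr, oo_optim, farho = _optimizers(lr=0.1, mlr0=0.001, mlr_decay=1.e-5)
optim_dict = farho.inner_problem(tr_loss, io_optim)
farho.outer_problem(val_loss, optim_dict, oo_optim,
                    hyper_list=tf.get_collection(far.GraphKeys.HYPERPARAMETERS),
                    global_step=gs)
farho.finalize()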
# build loss and accuracy
with tf.name_scope('errors'):
    # inner objective (training error): weighted mean of the cross-entropy errors
    # (the sigmoid ensures the example weights are > 0)
    tr_loss = tf.reduce_mean(
        tf.sigmoid(weights) * tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=out))
    # outer objective (validation error), not weighted
    val_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=out))
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(out, 1)), tf.float32))

# optimizers
# get a hyperparameter for the learning rate
lr = far.get_hyperparameter('lr', 0.01)
# for training-error minimization an optimizer from far_ho is needed
io_optim = far.GradientDescentOptimizer(lr)
# for the outer objective any optimizer from tf is valid
oo_optim = tf.train.AdamOptimizer()

print('hyperparameters to optimize')
[print(h) for h in far.hyperparameters()]

# build hyperparameter optimizer
farho = far.HyperOptimizer()
run = farho.minimize(val_loss, oo_optim, tr_loss, io_optim,
                     init_dynamics_dict={v: h for v, h in zip(tf.model_variables(),
for _ in range(meta_batch_size):
    x, y = get_placeholders()
    mb_dict['x'].append(x)
    mb_dict['y'].append(y)
    hyper_repr = build_hyper_representation(x, auto_reuse=True)
    logits = classifier(hyper_repr, y)
    ce = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        labels=y, logits=logits))
    mb_dict['err'].append(ce)
    mb_dict['acc'].append(accuracy(y, logits))

L = tf.add_n(mb_dict['err'])
E = L / meta_batch_size
mean_acc = tf.add_n(mb_dict['acc']) / meta_batch_size

inner_opt = far.GradientDescentOptimizer(learning_rate=lr)
outer_opt = tf.train.AdamOptimizer()

hyper_step = far.HyperOptimizer().minimize(E, outer_opt, L, inner_opt)

T = 10

tf.global_variables_initializer().run()

acc = []
val_acc = []
next = 0
for ii in range(10):
    return logits


x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x, y)

train_set, validation_set = get_data()

lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=0.01)

ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(tf.sigmoid(lambdas) * ce)   # inner objective: per-example weighted training error
E = tf.reduce_mean(ce)                         # outer objective: unweighted (validation) error

inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer()
hyper_step = far.HyperOptimizer().minimize(E, outer_optimizer, L, inner_optimizer)

T = 200  # Number of inner iterations

train_set_supplier = train_set.create_supplier(x, y)
validation_set_supplier = validation_set.create_supplier(x, y)

tf.global_variables_initializer().run()
print('inner:', L.eval(train_set_supplier()))
print('outer:', E.eval(validation_set_supplier()))
# print('-'*50)

n_hyper_iterations = 10
for _ in range(n_hyper_iterations):
    hyper_step(T,
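# Hedged sketch (an assumption about the FAR-HO API, not text from the original
# snippet): hyper_step takes the number of inner iterations together with feed-dict
# suppliers for the inner and outer objectives, so each hyper-iteration would look
# roughly like:
#
#     hyper_step(T,
#                inner_objective_feed_dicts=train_set_supplier,
#                outer_objective_feed_dicts=validation_set_supplier)
#     print('inner:', L.eval(train_set_supplier()))
#     print('outer:', E.eval(validation_set_supplier()))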
# with backtracking line search the inner objective must be passed as a callable
def inner_obj(var_list):
    w = var_list[0]
    obj = (w - L)**2 / 2. + lmbd * (w)**2 / 2 + tf.reduce_sum(var_list[1]**2)
    return obj[0]

io_lip = 1. + lmbd  # Lipschitz constant of the inner objective's gradient

farho = far.HyperOptimizer(far.ReverseHg())

if run_gd:
    # plain gradient descent: the inner objective can be a tensor
    inner_obj = inner_obj([w, b])
    if right_step:
        gd = far.GradientDescentOptimizer(2 * kappa / io_lip)
    else:
        gd = far.GradientDescentOptimizer(1.)
else:
    gd = far.BackTrackingGradientDescentOptimizer(tf.constant(1.))

run = farho.minimize(outer_obj, tf.train.GradientDescentOptimizer(0.01),
                     inner_obj, gd, var_list=[w, b], hyper_list=[lmbd])

tf.global_variables_initializer().run()
rs = []
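# Hedged continuation sketch (an assumption, not from the original test script):
# `run` executes T inner steps followed by one hypergradient update of lmbd; the
# values of T and of the number of hyper-iterations below are placeholders.
T = 20
for _ in range(100):
    run(T)
    rs.append(lmbd.eval())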
def build(metasets, hyper_model_builder, learn_lr, lr0, MBS, mlr0, mlr_decay,
          batch_norm_before_classifier, weights_initializer, process_fn=None):
    exs = [em.SLExperiment(metasets) for _ in range(MBS)]

    hyper_repr_model = hyper_model_builder(exs[0].x, 'HyperRepr')

    if learn_lr:
        lr = far.get_hyperparameter('lr', lr0)
    else:
        lr = tf.constant(lr0, name='lr')

    gs = tf.get_variable('global_step', initializer=0, trainable=False)
    meta_lr = tf.train.inverse_time_decay(mlr0, gs, decay_steps=1., decay_rate=mlr_decay)

    io_opt = far.GradientDescentOptimizer(lr)
    oo_opt = tf.train.AdamOptimizer(meta_lr)
    far_ho = far.HyperOptimizer()

    for k, ex in enumerate(exs):
        # print(k)  # DEBUG
        with tf.device(available_devices[k % len(available_devices)]):
            repr_out = hyper_repr_model.for_input(ex.x).out

            other_train_vars = []
            if batch_norm_before_classifier:
                batch_mean, batch_var = tf.nn.moments(repr_out, [0])
                scale = tf.Variable(tf.ones_like(repr_out[0]))
                beta = tf.Variable(tf.zeros_like(repr_out[0]))
                other_train_vars.append(scale)
                other_train_vars.append(beta)
                repr_out = tf.nn.batch_normalization(repr_out, batch_mean, batch_var,
                                                     beta, scale, 1e-3)

            ex.model = em.models.FeedForwardNet(
                repr_out, metasets.train.dim_target,
                output_weight_initializer=weights_initializer,
                name='Classifier_%s' % k)

            ex.errors['training'] = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=ex.y, logits=ex.model.out))
            # same graph node: the validation split is supplied via feed dicts when
            # the outer objective is evaluated
            ex.errors['validation'] = ex.errors['training']
            ex.scores['accuracy'] = tf.reduce_mean(tf.cast(
                tf.equal(tf.argmax(ex.y, 1), tf.argmax(ex.model.out, 1)), tf.float32),
                name='accuracy')

            # simple training step used for testing
            ex.optimizers['ts'] = tf.train.GradientDescentOptimizer(lr).minimize(
                ex.errors['training'], var_list=ex.model.var_list)

            optim_dict = far_ho.inner_problem(
                ex.errors['training'], io_opt,
                var_list=ex.model.var_list + other_train_vars)
            far_ho.outer_problem(
                ex.errors['validation'], optim_dict, oo_opt,
                hyper_list=tf.get_collection(far.GraphKeys.HYPERPARAMETERS),
                global_step=gs)

    far_ho.finalize(process_fn=process_fn)

    saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES), max_to_keep=240)
    return exs, far_ho, saver
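# Hedged usage sketch (an assumption; the argument values and the feed-dict
# suppliers `train_fd` / `valid_fd` are placeholders, not from the original script):
# build() returns the per-task experiments, the configured HyperOptimizer and a
# Saver; one meta-iteration then runs T inner steps on the training feed dicts and
# a single outer (meta) update on the validation feed dicts.
#
#     exs, far_ho, saver = build(metasets, hyper_model_builder, learn_lr=True,
#                                lr0=0.1, MBS=4, mlr0=0.001, mlr_decay=1.e-5,
#                                batch_norm_before_classifier=False,
#                                weights_initializer=tf.zeros_initializer)
#     far_ho.run(T, inner_objective_feed_dicts=train_fd,
#                outer_objective_feed_dicts=valid_fd)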