Example #1
def _optimizers(lr, mlr0, mlr_decay, learn_lr=True):
    io_optim = far.GradientDescentOptimizer(far.get_hyperparameter('lr', lr) if learn_lr else
                                            tf.constant(lr, name='lr'))
    gs = tf.get_variable('global_step', initializer=0, trainable=False)
    meta_lr = tf.train.inverse_time_decay(mlr0, gs, 1., mlr_decay)
    oo_optim = tf.train.AdamOptimizer(meta_lr)
    farho = far.HyperOptimizer()
    return io_optim, gs, meta_lr, oo_optim, farho
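# Usage sketch (hypothetical values and toy losses; assumes the usual
# `import tensorflow as tf` / `import far_ho as far` and a fresh graph):
# unpack the helper's return tuple and assemble the bilevel problem.
w = tf.get_variable('w', initializer=tf.zeros_initializer, shape=(2,))
lam = far.get_hyperparameter('lam', 0.1)
tr_loss = tf.reduce_sum((w - 1.)**2) + lam * tf.nn.l2_loss(w)  # inner objective
val_loss = tf.reduce_sum(w**2)                                 # outer objective

io_optim, gs, meta_lr, oo_optim, farho = _optimizers(lr=0.1, mlr0=0.001, mlr_decay=1.e-5)
run = farho.minimize(val_loss, oo_optim, tr_loss, io_optim)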
def get_stc_hyperparameter(name,
                           initializer=None,
                           shape=None,
                           constraints=None,
                           sample_func=None,
                           hyper_probs=None):
    """
    Get a stochastic hyperparameter. Defaults to a Bernoulli hyperparameter. Mostly follows the signature of
    `tf.get_variable`.

    :param name: a name for the hyperparameter
    :param initializer: an initializer (or initial value) for the parameters of the distribution
    :param shape: a shape for the stochastic hyperparameter
    :param constraints: additional (simple) constraints for the parameters of the distribution
    :param sample_func: a function that takes the distribution parameters and returns a sample
    :param hyper_probs: the variables used for the underlying probability distribution
    :return: The stochastic hyperparameter (not the distribution variables!)
    """
    if constraints is None:
        constraints = lambda _v: tf.maximum(tf.minimum(_v, 1.), 0.)
    if hyper_probs is None:  # creates the hyperparameter that is also used for sampling
        hyper_probs = tf.get_variable(name + '/' + GraphKeys.STOCHASTIC_HYPER,
                                      trainable=False,
                                      constraint=constraints,
                                      initializer=initializer,
                                      shape=shape,
                                      collections=[
                                          GraphKeys.GLOBAL_VARIABLES,
                                          GraphKeys.STOCHASTIC_HYPER
                                      ])
    if sample_func is None:
        sample_func = bernoulli_hard_sample
    hyper_sample = far.get_hyperparameter(
        name,
        initializer=sample_func(hyper_probs),
        collections=GraphKeys.STOCHASTIC_HYPER)
    far.utils.remove_from_collection(GraphKeys.GLOBAL_VARIABLES, hyper_sample)
    # re-initialize and return the value
    with tf.control_dependencies([tf.variables_initializer([hyper_sample])]):
        _STC_INITIALIZERs[hyper_sample] = hyper_sample.read_value()

    _STC_MAP[hyper_sample] = hyper_probs

    return hyper_sample
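# Usage sketch (hypothetical name, shape and initial probability; assumes this module's
# GraphKeys, _STC_INITIALIZERs, _STC_MAP and bernoulli_hard_sample are in scope):
# a Bernoulli gate over 10 units whose sampling probabilities live in the companion
# non-trainable variable created above.
gates = get_stc_hyperparameter('gates',
                               initializer=tf.constant_initializer(0.5),
                               shape=(10,))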
Example #4
    h1_hyp = tcl.fully_connected(
        x,
        300,
        variables_collections=far.HYPERPARAMETERS_COLLECTIONS,
        trainable=False)
    out_hyp = tcl.fully_connected(
        h1_hyp,
        datasets.train.dim_target,
        variables_collections=far.HYPERPARAMETERS_COLLECTIONS,
        trainable=False)
    print('Initial model weights (hyperparameters)')
    [print(e) for e in far.utils.hyperparameters()]
#     far.utils.remove_from_collection(far.GraphKeys.MODEL_VARIABLES, *far.utils.hyperparameters())

# get a hyperparameter for weighting the examples in the inner objective loss (training error)
weights = far.get_hyperparameter('ex_weights', tf.zeros(batch))

# build loss and accuracy
# inner objective (training error): weighted mean of cross-entropy errors (sigmoid keeps the weights > 0)
with tf.name_scope('errors'):
    tr_loss = tf.reduce_mean(
        tf.sigmoid(weights) *
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=out))
    # outer objective (validation error) (not weighted)
    val_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=out))
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(out, 1)), tf.float32))

# optimizers
# get a hyperparameter for the learning rate
# In[16]:

try: ss.close()
except: pass
tf.reset_default_graph()
ss = tf.InteractiveSession()

v1 = tf.Variable([1.,3])

v2 = tf.Variable([[-1., -2], [1., 0.]])


# In[17]:

lmbd = far.get_hyperparameter('lambda',
                              initializer=tf.ones_initializer,
                              shape=v2.get_shape())

cost = tf.reduce_mean(v1**2) + tf.reduce_sum(lmbd*v2**2)

io_optim = far.AdamOptimizer(epsilon=1.e-6)

#io_optim = far.MomentumOptimizer(far.get_hyperparameter('eta', 0.1), far.get_hyperparameter('mu', .9))
io_optim_dict = io_optim.minimize(cost)

oo = tf.reduce_mean(v1*v2)


# In[18]:

rhg = far.ReverseHG()
    return datasets.train, datasets.validation


def g_logits(x, y):
    with tf.variable_scope('model'):
        h1 = layers.fully_connected(x, 300)
        logits = layers.fully_connected(h1, int(y.shape[1]))
    return logits


x = tf.placeholder(tf.float32, shape=(None, 28**2), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
logits = g_logits(x, y)
train_set, validation_set = get_data()

lambdas = far.get_hyperparameter('lambdas', tf.zeros(train_set.num_examples))
lr = far.get_hyperparameter('lr', initializer=0.01)

ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
L = tf.reduce_mean(tf.sigmoid(lambdas) * ce)
E = tf.reduce_mean(ce)

inner_optimizer = far.GradientDescentOptimizer(lr)
outer_optimizer = tf.train.AdamOptimizer()
hyper_step = far.HyperOptimizer().minimize(E, outer_optimizer, L,
                                           inner_optimizer)

T = 200  # Number of inner iterations
train_set_supplier = train_set.create_supplier(x, y)
validation_set_supplier = validation_set.create_supplier(x, y)
tf.global_variables_initializer().run()
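# A possible meta-training loop for the setup above (a sketch; the keyword argument
# names follow the FAR-HO README and the number of hyper-iterations is illustrative):
# each call runs T inner gradient-descent steps on L and one hypergradient step
# on `lambdas` and `lr`, feeding batches through the two supplier callables.
for _ in range(50):
    hyper_step(T,
               inner_objective_feed_dicts=train_set_supplier,
               outer_objective_feed_dicts=validation_set_supplier)
print('learned learning rate:', lr.eval())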
Example #7
import far_ho as far
import tensorflow as tf
import numpy as np

run_gd = False  # true for constant step size
right_step = False

tf.reset_default_graph()
ss = tf.InteractiveSession()

L = tf.constant(10.65158)
kappa = .25
lmbd = far.get_hyperparameter('lmbd', .008921)

# L / (1 + lmbd)
sol = L / (1 + lmbd)
# w = tf.get_variable('w', initializer=sol)
w = tf.get_variable('w', initializer=tf.zeros_initializer, shape=(1, ))
b = tf.get_variable('b', initializer=tf.ones_initializer, shape=(2, ))

outer_obj = (w - 2.)**2 / 2. + lmbd**2


# the inner objective is passed as a callable (a function of the variable list)
def inner_obj(var_list):
    w = var_list[0]
    obj = (w - L)**2 / 2. + lmbd * (w)**2 / 2 + tf.reduce_sum(var_list[1]**2)
    return obj[0]


io_lip = 1. + lmbd
Example #8
try:
    ss.close()
except:
    pass
tf.reset_default_graph()
ss = tf.InteractiveSession()

v1 = tf.Variable([1., 3])

v2 = tf.Variable([[-1., -2], [1., 0.]])

# In[17]:

lmbd = far.get_hyperparameter('lambda',
                              initializer=tf.ones_initializer,
                              shape=v2.get_shape())

cost = tf.reduce_mean(v1**2) + tf.reduce_sum(lmbd * v2**2)

io_optim = far.AdamOptimizer(epsilon=1.e-6)

#io_optim = far.MomentumOptimizer(far.get_hyperparameter('eta', 0.1), far.get_hyperparameter('mu', .9))
io_optim_dict = io_optim.minimize(cost)

oo = tf.reduce_mean(v1 * v2)

# In[18]:

rhg = far.ReverseHG()
rhg.compute_gradients(oo, io_optim_dict)
        # build loss and accuracy
        # inner objective (training error): weighted mean of cross-entropy errors (sigmoid keeps the weights > 0)
        with tf.name_scope('errors'):
            #tr_loss = tf.reduce_mean(tf.sigmoid(weights)*tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=out))
            #tr_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=out))
            # outer objective (validation error) (not weighted)
            val_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=out))
            accuracy = tf.reduce_mean(
                tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(out, 1)),
                        tf.float32))

        # optimizers
        # get a hyperparameter for the learning rate
        lr = far.get_hyperparameter('lr', 0.01)
        io_optim = far.GradientDescentOptimizer(
            lr
        )  # for training error minimization an optimizer from far_ho is needed
        oo_optim = tf.train.AdamOptimizer(
        )  # for outer objective optimizer all optimizers from tf are valid

        print('hyperparameters to optimize')
        [print(h) for h in far.hyperparameters()]

        # build hyperparameter optimizer
        farho = far.HyperOptimizer()
        run = farho.minimize(
            val_loss,
            oo_optim,
            val_loss,
Example #10
import tensorflow as tf
import far_ho as far


tf.reset_default_graph()
ss = tf.InteractiveSession()

v1 = tf.Variable([10., 3])

v2 = tf.Variable([[-1., -2], [1., -21.]])


# In[17]:

lmbd = far.get_hyperparameter('lambda',
                              initializer=tf.ones_initializer,
                              shape=v2.get_shape())

reg2 = far.get_hyperparameter('reg2', 0.1)

eta = far.get_hyperparameter('eta', 0.1)
beta1 = far.get_hyperparameter('beta1', 1.)
beta2 = far.get_hyperparameter('beta2', 2.)

# noinspection PyTypeChecker
cost = tf.reduce_mean(v1**2) + tf.reduce_sum(lmbd*v2**2) + reg2*tf.nn.l2_loss(v1)

io_optim = far.AdamOptimizer(eta, tf.nn.sigmoid(beta1), tf.nn.sigmoid(beta2), epsilon=1.e-4)

oo = tf.reduce_mean(v1*v2)

rhg = far.ReverseHG()
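# Presumably the next steps mirror Example #8: build the inner optimization dynamics
# for `cost` and attach the reverse-mode hypergradient of `oo` to them (a sketch).
io_optim_dict = io_optim.minimize(cost)
rhg.compute_gradients(oo, io_optim_dict)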
Example #11
                       initializer=w_init,
                       collections=far.HYPERPARAMETERS_COLLECTIONS,
                       trainable=False)
  fb_hyp = tf.get_variable('fb_hyp', (t_feature,), tf.float32,
                       initializer=b_init,
                       collections=far.HYPERPARAMETERS_COLLECTIONS,
                       trainable=False)
  fe_emb_hyp = tf.tensordot(tf.one_hot(x, t_feature), fe_hyp, axes=1)
  fe_emb_hyp = tf.reduce_sum(tf.reduce_prod(fe_emb_hyp, axis=1), axis=1)
  fb_emb_hyp = tf.tensordot(tf.one_hot(x, t_feature), fb_hyp, axes=1)
  fb_emb_hyp = tf.reduce_sum(fb_emb_hyp, axis=1)
  out_hyp = tf.add_n([fe_emb_hyp, fb_emb_hyp])
  print('Initial model weights (hyperparameters)')
  [print(e) for e in far.utils.hyperparameters()];

weights = far.get_hyperparameter('ex_weights', tf.zeros(datasets.train.num_examples))

with tf.name_scope('errors'):
  # tr_loss = tf.reduce_mean(tf.sigmoid(weights) * tf.losses.mean_squared_error(y, out))
  # val_loss = tf.reduce_mean(tf.losses.mean_squared_error(y, out))
  tr_loss = 0.5 * tf.reduce_sum(tf.sigmoid(weights) * tf.square(y - out))
  tr_loss += 0.01 * tf.reduce_sum(tf.square(fe))
  tr_loss += 0.01 * tf.reduce_sum(tf.square(fb))
  val_loss = 0.5 * tf.reduce_sum(tf.square(y - out))
  val_loss += 0.01 * tf.reduce_sum(tf.square(weights))
accuracy = tf.keras.metrics.mean_squared_error(y, tf.clip_by_value(out, 1.0, 5.0))

lr = far.get_hyperparameter('lr', 0.01)
# lr = tf.constant(0.01, name='lr')
io_optim = far.GradientDescentOptimizer(lr)  # for training error minimization an optimizer from far_ho is needed
oo_optim = tf.train.AdamOptimizer()  # for outer objective optimizer all optimizers from tf are valid
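# The hyperparameter optimizer would then be assembled as in the other examples
# (a sketch following the same pattern; the subsequent training loop is omitted):
farho = far.HyperOptimizer()
run = farho.minimize(val_loss, oo_optim, tr_loss, io_optim)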
Example #12
def build(metasets,
          hyper_model_builder,
          learn_lr,
          lr0,
          MBS,
          mlr0,
          mlr_decay,
          batch_norm_before_classifier,
          weights_initializer,
          process_fn=None):
    exs = [em.SLExperiment(metasets) for _ in range(MBS)]

    hyper_repr_model = hyper_model_builder(exs[0].x, 'HyperRepr')

    if learn_lr:
        lr = far.get_hyperparameter('lr', lr0)
    else:
        lr = tf.constant(lr0, name='lr')

    gs = tf.get_variable('global_step', initializer=0, trainable=False)
    meta_lr = tf.train.inverse_time_decay(mlr0,
                                          gs,
                                          decay_steps=1.,
                                          decay_rate=mlr_decay)

    io_opt = far.GradientDescentOptimizer(lr)
    oo_opt = tf.train.AdamOptimizer(meta_lr)
    far_ho = far.HyperOptimizer()

    for k, ex in enumerate(exs):
        # print(k)  # DEBUG
        with tf.device(available_devices[k % len(available_devices)]):
            repr_out = hyper_repr_model.for_input(ex.x).out

            other_train_vars = []
            if batch_norm_before_classifier:
                batch_mean, batch_var = tf.nn.moments(repr_out, [0])
                scale = tf.Variable(tf.ones_like(repr_out[0]))
                beta = tf.Variable(tf.zeros_like(repr_out[0]))
                other_train_vars.append(scale)
                other_train_vars.append(beta)
                repr_out = tf.nn.batch_normalization(repr_out, batch_mean,
                                                     batch_var, beta, scale,
                                                     1e-3)

            ex.model = em.models.FeedForwardNet(
                repr_out,
                metasets.train.dim_target,
                output_weight_initializer=weights_initializer,
                name='Classifier_%s' % k)

            ex.errors['training'] = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=ex.y,
                                                        logits=ex.model.out))
            ex.errors['validation'] = ex.errors['training']
            ex.scores['accuracy'] = tf.reduce_mean(tf.cast(
                tf.equal(tf.argmax(ex.y, 1), tf.argmax(ex.model.out, 1)),
                tf.float32),
                                                   name='accuracy')

            # simple training step used for testing
            ex.optimizers['ts'] = tf.train.GradientDescentOptimizer(
                lr).minimize(ex.errors['training'], var_list=ex.model.var_list)

            optim_dict = far_ho.inner_problem(ex.errors['training'],
                                              io_opt,
                                              var_list=ex.model.var_list +
                                              other_train_vars)
            far_ho.outer_problem(ex.errors['validation'],
                                 optim_dict,
                                 oo_opt,
                                 hyper_list=tf.get_collection(
                                     far.GraphKeys.HYPERPARAMETERS),
                                 global_step=gs)

    far_ho.finalize(process_fn=process_fn)
    saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES),
                           max_to_keep=240)
    return exs, far_ho, saver
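# A hedged usage sketch: `build` wires MBS per-episode classifiers on top of the shared
# representation; meta-training would then repeatedly call the HyperOptimizer's `run`
# method with per-episode feed dicts. All names below (metasets, hyper_model_builder,
# n_meta_iterations, T, the supplier callables) are hypothetical placeholders.
# exs, far_ho, saver = build(metasets, hyper_model_builder, learn_lr=False, lr0=0.1,
#                            MBS=4, mlr0=0.001, mlr_decay=1.e-5,
#                            batch_norm_before_classifier=False,
#                            weights_initializer=tf.zeros_initializer)
# for meta_step in range(n_meta_iterations):
#     far_ho.run(T, inner_objective_feed_dicts=tr_suppliers,
#                outer_objective_feed_dicts=val_suppliers)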