Example 1
def test_zaremba():
    from baseline.tf import optz  # imported for its side effects: makes the TF scheduler implementations available
    tf.reset_default_graph()
    sess = tf.Session()

    lr_sched = create_lr_scheduler(**ZAREMBA_LR_CONFIG)
    bl_zaremba = ZarembaDecayScheduler(**ZAREMBA_LR_CONFIG)
    lr_var = tf.placeholder(tf.float32, shape=(), name='lr')
    step_var = tf.placeholder(tf.int32, shape=(), name='step')

    gph = lr_sched(lr_var, step_var)
    sess.run(tf.global_variables_initializer())

    lrs = []
    lrs_bl = []
    expect_lrs = []
    current_lr = INIT_LR
    for step in range(NUM_STEPS):
        lr = sess.run(gph, feed_dict={lr_var: INIT_LR, step_var: step})
        lr_bl = bl_zaremba(step)
        lrs += [lr]
        lrs_bl += [lr_bl]
        if step in BOUNDS:
            b = BOUNDS.index(step)
            current_lr = ZAREMBA_DECAY_VALUES[b]
        expect_lrs += [current_lr]
    assert np.allclose(expect_lrs, lrs)
    assert np.allclose(expect_lrs, lrs_bl)
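The test above depends on module-level fixtures (INIT_LR, NUM_STEPS, BOUNDS, ZAREMBA_DECAY_VALUES, ZAREMBA_LR_CONFIG) that are defined elsewhere in the test module. A minimal sketch of what such fixtures could look like, assuming the Zaremba scheduler accepts bounds and decay_values keyword arguments; the concrete numbers are illustrative, not the originals:

# Hypothetical fixture values; the real test module defines its own.
INIT_LR = 0.5
NUM_STEPS = 1000
BOUNDS = [100, 200, 300]                      # steps at which the learning rate changes
ZAREMBA_DECAY_VALUES = [0.05, 0.005, 0.0005]  # learning rate used once each bound is reached
ZAREMBA_LR_CONFIG = {
    'lr_scheduler_type': 'zaremba',
    'lr': INIT_LR,
    'bounds': BOUNDS,
    'decay_values': ZAREMBA_DECAY_VALUES,
}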
Example 2
def test_exp():
    from baseline.tf import optz
    tf.reset_default_graph()
    sess = tf.Session()

    lr_sched = create_lr_scheduler(**EXP_LR_CONFIG)
    bl_exp = ExponentialDecayScheduler(**EXP_LR_CONFIG)
    decay_rate = EXP_LR_CONFIG['decay_rate']

    lr_var = tf.placeholder(tf.float32, shape=(), name='lr')
    step_var = tf.placeholder(tf.int32, shape=(), name='step')

    gph = lr_sched(lr_var, step_var)
    sess.run(tf.global_variables_initializer())

    lrs = []
    lrs_bl = []
    for step in range(NUM_STEPS):
        lr = sess.run(gph, feed_dict={lr_var: INIT_LR, step_var: step})
        lrs += [lr]
        lr_bl = bl_exp(step)
        lrs_bl += [lr_bl]
    inv_times = [(INIT_LR * decay_rate ** (t/100.)) for t in range(NUM_STEPS)]
    assert np.allclose(inv_times, lrs)
    assert np.allclose(inv_times, lrs_bl)
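The divisor of 100 in the expected values suggests a decay period of 100 steps. A sketch of a config consistent with that expectation, assuming the exponential scheduler accepts decay_steps and decay_rate keywords; the actual constants live in the test module:

# Hypothetical config; the real EXP_LR_CONFIG is defined in the test module.
EXP_LR_CONFIG = {
    'lr_scheduler_type': 'exponential',
    'lr': INIT_LR,
    'decay_rate': 0.5,   # any rate works; the test reads it back from the config
    'decay_steps': 100,  # matches the t/100. term in the expected values
}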
Example 3
    def __init__(self, model, global_step=0, **kwargs):
        self.global_step = global_step
        if 'lr_function' in kwargs:
            self.lr_function = kwargs['lr_function']
        else:
            if 'lr_scheduler_type' not in kwargs:
                kwargs['lr_scheduler_type'] = 'default'
            self.lr_function = create_lr_scheduler(**kwargs)
        self._init_optimizer(model, **kwargs)
Example 4
def optimizer(loss_fn, **kwargs):

    #global_step = tf.Variable(0, trainable=False)
    global_step = tf.train.get_or_create_global_step()
    clip = kwargs.get('clip', None)
    optim = kwargs.get('optim', 'sgd')
    eta = kwargs.get('lr', kwargs.get('eta', 0.01))
    lr_scheduler = create_lr_scheduler(**kwargs)
    decay_fn = None
    colocate_gradients_with_ops = bool(
        kwargs.get('colocate_gradients_with_ops', False))
    sgd_mom = float(kwargs.get('mom', 0.9))
    if optim == 'adadelta':
        rho = float(kwargs.get('rho', 0.95))
        eps = float(kwargs.get('epsilon', 1e-6))
        logger.info('adadelta(eta=%f, rho=%f, epsilon=%f)', eta, rho, eps)
        optz = lambda lr: tf.train.AdadeltaOptimizer(lr, rho, eps)
    elif optim == 'adam':
        beta1 = float(kwargs.get('beta1', 0.9))
        beta2 = float(kwargs.get('beta2', 0.999))
        eps = float(kwargs.get('epsilon', 1e-8))
        logger.info('adam(eta=%f beta1=%f, beta2=%f, eps=%f)', eta, beta1,
                    beta2, eps)
        optz = lambda lr: tf.train.AdamOptimizer(lr, beta1, beta2, eps)
    elif optim == 'adamw':
        wd = float(kwargs.get('weight_decay', 0))
        beta1 = float(kwargs.get('beta1', 0.9))
        beta2 = float(kwargs.get('beta2', 0.999))
        eps = float(kwargs.get('epsilon', 1e-8))
        logger.info('adamw(eta=%f beta1=%f, beta2=%f, eps=%f)', eta, beta1,
                    beta2, eps)
        optz = lambda lr: AdamWOptimizer(lr, wd, beta1, beta2, eps)
    elif optim == 'rmsprop':
        # Get mom again with a different default
        mom = float(kwargs.get('mom', 0.0))
        logger.info('rmsprop(eta=%f, mom=%f)', eta, mom)
        optz = lambda lr: tf.train.RMSPropOptimizer(lr, momentum=mom)
    elif sgd_mom > 0:
        logger.info('sgd-mom(eta=%f, mom=%f)', eta, sgd_mom)
        optz = lambda lr: tf.train.MomentumOptimizer(lr, sgd_mom)
    else:
        logger.info('sgd(eta=%f)', eta)
        optz = lambda lr: tf.train.GradientDescentOptimizer(lr)

    logger.info('clip gradients at %s', clip)
    return global_step, tf.contrib.layers.optimize_loss(
        loss_fn,
        global_step,
        eta,
        optz,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        clip_gradients=clip,
        learning_rate_decay_fn=lr_scheduler,
        increment_global_step=True)
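A minimal sketch of how this helper might be driven in TF1 graph mode, assuming a toy regression loss; the loss, feed values, and hyperparameters are illustrative only and not part of the original code:

# Hypothetical usage of optimizer(); everything below is illustrative.
x = tf.placeholder(tf.float32, shape=(None, 10), name='x')
y = tf.placeholder(tf.float32, shape=(None,), name='y')
w = tf.get_variable('w', shape=(10, 1))
loss = tf.reduce_mean(tf.square(tf.squeeze(tf.matmul(x, w), axis=-1) - y))

# 'sgd' is the default optim; mom=0.9 selects MomentumOptimizer, clip caps the gradient norm.
global_step, train_op = optimizer(loss, optim='sgd', lr=0.01, mom=0.9, clip=5.0,
                                  lr_scheduler_type='default')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # sess.run(train_op, feed_dict={x: batch_x, y: batch_y}) inside the training loop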
Example 5
def optimizer(loss_fn, **kwargs):

    #global_step = tf.Variable(0, trainable=False)
    global_step = tf.train.get_or_create_global_step()
    clip = kwargs.get('clip', None)
    optim = kwargs.get('optim', 'sgd')
    eta = kwargs.get('lr', kwargs.get('eta', 0.01))
    lr_scheduler = create_lr_scheduler(**kwargs)
    decay_fn = None
    colocate_gradients_with_ops = bool(
        kwargs.get('colocate_gradients_with_ops', False))
    sgd_mom = float(kwargs.get('mom', 0.9))
    if optim == 'adadelta':
        #print('adadelta', eta)
        optz = lambda lr: tf.train.AdadeltaOptimizer(lr, 0.95, 1e-6)
    elif optim == 'adam':
        #print('adam', eta)
        optz = lambda lr: tf.train.AdamOptimizer(lr, kwargs.get('beta1', 0.9),
                                                 kwargs.get('beta2', 0.999),
                                                 kwargs.get('epsilon', 1e-8))
    elif optim == 'adamw':
        wd = float(kwargs.get('weight_decay', 0))
        optz = lambda lr: AdamWOptimizer(lr, wd, kwargs.get('beta1', 0.9),
                                         kwargs.get('beta2', 0.999),
                                         kwargs.get('epsilon', 1e-8))
    elif optim == 'rmsprop':
        #print('rmsprop', eta)
        optz = lambda lr: tf.train.RMSPropOptimizer(
            lr, momentum=float(kwargs.get('mom', 0.0)))
    elif sgd_mom > 0:
        #print('sgd-mom', eta, sgd_mom)
        optz = lambda lr: tf.train.MomentumOptimizer(lr, sgd_mom)
    else:
        #print('sgd')
        optz = lambda lr: tf.train.GradientDescentOptimizer(lr)

    #print('clip', clip)
    #print('decay', decay_fn)
    return global_step, tf.contrib.layers.optimize_loss(
        loss_fn,
        global_step,
        eta,
        optz,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        clip_gradients=clip,
        learning_rate_decay_fn=lr_scheduler,
        increment_global_step=True)
Example 6
def optimizer(loss_fn, **kwargs):

    #global_step = tf.Variable(0, trainable=False)
    global_step = tf.train.get_or_create_global_step()
    clip = kwargs.get('clip', None)
    optim = kwargs.get('optim', 'sgd')
    eta = kwargs.get('lr', kwargs.get('eta', 0.01))
    lr_scheduler = create_lr_scheduler(**kwargs)
    decay_fn = None
    colocate_gradients_with_ops = bool(kwargs.get('colocate_gradients_with_ops', False))
    sgd_mom = float(kwargs.get('mom', 0.9))
    if optim == 'adadelta':
        rho = float(kwargs.get('rho', 0.95))
        eps = float(kwargs.get('epsilon', 1e-6))
        logger.info('adadelta(eta=%f, rho=%f, epsilon=%f)', eta, rho, eps)
        optz = lambda lr: tf.train.AdadeltaOptimizer(lr, rho, eps)
    elif optim == 'adam':
        beta1 = float(kwargs.get('beta1', 0.9))
        beta2 = float(kwargs.get('beta2', 0.999))
        eps = float(kwargs.get('epsilon', 1e-8))
        logger.info('adam(eta=%f beta1=%f, beta2=%f, eps=%f)', eta, beta1, beta2, eps)
        optz = lambda lr: tf.train.AdamOptimizer(lr, beta1, beta2, eps)
    elif optim == 'adamw':
        wd = float(kwargs.get('weight_decay', 0))
        beta1 = float(kwargs.get('beta1', 0.9))
        beta2 = float(kwargs.get('beta2', 0.999))
        eps = float(kwargs.get('epsilon', 1e-8))
        logger.info('adamw(eta=%f beta1=%f, beta2=%f, eps=%f)', eta, beta1, beta2, eps)
        optz = lambda lr: AdamWOptimizer(lr, wd, beta1, beta2, eps)
    elif optim == 'rmsprop':
        # Get mom again with a different default
        mom = float(kwargs.get('mom', 0.0))
        logger.info('rmsprop(eta=%f, mom=%f)', eta, mom)
        optz = lambda lr: tf.train.RMSPropOptimizer(lr, momentum=mom)
    elif sgd_mom > 0:
        logger.info('sgd-mom(eta=%f, mom=%f)', eta, sgd_mom)
        optz = lambda lr: tf.train.MomentumOptimizer(lr, sgd_mom)
    else:
        logger.info('sgd(eta=%f)', eta)
        optz = lambda lr: tf.train.GradientDescentOptimizer(lr)

    logger.info('clip gradients at %s', clip)
    return global_step, tf.contrib.layers.optimize_loss(loss_fn, global_step, eta, optz,
                                                        colocate_gradients_with_ops=colocate_gradients_with_ops,
                                                        clip_gradients=clip, learning_rate_decay_fn=lr_scheduler,
                                                        increment_global_step=True)
Example 7
def test_cyclic():
    from baseline.tf import optz
    tf.reset_default_graph()
    sess = tf.Session()

    lr_sched = create_lr_scheduler(**CYCLIC_LR_CONFIG)
    bl_const = CyclicLRScheduler(**CYCLIC_LR_CONFIG)

    lr_var = tf.placeholder(tf.float32, shape=(), name='lr')
    step_var = tf.placeholder(tf.int32, shape=(), name='step')

    gph = lr_sched(lr_var, step_var)
    sess.run(tf.global_variables_initializer())

    for step in range(NUM_STEPS):
        lr = sess.run(gph, feed_dict={lr_var: INIT_LR, step_var: step})
        lr_bl = bl_const(step)
        assert np.isclose(lr, lr_bl)
Example 8
def test_constant():
    from baseline.tf import optz
    tf.reset_default_graph()
    sess = tf.Session()

    lr_sched = create_lr_scheduler(lr=INIT_LR, lr_scheduler_type='default')
    bl_const = ConstantScheduler(lr=INIT_LR)

    lr_var = tf.placeholder(tf.float32, shape=(), name='lr')
    step_var = tf.placeholder(tf.int32, shape=(), name='step')

    gph = lr_sched(lr_var, step_var)
    sess.run(tf.global_variables_initializer())

    for step in range(NUM_STEPS):
        lr = sess.run(gph, feed_dict={lr_var: INIT_LR, step_var: step})
        assert np.isclose(INIT_LR, lr)
        assert np.isclose(INIT_LR, bl_const(step))
Example 9
def test_linear_warmup():
    from baseline.tf import optz
    tf.reset_default_graph()
    sess = tf.Session()

    lr_sched = create_lr_scheduler(**LINEAR_WARMUP_LR_CONFIG)
    warmup_steps = LINEAR_WARMUP_LR_CONFIG['warmup_steps']

    lr_var = tf.placeholder(tf.float32, shape=(), name='lr')
    step_var = tf.placeholder(tf.int32, shape=(), name='step')

    gph = lr_sched(lr_var, step_var)
    sess.run(tf.global_variables_initializer())

    lrs = []
    for step in range(NUM_STEPS):
        lr = sess.run(gph, feed_dict={lr_var: INIT_LR, step_var: step})
        lrs += [lr]

    expected_lrs = [INIT_LR*min(1.0, step / warmup_steps) for step in range(NUM_STEPS)]
    assert np.allclose(expected_lrs, lrs)
Example 10
def test_composite_warmup():
    from baseline.tf import optz
    tf.reset_default_graph()
    warmup_steps = COMPOSITE_LR_CONFIG['warmup_steps']
    decay_rate = EXP_LR_CONFIG['decay_rate']
    with tf.Session() as sess:
        lr_sched = create_lr_scheduler(**COMPOSITE_LR_CONFIG)
        lr_var = tf.placeholder(tf.float32, name='lr')
        step_var = tf.placeholder(tf.int32, name='step')

        out = lr_sched(lr_var, step_var)
        sess.run(tf.global_variables_initializer())

        lrs = [sess.run(out, {lr_var: INIT_LR, step_var: step}) for step in range(NUM_STEPS)]

        warmup_expected = [INIT_LR * min(1.0, step / warmup_steps) for step in range(NUM_STEPS)]
        exp_expected = [(INIT_LR * decay_rate ** (t/100.)) for t in range(NUM_STEPS)]

    for step in range(NUM_STEPS):
        if step < warmup_steps:
            assert np.allclose(lrs[step], warmup_expected[step])
        else:
            assert np.allclose(lrs[step], exp_expected[step - warmup_steps])
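As Example 11 below shows, a composite scheduler is requested by passing a list for lr_scheduler_type. A sketch of a composite config consistent with this test, assuming the linear-warmup scheduler is registered under the name 'warmup_linear'; the name and numbers are assumptions, not values from the test module:

# Hypothetical composite config; the real COMPOSITE_LR_CONFIG lives in the test module.
COMPOSITE_LR_CONFIG = {
    'lr_scheduler_type': ['warmup_linear', 'exponential'],  # warmup first, then exponential decay
    'lr': INIT_LR,
    'warmup_steps': 50,
    'decay_rate': 0.5,
    'decay_steps': 100,
}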
Example 11
def test_composite_error():
    pytest.importorskip('torch')
    from baseline.pytorch.optz import CompositeLRSchedulerPyTorch
    with pytest.raises(AssertionError):
        _ = create_lr_scheduler(**{"lr_scheduler_type": ["exponential", "zaremba"]})
Example 12
    def __init__(self, model, global_step=0, **kwargs):
        self.global_step = global_step
        if 'lr_scheduler_type' not in kwargs:
            kwargs['lr_scheduler_type'] = 'default'
        self.lr_function = create_lr_scheduler(**kwargs)
        self._init_optimizer(model, **kwargs)