Example #1
import math

import paddle.fluid as fluid
from paddle.fluid.initializer import init_on_cpu
from paddle.fluid.layers import control_flow, ops
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter


def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
    """Applies cosine decay to the learning rate.

    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)

    The rate is decreased every mini-batch, after a 5-epoch linear warmup.
    """
    global_step = _decay_step_counter()
    lr = fluid.layers.tensor.create_global_var(shape=[1],
                                               value=0.0,
                                               dtype='float32',
                                               persistable=True,
                                               name="learning_rate")

    # Warmup length is fixed at 5 epochs.
    warmup_epoch = fluid.layers.fill_constant(shape=[1],
                                              dtype='float32',
                                              value=float(5),
                                              force_cpu=True)

    with init_on_cpu():
        epoch = ops.floor(global_step / step_each_epoch)
        with fluid.layers.control_flow.Switch() as switch:
            with switch.case(epoch < warmup_epoch):
                # Linear warmup: ramp from 0 up to learning_rate.
                decayed_lr = learning_rate * (global_step /
                                              (step_each_epoch * warmup_epoch))
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
            with switch.default():
                # Cosine decay on the post-warmup step count.
                decayed_lr = learning_rate * \
                    (ops.cos((global_step - warmup_epoch * step_each_epoch) *
                             (math.pi / (epochs * step_each_epoch))) + 1) / 2
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr
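A minimal usage sketch, assuming the legacy PaddlePaddle 1.x fluid API: the schedule returns a learning-rate Variable that fluid optimizers accept directly. The step and epoch counts below are illustrative, not from the original snippet.

import paddle.fluid as fluid

# Hypothetical numbers: 1000 mini-batches per epoch, 120 epochs total.
lr_var = cosine_decay_with_warmup(learning_rate=0.05,
                                  step_each_epoch=1000,
                                  epochs=120)
optimizer = fluid.optimizer.Momentum(learning_rate=lr_var, momentum=0.9)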
Example #2
def cosine_decay(lr, step_each_epoch, epochs):
    """Cosine-decays lr once per epoch (no warmup)."""
    global_step = _decay_step_counter()
    with init_on_cpu():
        # Flooring to whole epochs keeps the rate constant within an epoch.
        epoch = fluid.layers.floor(global_step / step_each_epoch)
        decayed_lr = lr * (fluid.layers.cos(epoch *
                                            (math.pi / epochs)) + 1) / 2
    return decayed_lr
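For intuition, the same curve can be traced in plain Python, outside any Paddle graph; the numbers below are illustrative.

import math

def cosine_decay_ref(lr, step_each_epoch, epochs, global_step):
    # Plain-Python mirror of the graph above, for sanity checking.
    epoch = global_step // step_each_epoch
    return lr * (math.cos(epoch * math.pi / epochs) + 1) / 2

print(cosine_decay_ref(0.1, 1000, 120, 0))           # 0.1 (full rate)
print(cosine_decay_ref(0.1, 1000, 120, 60 * 1000))   # 0.05 (halfway)
print(cosine_decay_ref(0.1, 1000, 120, 119 * 1000))  # ~1.7e-05 (last epoch)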
Example #3
def exponential_decay_with_warmup(learning_rate,
                                  step_each_epoch,
                                  decay_epochs,
                                  decay_rate=0.97,
                                  warm_up_epoch=5.0):
    """Applies exponential decay to the learning rate, after a linear warmup.

    Note: decay_epochs is compared against the raw step counter below,
    so it is measured in steps, not epochs.
    """
    global_step = _decay_step_counter()
    lr = fluid.layers.tensor.create_global_var(shape=[1],
                                               value=0.0,
                                               dtype='float32',
                                               persistable=True,
                                               name="learning_rate")

    warmup_epoch = fluid.layers.fill_constant(shape=[1],
                                              dtype='float32',
                                              value=float(warm_up_epoch),
                                              force_cpu=True)

    with init_on_cpu():
        epoch = ops.floor(global_step / step_each_epoch)
        with fluid.layers.control_flow.Switch() as switch:
            with switch.case(epoch < warmup_epoch):
                # Linear warmup: ramp from 0 up to learning_rate.
                decayed_lr = learning_rate * (global_step /
                                              (step_each_epoch * warmup_epoch))
                fluid.layers.assign(input=decayed_lr, output=lr)
            with switch.default():
                # Staircase decay: multiply by decay_rate once per interval.
                div_res = (global_step -
                           warmup_epoch * step_each_epoch) / decay_epochs
                div_res = ops.floor(div_res)
                decayed_lr = learning_rate * (decay_rate**div_res)
                fluid.layers.assign(input=decayed_lr, output=lr)

    return lr
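The default branch is a staircase: every decay_epochs steps after warmup, the rate is multiplied by decay_rate. A plain-Python mirror, with illustrative numbers:

import math

def exp_decay_ref(lr, step_each_epoch, decay_epochs, decay_rate,
                  warm_up_epoch, global_step):
    warmup_steps = step_each_epoch * warm_up_epoch
    if global_step < warmup_steps:
        # Linear warmup from 0 to lr.
        return lr * global_step / warmup_steps
    # Staircase exponential decay after warmup.
    return lr * decay_rate ** math.floor((global_step - warmup_steps)
                                         / decay_epochs)

print(exp_decay_ref(0.1, 1000, 2400, 0.97, 5.0, 2500))  # 0.05 (mid-warmup)
print(exp_decay_ref(0.1, 1000, 2400, 0.97, 5.0, 5000))  # 0.1  (warmup done)
print(exp_decay_ref(0.1, 1000, 2400, 0.97, 5.0, 9800))  # 0.1 * 0.97**2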
Example #4
def cosine_decay():
    """Applies cosine decay to the learning rate.

    FLAGS.lr_min, FLAGS.lr_max, and max_step are module-level globals
    in the snippet's source.
    """
    global_step = _decay_step_counter()
    with init_on_cpu():
        # frac runs from 1 at step 0 down to 0 at max_step.
        frac = (1 + ops.cos(global_step / max_step * math.pi)) / 2
    return FLAGS.lr_min + (FLAGS.lr_max - FLAGS.lr_min) * frac
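This variant anneals between explicit bounds (FLAGS.lr_max down to FLAGS.lr_min) rather than toward zero. A plain-Python check of the endpoints, with made-up values for the globals:

import math

def bounded_cosine_ref(lr_min, lr_max, max_step, global_step):
    frac = (1 + math.cos(global_step / max_step * math.pi)) / 2
    return lr_min + (lr_max - lr_min) * frac

print(bounded_cosine_ref(0.001, 0.1, 10000, 0))      # 0.1   (starts at lr_max)
print(bounded_cosine_ref(0.001, 0.1, 10000, 10000))  # 0.001 (ends at lr_min)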
Example #5
def cosine_decay(learning_rate, step_each_epoch, epochs=120):
    """Applies cosine decay to the learning rate.

    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
    """
    global_step = _decay_step_counter()
    with init_on_cpu():
        epoch = fluid.layers.floor(global_step / step_each_epoch)
        # Factor out learning_rate / 2; otherwise identical to Example #2.
        lr = learning_rate / 2.
        decayed_lr = lr * (fluid.layers.cos(epoch * (math.pi / epochs)) + 1)
    return decayed_lr
Example #6
def cosine_decay(learning_rate, num_epoch, steps_one_epoch):
    """Applies cosine decay to the learning rate.

    lr = 0.5 * (math.cos(epoch * (math.pi / 120)) + 1)
    """
    global_step = _decay_step_counter()

    with init_on_cpu():
        # No floor here: the fractional epoch makes the rate change
        # every mini-batch rather than once per epoch.
        decayed_lr = learning_rate * \
            (ops.cos((global_step / steps_one_epoch)
                     * math.pi / num_epoch) + 1) / 2
    return decayed_lr
Example #7
def cosine_decay_v2(learning_rate, totalsteps):
    """Applies cosine decay to the learning rate.

    lr = 0.05 * (math.cos(global_step * (math.pi / totalsteps)) + 1)

    The rate is decreased every mini-batch.
    """
    global_step = _decay_step_counter()

    with init_on_cpu():
        # Cosine on the raw step counter: anneals from learning_rate
        # to 0 over totalsteps.
        decayed_lr = learning_rate * \
            (ops.cos(global_step * (math.pi / float(totalsteps))) + 1) / 2
    return decayed_lr
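cosine_decay_v2 differs from the per-epoch variants above only in granularity: the cosine is evaluated on the raw step counter, so the rate moves every mini-batch instead of once per epoch. A plain-Python illustration of the difference, with made-up step counts:

import math

def per_epoch(lr, step, step_each_epoch, epochs):
    return lr * (math.cos((step // step_each_epoch) * math.pi / epochs) + 1) / 2

def per_step(lr, step, total_steps):
    return lr * (math.cos(step * math.pi / total_steps) + 1) / 2

# Mid-epoch, the per-epoch schedule is flat while v2 keeps moving:
print(per_epoch(0.1, 1500, 1000, 120))  # same value as at step 1000
print(per_step(0.1, 1500, 120 * 1000))  # slightly lower than at step 1000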
Example #8
def cosine_decay(learning_rate, step_each_epoch, epochs=120):
    """Applies cosine decay to the learning rate.

    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
    """
    global_step = _decay_step_counter()

    with init_on_cpu():
        # Decay once per whole epoch.
        epoch = ops.floor(global_step / step_each_epoch)
        decayed_lr = learning_rate * \
            (ops.cos(epoch * (math.pi / epochs)) + 1) / 2
    return decayed_lr
Example #9
def cosine_decay_v2_with_warmup(learning_rate, warmupsteps, totalsteps):
    """Applies cosine decay to the learning rate.

    lr = 0.05 * (math.cos(global_step * (math.pi / totalsteps)) + 1)

    The rate is decreased every mini-batch, after a linear warmup of
    warmupsteps steps.
    """
    global_step = _decay_step_counter()
    lr = fluid.layers.tensor.create_global_var(shape=[1],
                                               value=0.0,
                                               dtype='float32',
                                               persistable=True,
                                               name="learning_rate")

    with init_on_cpu():
        with control_flow.Switch() as switch:
            with switch.case(global_step < warmupsteps):
                # Linear warmup: ramp from 0 up to learning_rate.
                decayed_lr = learning_rate * (global_step / float(warmupsteps))
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
            with switch.default():
                # Cosine decay on the post-warmup step count.
                decayed_lr = learning_rate * \
                    (ops.cos((global_step - warmupsteps) *
                             (math.pi / totalsteps)) + 1) / 2
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr
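The two branches meet continuously at warmupsteps: the warmup ramp reaches learning_rate exactly where the cosine term starts at its maximum. A plain-Python trace with illustrative numbers:

import math

def v2_warmup_ref(lr, warmupsteps, totalsteps, step):
    if step < warmupsteps:
        return lr * step / float(warmupsteps)
    return lr * (math.cos((step - warmupsteps) * math.pi / totalsteps) + 1) / 2

for step in (0, 2500, 5000, 65000, 125000):
    print(step, v2_warmup_ref(0.1, 5000, 120000, step))
# 0 -> 0.0, 2500 -> 0.05, 5000 -> 0.1, 65000 -> 0.05, 125000 -> ~0.0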
Example #10
def poly_decay():
    """Polynomial decay.

    LEARNING_RATE, TOTAL_STEP, and POWER are module-level globals in the
    snippet's source.
    """
    global_step = _decay_step_counter()
    with init_on_cpu():
        decayed_lr = LEARNING_RATE * (fluid.layers.pow(
            (1 - global_step / TOTAL_STEP), POWER))
    return decayed_lr
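poly_decay implements the standard polynomial schedule lr * (1 - step / total) ** power. A plain-Python mirror, with made-up values standing in for the globals:

def poly_decay_ref(lr, total_step, power, step):
    return lr * (1 - step / total_step) ** power

print(poly_decay_ref(0.01, 100000, 0.9, 0))       # 0.01
print(poly_decay_ref(0.01, 100000, 0.9, 50000))   # ~0.00536
print(poly_decay_ref(0.01, 100000, 0.9, 100000))  # 0.0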