# Imports assumed by these legacy PaddlePaddle (fluid 1.x) snippets.
import math

import paddle.fluid as fluid
import paddle.fluid.layers.ops as ops
from paddle.fluid.initializer import init_on_cpu
from paddle.fluid.layers import control_flow
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter


def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
    """Applies cosine decay to the learning rate after a linear warmup.

    lr = learning_rate * (math.cos(epoch * (math.pi / epochs)) + 1) / 2

    The learning rate is updated every mini-batch, starting with a
    linear warmup over the first 5 epochs (hard-coded below).
    """
    global_step = _decay_step_counter()
    lr = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")
    warmup_epoch = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(5), force_cpu=True)

    with init_on_cpu():
        epoch = ops.floor(global_step / step_each_epoch)
        with fluid.layers.control_flow.Switch() as switch:
            with switch.case(epoch < warmup_epoch):
                # Linear warmup: ramp from 0 to learning_rate over the
                # first warmup_epoch epochs.
                decayed_lr = learning_rate * (
                    global_step / (step_each_epoch * warmup_epoch))
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
            with switch.default():
                # Half-cosine decay over the remaining steps.
                decayed_lr = learning_rate * (ops.cos(
                    (global_step - warmup_epoch * step_each_epoch) *
                    (math.pi / (epochs * step_each_epoch))) + 1) / 2
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr
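# A minimal pure-Python sketch of the same warmup + cosine schedule, useful
# for sanity-checking values without building a fluid program. The name
# `warmup_cosine_lr` and the default arguments are illustrative assumptions,
# not part of the original code.
def warmup_cosine_lr(global_step, base_lr=0.1, step_each_epoch=5000,
                     epochs=120, warmup_epoch=5):
    warmup_steps = warmup_epoch * step_each_epoch
    if global_step < warmup_steps:
        # Linear ramp from 0 to base_lr over the warmup period.
        return base_lr * global_step / warmup_steps
    # Half-cosine decay from base_lr toward 0 afterwards.
    progress = (global_step - warmup_steps) * math.pi / (epochs * step_each_epoch)
    return base_lr * (math.cos(progress) + 1) / 2

# e.g. warmup_cosine_lr(0) == 0.0; the rate peaks at base_lr right after
# warmup, then follows the half-cosine down.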
def cosine_decay(lr, step_each_epoch, epochs):
    """Applies cosine decay to the learning rate, stepped once per epoch."""
    global_step = _decay_step_counter()
    with init_on_cpu():
        epoch = fluid.layers.floor(global_step / step_each_epoch)
        decayed_lr = lr * (fluid.layers.cos(epoch * (math.pi / epochs)) + 1) / 2
    return decayed_lr
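# The flooring above holds the rate constant within an epoch. A pure-Python
# sketch of the same piecewise-constant schedule (illustrative names, not
# from the original code):
def cosine_lr_per_epoch(global_step, base_lr, step_each_epoch, epochs):
    epoch = global_step // step_each_epoch  # constant within an epoch
    return base_lr * (math.cos(epoch * math.pi / epochs) + 1) / 2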
def exponential_decay_with_warmup(learning_rate,
                                  step_each_epoch,
                                  decay_epochs,
                                  decay_rate=0.97,
                                  warm_up_epoch=5.0):
    """Applies exponential decay to the learning rate after a linear warmup."""
    global_step = _decay_step_counter()
    lr = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")
    warmup_epoch = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)

    with init_on_cpu():
        epoch = ops.floor(global_step / step_each_epoch)
        with fluid.layers.control_flow.Switch() as switch:
            with switch.case(epoch < warmup_epoch):
                decayed_lr = learning_rate * (
                    global_step / (step_each_epoch * warmup_epoch))
                fluid.layers.assign(input=decayed_lr, output=lr)
            with switch.default():
                # Staircase exponential decay after warmup: multiply by
                # decay_rate once every decay_epochs steps.
                div_res = (global_step -
                           warmup_epoch * step_each_epoch) / decay_epochs
                div_res = ops.floor(div_res)
                decayed_lr = learning_rate * (decay_rate**div_res)
                fluid.layers.assign(input=decayed_lr, output=lr)
    return lr
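# A pure-Python sketch of the staircase exponential schedule above, assuming
# `decay_epochs` is measured in steps (it divides a step count in the
# original). Names are illustrative, not from the original code.
def warmup_exp_lr(global_step, base_lr, step_each_epoch, decay_steps,
                  decay_rate=0.97, warmup_epoch=5):
    warmup_steps = warmup_epoch * step_each_epoch
    if global_step < warmup_steps:
        return base_lr * global_step / warmup_steps
    num_decays = (global_step - warmup_steps) // decay_steps
    return base_lr * decay_rate ** num_decays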
def cosine_decay():
    """Applies cosine decay to the learning rate.

    Anneals between the module-level FLAGS.lr_max and FLAGS.lr_min over
    max_step steps (both are module-level globals).
    """
    global_step = _decay_step_counter()
    with init_on_cpu():
        frac = (1 + ops.cos(global_step / max_step * math.pi)) / 2
    return FLAGS.lr_min + (FLAGS.lr_max - FLAGS.lr_min) * frac
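# Unlike the other cosine variants, this one decays toward a floor lr_min
# rather than 0. A pure-Python sketch (illustrative names):
def cosine_range_lr(global_step, lr_min, lr_max, max_step):
    frac = (1 + math.cos(global_step / max_step * math.pi)) / 2
    return lr_min + (lr_max - lr_min) * frac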
def cosine_decay(learning_rate, step_each_epoch, epochs=120):
    """Applies cosine decay to the learning rate, stepped once per epoch.

    lr = learning_rate / 2 * (math.cos(epoch * (math.pi / epochs)) + 1)
    """
    global_step = _decay_step_counter()
    with init_on_cpu():
        epoch = fluid.layers.floor(global_step / step_each_epoch)
        lr = learning_rate / 2.
        decayed_lr = lr * (fluid.layers.cos(epoch * (math.pi / epochs)) + 1)
    return decayed_lr
def cosine_decay(learning_rate, num_epoch, steps_one_epoch):
    """Applies cosine decay to the learning rate.

    lr = learning_rate * (math.cos(epoch * (math.pi / num_epoch)) + 1) / 2

    The epoch here is fractional (no flooring), so the rate decays smoothly
    within each epoch.
    """
    global_step = _decay_step_counter()
    with init_on_cpu():
        decayed_lr = learning_rate * \
            (ops.cos((global_step / steps_one_epoch)
                     * math.pi / num_epoch) + 1) / 2
    return decayed_lr
def cosine_decay_v2(learning_rate, totalsteps):
    """Applies cosine decay to the learning rate.

    lr = learning_rate * (math.cos(global_step * (math.pi / totalsteps)) + 1) / 2

    The learning rate is updated every mini-batch.
    """
    global_step = _decay_step_counter()
    with init_on_cpu():
        decayed_lr = learning_rate * \
            (ops.cos(global_step * (math.pi / float(totalsteps))) + 1) / 2
    return decayed_lr
def cosine_decay(learning_rate, step_each_epoch, epochs=120):
    """Applies cosine decay to the learning rate, stepped once per epoch.

    lr = learning_rate * (math.cos(epoch * (math.pi / epochs)) + 1) / 2
    """
    global_step = _decay_step_counter()
    with init_on_cpu():
        epoch = ops.floor(global_step / step_each_epoch)
        decayed_lr = learning_rate * \
            (ops.cos(epoch * (math.pi / epochs)) + 1) / 2
    return decayed_lr
def cosine_decay_v2_with_warmup(learning_rate, warmupsteps, totalsteps):
    """Applies cosine decay to the learning rate after a linear warmup.

    lr = learning_rate * (math.cos((global_step - warmupsteps)
                                   * (math.pi / totalsteps)) + 1) / 2

    The learning rate is updated every mini-batch, starting with a linear
    warmup over the first warmupsteps steps.
    """
    global_step = _decay_step_counter()
    lr = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")
    with init_on_cpu():
        with control_flow.Switch() as switch:
            with switch.case(global_step < warmupsteps):
                decayed_lr = learning_rate * (global_step / float(warmupsteps))
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
            with switch.default():
                decayed_lr = learning_rate * \
                    (ops.cos((global_step - warmupsteps) *
                             (math.pi / totalsteps)) + 1) / 2
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr
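# A hedged usage sketch, assuming the legacy fluid 1.x program/optimizer API:
# any of these schedules returns a Variable that can be passed as the
# learning_rate of an optimizer inside a program guard. The toy network and
# names (main_prog, startup_prog, x) are illustrative.
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    loss = fluid.layers.reduce_mean(fluid.layers.fc(input=x, size=1))
    lr = cosine_decay_v2_with_warmup(
        learning_rate=0.1, warmupsteps=2000, totalsteps=100000)
    fluid.optimizer.Momentum(learning_rate=lr, momentum=0.9).minimize(loss)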
def poly_decay():
    """Applies polynomial decay to the learning rate.

    Relies on the module-level LEARNING_RATE, TOTAL_STEP and POWER constants.
    """
    global_step = _decay_step_counter()
    with init_on_cpu():
        decayed_lr = LEARNING_RATE * (fluid.layers.pow(
            (1 - global_step / TOTAL_STEP), POWER))
    return decayed_lr
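# Pure-Python sketch of the polynomial schedule above (illustrative names,
# not from the original code). With power = 0.9 this is the "poly" schedule
# commonly used for semantic segmentation models.
def poly_lr(global_step, base_lr, total_step, power=0.9):
    return base_lr * (1 - global_step / total_step) ** power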