def adjust_learning_rate(optimizer, epoch, step_in_epoch, total_steps_in_epoch):
    lr = args.lr
    epoch = epoch + step_in_epoch / total_steps_in_epoch

    # LR warm-up to handle large minibatch sizes from https://arxiv.org/abs/1706.02677
    lr = ramps.linear_rampup(epoch, args.lr_rampup) * (args.lr - args.initial_lr) + args.initial_lr

    if args.lr_rampdown_epochs:
        if epoch < args.epochs:
            # Cosine LR rampdown from https://arxiv.org/abs/1608.03983
            assert args.lr_rampdown_epochs >= args.epochs
            lr *= ramps.cosine_rampdown(epoch, args.lr_rampdown_epochs)
        elif epoch >= args.epochs:
            if args.constant_lr:
                # Hold the LR at the value the cosine schedule reaches at constant_lr_epoch
                constant_lr = ramps.cosine_rampdown(args.constant_lr_epoch, args.lr_rampdown_epochs)
                lr *= constant_lr
            else:
                # Cyclic cosine rampdown: after the main schedule, repeat the tail of the
                # cosine curve every cycle_interval epochs
                lr_rampdown_epochs = (args.lr_rampdown_epochs if args.cycle_rampdown_epochs == 0
                                      else args.cycle_rampdown_epochs)
                lr *= ramps.cosine_rampdown(
                    (lr_rampdown_epochs - (args.lr_rampdown_epochs - args.epochs) - args.cycle_interval)
                    + ((epoch - args.epochs) % args.cycle_interval),
                    lr_rampdown_epochs)

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
def adjust_learning_rate(self, optimizer, epoch, step_in_epoch, total_steps_in_epoch): """ Adjust the learning rate :param optimizer: :param epoch: :param step_in_epoch: :param total_steps_in_epoch: :return: """ args = self.args lr = self.args.lr epoch = epoch + step_in_epoch / total_steps_in_epoch # LR warm-up to handle large minibatch sizes from https://arxiv.org/abs/1706.02677 """ With these simple techniques, our Caffe2- based system trains ResNet-50 with a minibatch size of 8192 on 256 GPUs in one hour, while matching small minibatch accuracy. Using commodity hardware, our implementation achieves ∼90% scaling efficiency when moving from 8 to 256 GPUs. Our findings enable training visual recognition models on internet-scale data with high efficiency """ lr = ramps.linear_rampup(epoch, self.args.lr_rampup) * ( self.args.lr - self.args.initial_lr) + self.args.initial_lr # Cosine LR rampdown from https://arxiv.org/abs/1608.03983 (but one cycle only) if self.args.lr_rampdown_epochs: assert self.args.lr_rampdown_epochs >= self.args.epochs lr *= ramps.cosine_rampdown(epoch, self.args.lr_rampdown_epochs) #logger.warning("Learning rate: " + str(lr)) for param_group in optimizer.param_groups: param_group['lr'] = lr
def adjust_learning_rate(optimizer, epoch, step_in_epoch, total_steps_in_epoch):
    lr = args.lr
    epoch = epoch + step_in_epoch / total_steps_in_epoch

    # LR warm-up to handle large minibatch sizes from https://arxiv.org/abs/1706.02677
    lr = ramps.linear_rampup(epoch, args.lr_rampup) * (args.lr - args.initial_lr) + args.initial_lr

    # Cosine LR rampdown from https://arxiv.org/abs/1608.03983 (but one cycle only)
    if args.lr_rampdown_epochs:
        assert args.lr_rampdown_epochs >= args.epochs
        lr *= ramps.cosine_rampdown(epoch, args.lr_rampdown_epochs)

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
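# All of the variants above delegate the shaping of the schedule to ramps.linear_rampup and
# ramps.cosine_rampdown. As a point of reference, here is a minimal sketch of what those
# helpers are assumed to compute: a linear warm-up factor in [0, 1] and the SGDR cosine
# factor from https://arxiv.org/abs/1608.03983. The actual ramps module may clip or assert
# differently; treat this as an assumption, not the original implementation.
import numpy as np

def linear_rampup(current, rampup_length):
    # Linear warm-up factor: grows from 0 to 1 over the first rampup_length epochs, then stays at 1.
    if rampup_length == 0:
        return 1.0
    return float(np.clip(current / rampup_length, 0.0, 1.0))

def cosine_rampdown(current, rampdown_length):
    # Cosine rampdown factor: decays from 1 to 0 over rampdown_length epochs (half a cosine cycle).
    assert 0 <= current <= rampdown_length
    return float(0.5 * (np.cos(np.pi * current / rampdown_length) + 1))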
def adjust_learning_rate(optimizer, epoch,  # modified for fastSWA
                         step_in_epoch, total_steps_in_epoch):
    lr = args.lr  # max lr (initial lr)
    epoch = epoch + step_in_epoch / total_steps_in_epoch

    # LR warm-up to handle large minibatch sizes from https://arxiv.org/abs/1706.02677
    # lr = ramps.linear_rampup(epoch, args.lr_rampup) * (args.lr - args.initial_lr) + args.initial_lr
    # no rampup when doing fastSWA

    # Cosine LR rampdown from https://arxiv.org/abs/1608.03983 (but one cycle only)
    if args.cycle_rampdown_epochs:
        assert args.cycle_rampdown_epochs >= args.epochs
        if epoch <= args.epochs:
            lr *= ramps.cosine_rampdown(epoch, args.cycle_rampdown_epochs)
        else:
            # After the main schedule, keep cycling through the last cycle_interval
            # epochs of the cosine curve (fastSWA averaging cycles)
            epoch_ = (args.epochs - args.cycle_interval) + (
                (epoch - args.epochs) % args.cycle_interval)
            lr *= ramps.cosine_rampdown(epoch_, args.cycle_rampdown_epochs)

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
def adjust_learning_rate(epoch, step_in_epoch, total_steps_in_epoch):
    epoch = epoch + step_in_epoch / total_steps_in_epoch

    if args.swa and epoch >= args.swa_start:
        # Once SWA starts, hold the LR constant at the SWA learning rate
        lr = args.swa_lr
    else:
        # LR warm-up to handle large minibatch sizes from https://arxiv.org/abs/1706.02677
        lr = ramps.linear_rampup(epoch, args.lr_rampup) * (
            args.lr - args.initial_lr) + args.initial_lr

        # Cosine LR rampdown from https://arxiv.org/abs/1608.03983 (but one cycle only)
        if args.lr_rampdown_epochs:
            assert args.lr_rampdown_epochs >= args.epochs
            lr *= ramps.cosine_rampdown(epoch, args.lr_rampdown_epochs)

    return lr
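# Unlike the earlier variants, the last one returns the learning rate instead of writing it
# into the optimizer, so the training loop has to apply it itself. A minimal usage sketch,
# assuming a standard PyTorch-style loop; train_one_epoch, train_loader, and the model call
# are placeholders, not names from the original code:
def train_one_epoch(train_loader, model, optimizer, epoch):
    for i, (inputs, targets) in enumerate(train_loader):
        # Recompute the schedule every step so the LR changes smoothly within the epoch.
        lr = adjust_learning_rate(epoch, i, len(train_loader))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        loss = model(inputs, targets)  # placeholder for the actual forward/loss computation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()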