def get_learning_rate_scheduler(optimizer): """Build the learning rate scheduler.""" args = get_args() # Add linear learning rate scheduler. if args.lr_decay_iters is not None: num_iters = args.lr_decay_iters else: num_iters = args.train_iters num_iters = max(1, num_iters) init_step = 0 if args.warmup_iters is not None: warmup_iter = args.warmup_iters else: warmup_iter = args.warmup * num_iters lr_scheduler = AnnealingLR( optimizer, start_lr=args.lr, warmup_iter=warmup_iter, total_iters=num_iters, decay_style=args.lr_decay_style, last_iter=init_step, min_lr=args.min_lr, use_checkpoint_lr_scheduler=args.use_checkpoint_lr_scheduler, override_lr_scheduler=args.override_lr_scheduler) return lr_scheduler
def get_learning_rate_scheduler(optimizer): """Build the learning rate scheduler.""" args = get_args() if args.deepspeed and args.onebitadam: print_rank_0("WARNING: onebitadam requires the lr scheduler be built by deepspeed - " "Make sure one is added to your deepspeed config") return None # Add linear learning rate scheduler. if args.lr_decay_iters is not None: num_iters = args.lr_decay_iters else: num_iters = args.train_iters num_iters = max(1, num_iters) init_step = 0 warmup_iter = args.warmup * num_iters lr_scheduler = AnnealingLR( optimizer, start_lr=args.lr, warmup_iter=warmup_iter, total_iters=num_iters, decay_style=args.lr_decay_style, last_iter=init_step, min_lr=args.min_lr, use_checkpoint_lr_scheduler=args.use_checkpoint_lr_scheduler, override_lr_scheduler=args.override_lr_scheduler) return lr_scheduler
def get_learning_rate_scheduler(optimizer, neox_args):
    """Build the learning rate scheduler."""
    if neox_args.no_load_optim:
        # TODO: this should be configured as a separate arg
        return None
    if neox_args.deepspeed and neox_args.optimizer_type.lower() == "onebitadam":
        print_rank_0(
            "WARNING: onebitadam requires the lr scheduler be built by deepspeed - "
            "Make sure one is added to your deepspeed config")
        return None

    # Add linear learning rate scheduler.
    if neox_args.lr_decay_iters is not None:
        num_iters = neox_args.lr_decay_iters
    else:
        num_iters = neox_args.train_iters
    num_iters = max(1, num_iters)
    init_step = 0
    warmup_iter = neox_args.warmup * num_iters
    lr_scheduler = AnnealingLR(
        optimizer,
        start_lr=neox_args.lr,
        warmup_iter=warmup_iter,
        total_iters=num_iters,
        decay_style=neox_args.lr_decay_style,
        last_iter=init_step,
        min_lr=neox_args.min_lr,
        use_checkpoint_lr_scheduler=neox_args.use_checkpoint_lr_scheduler,
        override_lr_scheduler=neox_args.override_lr_scheduler,
    )

    return lr_scheduler
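The variants above all treat `warmup` as a fraction of the decay window rather than as an iteration count (the first variant additionally lets an explicit `warmup_iters` override it). A tiny worked example of that arithmetic, using hypothetical values chosen only for illustration:

# Hypothetical values, only to show the warmup arithmetic used above.
lr_decay_iters = 320_000
warmup = 0.01            # fraction of the decay window, not an iteration count

num_iters = max(1, lr_decay_iters)
warmup_iter = warmup * num_iters
print(warmup_iter)       # 3200.0 -> roughly 3,200 warmup iterations before decay begins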
def get_learning_rate_scheduler(optimizer): """Build the learning rate scheduler.""" args = get_args() # Iteration-based training. if args.train_iters: if args.lr_decay_iters is None: args.lr_decay_iters = args.train_iters decay_steps = args.lr_decay_iters * args.global_batch_size if args.lr_warmup_fraction is not None: warmup_steps = args.lr_warmup_fraction * decay_steps else: warmup_steps = args.lr_warmup_iters * args.global_batch_size # Sample-based training. elif args.train_samples: # We need to set training iters for later use. Technically # we need to adjust the training samples too (due to last # batch being incomplete) but we leave it as is for now. update_train_iters(args) if args.lr_decay_samples is None: args.lr_decay_samples = args.train_samples decay_steps = args.lr_decay_samples if args.lr_warmup_fraction is not None: warmup_steps = args.lr_warmup_fraction * decay_steps else: warmup_steps = args.lr_warmup_samples else: raise Exception( 'either train-iters or train-samples should be provided.') lr_scheduler = AnnealingLR( optimizer, max_lr=args.lr, min_lr=args.min_lr, warmup_steps=warmup_steps, decay_steps=decay_steps, decay_style=args.lr_decay_style, use_checkpoint_lr_scheduler=args.use_checkpoint_lr_scheduler, override_lr_scheduler=args.override_lr_scheduler) return lr_scheduler