import torch

# Assumed import path: adjust to wherever this repo defines its Optimizer
# wrapper (PreSumm-style code keeps it in models/optimizers.py).
from models.optimizers import Optimizer


def build_optim(args, model, checkpoint):
    """ Build optimizer, overriding any saved learning rate with args.learning_rate """
    saved_optimizer_state_dict = None

    if args.train_from != '':
        optim = checkpoint['optim']
        saved_optimizer_state_dict = optim.optimizer.state_dict()
    else:
        optim = Optimizer(
            args.optim, args.learning_rate, args.max_grad_norm,
            beta1=args.beta1, beta2=args.beta2,
            decay_method=args.decay_method,
            warmup_steps=args.warmup_steps)

    optim.set_parameters(list(model.named_parameters()))

    if args.train_from != '':
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        # Resume the saved Adam state but adopt the learning rate from the
        # command line rather than the one stored in the checkpoint.
        optim.learning_rate = args.learning_rate
        for param_group in optim.optimizer.param_groups:
            param_group['lr'] = args.learning_rate
        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model"
                " but optimizer state is empty")

    return optim
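
# The warmup_steps / decay_method arguments above typically select the
# Transformer 'noam' schedule. A minimal sketch of that schedule (hedged:
# the authoritative form lives in this repo's Optimizer class, not shown
# here; steps count from 1, so step ** -0.5 is always defined):
def noam_lr(base_lr, step, warmup_steps):
    """Linear warmup for warmup_steps updates, then inverse-sqrt decay."""
    return base_lr * min(step ** -0.5, step * warmup_steps ** -1.5)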
def build_optim_dec(args, model, checkpoint):
    """ Build optimizer for the decoder parameters only """
    if checkpoint is not None:
        # Reuse the decoder optimizer saved as the second entry of 'optims'.
        optim = checkpoint['optims'][1]
        saved_optimizer_state_dict = optim.optimizer.state_dict()
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        if args.visible_gpus != '-1':
            # Move the restored optimizer state tensors onto the GPU.
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model"
                " but optimizer state is empty")
    else:
        optim = Optimizer(
            args.optim, args.lr_dec, args.max_grad_norm,
            beta1=args.beta1, beta2=args.beta2,
            decay_method='noam',
            warmup_steps=args.warmup_steps_dec)

    # Optimize everything except the BERT encoder, which is handled by a
    # separate optimizer so encoder and decoder can use different rates.
    params = [(n, p) for n, p in list(model.named_parameters())
              if not n.startswith('bert.model')]
    optim.set_parameters(params)

    return optim
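
# build_optim_dec excludes every parameter whose name starts with
# 'bert.model', so it is normally paired with an encoder-side builder that
# trains exactly those parameters. A minimal sketch of that counterpart,
# assuming args exposes lr_bert and warmup_steps_bert (hypothetical names
# mirroring lr_dec / warmup_steps_dec) and that the encoder optimizer is
# saved as the first entry of 'optims':
def build_optim_bert(args, model, checkpoint):
    """ Build optimizer for the BERT encoder parameters only (sketch) """
    if checkpoint is not None:
        optim = checkpoint['optims'][0]
    else:
        optim = Optimizer(
            args.optim, args.lr_bert, args.max_grad_norm,
            beta1=args.beta1, beta2=args.beta2,
            decay_method='noam',
            warmup_steps=args.warmup_steps_bert)

    # Complement of the filter in build_optim_dec: encoder parameters only.
    params = [(n, p) for n, p in list(model.named_parameters())
              if n.startswith('bert.model')]
    optim.set_parameters(params)

    return optim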
def build_optim(args, model, checkpoint):
    """ Build optimizer, keeping the learning rate saved in the checkpoint """
    saved_optimizer_state_dict = None

    if args.train_from != '':
        optim = checkpoint['optim']
        saved_optimizer_state_dict = optim.optimizer.state_dict()
    else:
        optim = Optimizer(
            args.optim, args.lr, args.max_grad_norm,
            beta1=args.beta1, beta2=args.beta2,
            decay_method=args.decay_method,
            warmup_steps=args.warmup_steps)

    optim.set_parameters(list(model.named_parameters()))

    if args.train_from != '':
        optim.optimizer.load_state_dict(saved_optimizer_state_dict)
        if args.visible_gpus != '-1':
            # Move the restored optimizer state tensors onto the GPU.
            for state in optim.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

        if (optim.method == 'adam') and (len(optim.optimizer.state) < 1):
            raise RuntimeError(
                "Error: loaded Adam optimizer from existing model"
                " but optimizer state is empty")

    return optim
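
# Hedged usage sketch: how a training script might wire these builders up.
# build_model is a hypothetical stand-in for this repo's model constructor;
# only the builders defined above are taken from the source.
def example_training_setup(args):
    """Sketch of optimizer construction for fresh vs. resumed training."""
    if args.train_from != '':
        # map_location keeps loading device-agnostic; build_optim then
        # moves the restored state tensors to the GPU when
        # visible_gpus != '-1'.
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
    else:
        checkpoint = None

    model = build_model(args, checkpoint)  # hypothetical model constructor
    # Single-optimizer path:
    optim = build_optim(args, model, checkpoint)
    # Two-optimizer fine-tuning path would instead use:
    # optims = [build_optim_bert(args, model, checkpoint),
    #           build_optim_dec(args, model, checkpoint)]
    return model, optim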