def configure_optimizers(self):
    opt_g = RAdam(self.generator.parameters(), lr=self.hparams.lr_gen)
    opt_d = RAdam(self.discriminator.parameters(), lr=self.hparams.lr_dis)

    # Only configured schedulers are returned; Lightning does not accept
    # None entries in the scheduler list.
    schedulers = []
    if self.hparams.lr_scheduler_G:
        lr_scheduler_G = getattr(torch.optim.lr_scheduler,
                                 self.hparams.lr_scheduler_G)(opt_g)
        schedulers.append({
            'scheduler': lr_scheduler_G,
            'monitor': 'val_loss',
            'frequency': 1,
        })
    if self.hparams.lr_scheduler_D:
        lr_scheduler_D = getattr(torch.optim.lr_scheduler,
                                 self.hparams.lr_scheduler_D)(opt_d)
        schedulers.append({
            'scheduler': lr_scheduler_D,
            'monitor': 'val_loss',
            'frequency': 1,
        })

    # Counters used to alternate generator/discriminator updates.
    self.gsteps = self.hparams.g_steps
    self.dsteps = 0
    return [opt_g, opt_d], schedulers
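# A minimal sketch (not this repository's actual training loop) of how the
# gsteps/dsteps counters set above could drive alternating updates under
# PyTorch Lightning's manual optimization (self.automatic_optimization =
# False). The loss helpers self._g_loss / self._d_loss and the d_steps
# hparam are hypothetical placeholders.
def training_step(self, batch, batch_idx):
    opt_g, opt_d = self.optimizers()
    if self.gsteps > 0:
        loss = self._g_loss(batch)   # generator phase
        opt_g.zero_grad()
        self.manual_backward(loss)
        opt_g.step()
        self.gsteps -= 1
        if self.gsteps == 0:
            self.dsteps = self.hparams.d_steps
    else:
        loss = self._d_loss(batch)   # discriminator phase
        opt_d.zero_grad()
        self.manual_backward(loss)
        opt_d.step()
        self.dsteps -= 1
        if self.dsteps == 0:
            self.gsteps = self.hparams.g_steps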
def get_optimizers(conf, model):
    nets = ['encoder', 'decoder']
    # if conf.model.arch.use_attention:
    #     nets += ['attention']
    optimizers = []
    for net in nets:
        parameters = getattr(model.module, net).parameters()
        if conf.train.optim == "Adam":
            optimizer = torch.optim.Adam(parameters, conf.train.lr,
                                         betas=(conf.train.beta1, conf.train.beta2),
                                         eps=conf.train.eps,
                                         weight_decay=conf.train.weight_decay)
        elif conf.train.optim == "RAdam":
            optimizer = RAdam(parameters, conf.train.lr,
                              betas=(conf.train.beta1, conf.train.beta2),
                              eps=1e-8,
                              weight_decay=conf.train.weight_decay)
        else:
            raise NotImplementedError
        optimizers.append((net, optimizer))
    return optimizers
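# A minimal usage sketch. get_optimizers reads sub-networks off model.module,
# so the model is assumed to be wrapped in nn.DataParallel; the toy model and
# config values below are illustrative, not the repository's.
from types import SimpleNamespace
import torch.nn as nn

class ToyAE(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Linear(8, 4)
        self.decoder = nn.Linear(4, 8)

conf = SimpleNamespace(train=SimpleNamespace(
    optim="Adam", lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, weight_decay=0.0))
model = nn.DataParallel(ToyAE())
for net_name, optimizer in get_optimizers(conf, model):
    print(net_name, type(optimizer).__name__)  # encoder Adam / decoder Adam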
def configure_optimizers(self):
    opt_g = RAdam(self.generator.parameters(), lr=self.cfg.lr_pretrain)
    if self.cfg.lr_scheduler_pretrain:
        lr_scheduler_pretrain = [
            getattr(torch.optim.lr_scheduler, self.cfg.lr_scheduler_pretrain)(opt_g)
        ]
    else:
        lr_scheduler_pretrain = []
    return [opt_g], lr_scheduler_pretrain
def get_optimizer(model, args):
    parameters = []
    for name, param in model.named_parameters():
        # trick: enlarge the learning rate for custom head/attention layers
        if ('fc' in name or 'class' in name or 'last_linear' in name
                or 'ca' in name or 'sa' in name):
            # trick: bias params do not use weight decay
            if name[-4:] == 'bias':
                parameters.append({'params': param,
                                   'lr': args.lr * args.lr_times,
                                   'weight_decay': 0.0})
            else:
                parameters.append({'params': param,
                                   'lr': args.lr * args.lr_times})
        else:
            if name[-4:] == 'bias':  # was name[:-4], which never matches
                parameters.append({'params': param, 'lr': args.lr,
                                   'weight_decay': 0.0})
            else:
                parameters.append({'params': param, 'lr': args.lr})

    if args.optimizer == 'sgd':
        return torch.optim.SGD(parameters,
                               args.lr,
                               momentum=args.momentum,
                               nesterov=args.nesterov,
                               weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        return torch.optim.RMSprop(parameters,
                                   args.lr,
                                   alpha=args.alpha,
                                   weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        return torch.optim.Adam(parameters,
                                args.lr,
                                betas=(args.beta1, args.beta2),
                                weight_decay=args.weight_decay)
    elif args.optimizer == 'AdaBound':
        return adabound.AdaBound(parameters,
                                 lr=args.lr,
                                 final_lr=args.final_lr)
    elif args.optimizer == 'radam':
        return RAdam(parameters,
                     lr=args.lr,
                     betas=(args.beta1, args.beta2),
                     weight_decay=args.weight_decay)
    else:
        raise NotImplementedError
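# A minimal usage sketch with an argparse-style namespace; the values are
# illustrative. Head-layer params ('fc', etc.) receive lr * lr_times, and
# bias params get weight_decay=0 thanks to the fix above.
from types import SimpleNamespace
import torchvision

args = SimpleNamespace(optimizer='sgd', lr=0.01, lr_times=10.0,
                       momentum=0.9, nesterov=True, weight_decay=1e-4)
model = torchvision.models.resnet18()
optimizer = get_optimizer(model, args)
# One param group per tensor; the last two are fc.weight and fc.bias, so
# this prints lr=0.1 with wd=1e-4, then lr=0.1 with wd=0.
for group in optimizer.param_groups[-2:]:
    print(group['lr'], group['weight_decay'])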
def optimizer_choice(args, network):
    """Selects the optimizer."""
    if args.optimizer == "adam":
        optimizer = optim.Adam(network.parameters(),
                               lr=args.learning_rate,
                               weight_decay=args.weightdecay)
    elif args.optimizer == "sgd":
        optimizer = optim.SGD(network.parameters(),
                              lr=args.learning_rate,
                              momentum=args.momentum,
                              weight_decay=args.weightdecay)
    elif args.optimizer == "radam":
        optimizer = RAdam(network.parameters(), lr=args.learning_rate)
    elif args.optimizer == "nesterov":
        optimizer = optim.SGD(network.parameters(),
                              lr=args.learning_rate,
                              momentum=args.momentum,
                              weight_decay=args.weightdecay,
                              nesterov=True)
    else:
        raise Exception("Unknown optimizer: {}".format(args.optimizer))
    return optimizer
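# A minimal usage sketch; the attribute names mirror those read above and the
# values are illustrative. Note that the "radam" branch above does not pass
# weight_decay, so RAdam runs with its default of 0 here.
from types import SimpleNamespace
import torch.nn as nn

args = SimpleNamespace(optimizer="nesterov", learning_rate=0.1,
                       momentum=0.9, weightdecay=5e-4)
net = nn.Linear(16, 2)
opt = optimizer_choice(args, net)  # SGD with nesterov=True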
def get_optimizer(model, args):
    parameters = []
    for name, param in model.named_parameters():
        # custom head/attention layers get an enlarged learning rate
        if ('fc' in name or 'class' in name or 'last_linear' in name
                or 'ca' in name or 'sa' in name):
            parameters.append({'params': param,
                               'lr': args.lr * args.lr_fc_times})
        else:
            parameters.append({'params': param, 'lr': args.lr})

    if args.optimizer == 'sgd':
        return torch.optim.SGD(parameters,
                               args.lr,
                               momentum=args.momentum,
                               nesterov=args.nesterov,
                               weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        return torch.optim.RMSprop(parameters,
                                   args.lr,
                                   alpha=args.alpha,
                                   weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        return torch.optim.Adam(parameters,
                                args.lr,
                                betas=(args.beta1, args.beta2),
                                weight_decay=args.weight_decay)
    elif args.optimizer == 'AdaBound':
        return adabound.AdaBound(parameters,
                                 lr=args.lr,
                                 final_lr=args.final_lr)
    elif args.optimizer == 'radam':
        return RAdam(parameters,
                     lr=args.lr,
                     betas=(args.beta1, args.beta2),
                     weight_decay=args.weight_decay)
    else:
        raise NotImplementedError
def get_optimizer(model, args):
    # same dispatch as above, but with a single param group over all weights
    if args.optimizer == 'sgd':
        return torch.optim.SGD(model.parameters(),
                               args.lr,
                               momentum=args.momentum,
                               nesterov=args.nesterov,
                               weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        return torch.optim.RMSprop(model.parameters(),
                                   args.lr,
                                   alpha=args.alpha,
                                   weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        return torch.optim.Adam(model.parameters(),
                                args.lr,
                                betas=(args.beta1, args.beta2),
                                weight_decay=args.weight_decay)
    elif args.optimizer == 'AdaBound':
        return adabound.AdaBound(model.parameters(),
                                 lr=args.lr,
                                 final_lr=args.final_lr)
    elif args.optimizer == 'radam':
        return RAdam(model.parameters(),
                     lr=args.lr,
                     betas=(args.beta1, args.beta2),
                     weight_decay=args.weight_decay)
    else:
        raise NotImplementedError
def get_optimizer(net, opt):
    if opt.no_bias_decay:
        # trick: exclude bias params from weight decay
        weight_params = []
        bias_params = []
        for n, p in net.named_parameters():
            if 'bias' in n:
                bias_params.append(p)
            else:
                weight_params.append(p)
        parameters = [{'params': bias_params, 'weight_decay': 0},
                      {'params': weight_params}]
    else:
        parameters = net.parameters()

    if opt.optim.lower() == 'rmsprop':
        optimizer = optim.RMSprop(parameters, lr=opt.lr,
                                  momentum=opt.momentum, weight_decay=opt.wd)
    elif opt.optim.lower() == 'sgd':
        optimizer = optim.SGD(parameters, lr=opt.lr,
                              momentum=opt.momentum, weight_decay=opt.wd)
    elif opt.optim.lower() == 'adam':
        optimizer = optim.Adam(parameters, lr=opt.lr)
    elif opt.optim.lower() == 'adamw':
        optimizer = optim.AdamW(parameters, lr=opt.lr, weight_decay=opt.wd)
    elif opt.optim.lower() == 'radam':
        from utils.radam import RAdam
        optimizer = RAdam(parameters, lr=opt.lr, weight_decay=opt.wd)
    else:
        # avoid returning an unbound name when the option is unrecognized
        raise NotImplementedError(opt.optim)
    return optimizer
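# A minimal usage sketch of the no-bias-decay trick; opt_cfg is an
# argparse-style namespace with illustrative values, and
# `import torch.optim as optim` is assumed at module level.
from types import SimpleNamespace
import torch.nn as nn

opt_cfg = SimpleNamespace(no_bias_decay=True, optim='sgd',
                          lr=0.1, momentum=0.9, wd=1e-4)
net = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 2))
optimizer = get_optimizer(net, opt_cfg)
for group in optimizer.param_groups:
    # bias group: weight_decay=0; weight group inherits wd=1e-4 by default
    print(len(group['params']), group['weight_decay'])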