def configure_optimizers(self):
    opt1 = torch.optim.Adam(self.discriminator.parameters(), lr=1e-3, eps=1e-07, amsgrad=True)
    opt2 = torch.optim.Adam(self.generator.parameters(), lr=1e-3, eps=1e-07, amsgrad=True)

    # Derive the total number of optimizer steps from the dataset size,
    # the world size, and the epoch budget.
    num_procs = self._trainer.num_gpus * self._trainer.num_nodes
    num_samples = len(self._train_dl.dataset)
    batch_size = self._train_dl.batch_size
    iter_per_epoch = np.ceil(num_samples / (num_procs * batch_size))
    max_steps = iter_per_epoch * self._trainer.max_epochs
    logging.info(f"MAX STEPS: {max_steps}")

    sch1 = CosineAnnealing(
        opt1,
        max_steps=max_steps,
        min_lr=1e-5,
        warmup_steps=np.ceil(0.2 * max_steps),
    )  # Use warmup to delay start
    sch1_dict = {
        'scheduler': sch1,
        'interval': 'step',
    }
    sch2 = CosineAnnealing(opt2, max_steps=max_steps, min_lr=1e-5)
    sch2_dict = {
        'scheduler': sch2,
        'interval': 'step',
    }
    return [opt1, opt2], [sch1_dict, sch2_dict]
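# Worked illustration (hypothetical numbers, not from the source) of the step
# arithmetic above. Assume 10,000 training samples, 2 GPUs on 1 node,
# batch size 16, and max_epochs=100:
#
#   iter_per_epoch = ceil(10000 / (2 * 16)) = 313
#   max_steps      = 313 * 100              = 31300
#   warmup_steps   = ceil(0.2 * 31300)      = 6260
#
# So the generator's schedule ramps up (typically linearly, for NeMo-style
# warmup policies) over the first 20% of training before annealing from the
# base lr of 1e-3 down to min_lr=1e-5; the discriminator anneals with no warmup.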
def configure_optimizers(self):
    self.optim_g = instantiate(
        self._cfg.optim,
        params=self.generator.parameters(),
    )
    self.optim_d = instantiate(
        self._cfg.optim,
        params=itertools.chain(self.msd.parameters(), self.mpd.parameters()),
    )

    max_steps = self._cfg.max_steps
    # Skip warmup when fine-tuning from a pretrained checkpoint.
    warmup_steps = 0 if self.finetune else np.ceil(0.2 * max_steps)
    self.scheduler_g = CosineAnnealing(
        self.optim_g,
        max_steps=max_steps,
        min_lr=1e-5,
        warmup_steps=warmup_steps,
    )  # Use warmup to delay start
    sch1_dict = {
        'scheduler': self.scheduler_g,
        'interval': 'step',
    }
    self.scheduler_d = CosineAnnealing(self.optim_d, max_steps=max_steps, min_lr=1e-5)
    sch2_dict = {
        'scheduler': self.scheduler_d,
        'interval': 'step',
    }
    return [self.optim_g, self.optim_d], [sch1_dict, sch2_dict]
def configure_optimizers(self):
    self.optim_g = instantiate(
        self._cfg.optim,
        params=self.generator.parameters(),
    )
    self.optim_d = instantiate(
        self._cfg.optim,
        params=itertools.chain(self.msd.parameters(), self.mpd.parameters()),
    )

    self.scheduler_g = CosineAnnealing(
        optimizer=self.optim_g,
        max_steps=self._cfg.max_steps,
        min_lr=self._cfg.sched.min_lr,
        warmup_steps=self._cfg.sched.warmup_ratio * self._cfg.max_steps,
    )  # Use warmup to delay start
    sch1_dict = {
        'scheduler': self.scheduler_g,
        'interval': 'step',
    }
    self.scheduler_d = CosineAnnealing(
        optimizer=self.optim_d,
        max_steps=self._cfg.max_steps,
        min_lr=self._cfg.sched.min_lr,
    )
    sch2_dict = {
        'scheduler': self.scheduler_d,
        'interval': 'step',
    }
    return [self.optim_g, self.optim_d], [sch1_dict, sch2_dict]
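# A minimal sketch (an assumption, not shown in the source) of the config
# shape this version expects, assuming `instantiate` is hydra.utils.instantiate,
# which builds an object from a `_target_` key. The field names mirror the
# attributes read above (`optim`, `max_steps`, `sched.min_lr`,
# `sched.warmup_ratio`); the optimizer values echo the hard-coded Adam
# settings from the first version, and everything else is illustrative.
from omegaconf import OmegaConf

example_cfg = OmegaConf.create(
    {
        "max_steps": 100000,
        "optim": {
            "_target_": "torch.optim.Adam",
            "lr": 1e-3,
            "eps": 1e-7,
            "amsgrad": True,
        },
        "sched": {
            "min_lr": 1e-5,
            "warmup_ratio": 0.2,
        },
    }
)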
def configure_optimizers(self):
    optim_config = self._cfg.optim.copy()
    # Temporarily relax struct mode so the nested 'sched' block can be popped
    # out before the optimizer config is passed to instantiate().
    OmegaConf.set_struct(optim_config, False)
    sched_config = optim_config.pop("sched", None)
    OmegaConf.set_struct(optim_config, True)

    optim_g = instantiate(
        optim_config,
        params=self.generator.parameters(),
    )
    optim_d = instantiate(
        optim_config,
        params=itertools.chain(self.msd.parameters(), self.mpd.parameters()),
    )

    # Backward compatibility: older configs kept 'sched' at the top level.
    if sched_config is None and 'sched' in self._cfg:
        sched_config = self._cfg.sched

    if sched_config is not None:
        max_steps = self._cfg.get("max_steps", None)
        if max_steps is None or max_steps < 0:
            max_steps = self._get_max_steps()

        warmup_steps = self._get_warmup_steps(
            max_steps=max_steps,
            warmup_steps=sched_config.get("warmup_steps", None),
            warmup_ratio=sched_config.get("warmup_ratio", None),
        )

        scheduler_g = CosineAnnealing(
            optimizer=optim_g,
            max_steps=max_steps,
            min_lr=sched_config.min_lr,
            warmup_steps=warmup_steps,
        )  # Use warmup to delay start
        sch1_dict = {
            'scheduler': scheduler_g,
            'interval': 'step',
        }
        scheduler_d = CosineAnnealing(
            optimizer=optim_d,
            max_steps=max_steps,
            min_lr=sched_config.min_lr,
        )
        sch2_dict = {
            'scheduler': scheduler_d,
            'interval': 'step',
        }
        return [optim_g, optim_d], [sch1_dict, sch2_dict]
    else:
        return [optim_g, optim_d]
def configure_optimizers(self):
    self.optim_g = instantiate(
        self._cfg.optim,
        params=self.generator.parameters(),
    )
    self.optim_d = instantiate(
        self._cfg.optim,
        params=itertools.chain(self.msd.parameters(), self.mpd.parameters()),
    )

    if hasattr(self._cfg, 'sched'):
        max_steps = self._cfg.get("max_steps", None)
        if max_steps is None or max_steps < 0:
            max_steps = self._get_max_steps()
        warmup_steps = self._get_warmup_steps(max_steps)

        self.scheduler_g = CosineAnnealing(
            optimizer=self.optim_g,
            max_steps=max_steps,
            min_lr=self._cfg.sched.min_lr,
            warmup_steps=warmup_steps,
        )  # Use warmup to delay start
        sch1_dict = {
            'scheduler': self.scheduler_g,
            'interval': 'step',
        }
        self.scheduler_d = CosineAnnealing(
            optimizer=self.optim_d,
            max_steps=max_steps,
            min_lr=self._cfg.sched.min_lr,
        )
        sch2_dict = {
            'scheduler': self.scheduler_d,
            'interval': 'step',
        }
        return [self.optim_g, self.optim_d], [sch1_dict, sch2_dict]
    else:
        return [self.optim_g, self.optim_d]
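# Hypothetical method sketches (not shown in the source) of the two helpers the
# last versions rely on; they would live on the same model class. `_get_max_steps`
# reuses the step arithmetic from the first version, and `_get_warmup_steps`
# accepts both call styles seen above: keyword arguments in the fourth version,
# and a bare positional `max_steps` (falling back to the sched config) in the
# fifth. The fallback behavior is an assumption.
def _get_max_steps(self):
    # Steps per epoch across all processes, times the epoch budget.
    num_procs = self._trainer.num_gpus * self._trainer.num_nodes
    iter_per_epoch = np.ceil(
        len(self._train_dl.dataset) / (num_procs * self._train_dl.batch_size)
    )
    return int(iter_per_epoch * self._trainer.max_epochs)

def _get_warmup_steps(self, max_steps, warmup_steps=None, warmup_ratio=None):
    # When called with only max_steps, read warmup settings from the sched config.
    if warmup_steps is None and warmup_ratio is None:
        warmup_steps = self._cfg.sched.get("warmup_steps", None)
        warmup_ratio = self._cfg.sched.get("warmup_ratio", None)
    # An explicit step count takes precedence over a ratio.
    if warmup_steps is not None:
        return int(warmup_steps)
    if warmup_ratio is not None:
        return int(np.ceil(warmup_ratio * max_steps))
    raise ValueError("Specify either warmup_steps or warmup_ratio in the sched config")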