def configure_optimizers(self: Model):
    """Create the optimizer and per-step LR scheduler from hparams.

    The total step budget (per-epoch steps times ``max_epochs``) is also
    recorded on the instance as ``num_train_steps`` for later use.

    Returns:
        A ``([optimizer], [scheduler_config])`` pair where the scheduler
        is advanced every step (``'interval': 'step'``).
    """
    # NOTE(review): unlike the sibling configure_optimizers below, this sums
    # len(dataset) directly with no division by batch size — presumably each
    # dataset already yields batches (i.e. len() is a step count); confirm.
    num_epoch_steps = sum(
        len(dataset) for dataset in distill_datasets.values())
    num_train_steps = num_epoch_steps * self.hparams.max_epochs
    # Direct assignment replaces setattr(self, 'num_train_steps', ...):
    # setattr with a constant string name is the non-idiomatic form.
    self.num_train_steps = num_train_steps
    optimizer, scheduler = optimization.from_argparse_args(
        self.hparams,
        model=self,
        num_train_steps=num_train_steps,
        n_transformer_layers=self.transformer.config.num_hidden_layers)
    return [optimizer], [{'scheduler': scheduler, 'interval': 'step'}]
def configure_optimizers(self: Model):
    """Build the optimizer and per-step LR scheduler from hparams.

    The total number of training steps is estimated as the sum over all
    datasets of ``ceil(len(train split) / batch_size)``, multiplied by
    the configured ``max_epochs``.

    Returns:
        A ``([optimizer], [scheduler_config])`` pair where the scheduler
        is stepped every training step (``'interval': 'step'``).
    """
    batch_size = self.hparams.batch_size
    steps_per_epoch = 0
    for ds in multi_dataset.values():
        n_examples = len(ds[datasets.Split.TRAIN])
        # Ceiling division: a partial final batch still counts as a step.
        steps_per_epoch += (n_examples + batch_size - 1) // batch_size
    total_steps = steps_per_epoch * self.hparams.max_epochs
    optimizer, scheduler = optimization.from_argparse_args(
        self.hparams,
        model=self,
        num_train_steps=total_steps,
        n_transformer_layers=self.transformer.config.num_hidden_layers)
    return [optimizer], [{'scheduler': scheduler, 'interval': 'step'}]