Example #1
    def configure_optimizers(self: Model):
        # Steps per epoch via ceiling division, so a trailing partial
        # batch still counts as one optimizer step.
        num_epoch_steps = (len(dataset[datasets.Split.TRAIN]) + self.hparams.batch_size - 1) // self.hparams.batch_size
        num_train_steps = num_epoch_steps * self.hparams.max_epochs
        optimizer, scheduler = optimization.create_optimizer(
            self,
            lr=self.hparams.lr,
            num_train_steps=num_train_steps,
            weight_decay=self.hparams.weight_decay,
            warmup_steps=self.hparams.warmup_steps,
            warmup_proportion=self.hparams.warmup_proportion,
            layerwise_lr_decay_power=self.hparams.layerwise_lr_decay_power,
            n_transformer_layers=self.transformer.config.num_hidden_layers,
            # Polynomial decay from the peak lr down to lr_end after warmup.
            lr_scheduler=optimization.get_polynomial_decay_schedule_with_warmup,
            lr_scheduler_kwargs={
                'lr_end': self.hparams.lr_end,
                'power': self.hparams.lr_decay_power
            }
        )

        # 'interval': 'step' tells Lightning to advance the scheduler on
        # every optimizer step rather than once per epoch.
        return [optimizer], [{'scheduler': scheduler, 'interval': 'step'}]
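The optimization.create_optimizer helper above is project-local, so its internals aren't shown here. For orientation, the sketch below shows what such a helper typically assembles from stock pieces: torch.optim.AdamW with the conventional no-decay parameter groups, plus the real Hugging Face transformers function get_polynomial_decay_schedule_with_warmup. The function name build_optimizer_and_scheduler and the omission of layer-wise decay are assumptions for illustration, not the project's actual implementation.

from torch.optim import AdamW
from transformers import get_polynomial_decay_schedule_with_warmup

def build_optimizer_and_scheduler(model, lr, weight_decay, warmup_steps,
                                  num_train_steps, lr_end, power):
    # Hypothetical stand-in for a create_optimizer-style helper.
    no_decay = ('bias', 'LayerNorm.weight')  # conventionally exempt from decay
    grouped_params = [
        {'params': [p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': weight_decay},
        {'params': [p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]
    optimizer = AdamW(grouped_params, lr=lr)
    scheduler = get_polynomial_decay_schedule_with_warmup(
        optimizer,
        num_warmup_steps=warmup_steps,
        num_training_steps=num_train_steps,
        lr_end=lr_end,
        power=power,
    )
    return optimizer, scheduler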
Example #2
    def configure_optimizers(self: Model):
        # Total steps per epoch across all distillation datasets.
        num_epoch_steps = sum(
            len(dataset) for dataset in distill_datasets.values())
        num_train_steps = num_epoch_steps * self.hparams.max_epochs
        # Store the total step count on the module for later access.
        self.num_train_steps = num_train_steps
        optimizer, scheduler = optimization.create_optimizer(
            self,
            lr=self.hparams.lr,
            num_train_steps=num_train_steps,
            weight_decay=self.hparams.weight_decay,
            warmup_steps=self.hparams.warmup_steps,
            warmup_proportion=self.hparams.warmup_proportion,
            layerwise_lr_decay_power=self.hparams.layerwise_lr_decay_power,
            n_transformer_layers=self.transformer.config.num_hidden_layers,
            # Layer-wise lrs with special handling for the CRF parameters;
            # 'crf_preffix' follows the helper's own parameter spelling.
            get_layer_lrs=optimization.get_layer_lrs_with_crf,
            get_layer_lrs_kwargs={'crf_preffix': 'rel_crf'},
            lr_scheduler=optimization.get_polynomial_decay_schedule_with_warmup,
            lr_scheduler_kwargs={
                'lr_end': self.hparams.lr_end,
                'power': self.hparams.lr_decay_power
            }
        )
        return [optimizer], [{'scheduler': scheduler, 'interval': 'step'}]
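Example #2 additionally routes per-layer learning rates through the project-local optimization.get_layer_lrs_with_crf, whose source isn't shown. The underlying technique, layer-wise learning-rate decay as popularized by ELECTRA, scales each layer's learning rate by a decay factor raised to its depth, so layers closer to the input train more slowly. A minimal sketch of that computation follows; the function name, depth indexing, and sample numbers are assumptions for illustration:

def layerwise_lrs(base_lr, decay, n_layers):
    # Hypothetical helper: map depth -> learning rate, ELECTRA-style.
    # Depth 0 is the embedding layer; depth n_layers + 1 is the task
    # head, which keeps the full base_lr.
    return {
        depth: base_lr * decay ** (n_layers + 1 - depth)
        for depth in range(n_layers + 2)
    }

# With base_lr=2e-5, decay=0.8, and a 12-layer encoder, the embeddings
# train at about 1.1e-6 while the task head trains at the full 2e-5.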