Example #1
 def build_optimizer(self,
                     trn,
                     epochs,
                     lr,
                     adam_epsilon,
                     weight_decay,
                     warmup_steps,
                     transformer_lr,
                     **kwargs):
     # noinspection PyProtectedMember
     transformer = self._get_transformer()
     if transformer:
         num_training_steps = len(trn) * epochs // self.config.get('gradient_accumulation', 1)
         optimizer, scheduler = build_optimizer_scheduler_with_transformer(self.model,
                                                                           transformer,
                                                                           lr, transformer_lr,
                                                                           num_training_steps, warmup_steps,
                                                                           weight_decay, adam_epsilon)
     else:
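         # No transformer in the model: fall back to plain Adam and reduce the
         # learning rate when the (maximized) dev metric plateaus.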
         optimizer = torch.optim.Adam(self.model.parameters(), self.config.lr)
         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
             optimizer=optimizer,
             mode='max',
             factor=0.5,
             patience=2,
             verbose=True,
         )
     return optimizer, scheduler
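Every example on this page delegates to build_optimizer_scheduler_with_transformer. The library's own implementation is not reproduced here; the following is only a minimal sketch of what such a helper typically assembles, judging from the call sites above and below: two parameter groups with separate learning rates for the transformer and for the remaining parameters, AdamW, and a linear warmup schedule from the transformers package. The sketch_ name is hypothetical, and the fractional-warmup handling merely mirrors Example #4's warmup_steps=0.1.

 import torch
 from transformers import get_linear_schedule_with_warmup

 def sketch_build_optimizer_scheduler_with_transformer(model, transformer, lr, transformer_lr,
                                                       num_training_steps, warmup_steps,
                                                       weight_decay, adam_epsilon):
     # Give the transformer its own (usually much smaller) learning rate and let
     # every other parameter use the task learning rate.
     transformer_params = set(transformer.parameters())
     grouped_parameters = [
         {'params': [p for p in model.parameters() if p not in transformer_params], 'lr': lr},
         {'params': list(transformer_params), 'lr': transformer_lr},
     ]
     optimizer = torch.optim.AdamW(grouped_parameters, lr=lr, eps=adam_epsilon,
                                   weight_decay=weight_decay)
     # A warmup_steps below 1 (e.g. 0.1 in Example #4) is read as a ratio of the
     # total number of training steps.
     if isinstance(warmup_steps, float) and warmup_steps < 1:
         warmup_steps = int(num_training_steps * warmup_steps)
     scheduler = get_linear_schedule_with_warmup(optimizer, warmup_steps, num_training_steps)
     return optimizer, scheduler

A production-grade helper would usually also exclude bias and LayerNorm parameters from weight decay; that refinement is omitted here to keep the sketch short.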
Example #2
 def build_optimizer(self, epochs, trn, gradient_accumulation, **kwargs):
     config = self.config
     model = self.model
     if isinstance(model, nn.DataParallel):
         model = model.module
     transformer = self._get_transformer_builder()
     if transformer and transformer.trainable:
         transformer = self._get_transformer()
         optimizer = Adam(
             set(model.parameters()) - set(transformer.parameters()),
             config.lr, (config.mu, config.nu), config.epsilon)
         if self.config.transformer_lr:
             num_training_steps = len(trn) * epochs // gradient_accumulation
             if not self.config.separate_optimizer:
                 optimizer, scheduler = build_optimizer_scheduler_with_transformer(
                     model, transformer, config.lr, config.transformer_lr,
                     num_training_steps, config.warmup_steps,
                     config.weight_decay, config.epsilon)
                 transformer_optimizer, transformer_scheduler = None, None
             else:
                 transformer_optimizer, transformer_scheduler = \
                     build_optimizer_scheduler_with_transformer(transformer,
                                                                transformer,
                                                                config.lr,
                                                                config.transformer_lr,
                                                                num_training_steps,
                                                                config.warmup_steps,
                                                                config.weight_decay,
                                                                config.epsilon)
         else:
             transformer.requires_grad_(False)
             transformer_optimizer, transformer_scheduler = None, None
     else:
         optimizer = Adam(model.parameters(), config.lr,
                          (config.mu, config.nu), config.epsilon)
         transformer_optimizer, transformer_scheduler = None, None
     if self.config.separate_optimizer:
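         # decay ** (1 / decay_steps) is the per-step gamma that multiplies the
         # learning rate by config.decay once every config.decay_steps updates.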
         scheduler = ExponentialLR(optimizer,
                                   config.decay**(1 / config.decay_steps))
     # noinspection PyUnboundLocalVariable
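     # The hard-coded optimizer and scheduler below replace whatever was built
     # above; gamma 0.9999424652406974 corresponds to 0.75 ** (1 / 5000), i.e.
     # the same per-step decay recipe with decay=0.75 and decay_steps=5000.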
     optimizer = Adam(model.parameters(), **{
         'lr': 0.002,
         'betas': (0.9, 0.9),
         'eps': 1e-12
     })
     scheduler = ExponentialLR(optimizer, **{'gamma': 0.9999424652406974})
     return optimizer, scheduler, transformer_optimizer, transformer_scheduler
Example #3
 def build_optimizer(self, epochs, trn, gradient_accumulation, **kwargs):
     config = self.config
     model = self.model
     if isinstance(model, nn.DataParallel):
         model = model.module
     if self.config.transformer:
         transformer = model.encoder.transformer
         optimizer = Adam(
             set(model.parameters()) - set(transformer.parameters()),
             config.lr, (config.mu, config.nu), config.epsilon)
         if self.config.transformer_lr:
             num_training_steps = len(trn) * epochs // gradient_accumulation
             if self.config.separate_optimizer:
                 transformer_optimizer, transformer_scheduler = \
                     build_optimizer_scheduler_with_transformer(transformer,
                                                                transformer,
                                                                config.transformer_lr,
                                                                config.transformer_lr,
                                                                num_training_steps,
                                                                config.warmup_steps,
                                                                config.weight_decay,
                                                                adam_epsilon=1e-8)
             else:
                 optimizer, scheduler = build_optimizer_scheduler_with_transformer(
                     model,
                     transformer,
                     config.lr,
                     config.transformer_lr,
                     num_training_steps,
                     config.warmup_steps,
                     config.weight_decay,
                     adam_epsilon=1e-8)
                 transformer_optimizer, transformer_scheduler = None, None
         else:
             transformer.requires_grad_(False)
             transformer_optimizer, transformer_scheduler = None, None
     else:
         optimizer = Adam(model.parameters(), config.lr,
                          (config.mu, config.nu), config.epsilon)
         transformer_optimizer, transformer_scheduler = None, None
     if self.config.separate_optimizer:
         scheduler = ExponentialLR(optimizer,
                                   config.decay**(1 / config.decay_steps))
     # noinspection PyUnboundLocalVariable
     return optimizer, scheduler, transformer_optimizer, transformer_scheduler
Example #4
 def build_optimizer(self,
                     trn,
                     epochs,
                     gradient_accumulation=1,
                     lr=1e-3,
                     transformer_lr=5e-5,
                     adam_epsilon=1e-8,
                     weight_decay=0.0,
                     warmup_steps=0.1,
                     **kwargs):
     num_training_steps = len(trn) * epochs // gradient_accumulation
     optimizer, scheduler = build_optimizer_scheduler_with_transformer(
         self.model, self.model.base_model, lr, transformer_lr,
         num_training_steps, warmup_steps, weight_decay, adam_epsilon)
     return optimizer, scheduler
Example #5
 def build_optimizer(self,
                     trn,
                     epochs,
                     lr,
                     adam_epsilon,
                     weight_decay,
                     warmup_steps,
                     transformer_lr=None,
                     gradient_accumulation=1,
                     **kwargs):
     num_training_steps = len(trn) * epochs // gradient_accumulation
     if transformer_lr is None:
         transformer_lr = lr
     transformer = find_transformer(self.model.embed)
     optimizer, scheduler = build_optimizer_scheduler_with_transformer(
         self.model, transformer, lr, transformer_lr, num_training_steps,
         warmup_steps, weight_decay, adam_epsilon)
     return optimizer, scheduler
 def build_optimizer(self,
                     trn,
                     epochs,
                     lr,
                     adam_epsilon,
                     weight_decay,
                     warmup_steps,
                     transformer_lr=None,
                     teacher=None,
                     **kwargs):
     num_training_steps = len(trn) * epochs // self.config.get('gradient_accumulation', 1)
     if transformer_lr is None:
         transformer_lr = lr
     transformer = self.model.encoder.transformer
     optimizer, scheduler = build_optimizer_scheduler_with_transformer(self.model, transformer,
                                                                       lr, transformer_lr,
                                                                       num_training_steps, warmup_steps,
                                                                       weight_decay, adam_epsilon)
     if teacher:
         lambda_scheduler = LinearTeacherAnnealingScheduler(num_training_steps)
         scheduler = (scheduler, lambda_scheduler)
     return optimizer, scheduler
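The second function in Example #5 pairs the learning-rate scheduler with a LinearTeacherAnnealingScheduler when a teacher model is involved (typically knowledge distillation). Its real implementation is not shown on this page; the sketch below is only an illustrative stand-in under the assumption that it linearly anneals a teacher-mixing weight over num_training_steps, with step() called once per optimizer update. The Sketch prefix and the teacher_weight property are hypothetical names.

 class SketchLinearTeacherAnnealingScheduler:
     """Illustrative stand-in, not the library's class: linearly moves a
     teacher-mixing weight from 1 (follow the teacher) to 0 (follow the gold
     labels) over num_training_steps."""

     def __init__(self, num_training_steps):
         self.num_training_steps = max(1, num_training_steps)
         self.current_step = 0

     def step(self):
         # Called once per optimizer update, alongside the lr scheduler.
         self.current_step += 1

     @property
     def teacher_weight(self):
         remaining = self.num_training_steps - self.current_step
         return max(0.0, remaining / self.num_training_steps)

Because the method returns scheduler as the tuple (scheduler, lambda_scheduler) when a teacher is given, the training loop presumably steps both objects after each optimizer update.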