Example #1
 class Config(ConfigBase):
     #: Training epochs
     epochs: int = 10
     #: Stop training after this many epochs with no improvement in the eval metric
     early_stop_after: int = 0
     #: Clip gradient norm if set
     max_clip_norm: Optional[float] = None
     #: Whether metrics on training data should be computed and reported.
     report_train_metrics: bool = True
     #: Target time limit for training; the default (None) means no time limit.
     target_time_limit_seconds: Optional[int] = None
     #: Whether to do evaluation and model selection based on it.
     do_eval: bool = True
     #: Number of samples for logging training progress.
     num_samples_to_log_progress: int = 1000
     #: Number of forward & backward passes per batch before updating gradients;
     #: the actual_batch_size = batch_size x num_accumulated_batches
     num_accumulated_batches: int = 1
     #: Define epoch as a fixed number of batches. Subsequent epochs will continue
     #: to iterate through the data, cycling through it when they reach the end.
     #: If not set, use exactly one pass through the dataset as one epoch.
     #: This configuration only affects the train epochs; test and eval
     #: will always use their entire datasets.
     num_batches_per_epoch: Optional[int] = None
     #: Config for the optimizer used in parameter updates
     optimizer: Optimizer.Config = Adam.Config()
     scheduler: Optional[Scheduler.Config] = None
     sparsifier: Optional[Sparsifier.Config] = None
     #: Define arguments for fp16 training. An fp16_optimizer will be created
     #: to wrap the original optimizer; it scales the loss during the backward
     #: pass while master weights are maintained in the original optimizer.
     #: https://arxiv.org/abs/1710.03740
     fp16_args: FP16Optimizer.Config = FP16OptimizerFairseq.Config()
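A minimal usage sketch for this trainer config follows. It assumes the Config class above is in scope and that, as with typical PyText ConfigBase subclasses, field values can be overridden via keyword arguments; none of this comes from the snippet itself.

 # Hypothetical sketch: override a few defaults of the Config defined above.
 # Assumes ConfigBase subclasses accept field values as keyword arguments.
 trainer_config = Config(
     epochs=20,
     early_stop_after=3,                   # stop once the eval metric stalls for 3 epochs
     num_accumulated_batches=4,            # actual_batch_size = batch_size x 4
     target_time_limit_seconds=6 * 3600,   # stop training after roughly 6 hours
 )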
Example #2
 class Config(ConfigBase):
     #: Training epochs
     epochs: int = 10
     #: Stop training after this many epochs with no improvement in the eval metric
     early_stop_after: int = 0
     #: Clip gradient norm if set
     max_clip_norm: Optional[float] = None
     #: Whether metrics on training data should be computed and reported.
     report_train_metrics: bool = True
     #: Target time limit for training; the default (None) means no time limit.
     target_time_limit_seconds: Optional[int] = None
     #: Whether to do evaluation and model selection based on it.
     do_eval: bool = True
     #: If do_eval, whether to load the best model state dict after training or
     #: just use the latest model state
     load_best_model_after_train: bool = True
     #: Number of samples for logging training progress.
     num_samples_to_log_progress: int = 1000
     #: Number of forward & backward passes per batch before updating gradients;
     #: the actual_batch_size = batch_size x num_accumulated_batches
     num_accumulated_batches: int = 1
     #: Define epoch as a fixed number of batches. Subsequent epochs will continue
     #: to iterate through the data, cycling through it when they reach the end.
     #: If not set, use exactly one pass through the dataset as one epoch.
     #: This configuration only affects the train epochs; test and eval
     #: will always use their entire datasets.
     num_batches_per_epoch: Optional[int] = None
     #: Config for the optimizer used in parameter updates
     optimizer: Optimizer.Config = Adam.Config()
     scheduler: Optional[Scheduler.Config] = None
     sparsifier: Optional[Sparsifier.Config] = None
     #: Define arguments for fp16 training. An fp16_optimizer will be created
     #: to wrap the original optimizer; it scales the loss during the backward
     #: pass while master weights are maintained in the original optimizer.
     #: https://arxiv.org/abs/1710.03740
     fp16_args: FP16Optimizer.Config = FP16OptimizerFairseq.Config()
     #: Whether to log training progress to TensorBoard.
     use_tensorboard: bool = False
     #: Value of find_unused_parameters passed to DistributedDataParallel
     #: when training with multiple workers.
     find_unused_parameters: bool = True
     #: Set a discriminative learning rate for some of the parameters in the model.
     #: If None, all parameters will have the same lr.
     discriminative_lr: Optional[float] = None
     #: Model parameters matching any pattern in the list will have discriminative_lr;
     #: parameters not matching any pattern will have the default lr.
     #: E.g. ["decoder.mlp.0", "decoder.mlp.3"]
     discriminative_lr_params_pattern: Optional[List[str]] = None
     #: Model parameters matching any pattern in the list will have lr = 0.0
     freeze_params_pattern: Optional[List[str]] = None
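This variant adds discriminative learning rates and parameter freezing. The hypothetical sketch below shows how those fields might be filled in, reusing the example patterns from the comments above; the keyword construction and the lr field on Adam.Config are assumptions, not shown in the snippet.

 # Hypothetical sketch: parameters whose names match the listed patterns train
 # with discriminative_lr, parameters matching "embedding" are frozen (lr = 0.0),
 # and everything else uses the optimizer's default lr.
 trainer_config = Config(
     optimizer=Adam.Config(lr=1e-3),   # assumes Adam.Config exposes an lr field
     discriminative_lr=1e-4,
     discriminative_lr_params_pattern=["decoder.mlp.0", "decoder.mlp.3"],
     freeze_params_pattern=["embedding"],  # hypothetical pattern for illustration
     load_best_model_after_train=True,
 )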