class Config(ConfigBase):
    #: Training epochs
    epochs: int = 10
    #: Stop after this many epochs when the eval metric is not improving
    early_stop_after: int = 0
    #: Clip gradient norm if set
    max_clip_norm: Optional[float] = None
    #: Whether metrics on training data should be computed and reported.
    report_train_metrics: bool = True
    #: Target time limit for training; default (None) means no time limit.
    target_time_limit_seconds: Optional[int] = None
    #: Whether to do evaluation and model selection based on it.
    do_eval: bool = True
    #: If do_eval, whether to load the best model state dict after training
    #: or just use the latest model state.
    load_best_model_after_train: bool = True
    #: Number of samples for logging training progress.
    num_samples_to_log_progress: int = 1000
    #: Number of forward & backward passes per batch before updating gradients;
    #: the actual_batch_size = batch_size x num_accumulated_batches
    num_accumulated_batches: int = 1
    #: Define an epoch as a fixed number of batches. Subsequent epochs will
    #: continue to iterate through the data, cycling through it when they
    #: reach the end. If not set, use exactly one pass through the dataset as
    #: one epoch. This configuration only affects train epochs; test and eval
    #: will always iterate over their entire datasets.
    num_batches_per_epoch: Optional[int] = None
    #: Config for the optimizer, used in parameter updates.
    optimizer: Optimizer.Config = Adam.Config()
    scheduler: Optional[Scheduler.Config] = None
    sparsifier: Optional[Sparsifier.Config] = None
    #: Define arguments for fp16 training. An fp16_optimizer will be created
    #: to wrap the original optimizer; it scales the loss during backward,
    #: while master weights are maintained on the original optimizer.
    #: https://arxiv.org/abs/1710.03740
    fp16_args: FP16Optimizer.Config = FP16OptimizerFairseq.Config()
    use_tensorboard: bool = False
    find_unused_parameters: bool = True
    #: Set a discriminative learning rate for some of the parameters in the
    #: model. If None, all parameters will have the same lr.
    discriminative_lr: Optional[float] = None
    #: Model parameters matching any pattern in the list will have
    #: discriminative_lr; parameters not matching any pattern will have the
    #: default lr. E.g. ["decoder.mlp.0", "decoder.mlp.3"]
    discriminative_lr_params_pattern: Optional[List[str]] = None
    #: Model parameters matching any pattern in the list will have lr = 0.0
    freeze_params_pattern: Optional[List[str]] = None
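For reference, a minimal sketch of overriding a few of these fields, assuming this is a PyText-style ConfigBase subclass whose constructor accepts keyword overrides; the variable name trainer_config and the chosen values are illustrative, not from the source. Fields left unspecified keep the defaults declared in the class above.

# Illustrative only: build a config with a handful of overrides.
trainer_config = Config(
    epochs=20,
    early_stop_after=3,                 # stop if the eval metric stalls for 3 epochs
    num_accumulated_batches=4,          # effective batch size = batch_size x 4
    num_batches_per_epoch=1000,         # fixed-length train epochs that cycle through the data
    discriminative_lr=1e-5,
    discriminative_lr_params_pattern=["decoder.mlp.0", "decoder.mlp.3"],
)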