class AdamOptimizerConfig(BaseOptimizerConfig):
    """Parameters for adam optimization."""

    optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.Adam
    """Points to `torch.optim.Adam`."""

    type: str = StringOptions(["adam"], default="adam", allow_none=False)
    """Must be 'adam' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
    (default: 'adam')"""

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.Adam.html#torch.optim.Adam:
    lr: float = FloatRange(default=1e-03, min=0.0, max=1.0, description="Learning rate.")

    betas: Tuple[float, float] = FloatRangeTupleDataclassField(
        default=(0.9, 0.999),
        description="Coefficients used for computing running averages of gradient and its square.",
    )

    eps: float = NonNegativeFloat(
        default=1e-08, description="Term added to the denominator to improve numerical stability."
    )

    weight_decay: float = NonNegativeFloat(default=0.0, description="Weight decay (L2 penalty).")

    amsgrad: bool = Boolean(
        default=False,
        description=(
            "Whether to use the AMSGrad variant of this algorithm from the paper 'On the Convergence of Adam and "
            "Beyond'."
        ),
    )

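# Illustrative usage sketch (an assumption, not part of the original module): shows how the field
# values of an `AdamOptimizerConfig` can be unpacked into `torch.optim.Adam`, assuming these config
# classes behave as standard dataclasses (so `dataclasses.asdict` works on instances). The
# `nn.Linear` module below is a placeholder used only to supply parameters; the real construction
# path in Ludwig goes through the trainer and `optimizer_registry`, not this helper.
def _example_build_adam_from_config() -> torch.optim.Optimizer:  # pragma: no cover
    from dataclasses import asdict

    import torch.nn as nn

    config = AdamOptimizerConfig()  # defaults: lr=1e-3, betas=(0.9, 0.999), eps=1e-8, ...
    model = nn.Linear(4, 2)  # placeholder module, only here to provide parameters

    # Drop the schema-only `type` field; the remaining fields map 1:1 onto Adam's keyword arguments.
    kwargs = {name: value for name, value in asdict(config).items() if name != "type"}
    return config.optimizer_class(model.parameters(), **kwargs)
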
class RMSPropOptimizerConfig(BaseOptimizerConfig):
    """Parameters for rmsprop optimization."""

    optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.RMSprop
    """Points to `torch.optim.RMSprop`."""

    type: str = StringOptions(["rmsprop"], default="rmsprop", allow_none=False)
    """Must be 'rmsprop' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
    (default: 'rmsprop')"""

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.RMSprop.html#torch.optim.RMSprop:
    lr: float = FloatRange(default=1e-2, min=0.0, max=1.0, description="Learning rate.")

    momentum: float = NonNegativeFloat(default=0.0, description="Momentum factor.")

    alpha: float = NonNegativeFloat(default=0.99, description="Smoothing constant.")

    eps: float = NonNegativeFloat(
        default=1e-08, description="Term added to the denominator to improve numerical stability."
    )

    centered: bool = Boolean(
        default=False,
        description=(
            "If True, computes the centered RMSProp, and the gradient is normalized by an estimation of its variance."
        ),
    )

    weight_decay: float = NonNegativeFloat(default=0.0, description="Weight decay ($L2$ penalty).")

class FtrlOptimizerConfig(BaseOptimizerConfig):
    """Parameters for ftrl optimization."""

    # torch.optim does not provide a built-in FTRL implementation, so no optimizer_class is set here.
    # optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.Ftrl

    type: str = StringOptions(["ftrl"], default="ftrl", allow_none=False)

    learning_rate_power: float = FloatRange(default=-0.5, max=0.0)

    initial_accumulator_value: float = NonNegativeFloat(default=0.1)

    l1_regularization_strength: float = NonNegativeFloat(default=0.0)

    l2_regularization_strength: float = NonNegativeFloat(default=0.0)

class AdamaxOptimizerConfig(BaseOptimizerConfig):
    """Parameters for adamax optimization."""

    optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.Adamax
    """Points to `torch.optim.Adamax`."""

    type: str = StringOptions(["adamax"], default="adamax", allow_none=False)
    """Must be 'adamax' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
    (default: 'adamax')"""

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.Adamax.html#torch.optim.Adamax:
    lr: float = FloatRange(default=2e-3, min=0.0, max=1.0, description="Learning rate.")

    betas: Tuple[float, float] = FloatRangeTupleDataclassField(
        default=(0.9, 0.999),
        description="Coefficients used for computing running averages of gradient and its square.",
    )

    eps: float = NonNegativeFloat(
        default=1e-08, description="Term added to the denominator to improve numerical stability."
    )

    weight_decay: float = NonNegativeFloat(default=0.0, description="Weight decay ($L2$ penalty).")

class AdagradOptimizerConfig(BaseOptimizerConfig):
    """Parameters for adagrad optimization."""

    optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.Adagrad
    """Points to `torch.optim.Adagrad`."""

    type: str = StringOptions(["adagrad"], default="adagrad", allow_none=False)
    """Must be 'adagrad' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
    (default: 'adagrad')"""

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.Adagrad.html#torch.optim.Adagrad:
    initial_accumulator_value: float = NonNegativeFloat(
        default=0, description="Initial value of the sum-of-squared-gradients accumulator."
    )

    lr: float = FloatRange(default=1e-2, min=0.0, max=1.0, description="Learning rate.")

    lr_decay: float = FloatRange(default=0, description="Learning rate decay.")

    weight_decay: float = FloatRange(default=0, description="Weight decay ($L2$ penalty).")

    eps: float = FloatRange(
        default=1e-10, description="Term added to the denominator to improve numerical stability."
    )

class AdadeltaOptimizerConfig(BaseOptimizerConfig):
    """Parameters for adadelta optimization."""

    optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.Adadelta
    """Points to `torch.optim.Adadelta`."""

    type: str = StringOptions(["adadelta"], default="adadelta", allow_none=False)
    """Must be 'adadelta' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
    (default: 'adadelta')"""

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.Adadelta.html#torch.optim.Adadelta:
    rho: float = FloatRange(
        default=0.9,
        min=0.0,
        max=1.0,
        description="Coefficient used for computing a running average of squared gradients.",
    )

    eps: float = NonNegativeFloat(
        default=1e-06, description="Term added to the denominator to improve numerical stability."
    )

    lr: float = FloatRange(
        default=1.0,
        min=0.0,
        max=1.0,
        description="Coefficient that scales delta before it is applied to the parameters.",
    )

    weight_decay: float = NonNegativeFloat(default=0.0, description="Weight decay ($L2$ penalty).")

class NadamOptimizerConfig(BaseOptimizerConfig):
    """Parameters for nadam optimization."""

    # optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.NAdam

    type: str = StringOptions(["nadam"], default="nadam", allow_none=False)

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.NAdam.html#torch.optim.NAdam:
    lr: float = FloatRange(default=2e-3, min=0.0, max=1.0, description="Learning rate.")

    betas: Tuple[float, float] = FloatRangeTupleDataclassField(
        default=(0.9, 0.999),
        description="Coefficients used for computing running averages of gradient and its square.",
    )

    eps: float = NonNegativeFloat(
        default=1e-08, description="Term added to the denominator to improve numerical stability."
    )

    weight_decay: float = NonNegativeFloat(default=0.0, description="Weight decay ($L2$ penalty).")

    momentum_decay: float = NonNegativeFloat(default=4e-3, description="Momentum decay.")

class SGDOptimizerConfig(BaseOptimizerConfig):
    """Parameters for stochastic gradient descent."""

    optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.SGD
    """Points to `torch.optim.SGD`."""

    type: str = StringOptions(["sgd"], default="sgd", allow_none=False)
    """Must be 'sgd' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
    (default: 'sgd')"""

    lr: float = FloatRange(default=1e-03, min=0.0, max=1.0, description="Learning rate.")

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.SGD.html#torch.optim.SGD:
    momentum: float = NonNegativeFloat(default=0.0, description="Momentum factor.")

    weight_decay: float = NonNegativeFloat(default=0.0, description="Weight decay ($L2$ penalty).")

    dampening: float = NonNegativeFloat(default=0.0, description="Dampening for momentum.")

    nesterov: bool = Boolean(default=False, description="Enables Nesterov momentum.")
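

# Illustrative sketch (an assumption, not part of the original module): overriding defaults at
# construction time. Assuming these configs behave as plain dataclasses, fields can be set by
# keyword and forwarded to the underlying torch optimizer. The `nn.Linear` module is a placeholder
# used only to supply parameters; Ludwig itself builds optimizers through `optimizer_registry`.
def _example_build_sgd_with_nesterov() -> torch.optim.Optimizer:  # pragma: no cover
    from dataclasses import asdict

    import torch.nn as nn

    # Nesterov momentum in torch.optim.SGD requires a nonzero momentum factor.
    config = SGDOptimizerConfig(lr=0.01, momentum=0.9, nesterov=True)
    kwargs = {name: value for name, value in asdict(config).items() if name != "type"}
    return config.optimizer_class(nn.Linear(4, 2).parameters(), **kwargs)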