Example #1
class AdagradOptimizerConfig(BaseOptimizerConfig):
    """Parameters for adagrad optimization."""

    optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.Adagrad
    """Points to `torch.optim.Adagrad`."""

    type: str = StringOptions(["adagrad"], default="adagrad", allow_none=False)
    """Must be 'adagrad' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
       (default: 'adagrad')"""

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.Adagrad.html#torch.optim.Adagrad :
    initial_accumulator_value: float = NonNegativeFloat(
        default=0,
        description="Initial value for the sum-of-squares gradient accumulator.")
    lr: float = FloatRange(default=1e-2,
                           min=0.0,
                           max=1.0,
                           description="Learning rate.")
    lr_decay: float = FloatRange(default=0, description="Learning rate decay.")
    weight_decay: float = FloatRange(
        default=0, description="Weight decay ($L2$ penalty).")
    eps: float = FloatRange(
        default=1e-10,
        description=
        "Term added to the denominator to improve numerical stability.")
Example #2
class AdadeltaOptimizerConfig(BaseOptimizerConfig):
    """Parameters for adadelta optimization."""

    optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.Adadelta
    """Points to `torch.optim.Adadelta`."""

    type: str = StringOptions(["adadelta"],
                              default="adadelta",
                              allow_none=False)
    """Must be 'adadelta' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
       (default: 'adadelta')"""

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.Adadelta.html#torch.optim.Adadelta :
    rho: float = FloatRange(
        default=0.9,
        min=0.0,
        max=1.0,
        description=
        "Coefficient used for computing a running average of squared gradients.",
    )
    eps: float = NonNegativeFloat(
        default=1e-06,
        description=
        "Term added to the denominator to improve numerical stability.")
    lr: float = FloatRange(
        default=1.0,
        min=0.0,
        max=1.0,
        description=
        "Coefficient that scales delta before it is applied to the parameters.",
    )
    weight_decay: float = NonNegativeFloat(
        default=0.0, description="Weight decay ($L2$ penalty).")
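
The `optimizer_class` class variable is what ties the config to the concrete PyTorch optimizer. A small sketch of how it might be used, with the config's defaults written out; the wiring and the placeholder model are assumptions:

import torch.nn as nn

model = nn.Linear(4, 1)  # placeholder module
optimizer_cls = AdadeltaOptimizerConfig.optimizer_class  # resolves to torch.optim.Adadelta
optimizer = optimizer_cls(
    model.parameters(),
    rho=0.9,           # `rho`
    eps=1e-06,         # `eps`
    lr=1.0,            # `lr`
    weight_decay=0.0,  # `weight_decay`
)
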
Example #3
class GradientClippingConfig(BaseMarshmallowConfig):
    """Dataclass that holds gradient clipping parameters."""

    clipglobalnorm: Optional[float] = FloatRange(
        default=0.5,
        allow_none=True,
        description="Maximum allowed norm of the gradients, computed jointly over all parameters (global norm).")

    clipnorm: Optional[float] = FloatRange(
        default=None,
        allow_none=True,
        description="Maximum allowed norm of each parameter's gradient, computed per tensor.")

    clipvalue: Optional[float] = FloatRange(
        default=None,
        allow_none=True,
        description="Maximum allowed absolute value of each gradient element.")
Example #4
class AdamOptimizerConfig(BaseOptimizerConfig):
    """Parameters for adam optimization."""

    optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.Adam
    """Points to `torch.optim.Adam`."""

    type: str = StringOptions(["adam"], default="adam", allow_none=False)
    """Must be 'adam' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
       (default: 'adam')"""

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.Adam.html#torch.optim.Adam :
    lr: float = FloatRange(default=1e-03,
                           min=0.0,
                           max=1.0,
                           description="Learning rate.")
    betas: Tuple[float, float] = FloatRangeTupleDataclassField(
        default=(0.9, 0.999),
        description=
        "Coefficients used for computing running averages of gradient and its square."
    )
    eps: float = NonNegativeFloat(
        default=1e-08,
        description=
        "Term added to the denominator to improve numerical stability.")
    weight_decay: float = NonNegativeFloat(
        default=0.0, description="Weight decay ($L2$ penalty).")
    amsgrad: bool = Boolean(
        default=False,
        description=
        ("Whether to use the AMSGrad variant of this algorithm from the paper 'On the Convergence of Adam and"
         "Beyond'."),
    )
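
A usage sketch with the defaults above, including one optimization step to show the optimizer in context. It illustrates the assumed field-to-argument mapping, not Ludwig's actual training loop; the model and random batch are placeholders.

import torch
import torch.nn as nn

model = nn.Linear(4, 1)  # placeholder module
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-03,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0.0,
    amsgrad=False,
)

loss = model(torch.randn(8, 4)).sum()  # dummy forward pass and loss
optimizer.zero_grad()
loss.backward()
optimizer.step()
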
Example #5
class RMSPropOptimizerConfig(BaseOptimizerConfig):
    """Parameters for rmsprop optimization."""

    optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.RMSprop
    """Points to `torch.optim.RMSprop`."""

    type: str = StringOptions(["rmsprop"], default="rmsprop", allow_none=False)
    """Must be 'rmsprop' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
       (default: 'rmsprop')"""

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.RMSprop.html#torch.optim.RMSprop:
    lr: float = FloatRange(default=1e-2,
                           min=0.0,
                           max=1.0,
                           description="Learning rate.")
    momentum: float = NonNegativeFloat(default=0.0,
                                       description="Momentum factor.")
    alpha: float = NonNegativeFloat(default=0.99,
                                    description="Smoothing constant.")
    eps: float = NonNegativeFloat(
        default=1e-08,
        description=
        "Term added to the denominator to improve numerical stability.")
    centered: bool = Boolean(
        default=False,
        description=
        ("If True, computes the centered RMSProp, and the gradient is normalized by an estimation of its variance."
         ),
    )
    weight_decay: float = NonNegativeFloat(
        default=0.0, description="Weight decay ($L2$ penalty).")
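
The same assumed pass-through mapping for `torch.optim.RMSprop`; `alpha` is PyTorch's name for the smoothing constant, and `centered=True` switches to the variance-normalized variant. The model is a placeholder.

import torch
import torch.nn as nn

model = nn.Linear(4, 1)  # placeholder module
optimizer = torch.optim.RMSprop(
    model.parameters(),
    lr=1e-2,
    momentum=0.0,
    alpha=0.99,       # smoothing constant
    eps=1e-08,
    centered=False,   # True normalizes the gradient by an estimate of its variance
    weight_decay=0.0,
)
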
Example #6
class FtrlOptimizerConfig(BaseOptimizerConfig):
    """Parameters for ftrl optimization."""

    # PyTorch has no built-in FTRL optimizer, hence no `optimizer_class` is set here.
    # optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.Ftrl
    type: str = StringOptions(["ftrl"], default="ftrl", allow_none=False)
    """Must be 'ftrl' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
       (default: 'ftrl')"""

    learning_rate_power: float = FloatRange(default=-0.5, max=0.0)
    initial_accumulator_value: float = NonNegativeFloat(default=0.1)
    l1_regularization_strength: float = NonNegativeFloat(default=0.0)
    l2_regularization_strength: float = NonNegativeFloat(default=0.0)
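
PyTorch ships no FTRL optimizer, which is presumably why `optimizer_class` is commented out. The field names happen to match TensorFlow's Keras FTRL optimizer, so the sketch below shows that mapping purely as an illustrative assumption:

import tensorflow as tf  # assumption: FTRL semantics taken from Keras, since torch has none

optimizer = tf.keras.optimizers.Ftrl(
    learning_rate_power=-0.5,         # `learning_rate_power`
    initial_accumulator_value=0.1,    # `initial_accumulator_value`
    l1_regularization_strength=0.0,   # `l1_regularization_strength`
    l2_regularization_strength=0.0,   # `l2_regularization_strength`
)
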
Example #7
class AdamaxOptimizerConfig(BaseOptimizerConfig):
    """Parameters for adamax optimization."""

    optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.Adamax
    """Points to `torch.optim.Adamax`."""

    type: str = StringOptions(["adamax"], default="adamax", allow_none=False)
    """Must be 'adamax' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
       (default: 'adamax')"""

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.Adamax.html#torch.optim.Adamax :
    lr: float = FloatRange(default=2e-3,
                           min=0.0,
                           max=1.0,
                           description="Learning rate.")
    betas: Tuple[float, float] = FloatRangeTupleDataclassField(
        default=(0.9, 0.999),
        description=
        "Coefficients used for computing running averages of gradient and its square."
    )
    eps: float = NonNegativeFloat(
        default=1e-08,
        description=
        "Term added to the denominator to improve numerical stability.")
    weight_decay: float = NonNegativeFloat(
        default=0.0, description="Weight decay ($L2$ penalty).")
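
Adamax (the infinity-norm variant of Adam in PyTorch) follows the same assumed mapping, with the defaults above passed straight through to `torch.optim.Adamax`; the model is a placeholder.

import torch
import torch.nn as nn

model = nn.Linear(4, 1)  # placeholder module
optimizer = torch.optim.Adamax(
    model.parameters(),
    lr=2e-3,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0.0,
)
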
Example #8
class NadamOptimizerConfig(BaseOptimizerConfig):
    """Parameters for nadam optimization."""

    # `torch.optim.NAdam` (note the capitalization) is only present in recent PyTorch releases;
    # the class reference is left commented out here.
    # optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.NAdam
    type: str = StringOptions(["nadam"], default="nadam", allow_none=False)
    """Must be 'nadam' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry`
       (default: 'nadam')"""

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.NAdam.html#torch.optim.NAdam :
    lr: float = FloatRange(default=2e-3,
                           min=0.0,
                           max=1.0,
                           description="Learning rate.")
    betas: Tuple[float, float] = FloatRangeTupleDataclassField(
        default=(0.9, 0.999),
        description=
        "Coefficients used for computing running averages of gradient and its square."
    )
    eps: float = NonNegativeFloat(
        default=1e-08,
        description=
        "Term added to the denominator to improve numerical stability.")
    weight_decay: float = NonNegativeFloat(
        default=0.0, description="Weight decay ($L2$ penalty).")
    momentum_decay: float = NonNegativeFloat(default=4e-3,
                                             description="Momentum decay.")
Example #9
class SGDOptimizerConfig(BaseOptimizerConfig):
    """Parameters for stochastic gradient descent."""

    optimizer_class: ClassVar[torch.optim.Optimizer] = torch.optim.SGD
    """Points to `torch.optim.SGD`."""

    type: str = StringOptions(["sgd"], default="sgd", allow_none=False)
    """Must be 'sgd' - corresponds to name in `ludwig.modules.optimization_modules.optimizer_registry` (default:
       'sgd')"""

    lr: float = FloatRange(default=1e-03,
                           min=0.0,
                           max=1.0,
                           description="Learning rate.")

    # Defaults taken from https://pytorch.org/docs/stable/generated/torch.optim.SGD.html#torch.optim.SGD :
    momentum: float = NonNegativeFloat(default=0.0,
                                       description="Momentum factor.")
    weight_decay: float = NonNegativeFloat(
        default=0.0, description="Weight decay ($L2$ penalty).")
    dampening: float = NonNegativeFloat(default=0.0,
                                        description="Dampening for momentum.")
    nesterov: bool = Boolean(default=False,
                             description="Enables Nesterov momentum.")