Example #1
class NumberPreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """NumberPreprocessingConfig is a dataclass that configures the parameters used for a number input feature."""

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        default="fill_with_const",
        allow_none=False,
        description=
        "What strategy to follow when there's a missing value in a number column",
    )

    fill_value: float = schema_utils.NonNegativeFloat(
        default=0.0,
        allow_none=False,
        description=
        "The value to replace missing values with in case the missing_value_strategy is fill_with_const",
    )

    computed_fill_value: float = schema_utils.NonNegativeFloat(
        default=0.0,
        allow_none=False,
        description=
        "The internally computed fill value to replace missing values with in case the "
        "missing_value_strategy is fill_with_mode or fill_with_mean",
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )

    normalization: str = schema_utils.StringOptions(
        ["zscore", "minmax", "log1p"],
        default=None,
        allow_none=True,
        description="Normalization strategy to use for this number feature.",
    )
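
The configs in these examples are marshmallow dataclasses, so they load from plain dicts via a generated schema (the pattern Example #23 tests below). A minimal sketch of that round trip, assuming the @dataclass decorator comes from marshmallow_dataclass and that schema_utils is importable as shown; both import paths are assumptions about the ludwig package layout:

from marshmallow import ValidationError
from marshmallow_dataclass import dataclass

from ludwig.schema import schema_utils  # assumed import path


@dataclass
class DemoNumberPreprocessing(schema_utils.BaseMarshmallowConfig):
    # Hypothetical trimmed-down copy of the normalization field above.
    normalization: str = schema_utils.StringOptions(
        ["zscore", "minmax", "log1p"],
        default=None,
        allow_none=True,
        description="Normalization strategy to use for this number feature.",
    )


# Omitted fields fall back to their declared defaults:
config = DemoNumberPreprocessing.Schema().load({})
assert config.normalization is None

# Values outside the declared options raise a marshmallow ValidationError:
try:
    DemoNumberPreprocessing.Schema().load({"normalization": "sigmoid"})
except ValidationError as err:
    print(err.messages)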
Example #2
class TimeseriesPreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """TimeseriesPreprocessingConfig is a dataclass that configures the parameters used for a timeseries input
    feature."""

    tokenizer: str = schema_utils.StringOptions(
        sorted(list(tokenizer_registry.keys())),
        default="space",
        allow_none=False,
        description=
        "Defines how to map from the raw string content of the dataset column to a sequence of elements.",
    )

    timeseries_length_limit: int = schema_utils.PositiveInteger(
        default=256,
        allow_none=False,
        description=
        "Defines the maximum length of the timeseries. All timeseries longer than this limit are cut off.",
    )

    padding_value: float = schema_utils.NonNegativeFloat(
        default=0.0,
        allow_none=False,
        description="Float value that is used for padding.",
    )

    padding: str = schema_utils.StringOptions(
        ["left", "right"],
        default="right",
        allow_none=False,
        description=
        "the direction of the padding. right and left are available options.",
    )

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        default="fill_with_const",
        allow_none=False,
        description=
        "What strategy to follow when there's a missing value in a text column",
    )

    fill_value: str = schema_utils.String(
        default="",
        allow_none=False,
        description=
        "The value to replace missing values with in case the missing_value_strategy is fill_with_const",
    )

    computed_fill_value: str = schema_utils.String(
        default="",
        allow_none=False,
        description=
        "The internally computed fill value to replace missing values with in case the "
        "missing_value_strategy is fill_with_mode or fill_with_mean",
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )
Example #3
class BagPreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """BagPreprocessingConfig is a dataclass that configures the parameters used for a bag input feature."""

    tokenizer: str = schema_utils.StringOptions(
        tokenizer_registry.keys(),
        default="space",
        allow_none=False,
        description=
        "Defines how to transform the raw text content of the dataset column to a set of elements. The "
        "default value space splits the string on spaces. Common options include: underscore (splits on "
        "underscore), comma (splits on comma), json (decodes the string into a set or a list through a "
        "JSON parser).",
    )

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        default="fill_with_const",
        allow_none=False,
        description=
        "What strategy to follow when there's a missing value in a set column",
    )

    fill_value: str = schema_utils.String(
        default=strings_utils.UNKNOWN_SYMBOL,
        allow_none=False,
        description=
        "The value to replace missing values with in case the missing_value_strategy is fill_with_const",
    )

    computed_fill_value: str = schema_utils.String(
        default=strings_utils.UNKNOWN_SYMBOL,
        allow_none=False,
        description=
        "The internally computed fill value to replace missing values with in case the "
        "missing_value_strategy is fill_with_mode or fill_with_mean",
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )

    lowercase: bool = schema_utils.Boolean(
        default=False,
        description=
        "If true, converts the string to lowercase before tokenizing.",
    )

    most_common: int = schema_utils.PositiveInteger(
        default=10000,
        allow_none=True,
        description=
        "The maximum number of most common tokens to be considered. If the data contains more than this "
        "amount, the most infrequent tokens will be treated as unknown.",
    )
Example #4
class VectorPreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """VectorPreprocessingConfig is a dataclass that configures the parameters used for a vector input feature."""

    vector_size: int = schema_utils.PositiveInteger(
        default=None,
        allow_none=True,
        description=
        "The size of the vector. If None, the vector size will be inferred from the data.",
    )

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        default="fill_with_const",
        allow_none=False,
        description=
        "What strategy to follow when there's a missing value in a vector column",
    )

    fill_value: str = schema_utils.String(
        default="",
        allow_none=False,
        pattern=r"^([0-9]+(\.[0-9]*)?\s*)*$",
        description=
        "The value to replace missing values with in case the missing_value_strategy is fill_with_const",
    )

    computed_fill_value: str = schema_utils.String(
        default="",
        allow_none=False,
        pattern=r"^([0-9]+(\.[0-9]*)?\s*)*$",
        description=
        "The internally computed fill value to replace missing values with in case the "
        "missing_value_strategy is fill_with_mode or fill_with_mean",
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )
Example #5
class DatePreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """DatePreprocessingConfig is a dataclass that configures the parameters used for a date input feature."""

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        default="fill_with_const",
        allow_none=False,
        description=
        "What strategy to follow when there's a missing value in a date column",
    )

    fill_value: str = schema_utils.String(
        default="",
        allow_none=False,
        description=
        "The value to replace missing values with in case the missing_value_strategy is fill_with_const",
    )

    computed_fill_value: str = schema_utils.String(
        default="",
        allow_none=False,
        description=
        "The internally computed fill value to replace missing values with in case the "
        "missing_value_strategy is fill_with_mode or fill_with_mean",
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )

    datetime_format: str = schema_utils.String(
        default=None,
        allow_none=True,
        description=
        "This parameter can either be a datetime format string, or null, in which case the datetime "
        "format will be inferred automatically.",
    )
Example #6
class H3PreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """H3PreprocessingConfig is a dataclass that configures the parameters used for an H3 input feature."""

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        default="fill_with_const",
        allow_none=False,
        description=
        "What strategy to follow when there's a missing value in an h3 column",
    )

    fill_value: int = schema_utils.PositiveInteger(
        default=576495936675512319,
        allow_none=False,
        description=
        "The value to replace missing values with in case the missing_value_strategy is fill_with_const",
    )

    computed_fill_value: int = schema_utils.PositiveInteger(
        default=576495936675512319,
        allow_none=False,
        description=
        "The internally computed fill value to replace missing values with in case the "
        "missing_value_strategy is fill_with_mode or fill_with_mean",
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )
Example #7
class TextOutputFeatureConfig(BaseOutputFeatureConfig):
    """TextOutputFeatureConfig is a dataclass that configures the parameters used for a text output feature."""

    decoder: Optional[str] = schema_utils.StringOptions(
        list(get_decoder_classes(TEXT).keys()),
        default="generator",
        description="Decoder to use for this text output feature.",
    )
Example #8
class VectorOutputFeatureConfig(BaseOutputFeatureConfig):
    """VectorOutputFeatureConfig is a dataclass that configures the parameters used for a vector output feature."""

    decoder: Optional[str] = schema_utils.StringOptions(
        list(get_decoder_classes(VECTOR).keys()),
        default="projector",
        description="Decoder to use for this vector feature.",
    )
Example #9
class NumberOutputFeatureConfig(BaseOutputFeatureConfig):
    """NumberOutputFeatureConfig is a dataclass that configures the parameters used for a number output feature."""

    decoder: Optional[str] = schema_utils.StringOptions(
        list(get_decoder_classes(NUMBER).keys()),
        default="regressor",
        allow_none=True,
        description="Decoder to use for this number feature.",
    )
Example #10
class CategoryOutputFeatureConfig(BaseOutputFeatureConfig):
    """CategoryOutputFeature is a dataclass that configures the parameters used for a category output feature."""

    decoder: Optional[str] = schema_utils.StringOptions(
        list(get_decoder_classes(CATEGORY).keys()),
        default="classifier",
        allow_none=True,
        description="Decoder to use for this category feature.",
    )
Example #11
class SetOutputFeatureConfig(BaseOutputFeatureConfig):
    """SetOutputFeatureConfig is a dataclass that configures the parameters used for a set output feature."""

    decoder: Optional[str] = schema_utils.StringOptions(
        list(get_decoder_classes(SET).keys()),
        default="classifier",
        allow_none=True,
        description="Decoder to use for this set feature.",
    )
Example #12
class BinaryOutputFeatureConfig(BaseOutputFeatureConfig):
    """BinaryOutputFeature is a dataclass that configures the parameters used for a binary output feature."""

    decoder: Optional[str] = schema_utils.StringOptions(
        list(get_decoder_classes(BINARY).keys()),
        default="regressor",
        allow_none=True,
        description="Decoder to use for this binary feature.",
    )
Example #13
class BinaryInputFeatureConfig(BaseInputFeatureConfig):
    """BinaryInputFeature is a dataclass that configures the parameters used for a binary input feature."""

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=BINARY)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(BINARY).keys()),
        default="passthrough",
        description="Encoder to use for this binary feature.",
    )
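
The option list passed to StringOptions is built from the encoder registry at import time, so the set of valid encoders tracks whatever is registered for the feature type. A small sketch of inspecting those options; the import paths are assumptions inferred from the identifiers used above:

from ludwig.constants import BINARY  # assumed import path
from ludwig.encoders.registry import get_encoder_classes  # assumed import path

# The same expression used for the encoder field's StringOptions above:
valid_encoders = list(get_encoder_classes(BINARY).keys())
print(valid_encoders)  # expected to include "passthrough", the declared default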
Example #14
class DateInputFeatureConfig(BaseInputFeatureConfig):
    """DateInputFeature is a dataclass that configures the parameters used for a date input feature."""

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=DATE)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(DATE).keys()),
        default="embed",
        description="Encoder to use for this date feature.",
    )
Example #15
class VectorInputFeatureConfig(BaseInputFeatureConfig):
    """VectorInputFeatureConfig is a dataclass that configures the parameters used for a vector input feature."""

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(
        feature_type=VECTOR)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(VECTOR).keys()),
        default="dense",
        description="Encoder to use for this vector feature.",
    )
Example #16
class CategoryInputFeatureConfig(BaseInputFeatureConfig):
    """CategoryInputFeature is a dataclass that configures the parameters used for a category input feature."""

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(
        feature_type=CATEGORY)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(CATEGORY).keys()),
        default="dense",
        description="Encoder to use for this category feature.",
    )
Example #17
class AudioInputFeatureConfig(BaseInputFeatureConfig):
    """AudioFeatureInputFeature is a dataclass that configures the parameters used for an audio input feature."""

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(
        feature_type=AUDIO)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(AUDIO).keys()),
        default="parallel_cnn",
        description="Encoder to use for this audio feature.",
    )
Example #18
class NumberInputFeatureConfig(BaseInputFeatureConfig):
    """NumberInputFeature is a dataclass that configures the parameters used for a number input feature."""

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(
        feature_type=NUMBER)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(NUMBER).keys()),
        default="passthrough",
        description="Encoder to use for this number feature.",
    )
Example #19
class ImageInputFeatureConfig(BaseInputFeatureConfig):
    """ImageInputFeatureConfig is a dataclass that configures the parameters used for an image input feature."""

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(
        feature_type=IMAGE)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(IMAGE).keys()),
        default="stacked_cnn",
        description="Encoder to use for this image feature.",
    )
Example #20
class TextInputFeatureConfig(BaseInputFeatureConfig):
    """TextInputFeatureConfig is a dataclass that configures the parameters used for a text input feature."""

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(
        feature_type=TEXT)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(TEXT).keys()),
        default="parallel_cnn",
        description="Encoder to use for this text feature.",
    )
Example #21
class ProjectAggregateCombinerConfig(BaseCombinerConfig):
    projection_size: int = schema_utils.PositiveInteger(
        default=128,
        description=
        "All combiner inputs are projected to this size before being aggregated."
    )
    fc_layers: Optional[List[Dict[str, Any]]] = schema_utils.DictList(
        description=
        "Full specification of the fully connected layers after the aggregation. "
        "It should be a list of dicts, each dict representing one layer.")
    num_fc_layers: int = schema_utils.NonNegativeInteger(
        default=2,
        description="Number of fully connected layers after aggregation.")
    output_size: int = schema_utils.PositiveInteger(
        default=128,
        description=
        "Output size of each layer of the stack of fully connected layers.")
    use_bias: bool = schema_utils.Boolean(
        default=True, description="Whether the layers use a bias vector.")
    weights_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="xavier_uniform",
        description=
        "Initializer to use for the weights of the projection and for the fully connected layers.",
    )
    bias_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="zeros",
        description=
        "Initializer to use for the baias of the projection and for the fully connected layers.",
    )
    norm: Optional[str] = schema_utils.StringOptions(
        ["batch", "layer"],
        default="layer",
        description=
        "Normalization to apply to each projection and fully connected layer.",
    )
    norm_params: Optional[dict] = schema_utils.Dict(
        description=
        "Parameters of the normalization to apply to each projection and fully connected layer."
    )
    activation: str = schema_utils.ActivationOptions(
        default="relu",
        description="Activation to apply to each fully connected layer.")
    dropout: float = schema_utils.FloatRange(
        default=0.0,
        min=0,
        max=1,
        description="Dropout rate to apply to each fully connected layer.")
    residual: bool = schema_utils.Boolean(
        default=True,
        description=
        "Whether to add residual skip connection between the fully connected layers in the stack..",
    )
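
A minimal sketch of materializing this combiner config from a partial dict, following the Schema().load pattern from Example #23 and assuming ProjectAggregateCombinerConfig exposes a generated Schema attribute:

combiner = ProjectAggregateCombinerConfig.Schema().load(
    {"projection_size": 64, "norm": "batch"}
)
assert combiner.projection_size == 64
assert combiner.num_fc_layers == 2  # untouched fields keep their declared defaults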
Example #22
class SequenceCombinerConfig(BaseCombinerConfig):
    """Parameters for sequence combiner."""

    main_sequence_feature: Optional[str] = schema_utils.String(
        default=None, description="")

    reduce_output: Optional[str] = schema_utils.ReductionOptions(
        default=None, description="")

    encoder: Optional[str] = schema_utils.StringOptions(
        list(sequence_encoder_registry.keys()),
        default=None,
        description="")
Example #23
def test_StringOptions():
    # Test case of default conflicting with allowed options:
    test_options = ["one"]
    with pytest.raises(MarshmallowValidationError):
        schema_utils.StringOptions(test_options, default=None, allow_none=False)

    # Test creating a schema with simple option, null not allowed:
    test_options = ["one"]

    @dataclass
    class CustomTestSchema(schema_utils.BaseMarshmallowConfig):
        foo: str = schema_utils.StringOptions(test_options, "one", allow_none=False)

    with pytest.raises(MarshmallowValidationError):
        CustomTestSchema.Schema().load({"foo": None})
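
    # Added sketch (an assumption, not part of the original test): a valid
    # option from test_options loads cleanly through the generated schema.
    loaded = CustomTestSchema.Schema().load({"foo": "one"})
    assert loaded.foo == "one"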
Example #24
class BinaryPreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """BinaryPreprocessingConfig is a dataclass that configures the parameters used for a binary input feature."""

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS + ["fill_with_false"],
        default="fill_with_false",
        allow_none=False,
        description=
        "What strategy to follow when there's a missing value in a binary column",
    )

    fill_value: Union[int, float, str] = schema_utils.NumericOrStringOptionsField(
        strings_utils.all_bool_strs(),
        default=None,
        default_numeric=None,
        default_option=None,
        allow_none=False,
        min=0,
        max=1,
        description=
        "The value to replace missing values with in case the missing_value_strategy is fill_with_const",
    )

    computed_fill_value: Union[int, float, str] = schema_utils.NumericOrStringOptionsField(
        strings_utils.all_bool_strs(),
        default=None,
        default_numeric=None,
        default_option=None,
        allow_none=False,
        min=0,
        max=1,
        description=
        "The internally computed fill value to replace missing values with in case the "
        "missing_value_strategy is fill_with_mode or fill_with_mean",
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )

    fallback_true_label: str = schema_utils.String(
        default=None,
        allow_none=True,
        description=
        "The label to interpret as 1 (True) when the binary feature doesn't have a "
        "conventional boolean value",
    )
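
Here fill_value accepts either a numeric value within [min, max] or one of the recognized boolean strings. A minimal sketch of both shapes; the "true" literal is an assumption about what strings_utils.all_bool_strs() returns, and the load pattern follows Example #23:

cfg_numeric = BinaryPreprocessingConfig.Schema().load({"fill_value": 1})
cfg_string = BinaryPreprocessingConfig.Schema().load({"fill_value": "true"})
print(cfg_numeric.fill_value, cfg_string.fill_value)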
Example #25
File: concat.py Project: ludwig-ai/ludwig
class ConcatCombinerConfig(BaseCombinerConfig):
    """Parameters for concat combiner."""

    fc_layers: Optional[List[Dict[str, Any]]] = schema_utils.DictList(
        description="")

    num_fc_layers: int = schema_utils.NonNegativeInteger(default=0,
                                                         description="")

    output_size: int = schema_utils.PositiveInteger(
        default=256, description="Output size of a fully connected layer.")

    use_bias: bool = schema_utils.Boolean(
        default=True, description="Whether the layer uses a bias vector.")

    weights_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="xavier_uniform", description="")

    bias_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="zeros", description="")

    norm: Optional[str] = schema_utils.StringOptions(["batch", "layer"],
                                                     description="")

    norm_params: Optional[dict] = schema_utils.Dict(description="")

    activation: str = schema_utils.ActivationOptions(default="relu",
                                                     description="")

    dropout: float = schema_utils.FloatRange(default=0.0,
                                             min=0,
                                             max=1,
                                             description="")

    flatten_inputs: bool = schema_utils.Boolean(
        default=False,
        description="Whether to flatten input tensors to a vector.")

    residual: bool = schema_utils.Boolean(
        default=False,
        description=
        ("Whether to add a residual connection to each fully connected layer block. All fully connected layers must"
         " have the same size"),
    )
Example #26
class CategoryPreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """CategoryPreprocessingConfig is a dataclass that configures the parameters used for a category input
    feature."""

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        default="fill_with_const",
        allow_none=False,
        description=
        "What strategy to follow when there's a missing value in a category column",
    )

    fill_value: str = schema_utils.String(
        default=strings_utils.UNKNOWN_SYMBOL,
        allow_none=False,
        description=
        "The value to replace missing values with in case the missing_value_strategy is fill_with_const",
    )

    computed_fill_value: str = schema_utils.String(
        default=strings_utils.UNKNOWN_SYMBOL,
        allow_none=False,
        description=
        "The internally computed fill value to replace missing values with in case the "
        "missing_value_strategy is fill_with_mode or fill_with_mean",
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )

    lowercase: bool = schema_utils.Boolean(
        default=False,
        description=
        "Whether the string has to be lowercased before being handled by the tokenizer.",
    )

    most_common: int = schema_utils.PositiveInteger(
        default=10000,
        allow_none=True,
        description=
        "The maximum number of most common tokens to be considered. if the data contains more than this "
        "amount, the most infrequent tokens will be treated as unknown.",
    )
Example #27
class ComparatorCombinerConfig(BaseCombinerConfig):
    """Parameters for comparator combiner."""

    entity_1: List[str]
    """TODO: Document parameters."""

    entity_2: List[str]
    """TODO: Document parameters."""

    fc_layers: Optional[List[Dict[str, Any]]] = schema_utils.DictList(
        description="")

    num_fc_layers: int = schema_utils.NonNegativeInteger(default=1,
                                                         description="")

    output_size: int = schema_utils.PositiveInteger(
        default=256, description="Output size of a fully connected layer")

    use_bias: bool = schema_utils.Boolean(
        default=True, description="Whether the layer uses a bias vector.")

    weights_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="xavier_uniform", description="")

    bias_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="zeros", description="")

    norm: Optional[str] = schema_utils.StringOptions(["batch", "layer"],
                                                     description="")

    norm_params: Optional[dict] = schema_utils.Dict(description="")

    activation: str = schema_utils.ActivationOptions(default="relu",
                                                     description="")

    dropout: float = schema_utils.FloatRange(
        default=0.0,
        min=0,
        max=1,
        description="Dropout rate for the transformer block.")
Example #28
File: tabnet.py Project: ludwig-ai/ludwig
class TabNetCombinerConfig(BaseCombinerConfig):
    """Parameters for tabnet combiner."""

    size: int = schema_utils.PositiveInteger(default=32,
                                             description="`N_a` in the paper.")

    output_size: int = schema_utils.PositiveInteger(
        default=128,
        description="Output size of a fully connected layer. `N_d` in the paper"
    )

    num_steps: int = schema_utils.NonNegativeInteger(
        default=3,
        description=
        ("Number of steps / repetitions of the the attentive transformer and feature transformer computations. "
         "`N_steps` in the paper"),
    )

    num_total_blocks: int = schema_utils.NonNegativeInteger(
        default=4,
        description="Total number of feature transformer block at each step")

    num_shared_blocks: int = schema_utils.NonNegativeInteger(
        default=2,
        description=
        "Number of shared feature transformer blocks across the steps")

    relaxation_factor: float = schema_utils.FloatRange(
        default=1.5,
        description=
        ("Factor that influences how many times a feature should be used across the steps of computation. a value of"
         " 1 implies it each feature should be use once, a higher value allows for multiple usages. `gamma` in the "
         "paper"),
    )

    bn_epsilon: float = schema_utils.FloatRange(
        default=1e-3,
        description="Epsilon to be added to the batch norm denominator.")

    bn_momentum: float = schema_utils.FloatRange(
        default=0.05,
        description=
        "Momentum of the batch norm. 1 - `m_B` from the TabNet paper.")

    bn_virtual_bs: Optional[int] = schema_utils.PositiveInteger(
        default=1024,
        allow_none=True,
        description=
        ("Size of the virtual batch size used by ghost batch norm. If null, regular batch norm is used instead. "
         "`B_v` from the TabNet paper"),
    )

    sparsity: float = schema_utils.FloatRange(
        default=1e-4,
        description=
        "Multiplier of the sparsity inducing loss. `lambda_sparse` in the paper"
    )

    entmax_mode: str = schema_utils.StringOptions(
        ["entmax15", "sparsemax", "constant", "adaptive"],
        default="sparsemax",
        description="")

    entmax_alpha: float = schema_utils.FloatRange(
        default=1.5, min=1, max=2,
        description="1 corresponds to softmax, 2 is sparsemax.")

    dropout: float = schema_utils.FloatRange(
        default=0.05,
        min=0,
        max=1,
        description="Dropout rate for the transformer block.")
Example #29
class ECDTrainerConfig(BaseTrainerConfig):
    """Dataclass that configures most of the hyperparameters used for ECD model training."""

    type: str = schema_utils.StringOptions(
        ["trainer", "ray_legacy_trainer"],
        default="trainer",
        description=(
            "Trainer to use for training the model. Must be one of ['trainer', 'ray_legacy_trainer'] - "
            "corresponds to name in `ludwig.trainers.registry.(ray_)trainers_registry` (default: 'trainer')"
        ),
        allow_none=False,
    )

    optimizer: BaseOptimizerConfig = OptimizerDataclassField(
        default={"type": "adam"}, description="Parameter values for selected torch optimizer."
    )

    epochs: int = schema_utils.PositiveInteger(
        default=100,
        description="Number of epochs the algorithm is intended to be run over.",
        parameter_metadata=TRAINER_METADATA["epochs"],
    )

    train_steps: int = schema_utils.PositiveInteger(
        default=None,
        allow_none=True,
        description=(
            "Maximum number of training steps the algorithm is intended to be run over. "
            + "If unset, then `epochs` is used to determine training length."
        ),
        parameter_metadata=TRAINER_METADATA["train_steps"],
    )

    regularization_lambda: float = schema_utils.FloatRange(
        default=0.0,
        min=0,
        description="Strength of the $L2$ regularization.",
        parameter_metadata=TRAINER_METADATA["regularization_lambda"],
    )

    regularization_type: Optional[str] = schema_utils.RegularizerOptions(
        default="l2", description="Type of regularization."
    )

    should_shuffle: bool = schema_utils.Boolean(
        default=True,
        description="Whether to shuffle batches during training when true.",
        parameter_metadata=TRAINER_METADATA["should_shuffle"],
    )

    batch_size: Union[int, str] = schema_utils.IntegerOrAutoField(
        default=128,
        default_numeric=128,
        allow_none=False,
        min_exclusive=0,
        description=(
            "The number of training examples utilized in one training step of the model. If ’auto’, the "
            "biggest batch size (power of 2) that can fit in memory will be used."
        ),
        parameter_metadata=TRAINER_METADATA["batch_size"],
    )

    steps_per_checkpoint: int = schema_utils.NonNegativeInteger(
        default=0,
        description=(
            "How often the model is checkpointed. Also dictates maximum evaluation frequency. If 0 the model is "
            "checkpointed after every epoch."
        ),
        parameter_metadata=TRAINER_METADATA["steps_per_checkpoint"],
    )

    checkpoints_per_epoch: int = schema_utils.NonNegativeInteger(
        default=0,
        description=(
            "Number of checkpoints per epoch. For example, 2 -> checkpoints are written every half of an epoch. Note "
            "that it is invalid to specify both non-zero `steps_per_checkpoint` and non-zero `checkpoints_per_epoch`."
        ),
        parameter_metadata=TRAINER_METADATA["checkpoints_per_epoch"],
    )

    reduce_learning_rate_on_plateau: float = schema_utils.FloatRange(
        default=0.0,
        min=0.0,
        max=1.0,
        description=(
            "Reduces the learning rate when the algorithm hits a plateau (i.e. the performance on the validation does "
            "not improve."
        ),
        parameter_metadata=TRAINER_METADATA["reduce_learning_rate_on_plateau"],
    )

    reduce_learning_rate_on_plateau_patience: int = schema_utils.NonNegativeInteger(
        default=5,
        description="How many epochs have to pass before the learning rate reduces.",
        parameter_metadata=TRAINER_METADATA["reduce_learning_rate_on_plateau_patience"],
    )

    reduce_learning_rate_on_plateau_rate: float = schema_utils.FloatRange(
        default=0.5,
        min=0.0,
        max=1.0,
        description="Rate at which we reduce the learning rate.",
        parameter_metadata=TRAINER_METADATA["reduce_learning_rate_on_plateau_rate"],
    )

    reduce_learning_rate_eval_metric: str = schema_utils.String(
        default=LOSS,
        description="Rate at which we reduce the learning rate.",
        parameter_metadata=TRAINER_METADATA["reduce_learning_rate_eval_metric"],
    )

    reduce_learning_rate_eval_split: str = schema_utils.String(
        default=TRAINING,
        description="Which dataset split to listen on for reducing the learning rate.",
        parameter_metadata=TRAINER_METADATA["reduce_learning_rate_eval_split"],
    )

    increase_batch_size_on_plateau: int = schema_utils.NonNegativeInteger(
        default=0,
        description="Number to increase the batch size by on a plateau.",
        parameter_metadata=TRAINER_METADATA["increase_batch_size_on_plateau"],
    )

    increase_batch_size_on_plateau_patience: int = schema_utils.NonNegativeInteger(
        default=5,
        description="How many epochs to wait for before increasing the batch size.",
        parameter_metadata=TRAINER_METADATA["increase_batch_size_on_plateau_patience"],
    )

    increase_batch_size_on_plateau_rate: float = schema_utils.NonNegativeFloat(
        default=2.0,
        description="Rate at which the batch size increases.",
        parameter_metadata=TRAINER_METADATA["increase_batch_size_on_plateau_rate"],
    )

    increase_batch_size_on_plateau_max: int = schema_utils.PositiveInteger(
        default=512,
        description="Maximum size of the batch.",
        parameter_metadata=TRAINER_METADATA["increase_batch_size_on_plateau_max"],
    )

    increase_batch_size_eval_metric: str = schema_utils.String(
        default=LOSS,
        description="Which metric to listen on for increasing the batch size.",
        parameter_metadata=TRAINER_METADATA["increase_batch_size_eval_metric"],
    )

    increase_batch_size_eval_split: str = schema_utils.String(
        default=TRAINING,
        description="Which dataset split to listen on for increasing the batch size.",
        parameter_metadata=TRAINER_METADATA["increase_batch_size_eval_split"],
    )

    decay: bool = schema_utils.Boolean(
        default=False,
        description="Turn on exponential decay of the learning rate.",
        parameter_metadata=TRAINER_METADATA["decay"],
    )

    decay_steps: int = schema_utils.PositiveInteger(
        default=10000,
        description="The number of steps to take in the exponential learning rate decay.",
        parameter_metadata=TRAINER_METADATA["decay_steps"],
    )

    decay_rate: float = schema_utils.FloatRange(
        default=0.96,
        min=0.0,
        max=1.0,
        description="Decay per epoch (%): Factor to decrease the Learning rate.",
        parameter_metadata=TRAINER_METADATA["decay_steps"],
    )

    staircase: bool = schema_utils.Boolean(
        default=False,
        description="Decays the learning rate at discrete intervals.",
        parameter_metadata=TRAINER_METADATA["staircase"],
    )

    gradient_clipping: Optional[GradientClippingConfig] = GradientClippingDataclassField(
        description="Parameter values for gradient clipping.",
        default={},
    )

    learning_rate_warmup_epochs: float = schema_utils.NonNegativeFloat(
        default=1.0,
        description="Number of epochs to warmup the learning rate for.",
        parameter_metadata=TRAINER_METADATA["learning_rate_warmup_epochs"],
    )

    learning_rate_scaling: str = schema_utils.StringOptions(
        ["constant", "sqrt", "linear"],
        default="linear",
        description=(
            "Scale by which to increase the learning rate as the number of distributed workers increases. "
            "Traditionally the learning rate is scaled linearly with the number of workers to reflect the proportion by"
            " which the effective batch size is increased. For very large batch sizes, a softer square-root scale can "
            "sometimes lead to better model performance. If the learning rate is hand-tuned for a given number of "
            "workers, setting this value to constant can be used to disable scale-up."
        ),
        parameter_metadata=TRAINER_METADATA["learning_rate_scaling"],
    )
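
A minimal sketch of overriding a few of these trainer hyperparameters from a dict, with every untouched field keeping the default declared above (load pattern as in Example #23):

trainer = ECDTrainerConfig.Schema().load(
    {"epochs": 20, "batch_size": "auto", "learning_rate_scaling": "sqrt"}
)
assert trainer.epochs == 20
assert trainer.should_shuffle is True  # untouched fields keep their defaults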
Example #30
class GBMTrainerConfig(BaseTrainerConfig):
    """Dataclass that configures most of the hyperparameters used for GBM model training."""

    type: str = schema_utils.StringOptions(
        ["lightgbm_trainer"],
        default="lightgbm_trainer",
        description=(
            "Trainer to use for training the model. Must be one of ['lightgbm_trainer'] - "
            "corresponds to name in `ludwig.trainers.registry.(ray_)trainers_registry` "
            "(default: 'lightgbm_trainer')"
        ),
        allow_none=False,
    )

    # LightGBM core parameters (https://lightgbm.readthedocs.io/en/latest/Parameters.html)
    boosting_type: str = schema_utils.StringOptions(
        ["gbdt", "rf", "dart", "goss"],
        default="gbdt",
        description="Type of boosting algorithm to use with GBM trainer.",
    )

    tree_learner: str = schema_utils.StringOptions(
        ["serial", "feature", "data", "voting"],
        default="serial",
        description="Type of tree learner to use with GBM trainer.",
    )

    num_boost_round: int = schema_utils.PositiveInteger(
        default=100, description="Number of boosting rounds to perform with GBM trainer."
    )

    num_leaves: int = schema_utils.PositiveInteger(
        default=31, description="Number of leaves to use in the tree with GBM trainer."
    )

    # LightGBM Learning Control params
    max_depth: int = schema_utils.Integer(
        default=-1,
        description="Maximum depth of a tree in the GBM trainer. A negative value means no limit.",
    )

    min_data_in_leaf: int = schema_utils.PositiveInteger(
        default=20, description="Minimum number of data points in a leaf with GBM trainer."
    )

    min_sum_hessian_in_leaf: float = schema_utils.NonNegativeFloat(
        default=1e-3, description="Minimum sum of hessians in a leaf with GBM trainer."
    )

    bagging_fraction: float = schema_utils.FloatRange(
        default=1.0, min=0.0, max=1.0, description="Fraction of data to use for bagging with GBM trainer."
    )

    pos_bagging_fraction: float = schema_utils.FloatRange(
        default=1.0, min=0.0, max=1.0, description="Fraction of positive data to use for bagging with GBM trainer."
    )

    neg_bagging_fraction: float = schema_utils.FloatRange(
        default=1.0, min=0.0, max=1.0, description="Fraction of negative data to use for bagging with GBM trainer."
    )

    bagging_freq: int = schema_utils.NonNegativeInteger(default=0, description="Frequency of bagging with GBM trainer.")

    bagging_seed: int = schema_utils.Integer(default=3, description="Random seed for bagging with GBM trainer.")

    feature_fraction: float = schema_utils.FloatRange(
        default=1.0, min=0.0, max=1.0, description="Fraction of features to use in the GBM trainer."
    )

    feature_fraction_bynode: float = schema_utils.FloatRange(
        default=1.0, min=0.0, max=1.0, description="Fraction of features to use for each tree node with GBM trainer."
    )

    feature_fraction_seed: int = schema_utils.Integer(
        default=2, description="Random seed for feature fraction with GBM trainer."
    )

    extra_trees: bool = schema_utils.Boolean(
        default=False, description="Whether to use extremely randomized trees in the GBM trainer."
    )

    extra_seed: int = schema_utils.Integer(
        default=6, description="Random seed for extremely randomized trees in the GBM trainer."
    )

    max_delta_step: float = schema_utils.FloatRange(
        default=0.0,
        min=0.0,
        max=1.0,
        description=(
            "Used to limit the max output of tree leaves in the GBM trainer. A negative value means no constraint."
        ),
    )

    lambda_l1: float = schema_utils.NonNegativeFloat(
        default=0.0, description="L1 regularization factor for the GBM trainer."
    )

    lambda_l2: float = schema_utils.NonNegativeFloat(
        default=0.0, description="L2 regularization factor for the GBM trainer."
    )

    linear_lambda: float = schema_utils.NonNegativeFloat(
        default=0.0, description="Linear tree regularization in the GBM trainer."
    )

    min_gain_to_split: float = schema_utils.NonNegativeFloat(
        default=0.0, description="Minimum gain to split a leaf in the GBM trainer."
    )

    drop_rate: float = schema_utils.FloatRange(
        default=0.1,
        min=0.0,
        max=1.0,
        description="Dropout rate for the GBM trainer. Used only with boosting_type 'dart'.",
    )

    max_drop: int = schema_utils.Integer(
        default=50,
        description=(
            "Maximum number of dropped trees during one boosting iteration. "
            "Used only with boosting_type 'dart'. A negative value means no limit."
        ),
    )

    skip_drop: float = schema_utils.FloatRange(
        default=0.5,
        min=0.0,
        max=1.0,
        description=(
            "Probability of skipping the dropout during one boosting iteration. Used only with boosting_type 'dart'."
        ),
    )

    xgboost_dart_mode: bool = schema_utils.Boolean(
        default=False,
        description="Whether to use xgboost dart mode in the GBM trainer. Used only with boosting_type 'dart'.",
    )

    uniform_drop: bool = schema_utils.Boolean(
        default=False,
        description=("Whether to use uniform dropout in the GBM trainer. Used only with boosting_type 'dart'."),
    )

    drop_seed: int = schema_utils.Integer(
        default=4,
        description="Random seed to choose dropping models in the GBM trainer. Used only with boosting_type 'dart'.",
    )

    top_rate: float = schema_utils.FloatRange(
        default=0.2,
        min=0.0,
        max=1.0,
        description="The retain ratio of large gradient data in the GBM trainer. Used only with boosting_type 'goss'.",
    )

    other_rate: float = schema_utils.FloatRange(
        default=0.1,
        min=0.0,
        max=1.0,
        description="The retain ratio of small gradient data in the GBM trainer. Used only with boosting_type 'goss'.",
    )

    min_data_per_group: int = schema_utils.PositiveInteger(
        default=100,
        description="Minimum number of data points per categorical group for the GBM trainer.",
    )

    max_cat_threshold: int = schema_utils.PositiveInteger(
        default=32,
        description="Number of split points considered for categorical features for the GBM trainer.",
    )

    cat_l2: float = schema_utils.NonNegativeFloat(
        default=10.0, description="L2 regularization factor for categorical split in the GBM trainer."
    )

    cat_smooth: float = schema_utils.NonNegativeFloat(
        default=10.0, description="Smoothing factor for categorical split in the GBM trainer."
    )

    max_cat_to_onehot: int = schema_utils.PositiveInteger(
        default=4,
        description="Maximum categorical cardinality required before one-hot encoding in the GBM trainer.",
    )

    cegb_tradeoff: float = schema_utils.NonNegativeFloat(
        default=1.0,
        description="Cost-effective gradient boosting multiplier for all penalties in the GBM trainer.",
    )

    cegb_penalty_split: float = schema_utils.NonNegativeFloat(
        default=0.0,
        description="Cost-effective gradient boosting penalty for splitting a node in the GBM trainer.",
    )

    path_smooth: float = schema_utils.NonNegativeFloat(
        default=0.0,
        description="Smoothing factor applied to tree nodes in the GBM trainer.",
    )

    verbose: int = schema_utils.IntegerRange(default=0, min=-1, max=2, description="Verbosity level for GBM trainer.")

    # LightGBM IO params
    max_bin: int = schema_utils.PositiveInteger(
        default=255, description="Maximum number of bins to use for discretizing features with GBM trainer."
    )
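
A minimal sketch of configuring DART boosting with this config: drop_rate, max_drop, and skip_drop only take effect when boosting_type is 'dart' (load pattern as in Example #23):

gbm = GBMTrainerConfig.Schema().load(
    {"boosting_type": "dart", "drop_rate": 0.2, "max_drop": 25}
)
assert gbm.num_boost_round == 100  # untouched fields keep their defaults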