class NumberPreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """Preprocessing parameters for a number input feature.

    Declares how missing values are handled (strategy, user-supplied fill value, internally computed fill value)
    and which normalization strategy, if any, is applied.
    """

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        default="fill_with_const",
        allow_none=False,
        description="What strategy to follow when there's a missing value in a number column",
    )

    # A numeric fill value can legitimately be negative, so an unbounded float range is used here rather than a
    # non-negative float.
    fill_value: float = schema_utils.FloatRange(
        default=0.0,
        allow_none=False,
        description="The value to replace missing values with in case the missing_value_strategy is fill_with_const",
    )

    # Same reasoning as `fill_value`: a computed mean/mode of a signed column may be below zero.
    computed_fill_value: float = schema_utils.FloatRange(
        default=0.0,
        allow_none=False,
        description=(
            "The internally computed fill value to replace missing values with in case the "
            "missing_value_strategy is fill_with_mode or fill_with_mean"
        ),
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )

    # None means no normalization strategy is selected.
    normalization: str = schema_utils.StringOptions(
        ["zscore", "minmax", "log1p"],
        default=None,
        allow_none=True,
        description="Normalization strategy to use for this number feature.",
    )
class TimeseriesPreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """Preprocessing parameters for a timeseries input feature.

    Declares the tokenizer, the length limit, padding behaviour and how missing values are filled.
    """

    # Options are the registered tokenizer names in sorted order.
    tokenizer: str = schema_utils.StringOptions(
        sorted(tokenizer_registry.keys()),
        default="space",
        allow_none=False,
        description="Defines how to map from the raw string content of the dataset column to a sequence of elements.",
    )

    timeseries_length_limit: int = schema_utils.PositiveInteger(
        default=256,
        allow_none=False,
        description="Defines the maximum length of the timeseries. All timeseries longer than this limit are cut off.",
    )

    padding_value: float = schema_utils.NonNegativeFloat(
        default=0.0,
        allow_none=False,
        description="Float value that is used for padding.",
    )

    padding: str = schema_utils.StringOptions(
        ["left", "right"],
        default="right",
        allow_none=False,
        description="the direction of the padding. right and left are available options.",
    )

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        default="fill_with_const",
        allow_none=False,
        description="What strategy to follow when there's a missing value in a timeseries column",
    )

    fill_value: str = schema_utils.String(
        default="",
        allow_none=False,
        description="The value to replace missing values with in case the missing_value_strategy is fill_with_const",
    )

    computed_fill_value: str = schema_utils.String(
        default="",
        allow_none=False,
        description=(
            "The internally computed fill value to replace missing values with in case the "
            "missing_value_strategy is fill_with_mode or fill_with_mean"
        ),
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )
class BagPreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """Preprocessing parameters for a bag input feature.

    Declares the tokenizer, missing-value handling, optional lowercasing and the cap on the number of most
    common tokens that are kept.
    """

    # Pass a concrete, sorted list rather than a live `dict_keys` view so the options are a real list with a
    # deterministic order.
    tokenizer: str = schema_utils.StringOptions(
        sorted(tokenizer_registry.keys()),
        default="space",
        allow_none=False,
        description=(
            "Defines how to transform the raw text content of the dataset column to a set of elements. The "
            "default value space splits the string on spaces. Common options include: underscore (splits on "
            "underscore), comma (splits on comma), json (decodes the string into a set or a list through a "
            "JSON parser)."
        ),
    )

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        default="fill_with_const",
        allow_none=False,
        description="What strategy to follow when there's a missing value in a bag column",
    )

    fill_value: str = schema_utils.String(
        default=strings_utils.UNKNOWN_SYMBOL,
        allow_none=False,
        description="The value to replace missing values with in case the missing_value_strategy is fill_with_const",
    )

    computed_fill_value: str = schema_utils.String(
        default=strings_utils.UNKNOWN_SYMBOL,
        allow_none=False,
        description=(
            "The internally computed fill value to replace missing values with in case the "
            "missing_value_strategy is fill_with_mode or fill_with_mean"
        ),
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )

    lowercase: bool = schema_utils.Boolean(
        default=False,
        description="If true, converts the string to lowercase before tokenizing.",
    )

    most_common: int = schema_utils.PositiveInteger(
        default=10000,
        allow_none=True,
        description=(
            "The maximum number of most common tokens to be considered. If the data contains more than this "
            "amount, the most infrequent tokens will be treated as unknown."
        ),
    )
class VectorPreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """Preprocessing parameters for a vector input feature.

    Both fill values are strings constrained by a regular expression that accepts a (possibly empty) run of
    unsigned decimal numbers, each optionally followed by whitespace.
    """

    vector_size: int = schema_utils.PositiveInteger(
        description="The size of the vector. If None, the vector size will be inferred from the data.",
        default=None,
        allow_none=True,
    )

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        description="What strategy to follow when there's a missing value in a vector column",
        default="fill_with_const",
        allow_none=False,
    )

    fill_value: str = schema_utils.String(
        description="The value to replace missing values with in case the missing_value_strategy is fill_with_const",
        pattern=r"^([0-9]+(\.[0-9]*)?\s*)*$",
        default="",
        allow_none=False,
    )

    computed_fill_value: str = schema_utils.String(
        description=(
            "The internally computed fill value to replace missing values with in case the "
            "missing_value_strategy is fill_with_mode or fill_with_mean"
        ),
        pattern=r"^([0-9]+(\.[0-9]*)?\s*)*$",
        default="",
        allow_none=False,
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )
class DatePreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """Preprocessing parameters for a date input feature.

    Declares missing-value handling and an optional explicit datetime format string.
    """

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        description="What strategy to follow when there's a missing value in a date column",
        default="fill_with_const",
        allow_none=False,
    )

    fill_value: str = schema_utils.String(
        description="The value to replace missing values with in case the missing_value_strategy is fill_with_const",
        default="",
        allow_none=False,
    )

    computed_fill_value: str = schema_utils.String(
        description=(
            "The internally computed fill value to replace missing values with in case the "
            "missing_value_strategy is fill_with_mode or fill_with_mean"
        ),
        default="",
        allow_none=False,
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )

    datetime_format: str = schema_utils.String(
        description=(
            "This parameter can either be a datetime format string, or null, in which case the datetime "
            "format will be inferred automatically."
        ),
        default=None,
        allow_none=True,
    )
class H3PreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """Preprocessing parameters for an h3 input feature.

    Declares the missing-value strategy and the positive-integer fill values used with it.
    """

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        description="What strategy to follow when there's a missing value in an h3 column",
        default="fill_with_const",
        allow_none=False,
    )

    # The user-supplied and the computed fill value share the same default.
    fill_value: int = schema_utils.PositiveInteger(
        description="The value to replace missing values with in case the missing_value_strategy is fill_with_const",
        default=576495936675512319,
        allow_none=False,
    )

    computed_fill_value: int = schema_utils.PositiveInteger(
        description=(
            "The internally computed fill value to replace missing values with in case the "
            "missing_value_strategy is fill_with_mode or fill_with_mean"
        ),
        default=576495936675512319,
        allow_none=False,
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )
class TextOutputFeatureConfig(BaseOutputFeatureConfig):
    """Schema for a text output feature.

    Declares the ``decoder`` field; the offered options are the names returned by
    ``get_decoder_classes(TEXT)`` and the default is ``"generator"``.
    """

    decoder: Optional[str] = schema_utils.StringOptions(
        list(get_decoder_classes(TEXT).keys()),
        description="Decoder to use for this text output feature.",
        default="generator",
    )
class VectorOutputFeatureConfig(BaseOutputFeatureConfig):
    """Schema for a vector output feature.

    Declares the ``decoder`` field; the offered options are the names returned by
    ``get_decoder_classes(VECTOR)`` and the default is ``"projector"``.
    """

    decoder: Optional[str] = schema_utils.StringOptions(
        list(get_decoder_classes(VECTOR).keys()),
        description="Decoder to use for this vector feature.",
        default="projector",
    )
class NumberOutputFeatureConfig(BaseOutputFeatureConfig):
    """Schema for a number output feature.

    Declares the ``decoder`` field; the offered options are the names returned by
    ``get_decoder_classes(NUMBER)``, the default is ``"regressor"`` and None is allowed.
    """

    decoder: Optional[str] = schema_utils.StringOptions(
        list(get_decoder_classes(NUMBER).keys()),
        description="Decoder to use for this number feature.",
        default="regressor",
        allow_none=True,
    )
class CategoryOutputFeatureConfig(BaseOutputFeatureConfig):
    """Schema for a category output feature.

    Declares the ``decoder`` field; the offered options are the names returned by
    ``get_decoder_classes(CATEGORY)``, the default is ``"classifier"`` and None is allowed.
    """

    decoder: Optional[str] = schema_utils.StringOptions(
        list(get_decoder_classes(CATEGORY).keys()),
        description="Decoder to use for this category feature.",
        default="classifier",
        allow_none=True,
    )
class SetOutputFeatureConfig(BaseOutputFeatureConfig):
    """Schema for a set output feature.

    Declares the ``decoder`` field; the offered options are the names returned by
    ``get_decoder_classes(SET)``, the default is ``"classifier"`` and None is allowed.
    """

    decoder: Optional[str] = schema_utils.StringOptions(
        list(get_decoder_classes(SET).keys()),
        description="Decoder to use for this set feature.",
        default="classifier",
        allow_none=True,
    )
class BinaryOutputFeatureConfig(BaseOutputFeatureConfig):
    """Schema for a binary output feature.

    Declares the ``decoder`` field; the offered options are the names returned by
    ``get_decoder_classes(BINARY)``, the default is ``"regressor"`` and None is allowed.
    """

    decoder: Optional[str] = schema_utils.StringOptions(
        list(get_decoder_classes(BINARY).keys()),
        description="Decoder to use for this binary feature.",
        default="regressor",
        allow_none=True,
    )
class BinaryInputFeatureConfig(BaseInputFeatureConfig):
    """Schema for a binary input feature.

    Declares a ``preprocessing`` section built for the BINARY feature type and an ``encoder`` field whose
    offered options are the names returned by ``get_encoder_classes(BINARY)`` (default ``"passthrough"``).
    """

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=BINARY)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(BINARY).keys()),
        description="Encoder to use for this binary feature.",
        default="passthrough",
    )
class DateInputFeatureConfig(BaseInputFeatureConfig):
    """Schema for a date input feature.

    Declares a ``preprocessing`` section built for the DATE feature type and an ``encoder`` field whose
    offered options are the names returned by ``get_encoder_classes(DATE)`` (default ``"embed"``).
    """

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=DATE)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(DATE).keys()),
        description="Encoder to use for this date feature.",
        default="embed",
    )
class VectorInputFeatureConfig(BaseInputFeatureConfig):
    """Schema for a vector input feature.

    Declares a ``preprocessing`` section built for the VECTOR feature type and an ``encoder`` field whose
    offered options are the names returned by ``get_encoder_classes(VECTOR)`` (default ``"dense"``).
    """

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=VECTOR)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(VECTOR).keys()),
        description="Encoder to use for this vector feature.",
        default="dense",
    )
class CategoryInputFeatureConfig(BaseInputFeatureConfig):
    """Schema for a category input feature.

    Declares a ``preprocessing`` section built for the CATEGORY feature type and an ``encoder`` field whose
    offered options are the names returned by ``get_encoder_classes(CATEGORY)`` (default ``"dense"``).
    """

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=CATEGORY)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(CATEGORY).keys()),
        description="Encoder to use for this category feature.",
        default="dense",
    )
class AudioInputFeatureConfig(BaseInputFeatureConfig):
    """Schema for an audio input feature.

    Declares a ``preprocessing`` section built for the AUDIO feature type and an ``encoder`` field whose
    offered options are the names returned by ``get_encoder_classes(AUDIO)`` (default ``"parallel_cnn"``).
    """

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=AUDIO)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(AUDIO).keys()),
        description="Encoder to use for this audio feature.",
        default="parallel_cnn",
    )
class NumberInputFeatureConfig(BaseInputFeatureConfig):
    """Schema for a number input feature.

    Declares a ``preprocessing`` section built for the NUMBER feature type and an ``encoder`` field whose
    offered options are the names returned by ``get_encoder_classes(NUMBER)`` (default ``"passthrough"``).
    """

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=NUMBER)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(NUMBER).keys()),
        description="Encoder to use for this number feature.",
        default="passthrough",
    )
class ImageInputFeatureConfig(BaseInputFeatureConfig):
    """Schema for an image input feature.

    Declares a ``preprocessing`` section built for the IMAGE feature type and an ``encoder`` field whose
    offered options are the names returned by ``get_encoder_classes(IMAGE)`` (default ``"stacked_cnn"``).
    """

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=IMAGE)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(IMAGE).keys()),
        description="Encoder to use for this image feature.",
        default="stacked_cnn",
    )
class TextInputFeatureConfig(BaseInputFeatureConfig):
    """Schema for a text input feature.

    Declares a ``preprocessing`` section built for the TEXT feature type and an ``encoder`` field whose
    offered options are the names returned by ``get_encoder_classes(TEXT)`` (default ``"parallel_cnn"``).
    """

    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=TEXT)

    encoder: Optional[str] = schema_utils.StringOptions(
        list(get_encoder_classes(TEXT).keys()),
        description="Encoder to use for this text feature.",
        default="parallel_cnn",
    )
class ProjectAggregateCombinerConfig(BaseCombinerConfig):
    """Parameters for the project-aggregate combiner.

    Declares the projection size applied to combiner inputs and the settings of the stack of fully connected
    layers that follows the aggregation (size, initializers, normalization, activation, dropout, residual).
    """

    projection_size: int = schema_utils.PositiveInteger(
        default=128,
        description="All combiner inputs are projected to this size before being aggregated.",
    )

    fc_layers: Optional[List[Dict[str, Any]]] = schema_utils.DictList(
        description=(
            "Full specification of the fully connected layers after the aggregation. "
            "It should be a list of dict, each dict representing one layer."
        )
    )

    num_fc_layers: int = schema_utils.NonNegativeInteger(
        default=2,
        description="Number of fully connected layers after aggregation.",
    )

    output_size: int = schema_utils.PositiveInteger(
        default=128,
        description="Output size of each layer of the stack of fully connected layers.",
    )

    use_bias: bool = schema_utils.Boolean(
        default=True,
        description="Whether the layers use a bias vector.",
    )

    weights_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="xavier_uniform",
        description="Initializer to use for the weights of the projection and for the fully connected layers.",
    )

    bias_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        default="zeros",
        description="Initializer to use for the bias of the projection and for the fully connected layers.",
    )

    norm: Optional[str] = schema_utils.StringOptions(
        ["batch", "layer"],
        default="layer",
        description="Normalization to apply to each projection and fully connected layer.",
    )

    norm_params: Optional[dict] = schema_utils.Dict(
        description="Parameters of the normalization to apply to each projection and fully connected layer."
    )

    activation: str = schema_utils.ActivationOptions(
        default="relu",
        description="Activation to apply to each fully connected layer.",
    )

    dropout: float = schema_utils.FloatRange(
        default=0.0,
        min=0,
        max=1,
        description="Dropout rate to apply to each fully connected layer.",
    )

    residual: bool = schema_utils.Boolean(
        default=True,
        description="Whether to add residual skip connection between the fully connected layers in the stack.",
    )
class SequenceCombinerConfig(BaseCombinerConfig):
    """Schema for the sequence combiner.

    All three fields default to None and currently carry empty descriptions.
    """

    main_sequence_feature: Optional[str] = schema_utils.String(
        description="",
        default=None,
    )

    reduce_output: Optional[str] = schema_utils.ReductionOptions(
        description="",
        default=None,
    )

    # Options are the names registered in `sequence_encoder_registry`.
    encoder: Optional[str] = schema_utils.StringOptions(
        list(sequence_encoder_registry.keys()),
        description="",
        default=None,
    )
def test_StringOptions():
    """Check that `StringOptions` rejects None when `allow_none=False`, at creation time and at load time."""
    # Test case of default conflicting with allowed options: a None default cannot be combined with
    # allow_none=False, so building the field itself must fail.
    conflicting_options = ["one"]
    with pytest.raises(MarshmallowValidationError):
        schema_utils.StringOptions(conflicting_options, default=None, allow_none=False)

    # Test creating a schema with simple option, null not allowed. A fresh list is used so this case does not
    # share state with the previous call.
    single_option = ["one"]

    @dataclass
    class CustomTestSchema(schema_utils.BaseMarshmallowConfig):
        foo: str = schema_utils.StringOptions(single_option, "one", allow_none=False)

    schema = CustomTestSchema.Schema()
    with pytest.raises(MarshmallowValidationError):
        schema.load({"foo": None})
class BinaryPreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """Preprocessing parameters for a binary input feature.

    Extends the shared missing-value strategies with ``fill_with_false`` (also the default) and lets fill
    values be either a number between 0 and 1 or one of the strings from ``strings_utils.all_bool_strs()``.
    """

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS + ["fill_with_false"],
        description="What strategy to follow when there's a missing value in a binary column",
        default="fill_with_false",
        allow_none=False,
    )

    fill_value: Union[int, float, str] = schema_utils.NumericOrStringOptionsField(
        strings_utils.all_bool_strs(),
        description="The value to replace missing values with in case the missing_value_strategy is fill_with_const",
        default=None,
        default_numeric=None,
        default_option=None,
        allow_none=False,
        min=0,
        max=1,
    )

    computed_fill_value: Union[int, float, str] = schema_utils.NumericOrStringOptionsField(
        strings_utils.all_bool_strs(),
        description=(
            "The internally computed fill value to replace missing values with in case the "
            "missing_value_strategy is fill_with_mode or fill_with_mean"
        ),
        default=None,
        default_numeric=None,
        default_option=None,
        allow_none=False,
        min=0,
        max=1,
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )

    fallback_true_label: str = schema_utils.String(
        description=(
            "The label to interpret as 1 (True) when the binary feature doesn't have a "
            "conventional boolean value"
        ),
        default=None,
        allow_none=True,
    )
class ConcatCombinerConfig(BaseCombinerConfig):
    """Schema for the concat combiner.

    Declares the fully connected stack settings plus the ``flatten_inputs`` and ``residual`` switches. Several
    fields still have empty descriptions.
    """

    fc_layers: Optional[List[Dict[str, Any]]] = schema_utils.DictList(description="")

    num_fc_layers: int = schema_utils.NonNegativeInteger(description="", default=0)

    output_size: int = schema_utils.PositiveInteger(
        description="Output size of a fully connected layer.",
        default=256,
    )

    use_bias: bool = schema_utils.Boolean(
        description="Whether the layer uses a bias vector.",
        default=True,
    )

    weights_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        description="",
        default="xavier_uniform",
    )

    bias_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        description="",
        default="zeros",
    )

    # No explicit default is passed for the normalization choice.
    norm: Optional[str] = schema_utils.StringOptions(["batch", "layer"], description="")

    norm_params: Optional[dict] = schema_utils.Dict(description="")

    activation: str = schema_utils.ActivationOptions(description="", default="relu")

    dropout: float = schema_utils.FloatRange(description="", default=0.0, min=0, max=1)

    flatten_inputs: bool = schema_utils.Boolean(
        description="Whether to flatten input tensors to a vector.",
        default=False,
    )

    residual: bool = schema_utils.Boolean(
        description=(
            "Whether to add a residual connection to each fully connected layer block. All fully connected layers must"
            " have the same size"
        ),
        default=False,
    )
class CategoryPreprocessingConfig(schema_utils.BaseMarshmallowConfig):
    """Preprocessing parameters for a category input feature.

    Declares missing-value handling (both fill values default to ``strings_utils.UNKNOWN_SYMBOL``), optional
    lowercasing and the cap on the number of most common tokens that are kept.
    """

    missing_value_strategy: str = schema_utils.StringOptions(
        MISSING_VALUE_STRATEGY_OPTIONS,
        description="What strategy to follow when there's a missing value in a category column",
        default="fill_with_const",
        allow_none=False,
    )

    fill_value: str = schema_utils.String(
        description="The value to replace missing values with in case the missing_value_strategy is fill_with_const",
        default=strings_utils.UNKNOWN_SYMBOL,
        allow_none=False,
    )

    computed_fill_value: str = schema_utils.String(
        description=(
            "The internally computed fill value to replace missing values with in case the "
            "missing_value_strategy is fill_with_mode or fill_with_mean"
        ),
        default=strings_utils.UNKNOWN_SYMBOL,
        allow_none=False,
        parameter_metadata=PREPROCESSING_METADATA["computed_fill_value"],
    )

    lowercase: bool = schema_utils.Boolean(
        description="Whether the string has to be lowercased before being handled by the tokenizer.",
        default=False,
    )

    most_common: int = schema_utils.PositiveInteger(
        description=(
            "The maximum number of most common tokens to be considered. if the data contains more than this "
            "amount, the most infrequent tokens will be treated as unknown."
        ),
        default=10000,
        allow_none=True,
    )
class ComparatorCombinerConfig(BaseCombinerConfig):
    """Schema for the comparator combiner.

    ``entity_1`` and ``entity_2`` are plain annotated lists of strings with no default; the remaining fields
    configure the fully connected stack. Several fields still have empty descriptions.
    """

    # TODO: Document parameters.
    entity_1: List[str]

    # TODO: Document parameters.
    entity_2: List[str]

    fc_layers: Optional[List[Dict[str, Any]]] = schema_utils.DictList(description="")

    num_fc_layers: int = schema_utils.NonNegativeInteger(description="", default=1)

    output_size: int = schema_utils.PositiveInteger(
        description="Output size of a fully connected layer",
        default=256,
    )

    use_bias: bool = schema_utils.Boolean(
        description="Whether the layer uses a bias vector.",
        default=True,
    )

    weights_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        description="",
        default="xavier_uniform",
    )

    bias_initializer: Union[str, Dict] = schema_utils.InitializerOrDict(
        description="",
        default="zeros",
    )

    # No explicit default is passed for the normalization choice.
    norm: Optional[str] = schema_utils.StringOptions(["batch", "layer"], description="")

    norm_params: Optional[dict] = schema_utils.Dict(description="")

    activation: str = schema_utils.ActivationOptions(description="", default="relu")

    dropout: float = schema_utils.FloatRange(
        description="Dropout rate for the transformer block.",
        default=0.0,
        min=0,
        max=1,
    )
class TabNetCombinerConfig(BaseCombinerConfig):
    """Parameters for tabnet combiner.

    Field descriptions name the corresponding symbol from the TabNet paper where one is given
    (`N_a`, `N_d`, `N_steps`, `gamma`, `m_B`, `B_v`, `lambda_sparse`).
    """

    size: int = schema_utils.PositiveInteger(default=32, description="`N_a` in the paper.")

    output_size: int = schema_utils.PositiveInteger(
        default=128,
        description="Output size of a fully connected layer. `N_d` in the paper",
    )

    num_steps: int = schema_utils.NonNegativeInteger(
        default=3,
        description=(
            "Number of steps / repetitions of the attentive transformer and feature transformer computations. "
            "`N_steps` in the paper"
        ),
    )

    num_total_blocks: int = schema_utils.NonNegativeInteger(
        default=4,
        description="Total number of feature transformer blocks at each step",
    )

    num_shared_blocks: int = schema_utils.NonNegativeInteger(
        default=2,
        description="Number of shared feature transformer blocks across the steps",
    )

    relaxation_factor: float = schema_utils.FloatRange(
        default=1.5,
        description=(
            "Factor that influences how many times a feature should be used across the steps of computation. A value "
            "of 1 implies each feature should be used once, a higher value allows for multiple usages. `gamma` in "
            "the paper"
        ),
    )

    bn_epsilon: float = schema_utils.FloatRange(
        default=1e-3,
        description="Epsilon to be added to the batch norm denominator.",
    )

    bn_momentum: float = schema_utils.FloatRange(
        default=0.05,
        description="Momentum of the batch norm. 1 - `m_B` from the TabNet paper.",
    )

    bn_virtual_bs: Optional[int] = schema_utils.PositiveInteger(
        default=1024,
        allow_none=True,
        description=(
            "Size of the virtual batch size used by ghost batch norm. If null, regular batch norm is used instead. "
            "`B_v` from the TabNet paper"
        ),
    )

    sparsity: float = schema_utils.FloatRange(
        default=1e-4,
        description="Multiplier of the sparsity inducing loss. `lambda_sparse` in the paper",
    )

    entmax_mode: str = schema_utils.StringOptions(
        ["entmax15", "sparsemax", "constant", "adaptive"],
        default="sparsemax",
        description="",
    )

    entmax_alpha: float = schema_utils.FloatRange(
        default=1.5, min=1, max=2, description=""
    )  # 1 corresponds to softmax, 2 is sparsemax.

    dropout: float = schema_utils.FloatRange(
        default=0.05,
        min=0,
        max=1,
        description="Dropout rate for the transformer block.",
    )
class ECDTrainerConfig(BaseTrainerConfig):
    """Schema for the hyperparameters used for ECD model training.

    Fields are grouped roughly as: trainer type and optimizer, training length, regularization, batching and
    checkpointing, learning-rate reduction on plateau, batch-size increase on plateau, exponential decay,
    gradient clipping, and learning-rate warmup/scaling.
    """

    type: str = schema_utils.StringOptions(
        ["trainer", "ray_legacy_trainer"],
        default="trainer",
        description=(
            "Trainer to use for training the model. Must be one of ['trainer', 'ray_legacy_trainer'] - "
            "corresponds to name in `ludwig.trainers.registry.(ray_)trainers_registry` (default: 'trainer')"
        ),
        allow_none=False,
    )

    optimizer: BaseOptimizerConfig = OptimizerDataclassField(
        default={"type": "adam"}, description="Parameter values for selected torch optimizer."
    )

    epochs: int = schema_utils.PositiveInteger(
        default=100,
        description="Number of epochs the algorithm is intended to be run over.",
        parameter_metadata=TRAINER_METADATA["epochs"],
    )

    train_steps: int = schema_utils.PositiveInteger(
        default=None,
        allow_none=True,
        description=(
            "Maximum number of training steps the algorithm is intended to be run over. "
            "If unset, then `epochs` is used to determine training length."
        ),
        parameter_metadata=TRAINER_METADATA["train_steps"],
    )

    regularization_lambda: float = schema_utils.FloatRange(
        default=0.0,
        min=0,
        description="Strength of the $L2$ regularization.",
        parameter_metadata=TRAINER_METADATA["regularization_lambda"],
    )

    regularization_type: Optional[str] = schema_utils.RegularizerOptions(
        default="l2", description="Type of regularization."
    )

    should_shuffle: bool = schema_utils.Boolean(
        default=True,
        description="Whether to shuffle batches during training when true.",
        parameter_metadata=TRAINER_METADATA["should_shuffle"],
    )

    batch_size: Union[int, str] = schema_utils.IntegerOrAutoField(
        default=128,
        default_numeric=128,
        allow_none=False,
        min_exclusive=0,
        description=(
            "The number of training examples utilized in one training step of the model. If 'auto', the "
            "biggest batch size (power of 2) that can fit in memory will be used."
        ),
        parameter_metadata=TRAINER_METADATA["batch_size"],
    )

    steps_per_checkpoint: int = schema_utils.NonNegativeInteger(
        default=0,
        description=(
            "How often the model is checkpointed. Also dictates maximum evaluation frequency. If 0 the model is "
            "checkpointed after every epoch."
        ),
        parameter_metadata=TRAINER_METADATA["steps_per_checkpoint"],
    )

    checkpoints_per_epoch: int = schema_utils.NonNegativeInteger(
        default=0,
        description=(
            "Number of checkpoints per epoch. For example, 2 -> checkpoints are written every half of an epoch. Note "
            "that it is invalid to specify both non-zero `steps_per_checkpoint` and non-zero `checkpoints_per_epoch`."
        ),
        parameter_metadata=TRAINER_METADATA["checkpoints_per_epoch"],
    )

    reduce_learning_rate_on_plateau: float = schema_utils.FloatRange(
        default=0.0,
        min=0.0,
        max=1.0,
        description=(
            "Reduces the learning rate when the algorithm hits a plateau (i.e. the performance on the validation does "
            "not improve)."
        ),
        parameter_metadata=TRAINER_METADATA["reduce_learning_rate_on_plateau"],
    )

    reduce_learning_rate_on_plateau_patience: int = schema_utils.NonNegativeInteger(
        default=5,
        description="How many epochs have to pass before the learning rate reduces.",
        parameter_metadata=TRAINER_METADATA["reduce_learning_rate_on_plateau_patience"],
    )

    reduce_learning_rate_on_plateau_rate: float = schema_utils.FloatRange(
        default=0.5,
        min=0.0,
        max=1.0,
        description="Rate at which we reduce the learning rate.",
        parameter_metadata=TRAINER_METADATA["reduce_learning_rate_on_plateau_rate"],
    )

    # Describes the metric being watched, mirroring `increase_batch_size_eval_metric` below.
    reduce_learning_rate_eval_metric: str = schema_utils.String(
        default=LOSS,
        description="Which metric to listen on for reducing the learning rate.",
        parameter_metadata=TRAINER_METADATA["reduce_learning_rate_eval_metric"],
    )

    reduce_learning_rate_eval_split: str = schema_utils.String(
        default=TRAINING,
        description="Which dataset split to listen on for reducing the learning rate.",
        parameter_metadata=TRAINER_METADATA["reduce_learning_rate_eval_split"],
    )

    increase_batch_size_on_plateau: int = schema_utils.NonNegativeInteger(
        default=0,
        description="Number to increase the batch size by on a plateau.",
        parameter_metadata=TRAINER_METADATA["increase_batch_size_on_plateau"],
    )

    increase_batch_size_on_plateau_patience: int = schema_utils.NonNegativeInteger(
        default=5,
        description="How many epochs to wait for before increasing the batch size.",
        parameter_metadata=TRAINER_METADATA["increase_batch_size_on_plateau_patience"],
    )

    increase_batch_size_on_plateau_rate: float = schema_utils.NonNegativeFloat(
        default=2.0,
        description="Rate at which the batch size increases.",
        parameter_metadata=TRAINER_METADATA["increase_batch_size_on_plateau_rate"],
    )

    increase_batch_size_on_plateau_max: int = schema_utils.PositiveInteger(
        default=512,
        description="Maximum size of the batch.",
        parameter_metadata=TRAINER_METADATA["increase_batch_size_on_plateau_max"],
    )

    increase_batch_size_eval_metric: str = schema_utils.String(
        default=LOSS,
        description="Which metric to listen on for increasing the batch size.",
        parameter_metadata=TRAINER_METADATA["increase_batch_size_eval_metric"],
    )

    increase_batch_size_eval_split: str = schema_utils.String(
        default=TRAINING,
        description="Which dataset split to listen on for increasing the batch size.",
        parameter_metadata=TRAINER_METADATA["increase_batch_size_eval_split"],
    )

    decay: bool = schema_utils.Boolean(
        default=False,
        description="Turn on exponential decay of the learning rate.",
        parameter_metadata=TRAINER_METADATA["decay"],
    )

    decay_steps: int = schema_utils.PositiveInteger(
        default=10000,
        description="The number of steps to take in the exponential learning rate decay.",
        parameter_metadata=TRAINER_METADATA["decay_steps"],
    )

    # Uses this field's own metadata entry rather than the one belonging to `decay_steps`.
    # NOTE(review): assumes TRAINER_METADATA defines a "decay_rate" key like every other field here - confirm.
    decay_rate: float = schema_utils.FloatRange(
        default=0.96,
        min=0.0,
        max=1.0,
        description="Decay per epoch (%): Factor to decrease the Learning rate.",
        parameter_metadata=TRAINER_METADATA["decay_rate"],
    )

    staircase: bool = schema_utils.Boolean(
        default=False,
        description="Decays the learning rate at discrete intervals.",
        parameter_metadata=TRAINER_METADATA["staircase"],
    )

    gradient_clipping: Optional[GradientClippingConfig] = GradientClippingDataclassField(
        description="Parameter values for gradient clipping.",
        default={},
    )

    learning_rate_warmup_epochs: float = schema_utils.NonNegativeFloat(
        default=1.0,
        description="Number of epochs to warmup the learning rate for.",
        parameter_metadata=TRAINER_METADATA["learning_rate_warmup_epochs"],
    )

    learning_rate_scaling: str = schema_utils.StringOptions(
        ["constant", "sqrt", "linear"],
        default="linear",
        description=(
            "Scale by which to increase the learning rate as the number of distributed workers increases. "
            "Traditionally the learning rate is scaled linearly with the number of workers to reflect the proportion by"
            " which the effective batch size is increased. For very large batch sizes, a softer square-root scale can "
            "sometimes lead to better model performance. If the learning rate is hand-tuned for a given number of "
            "workers, setting this value to constant can be used to disable scale-up."
        ),
        parameter_metadata=TRAINER_METADATA["learning_rate_scaling"],
    )
class GBMTrainerConfig(BaseTrainerConfig):
    """Dataclass that configures most of the hyperparameters used for GBM model training.

    Each attribute is declared through a ``schema_utils`` field factory that
    carries its default value, its allowed range or options, and a
    human-readable description. The attributes are grouped below following the
    sections of the LightGBM parameter reference.
    """

    # Trainer selector; "lightgbm_trainer" is the only accepted option.
    type: str = schema_utils.StringOptions(
        ["lightgbm_trainer"],
        default="lightgbm_trainer",
        description=(
            "Trainer to use for training the model. Must be one of ['lightgbm_trainer'] - "
            "corresponds to name in `ludwig.trainers.registry.(ray_)trainers_registry` "
            "(default: 'lightgbm_trainer')"
        ),
        allow_none=False,
    )

    # LightGBM core parameters (https://lightgbm.readthedocs.io/en/latest/Parameters.html)
    boosting_type: str = schema_utils.StringOptions(
        ["gbdt", "rf", "dart", "goss"],
        default="gbdt",
        description="Type of boosting algorithm to use with GBM trainer.",
    )

    tree_learner: str = schema_utils.StringOptions(
        ["serial", "feature", "data", "voting"],
        default="serial",
        description="Type of tree learner to use with GBM trainer.",
    )

    num_boost_round: int = schema_utils.PositiveInteger(
        default=100, description="Number of boosting rounds to perform with GBM trainer."
    )

    num_leaves: int = schema_utils.PositiveInteger(
        default=31, description="Number of leaves to use in the tree with GBM trainer."
    )

    # LightGBM Learning Control params
    max_depth: int = schema_utils.Integer(
        default=-1,
        description="Maximum depth of a tree in the GBM trainer. A negative value means no limit.",
    )

    min_data_in_leaf: int = schema_utils.PositiveInteger(
        default=20, description="Minimum number of data points in a leaf with GBM trainer."
    )

    min_sum_hessian_in_leaf: float = schema_utils.NonNegativeFloat(
        default=1e-3, description="Minimum sum of hessians in a leaf with GBM trainer."
    )

    # Bagging (row subsampling) controls.
    bagging_fraction: float = schema_utils.FloatRange(
        default=1.0, min=0.0, max=1.0, description="Fraction of data to use for bagging with GBM trainer."
    )

    pos_bagging_fraction: float = schema_utils.FloatRange(
        default=1.0, min=0.0, max=1.0, description="Fraction of positive data to use for bagging with GBM trainer."
    )

    neg_bagging_fraction: float = schema_utils.FloatRange(
        default=1.0, min=0.0, max=1.0, description="Fraction of negative data to use for bagging with GBM trainer."
    )

    bagging_freq: int = schema_utils.NonNegativeInteger(default=0, description="Frequency of bagging with GBM trainer.")

    bagging_seed: int = schema_utils.Integer(default=3, description="Random seed for bagging with GBM trainer.")

    # Feature (column) subsampling controls.
    feature_fraction: float = schema_utils.FloatRange(
        default=1.0, min=0.0, max=1.0, description="Fraction of features to use in the GBM trainer."
    )

    feature_fraction_bynode: float = schema_utils.FloatRange(
        default=1.0, min=0.0, max=1.0, description="Fraction of features to use for each tree node with GBM trainer."
    )

    feature_fraction_seed: int = schema_utils.Integer(
        default=2, description="Random seed for feature fraction with GBM trainer."
    )

    extra_trees: bool = schema_utils.Boolean(
        default=False, description="Whether to use extremely randomized trees in the GBM trainer."
    )

    extra_seed: int = schema_utils.Integer(
        default=6, description="Random seed for extremely randomized trees in the GBM trainer."
    )

    # The schema only admits values in [0, 1], so the description refers to 0
    # (not a negative number) as the "no constraint" setting.
    max_delta_step: float = schema_utils.FloatRange(
        default=0.0,
        min=0.0,
        max=1.0,
        description=(
            "Used to limit the max output of tree leaves in the GBM trainer. A value of 0 means no constraint."
        ),
    )

    # Regularization.
    lambda_l1: float = schema_utils.NonNegativeFloat(
        default=0.0, description="L1 regularization factor for the GBM trainer."
    )

    lambda_l2: float = schema_utils.NonNegativeFloat(
        default=0.0, description="L2 regularization factor for the GBM trainer."
    )

    linear_lambda: float = schema_utils.NonNegativeFloat(
        default=0.0, description="Linear tree regularization in the GBM trainer."
    )

    min_gain_to_split: float = schema_utils.NonNegativeFloat(
        default=0.0, description="Minimum gain to split a leaf in the GBM trainer."
    )

    # Parameters that apply only when boosting_type is 'dart'.
    drop_rate: float = schema_utils.FloatRange(
        default=0.1,
        min=0.0,
        max=1.0,
        description="Dropout rate for the GBM trainer. Used only with boosting_type 'dart'.",
    )

    max_drop: int = schema_utils.Integer(
        default=50,
        description=(
            "Maximum number of dropped trees during one boosting iteration. "
            "Used only with boosting_type 'dart'. A negative value means no limit."
        ),
    )

    skip_drop: float = schema_utils.FloatRange(
        default=0.5,
        min=0.0,
        max=1.0,
        description=(
            "Probability of skipping the dropout during one boosting iteration. Used only with boosting_type 'dart'."
        ),
    )

    xgboost_dart_mode: bool = schema_utils.Boolean(
        default=False,
        description="Whether to use xgboost dart mode in the GBM trainer. Used only with boosting_type 'dart'.",
    )

    uniform_drop: bool = schema_utils.Boolean(
        default=False,
        description=("Whether to use uniform dropout in the GBM trainer. Used only with boosting_type 'dart'."),
    )

    drop_seed: int = schema_utils.Integer(
        default=4,
        description="Random seed to choose dropping models in the GBM trainer. Used only with boosting_type 'dart'.",
    )

    # Parameters that apply only when boosting_type is 'goss'.
    top_rate: float = schema_utils.FloatRange(
        default=0.2,
        min=0.0,
        max=1.0,
        description="The retain ratio of large gradient data in the GBM trainer. Used only with boosting_type 'goss'.",
    )

    other_rate: float = schema_utils.FloatRange(
        default=0.1,
        min=0.0,
        max=1.0,
        description="The retain ratio of small gradient data in the GBM trainer. Used only with boosting_type 'goss'.",
    )

    # Categorical feature handling.
    min_data_per_group: int = schema_utils.PositiveInteger(
        default=100,
        description="Minimum number of data points per categorical group for the GBM trainer.",
    )

    max_cat_threshold: int = schema_utils.PositiveInteger(
        default=32,
        description="Number of split points considered for categorical features for the GBM trainer.",
    )

    cat_l2: float = schema_utils.NonNegativeFloat(
        default=10.0, description="L2 regularization factor for categorical split in the GBM trainer."
    )

    cat_smooth: float = schema_utils.NonNegativeFloat(
        default=10.0, description="Smoothing factor for categorical split in the GBM trainer."
    )

    max_cat_to_onehot: int = schema_utils.PositiveInteger(
        default=4,
        description="Maximum categorical cardinality required before one-hot encoding in the GBM trainer.",
    )

    # Cost-effective gradient boosting (CEGB) and path smoothing.
    cegb_tradeoff: float = schema_utils.NonNegativeFloat(
        default=1.0,
        description="Cost-effective gradient boosting multiplier for all penalties in the GBM trainer.",
    )

    cegb_penalty_split: float = schema_utils.NonNegativeFloat(
        default=0.0,
        description="Cost-effective gradient boosting penalty for splitting a node in the GBM trainer.",
    )

    path_smooth: float = schema_utils.NonNegativeFloat(
        default=0.0,
        description="Smoothing factor applied to tree nodes in the GBM trainer.",
    )

    verbose: int = schema_utils.IntegerRange(default=0, min=-1, max=2, description="Verbosity level for GBM trainer.")

    # LightGBM IO params
    max_bin: int = schema_utils.PositiveInteger(
        default=255, description="Maximum number of bins to use for discretizing features with GBM trainer."
    )