Esempio n. 1
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters naming the protected-attribute columns and the favorable label."""

    # Column indices treated as protected attributes; the default list is empty.
    protected_attribute_cols = hyperparams.List(
        description="A set of column indices to use as protected attributes.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=[],
        elements=hyperparams.Hyperparameter[int](-1),
    )

    # Favorable label value, declared with lower=0.0 and upper=1.0.
    favorable_label = hyperparams.Bounded[float](
        description='label value which is considered favorable (i.e. positive) in the binary label case',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=1.,
        lower=0.,
        upper=1.,
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters selecting a fairness pre-processing algorithm and its inputs."""

    # Which of the three listed pre-processing algorithms to apply.
    algorithm = hyperparams.Enumeration(
        description='type of fairness pre-processing algorithm to use',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        values=['Disparate_Impact_Remover', 'Learning_Fair_Representations', 'Reweighing'],
        default='Disparate_Impact_Remover',
    )

    # Column indices treated as protected attributes; the default list is empty.
    protected_attribute_cols = hyperparams.List(
        description="A set of column indices to use as protected attributes.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=[],
        elements=hyperparams.Hyperparameter[int](-1),
    )

    # Favorable label value, declared with lower=0.0 and upper=1.0.
    favorable_label = hyperparams.Bounded[float](
        description='label value which is considered favorable (i.e. positive) in the binary label case',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=1.,
        lower=0.,
        upper=1.,
    )
Esempio n. 3
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters describing how a left and a right dataframe are joined.

    Declares the join column(s) on each side, the required match accuracy,
    the join type, the absolute-vs-percentage comparison flag, and the
    ``n_jobs`` value described as being for joblib.
    """

    n_jobs = hyperparams.Hyperparameter[int](
        description="The value of the n_jobs parameter for the joblib library",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        default=-1,
    )

    # Join column(s) on the left side: a single name (the "str" entry, used as
    # the default) or a set of names (the "set" entry).
    # NOTE(review): the "set" entry's description talks about column indices
    # while its elements are strings described as column names — confirm wording.
    left_col = hyperparams.Union[typing.Union[str, typing.Sequence[str]]](
        description="Columns to join on from left dataframe",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        default="str",
        configuration=collections.OrderedDict(
            set=hyperparams.Set(
                description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
                semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
                default=(),
                elements=hyperparams.Hyperparameter[str](
                    description="Name of the column.",
                    semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
                    default="",
                ),
            ),
            str=hyperparams.Hyperparameter[str](
                description="Name of the column.",
                semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
                default="",
            ),
        ),
    )

    # Join column(s) on the right side; same structure as ``left_col``.
    right_col = hyperparams.Union[typing.Union[str, typing.Sequence[str]]](
        description="Columns to join on from right dataframe",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        default="str",
        configuration=collections.OrderedDict(
            set=hyperparams.Set(
                description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
                semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
                default=(),
                elements=hyperparams.Hyperparameter[str](
                    description="Name of the column.",
                    semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
                    default="",
                ),
            ),
            str=hyperparams.Hyperparameter[str](
                description="Name of the column.",
                semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
                default="",
            ),
        ),
    )

    # Match accuracy: one float for all join columns (the "float" entry, used
    # as the default) or a list with one value per join column (the "set" entry).
    accuracy = hyperparams.Union[typing.Union[float, typing.Sequence[float]]](
        description="Required accuracy of join ranging from 0.0 to 1.0, where 1.0 is an exact match.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        default="float",
        configuration=collections.OrderedDict(
            set=hyperparams.List(
                description="A list of accuracies, corresponding respectively to the columns to join on.",
                semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
                default=(),
                elements=hyperparams.Hyperparameter[float](-1),
            ),
            float=hyperparams.Hyperparameter[float](0),
        ),
    )

    join_type = hyperparams.Enumeration[str](
        description="The type of join between two dataframes.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        values=("left", "right", "outer", "inner", "cross"),
        default="left",
    )

    # Absolute-comparison flag: one bool for all join columns (the "bool"
    # entry, used as the default) or a list with one flag per join column.
    absolute_accuracy = hyperparams.Union[typing.Union[bool, typing.Sequence[bool]]](
        description="Used for numeric to use absolute comparison instead of percentage.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        default="bool",
        configuration=collections.OrderedDict(
            set=hyperparams.List(
                description="A list of flags for absolute values, corresponding respectively to the columns to join on.",
                semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
                default=(),
                elements=hyperparams.UniformBool(False),
            ),
            bool=hyperparams.UniformBool(False),
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a neural-network classifier with configurable hidden layers.

    The first group (tuning parameters) covers network shape, solver and
    optimisation settings; the second group (control parameters) covers
    column selection and how results are returned.
    """

    # ------------------------------------------------------------------
    # Model / optimisation parameters
    # ------------------------------------------------------------------

    # One entry per hidden layer; at least one layer is required (min_size=1).
    hidden_layer_sizes = hyperparams.List(
        description='The ith element represents the number of neurons in the ith hidden layer.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        elements=hyperparams.Bounded(1, None, 100),
        default=(100, ),
        min_size=1,
        max_size=None,
    )

    activation = hyperparams.Enumeration[str](
        description='Activation function for the hidden layer.  - \'identity\', no-op activation, useful to implement linear bottleneck, returns f(x) = x  - \'logistic\', the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)).  - \'tanh\', the hyperbolic tan function, returns f(x) = tanh(x).  - \'relu\', the rectified linear unit function, returns f(x) = max(0, x)',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        values=['identity', 'logistic', 'tanh', 'relu'],
        default='relu',
    )

    # Each solver choice carries its own nested set of solver-specific settings.
    solver = hyperparams.Choice(
        description='The solver for weight optimization.  - \'lbfgs\' is an optimizer in the family of quasi-Newton methods.  - \'sgd\' refers to stochastic gradient descent.  - \'adam\' refers to a stochastic gradient-based optimizer proposed by Kingma, Diederik, and Jimmy Ba  Note: The default solver \'adam\' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. For small datasets, however, \'lbfgs\' can converge faster and perform better.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default='adam',
        choices={
            # Settings nested under the 'lbfgs' choice.
            'lbfgs': hyperparams.Hyperparams.define(
                configuration=OrderedDict({
                    'max_fun': hyperparams.Bounded[int](
                        description='Maximum number of loss function calls',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=15000,
                        lower=1,
                        upper=None,
                    ),
                }),
            ),
            # Settings nested under the 'sgd' choice.
            'sgd': hyperparams.Hyperparams.define(
                configuration=OrderedDict({
                    'learning_rate': hyperparams.Enumeration[str](
                        description='Learning rate schedule for weight updates. Only used when solver=’sgd’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        values=['constant', 'invscaling', 'adaptive'],
                        default='constant',
                    ),
                    'learning_rate_init': hyperparams.Bounded[float](
                        description='The initial learning rate used. It controls the step-size in updating the weights. Only used when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=0.001,
                        lower=0,
                        upper=None,
                    ),
                    'power_t': hyperparams.Bounded[float](
                        description='The exponent for inverse scaling learning rate. Only used when solver=’sgd’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=0.5,
                        lower=0,
                        upper=None,
                    ),
                    'shuffle': hyperparams.UniformBool(
                        description='Whether to shuffle samples in each iteration. Only used when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=True,
                    ),
                    'momentum': hyperparams.Bounded[float](
                        description='Momentum for gradient descent update. Should be between 0 and 1. Only used when solver=’sgd’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=0.9,
                        lower=0,
                        upper=1,
                    ),
                    'nesterovs_momentum': hyperparams.UniformBool(
                        description='Whether to use Nesterov’s momentum. Only used when solver=’sgd’ and momentum > 0.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=True,
                    ),
                    'early_stopping': hyperparams.UniformBool(
                        description='Whether to use early stopping to terminate training when validation score is not improving.If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=False,
                    ),
                    'n_iter_no_change': hyperparams.Bounded[int](
                        description='Maximum number of epochs to not meet tol improvement. Only effective when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=10,
                        lower=1,
                        upper=None,
                    ),
                }),
            ),
            # Settings nested under the 'adam' choice.
            'adam': hyperparams.Hyperparams.define(
                configuration=OrderedDict({
                    'learning_rate_init': hyperparams.Bounded[float](
                        description='The initial learning rate used. It controls the step-size in updating the weights. Only used when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=0.001,
                        lower=0,
                        upper=None,
                    ),
                    'shuffle': hyperparams.UniformBool(
                        description='Whether to shuffle samples in each iteration. Only used when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=True,
                    ),
                    'early_stopping': hyperparams.UniformBool(
                        description='Whether to use early stopping to terminate training when validation score is not improving.If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=False,
                    ),
                    'beta_1': hyperparams.Bounded[float](
                        description='Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1).',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=0.9,
                        lower=0,
                        upper=1,
                    ),
                    'beta_2': hyperparams.Bounded[float](
                        description='Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1).',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=0.999,
                        lower=0,
                        upper=1,
                    ),
                    'epsilon': hyperparams.Bounded[float](
                        description='Value for numerical stability in adam. Only used when solver=’adam’',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=1e-08,
                        lower=0,
                        upper=None,
                    ),
                    'n_iter_no_change': hyperparams.Bounded[int](
                        description='Maximum number of epochs to not meet tol improvement. Only effective when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                        default=10,
                        lower=1,
                        upper=None,
                    ),
                }),
            ),
        },
    )

    alpha = hyperparams.Bounded[float](
        description='L2 penalty (regularization term) parameter.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=0.0001,
        lower=0,
        upper=None,
    )

    # Either an explicit integer size (the 'int' entry) or the constant
    # 'auto' (the 'auto' entry, used as the default).
    batch_size = hyperparams.Union(
        description='Size of minibatches for stochastic optimizers. If the solver is \'lbfgs\', the classifier will not use minibatch. When set to "auto", `batch_size=min(200, n_samples)`',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default='auto',
        configuration=OrderedDict({
            'int': hyperparams.Bounded[int](
                description='Size of minibatches for stochastic optimizers. If the solver is \'lbfgs\', the classifier will not use minibatch',
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                default=16,
                lower=0,
                upper=None,
            ),
            'auto': hyperparams.Constant(
                description='When set to \'auto\', batch_size=min(200, n_samples)',
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                default='auto',
            ),
        }),
    )

    max_iter = hyperparams.Bounded[int](
        description='Maximum number of iterations. The solver iterates until convergence (determined by \'tol\') or this number of iterations. For stochastic solvers (\'sgd\', \'adam\'), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=200,
        lower=0,
        upper=None,
    )

    tol = hyperparams.Bounded[float](
        description='Tolerance for the optimization. When the loss or score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, unless ``learning_rate`` is set to \'adaptive\', convergence is considered to be reached and training stops.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=0.0001,
        lower=0,
        upper=None,
    )

    warm_start = hyperparams.UniformBool(
        description='When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary <warm_start>`.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=False,
    )

    # NOTE(review): the description says "Must be between 0 and 1" but no
    # upper bound is declared here (upper=None) — confirm whether intended.
    validation_fraction = hyperparams.Bounded[float](
        description='The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=0.1,
        lower=0,
        upper=None,
    )

    # ------------------------------------------------------------------
    # Column selection and output handling
    # ------------------------------------------------------------------

    use_inputs_columns = hyperparams.Set(
        description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
    )

    use_outputs_columns = hyperparams.Set(
        description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
    )

    exclude_inputs_columns = hyperparams.Set(
        description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
    )

    exclude_outputs_columns = hyperparams.Set(
        description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
    )

    return_result = hyperparams.Enumeration(
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        values=['append', 'replace', 'new'],
        default='new',
    )

    use_semantic_types = hyperparams.UniformBool(
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=False,
    )

    add_index_columns = hyperparams.UniformBool(
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=False,
    )

    error_on_no_input = hyperparams.UniformBool(
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=True,
    )

    # Semantic type attached to the generated output column(s).
    return_semantic_type = hyperparams.Enumeration[str](
        description='Decides what semantic type to attach to generated output',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
        default='https://metadata.datadrivendiscovery.org/types/PredictedTarget',
    )
Esempio n. 5
0
class Hyperparams(Hyperparams_ODBase):
    """Hyperparameters for an LSTM-based detector built on ``Hyperparams_ODBase``.

    Groups, in declaration order: error smoothing/windowing settings, LSTM
    model settings, training settings, error-thresholding settings, and the
    expected contamination of the data set.
    """

    # Error smoothing and windowing

    smoothing_perc = hyperparams.Hyperparameter[float](
        default=0.05,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        "determines window size used in EWMA smoothing (percentage of total values for channel)"
    )

    window_size_ = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="number of trailing batches to use in error calculation")

    error_buffer = hyperparams.Hyperparameter[int](
        default=50,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "number of values surrounding an error that are brought into the sequence (promotes grouping on nearby sequences"
    )

    # Batch size used at prediction time (training uses ``lstm_batch_size``).
    batch_size = hyperparams.Hyperparameter[int](
        default=70,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="Batch size while predicting")

    # LSTM Model Parameters

    dropout = hyperparams.Hyperparameter[float](
        default=0.3,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description="Dropout rate")

    validation_split = hyperparams.Hyperparameter[float](
        default=0.2,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="Validation split")

    # The structural type also admits None.
    optimizer = hyperparams.Hyperparameter[typing.Union[str, None]](
        default='Adam',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="Optimizer")

    lstm_batch_size = hyperparams.Hyperparameter[int](
        default=64,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="lstm model training batch size")

    # The structural type also admits None.
    loss_metric = hyperparams.Hyperparameter[typing.Union[str, None]](
        default='mean_squared_error',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="loss function")

    # One unit count per LSTM layer; the default declares two layers.
    layers = hyperparams.List(
        elements=hyperparams.Hyperparameter[int](1),
        default=[10, 10],
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description="No of units for the 2 lstm layers")

    # Training Parameters

    epochs = hyperparams.Hyperparameter[int](
        default=1,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description="Epoch")

    patience = hyperparams.Hyperparameter[int](
        default=10,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Number of consequetive training iterations to allow without decreasing the val_loss by at least min_delta"
    )

    # The description used to be a verbatim copy of the ``patience`` one,
    # which described an iteration count rather than this float threshold.
    min_delta = hyperparams.Hyperparameter[float](
        default=0.0003,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        "Minimum decrease in val_loss that counts as an improvement when applying patience"
    )

    l_s = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        "num previous timesteps provided to model to predict future values")

    n_predictions = hyperparams.Hyperparameter[int](
        default=10,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="number of steps ahead to predict")

    # Error thresholding parameters
    # ==================================

    p = hyperparams.Hyperparameter[float](
        default=0.05,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        "minimum percent decrease between max errors in anomalous sequences (used for pruning)"
    )

    # Contamination

    contamination = hyperparams.Uniform(
        lower=0.,
        upper=0.5,
        default=0.1,
        description=
        'the amount of contamination of the data set, i.e.the proportion of outliers in the data set. Used when fitting to define the threshold on the decision function',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])
Esempio n. 6
0
class ForecastingNBEATSHyperparams(hyperparams.Hyperparams):
    """Hyperparameters for the forecasting primitive named in the class (N-BEATS).

    Several entries carry an empty description in the declaration; they are
    left undocumented here rather than guessed at.
    """

    # --- Window and horizon sizing -----------------------------------

    input_size_multiplier = hyperparams.UniformInt(
        description="",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        default=2,
        lower=1,
        upper=10000,
    )

    output_size = hyperparams.UniformInt(
        description=(
            "The forecast horizon of the recursive neural network, usually multiple of seasonality. The "
            "forecast horizon is the number of periods to forecast."
        ),
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        default=60,
        lower=1,
        upper=10000,
    )

    window_sampling_limit_multiplier = hyperparams.UniformInt(
        description="",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        default=1,
        lower=1,
        upper=10000,
    )

    # --- Architecture ------------------------------------------------

    shared_weights = hyperparams.UniformBool(
        description="",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        default=True,
    )

    # The list-valued settings below all default to two entries.
    stack_types = hyperparams.List(
        description="",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=['trend', 'seasonality'],
        elements=hyperparams.Hyperparameter[str](''),
    )

    n_blocks = hyperparams.List(
        description="",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=[3, 3],
        elements=hyperparams.Hyperparameter[int](1),
    )

    n_layers = hyperparams.List(
        description="",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=[4, 4],
        elements=hyperparams.Hyperparameter[int](1),
    )

    n_hidden = hyperparams.List(
        description="",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=[256, 2048],
        elements=hyperparams.Hyperparameter[int](1),
    )

    n_harmonics = hyperparams.Hyperparameter[int](
        description="",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=1,
    )

    n_polynomials = hyperparams.Hyperparameter[int](
        description="",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=2,
    )

    # --- Optimisation ------------------------------------------------

    learning_rate = hyperparams.Hyperparameter[float](
        description='Size of the stochastic gradient descent steps',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=1e-3,
    )

    lr_decay = hyperparams.Hyperparameter[float](
        description='The gamma parameter of the RNN scheduler to shrink the learning rate.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=1.0,
    )

    n_lr_decay_steps = hyperparams.Hyperparameter[int](
        description="",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=3,
    )

    batch_size = hyperparams.Hyperparameter[int](
        description="",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=1024,
    )

    n_iterations = hyperparams.Hyperparameter[int](
        description="",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=300,
    )

    loss = hyperparams.Hyperparameter[str](
        description="",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default='MAPE',
    )

    # --- Data frequency and execution device -------------------------

    # An empty string means the frequency is inferred (see the description).
    frequency = hyperparams.Hyperparameter[str](
        description=(
            "A number of string aliases are given to useful common time series frequencies. If empty, "
            "we will try to infer the frequency from the data. If it fails, we use 'D'. "
            "See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases for a list of frequency aliases"
        ),
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        default="",
    )

    seasonality = hyperparams.Hyperparameter[int](
        description="",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=1,
    )

    device = hyperparams.Hyperparameter[str](
        description="Specify the device, such as cpu, cuda, cuda:0. CPU if GPU is not available",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        default="cuda",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a bounds filter applied to a FloatVector column.

    ``mins`` and ``maxs`` are union hyperparameters: each is either a single
    float (the ``float`` choice, which is the default) or a set of floats
    (the ``set`` choice).  ``column`` selects the vector column, while
    ``inclusive`` and ``strict`` describe how the bounds are applied.
    """

    # Lower bound(s).  The "float" choice defaults to 0; the "set" choice
    # defaults to an empty collection.
    mins = hyperparams.Union[typing.Union[float, typing.Sequence[float]]](
        configuration=collections.OrderedDict(
            set=hyperparams.List(
                elements=hyperparams.Hyperparameter[float](-1),
                default=(),
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                description=
                "A set of minimum values, corresponding to the vector values to filter on",
            ),
            float=hyperparams.Hyperparameter[float](0),
        ),
        default="float",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        # Previously described as "column indices", which did not match the
        # float-valued configuration above.
        description=
        "The minimum bound(s) to filter on: either a single float or a set of floats",
    )
    # Upper bound(s); mirrors ``mins``.
    maxs = hyperparams.Union[typing.Union[float, typing.Sequence[float]]](
        configuration=collections.OrderedDict(
            set=hyperparams.List(
                elements=hyperparams.Hyperparameter[float](-1),
                default=(),
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                # Fixed copy-paste from ``mins``: these are maximum values.
                description=
                "A set of maximum values, corresponding to the vector values to filter on",
            ),
            float=hyperparams.Hyperparameter[float](0),
        ),
        default="float",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "The maximum bound(s) to filter on: either a single float or a set of floats",
    )
    # Index of the column to operate on; None by default.
    column = hyperparams.Hyperparameter[typing.Optional[int]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The indicated FloatVector column to operate on",
    )
    # Chooses between keeping the in-range values and keeping the complement.
    inclusive = hyperparams.Hyperparameter[bool](
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "True when values outside the range are removed; False gives the complement.",
    )
    # Chooses between strict (<) and non-strict (<=) bound comparisons.
    strict = hyperparams.Hyperparameter[bool](
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "True when the filter bounds are strict (ie. less than), False when they are not (ie. less than or equal to).",
    )
Esempio n. 8
0
class Hyperparams(Hyperparams_ODBase):
    """Hyperparameters declared on top of ``Hyperparams_ODBase``.

    The declarations below cover the layer layout and activations, the
    training settings (loss, optimizer, epochs, batch size, regularisation,
    validation split, preprocessing, verbosity), the random seed, and the
    expected contamination.
    """

    # ---- Layer layout and activations ----

    hidden_neurons = hyperparams.List(
        elements=hyperparams.Hyperparameter[int](1),
        default=[4, 2, 4],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The number of neurons per hidden layers.",
    )

    hidden_activation = hyperparams.Enumeration[str](
        values=[
            "relu",
            "sigmoid",
            "softmax",
            "softplus",
            "softsign",
            "tanh",
            "selu",
            "elu",
            "exponential",
        ],
        default="relu",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Activation function to use for hidden layers.",
    )

    output_activation = hyperparams.Enumeration[str](
        values=[
            "relu",
            "sigmoid",
            "softmax",
            "softplus",
            "softsign",
            "tanh",
            "selu",
            "elu",
            "exponential",
        ],
        default="sigmoid",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Activation function to use for output layer.",
    )

    # ---- Training settings ----

    # Only one loss is currently offered.
    loss = hyperparams.Enumeration[str](
        values=["mean_squared_error"],
        default="mean_squared_error",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Loss function.",
    )

    optimizer = hyperparams.Enumeration[str](
        values=[
            "SGD",
            "RMSprop",
            "adam",
            "Adadelta",
            "Adagrad",
            "Adamax",
            "Nadam",
            "Ftrl",
        ],
        default="adam",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="String (name of optimizer) or optimizer instance.",
    )

    epochs = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Number of epochs to train the model.",
    )

    batch_size = hyperparams.Hyperparameter[int](
        default=32,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Number of samples per gradient update.",
    )

    dropout_rate = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.2,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The dropout to be used across all layers.",
    )

    l2_regularizer = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description=(
            "The regularization strength of activity_regularizer applied on each layer."
        ),
    )

    validation_size = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The percentage of data to be used for validation.",
    )

    preprocessing = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="If True, apply standardization on the data.",
    )

    verbose = hyperparams.Enumeration[int](
        values=[0, 1, 2],
        default=1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Verbosity mode.",
    )

    # ---- Seed and contamination ----

    # Either an integer seed ("init") or no seed at all ("ninit", the default).
    random_state = hyperparams.Union[Union[int, None]](
        configuration=OrderedDict(
            init=hyperparams.Hyperparameter[int](default=0),
            ninit=hyperparams.Hyperparameter[None](default=None),
        ),
        default="ninit",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="the seed used by the random number generator.",
    )

    contamination = hyperparams.Uniform(
        lower=0.0,
        upper=0.5,
        default=0.1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description=(
            "The amount of contamination of the data set, i.e. the proportion of outliers in the data set. "
        ),
    )
Esempio n. 9
0
class ForecastingESRNNHyperparams(hyperparams.Hyperparams):
    """Hyperparameters for ESRNN-based time series forecasting.

    Every entry is declared as a ControlParameter.  The declarations are
    grouped as: tuning/training schedule, optimizer and regularisation,
    batching, series description (seasonality, frequency, sizes), network
    structure, data augmentation, and the compute device.
    """

    # ---- Tuning and training schedule ----

    auto_tune = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "Allow ESRNN to automatically tune the hyperparameters. You must still specify the output_size."
        ),
    )
    max_epochs = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=15,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Maximum number of complete passes to train data during fit",
    )
    freq_of_test = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=50,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="period for the diagnostic evaluation of the model.",
    )

    # ---- Optimizer and regularisation ----

    learning_rate = hyperparams.Hyperparameter[float](
        default=1e-3,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Size of the stochastic gradient descent steps",
    )
    lr_decay = hyperparams.Hyperparameter[float](
        default=0.9,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "The gamma parameter of the RNN scheduler to shrink the learning rate."
        ),
    )
    lr_scheduler_step_size = hyperparams.UniformInt(
        lower=1,
        upper=10000,
        default=9,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="This step_size is the period for each learning rate decay",
    )
    per_series_lr_multip = hyperparams.Hyperparameter[float](
        default=1.0,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "Multiplier for per-series parameters smoothing and initial seasonalities learning rate"
        ),
    )
    gradient_eps = hyperparams.Hyperparameter[float](
        default=1e-8,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "term added to the Adam optimizer denominator to improve numerical stability"
        ),
    )
    # The defaults of the next four float hyperparameters are integer
    # literals (20, 0, 80, 50), kept exactly as originally declared.
    gradient_clipping_threshold = hyperparams.Hyperparameter[float](
        default=20,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "max norm of gradient vector, with all parameters treated as a single vector"
        ),
    )
    rnn_weight_decay = hyperparams.Hyperparameter[float](
        default=0,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "parameter to control classic L2/Tikhonov regularization of the rnn parameters"
        ),
    )
    noise_std = hyperparams.Hyperparameter[float](
        default=1e-3,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "standard deviation of white noise added to input during fit to avoid the model from memorizing "
            "the train data "
        ),
    )
    level_variability_penalty = hyperparams.Hyperparameter[float](
        default=80,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "this parameter controls the strength of the penalization to the wigglines of the level vector, "
            "induces smoothness in the output "
        ),
    )
    training_percentile = hyperparams.Hyperparameter[float](
        default=50,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "To reduce the model's tendency to over estimate, the training_percentile can be set to fit a smaller value through the Pinball Loss."
            "controls for the value predicted, when forecasting point value, the forecast is the median, "
            "so percentile=50. "
        ),
    )

    # ---- Batching ----

    batch_size = hyperparams.UniformInt(
        lower=1,
        upper=10000,
        default=1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="The batch size for RNN training",
    )
    batch_size_test = hyperparams.UniformInt(
        lower=1,
        upper=10000,
        default=64,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "The batch size for RNN test. We separated this parameter since this batch size can be "
            "considerably larger than the train batch. It only affects the time it takes to perform "
            "predictions"
        ),
    )

    # ---- Series description ----

    # Declared as a list of ints with an empty default.
    seasonality = hyperparams.List(
        elements=hyperparams.Hyperparameter[int](1),
        default=[],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "The main frequency of the time series. The value should between 1 and 13. Quarterly 4, Daily 7, "
            "Monthly 12"
        ),
    )
    frequency = hyperparams.Hyperparameter[str](
        default="",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "A number of string aliases are given to useful common time series frequencies. If empty, "
            "we will try to infer the frequency from the data. If it fails, we use 'D'. "
            "See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases for a list of frequency aliases"
        ),
    )
    input_size = hyperparams.UniformInt(
        lower=1,
        upper=10000,
        default=4,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "input size of the recursive neural network, usually a multiple of seasonality"
        ),
    )
    output_size = hyperparams.UniformInt(
        lower=1,
        upper=10000,
        default=60,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "The forecast horizon of the recursive neural network, usually multiple of seasonality. The "
            "forecast horizon is the number of periods to forecast."
        ),
    )
    exogenous_size = hyperparams.UniformInt(
        lower=1,
        upper=10000,
        default=60,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "size of one hot encoded categorical variable, invariannt per time series of the panel"
        ),
    )

    # ---- Network structure ----

    # NOTE(review): the value list has five entries while the description
    # names four; left untouched here.
    cell_type = hyperparams.Enumeration(
        values=["LSTM", "GRU", "RNN", "ResLSTM", "AttentiveLSTM"],
        default="LSTM",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Type of RNN cell, available GRU, LSTM, RNN, ResidualLSTM",
    )
    state_hsize = hyperparams.UniformInt(
        lower=1,
        upper=10000,
        default=40,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="dimension of hidden state of the recursive neural network",
    )
    # Nested list: the outer list holds chunks, each chunk is a list of ints.
    dilations = hyperparams.List(
        elements=hyperparams.List(
            elements=hyperparams.Hyperparameter[int](1),
            default=[],
        ),
        default=[[1, 7], [28]],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "a list of list of ints, each list represents one chunk of Dilated LSTMS, connected in standard "
            "ResNet fashion"
        ),
    )
    add_nl_layer = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "whether to insert a tanh() layer between the RNN stack and the linear adaptor (output) layers"
        ),
    )

    # ---- Data augmentation ----

    data_augmentation = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="True to turn on data augmentation support",
    )
    max_periods = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=20,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "The max number of periods (one period is one season as specified in the other hyperparameters)"
        ),
    )

    # ---- Compute device ----

    device = hyperparams.Hyperparameter[str](
        default="cpu",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "Specify the device, such as cpu, cuda, cuda:0. We recommend using CPU. It fallbacks to "
            "CPU if GPU is not available"
        ),
    )