class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameter container exposing a single fixed ``n_samples`` value."""

    # Declared as a Constant, so the value stays at its default of 1000.
    n_samples = hyperparams.Constant(
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Max number of samples/words to select",
        default=1000,
    )
# Example #2
# 0
class Hyperparams(Hyperparams_ODBase):
    """Extends ``Hyperparams_ODBase`` with the ``method`` and ``weights`` hyper-parameters."""

    # Combination method; restricted to the three listed values.
    method = hyperparams.Enumeration[str](
        default='average',
        values=['average', 'maximization', 'median'],
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description='Combination method: {average, maximization, median}. Pass in weights of detector for weighted version.',
    )

    # Two alternatives: an ndarray (empty by default) or the constant None.
    # The 'none' alternative is the one selected by default.
    weights = hyperparams.Union(
        default='none',
        configuration=OrderedDict({
            'ndarray': hyperparams.Hyperparameter[ndarray](
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                default=np.array([]),
            ),
            'none': hyperparams.Constant(
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                default=None,
            ),
        }),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='Score weight by dimensions. If None, [1,1,...,1] will be used.',
    )
class Hyperparams(hyperparams.Hyperparams):
    """
    Hyper-parameters for a neural-network estimator plus column-selection controls.

    The first group (``hidden_layer_sizes`` .. ``validation_fraction``) configures
    the model and its optimizer; ``solver`` is a choice whose selected branch
    carries the solver-specific settings. The second group (``use_inputs_columns``
    .. ``return_semantic_type``) controls which columns are consumed and how the
    output is returned.

    NOTE(review): the parameter names and descriptions look like they mirror
    scikit-learn's multi-layer perceptron estimators -- confirm against the
    primitive that consumes this class.
    """

    hidden_layer_sizes = hyperparams.List(
        # Each element: lower bound 1, no upper bound, default 100.
        elements=hyperparams.Bounded(1, None, 100),
        default=(100, ),
        min_size=1,
        max_size=None,
        description='The ith element represents the number of neurons in the ith hidden layer.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    activation = hyperparams.Enumeration[str](
        values=['identity', 'logistic', 'tanh', 'relu'],
        default='relu',
        description='Activation function for the hidden layer.  - \'identity\', no-op activation, useful to implement linear bottleneck, returns f(x) = x  - \'logistic\', the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)).  - \'tanh\', the hyperbolic tan function, returns f(x) = tanh(x).  - \'relu\', the rectified linear unit function, returns f(x) = max(0, x)',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    # Each solver choice bundles only the settings that apply to that solver.
    solver = hyperparams.Choice(
        choices={
            'lbfgs': hyperparams.Hyperparams.define(
                configuration=OrderedDict({
                    'max_fun': hyperparams.Bounded[int](
                        default=15000,
                        lower=1,
                        upper=None,
                        description='Maximum number of loss function calls',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    )
                })
            ),
            'sgd': hyperparams.Hyperparams.define(
                configuration=OrderedDict({
                    'learning_rate': hyperparams.Enumeration[str](
                        values=['constant', 'invscaling', 'adaptive'],
                        default='constant',
                        description='Learning rate schedule for weight updates. Only used when solver=’sgd’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'learning_rate_init': hyperparams.Bounded[float](
                        lower=0,
                        upper=None,
                        default=0.001,
                        description='The initial learning rate used. It controls the step-size in updating the weights. Only used when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'power_t': hyperparams.Bounded[float](
                        lower=0,
                        upper=None,
                        default=0.5,
                        description='The exponent for inverse scaling learning rate. Only used when solver=’sgd’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'shuffle': hyperparams.UniformBool(
                        default=True,
                        description='Whether to shuffle samples in each iteration. Only used when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'momentum': hyperparams.Bounded[float](
                        default=0.9,
                        lower=0,
                        upper=1,
                        description='Momentum for gradient descent update. Should be between 0 and 1. Only used when solver=’sgd’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'nesterovs_momentum': hyperparams.UniformBool(
                        default=True,
                        description='Whether to use Nesterov’s momentum. Only used when solver=’sgd’ and momentum > 0.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'early_stopping': hyperparams.UniformBool(
                        default=False,
                        description='Whether to use early stopping to terminate training when validation score is not improving.If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'n_iter_no_change': hyperparams.Bounded[int](
                        default=10,
                        lower=1,
                        upper=None,
                        description='Maximum number of epochs to not meet tol improvement. Only effective when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    )
                })
            ),
            'adam': hyperparams.Hyperparams.define(
                configuration=OrderedDict({
                    'learning_rate_init': hyperparams.Bounded[float](
                        lower=0,
                        upper=None,
                        default=0.001,
                        description='The initial learning rate used. It controls the step-size in updating the weights. Only used when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'shuffle': hyperparams.UniformBool(
                        default=True,
                        description='Whether to shuffle samples in each iteration. Only used when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'early_stopping': hyperparams.UniformBool(
                        default=False,
                        description='Whether to use early stopping to terminate training when validation score is not improving.If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'beta_1': hyperparams.Bounded[float](
                        default=0.9,
                        lower=0,
                        upper=1,
                        # The description specifies the half-open range [0, 1),
                        # so the upper bound itself must be rejected.
                        upper_inclusive=False,
                        description='Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1).',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'beta_2': hyperparams.Bounded[float](
                        default=0.999,
                        lower=0,
                        upper=1,
                        # Same half-open range [0, 1) as beta_1.
                        upper_inclusive=False,
                        description='Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1).',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'epsilon': hyperparams.Bounded[float](
                        default=1e-08,
                        lower=0,
                        upper=None,
                        description='Value for numerical stability in adam. Only used when solver=’adam’',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'n_iter_no_change': hyperparams.Bounded[int](
                        default=10,
                        lower=1,
                        upper=None,
                        description='Maximum number of epochs to not meet tol improvement. Only effective when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    )
                })
            )
        },
        default='adam',
        description='The solver for weight optimization.  - \'lbfgs\' is an optimizer in the family of quasi-Newton methods.  - \'sgd\' refers to stochastic gradient descent.  - \'adam\' refers to a stochastic gradient-based optimizer proposed by Kingma, Diederik, and Jimmy Ba  Note: The default solver \'adam\' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. For small datasets, however, \'lbfgs\' can converge faster and perform better.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    alpha = hyperparams.Bounded[float](
        lower=0,
        upper=None,
        default=0.0001,
        description='L2 penalty (regularization term) parameter.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    # Either an explicit integer size or the literal string 'auto' (the default).
    batch_size = hyperparams.Union(
        configuration=OrderedDict({
            'int': hyperparams.Bounded[int](
                lower=0,
                upper=None,
                default=16,
                description='Size of minibatches for stochastic optimizers. If the solver is \'lbfgs\', the classifier will not use minibatch',
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
            ),
            'auto': hyperparams.Constant(
                default='auto',
                description='When set to \'auto\', batch_size=min(200, n_samples)',
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
            )
        }),
        default='auto',
        description='Size of minibatches for stochastic optimizers. If the solver is \'lbfgs\', the classifier will not use minibatch. When set to "auto", `batch_size=min(200, n_samples)`',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    max_iter = hyperparams.Bounded[int](
        lower=0,
        upper=None,
        default=200,
        description='Maximum number of iterations. The solver iterates until convergence (determined by \'tol\') or this number of iterations. For stochastic solvers (\'sgd\', \'adam\'), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    tol = hyperparams.Bounded[float](
        default=0.0001,
        lower=0,
        upper=None,
        description='Tolerance for the optimization. When the loss or score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, unless ``learning_rate`` is set to \'adaptive\', convergence is considered to be reached and training stops.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    warm_start = hyperparams.UniformBool(
        default=False,
        description='When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary <warm_start>`.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    validation_fraction = hyperparams.Bounded[float](
        default=0.1,
        lower=0,
        # A fraction of the training data: the description requires a value
        # between 0 and 1, so the range is capped instead of left unbounded.
        # 1 itself is excluded because it would leave no data for training.
        upper=1,
        upper_inclusive=False,
        description='The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )

    # ---- column selection / output controls ----
    use_inputs_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.",
    )
    use_outputs_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_inputs_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.",
    )
    exclude_outputs_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )

    return_semantic_type = hyperparams.Enumeration[str](
        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
        default='https://metadata.datadrivendiscovery.org/types/PredictedTarget',
        description='Decides what semantic type to attach to generated output',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
    )
# Example #4
# 0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for an imputation step plus column-selection controls.

    ``missing_values``, ``strategy``, ``add_indicator`` and ``fill_value``
    configure the imputation itself; the remaining entries decide which
    columns are processed and how the result is returned.
    """

    # ---- imputation settings ----

    # Placeholder value to impute: an int (default 0) or a float (default NaN).
    # The 'float' alternative is selected by default.
    missing_values = hyperparams.Union(
        description='The placeholder for the missing values. All occurrences of `missing_values` will be imputed.',
        default='float',
        configuration=OrderedDict({
            'int': hyperparams.Hyperparameter[int](
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                default=0,
            ),
            'float': hyperparams.Hyperparameter[float](
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                default=numpy.nan,
            ),
        }),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    strategy = hyperparams.Enumeration[str](
        description='The imputation strategy.  - If "mean", then replace missing values using the mean along each column. Can only be used with numeric data. - If "median", then replace missing values using the median along each column. Can only be used with numeric data. - If "most_frequent", then replace missing using the most frequent value along each column. Can be used with strings or numeric data. - If "constant", then replace missing values with fill_value. Can be used with strings or numeric data.  .. versionadded:: 0.20 strategy="constant" for fixed value imputation.',
        values=['median', 'most_frequent', 'mean', 'constant'],
        default='mean',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Declared without a description.
    add_indicator = hyperparams.UniformBool(
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        default=False,
    )

    # Replacement value for the "constant" strategy: an int or None (default).
    fill_value = hyperparams.Union(
        description='When strategy == "constant", fill_value is used to replace all occurrences of missing_values. If left to the default, fill_value will be 0 when imputing numerical data and "missing_value" for strings or object data types.',
        default='none',
        configuration=OrderedDict({
            'int': hyperparams.Hyperparameter[int](
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                default=0,
            ),
            'none': hyperparams.Constant(
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
                default=None,
            ),
        }),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # ---- column selection / output controls ----

    use_columns = hyperparams.Set(
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        elements=hyperparams.Hyperparameter[int](default=-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    exclude_columns = hyperparams.Set(
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
        elements=hyperparams.Hyperparameter[int](default=-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    return_result = hyperparams.Enumeration(
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
        default='new',
        values=['append', 'replace', 'new'],
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    use_semantic_types = hyperparams.UniformBool(
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    add_index_columns = hyperparams.UniformBool(
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    error_on_no_input = hyperparams.UniformBool(
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    return_semantic_type = hyperparams.Enumeration[str](
        description='Decides what semantic type to attach to generated attributes',
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        values=[
            'https://metadata.datadrivendiscovery.org/types/Attribute',
            'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute',
        ],
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """
    Hyper-parameters for a matrix-factorization primitive plus column-selection controls.

    ``rank`` .. ``learning_rate`` configure the factorization; ``use_columns`` ..
    ``return_semantic_type`` decide which columns are processed and how the
    result is returned.

    NOTE(review): the parameter names (``rank``, ``seed``, ``W``, ``H``,
    ``update``, ``objective``, ``max_iter``) look like they map onto an NMF
    library's constructor arguments -- confirm against the primitive that
    consumes this class.
    """

    rank = hyperparams.Hyperparameter[int](
        default=30,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="The factorization rank to achieve. Default is 30.",
    )

    seed = hyperparams.Enumeration(
        values=['nndsvd', 'random_c', 'random_vcol', 'random', 'fixed'],
        default='random',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="""Method to seed the computation of a factorization""",
    )

    # Either an ndarray (empty by default) or None; 'none' is the default.
    # NOTE(review): the description below is word-for-word a "score weight"
    # text and does not mention a factor matrix -- it looks copy-pasted from
    # another primitive; confirm the intended meaning of W before rewording.
    W = hyperparams.Union(
        configuration=OrderedDict(
            {
                'ndarray':
                hyperparams.Hyperparameter[ndarray](
                    default=numpy.array([]),
                    semantic_types=[
                        'https://metadata.datadrivendiscovery.org/types/TuningParameter'
                    ],
                ),
                'none':
                hyperparams.Constant(
                    default=None,
                    semantic_types=[
                        'https://metadata.datadrivendiscovery.org/types/TuningParameter'
                    ],
                )
            }),
        default='none',
        description=
        'Score weight by dimensions. If None, [1,1,...,1] will be used.',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])

    # Same structure and same description text as W.
    # NOTE(review): description appears copy-pasted; confirm meaning of H.
    H = hyperparams.Union(
        configuration=OrderedDict(
            {
                'ndarray':
                hyperparams.Hyperparameter[ndarray](
                    default=numpy.array([]),
                    semantic_types=[
                        'https://metadata.datadrivendiscovery.org/types/TuningParameter'
                    ],
                ),
                'none':
                hyperparams.Constant(
                    default=None,
                    semantic_types=[
                        'https://metadata.datadrivendiscovery.org/types/TuningParameter'
                    ],
                )
            }),
        default='none',
        description=
        'Score weight by dimensions. If None, [1,1,...,1] will be used.',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])

    # The description previously carried stray '"' characters and trailing
    # tabs left over from an earlier string layout; they are removed here.
    update = hyperparams.Enumeration(
        values=['euclidean', 'divergence'],
        default='euclidean',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        """Type of update equations used in factorization. When specifying model parameter update can be assigned to:
					'euclidean' for classic Euclidean distance update equations,
					'divergence' for divergence update equations.

					By default Euclidean update equations are used.""",
    )

    objective = hyperparams.Enumeration(
        values=['fro', 'div', 'conn'],
        default='fro',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        """Type of objective function used in factorization. When specifying model parameter :param:`objective` can be assigned to:

					‘fro’ for standard Frobenius distance cost function,
					‘div’ for divergence of target matrix from NMF estimate cost function (KL),
					‘conn’ for measuring the number of consecutive iterations in which the connectivity matrix has not changed.

					By default the standard Frobenius distance cost function is used.""",
    )

    max_iter = hyperparams.Hyperparameter[int](
        default=30,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Maximum number of factorization iterations. Note that the number of iterations depends on the speed of method convergence. Default is 30.",
    )

    # Either a non-negative float ('limit', default 0.01) or None ('unlimited',
    # the default alternative).
    # NOTE(review): the outer description talks about a minimal residual
    # improvement rather than a learning rate -- confirm which quantity this
    # hyper-parameter actually feeds.
    learning_rate = hyperparams.Union[Union[float, None]](
        configuration=OrderedDict(
            limit=hyperparams.Bounded[float](
                lower=0,
                upper=None,
                default=0.01,
            ),
            unlimited=hyperparams.Constant(
                default=None,
                description='If nothing is give as a paramter',
            ),
        ),
        default='unlimited',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Minimal required improvement of the residuals from the previous iteration. They are computed between the target matrix and its MF estimate using the objective function associated to the MF algorithm. Default is None.",
    )

    # parameters for column
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        # Empty by default: no columns are forced unless the caller asks for
        # them. The previous default hard-coded indices (2, 3), which ties
        # the primitive to one dataset layout and means "use_columns" is
        # always provided, so "exclude_columns" could never take effect by
        # default.
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )
    return_semantic_type = hyperparams.Enumeration[str](
        values=[
            'https://metadata.datadrivendiscovery.org/types/Attribute',
            'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'
        ],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description=
        'Decides what semantic type to attach to generated attributes',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
# Example #6
# 0
class Hyperparams(hyperparams.Hyperparams):
    """
    Hyper-parameters for this primitive.

    The declarations fall into three groups: network shape (``input_dim``,
    ``output_dim``, ``depth``, ``width``), per-layer behaviour (batch norm,
    dropout, activations) and training settings (loss, optimizer, minibatch
    size, learning rate, momentum, weight decay, shuffling, early-stop
    threshold, iteration count).  Only ``learning_rate`` is tagged as a
    tuning parameter; every other entry is a control parameter.
    """

    # ---- network shape ----------------------------------------------------
    input_dim = hyperparams.Hyperparameter[int](
        default=100,
        description="Dimensions of the input.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    output_dim = hyperparams.Hyperparameter[int](
        default=2,
        description='Dimensions of CNN output.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    depth = hyperparams.Hyperparameter[int](
        default=2,
        description='Total number of layers, including the output layer.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    width = hyperparams.Hyperparameter[int](
        default=64,
        description=(
            'Number of units in each layer, except the last (output) layer, '
            'which is always equal to the output dimensions.'
        ),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )

    # ---- per-layer behaviour ----------------------------------------------
    use_batch_norm = hyperparams.UniformBool(
        default=False,
        description="Whether to use batch norm after each layer except the last (output) layer.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    use_dropout = hyperparams.UniformBool(
        default=True,
        description="Whether to use dropout after each layer.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    activation_type = hyperparams.Enumeration[str](
        values=['linear', 'relu', 'leaky_relu', 'tanh', 'sigmoid', 'softmax'],
        default='relu',
        # Typo fix: the description previously read "excet".
        description='Type of activation (non-linearity) following each layer except the last one.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    last_activation_type = hyperparams.Enumeration[str](
        values=['linear', 'tanh', 'sigmoid', 'softmax'],
        default='softmax',
        description='Type of activation (non-linearity) following the last layer.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )

    # ---- training settings ------------------------------------------------
    # Declared as a Constant: 'crossentropy' is the only value offered here.
    loss_type = hyperparams.Constant(
        default='crossentropy',
        description='Type of loss used for the local training (fit) of this primitive.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    optimizer_type = hyperparams.Enumeration[str](
        values=['adam', 'sgd'],
        default='adam',
        description='Type of optimizer used during training (fit).',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    minibatch_size = hyperparams.Hyperparameter[int](
        default=32,
        description='Minibatch size used during training (fit).',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    # The one entry in this class tagged as a tuning parameter.
    learning_rate = hyperparams.Hyperparameter[float](
        default=0.0001,
        description='Learning rate used during training (fit).',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
    )
    momentum = hyperparams.Hyperparameter[float](
        default=0.9,
        description='Momentum used during training (fit), only for optimizer_type sgd.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    weight_decay = hyperparams.Hyperparameter[float](
        default=0.0001,
        description='Weight decay (L2 regularization) used during training (fit).',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    shuffle = hyperparams.UniformBool(
        default=True,
        description='Shuffle minibatches in each epoch of training (fit).',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    fit_threshold = hyperparams.Hyperparameter[float](
        default=1e-5,
        description='Threshold of loss value to early stop training (fit).',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    num_iterations = hyperparams.Hyperparameter[int](
        default=100,
        description="Number of iterations to train the model.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
Example #7
0
class Hyperparams(hyperparams.Hyperparams):
    """
    Hyper-parameters for an FFT computation plus column selection/output handling.

    The first group (``axis``, ``n``, ``norm``, ``overwrite_x``, ``workers``)
    configures the transform itself; the second group (``use_columns`` through
    ``return_semantic_type``) controls which input columns are processed and
    how results are returned.
    """

    # ---- transform settings -----------------------------------------------
    axis = hyperparams.Hyperparameter[int](
        default=-1,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Axis over which to compute the FFT. If not given, the last axis is used.",
    )

    # Either a positive integer length ('limit') or None ('unlimited').
    n = hyperparams.Union[Union[int, None]](
        configuration=OrderedDict(
            limit=hyperparams.Bounded[int](
                lower=1,
                upper=None,
                default=10,
            ),
            unlimited=hyperparams.Constant(
                default=None,
                description='If n is not given, the length of the input along the axis specified by axis is used.',
            ),
        ),
        default='unlimited',
        description='Length of the transformed axis of the output. If n is smaller than the length of the input, the input is cropped. If it is larger, the input is padded with zeros.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )

    norm = hyperparams.Enumeration(
        values=[None, "ortho"],
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        # Single-quoted so the inner double quotes are kept; the previous
        # `""ortho""` spelling was implicit string concatenation and dropped
        # the quotes from the rendered description.
        description='Normalization mode. Default is None, meaning no normalization on the forward transforms and scaling by 1/n on the ifft. For norm="ortho", both directions are scaled by 1/sqrt(n).',
    )

    # NOTE(review): this is the only TuningParameter in the class; confirm it
    # is not meant to be a ControlParameter like its neighbours.
    overwrite_x = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="If True, the contents of x can be destroyed; the default is False. See the notes below for more details.",
    )

    # Structural type is int-or-None to agree with the Bounded[int] choice
    # below (it was previously declared as float-or-None).
    # NOTE(review): the description mentions negative values, but the 'limit'
    # choice is declared with lower=1 -- confirm which is intended.
    workers = hyperparams.Union[Union[int, None]](
        configuration=OrderedDict(
            limit=hyperparams.Bounded[int](
                lower=1,
                upper=None,
                default=10,
            ),
            unlimited=hyperparams.Constant(
                default=None,
                description='If nothing is given as a parameter',
            ),
        ),
        default='unlimited',
        description="Maximum number of workers to use for parallel computation. If negative, the value wraps around from os.cpu_count().",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )

    # TODO: Decide what to do with plan parameter how to work with it
    # plan

    # ---- column selection and output handling -----------------------------
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )
    return_semantic_type = hyperparams.Enumeration[str](
        values=[
            'https://metadata.datadrivendiscovery.org/types/Attribute',
            'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute',
        ],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description='Decides what semantic type to attach to generated attributes',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
Example #8
0
     #     upper=None,
     #     default=0,
     #     semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
     # ),
     'calculated':
     hyperparams.Enumeration[str](
         values=['auto', 'sqrt', 'log2'],
         default='auto',
         semantic_types=[
             'https://metadata.datadrivendiscovery.org/types/TuningParameter'
         ],
     ),
     'none':
     hyperparams.Constant(
         default=None,
         semantic_types=[
             'https://metadata.datadrivendiscovery.org/types/TuningParameter'
         ],
     ),
     'percent':
     hyperparams.Bounded[float](
         default=0.25,
         lower=0,
         upper=1,
         lower_inclusive=False,
         semantic_types=[
             'https://metadata.datadrivendiscovery.org/types/TuningParameter'
         ],
     )
 }),
 default='none',
 description=