class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a k-nearest-neighbors classifier.

    Exposes the neighbor count, the distance metric (euclidean or dynamic
    time warping) and the sample weighting scheme as tuning parameters.
    """

    # At least one neighbor is needed to make a classification decision, so
    # the lower bound is 1; a bound of 0 would let a tuner sample an unusable
    # configuration.
    n_neighbors = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "number of neighbors on which to make classification decision",
    )
    # Distance used to compare samples.
    distance_metric = hyperparams.Enumeration(
        default="euclidean",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        values=["euclidean", "dtw"],
        description=
        "whether to use euclidean or dynamic time warping distance metric in KNN computation",
    )
    # How neighbor votes are weighted.
    sample_weighting = hyperparams.Enumeration(
        default="uniform",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        values=["uniform", "inverse_distance"],
        description=
        "whether to weight points uniformly or by the inverse of their distance",
    )
Esempio n. 2
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a DBSCAN / HDBSCAN clustering primitive.

    Multi-line descriptions use implicit string concatenation so that source
    indentation does not leak into the description text.
    """

    # Which clustering algorithm to run.
    algorithm = hyperparams.Enumeration(
        default="HDBSCAN",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["DBSCAN", "HDBSCAN"],
        description="type of clustering algorithm to use",
    )
    # Neighborhood radius, used by DBSCAN.
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=(
            "maximum distance between two samples for them to be considered "
            "as in the same neighborhood, used in DBSCAN algorithm"
        ),
    )
    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="the minimum size of clusters",
    )
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "The number of samples in a neighbourhood for a point to be considered a core point.",
    )
    # Cluster extraction strategy, used by HDBSCAN.
    cluster_selection_method = hyperparams.Enumeration(
        default="eom",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        values=["leaf", "eom"],
        description=
        "Determines how clusters are selected from the cluster hierarchy tree for HDBSCAN",
    )
    # Shape of the produced output.
    required_output = hyperparams.Enumeration(
        default="feature",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["prediction", "feature"],
        description=(
            "Determines whether the output is a dataframe with just "
            "predictions, or an additional feature added to the input "
            "dataframe."
        ),
    )
Esempio n. 3
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a DBSCAN / HDBSCAN clustering primitive.

    Multi-line descriptions use implicit string concatenation so that source
    indentation does not leak into the description text.
    """

    # Which clustering algorithm to run.
    algorithm = hyperparams.Enumeration(
        default='HDBSCAN',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['DBSCAN', 'HDBSCAN'],
        description='type of clustering algorithm to use')
    # Neighborhood radius, used by DBSCAN.
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'maximum distance between two samples for them to be considered '
            'as in the same neighborhood, used in DBSCAN algorithm'))
    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='the minimum size of clusters')
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        'The number of samples in a neighbourhood for a point to be considered a core point.'
    )
    # Cluster extraction strategy, used by HDBSCAN.
    cluster_selection_method = hyperparams.Enumeration(
        default='eom',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        values=['leaf', 'eom'],
        description=
        'Determines how clusters are selected from the cluster hierarchy tree for HDBSCAN'
    )
    # Shape of the produced output.
    required_output = hyperparams.Enumeration(
        default='feature',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['prediction', 'feature'],
        description=(
            'Determines whether the output is a dataframe with just '
            'predictions, or an additional feature added to the input '
            'dataframe.'))
Esempio n. 4
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a DBSCAN / HDBSCAN clustering primitive.

    Includes a ``long_format`` switch describing the layout of the input
    dataset. Multi-line descriptions use implicit string concatenation so
    that source indentation does not leak into the description text.
    """

    # Which clustering algorithm to run.
    algorithm = hyperparams.Enumeration(
        default='HDBSCAN',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['DBSCAN', 'HDBSCAN'],
        description='type of clustering algorithm to use')
    # Neighborhood radius, used by DBSCAN.
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'maximum distance between two samples for them to be considered '
            'as in the same neighborhood, used in DBSCAN algorithm'))
    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='the minimum size of clusters')
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        'The number of samples in a neighbourhood for a point to be considered a core point.'
    )
    # Layout of the incoming dataset.
    long_format = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "whether the input dataset is already formatted in long format or not")
    # Cluster extraction strategy, used by HDBSCAN.
    cluster_selection_method = hyperparams.Enumeration(
        default='eom',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        values=['leaf', 'eom'],
        description=
        'Determines how clusters are selected from the cluster hierarchy tree for HDBSCAN'
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for an affinity-matrix based clustering primitive.

    Covers the number of clusters, the number of k-means restarts, the
    neighbor count and method used to build the affinity matrix, and the
    task type. Multi-line descriptions use implicit string concatenation so
    that source indentation does not leak into the description text.
    """

    # NOTE(review): the description talks about a projection space while the
    # name says "clusters" -- confirm which is intended.
    n_clusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=8,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='The dimension of the projection space')

    n_init = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        'Number of times the k-means algorithm will be run with different centroid seeds'
    )

    n_neighbors = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        'Number of neighbors when constructing the affinity matrix using n-neighbors, ignored for affinity="rbf"'
    )

    affinity = hyperparams.Enumeration(
        default='rbf',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        values=['rbf', 'nearest_neighbors'],
        description='method to construct affinity matrix')

    # NOTE(review): this description reads like one written for an output
    # format option; confirm it matches the clustering/classification choice.
    task_type = hyperparams.Enumeration(
        default='classification',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['clustering', 'classification'],
        description=(
            'Determines whether the output is a dataframe with just '
            'predictions, or an additional feature added to the input '
            'dataframe.'))
Esempio n. 6
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a centering/scaling step with column selection.

    ``with_mean`` and ``with_std`` toggle centering and unit-variance
    scaling. The remaining entries choose which columns are processed and
    how the results are returned.
    """

    # --- Scaling options ---
    with_mean = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description=(
            "If True, center the data before scaling. This does not work "
            "(and will raise an exception) when attempted on sparse matrices, "
            "because centering them entails building a dense matrix which in "
            "common use cases is likely to be too large to fit in memory."
        ),
        default=True,
    )
    with_std = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description=(
            "If True, scale the data to unit variance "
            "(or equivalently, unit standard deviation)."
        ),
        default=True,
    )
    # NOTE: a `copy` hyperparameter (in-place scaling toggle) is not exposed;
    # its definition was commented out in this class.

    # --- Column selection and output handling ---
    use_columns = hyperparams.Set(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "A set of column indices to force primitive to operate on. "
            "If any specified column cannot be parsed, it is skipped."
        ),
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
    )
    exclude_columns = hyperparams.Set(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            'A set of column indices to not operate on. '
            'Applicable only if "use_columns" is not provided.'
        ),
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
    )
    return_result = hyperparams.Enumeration(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "Should parsed columns be appended, should they replace original "
            "columns, or should only parsed columns be returned? This hyperparam "
            "is ignored if use_semantic_types is set to false."
        ),
        default="new",
        values=["append", "replace", "new"],
    )
    use_semantic_types = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "Controls whether semantic_types metadata will be used for filtering "
            "columns in input dataframe. Setting this to false makes the code "
            "ignore return_result and will produce only the output dataframe"
        ),
        default=False,
    )
    add_index_columns = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            'Also include primary index columns if input data has them. '
            'Applicable only if "return_result" is set to "new".'
        ),
        default=False,
    )
    error_on_no_input = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "Throw an exception if no input column is selected/provided. "
            "Defaults to true to behave like sklearn. To prevent pipelines from "
            "breaking set this to False."
        ),
        default=True,
    )

    # Semantic type attached to the columns this primitive generates.
    return_semantic_type = hyperparams.Enumeration[str](
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="Decides what semantic type to attach to generated attributes",
        default="https://metadata.datadrivendiscovery.org/types/Attribute",
        values=[
            "https://metadata.datadrivendiscovery.org/types/Attribute",
            "https://metadata.datadrivendiscovery.org/types/ConstructedAttribute",
        ],
    )
Esempio n. 7
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for feature-vector reduction and generalized mean pooling.

    Multi-line descriptions use implicit string concatenation so that source
    indentation does not leak into the description text.
    """

    # Dimensionality reduction method applied to the feature vectors.
    reduce_method = hyperparams.Enumeration(
        default='pca',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        values=['pca', 'svd'],
        description='dimensionality reduction method that is applied to feature vectors'
    )
    # Size of the reduced feature vectors (upper bound is inclusive).
    reduce_dimension = hyperparams.UniformInt(
        lower=0,
        upper=1024,
        default=128,
        upper_inclusive=True,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="number of dimensions in reduced feature vectors",
    )
    # Exponent of the generalized mean pooling operation.
    gem_p = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=1,
        upper_inclusive=True,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description=(
            "parameter p in generalized mean pooling; p > 1 increases the "
            "contrast of the pooled feature map; p = 1 equivalent to average "
            "pooling; p = +inf equivalent to max pooling."
        ),
    )
Esempio n. 8
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a time series k-means clustering primitive.

    Covers the algorithm choice, cluster count, number of restarts, the
    indices of the time/value/grouping columns and the output column name.
    Multi-line descriptions use implicit string concatenation so that source
    indentation does not leak into the description text.
    """

    algorithm = hyperparams.Enumeration(
        default="TimeSeriesKMeans",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["GlobalAlignmentKernelKMeans", "TimeSeriesKMeans"],
        description="type of clustering algorithm to use",
    )
    nclusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=3,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="number of clusters to use in kernel kmeans algorithm",
    )
    n_init = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=(
            "Number of times the k-means algorithm will be run with different "
            "centroid seeds. Final result will be the best output on n_init "
            "consecutive runs in terms of inertia"
        ),
    )
    # Column indices default to None (no index supplied).
    time_col_index = hyperparams.Hyperparameter[Union[int, None]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Index of column in input dataframe containing timestamps.",
    )
    value_col_index = hyperparams.Hyperparameter[Union[int, None]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "Index of column in input dataframe containing the values associated with the timestamps.",
    )
    grouping_col_index = hyperparams.Hyperparameter[Union[int, None]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "Index of column in input dataframe containing the values used to mark timeseries groups",
    )
    output_col_name = hyperparams.Hyperparameter[str](
        default="__cluster",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "Name to assign to cluster column that is appended to the input dataset",
    )
Esempio n. 9
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a primitive that reads media files from columns.

    Covers column selection, how results are returned, optional compression,
    the joblib parallelism level and the fallback band column name.
    Multi-line descriptions use implicit string concatenation so that source
    indentation does not leak into the description text.
    """

    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "A set of column indices to force primitive to operate on. If any specified column does not contain filenames for supported media types, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        'A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
    )
    return_result = hyperparams.Enumeration(
        values=["append", "replace", "new"],
        default="append",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "Should columns with read files be appended, should they replace original columns, or should only columns with read files be returned?",
    )
    add_index_columns = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        'Also include primary index columns if input data has them. Applicable only if "return_result" is set to "new".',
    )
    # Optional compression of the loaded data.
    compress_data = hyperparams.Hyperparameter[bool](
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "If True, applies LZO compression algorithm to the data. "
            "Store a header consisting of the dtype character and the data "
            "shape as unsigned integers. "
            "Given c struct alignment, will occupy 16 bytes "
            "(1 + 4 + 4 + 4 + 3 ) padding"
        ),
    )
    n_jobs = hyperparams.Hyperparameter[int](
        default=64,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The value of the n_jobs parameter for the joblib library",
    )
    band_column = hyperparams.Hyperparameter[str](
        default="band",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "Name of the band column used if no band semantic type is present.",
    )
Esempio n. 10
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a time series k-means clustering primitive.

    Covers the algorithm choice, the number of clusters and whether the
    input is already in long format. The ``nclusters`` description is a
    single literal so that source indentation does not leak into it.
    """

    algorithm = hyperparams.Enumeration(
        default='GlobalAlignmentKernelKMeans',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['GlobalAlignmentKernelKMeans', 'TimeSeriesKMeans'],
        description='type of clustering algorithm to use')
    nclusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=3,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='number of clusters to use in kernel kmeans algorithm')
    # Layout of the incoming dataset.
    long_format = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "whether the input dataset is already formatted in long format or not")
Esempio n. 11
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a time series k-means clustering primitive.

    Covers the algorithm choice, the number of clusters and the number of
    k-means restarts. Multi-line descriptions use implicit string
    concatenation so that source indentation does not leak into the
    description text.
    """

    algorithm = hyperparams.Enumeration(
        default='TimeSeriesKMeans',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['GlobalAlignmentKernelKMeans', 'TimeSeriesKMeans'],
        description='type of clustering algorithm to use')
    nclusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=3,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='number of clusters to use in kernel kmeans algorithm')
    n_init = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'Number of times the k-means algorithm will be run with different '
            'centroid seeds. Final result will be the best output on n_init '
            'consecutive runs in terms of inertia'))
Esempio n. 12
0
class VoterHyperparameter(hyperparams.Hyperparams):
    """Hyperparameters for choosing among multiple classification predictions."""

    # Strategy used to pick one prediction when several are available.
    classifier_voting_strategy = hyperparams.Enumeration(
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "For classification problem, pick a prediction result if there "
            "are multiple results based on which strategy"
        ),
        default="majority",
        values=["random", "majority"],
    )
Esempio n. 13
0
class UEncHyperparameter(hyperparams.Hyperparams):
    """Hyperparameters for an encoder with optional text-to-numeric conversion.

    ``text2int`` toggles the numeric conversion. The remaining entries
    choose which columns are processed and how the results are returned.
    """

    text2int = hyperparams.UniformBool(
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "Whether to convert everything to numerical. "
            "For text columns, each row may get converted into a column"
        ),
        default=False,
    )

    # --- Column selection and output handling ---
    use_columns = hyperparams.Set(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "A set of column indices to force primitive to operate on. "
            "If any specified column cannot be parsed, it is skipped."
        ),
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
    )
    exclude_columns = hyperparams.Set(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            'A set of column indices to not operate on. '
            'Applicable only if "use_columns" is not provided.'
        ),
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
    )
    return_result = hyperparams.Enumeration(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "Should parsed columns be appended, should they replace original "
            "columns, or should only parsed columns be returned? This hyperparam "
            "is ignored if use_semantic_types is set to false."
        ),
        default="replace",
        values=["append", "replace", "new"],
    )
    use_semantic_types = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "Controls whether semantic_types metadata will be used for filtering "
            "columns in input dataframe. Setting this to false makes the code "
            "ignore return_result and will produce only the output dataframe"
        ),
        default=False,
    )
    add_index_columns = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            'Also include primary index columns if input data has them. '
            'Applicable only if "return_result" is set to "new".'
        ),
        default=True,
    )
Esempio n. 14
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters selecting a scoring metric, columns and an encoder type."""

    # Metric choices are the concatenation of the classification, regression
    # and clustering metric lists defined elsewhere in the module.
    metric = hyperparams.Enumeration[str](
        default="f1Macro",
        values=classification_metrics + regression_metrics + clustering_metrics,
        description=(
            "The D3M scoring metric to use during the fit phase.  "
            "This can be any of the regression, classification or "
            "clustering metrics."
        ),
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    use_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        description=(
            "A set of column indices to force primitive to operate on. "
            "If any specified column cannot be parsed, it is skipped."
        ),
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )

    encoder_type = hyperparams.Enumeration(
        default="svm",
        values=["svm", "tfidf"],
        description="Vectorization Strategy.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
Esempio n. 15
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a geocoding primitive.

    Covers the geocoding resolution, the startup timeout granted to the
    elastic search database and the LRU cache size.
    """

    # The previous description ("type of clustering algorithm to use") did
    # not match this hyperparameter; it now describes the listed values.
    geocoding_resolution = hyperparams.Enumeration(
        default="city",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        values=["city", "country", "state", "postcode"],
        description=
        "resolution at which to geocode: city, country, state or postcode",
    )
    rampup_timeout = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=100,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "timeout, how much time to give elastic search database to startup, may vary based on infrastructure",
    )
    cache_size = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=2000,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="LRU cache size",
    )
Esempio n. 16
0
class EnsembleVotingHyperparams(hyperparams.Hyperparams):
    """Hyperparameters selecting how ensemble members are combined."""

    ensemble_method = hyperparams.Enumeration(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="Controls which ensemble method should be used",
        default="majority",
        values=["majority", "mean", "max", "min", "median", "random"],
    )
Esempio n. 17
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: ``endog``, a normalization norm, and column selection.

    The column selection entries choose which columns are processed and how
    the results are returned.
    """

    # NOTE(review): `endog` is a bounded integer (>= 2, no upper bound) but
    # its description speaks of an array-like series -- confirm the intent.
    endog = hyperparams.Bounded[int](
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="Array like time seires.",
        default=3,
        lower=2,
        upper=None,
    )
    norm = hyperparams.Enumeration[str](
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="The norm to use to normalize each non zero sample.",
        default="l2",
        values=["l1", "l2", "max"],
    )

    # --- Column selection and output handling ---
    use_columns = hyperparams.Set(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "A set of column indices to force primitive to operate on. "
            "If any specified column cannot be parsed, it is skipped."
        ),
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
    )
    exclude_columns = hyperparams.Set(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            'A set of column indices to not operate on. '
            'Applicable only if "use_columns" is not provided.'
        ),
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
    )
    return_result = hyperparams.Enumeration(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "Should parsed columns be appended, should they replace original "
            "columns, or should only parsed columns be returned? This hyperparam "
            "is ignored if use_semantic_types is set to false."
        ),
        default="new",
        values=["append", "replace", "new"],
    )
    use_semantic_types = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "Controls whether semantic_types metadata will be used for filtering "
            "columns in input dataframe. Setting this to false makes the code "
            "ignore return_result and will produce only the output dataframe"
        ),
        default=False,
    )
    add_index_columns = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            'Also include primary index columns if input data has them. '
            'Applicable only if "return_result" is set to "new".'
        ),
        default=False,
    )
    error_on_no_input = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "Throw an exception if no input column is selected/provided. "
            "Defaults to true to behave like sklearn. To prevent pipelines from "
            "breaking set this to False."
        ),
        default=True,
    )

    # Semantic type attached to the columns this primitive generates.
    return_semantic_type = hyperparams.Enumeration[str](
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="Decides what semantic type to attach to generated attributes",
        default="https://metadata.datadrivendiscovery.org/types/Attribute",
        values=[
            "https://metadata.datadrivendiscovery.org/types/Attribute",
            "https://metadata.datadrivendiscovery.org/types/ConstructedAttribute",
        ],
    )
Esempio n. 18
0
class MetricsHyperparams(hyperparams.Hyperparams, set_names=False):
    """Hyperparameters naming a performance metric and its optional arguments."""

    metric = hyperparams.Enumeration(
        # Default is ignored.
        # TODO: Remove default. See: https://gitlab.com/datadrivendiscovery/d3m/issues/141
        default="ACCURACY",
        # One choice per member of problem.PerformanceMetric, by name.
        values=[member.name for member in problem.PerformanceMetric],
    )
    # Both optional arguments default to None.
    pos_label = hyperparams.Hyperparameter[typing.Union[str, None]](None)
    k = hyperparams.Hyperparameter[typing.Union[int, None]](None)
Esempio n. 19
0
 def setup(self):
     """Create a numerical and an enumeration hyperparameter on the instance.

     ``self.numerical`` is a uniform hyperparameter over 0..1 with default
     0.5; ``self.enumeration`` offers the integers 0..999 with default 0.
     """
     unit_interval = hyperparams.Uniform(default=0.5, lower=0, upper=1)
     thousand_choices = hyperparams.Enumeration(
         default=0,
         values=[*range(1000)],
     )
     self.numerical = unit_interval
     self.enumeration = thousand_choices
Esempio n. 20
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a windowed computation with column selection.

    ``window_size`` sets the window. The remaining entries pick the
    DataFrame resource, the columns processed and how results are returned.
    """

    window_size = hyperparams.UniformInt(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="window size to calculate",
        default=50,
        lower=0,
        upper=100,  # TODO: Define the correct the upper bound
    )

    # --- Resource / column selection and output handling ---
    dataframe_resource = hyperparams.Hyperparameter[typing.Union[str, None]](
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "Resource ID of a DataFrame to extract if there are multiple "
            "tabular resources inside a Dataset and none is a dataset entry point."
        ),
        default=None,
    )
    use_columns = hyperparams.Set(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "A set of column indices to force primitive to operate on. "
            "If any specified column cannot be parsed, it is skipped."
        ),
        default=(2,),
        elements=hyperparams.Hyperparameter[int](-1),
    )
    exclude_columns = hyperparams.Set(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            'A set of column indices to not operate on. '
            'Applicable only if "use_columns" is not provided.'
        ),
        default=(0, 1, 3),
        elements=hyperparams.Hyperparameter[int](-1),
    )
    return_result = hyperparams.Enumeration(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "Should parsed columns be appended, should they replace original "
            "columns, or should only parsed columns be returned? This hyperparam "
            "is ignored if use_semantic_types is set to false."
        ),
        default="new",
        values=["append", "replace", "new"],
    )
    use_semantic_types = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "Controls whether semantic_types metadata will be used for filtering "
            "columns in input dataframe. Setting this to false makes the code "
            "ignore return_result and will produce only the output dataframe"
        ),
        default=False,
    )
    add_index_columns = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            'Also include primary index columns if input data has them. '
            'Applicable only if "return_result" is set to "new".'
        ),
        default=False,
    )
    error_on_no_input = hyperparams.UniformBool(
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description=(
            "Throw an exception if no input column is selected/provided. "
            "Defaults to true to behave like sklearn. To prevent pipelines from "
            "breaking set this to False."
        ),
        default=True,
    )
    # Semantic type attached to the columns this primitive generates.
    return_semantic_type = hyperparams.Enumeration[str](
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="Decides what semantic type to attach to generated attributes",
        default="https://metadata.datadrivendiscovery.org/types/Attribute",
        values=[
            "https://metadata.datadrivendiscovery.org/types/Attribute",
            "https://metadata.datadrivendiscovery.org/types/ConstructedAttribute",
        ],
    )
Esempio n. 21
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameter set for a text summarization configuration.

    Every entry is tagged as a control parameter: the summarization
    algorithm, the kind of source documents, the stemming language, what
    data is returned, and the number of summary sentences.
    """

    # Summarization algorithm; defaults to "text_rank".
    algorithm = hyperparams.Enumeration(
        values=["luhn", "edmundson", "lsa", "text_rank", "sum_basic", "kl"],
        default="text_rank",
        description="type of summarization algorithm to use",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    # How the source documents are supplied: plain text (default) or URL.
    source_type = hyperparams.Enumeration(
        values=["plain_text", "url"],
        default="plain_text",
        description="type of source documents to be analyzed",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    # Language handed to the NLTK stemming step; defaults to "english".
    language = hyperparams.Enumeration(
        values=[
            "danish",
            "dutch",
            "english",
            "finnish",
            "french",
            "german",
            "hungarian",
            "italian",
            "norwegian",
            "porter",
            "portuguese",
            "romanian",
            "russian",
            "spanish",
            "swedish",
        ],
        default="english",
        description="language to use for the NLTK stemming process",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    # Which data is returned; defaults to "all".
    return_result = hyperparams.Enumeration(
        values=["new", "all", "replace"],
        default="all",
        description="what data should be returned",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    # Number of summary sentences to return (at least 1, default 20).
    nsentences = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=20,
        description="number of summary sentences to return",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
Esempio n. 22
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters limiting which generated features are returned.

    Covers the allowed null ratio and pairwise correlation of returned
    features, a cap on the number of features, and column selection /
    result-placement controls.
    """

    # Upper bound (0..1) on the fraction of nulls a returned feature may have.
    # The description previously claimed a lower value yields *more* nulls,
    # which contradicts the meaning of a maximum; the wording is corrected.
    max_percent_null = hyperparams.Bounded[float](
        default=.5,
        lower=0,
        upper=1,
        description=(
            'The maximum percentage of null values allowed in returned features. '
            'A lower value means features will have fewer nulls.'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
    )

    # Upper bound (0..1) on the correlation between any two returned features.
    max_correlation = hyperparams.Bounded[float](
        default=.9,
        lower=0,
        upper=1,
        description=
        'The maximum allowed correlation between any two features returned. A lower value means features will be more uncorrelated',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
    )

    # Column indices the primitive is forced to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )

    # Column indices to leave alone; only consulted when use_columns is empty.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )

    # Where the produced columns go: appended, replacing originals, or alone.
    # NOTE(review): the description mentions use_semantic_types, but no such
    # hyperparameter is declared in this class — confirm whether it is stale.
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )

    # Cap on the number of generated features; -1 is documented as "no limit".
    max_features = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        "Cap the number of generated features to this number. If -1, no limit."
    )
Esempio n. 23
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for parsing columns according to their semantic types.

    All entries are control parameters: which semantic types to parse, which
    columns to include or exclude, how conversion errors are handled, and
    whether time parsing is fuzzy.
    """

    # Semantic types that should be parsed. The element enumeration lists the
    # supported types; the default enables Boolean, Integer and Float only.
    parsing_semantics = hyperparams.Set(
        elements=hyperparams.Enumeration(
            default='http://schema.org/Float',
            values=[
                'http://schema.org/Boolean',
                'http://schema.org/Integer',
                'http://schema.org/Float',
                'https://metadata.datadrivendiscovery.org/types/FloatVector',
                'http://schema.org/DateTime',
                'https://metadata.datadrivendiscovery.org/types/CategoricalData',
            ],
        ),
        default=(
            'http://schema.org/Boolean',
            'http://schema.org/Integer',
            'http://schema.org/Float',
        ),
        description=(
            'A set of semantic types to parse. One can provide a subset of '
            'supported semantic types to limit what the primitive parses.'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # Column indices the primitive is forced to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description=(
            'A set of column indices to force primitive to operate on. '
            'If any specified column cannot be parsed, it is skipped.'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # Column indices to skip; only consulted when use_columns is not provided.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description=(
            "A set of column indices to not operate on. "
            "Applicable only if \"use_columns\" is not provided."
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # Policy for numeric conversion errors; defaults to "coerce".
    error_handling = hyperparams.Enumeration[str](
        values=('ignore', 'raise', 'coerce'),
        default='coerce',
        description='Setting to deal with error when converting a column to numeric value.',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # Toggle for fuzzy time parsing; enabled by default.
    fuzzy_time_parsing = hyperparams.UniformBool(
        default=True,
        description='Use fuzzy time parsing.',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
Esempio n. 24
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for feature reduction, pooling, ranking and caching.

    Covers the dimensionality reduction applied to feature vectors, the
    generalized-mean-pooling exponent, the negative-annotation threshold used
    while ranking, and the location of the dot-product cache.

    Multi-line descriptions use implicit string concatenation. The previous
    backslash-newline continuations inside the literals embedded the source
    indentation in the description text.
    """

    # Reduction method applied to feature vectors: "pca" (default) or "svd".
    reduce_method = hyperparams.Enumeration(
        default="pca",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["pca", "svd"],
        description=
        "dimensionality reduction method that is applied to feature vectors",
    )

    # Size of the reduced feature vectors, 0..1024 inclusive (default 128).
    reduce_dimension = hyperparams.UniformInt(
        lower=0,
        upper=1024,
        default=128,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="number of dimensions in reduced feature vectors",
    )

    # Exponent p of generalized mean pooling (default 1).
    gem_p = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=1,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=(
            "parameter p in generalized mean pooling; p > 1 increases the contrast of the "
            "pooled feature map; p = 1 equivalent to average pooling; p = +inf equivalent to "
            "max pooling."
        ),
    )

    # Number of negative annotations required before ranking stops relying on
    # positive annotations alone (default 5).
    denominator_min = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=5,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "only ranks according to positive annotations until this many negative "
            "annotations are obtained"
        ),
    )

    # Location where already computed dot products are cached.
    dot_products_cache = hyperparams.Hyperparameter[str](
        default="dot_product_cache",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "already computed dot products will be cached in this location",
    )
Esempio n. 25
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters controlling how duplicated rows are collapsed."""

    # Keep the first duplicated row (default) or the average of the duplicates.
    keep_option = hyperparams.Enumeration(
        default="first",
        values=["first", "average"],
        description=(
            "When dropping rows, choose to keep the first one of duplicated "
            "data or calculate their average"
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for running a pretrained inference model on columns.

    Covers column selection, the choice of pretrained model, the inference
    batch size, spatial pooling of the returned features, and optional LZ4
    decompression of the input data.

    Multi-line descriptions use implicit string concatenation. The previous
    backslash-newline continuations inside the literals embedded the source
    indentation in the description text.
    """

    # Column indices the primitive is forced to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "A set of column indices to force primitive to operate on. "
            "If any specified column cannot be parsed, it is skipped."
        ),
    )

    # Pretrained inference model: "amdim" or "moco" (default).
    inference_model = hyperparams.Enumeration(
        default="moco",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["amdim", "moco"],
        description="type pretrained inference model to use",
    )

    # Inference batch size, 1..512 inclusive (default 256).
    batch_size = hyperparams.UniformInt(
        lower=1,
        upper=512,
        default=256,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="inference batch size",
    )

    # Whether features are pooled across spatial dimensions (default True).
    pool_features = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "whether to pool features across spatial dimensions in returned frame",
    )

    # Whether the data is LZ4-decompressed first (default False).
    decompress_data = hyperparams.Hyperparameter[bool](
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "If True, applies LZ4 decompression algorithm to the data. "
            "Compressed data stores a header consisting of the dtype character and the "
            "data shape as unsigned integers. Given c struct alignment, will occupy "
            "16 bytes (1 + 4 + 4 + 4 + 3 ) padding"
        ),
    )
Esempio n. 27
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters choosing between ablation and imputation of the data."""

    # Strategy applied to the original data; defaults to "imputation".
    continuity_option = hyperparams.Enumeration(
        default="imputation",
        values=["ablation", "imputation"],
        description="Choose ablation or imputation the original data",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    # Timestamp interval, documented as used only for imputation (default 1).
    interval = hyperparams.Uniform(
        lower=1e-9,
        upper=10 ** 10,
        default=1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Only used in imputation, give the timestamp interval.",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a fairness pre-processing configuration.

    All entries are control parameters: the pre-processing algorithm, the
    columns treated as protected attributes, and the label value regarded
    as favorable.
    """

    # Fairness pre-processing algorithm; defaults to Disparate_Impact_Remover.
    algorithm = hyperparams.Enumeration(
        values=[
            "Disparate_Impact_Remover",
            "Learning_Fair_Representations",
            "Reweighing",
        ],
        default="Disparate_Impact_Remover",
        description="type of fairness pre-processing algorithm to use",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    # Column indices used as protected attributes; empty list by default.
    protected_attribute_cols = hyperparams.List(
        elements=hyperparams.Hyperparameter[int](-1),
        default=[],
        description='A set of column indices to use as protected attributes.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    # Label value (0..1, default 1) treated as favorable in the binary case.
    favorable_label = hyperparams.Bounded[float](
        default=1.,
        lower=0.,
        upper=1.,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "label value which is considered favorable (i.e. positive) "
            "in the binary label case"
        ),
    )
Esempio n. 29
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a clustering configuration.

    Selects the clustering algorithm and exposes the tuning parameters the
    descriptions associate with it: the cluster count for kernel k-means,
    eps for DBSCAN, and min_samples for DBSCAN / HDBSCAN.

    Multi-line descriptions use implicit string concatenation. The previous
    backslash-newline continuations inside the literals embedded the source
    indentation in the description text.
    """

    # Clustering algorithm; defaults to GlobalAlignmentKernelKMeans.
    algorithm = hyperparams.Enumeration(
        default='GlobalAlignmentKernelKMeans',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=[
            'GlobalAlignmentKernelKMeans', 'TimeSeriesKMeans', 'DBSCAN',
            'HDBSCAN'
        ],
        description='type of clustering algorithm to use')

    # Number of clusters (at least 1, default 3).
    nclusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=3,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'number of clusters '
            'to use in kernel kmeans algorithm'
        ))

    # Maximum distance between two samples of one neighborhood (default 0.5).
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'maximum distance between two samples for them to be considered as in the same neighborhood, '
            'used in DBSCAN algorithm'
        ))

    # Neighborhood size required for a core point (at least 1, default 5).
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'number of samples in a neighborhood for a point to be considered as a core point, '
            'used in DBSCAN and HDBSCAN algorithms'
        ))
Esempio n. 30
0
class Hyperparams(Hyperparams_ODBase):
    """Hyperparameters declared on top of ``Hyperparams_ODBase``.

    The entries visible here configure an ABOD detector: the neighbor count
    and the choice between the fast and the original variant.
    """
    # Additional hyperparameters beyond those inherited from the base class.

    # Neighbor count for k-neighbors queries (default 10).
    n_neighbors = hyperparams.Hyperparameter[int](
        default=10,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        "Number of neighbors to use by default for k neighbors queries.",
    )

    # ABOD variant: 'fast' (default) considers only n_neighbors training points,
    # 'default' uses all training points and is documented as potentially slow.
    method = hyperparams.Enumeration(
        values=['fast', 'default'],
        default='fast',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "'fast': fast ABOD. Only consider n_neighbors of training points    'default': original ABOD with all training points, which could be slow",
    )