class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: database start-up timeout, target columns and cache size."""

    # How long to wait for the Elasticsearch database to come up.
    rampup_timeout = hyperparams.UniformInt(
        default=100,
        lower=1,
        upper=sys.maxsize,
        description="timeout, how much time to give elastic search database to startup, may vary based on infrastructure",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
    )

    # Set of integer column indices; empty by default.
    target_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        description="indices of column with geolocation formatted as text that should be converted to lat,lon pairs",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )

    # Capacity of the LRU cache.
    cache_size = hyperparams.UniformInt(
        default=2000,
        lower=1,
        upper=sys.maxsize,
        description="LRU cache size",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
    )
Beispiel #2
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: coreset size, IRLS stopping tolerance and IRLS iteration cap."""

    # Tagged as a tuning parameter.
    coresetmultiplier = hyperparams.UniformInt(
        lower=2,
        upper=7,
        default=4,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="coreset size, as a multiple of the number of input features",
    )

    # Tagged as a control parameter; sampled on a log scale.
    eps = hyperparams.LogUniform(
        lower=1e-14,
        upper=1e-2,
        default=1e-6,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="relative error stopping tolerance for IRLS solver",
    )

    # Tagged as a tuning parameter.
    maxIters = hyperparams.UniformInt(
        lower=50,
        upper=500,
        default=100,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="maximum iterations of IRLS",
    )
Beispiel #3
0
class SplitterHyperparameter(hyperparams.Hyperparams):
    """Thresholds that decide when a dataframe is split (sampled).

    All four fields are tagged as control parameters.
    """

    # Column-count threshold.
    threshold_column_length = hyperparams.UniformInt(
        default=300,
        lower=1,
        upper=sys.maxsize,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="The threshold value of amount of column in a dataframe, if the value is larger, it will be splitted (sampled).",
    )

    # Row-count threshold.
    threshold_row_length = hyperparams.UniformInt(
        default=100000,
        lower=1,
        upper=sys.maxsize,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="The threshold value of amount of row in a dataframe, if the value is larger, it will be splitted (sampled).",
    )

    # Column-count threshold for the case where both dimensions are very large.
    further_reduce_threshold_column_length = hyperparams.UniformInt(
        default=200,
        lower=1,
        upper=sys.maxsize,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="The threshold of column amount to further reduce the threshold_row_length value for the condition that both the amount of column and row are very large",
    )

    # Reduction ratio; the upper bound 1 is included in the range.
    further_reduce_ratio = hyperparams.Uniform(
        default=0.5,
        lower=0,
        upper=1,
        upper_inclusive=True,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="The ratio to further reduce the threshold_row_length value for the condition that both the amount of column and row are very large",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: polynomial degree, tensor rank, regularization, init variance, solver options."""

    # --- Tuning parameters: searched over to tune performance. ---

    q = hyperparams.UniformInt(
        lower=2,
        upper=10,
        default=3,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="degree of the polynomial to be fit",
    )
    r = hyperparams.UniformInt(
        lower=2,
        upper=30,
        default=5,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="rank of the coefficient tensors to be fit",
    )
    gamma = hyperparams.LogUniform(
        lower=0.0001,
        upper=10,
        default=0.01,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="l2 regularization to use on the tensor low-rank factors",
    )
    alpha = hyperparams.LogUniform(
        lower=0.001,
        upper=1,
        default=0.1,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="variance of the random initialization of the factors",
    )
    epochs = hyperparams.UniformInt(
        lower=1,
        upper=100,
        default=30,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="maximum iterations of LBFGS, or number of epochs of SFO",
    )

    # --- Control parameters: fixed once the pipeline has been built. ---

    solver = hyperparams.Enumeration[str](
        values=["SFO", "LBFGS"],
        default="LBFGS",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="solver to use: LBFGS better for small enough datasets, SFO does minibached stochastic quasi-Newton to scale to large dataset",
    )
    # String-valued switch ("YES"/"NO") rather than a boolean.
    preprocess = hyperparams.Enumeration[str](
        values=["YES", "NO"],
        default="YES",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="whether to use a preprocessing that tends to work well for tensor machines",
    )
Beispiel #5
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: geocoding resolution, database start-up timeout and cache size."""

    # The original description ("type of clustering algorithm to use") was a
    # copy-paste from another primitive; it now describes this field.
    # NOTE(review): wording is derived from the field name and its allowed
    # values -- confirm against the primitive's implementation.
    geocoding_resolution = hyperparams.Enumeration(
        default="city",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        values=["city", "country", "state", "postcode"],
        description="resolution at which locations are geocoded",
    )
    # How long to wait for the Elasticsearch database to come up.
    rampup_timeout = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=100,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "timeout, how much time to give elastic search database to startup, may vary based on infrastructure",
    )
    # Capacity of the LRU cache.
    cache_size = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=2000,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="LRU cache size",
    )
Beispiel #6
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for time-series k-means clustering.

    Long descriptions are built with implicit string concatenation; a
    backslash continuation inside a string literal would embed the next
    line's indentation in the text.
    """

    algorithm = hyperparams.Enumeration(
        default='TimeSeriesKMeans',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['GlobalAlignmentKernelKMeans', 'TimeSeriesKMeans'],
        description='type of clustering algorithm to use')
    nclusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=3,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='number of clusters to use in kernel kmeans algorithm')
    n_init = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'Number of times the k-means algorithm '
            'will be run with different centroid seeds. '
            'Final result will be the best output on n_init consecutive runs '
            'in terms of inertia'
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a seasonal-ARIMA forecaster; all are control parameters."""

    # Accepts an int or None; defaults to 0.
    index = hyperparams.Hyperparameter[typing.Optional[int]](
        default=0,
        description="index of which suggestedTarget to predict",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )

    # Forecast horizon.
    n_periods = hyperparams.UniformInt(
        default=29,
        lower=1,
        upper=sys.maxsize,
        description="number of periods to predict",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )

    # Toggle for the seasonal variant.
    seasonal = hyperparams.UniformBool(
        default=True,
        description="seasonal ARIMA prediction",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )

    # Seasonal differencing period, between 1 and 365.
    seasonal_differencing = hyperparams.UniformInt(
        default=12,
        lower=1,
        upper=365,
        description="period of seasonal differencing",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
Beispiel #8
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for time-series k-means clustering on a dataframe.

    Covers the algorithm choice, cluster count, number of restarts, the
    indices of the timestamp/value/grouping columns and the name of the
    appended cluster column.
    """

    algorithm = hyperparams.Enumeration(
        default="TimeSeriesKMeans",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["GlobalAlignmentKernelKMeans", "TimeSeriesKMeans"],
        description="type of clustering algorithm to use",
    )
    nclusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=3,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="number of clusters to use in kernel kmeans algorithm",
    )
    n_init = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        # Implicit concatenation instead of a backslash continuation inside
        # the literal, which embedded the indentation in the description.
        description=(
            "Number of times the k-means algorithm will be run with different centroid seeds. "
            "Final result will be the best output on n_init consecutive runs in terms of inertia"
        ),
    )
    # The three column-index fields accept an int or None and default to None.
    time_col_index = hyperparams.Hyperparameter[Union[int, None]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Index of column in input dataframe containing timestamps.",
    )
    value_col_index = hyperparams.Hyperparameter[Union[int, None]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "Index of column in input dataframe containing the values associated with the timestamps.",
    )
    grouping_col_index = hyperparams.Hyperparameter[Union[int, None]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "Index of column in input dataframe containing the values used to mark timeseries groups",
    )
    output_col_name = hyperparams.Hyperparameter[str](
        default="__cluster",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "Name to assign to cluster column that is appended to the input dataset",
    )
Beispiel #9
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for SIMON-based column annotation.

    Covers overwrite behaviour, the optional rule-based and multi-label
    classification steps, row/character limits and the decoding threshold.
    """

    overwrite = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "whether to overwrite manual annotations with SIMON annotations",
    )
    statistical_classification = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "whether to append categorical / ordinal annotations using rule-based classification",
    )
    multi_label_classification = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "whether to perform multi-label classification and append multiple annotations to metadata",
    )
    max_rows = hyperparams.UniformInt(
        lower=100,
        upper=2000,
        default=500,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "maximum number of rows to consider when classifying data type of specific column",
    )
    max_chars = hyperparams.UniformInt(
        lower=1,
        upper=100,
        default=20,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "maximum number of characters to consider when processing row",
    )
    p_threshold = hyperparams.Uniform(
        lower=0,
        upper=1.0,
        default=0.5,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        # The original triple-quoted literal embedded a trailing space, a
        # newline and the source indentation in the description.
        description=(
            "probability threshold to use when decoding classification results. "
            "Predictions above p_threshold will be returned"
        ),
    )
Beispiel #10
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for DBSCAN / HDBSCAN clustering.

    Long descriptions use implicit string concatenation; the backslash
    continuations used before embedded the source indentation in the text.
    """

    algorithm = hyperparams.Enumeration(
        default="HDBSCAN",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["DBSCAN", "HDBSCAN"],
        description="type of clustering algorithm to use",
    )
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=(
            "maximum distance between two samples for them to be considered as in "
            "the same neighborhood, used in DBSCAN algorithm"
        ),
    )
    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="the minimum size of clusters",
    )
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "The number of samples in a neighbourhood for a point to be considered a core point.",
    )
    cluster_selection_method = hyperparams.Enumeration(
        default="eom",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        values=["leaf", "eom"],
        description=
        "Determines how clusters are selected from the cluster hierarchy tree for HDBSCAN",
    )
    required_output = hyperparams.Enumeration(
        default="feature",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["prediction", "feature"],
        description=(
            "Determines whether the output is a dataframe with just predictions, "
            "or an additional feature added to the input dataframe."
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a multi-series forecasting primitive.

    All fields are tagged as control parameters. Long descriptions use
    implicit string concatenation; the backslash continuations used before
    embedded the source indentation in the text.
    """

    datetime_index = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description=(
            'if multiple datetime indices exist, this HP specifies which to apply to training data. '
            'If None, the primitive assumes there is only one datetime index. '
            'This HP can also specify multiple indices '
            'which should be concatenated to form datetime_index'
        ),
    )
    # NOTE(review): the original description was truncated ("whether the
    # datetime "); it was completed from the field name -- confirm wording.
    datetime_index_unique = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='whether the datetime index is unique',
    )
    datetime_filter = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description=(
            'index of column in input dataset that contain unique identifiers of '
            'time series that have different datetime indices'
        ),
    )
    filter_index = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='index of column in input dataset that contain unique identifiers of different time series',
    )
    n_periods = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=61,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='number of periods to predict',
    )
    interval = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='interval with which to sample future predictions',
    )
    datetime_interval_exception = hyperparams.Hyperparameter[typing.Union[str, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        # Closing parenthesis added; it was missing from the original text.
        description=(
            'to handle different prediction intervals (stock market dataset). '
            'If this HP is set, primitive will just make next forecast for this datetime value '
            '(not multiple forecasts at multiple intervals)'
        ),
    )
    max_lags = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='maximum lag order to evaluate to find model - eval criterion = AIC',
    )
    arma_p = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=0,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='The p order of the ARMA model in case some time series are univariate',
    )
    arma_q = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=0,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='The q order of the ARMA model in case some time series are univariate',
    )
    weights_filter_value = hyperparams.Hyperparameter[typing.Union[str, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description=(
            'value to select a filter from column filter index for which to return correlation '
            'coefficient matrix.'
        ),
    )
Beispiel #12
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for DBSCAN / HDBSCAN clustering.

    Long descriptions use implicit string concatenation; the backslash
    continuations used before embedded the source indentation in the text.
    """

    algorithm = hyperparams.Enumeration(
        default='HDBSCAN',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['DBSCAN', 'HDBSCAN'],
        description='type of clustering algorithm to use')
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'maximum distance between two samples for them to be considered '
            'as in the same neighborhood, used in DBSCAN algorithm'
        ))
    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='the minimum size of clusters')
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        'The number of samples in a neighbourhood for a point to be considered a core point.'
    )
    cluster_selection_method = hyperparams.Enumeration(
        default='eom',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        values=['leaf', 'eom'],
        description=
        'Determines how clusters are selected from the cluster hierarchy tree for HDBSCAN'
    )
    required_output = hyperparams.Enumeration(
        default='feature',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['prediction', 'feature'],
        description=(
            'Determines whether the output is a dataframe with just predictions, '
            'or an additional feature added to the input dataframe.'
        ))
Beispiel #13
0
class InceptionV3Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for InceptionV3 video feature extraction.

    Covers frame-count limits plus switches for metadata generation,
    preprocessing, resizing and enforcement of the frame limits.
    """

    minimum_frame = hyperparams.UniformInt(
        lower=1,
        upper=100000,
        default=40,
        description=
        "Specify the least amount of the frame in a video, if the video is too short with less frame, we will not consider to use for training",
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ])
    maximum_frame = hyperparams.UniformInt(
        lower=1,
        upper=100000,
        default=300,
        description=
        "Specify the max amount of the frame in a video, if the video is too long, we will not consider to use for training",
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ])
    # The description used to claim this is off by default, contradicting
    # default=True; the text now matches the actual default.
    generate_metadata = hyperparams.UniformBool(
        default=True,
        description=(
            "A control parameter to set whether to generate metadata after the feature extraction. "
            "It will be very slow if the columns length is very large. "
            "It is enabled by default; turn it off to accelerate the program running."
        ),
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ])
    do_preprocess = hyperparams.UniformBool(
        default=True,
        description=
        "A control parameter to set whether to do preprocess step on input tensor, it normally should be set as true",
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ])

    do_resize = hyperparams.UniformBool(
        default=True,
        description=
        "A control parameter to set whether to resize the input tensor to be correct shape as input, it normally should be set as true",
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ])
    # NOTE(review): the text says videos are ignored when this is False,
    # which reads inverted relative to the first sentence -- confirm against
    # the primitive before rewording. Only the "outsite" typo was fixed.
    use_limitation = hyperparams.UniformBool(
        default=True,
        description=
        "A control parameter to consider the limitation of the maximum/minimum frame amount during processing. If set False, we will ignore the input videos outside the frame amount limitation.",
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ])
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for DBSCAN / HDBSCAN clustering, with a long-format input flag."""

    algorithm = hyperparams.Enumeration(
        default='HDBSCAN',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['DBSCAN', 'HDBSCAN'],
        description='type of clustering algorithm to use')
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        # Implicit concatenation instead of a backslash continuation inside
        # the literal, which embedded the indentation in the description.
        description=(
            'maximum distance between two samples for them to be considered '
            'as in the same neighborhood, used in DBSCAN algorithm'
        ))
    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='the minimum size of clusters')
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        'The number of samples in a neighbourhood for a point to be considered a core point.'
    )
    long_format = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "whether the input dataset is already formatted in long format or not")
    cluster_selection_method = hyperparams.Enumeration(
        default='eom',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        values=['leaf', 'eom'],
        description=
        'Determines how clusters are selected from the cluster hierarchy tree for HDBSCAN'
    )
Beispiel #15
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: feature reduction, generalized mean pooling, ranking and caching.

    Long descriptions use implicit string concatenation; the backslash
    continuations used before embedded the source indentation in the text.
    """

    reduce_method = hyperparams.Enumeration(
        default="pca",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["pca", "svd"],
        description=
        "dimensionality reduction method that is applied to feature vectors",
    )
    reduce_dimension = hyperparams.UniformInt(
        lower=0,
        upper=1024,
        default=128,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="number of dimensions in reduced feature vectors",
    )
    gem_p = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=1,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=(
            "parameter p in generalized mean pooling; p > 1 increases the contrast of the "
            "pooled feature map; p = 1 equivalent to average pooling; p = +inf equivalent to "
            "max pooling."
        ),
    )
    denominator_min = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=5,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "only ranks according to positive annotations until this many negative "
            "annotations are obtained"
        ),
    )
    dot_products_cache = hyperparams.Hyperparameter[str](
        default="dot_product_cache",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "already computed dot products will be cached in this location",
    )
Beispiel #16
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a shapelet-based time-series model.

    Covers shapelet sizing, training epochs, learning rate, weight
    regularization and a long-format input flag.
    """

    shapelet_length = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.1,
        upper_inclusive=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        'base shapelet length, expressed as fraction of length of time series')
    num_shapelet_lengths = hyperparams.UniformInt(
        lower=1,
        upper=100,
        default=2,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='number of different shapelet lengths')
    # default epoch size from https://tslearn.readthedocs.io/en/latest/auto_examples/plot_shapelets.html#sphx-glr-auto-examples-plot-shapelets-py
    epochs = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=200,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='number of training epochs')
    # The next two descriptions were copy-pasted from num_shapelet_lengths
    # ('number of different shapelet lengths').
    # NOTE(review): new wording is derived from the field names -- confirm
    # against how the primitive passes these values to the model.
    learning_rate = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.1,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='learning rate used during training')
    weight_regularizer = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.01,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='strength of the regularization applied to the model weights')
    long_format = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "whether the input dataset is already formatted in long format or not")
Beispiel #17
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a shapelet-based time-series model: sizing and epochs."""

    # Was LogUniform with lower=0. A log-uniform range cannot start at 0
    # because log(0) is undefined, so this uses a plain Uniform over the same
    # [0, 1) range.
    # NOTE(review): if log-scale sampling is really wanted, keep LogUniform
    # and raise `lower` to a small positive value instead.
    shapelet_length = hyperparams.Uniform(
        lower=0,
        upper=1,
        default=0.1,
        upper_inclusive=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description='base shapelet length, expressed as fraction of length of time series')
    num_shapelet_lengths = hyperparams.UniformInt(
        lower=1,
        upper=100,
        default=2,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='number of different shapelet lengths')
    # default epoch size from https://tslearn.readthedocs.io/en/latest/auto_examples/plot_shapelets.html#sphx-glr-auto-examples-plot-shapelets-py
    epochs = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=200,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='number of training epochs')
Beispiel #18
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: target rank, working-dimension multiplier and repetition count."""

    # Searched over to tune performance.
    rank = hyperparams.UniformInt(
        lower=1,
        upper=500,
        default=5,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="desired rank of the decomposition",
    )

    # NOTE(review): the original comment called the next two fields "control
    # parameters determined once during pipeline building then fixed", yet
    # both carry the TuningParameter semantic type -- confirm which is intended.
    rankMultiplier = hyperparams.UniformInt(
        lower=3,
        upper=12,
        default=5,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="work in dimension that is this multiple of the desired final rank",
    )
    numReps = hyperparams.UniformInt(
        lower=1,
        upper=20,
        default=1,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="repeat the approximation this many times and take the best approximation",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for an affinity-matrix based clustering primitive.

    ``n_clusters``, ``n_init``, ``n_neighbors`` and ``affinity`` are tagged
    as tuning parameters; ``task_type`` is a control parameter selecting the
    shape of the produced output.
    """

    n_clusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=8,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='The dimension of the projection space')

    n_init = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        'Number of times the k-means algorithm will be run with different centroid seeds'
    )

    # Per its description, only consulted when affinity is not "rbf".
    n_neighbors = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        'Number of neighbors when constructing the affinity matrix using n-neighbors, ignored for affinity="rbf"'
    )

    affinity = hyperparams.Enumeration(
        default='rbf',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        values=['rbf', 'nearest_neighbors'],
        description='method to construct affinity matrix')

    # The description is assembled from adjacent literals rather than a
    # backslash continuation inside the string: the continuation form pulls
    # the next source line's indentation into the runtime description text.
    task_type = hyperparams.Enumeration(
        default='classification',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['clustering', 'classification'],
        description=(
            'Determines whether the output is a dataframe with just predictions, '
            'or an additional feature added to the input dataframe.'
        ))
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: discretization bin count and prior strength ``N0``."""

    # Number of discretization bins.
    nbins = hyperparams.UniformInt(
        default=10,
        lower=2,
        upper=21,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The number of bins for discretization.",
    )

    # Strength of the prior; per the description, larger means stronger.
    N0 = hyperparams.UniformInt(
        default=5,
        lower=1,
        upper=11,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The hyperparameter specifing the power of the prior. The larger the N0, the stronger the power of prior",
    )
Beispiel #21
0
class ForecastingESRNNHyperparams(hyperparams.Hyperparams):
    """Hyperparameters for the ESRNN forecasting primitive.

    Every entry is tagged as a ``ControlParameter``. ``seasonality``,
    ``input_size`` and ``output_size`` still carry empty descriptions
    (marked TODO in the original).
    """

    # Integer-valued, so tagged schema.org/Integer (the original mistakenly
    # used schema.org/Boolean). The bounds permit 0 epochs.
    max_epochs = hyperparams.UniformInt(
        default=50,
        lower=0,
        upper=sys.maxsize,
        description="epochs to do on fit process",
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ])
    batch_size = hyperparams.UniformInt(
        default=8,
        lower=1,
        upper=10000,
        description="The batch size for RNN training",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ])
    # Unbounded float hyperparameter; no range is enforced here.
    learning_rate = hyperparams.Hyperparameter[float](
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        default=1e-3,
        description='Learning rate used during training (fit).')
    seasonality = hyperparams.UniformInt(
        default=30,
        lower=1,
        upper=10000,
        description="",  # TODO
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ])
    input_size = hyperparams.UniformInt(
        default=30,
        lower=1,
        upper=10000,
        description="",  # TODO
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ])
    output_size = hyperparams.UniformInt(
        default=60,
        lower=1,
        upper=10000,
        description="",  # TODO
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ])
Beispiel #22
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a discretization-based mutual information estimator.

    All four entries are tagged as ``TuningParameter``.
    """

    # Number of discretization bins.
    nbins = hyperparams.UniformInt(
        default=10,
        lower=2,
        upper=21,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The number of bins for discretization.",
    )

    # Mutual information estimation method.
    method = hyperparams.Enumeration[str](
        default="counting",
        values=["counting", "pseudoBayesian", "fullBayesian"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The method for mutual information estimation.",
    )

    # Binning strategy handed to the KBins discretizer.
    strategy = hyperparams.Enumeration[str](
        default="uniform",
        values=["uniform", "quantile"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The method for KBins Discretizer.",
    )

    # Kind of task being solved.
    problem_type = hyperparams.Enumeration[str](
        default="classification",
        values=["classification", "regression"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The task types",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: inference batch size and pooled image dimensions."""

    # Inference batch size, bounded to 1..512 with the upper bound included.
    batch_size = hyperparams.UniformInt(
        default=256,
        lower=1,
        upper=512,
        upper_inclusive=True,
        description="inference batch size",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    # Both pooled dimensions are typed Optional[int], so None is accepted.
    height = hyperparams.Hyperparameter[typing.Optional[int]](
        default=4,
        description="Height of pooled images",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
    width = hyperparams.Hyperparameter[typing.Optional[int]](
        default=4,
        description="Width of pooled images",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
Beispiel #24
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a time-series k-means clustering primitive.

    ``algorithm`` and ``long_format`` are control parameters; ``nclusters``
    is a tuning parameter.
    """

    # Which clustering implementation to run.
    algorithm = hyperparams.Enumeration(
        default='GlobalAlignmentKernelKMeans',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['GlobalAlignmentKernelKMeans', 'TimeSeriesKMeans'],
        description='type of clustering algorithm to use')
    # Written as a single literal: the original split this string with a
    # backslash continuation, which embedded the source indentation in the
    # runtime description (and misspelled "to use" as "to user").
    nclusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=3,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='number of clusters to use in kernel kmeans algorithm')
    long_format = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "whether the input dataset is already formatted in long format or not")
Beispiel #25
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for feature-vector reduction and generalized mean pooling.

    ``reduce_method`` and ``reduce_dimension`` are control parameters;
    ``gem_p`` is a tuning parameter.
    """

    # Dimensionality reduction applied to the feature vectors.
    reduce_method = hyperparams.Enumeration(
        default = 'pca', 
        semantic_types = ['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        values = ['pca', 'svd'],
        description = 'dimensionality reduction method that is applied to feature vectors'
    )
    # NOTE(review): the lower bound permits 0 dimensions -- confirm how the
    # primitive interprets that value.
    reduce_dimension = hyperparams.UniformInt(
        lower=0,
        upper=1024,
        default=128,
        upper_inclusive=True,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="number of dimensions in reduced feature vectors",
    )
    # The description is built from adjacent literals: the original used
    # backslash continuations inside the string, which embedded the source
    # indentation in the runtime text (and misspelled "contrast").
    gem_p = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=1,
        upper_inclusive=True,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description=(
            "parameter p in generalized mean pooling; p > 1 increases the contrast of the "
            "pooled feature map; p = 1 equivalent to average pooling; p = +inf equivalent to "
            "max pooling."
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a k-nearest-neighbors classifier.

    All three entries are tagged as ``TuningParameter``.
    """

    # NOTE(review): the lower bound is 0, so zero neighbors passes
    # validation -- confirm whether that is intended.
    n_neighbors = hyperparams.UniformInt(
        default=5,
        lower=0,
        upper=sys.maxsize,
        description="number of neighbors on which to make classification decision",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Distance used by the KNN computation.
    distance_metric = hyperparams.Enumeration(
        values=["euclidean", "dtw"],
        default="euclidean",
        description="whether to use euclidean or dynamic time warping distance metric in KNN computation",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # How neighboring points are weighted.
    sample_weighting = hyperparams.Enumeration(
        values=["uniform", "inverse_distance"],
        default="uniform",
        description="whether to weight points uniformly or by the inverse of their distance",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )
Beispiel #27
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for label propagation over a k-NN adjacency matrix.

    ``alpha`` and ``n_iterations`` are tuning parameters; ``k``,
    ``all_scores`` and ``normalize_features`` are control parameters.
    """

    # Neighbor count used to build the k-NN adjacency matrix.
    k = hyperparams.UniformInt(
        default=10,
        lower=1,
        upper=100,
        upper_inclusive=True,
        description="number of neighbors to use when constructing k-NN adjacency matrix",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # Propagation step size, bounded to 0..1 with the upper bound included.
    alpha = hyperparams.Uniform(
        default=0.85,
        lower=0,
        upper=1,
        upper_inclusive=True,
        description="controls step size during label propagations",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Number of propagation iterations.
    n_iterations = hyperparams.UniformInt(
        default=50,
        lower=10,
        upper=100,
        upper_inclusive=True,
        description="number of iterations during label propagations",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Boolean switches for the produce output and feature pre-processing.
    all_scores = hyperparams.UniformBool(
        default=False,
        description="whether to return scores for all classes from produce method",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
    normalize_features = hyperparams.UniformBool(
        default=False,
        description="whether to L2 normalize feature vectors",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: a single tunable discretization bin count."""

    # Number of discretization bins.
    nbins = hyperparams.UniformInt(
        default=10,
        lower=2,
        upper=21,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The number of bins for discretization.",
    )
Beispiel #29
0
class DataFrameToTensorHyperparams(hyperparams.Hyperparams):
    """Hyperparameters for reading images from a dataframe into a tensor.

    Every entry is an integer tagged ``schema.org/Integer`` and
    ``ControlParameter``.
    """

    # Number of processes used to read images; the default of 1 means no
    # multiprocessing, per the description.
    process_amount = hyperparams.UniformInt(
        default=1,
        lower=1,
        upper=sys.maxsize,
        semantic_types=[
            'http://schema.org/Integer',
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description='Specify number of current processes used to read in the images. Default is no multiprocessing.',
    )

    # Target shape[0] of the resized image.
    resize_X = hyperparams.UniformInt(
        default=224,
        lower=0,
        upper=sys.maxsize,
        semantic_types=[
            'http://schema.org/Integer',
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description='Specify the resized shape[0] of the resized image',
    )

    # Target shape[1] of the resized image.
    resize_Y = hyperparams.UniformInt(
        default=224,
        lower=0,
        upper=sys.maxsize,
        semantic_types=[
            'http://schema.org/Integer',
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description='Specify the resized shape[1] of the resized image',
    )

    # Output image layer count, 1..3 inclusive; 3 corresponds to color
    # images, per the description.
    image_layer = hyperparams.UniformInt(
        default=3,
        lower=1,
        upper=3,
        upper_inclusive=True,
        semantic_types=[
            'http://schema.org/Integer',
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description='Specify the output image layer, default value is 3 which corresponds to color images',
    )
Beispiel #30
0
class DataFrameToTensorHyperparams(hyperparams.Hyperparams):
    """Hyperparameters for reading images from a dataframe into a tensor.

    Every entry is an integer tagged ``schema.org/Integer`` and
    ``TuningParameter``.
    """

    # os.cpu_count() returns None when the core count cannot be determined;
    # fall back to a single process so the default is always a valid integer
    # within [1, sys.maxsize] instead of None.
    # NOTE(review): this default is machine-dependent, so it differs between
    # hosts -- confirm that is acceptable for the primitive's metadata.
    process_amount = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=os.cpu_count() or 1,
        description=
        "Specify number of processes that generated in same time when reading images, default value will be all cores in the system",
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ])

    # Target shape[0] of the resized image.
    resize_X = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=224,
        description="Specify the resized shape[0] of the resized image",
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ])

    # Target shape[1] of the resized image.
    resize_Y = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=224,
        description="Specify the resized shape[1] of the resized image",
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ])

    # Output image layer count, 1..3 inclusive; 3 corresponds to color
    # images, per the description.
    image_layer = hyperparams.UniformInt(
        lower=1,
        upper=3,
        upper_inclusive=True,
        default=3,
        description=
        "Specify the output image layer, default value is 3 which corresponds to color images",
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ])