class CleaningFeaturizerHyperparameter(hyperparams.Hyperparams):
    """Hyperparameters for the cleaning featurizer.

    Declares the optional operation selector plus the two thresholds
    described below (number-character density and punctuation rows).
    """

    # Optional string naming the split operation(s); defaults to ``None``.
    features = hyperparams.Hyperparameter[typing.Union[str, None]](
        None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            'Select one or more operations to perform: '
            '"split_phone_number_column", "split_date_column", '
            '"split_alpha_numeric_column", "split_multi_value_column"'
        ),
    )

    # The default sits on the lower bound of the allowed range.
    num_threshold = hyperparams.Uniform(
        lower=0.1,
        upper=0.5,
        default=0.1,
        upper_inclusive=True,
        semantic_types=[
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Threshold for number character density of a column",
    )

    # The default equals the upper bound, hence ``upper_inclusive=True``.
    common_threshold = hyperparams.Uniform(
        lower=0.7,
        upper=0.9,
        default=0.9,
        upper_inclusive=True,
        semantic_types=[
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Threshold for rows containing specific punctuation",
    )
class IQRHyperparams(hyperparams.Hyperparams):
    """Hyperparameters for an IQR-based scaler.

    Two quantile bounds define the range used to compute the scale, and two
    boolean switches enable centering and scaling.
    """

    # Lower quantile bound; the default equals the (inclusive) upper limit.
    quantile_range_lowerbound = hyperparams.Uniform(
        default=25.0,
        lower=0.0,
        upper=25.0,
        upper_inclusive=True,
        semantic_types=[
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        description='IQR - Quantile range used to calculate scale',
    )

    # Upper quantile bound; the default equals the lower limit.
    quantile_range_upperbound = hyperparams.Uniform(
        default=75.0,
        lower=75.0,
        upper=100.0,
        upper_inclusive=True,
        semantic_types=[
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        description='IQR - Quantile range used to calculate scale',
    )

    with_centering = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'http://schema.org/Boolean',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        description=' If True, center the data before scaling ',
    )

    with_scaling = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'http://schema.org/Boolean',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        description=(
            'If True, scale the data to unit variance '
            '(or equivalently, unit standard deviation).'
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a shapelet model trained on time series.

    Covers shapelet sizing (base length and number of lengths), the training
    schedule (epochs, learning rate, weight regularizer) and a flag telling
    whether the input dataset is already in long format.
    """

    shapelet_length = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.1,
        upper_inclusive=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'base shapelet length, expressed as fraction of length of '
            'time series'
        ),
    )

    num_shapelet_lengths = hyperparams.UniformInt(
        lower=1,
        upper=100,
        default=2,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='number of different shapelet lengths',
    )

    # default epoch size from
    # https://tslearn.readthedocs.io/en/latest/auto_examples/plot_shapelets.html#sphx-glr-auto-examples-plot-shapelets-py
    epochs = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=200,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='number of training epochs',
    )

    # The description previously duplicated the one of num_shapelet_lengths.
    learning_rate = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.1,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='learning rate used during training',
    )

    # The description previously duplicated the one of num_shapelet_lengths.
    # NOTE(review): wording is generic on purpose; confirm the exact meaning
    # against the wrapped model's documentation.
    weight_regularizer = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.01,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='strength of the weight regularizer used during training',
    )

    long_format = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=(
            "whether the input dataset is already formatted in long format "
            "or not"
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for reducing and pooling feature vectors.

    Selects the dimensionality reduction method, the reduced dimension and
    the ``p`` parameter of generalized mean pooling.
    """

    reduce_method = hyperparams.Enumeration(
        default='pca',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['pca', 'svd'],
        description=(
            'dimensionality reduction method that is applied to feature '
            'vectors'
        ),
    )

    reduce_dimension = hyperparams.UniformInt(
        lower=0,
        upper=1024,
        default=128,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="number of dimensions in reduced feature vectors",
    )

    # The description is assembled with implicit string concatenation so that
    # no line-continuation whitespace ends up inside the runtime string.
    gem_p = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=1,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=(
            "parameter p in generalized mean pooling; p > 1 increases the "
            "contrast of the pooled feature map; p = 1 equivalent to "
            "average pooling; p = +inf equivalent to max pooling."
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters mixing scalar and container-typed values.

    Declares an optional integer, a bounded float and two hyper-parameters
    whose defaults are a container ndarray and a container DataFrame.
    """

    # Optional integer; ``None`` is the default.
    n_components = hyperparams.Hyperparameter[typing.Optional[int]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description=(
            "Number of components (< n_classes - 1) for dimensionality "
            "reduction."
        ),
    )

    learning_rate = hyperparams.Uniform(
        default=0.1,
        lower=0.01,
        upper=2,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
            "https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter",
        ],
        description=(
            "Learning rate shrinks the contribution of each classifier by "
            "``learning_rate``. There is a trade-off between "
            "``learning_rate`` and ``n_estimators``."
        ),
    )

    # Default is a 2x2 container ndarray built from a numpy array.
    array1 = hyperparams.Hyperparameter[container.ndarray](
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        default=container.ndarray(
            numpy.array([[1, 2], [3, 4]]),
            generate_metadata=True,
        ),
    )

    # Default is a container DataFrame built from the same nested list.
    array2 = hyperparams.Hyperparameter[container.DataFrame](
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        default=container.DataFrame(
            [[1, 2], [3, 4]],
            generate_metadata=True,
        ),
    )
class SplitterHyperparameter(hyperparams.Hyperparams):
    """Hyperparameters for the dataframe splitter.

    Holds the column/row count thresholds and the ratio used to further
    reduce the row threshold, as described on each field.
    """

    threshold_column_length = hyperparams.UniformInt(
        default=300,
        lower=1,
        upper=sys.maxsize,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "The threshold value of amount of column in a dataframe, "
            "if the value is larger, it will be splitted (sampled)."
        ),
    )

    threshold_row_length = hyperparams.UniformInt(
        default=100000,
        lower=1,
        upper=sys.maxsize,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "The threshold value of amount of row in a dataframe, "
            "if the value is larger, it will be splitted (sampled)."
        ),
    )

    further_reduce_threshold_column_length = hyperparams.UniformInt(
        default=200,
        lower=1,
        upper=sys.maxsize,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "The threshold of column amount to further reduce the "
            "threshold_row_length value for the condition that both the "
            "amount of column and row are very large"
        ),
    )

    further_reduce_ratio = hyperparams.Uniform(
        default=0.5,
        lower=0,
        upper=1,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "The ratio to further reduce the threshold_row_length value for "
            "the condition that both the amount of column and row are very "
            "large"
        ),
    )
class DataFeaturizerHyperparameter(hyperparams.Hyperparams):
    """Hyperparameters for the date featurizer.

    Four boolean switches select which date-part columns to create, a
    threshold sets the fraction of values that must parse as dates, and an
    optional string carries settings for the date parser.
    """

    create_year = hyperparams.UniformBool(
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="define whether to create the year column or not",
        default=True,
    )

    create_month = hyperparams.UniformBool(
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="define whether to create the month column or not",
        default=True,
    )

    create_day = hyperparams.UniformBool(
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="define whether to create the day column or not",
        default=True,
    )

    create_day_of_week = hyperparams.UniformBool(
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="define whether to create the day of week column or not",
        default=True,
    )

    min_threshold = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        upper_inclusive=True,
        default=0.9,
        semantic_types=[
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "Fraction of values required to be parsed as dates in order to "
            "featurize the column"
        ),
    )

    # Declared as an optional string even though the description says
    # "[Dict]"; defaults to ``None``.
    extractor_settings = hyperparams.Hyperparameter[typing.Union[str, None]](
        None,
        description='[Dict] Extractor settings for the date parser ',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for PCA-score based feature selection.

    Declares the score threshold and whether only numeric columns are
    considered.
    """

    threshold = hyperparams.Uniform(
        default=0.0,
        lower=0.0,
        upper=1.0,
        upper_inclusive=False,
        description="pca score threshold for feature selection",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    only_numeric_cols = hyperparams.UniformBool(
        default=True,
        description='consider only numeric columns for feature selection',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for DBSCAN / HDBSCAN clustering.

    Selects the algorithm, its distance and size parameters, the HDBSCAN
    cluster selection method and the shape of the produced output.
    """

    algorithm = hyperparams.Enumeration(
        default="HDBSCAN",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["DBSCAN", "HDBSCAN"],
        description="type of clustering algorithm to use",
    )

    # Implicit string concatenation replaces the backslash continuation that
    # used to leak continuation whitespace into the description.
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=(
            "maximum distance between two samples for them to be considered "
            "as in the same neighborhood, used in DBSCAN algorithm"
        ),
    )

    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="the minimum size of clusters",
    )

    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=(
            "The number of samples in a neighbourhood for a point to be "
            "considered a core point."
        ),
    )

    cluster_selection_method = hyperparams.Enumeration(
        default="eom",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        values=["leaf", "eom"],
        description=(
            "Determines how clusters are selected from the cluster hierarchy "
            "tree for HDBSCAN"
        ),
    )

    # Same continuation fix as for ``eps``.
    required_output = hyperparams.Enumeration(
        default="feature",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["prediction", "feature"],
        description=(
            "Determines whether the output is a dataframe with just "
            "predictions, or an additional feature added to the input "
            "dataframe."
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for SIMON-based column annotation.

    Controls whether manual annotations are overwritten, which extra
    classification passes run, how much of each column is inspected and the
    probability threshold applied when decoding results.
    """

    overwrite = hyperparams.UniformBool(
        default=True,
        description=(
            'whether to overwrite manual annotations with SIMON annotations'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    statistical_classification = hyperparams.UniformBool(
        default=True,
        description=(
            'whether to append categorical / ordinal annotations using '
            'rule-based classification'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    multi_label_classification = hyperparams.UniformBool(
        default=True,
        description=(
            'whether to perfrom multi-label classification and append '
            'multiple annotations to metadata'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    max_rows = hyperparams.UniformInt(
        default=500,
        lower=100,
        upper=2000,
        description=(
            'maximum number of rows to consider when classifying data type '
            'of specific column'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    max_chars = hyperparams.UniformInt(
        default=20,
        lower=1,
        upper=100,
        upper_inclusive=True,
        description=(
            'maximum number of characters to consider when processing row'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    p_threshold = hyperparams.Uniform(
        default=0.5,
        lower=0,
        upper=1.0,
        upper_inclusive=True,
        description=(
            'probability threshold to use when decoding classification '
            'results. Predictions above p_threshold will be returned'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for sub-sampling records of a data frame."""

    # Bounded by 0 and 1 (upper bound inclusive); the default of 1 is the
    # upper bound.
    records_fraction = hyperparams.Uniform(
        default=1,
        lower=0,
        upper=1,
        upper_inclusive=True,
        description="percentage of records to sub-sample from the data frame",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )
def setup(self):
    """Create the two hyper-parameter instances stored on ``self``.

    ``self.numerical`` is a ``Uniform`` bounded by 0 and 1 with default 0.5;
    ``self.enumeration`` is an ``Enumeration`` over the integers 0..999 with
    default 0.
    """
    self.numerical = hyperparams.Uniform(default=0.5, lower=0, upper=1)

    enumeration_values = list(range(1000))
    self.enumeration = hyperparams.Enumeration(
        default=0,
        values=enumeration_values,
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for DBSCAN / HDBSCAN clustering.

    Selects the algorithm, its distance and size parameters, the HDBSCAN
    cluster selection method and the shape of the produced output.
    """

    algorithm = hyperparams.Enumeration(
        default='HDBSCAN',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['DBSCAN', 'HDBSCAN'],
        description='type of clustering algorithm to use',
    )

    # Implicit string concatenation replaces the backslash continuation that
    # used to leak continuation whitespace into the description.
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'maximum distance between two samples for them to be considered '
            'as in the same neighborhood, used in DBSCAN algorithm'
        ),
    )

    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='the minimum size of clusters',
    )

    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'The number of samples in a neighbourhood for a point to be '
            'considered a core point.'
        ),
    )

    cluster_selection_method = hyperparams.Enumeration(
        default='eom',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        values=['leaf', 'eom'],
        description=(
            'Determines how clusters are selected from the cluster hierarchy '
            'tree for HDBSCAN'
        ),
    )

    # Same continuation fix as for ``eps``.
    required_output = hyperparams.Enumeration(
        default='feature',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['prediction', 'feature'],
        description=(
            'Determines whether the output is a dataframe with just '
            'predictions, or an additional feature added to the input '
            'dataframe.'
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for DBSCAN / HDBSCAN clustering with format control.

    Selects the algorithm, its distance and size parameters, whether the
    input dataset is already in long format and the HDBSCAN cluster
    selection method.
    """

    algorithm = hyperparams.Enumeration(
        default='HDBSCAN',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['DBSCAN', 'HDBSCAN'],
        description='type of clustering algorithm to use',
    )

    # Implicit string concatenation replaces the backslash continuation that
    # used to leak continuation whitespace into the description.
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'maximum distance between two samples for them to be considered '
            'as in the same neighborhood, used in DBSCAN algorithm'
        ),
    )

    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='the minimum size of clusters',
    )

    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'The number of samples in a neighbourhood for a point to be '
            'considered a core point.'
        ),
    )

    long_format = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=(
            "whether the input dataset is already formatted in long format "
            "or not"
        ),
    )

    cluster_selection_method = hyperparams.Enumeration(
        default='eom',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        values=['leaf', 'eom'],
        description=(
            'Determines how clusters are selected from the cluster hierarchy '
            'tree for HDBSCAN'
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for feature reduction, pooling and ranking.

    Selects the dimensionality reduction method and size, the ``p``
    parameter of generalized mean pooling, the negative-annotation count
    used for ranking and the location of the dot-product cache.
    """

    reduce_method = hyperparams.Enumeration(
        default="pca",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["pca", "svd"],
        description=(
            "dimensionality reduction method that is applied to feature "
            "vectors"
        ),
    )

    reduce_dimension = hyperparams.UniformInt(
        lower=0,
        upper=1024,
        default=128,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="number of dimensions in reduced feature vectors",
    )

    # Implicit string concatenation replaces the backslash continuations that
    # used to leak continuation whitespace into the description.
    gem_p = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=1,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=(
            "parameter p in generalized mean pooling; p > 1 increases the "
            "contrast of the pooled feature map; p = 1 equivalent to "
            "average pooling; p = +inf equivalent to max pooling."
        ),
    )

    # Same continuation fix as for ``gem_p``.
    denominator_min = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=5,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "only ranks according to positive annotations until this many "
            "negative annotations are obtained"
        ),
    )

    dot_products_cache = hyperparams.Hyperparameter[str](
        default="dot_product_cache",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "already computed dot products will be cached in this location"
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for column splitting and metafeature computation.

    Declares the average-column-length threshold and the set of
    metafeatures to compute.
    """

    # NOTE(review): declared as ``Uniform`` while tagged with the
    # schema.org Integer semantic type -- confirm which is intended.
    split_on_column_with_avg_len = hyperparams.Uniform(
        lower=10,
        upper=100,
        default=30,
        upper_inclusive=True,
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "Threshold of avg column length for splitting punctuation or "
            "alphanumeric"
        ),
    )

    # Element hyper-parameter, default selection and the size cap all come
    # from names defined outside this class.
    metafeatures = hyperparams.Set(
        metafeature_hyperparam,
        default_metafeatures,
        max_size=len(computable_metafeatures),
        min_size=1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/MetafeatureParameter",
        ],
        description='Compute metadata descriptions of the dataset',
    )
class CorexContinuous_Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for the continuous Corex primitive.

    ``n_hidden`` can be given either as a count of hidden factors or as a
    fraction of the number of input columns; the fractional form is the
    default choice.
    """

    # NOTE(review): ``Union`` is called with a configuration mapping and a
    # default key, so it is presumably the hyperparams Union rather than
    # ``typing.Union`` -- confirm against the file's imports.
    n_hidden = Union(
        OrderedDict((
            (
                "n_hidden int",
                hyperparams.Uniform(
                    default=2,
                    lower=1,
                    upper=50,
                    q=1,
                    description="number of hidden factors learned",
                ),
            ),
            (
                "n_hidden pct",
                hyperparams.Uniform(
                    default=.2,
                    lower=0,
                    upper=.50,
                    q=.05,
                    description=(
                        "number of hidden factors as percentage of # input "
                        "columns"
                    ),
                ),
            ),
        )),
        default="n_hidden pct",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for keeping a proportion of the top features.

    Declares the proportion to keep and whether only numeric columns are
    considered.
    """

    # The default of 1.0 is the (inclusive) upper bound.
    proportion_of_features = hyperparams.Uniform(
        default=1.0,
        lower=0.0,
        upper=1.0,
        upper_inclusive=True,
        description="proportion of top features from input dataset to keep",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    only_numeric_cols = hyperparams.UniformBool(
        default=False,
        description='consider only numeric columns for feature selection',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for k-NN graph label propagation.

    Declares the neighbor count for the adjacency matrix, the propagation
    step size and iteration count, and two output/feature switches.
    """

    k = hyperparams.UniformInt(
        default=10,
        lower=1,
        upper=100,
        upper_inclusive=True,
        description=(
            'number of neighbors to use when constructing k-NN adjacency '
            'matrix'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    alpha = hyperparams.Uniform(
        default=0.85,
        lower=0,
        upper=1,
        upper_inclusive=True,
        description='controls step size during label propagations',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    n_iterations = hyperparams.UniformInt(
        default=50,
        lower=10,
        upper=100,
        upper_inclusive=True,
        description='number of iterations during label propagations',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    all_scores = hyperparams.UniformBool(
        default=False,
        description=(
            'whether to return scores for all classes from produce method'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    normalize_features = hyperparams.UniformBool(
        default=False,
        description='whether to L2 normalize feature vectors',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters choosing between ablation and imputation.

    ``interval`` is the timestamp interval and, per its description, is
    only used for imputation.
    """

    continuity_option = hyperparams.Enumeration(
        default="imputation",
        values=["ablation", "imputation"],
        description='Choose ablation or imputation the original data',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    interval = hyperparams.Uniform(
        lower=0.000000001,
        upper=10000000000,
        default=1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Only used in imputation, give the timestamp interval.",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for time-series clustering.

    Selects one of four clustering algorithms plus the cluster count,
    neighborhood distance and minimum-sample parameters that the
    descriptions tie to specific algorithms.
    """

    algorithm = hyperparams.Enumeration(
        default='GlobalAlignmentKernelKMeans',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=[
            'GlobalAlignmentKernelKMeans',
            'TimeSeriesKMeans',
            'DBSCAN',
            'HDBSCAN',
        ],
        description='type of clustering algorithm to use',
    )

    # Implicit string concatenation replaces the backslash continuation that
    # used to leak continuation whitespace into the description.
    nclusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=3,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='number of clusters to use in kernel kmeans algorithm',
    )

    # Same continuation fix as for ``nclusters``.
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'maximum distance between two samples for them to be considered '
            'as in the same neighborhood, used in DBSCAN algorithm'
        ),
    )

    # Same continuation fix as for ``nclusters``.
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'number of samples in a neighborhood for a point to be '
            'considered as a core point, used in DBSCAN and HDBSCAN '
            'algorithms'
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: distortion rate and metadata generation switch.

    eps : Maximum distortion rate as defined by the Johnson-Lindenstrauss
        lemma.
    generate_metadata : Whether to generate metadata after the feature
        extraction.
    """

    eps = hyperparams.Uniform(
        default=0.2,
        lower=0.1,
        upper=0.5,
        semantic_types=[
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # NOTE(review): the description says generation is turned off by
    # default, yet ``default=True`` -- confirm which one is intended.
    generate_metadata = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'http://schema.org/Boolean',
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description=(
            'A control parameter to set whether to generate metada after the '
            'feature extraction. It will be very slow if the columns length '
            'is very large. For the default condition, it will turn off to '
            'accelerate the program running.'
        ),
    )
class IterativeLabelingHyperparams(hyperparams.Hyperparams):
    """Hyperparameters for iterative labeling.

    Declares the number of labeling iterations, the fraction of unlabeled
    items labeled, and the black-box classification primitive.
    """

    iters = hyperparams.UniformInt(
        default=5,
        lower=1,
        upper=100,
        description="The number of iterations of labeling",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    frac = hyperparams.Uniform(
        default=0.2,
        lower=0.01,
        upper=1.0,
        description="The fraction of unlabeled item to label",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    # Primitive-valued hyper-parameter restricted to the CLASSIFICATION
    # family; the default is the random forest classifier primitive.
    blackbox = hyperparams.Primitive[SupervisedLearnerPrimitiveBase](
        default=SKRandomForestClassifier,
        primitive_families=[PrimitiveFamily.CLASSIFICATION],
        description="Black box model for the classification",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )
class CleaningFeaturizerHyperparameter(hyperparams.Hyperparams):
    """Hyperparameters for the cleaning featurizer.

    Combines the operation selector and its thresholds with the column
    selection / result placement controls (``use_columns``,
    ``exclude_columns``, ``return_result``, ``use_semantic_types``,
    ``add_index_columns``).
    """

    # Optional string naming the split operation(s); defaults to ``None``.
    features = hyperparams.Hyperparameter[Union[str, None]](
        None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            'Select one or more operations to perform: '
            '"split_date_column", "split_phone_number_column", '
            '"split_alpha_numeric_column", "split_multi_value_column"'
        ),
    )

    # NOTE(review): declared as ``Uniform`` while tagged with the
    # schema.org Integer semantic type -- confirm which is intended.
    split_on_column_with_avg_len = hyperparams.Uniform(
        lower=10,
        upper=100,
        default=30,
        upper_inclusive=True,
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "Threshold of avg column length for splitting punctuation or "
            "alphanumeric"
        ),
    )

    num_threshold = hyperparams.Uniform(
        lower=0.1,
        upper=0.5,
        default=0.1,
        upper_inclusive=True,
        semantic_types=[
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Threshold for number character density of a column",
    )

    common_threshold = hyperparams.Uniform(
        lower=0.7,
        upper=0.9,
        default=0.9,
        upper_inclusive=True,
        semantic_types=[
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Threshold for rows containing specific punctuation",
    )

    # Empty by default.
    use_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            'A set of column indices to force primitive to operate on. '
            'If any specified column cannot be parsed, it is skipped.'
        ),
    )

    # Empty by default.
    exclude_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            'A set of column indices to not operate on. '
            'Applicable only if "use_columns" is not provided.'
        ),
    )

    return_result = hyperparams.Enumeration(
        default="replace",
        values=["append", "replace", "new"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            'Should parsed columns be appended, should they replace original '
            'columns, or should only parsed columns be returned? This '
            'hyperparam is ignored if use_semantic_types is set to false.'
        ),
    )

    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            'Controls whether semantic_types metadata will be used for '
            'filtering columns in input dataframe. Setting this to false '
            'makes the code ignore return_result and will produce only the '
            'output dataframe'
        ),
    )

    add_index_columns = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            'Also include primary index columns if input data has them. '
            'Applicable only if "return_result" is set to "new".'
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for an image model with explanations.

    Declares where trained weights are saved, the image and feature
    dimensions, the training schedule and two switches controlling what
    explanations / confidences are returned.
    """

    weights_filepath = hyperparams.Hyperparameter[str](
        default="model_weights.pth",
        description='weights of trained model will be saved to this filepath',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    image_dim = hyperparams.UniformInt(
        default=120,
        lower=1,
        upper=512,
        upper_inclusive=True,
        description='input dimension of image (height and width)',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    feature_dim = hyperparams.UniformInt(
        default=2048,
        lower=1,
        upper=sys.maxsize,
        upper_inclusive=True,
        description=(
            'feature dimension after reshaping flattened feature vector'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    batch_size = hyperparams.UniformInt(
        default=256,
        lower=1,
        upper=512,
        upper_inclusive=True,
        description='training and inference batch size',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # The lower bound of 0 is allowed for the epoch count.
    epochs = hyperparams.UniformInt(
        default=25,
        lower=0,
        upper=sys.maxsize,
        description=(
            "how many epochs for which to finetune classification head "
            "(happens first)"
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    learning_rate = hyperparams.Uniform(
        default=0.1,
        lower=0.0,
        upper=1.0,
        description='learning rate',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # Disabled hyper-parameter, kept commented out as in the original:
    # explanation_method = hyperparams.Enumeration(
    #     default = 'gradcam',
    #     semantic_types = [
    #         'https://metadata.datadrivendiscovery.org/types/ControlParameter'
    #     ],
    #     values = [
    #         'gradcam',
    #         'gradcam-gbprop'
    #     ],
    #     description = 'Determines whether the output is a dataframe with just predictions,\
    #     or an additional feature added to the input dataframe.'
    # )

    explain_all_classes = hyperparams.UniformBool(
        default=False,
        description=(
            'whether to return explanations for all classes or only the '
            'predicted class'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    all_confidences = hyperparams.UniformBool(
        default=True,
        description=(
            'whether to return explanations all classes and all confidences '
            'from produce method'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a lag-based matrix factorization model.

    The first group configures the model (lags, latent dimension ``K``,
    regularization weights, iteration count and gradient step sizes for the
    matrices F, X and W). The second group controls column selection and
    how results are returned.
    """

    # ----- Tuning -----

    # NOTE(review): grouped under "Tuning" but tagged as a ControlParameter
    # -- confirm which is intended.
    lags = hyperparams.Set(
        default=(1, ),
        elements=hyperparams.Hyperparameter[int](-1),
        description='Set of lag indices to use in model.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    K = hyperparams.UniformInt(
        default=2,
        lower=0,
        upper=100000000,
        description='Length of latent embedding dimension.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    lambda_f = hyperparams.Uniform(
        default=1.0,
        lower=0,
        upper=100000000,
        description='Regularization parameter used for matrix F.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    lambda_x = hyperparams.Uniform(
        default=1.0,
        lower=0,
        upper=100000000,
        description='Regularization parameter used for matrix X.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    lambda_w = hyperparams.Uniform(
        default=1.0,
        lower=0,
        upper=100000000,
        description='Regularization parameter used for matrix W.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    alpha = hyperparams.Uniform(
        default=1000.0,
        lower=0,
        upper=100000000,
        description=(
            'Regularization parameter used for make the sum of lag '
            'coefficient close to 1. That helps to avoid big deviations '
            'when forecasting.'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    eta = hyperparams.Uniform(
        default=1.0,
        lower=0,
        upper=100000000,
        description=(
            'Regularization parameter used for X when undercovering '
            'autoregressive dependencies.'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    max_iter = hyperparams.UniformInt(
        default=1000,
        lower=0,
        upper=100000000,
        description='Number of iterations of updating matrices F, X and W.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    F_step = hyperparams.Uniform(
        default=0.0001,
        lower=0,
        upper=100000000,
        description='Step of gradient descent when updating matrix F.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    X_step = hyperparams.Uniform(
        default=0.0001,
        lower=0,
        upper=100000000,
        description='Step of gradient descent when updating matrix X.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    W_step = hyperparams.Uniform(
        default=0.0001,
        lower=0,
        upper=100000000,
        description='Step of gradient descent when updating matrix W.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    # ----- Control -----

    use_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        description=(
            'A set of column indices to force primitive to operate on. '
            'If any specified column cannot be parsed, it is skipped.'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    exclude_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        description=(
            'A set of column indices to not operate on. '
            'Applicable only if "use_columns" is not provided.'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    return_result = hyperparams.Enumeration(
        default="append",
        values=["append", "replace", "new"],
        description=(
            'Should parsed columns be appended, should they replace original '
            'columns, or should only parsed columns be returned? This '
            'hyperparam is ignored if use_semantic_types is set to false.'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    use_semantic_types = hyperparams.UniformBool(
        default=False,
        description=(
            'Controls whether semantic_types metadata will be used for '
            'filtering columns in input dataframe. Setting this to false '
            'makes the code ignore return_result and will produce only the '
            'output dataframe'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    add_index_columns = hyperparams.UniformBool(
        default=False,
        description=(
            'Also include primary index columns if input data has them. '
            'Applicable only if "return_result" is set to "new".'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    error_on_no_input = hyperparams.UniformBool(
        default=True,
        description=(
            'Throw an exception if no input column is selected/provided. '
            'Defaults to true to behave like sklearn. To prevent pipelines '
            'from breaking set this to False.'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    # Semantic type attached to generated attributes; plain Attribute is
    # the default.
    return_semantic_type = hyperparams.Enumeration[str](
        default="https://metadata.datadrivendiscovery.org/types/Attribute",
        values=[
            "https://metadata.datadrivendiscovery.org/types/Attribute",
            "https://metadata.datadrivendiscovery.org/types/ConstructedAttribute",
        ],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description=(
            "Decides what semantic type to attach to generated attributes"
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for an LSTM-based time-series forecasting model.

    Declares, in order: the weights file location, model-size and training
    settings (embedding/LSTM width, epochs, early stopping, learning rate,
    batch size, dropout), how the target column is labelled (real vs.
    count), sampling settings for training windows, and the settings used
    by the auxiliary ``produce_confidence_intervals`` method.
    """

    # --- Persistence -------------------------------------------------------
    weights_filepath = hyperparams.Hyperparameter[str](
        default='model_weights.h5',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="weights of trained model will be saved to this filepath",
    )

    # --- Model size --------------------------------------------------------
    emb_dim = hyperparams.UniformInt(
        lower=8,
        upper=256,
        default=32,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "number of cells to use in the categorical embedding component of the model",
    )
    lstm_dim = hyperparams.UniformInt(
        lower=8,
        upper=256,
        default=32,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="number of cells to use in the lstm component of the model",
    )

    # --- Training loop -----------------------------------------------------
    epochs = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="number of training epochs",
    )
    steps_per_epoch = hyperparams.UniformInt(
        lower=5,
        upper=200,
        default=10,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="number of steps to do per epoch",
    )
    early_stopping_patience = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "number of epochs to wait before invoking early stopping criterion",
    )
    # Declared as an integer-valued hyperparameter, so only whole-number
    # deltas can be expressed.
    early_stopping_delta = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=0,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "early stopping will interpret change of < delta in desired direction "
        + "will increment early stopping counter state",
    )
    learning_rate = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=1e-3,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="learning rate",
    )
    batch_size = hyperparams.UniformInt(
        lower=1,
        upper=256,
        default=64,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="batch size",
    )
    dropout_rate = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.2,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "dropout to use in lstm model (input and recurrent transform)",
    )

    # --- Target labelling --------------------------------------------------
    # Two-way choice: an explicit boolean ("user_selected") or None
    # ("auto_selected", the default choice).
    count_data = hyperparams.Union[typing.Union[bool, None]](
        configuration=collections.OrderedDict(
            user_selected=hyperparams.UniformBool(default=True),
            auto_selected=hyperparams.Hyperparameter[None](default=None),
        ),
        default="auto_selected",
        description=
        "Whether we should label the target column as real or count (positive) "
        + "based on user input or automatic selection. For example, user might want to specify "
        + "positive only count data if target column is real-valued, but domain is > 0",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
    )

    # --- Training-sample construction ---------------------------------------
    window_size = hyperparams.UniformInt(
        lower=10,
        upper=sys.maxsize,
        default=20,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="window size of sampled time series in training process",
    )
    negative_obs = hyperparams.UniformInt(
        lower=0,
        upper=10,
        default=1,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "whether to sample time series with padded observations before t=0 in training ",
    )
    val_split = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "proportion of training records to set aside for validation. Ignored "
        + "if iterations flag in `fit` method is not None",
    )

    # NOTE(review): the hyperparameter below is disabled in the source; it is
    # kept verbatim for reference. Confirm whether it can be removed.
    # seed_predictions_with_all_data = hyperparams.UniformBool(
    #     default=True,
    #     semantic_types=[
    #         "https://metadata.datadrivendiscovery.org/types/TuningParameter"
    #     ],
    #     description="whether to pass all batches of training data through model before making test predictions "
    #     + "otherwise only one batch of training data (of length window size) will be passed through model",
    # )

    # --- Confidence intervals ----------------------------------------------
    confidence_interval_horizon = hyperparams.UniformInt(
        lower=1,
        upper=100,
        default=2,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "horizon for confidence interval forecasts. Exposed through auxiliary "
        + "'produce_confidence_intervals' method",
    )
    confidence_interval_alpha = hyperparams.Uniform(
        lower=0.01,
        upper=1,
        default=0.1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "significance level for confidence interval, i.e. alpha = 0.05 "
        + "returns a 95% confidence interval from alpha / 2 to 1 - (alpha / 2) "
        + "Exposed through auxiliary 'produce_confidence_intervals' method ",
    )
    confidence_interval_samples = hyperparams.UniformInt(
        lower=1,
        upper=1000,
        default=100,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "number of samples to draw at each timestep, which will be used to calculate "
        + "confidence intervals",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a lag-order regression forecasting model.

    Declares, in order: the lag order (fixed by the user or selected
    automatically), seasonal/dynamic prediction switches, how weight
    coefficients are reported and pooled, and the settings used by the
    auxiliary ``produce_confidence_intervals`` method.
    """

    # Two-way choice: an explicit integer lag ("user_selected", the default
    # choice) or None ("auto_selected").
    max_lag_order = hyperparams.Union[Union[int, None]](
        configuration=collections.OrderedDict(
            user_selected=hyperparams.UniformInt(lower=0, upper=100, default=1),
            auto_selected=hyperparams.Hyperparameter[None](
                default=None,
                description="Lag order of regressions automatically selected",
            ),
        ),
        default="user_selected",
        description=
        "The lag order to apply to regressions. If user-selected, the same lag will be "
        + "applied to all regressions. If auto-selected, different lags can be selected for different "
        + "regressions.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
    )

    # --- Seasonal / dynamic prediction switches -----------------------------
    seasonal = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "whether to perform ARIMA prediction with seasonal component",
    )
    seasonal_differencing = hyperparams.UniformInt(
        lower=1,
        upper=365,
        default=1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "period of seasonal differencing to use in ARIMA prediction",
    )
    dynamic = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "whether to perform dynamic in-sample prediction with ARIMA model",
    )

    # --- Coefficient reporting ----------------------------------------------
    interpret_value = hyperparams.Enumeration(
        default="lag_order",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["series", "lag_order"],
        description=
        "whether to return weight coefficients for each series or each lag order "
        + "separately in the regression",
    )
    interpret_pooling = hyperparams.Enumeration(
        default="avg",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["avg", "max"],
        description="whether to pool weight coefficients via average or max",
    )

    # --- Confidence intervals -----------------------------------------------
    confidence_interval_horizon = hyperparams.UniformInt(
        lower=1,
        upper=100,
        default=2,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "horizon for confidence interval forecasts. Exposed through auxiliary "
        + "'produce_confidence_intervals' method",
    )
    confidence_interval_alpha = hyperparams.Uniform(
        lower=0.01,
        upper=1,
        default=0.1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "significance level for confidence interval, i.e. alpha = 0.05 "
        + "returns a 95% confidence interval from alpha / 2 to 1 - (alpha / 2) . "
        + "Exposed through auxiliary 'produce_confidence_intervals' method",
    )
class IQRHyperparams(hyperparams.Hyperparams):
    """Hyperparameters for an IQR-based scaler.

    Covers the two quantile bounds, the centering and scaling switches, and
    the column-selection / output-shaping controls.
    """

    # --- Quantile range -----------------------------------------------------
    # upper_inclusive=True keeps the default, which equals the upper bound,
    # inside the allowed range.
    quantile_range_lowerbound = hyperparams.Uniform(
        description='IQR - Quantile range used to calculate scale',
        semantic_types=[
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        default=25.0,
        lower=0.0,
        upper=25.0,
        upper_inclusive=True,
    )
    quantile_range_upperbound = hyperparams.Uniform(
        description='IQR - Quantile range used to calculate scale',
        semantic_types=[
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        default=75.0,
        lower=75.0,
        upper=100.0,
        upper_inclusive=True,
    )

    # --- Centering / scaling switches ---------------------------------------
    with_centering = hyperparams.UniformBool(
        description=' If True, center the data before scaling ',
        semantic_types=[
            'http://schema.org/Boolean',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        default=True,
    )
    with_scaling = hyperparams.UniformBool(
        description=(
            'If True, scale the data to unit variance '
            '(or equivalently, unit standard deviation).'
        ),
        semantic_types=[
            'http://schema.org/Boolean',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        default=True,
    )

    # --- Column selection ---------------------------------------------------
    # Both sets default to empty; their elements are integer column indices.
    use_columns = hyperparams.Set(
        description=(
            'A set of column indices to force primitive to operate on. '
            'If any specified column cannot be parsed, it is skipped.'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
    )
    exclude_columns = hyperparams.Set(
        description=(
            'A set of column indices to not operate on. '
            'Applicable only if "use_columns" is not provided.'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
    )

    # --- Output shaping -----------------------------------------------------
    return_result = hyperparams.Enumeration(
        description=(
            'Should parsed columns be appended, should they replace original '
            'columns, or should only parsed columns be returned? This '
            'hyperparam is ignored if use_semantic_types is set to false.'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        default="replace",
        values=["append", "replace", "new"],
    )
    use_semantic_types = hyperparams.UniformBool(
        description=(
            'Controls whether semantic_types metadata will be used for '
            'filtering columns in input dataframe. Setting this to false '
            'makes the code ignore return_result and will produce only the '
            'output dataframe'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        default=False,
    )
    add_index_columns = hyperparams.UniformBool(
        description=(
            'Also include primary index columns if input data has them. '
            'Applicable only if "return_result" is set to "new".'
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        default=True,
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for an image model with a fine-tuned classification head.

    Covers the weights file location, input/feature dimensions, training
    settings, and switches controlling which explanations and confidences
    are returned.
    """

    # --- Persistence --------------------------------------------------------
    weights_filepath = hyperparams.Hyperparameter[str](
        description='weights of trained model will be saved to this filepath',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        default='model_weights.pth',
    )

    # --- Input / feature dimensions -----------------------------------------
    image_dim = hyperparams.UniformInt(
        description='input dimension of image (height and width)',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        default=120,
        lower=1,
        upper=512,
        upper_inclusive=True,
    )
    feature_dim = hyperparams.UniformInt(
        description=(
            'feature dimension after reshaping flattened feature vector'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        default=2048,
        lower=1,
        upper=sys.maxsize,
        upper_inclusive=True,
    )

    # --- Training -----------------------------------------------------------
    batch_size = hyperparams.UniformInt(
        description='training and inference batch size',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        default=256,
        lower=1,
        upper=512,
        upper_inclusive=True,
    )
    # A lower bound of 0 means zero epochs is an accepted value.
    epochs = hyperparams.UniformInt(
        description=(
            'how many epochs for which to finetune classification head '
            '(happens first)'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        default=25,
        lower=0,
        upper=sys.maxsize,
    )
    learning_rate = hyperparams.Uniform(
        description='learning rate',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        default=0.1,
        lower=0.0,
        upper=1.0,
    )

    # --- Output switches ----------------------------------------------------
    explain_all_classes = hyperparams.UniformBool(
        description=(
            'whether to return explanations for all classes or only the '
            'predicted class'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        default=False,
    )
    all_confidences = hyperparams.UniformBool(
        description=(
            'whether to return explanations for all classes and all '
            'confidences from produce method'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        default=True,
    )