class UEncHyperparameter(hyperparams.Hyperparams):
    """Hyper-parameters for an encoder primitive: column selection and output handling."""

    # Convert all columns to numeric; text columns may expand into multiple columns.
    text2int = hyperparams.UniformBool(
        default=False,
        description='Whether to convert everything to numerical. For text columns, each row may get converted into a column',
        semantic_types=['http://schema.org/Boolean',
                        'https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
    # Explicit column selection; when set, it takes precedence over exclude_columns.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    # How produced columns are merged back into the output dataframe.
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='replace',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
    )
    add_index_columns = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters: column selection plus a toggle for dropping non-numeric columns."""

    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="A set of column indices to force primitive to operate on. The default is all columns.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    drop_non_numeric_columns = hyperparams.Hyperparameter[bool](
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="If True, drop all non-numeric columns",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a file-reading primitive with optional data compression."""

    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="A set of column indices to force primitive to operate on. If any specified column does not contain filenames for supported media types, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
    )
    return_result = hyperparams.Enumeration(
        values=["append", "replace", "new"],
        default="append",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Should columns with read files be appended, should they replace original columns, or should only columns with read files be returned?",
    )
    add_index_columns = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description='Also include primary index columns if input data has them. Applicable only if "return_result" is set to "new".',
    )
    # FIX: description previously named the algorithm "LZO"; the companion
    # decompression hyperparameter elsewhere in this file documents the
    # identical 16-byte header layout as LZ4, so the name is corrected here
    # for consistency.
    compress_data = hyperparams.Hyperparameter[bool](
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="If True, applies LZ4 compression algorithm to the data. "
                    "Store a header consisting of the dtype character and the data shape as unsigned integers. "
                    "Given c struct alignment, will occupy 16 bytes (1 + 4 + 4 + 4 + 3 ) padding",
    )
    n_jobs = hyperparams.Hyperparameter[int](
        default=64,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The value of the n_jobs parameter for the joblib library",
    )
    band_column = hyperparams.Hyperparameter[str](
        default="band",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Name of the band column used if no band semantic type is present.",
    )
class Hyperparams(hyperparams.Hyperparams):
    """
    Hyper-parameters
    """
    # FIX: corrected typos in the description ("inplace of" -> "in place of",
    # "interation" -> "iteration").
    use_row_iter = hyperparams.UniformBool(
        default=False,
        description="Whether or not to use row iteration in place of column iteration on dataframe",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be cast to the type, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters: scaling options plus standard column-selection controls."""

    # Added by Guanchu
    with_mean = hyperparams.UniformBool(
        default=True,
        description='If True, center the data before scaling. This does not work (and will raise an exception) when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
    )
    with_std = hyperparams.UniformBool(
        default=True,
        description='If True, scale the data to unit variance (or equivalently, unit standard deviation).',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
    )
    # NOTE: a 'copy' hyperparameter (toggle for in-place scaling) was present
    # here but commented out by the original author; intentionally omitted.
    # Keep previous
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )
    return_semantic_type = hyperparams.Enumeration[str](
        values=['https://metadata.datadrivendiscovery.org/types/Attribute',
                'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description='Decides what semantic type to attach to generated attributes',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters: normalization options plus standard column-selection controls."""

    # Added by Mia
    # FIX: corrected typo in description ("Array like time seires" ->
    # "Array-like time series").
    endog = hyperparams.Bounded[int](
        lower=2,
        upper=None,
        default=3,
        description='Array-like time series.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
    )
    # keep previous
    norm = hyperparams.Enumeration[str](
        default='l2',
        values=['l1', 'l2', 'max'],
        description='The norm to use to normalize each non zero sample.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
    )
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )
    return_semantic_type = hyperparams.Enumeration[str](
        values=['https://metadata.datadrivendiscovery.org/types/Attribute',
                'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description='Decides what semantic type to attach to generated attributes',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters: sliding-window size plus dataset/column-selection controls."""

    window_size = hyperparams.UniformInt(
        lower=0,
        upper=100,  # TODO: define the correct upper bound
        default=50,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="window size to calculate",
    )
    # Keep previous
    dataframe_resource = hyperparams.Hyperparameter[typing.Union[str, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Resource ID of a DataFrame to extract if there are multiple tabular resources inside a Dataset and none is a dataset entry point.",
    )
    # NOTE: non-empty defaults here — column 2 is operated on, columns 0, 1
    # and 3 are excluded by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(2,),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(0, 1, 3),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )
    return_semantic_type = hyperparams.Enumeration[str](
        values=['https://metadata.datadrivendiscovery.org/types/Attribute',
                'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description='Decides what semantic type to attach to generated attributes',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for automated feature synthesis: null/correlation filters and column selection."""

    # FIX: description previously read "A lower value means features may have
    # more null nulls", which is garbled and inverts the meaning — a lower cap
    # on the allowed null percentage yields features with FEWER nulls.
    max_percent_null = hyperparams.Bounded[float](
        default=.5,
        lower=0,
        upper=1,
        description='The maximum percentage of null values allowed in returned features. A lower value means features may have fewer null values.',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])
    max_correlation = hyperparams.Bounded[float](
        default=.9,
        lower=0,
        upper=1,
        description='The maximum allowed correlation between any two features returned. A lower value means features will be more uncorrelated',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    max_features = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description="Cap the number of generated features to this number. If -1, no limit."
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a column-parsing primitive: which semantic types to parse and how."""

    # Subset of supported semantic types the primitive should attempt to parse.
    parsing_semantics = hyperparams.Set(
        elements=hyperparams.Enumeration(
            values=[
                "http://schema.org/Boolean",
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
                "http://schema.org/DateTime",
                "https://metadata.datadrivendiscovery.org/types/CategoricalData",
            ],
            default="http://schema.org/Float",
        ),
        default=(
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="A set of semantic types to parse. One can provide a subset of supported semantic types to limit what the primitive parses.",
    )
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
    )
    # Mirrors pandas-style error handling when coercing values to numeric.
    error_handling = hyperparams.Enumeration[str](
        default="coerce",
        values=("ignore", "raise", "coerce"),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Setting to deal with error when converting a column to numeric value.",
    )
    fuzzy_time_parsing = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Use fuzzy time parsing.",
    )
class UnfoldHyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for an unfold primitive."""

    # FIX: the default was a (mutable) list; every other Set hyperparameter in
    # this file uses an immutable tuple default, so this is normalized to a
    # tuple for consistency and to avoid a shared mutable default.
    unfold_semantic_types = hyperparams.Set(
        elements=hyperparams.Hyperparameter[str]("str"),
        default=(
            "https://metadata.datadrivendiscovery.org/types/PredictedTarget",
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="""
        A set of semantic types that the primitive will unfold.
        Only 'https://metadata.datadrivendiscovery.org/types/PredictedTarget' by default.
        """,
    )
    use_pipeline_id_semantic_type = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="""
        Controls whether semantic_type will be used for finding pipeline id column in input dataframe.
        If true, it will look for 'https://metadata.datadrivendiscovery.org/types/PipelineId' for pipeline id column,
        and create attribute columns using header: attribute_{pipeline_id}. eg. 'binaryClass_{a3180751-33aa-4790-9e70-c79672ce1278}'
        If false, create attribute columns using header: attribute_{0,1,2,...}. eg. 'binaryClass_0', 'binaryClass_1'
        """,
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a clustering primitive: column selection, cluster count, output column name."""

    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    n_clusters = hyperparams.Hyperparameter[int](
        default=8,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Number of clusters to generate",
    )
    cluster_col_name = hyperparams.Hyperparameter[str](
        default="__cluster",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The name of created cluster column in the returned dataframe",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters: scoring metric, column selection, and text-vectorization strategy."""

    # `classification_metrics`, `regression_metrics` and `clustering_metrics`
    # are module-level lists defined elsewhere in this file/module.
    metric = hyperparams.Enumeration[str](
        values=classification_metrics + regression_metrics + clustering_metrics,
        default="f1Macro",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The D3M scoring metric to use during the fit phase. This can be any of the regression, classification or "
        + "clustering metrics.",
    )
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    encoder_type = hyperparams.Enumeration(
        default="svm",
        values=["svm", "tfidf"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Vectorization Strategy.",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a geocoding primitive backed by an Elasticsearch service."""

    # How long to wait for the Elasticsearch database to come up.
    rampup_timeout = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=100,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="timeout, how much time to give elastic search database to startup, may vary based on infrastructure",
    )
    target_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="indices of column with geolocation formatted as text that should be converted to lat,lon pairs",
    )
    cache_size = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=2000,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="LRU cache size",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a time-series forecasting primitive (VAR/ARMA-style).

    Fixes applied: completed the truncated `datetime_index_unique` description,
    corrected the "evluate" typo, balanced the unclosed parenthesis in the
    `datetime_interval_exception` description, and removed a stray trailing
    `pass` statement.
    """

    datetime_index = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='if multiple datetime indices exist, this HP specifies which to apply to training data. '
                    'If None, the primitive assumes there is only one datetime index. This HP can also specify '
                    'multiple indices which should be concatenated to form datetime_index')
    # NOTE(review): the original description was truncated ("whether the
    # datetime "); completed from the hyperparameter name — confirm intent.
    datetime_index_unique = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="whether the datetime index is unique")
    datetime_filter = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='index of column in input dataset that contain unique identifiers of '
                    'time series that have different datetime indices')
    filter_index = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='index of column in input dataset that contain unique identifiers of different time series')
    n_periods = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=61,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='number of periods to predict')
    interval = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='interval with which to sample future predictions')
    datetime_interval_exception = hyperparams.Hyperparameter[typing.Union[str, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='to handle different prediction intervals (stock market dataset). '
                    'If this HP is set, primitive will just make next forecast for this datetime value '
                    '(not multiple forecasts at multiple intervals)')
    max_lags = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='maximum lag order to evaluate to find model - eval criterion = AIC')
    arma_p = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=0,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='The p order of the ARMA model in case some time series are univariate')
    arma_q = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=0,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='The q order of the ARMA model in case some time series are univariate')
    weights_filter_value = hyperparams.Hyperparameter[typing.Union[str, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='value to select a filter from column filter index for which to return correlation '
                    'coefficient matrix.')
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a time-series binning primitive.

    Fix applied: corrected "comebine" -> "combine" in the `binning_size`
    description.
    """

    grouping_key_col = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The GroupKey column index for the time series data.",
    )
    # TODO(review): description left empty by the original author — presumably
    # the time column index; confirm and document.
    time_col = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="",
    )
    value_cols = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Columns needed in the dataset; all other columns will be removed.",
    )
    granularity = hyperparams.Enumeration[str](
        default="months",
        values=("seconds", "minutes", "hours", "days", "weeks", "months", "years"),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The granularity of the time series timestamp values.",
    )
    binning_operation = hyperparams.Enumeration[str](
        default="sum",
        values=("sum", "mean", "min", "max"),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Setting operation to bin time series data with.",
    )
    binning_size = hyperparams.Hyperparameter[int](
        default=5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="If time is numeric, this will be the size to combine row values.",
    )
    binning_starting_value = hyperparams.Enumeration[str](
        default="zero",
        values=("zero", "min"),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Where to start binning intervals from. min starts from min of dataset.",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters naming image-path input columns and desired output column labels."""

    target_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[str](''),
        default=(),
        max_size=sys.maxsize,
        min_size=0,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='names of columns with image paths',
    )
    output_labels = hyperparams.Set(
        elements=hyperparams.Hyperparameter[str](''),
        default=(),
        max_size=sys.maxsize,
        min_size=0,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='desired names for croc output columns',
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a scoring primitive: metrics to compute, label overrides, normalization."""

    # `MetricsHyperparams` and `AllLabelsHyperparams` are hyperparams classes
    # defined elsewhere in this file.
    metrics = hyperparams.Set(
        elements=MetricsHyperparams,
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of metrics to compute.",
    )
    all_labels = hyperparams.Set(
        elements=AllLabelsHyperparams,
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="All labels available in a dataset, per target column. When provided for a target column, it overrides all labels from metadata or data for that target column.",
    )
    add_normalized_scores = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Add additional column with normalized scores?",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters selecting which metafeatures to compute for a dataset."""

    # `metafeature_hyperparam`, `default_metafeatures` and
    # `computable_metafeatures` are module-level names defined elsewhere.
    metafeatures = hyperparams.Set(
        metafeature_hyperparam,
        default_metafeatures,
        min_size=1,
        max_size=len(computable_metafeatures),
        description="Compute metadata descriptions of the dataset",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/MetafeatureParameter'
        ])
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters: optional explicit column selection."""

    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
class AllLabelsHyperparams(hyperparams.Hyperparams, set_names=False):
    """Per-target-column label override: a column name paired with its full label set."""

    # Default is ignored.
    # TODO: Remove default. See: https://gitlab.com/datadrivendiscovery/d3m/issues/141
    column_name = hyperparams.Hyperparameter[str]('')
    labels = hyperparams.Set(
        # Default is ignored.
        # TODO: Remove default. See: https://gitlab.com/datadrivendiscovery/d3m/issues/141
        elements=hyperparams.Hyperparameter[str](''),
        default=(),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters: column-length split threshold and metafeature selection."""

    # NOTE(review): declared as hyperparams.Uniform (continuous) although the
    # semantic type lists http://schema.org/Integer — confirm whether
    # UniformInt was intended; left unchanged to preserve behavior.
    split_on_column_with_avg_len = hyperparams.Uniform(
        default=30,
        lower=10,
        upper=100,
        upper_inclusive=True,
        description='Threshold of avg column length for splitting punctuation or alphanumeric',
        semantic_types=['http://schema.org/Integer',
                        'https://metadata.datadrivendiscovery.org/types/ControlParameter'])
    # `metafeature_hyperparam`, `default_metafeatures` and
    # `computable_metafeatures` are module-level names defined elsewhere.
    metafeatures = hyperparams.Set(
        metafeature_hyperparam,
        default_metafeatures,
        min_size=1,
        max_size=len(computable_metafeatures),
        description="Compute metadata descriptions of the dataset",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/MetafeatureParameter'])
class FileReaderHyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a file-reader primitive: column selection and output handling."""

    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to operate on. If any specified column does not contain filenames for supported media types, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='append',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should columns with read files be appended, should they replace original columns, or should only columns with read files be returned?",
    )
    add_index_columns = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a vector-expansion primitive: source column and output labels."""

    vector_col_index = hyperparams.Hyperparameter[typing.Optional[int]](
        default=None,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description='Index of source vector column')
    labels = hyperparams.Set(
        elements=hyperparams.Hyperparameter[str](''),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description='Labels for created columns. If none supplied, labels will auto-generate.'
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a pretrained image-featurization primitive."""

    # FIX: the original description strings used backslash line-continuations
    # *inside* the string literal, embedding a stray backslash and run of
    # indentation whitespace into the user-visible text. Rewritten with
    # implicit string concatenation so the runtime text is clean.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "A set of column indices to force primitive to operate on. "
            "If any specified column cannot be parsed, it is skipped."
        ),
    )
    # Which pretrained backbone supplies the features.
    inference_model = hyperparams.Enumeration(
        default="moco",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["amdim", "moco"],
        description="type pretrained inference model to use",
    )
    batch_size = hyperparams.UniformInt(
        lower=1,
        upper=512,
        default=256,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="inference batch size",
    )
    pool_features = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="whether to pool features across spatial dimensions in returned frame",
    )
    decompress_data = hyperparams.Hyperparameter[bool](
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "If True, applies LZ4 decompression algorithm to the data. "
            "Compressed data stores a header consisting of the dtype character and the "
            "data shape as unsigned integers. Given c struct alignment, will occupy "
            "16 bytes (1 + 4 + 4 + 4 + 3 ) padding"
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Column selection plus the cardinality cap for one-hot encoding."""

    # Explicit column selection; -1 is the element placeholder default.
    use_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Columns with more unique labels than this are not one-hot encoded.
    max_one_hot = hyperparams.Hyperparameter[int](
        default=16,
        description="Max number of unique labels a column can have for encoding. If the value is surpassed, the column is skipped.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Column selection plus the cardinality floor for binary encoding."""

    # Explicit column selection; -1 is the element placeholder default.
    use_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Columns with fewer unique labels than this are not binary encoded.
    min_binary = hyperparams.Hyperparameter[int](
        default=17,
        description="Min number of unique labels a column can have for binary encoding. If a column has fewer, it will be skipped.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Column selection plus replace-vs-append behavior for DateTime enrichment."""

    # Explicit column selection; -1 is the element placeholder default.
    use_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # False appends the enriched columns; True replaces the originals.
    replace = hyperparams.Hyperparameter[bool](
        default=False,
        description="Whether or not to replace enriched DateTime columns or append them",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
class HorizontalConcatHyperparams(hyperparams.Hyperparams):
    """Controls for horizontally concatenating dataframes: index handling,
    semantic types applied to the output, and a column-name index."""

    ignore_index = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether new df should use original index or not"
    )
    # FIX: description typo "Sementic typer to add" corrected.
    to_semantic_types = hyperparams.Set(
        elements=hyperparams.Hyperparameter[str](""),
        default=("https://metadata.datadrivendiscovery.org/types/Attribute",
                 "https://metadata.datadrivendiscovery.org/types/OrdinalData",
                 "https://metadata.datadrivendiscovery.org/types/CategoricalData"),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="Semantic types to add for output dataframe"
    )
    # NOTE(review): an int named "column_name" with default 100 is surprising —
    # presumably used as a numeric suffix/index when naming columns; confirm
    # against the primitive that consumes this hyperparam.
    column_name = hyperparams.Hyperparameter[int](
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=100,
        description="the control params for index name"
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a categorical imputer: column selection, replacement
    strategy, fill value, and empty-column policy."""

    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    strategy = hyperparams.Enumeration[str](
        default="most_frequent",
        values=("most_frequent", "constant"),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Replacement strategy. 'most_frequent' will replace missing values with the mode of the column, 'constant' uses 'fill_value'",
    )
    # MISSING_VALUE_INDICATOR is a module-level constant defined elsewhere in this file.
    fill_value = hyperparams.Hyperparameter[str](
        default=MISSING_VALUE_INDICATOR,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Value to replace missing values with. Only applied when strategy is set to 'constant'",
    )
    # FIX: original concatenation produced "...is found.If False..." with no
    # space between the two sentences; a trailing space restores it.
    error_on_empty = hyperparams.Hyperparameter[bool](
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="If True will raise an exception when a column consisting only of empty values is found. "
        + "If False, will apply the 'fill_value' to the entire column.",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a fuzzy-join primitive: join columns on each side,
    per-column accuracy thresholds, join type, and absolute-vs-relative
    numeric comparison."""

    n_jobs = hyperparams.Hyperparameter[int](
        default=-1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The value of the n_jobs parameter for the joblib library",
    )
    # A single column name ("str") or a set of column names ("set").
    left_col = hyperparams.Union[typing.Union[str, typing.Sequence[str]]](
        configuration=collections.OrderedDict(
            set=hyperparams.Set(
                elements=hyperparams.Hyperparameter[str](
                    default="",
                    semantic_types=[
                        "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                    ],
                    description="Name of the column.",
                ),
                default=(),
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
            ),
            str=hyperparams.Hyperparameter[str](
                default="",
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                description="Name of the column.",
            ),
        ),
        default="str",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Columns to join on from left dataframe",
    )
    # FIX: the set-branch description string was broken by a literal line
    # break in the source; reconstructed to match left_col's parallel text.
    right_col = hyperparams.Union[typing.Union[str, typing.Sequence[str]]](
        configuration=collections.OrderedDict(
            set=hyperparams.Set(
                elements=hyperparams.Hyperparameter[str](
                    default="",
                    semantic_types=[
                        "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                    ],
                    description="Name of the column.",
                ),
                default=(),
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
            ),
            str=hyperparams.Hyperparameter[str](
                default="",
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                description="Name of the column.",
            ),
        ),
        default="str",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Columns to join on from right dataframe",
    )
    # One accuracy for all join columns ("float") or one per column ("set").
    accuracy = hyperparams.Union[typing.Union[float, typing.Sequence[float]]](
        configuration=collections.OrderedDict(
            set=hyperparams.List(
                elements=hyperparams.Hyperparameter[float](-1),
                default=(),
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                description="A list of accuracies, corresponding respectively to the columns to join on.",
            ),
            float=hyperparams.Hyperparameter[float](0),
        ),
        default="float",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Required accuracy of join ranging from 0.0 to 1.0, where 1.0 is an exact match.",
    )
    join_type = hyperparams.Enumeration[str](
        default="left",
        values=("left", "right", "outer", "inner", "cross"),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The type of join between two dataframes.",
    )
    # Absolute-difference comparison flag(s), scalar or per join column.
    absolute_accuracy = hyperparams.Union[typing.Union[bool, typing.Sequence[bool]]](
        configuration=collections.OrderedDict(
            set=hyperparams.List(
                elements=hyperparams.UniformBool(False),
                default=(),
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                description="A list of flags for absolute values, corresponding respectively to the columns to join on.",
            ),
            bool=hyperparams.UniformBool(False),
        ),
        default="bool",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Used for numeric to use absolute comparison instead of percentage.",
    )