예제 #1
0
class UEncHyperparameter(hyperparams.Hyperparams):
    """Hyperparameter set: a numeric-conversion switch plus column-handling controls.

    Every entry carries the ``ControlParameter`` semantic type; ``text2int``
    is additionally tagged ``http://schema.org/Boolean``.
    """

    # Switch for converting everything (text columns included) to numbers.
    text2int = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Whether to convert everything to numerical. For text columns, each row may get converted into a column",
    )

    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
    # Column indices to skip; only consulted when ``use_columns`` is not given.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
    # Placement of the produced columns relative to the input columns.
    return_result = hyperparams.Enumeration(
        values=["append", "replace", "new"],
        default="replace",
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
    # Whether column filtering relies on semantic-type metadata.
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
    # Carry primary index columns along when ``return_result`` is "new".
    add_index_columns = hyperparams.UniformBool(
        default=True,
        description='Also include primary index columns if input data has them. Applicable only if "return_result" is set to "new".',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
예제 #2
0
class Hyperparams(hyperparams.Hyperparams):
    """Column-selection hyperparameters plus a switch for dropping non-numeric columns."""

    # Column indices to operate on; the description states the default is all columns.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description="A set of column indices to force primitive to operate on. The default is all columns.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Column indices to skip; only consulted when ``use_columns`` is not given.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Plain boolean hyperparameter, enabled by default.
    drop_non_numeric_columns = hyperparams.Hyperparameter[bool](
        default=True,
        description="If True, drop all non-numeric columns",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
예제 #3
0
class Hyperparams(hyperparams.Hyperparams):
    """Control hyperparameters: column selection, result placement, compression, parallelism.

    All entries are tagged with the ``ControlParameter`` semantic type.
    """

    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "A set of column indices to force primitive to operate on. If any specified column does not contain filenames for supported media types, it is skipped.",
    )
    # Column indices to skip; only consulted when ``use_columns`` is not given.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        'A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
    )
    # Placement of the produced columns relative to the input columns.
    return_result = hyperparams.Enumeration(
        values=["append", "replace", "new"],
        default="append",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "Should columns with read files be appended, should they replace original columns, or should only columns with read files be returned?",
    )
    # Carry primary index columns along when ``return_result`` is "new".
    add_index_columns = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        'Also include primary index columns if input data has them. Applicable only if "return_result" is set to "new".',
    )
    # The description is assembled from adjacent literals. A backslash
    # continuation inside a single literal would embed the next source line's
    # indentation (a long run of spaces) in the runtime text.
    compress_data = hyperparams.Hyperparameter[bool](
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "If True, applies LZO compression algorithm to the data. "
            "Store a header consisting of the dtype character and the data shape as unsigned integers. "
            "Given c struct alignment, will occupy 16 bytes (1 + 4 + 4 + 4 + 3 ) padding"
        ),
    )
    # Per the description, this value is used as joblib's ``n_jobs``.
    n_jobs = hyperparams.Hyperparameter[int](
        default=64,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The value of the n_jobs parameter for the joblib library",
    )
    # Fallback column name for the band when no band semantic type is present.
    band_column = hyperparams.Hyperparameter[str](
        default="band",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "Name of the band column used if no band semantic type is present.",
    )
예제 #4
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters: a row/column iteration switch and column-selection sets."""

    # When enabled, iterate over dataframe rows rather than columns.
    use_row_iter = hyperparams.UniformBool(
        default=False,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="Whether or not to use row iteration inplace of column interation on dataframe",
    )
    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be cast to the type, it is skipped.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Column indices to skip; only consulted when ``use_columns`` is not given.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
예제 #5
0
class Hyperparams(hyperparams.Hyperparams):
    """Scaling tuning parameters (centering / unit variance) plus column-handling controls.

    ``with_mean`` and ``with_std`` are ``TuningParameter`` entries; everything
    else is a ``ControlParameter``.
    """

    # Added by Guanchu
    with_mean = hyperparams.UniformBool(
        default=True,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="If True, center the data before scaling. This does not work (and will raise an exception) when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory.",
    )
    with_std = hyperparams.UniformBool(
        default=True,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="If True, scale the data to unit variance (or equivalently, unit standard deviation).",
    )
    # NOTE: a ``copy`` UniformBool tuning parameter is deliberately not defined
    # here (it existed only as disabled, commented-out code).

    # Keep previous
    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Column indices to skip; only consulted when ``use_columns`` is not given.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Placement of the produced columns relative to the input columns.
    return_result = hyperparams.Enumeration(
        values=["append", "replace", "new"],
        default="new",
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Whether column filtering relies on semantic-type metadata.
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Carry primary index columns along when ``return_result`` is "new".
    add_index_columns = hyperparams.UniformBool(
        default=False,
        description='Also include primary index columns if input data has them. Applicable only if "return_result" is set to "new".',
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Raise when no input column ends up selected (enabled by default).
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )

    # Semantic type attached to the generated attribute columns.
    return_semantic_type = hyperparams.Enumeration[str](
        values=[
            "https://metadata.datadrivendiscovery.org/types/Attribute",
            "https://metadata.datadrivendiscovery.org/types/ConstructedAttribute",
        ],
        default="https://metadata.datadrivendiscovery.org/types/Attribute",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="Decides what semantic type to attach to generated attributes",
    )
예제 #6
0
class Hyperparams(hyperparams.Hyperparams):
    """Tuning parameters ``endog`` and ``norm`` plus column-handling controls.

    ``endog`` and ``norm`` are ``TuningParameter`` entries; everything else is
    a ``ControlParameter``.
    """

    # Added by Mia
    # Integer bounded below by 2 with no upper bound.
    endog = hyperparams.Bounded[int](
        lower=2,
        upper=None,
        default=3,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="Array like time seires.",
    )
    # keep previous
    norm = hyperparams.Enumeration[str](
        values=["l1", "l2", "max"],
        default="l2",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        description="The norm to use to normalize each non zero sample.",
    )

    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Column indices to skip; only consulted when ``use_columns`` is not given.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Placement of the produced columns relative to the input columns.
    return_result = hyperparams.Enumeration(
        values=["append", "replace", "new"],
        default="new",
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Whether column filtering relies on semantic-type metadata.
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Carry primary index columns along when ``return_result`` is "new".
    add_index_columns = hyperparams.UniformBool(
        default=False,
        description='Also include primary index columns if input data has them. Applicable only if "return_result" is set to "new".',
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Raise when no input column ends up selected (enabled by default).
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )

    # Semantic type attached to the generated attribute columns.
    return_semantic_type = hyperparams.Enumeration[str](
        values=[
            "https://metadata.datadrivendiscovery.org/types/Attribute",
            "https://metadata.datadrivendiscovery.org/types/ConstructedAttribute",
        ],
        default="https://metadata.datadrivendiscovery.org/types/Attribute",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="Decides what semantic type to attach to generated attributes",
    )
예제 #7
0
class Hyperparams(hyperparams.Hyperparams):
    """Window-size setting plus dataframe-resource and column-handling controls.

    All entries are tagged with the ``ControlParameter`` semantic type.
    """

    # Integer window size sampled from the range configured below.
    window_size = hyperparams.UniformInt(
        lower=0,
        upper=100,  # TODO: Define the correct the upper bound
        default=50,
        description="window size to calculate",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )

    # Keep previous
    # Optional resource ID; ``None`` means no explicit resource is named.
    dataframe_resource = hyperparams.Hyperparameter[typing.Union[str, None]](
        default=None,
        description="Resource ID of a DataFrame to extract if there are multiple tabular resources inside a Dataset and none is a dataset entry point.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Column indices to operate on; defaults to column 2 only.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(2,),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Column indices to skip; defaults to columns 0, 1 and 3.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(0, 1, 3),
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Placement of the produced columns relative to the input columns.
    return_result = hyperparams.Enumeration(
        values=["append", "replace", "new"],
        default="new",
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Whether column filtering relies on semantic-type metadata.
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Carry primary index columns along when ``return_result`` is "new".
    add_index_columns = hyperparams.UniformBool(
        default=False,
        description='Also include primary index columns if input data has them. Applicable only if "return_result" is set to "new".',
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Raise when no input column ends up selected (enabled by default).
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Semantic type attached to the generated attribute columns.
    return_semantic_type = hyperparams.Enumeration[str](
        values=[
            "https://metadata.datadrivendiscovery.org/types/Attribute",
            "https://metadata.datadrivendiscovery.org/types/ConstructedAttribute",
        ],
        default="https://metadata.datadrivendiscovery.org/types/Attribute",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="Decides what semantic type to attach to generated attributes",
    )
예제 #8
0
class Hyperparams(hyperparams.Hyperparams):
    """Feature-filtering tuning parameters plus column-handling controls.

    ``max_percent_null``, ``max_correlation`` and ``max_features`` are
    ``TuningParameter`` entries; the remaining ones are ``ControlParameter``.
    """

    # Upper limit on the null fraction of a returned feature, within [0, 1].
    # Raising the limit admits features with more nulls, so the description
    # states the relationship in that direction.
    max_percent_null = hyperparams.Bounded[float](
        default=.5,
        lower=0,
        upper=1,
        description=
        'The maximum percentage of null values allowed in returned features. A higher value means features may have more nulls.',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])
    # Upper limit on pairwise correlation between returned features, within [0, 1].
    max_correlation = hyperparams.Bounded[float](
        default=.9,
        lower=0,
        upper=1,
        description=
        'The maximum allowed correlation between any two features returned. A lower value means features will be more uncorrelated',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])
    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    # Column indices to skip; only consulted when ``use_columns`` is not given.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    # Placement of the produced columns relative to the input columns.
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    # Cap on the number of generated features; per the description -1 disables the cap.
    max_features = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        "Cap the number of generated features to this number. If -1, no limit."
    )
예제 #9
0
class Hyperparams(hyperparams.Hyperparams):
    """Parsing controls: semantic types to parse, column selection, error handling.

    All entries are tagged with the ``ControlParameter`` semantic type.
    """

    # Subset of the supported semantic types to parse; defaults to
    # Boolean, Integer and Float.
    parsing_semantics = hyperparams.Set(
        elements=hyperparams.Enumeration(
            values=[
                "http://schema.org/Boolean",
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
                "http://schema.org/DateTime",
                "https://metadata.datadrivendiscovery.org/types/CategoricalData",
            ],
            default="http://schema.org/Float",
        ),
        default=(
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
        ),
        description="A set of semantic types to parse. One can provide a subset of supported semantic types to limit what the primitive parses.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Column indices to skip; only consulted when ``use_columns`` is not given.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Policy applied when a column fails numeric conversion.
    error_handling = hyperparams.Enumeration[str](
        values=("ignore", "raise", "coerce"),
        default="coerce",
        description="Setting to deal with error when converting a column to numeric value.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Fuzzy time parsing, enabled by default.
    fuzzy_time_parsing = hyperparams.UniformBool(
        default=True,
        description="Use fuzzy time parsing.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
예제 #10
0
class UnfoldHyperparams(hyperparams.Hyperparams):
    """Controls for unfolding: which semantic types to unfold and how new columns are named."""

    # Semantic types to unfold; defaults to PredictedTarget only.
    unfold_semantic_types = hyperparams.Set(
        elements=hyperparams.Hyperparameter[str]("str"),
        default=["https://metadata.datadrivendiscovery.org/types/PredictedTarget"],
        description="""
        A set of semantic types that the primitive will unfold.
        Only 'https://metadata.datadrivendiscovery.org/types/PredictedTarget' by default.
        """,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Selects between pipeline-id based and positional column-header suffixes.
    use_pipeline_id_semantic_type = hyperparams.UniformBool(
        default=False,
        description="""
        Controls whether semantic_type will be used for finding pipeline id column in input dataframe.
        If true, it will look for 'https://metadata.datadrivendiscovery.org/types/PipelineId' for pipeline id column,
        and create attribute columns using header: attribute_{pipeline_id}. 
        eg. 'binaryClass_{a3180751-33aa-4790-9e70-c79672ce1278}'
        If false, create attribute columns using header: attribute_{0,1,2,...}.
        eg. 'binaryClass_0', 'binaryClass_1'
        """,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
예제 #11
0
class Hyperparams(hyperparams.Hyperparams):
    """Clustering controls: column selection, cluster count and output column name."""

    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Number of clusters to generate.
    n_clusters = hyperparams.Hyperparameter[int](
        default=8,
        description="Number of clusters to generate",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Name given to the created cluster column in the returned dataframe.
    cluster_col_name = hyperparams.Hyperparameter[str](
        default="__cluster",
        description="The name of created cluster column in the returned dataframe",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
예제 #12
0
class Hyperparams(hyperparams.Hyperparams):
    """Controls for scoring metric, column selection and vectorization strategy."""

    # Allowed values are the concatenation of the classification, regression
    # and clustering metric lists, in that order.
    metric = hyperparams.Enumeration[str](
        values=classification_metrics + regression_metrics + clustering_metrics,
        default="f1Macro",
        description="The D3M scoring metric to use during the fit phase.  This can be any of the regression, classification or clustering metrics.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )

    # Vectorization strategy: "svm" (default) or "tfidf".
    encoder_type = hyperparams.Enumeration(
        values=["svm", "tfidf"],
        default="svm",
        description="Vectorization Strategy.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Startup timeout, target geolocation columns and LRU cache size.

    ``rampup_timeout`` and ``cache_size`` are ``TuningParameter`` entries;
    ``target_columns`` is a ``ControlParameter``.
    """

    # Time allowed for the elastic search database to start up.
    rampup_timeout = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=100,
        description="timeout, how much time to give elastic search database to startup, may vary based on infrastructure",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
    )
    # Indices of text geolocation columns to convert to lat,lon pairs.
    target_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description="indices of column with geolocation formatted as text that should be converted to lat,lon pairs",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Size of the LRU cache.
    cache_size = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=2000,
        description="LRU cache size",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
    )
예제 #14
0
class Hyperparams(hyperparams.Hyperparams):
    """Control hyperparameters for datetime indexing, forecast horizon and lag/ARMA orders.

    All entries are tagged with the ``ControlParameter`` semantic type.
    Multi-line descriptions are assembled from adjacent string literals so
    that source indentation does not end up inside the runtime text.
    """

    # Column indices that form the datetime index; empty by default.
    datetime_index = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description=(
            'if multiple datetime indices exist, this HP specifies which to apply to training data. If '
            'None, the primitive assumes there is only one datetime index. This HP can also specify multiple indices '
            'which should be concatenated to form datetime_index'
        ),
    )
    # NOTE(review): the description below is truncated in the original source;
    # the intended wording cannot be determined from this file — confirm and complete.
    datetime_index_unique = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="whether the datetime ",
    )
    # Optional index of the column identifying series with different datetime indices.
    datetime_filter = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description=(
            'index of column in input dataset that contain unique identifiers of '
            'time series that have different datetime indices'
        ),
    )
    # Optional index of the column identifying the different time series.
    filter_index = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='index of column in input dataset that contain unique identifiers of different time series',
    )
    # Number of periods to predict.
    n_periods = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=61,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='number of periods to predict',
    )
    # Optional sampling interval for future predictions.
    interval = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='interval with which to sample future predictions',
    )
    # Optional datetime value for which only the next forecast is made.
    datetime_interval_exception = hyperparams.Hyperparameter[typing.Union[str, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description=(
            'to handle different prediction intervals (stock market dataset). '
            'If this HP is set, primitive will just make next forecast for this datetime value '
            '(not multiple forecasts at multiple intervals'
        ),
    )
    # Maximum lag order evaluated during model selection.
    max_lags = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='maximum lag order to evluate to find model - eval criterion = AIC',
    )
    # ARMA p order used when some time series are univariate.
    arma_p = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=0,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='The p order of the ARMA model in case some time series are univariate',
    )
    # ARMA q order used when some time series are univariate.
    arma_q = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=0,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='The q order of the ARMA model in case some time series are univariate',
    )
    # Optional filter value selecting which correlation coefficient matrix to return.
    weights_filter_value = hyperparams.Hyperparameter[typing.Union[str, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description=(
            'value to select a filter from column filter index for which to return correlation '
            'coefficient matrix.'
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Time-series binning controls: key/time/value columns, granularity and bin settings.

    All entries are tagged with the ``ControlParameter`` semantic type.
    """

    # Optional index of the GroupKey column.
    grouping_key_col = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        description="The GroupKey column index for the time series data.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Optional integer; the source provides an empty description for it.
    time_col = hyperparams.Hyperparameter[typing.Union[int, None]](
        default=None,
        description="",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Columns to keep; per the description all other columns are removed.
    value_cols = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        description="Columns needed in the dataset; all other columns will be removed.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Granularity of the timestamp values.
    granularity = hyperparams.Enumeration[str](
        values=(
            "seconds",
            "minutes",
            "hours",
            "days",
            "weeks",
            "months",
            "years",
        ),
        default="months",
        description="The granularity of the time series timestamp values.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Aggregation applied when binning.
    binning_operation = hyperparams.Enumeration[str](
        values=("sum", "mean", "min", "max"),
        default="sum",
        description="Setting operation to bin time series data with.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Bin size used when the time column is numeric.
    binning_size = hyperparams.Hyperparameter[int](
        default=5,
        description="If time is numeric, this will be the size to comebine row values.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
    # Origin of the binning intervals: zero, or the dataset minimum.
    binning_starting_value = hyperparams.Enumeration[str](
        values=("zero", "min"),
        default="zero",
        description="Where to start binning intervals from. min starts from min of dataset.",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
    )
예제 #16
0
class Hyperparams(hyperparams.Hyperparams):
    """Declares the image-path input columns and the names of the output columns.

    Both hyperparameters are sets of strings, tagged as control parameters,
    that default to the empty set.
    """

    # Size bounds are spelled out explicitly: from zero up to sys.maxsize entries.
    target_columns = hyperparams.Set(
        description="names of columns with image paths",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        elements=hyperparams.Hyperparameter[str](default=""),
        default=(),
        min_size=0,
        max_size=sys.maxsize,
    )

    # Same shape and bounds as ``target_columns``.
    output_labels = hyperparams.Set(
        description="desired names for croc output columns",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        elements=hyperparams.Hyperparameter[str](default=""),
        default=(),
        min_size=0,
        max_size=sys.maxsize,
    )
예제 #17
0
class Hyperparams(hyperparams.Hyperparams):
    """Control parameters choosing the metrics, label overrides and score normalization."""

    # Each element is a MetricsHyperparams configuration; empty by default.
    metrics = hyperparams.Set(
        description="A set of metrics to compute.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        elements=MetricsHyperparams,
        default=(),
    )

    # Each element is an AllLabelsHyperparams configuration; empty by default.
    all_labels = hyperparams.Set(
        description="All labels available in a dataset, per target column. When provided for a target column, it overrides all labels from metadata or data for that target column.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        elements=AllLabelsHyperparams,
        default=(),
    )

    # Enabled by default.
    add_normalized_scores = hyperparams.UniformBool(
        description="Add additional column with normalized scores?",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        default=True,
    )
예제 #18
0
class Hyperparams(hyperparams.Hyperparams):
    """Single hyperparameter selecting which metafeatures to compute."""

    # At least one metafeature must be selected; the upper bound is the number
    # of entries in ``computable_metafeatures``.
    metafeatures = hyperparams.Set(
        elements=metafeature_hyperparam,
        default=default_metafeatures,
        min_size=1,
        max_size=len(computable_metafeatures),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/MetafeatureParameter",
        ],
        description="Compute metadata descriptions of the dataset",
    )
예제 #19
0
class Hyperparams(hyperparams.Hyperparams):
    """Single control parameter restricting the columns that are operated on."""

    # Set of integer column indices; empty by default.
    use_columns = hyperparams.Set(
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        elements=hyperparams.Hyperparameter[int](default=-1),
        default=(),
    )
예제 #20
0
class AllLabelsHyperparams(hyperparams.Hyperparams, set_names=False):
    """Pairs a column name with a set of string labels."""

    # Default is ignored.
    # TODO: Remove default. See: https://gitlab.com/datadrivendiscovery/d3m/issues/141
    column_name = hyperparams.Hyperparameter[str](default="")

    labels = hyperparams.Set(
        default=(),
        # Default is ignored.
        # TODO: Remove default. See: https://gitlab.com/datadrivendiscovery/d3m/issues/141
        elements=hyperparams.Hyperparameter[str](default=""),
    )
예제 #21
0
class Hyperparams(hyperparams.Hyperparams):
    """Column-splitting threshold plus the selection of metafeatures to compute."""

    # Sampled from [10, 100] (upper bound included), defaulting to 30.
    # NOTE(review): declared with ``Uniform`` while tagged schema.org/Integer --
    # confirm whether an integer-valued hyperparameter was intended.
    split_on_column_with_avg_len = hyperparams.Uniform(
        lower=10,
        upper=100,
        upper_inclusive=True,
        default=30,
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Threshold of avg column length for splitting punctuation or alphanumeric",
    )

    # At least one metafeature must be selected; the upper bound is the number
    # of entries in ``computable_metafeatures``.
    metafeatures = hyperparams.Set(
        elements=metafeature_hyperparam,
        default=default_metafeatures,
        min_size=1,
        max_size=len(computable_metafeatures),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/MetafeatureParameter",
        ],
        description="Compute metadata descriptions of the dataset",
    )
예제 #22
0
class FileReaderHyperparams(hyperparams.Hyperparams):
    """Control parameters for column selection and how read-file columns are returned."""

    # Set of integer column indices; empty by default.
    use_columns = hyperparams.Set(
        description="A set of column indices to force primitive to operate on. If any specified column does not contain filenames for supported media types, it is skipped.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        elements=hyperparams.Hyperparameter[int](default=-1),
        default=(),
    )

    # Set of integer column indices; empty by default.
    exclude_columns = hyperparams.Set(
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        elements=hyperparams.Hyperparameter[int](default=-1),
        default=(),
    )

    # One of three modes; 'append' is the default.
    return_result = hyperparams.Enumeration(
        description="Should columns with read files be appended, should they replace original columns, or should only columns with read files be returned?",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        values=["append", "replace", "new"],
        default="append",
    )

    # Enabled by default.
    add_index_columns = hyperparams.UniformBool(
        description='Also include primary index columns if input data has them. Applicable only if "return_result" is set to "new".',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        default=True,
    )
예제 #23
0
class Hyperparams(hyperparams.Hyperparams):
    """Control parameters naming the source vector column and the created columns."""

    # Optional integer index; defaults to None.
    vector_col_index = hyperparams.Hyperparameter[typing.Optional[int]](
        description="Index of source vector column",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        default=None,
    )

    # Set of strings; empty by default.
    labels = hyperparams.Set(
        description="Labels for created columns.  If none supplied, labels will auto-generate.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        elements=hyperparams.Hyperparameter[str](default=""),
        default=(),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for column selection, model choice and inference options.

    Every entry is tagged as a control parameter except ``batch_size``, which
    is tagged as a tuning parameter.

    Multi-line descriptions are built from adjacent string literals. A
    backslash continuation inside a single literal would keep the next line's
    leading indentation as part of the description text.
    """

    # Set of integer column indices; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "A set of column indices to force primitive to operate on. "
            "If any specified column cannot be parsed, it is skipped."
        ),
    )

    # Either "amdim" or "moco"; "moco" is the default.
    inference_model = hyperparams.Enumeration(
        default="moco",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["amdim", "moco"],
        description="type pretrained inference model to use",
    )

    # Integer in [1, 512] (upper bound included), defaulting to 256.
    batch_size = hyperparams.UniformInt(
        lower=1,
        upper=512,
        default=256,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="inference batch size",
    )

    # Enabled by default.
    pool_features = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "whether to pool features across spatial dimensions in returned frame",
    )

    # Disabled by default.
    decompress_data = hyperparams.Hyperparameter[bool](
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "If True, applies LZ4 decompression algorithm to the data. "
            "Compressed data stores a header consisting of the dtype character and the "
            "data shape as unsigned integers. Given c struct alignment, will occupy "
            "16 bytes (1 + 4 + 4 + 4 + 3 ) padding"
        ),
    )
예제 #25
0
class Hyperparams(hyperparams.Hyperparams):
    """Column selection plus the upper limit on unique labels for encoding."""

    # Set of integer column indices; empty by default.
    use_columns = hyperparams.Set(
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        elements=hyperparams.Hyperparameter[int](default=-1),
        default=(),
    )

    # Tagged as a tuning parameter; defaults to 16.
    max_one_hot = hyperparams.Hyperparameter[int](
        description="Max number of unique labels a column can have for encoding.  If the value is surpassed, the column is skipped.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        default=16,
    )
class Hyperparams(hyperparams.Hyperparams):
    """Column selection plus the lower limit on unique labels for binary encoding."""

    # Set of integer column indices; empty by default.
    use_columns = hyperparams.Set(
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        elements=hyperparams.Hyperparameter[int](default=-1),
        default=(),
    )

    # Tagged as a control parameter; defaults to 17.
    min_binary = hyperparams.Hyperparameter[int](
        description="Min number of unique labels a column can have for binary encoding.  If a column has fewer, it will be skipped.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        default=17,
    )
예제 #27
0
class Hyperparams(hyperparams.Hyperparams):
    """Column selection plus the replace-versus-append switch for DateTime columns."""

    # Set of integer column indices; empty by default.
    use_columns = hyperparams.Set(
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        elements=hyperparams.Hyperparameter[int](default=-1),
        default=(),
    )

    # Disabled by default.
    replace = hyperparams.Hyperparameter[bool](
        description="Whether or not to replace enriched DateTime columns or append them",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        default=False,
    )
예제 #28
0
class HorizontalConcatHyperparams(hyperparams.Hyperparams):
    """Control parameters for index handling and the semantic types of the output."""

    # Enabled by default.
    ignore_index = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether new df should use original index or not"
    )

    # Set of semantic-type URIs; the default holds Attribute, OrdinalData and
    # CategoricalData. The description previously read "Sementic typer".
    to_semantic_types = hyperparams.Set(
        elements=hyperparams.Hyperparameter[str](""),
        default=("https://metadata.datadrivendiscovery.org/types/Attribute",
                 "https://metadata.datadrivendiscovery.org/types/OrdinalData",
                 "https://metadata.datadrivendiscovery.org/types/CategoricalData"),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="Semantic types to add for output dataframe"
    )

    # Integer-valued despite the name; defaults to 100.
    # NOTE(review): the description mentions an "index name" -- confirm the
    # intended meaning against the code that reads this value.
    column_name = hyperparams.Hyperparameter[int](
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=100,
        description="the control params for index name"
    )
class Hyperparams(hyperparams.Hyperparams):
    """Control parameters for column selection and missing-value replacement."""

    # Set of integer column indices; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )

    # Either "most_frequent" (the default) or "constant".
    strategy = hyperparams.Enumeration[str](
        default="most_frequent",
        values=("most_frequent", "constant"),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "Replacement strategy.  'most_frequent' will replace missing values with the mode of the column, 'constant' uses 'fill_value'",
    )

    # Defaults to the module-level MISSING_VALUE_INDICATOR.
    fill_value = hyperparams.Hyperparameter[str](
        default=MISSING_VALUE_INDICATOR,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "Value to replace missing values with.  Only applied when strategy is set to 'constant'",
    )

    # Disabled by default. The two description sentences are joined with a
    # separating space; they previously ran together as "found.If False".
    error_on_empty = hyperparams.Hyperparameter[bool](
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "If True will raise an exception when a column consisting only of empty values is found. "
            "If False, will apply the 'fill_value' to the entire column."
        ),
    )
예제 #30
0
class Hyperparams(hyperparams.Hyperparams):
    """Control parameters for joining two dataframes on one or more columns.

    ``left_col``, ``right_col``, ``accuracy`` and ``absolute_accuracy`` are
    unions that accept either a single value or a collection of values. Each
    defaults to its single-value choice.
    """

    # Defaults to -1.
    n_jobs = hyperparams.Hyperparameter[int](
        default=-1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The value of the n_jobs parameter for the joblib library",
    )

    # Either one column name ("str", the default choice) or a set of names ("set").
    left_col = hyperparams.Union[typing.Union[str, typing.Sequence[str]]](
        configuration=collections.OrderedDict(
            set=hyperparams.Set(
                elements=hyperparams.Hyperparameter[str](
                    default="",
                    semantic_types=[
                        "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                    ],
                    description="Name of the column.",
                ),
                default=(),
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                # Elements are column names, so the text describes names to
                # join on rather than column indices.
                description="A set of column names to join on from the left dataframe.",
            ),
            str=hyperparams.Hyperparameter[str](
                default="",
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                description="Name of the column.",
            ),
        ),
        default="str",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Columns to join on from left dataframe",
    )

    # Mirror of ``left_col`` for the right dataframe.
    right_col = hyperparams.Union[typing.Union[str, typing.Sequence[str]]](
        configuration=collections.OrderedDict(
            set=hyperparams.Set(
                elements=hyperparams.Hyperparameter[str](
                    default="",
                    semantic_types=[
                        "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                    ],
                    description="Name of the column.",
                ),
                default=(),
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                # Elements are column names, so the text describes names to
                # join on rather than column indices.
                description="A set of column names to join on from the right dataframe.",
            ),
            str=hyperparams.Hyperparameter[str](
                default="",
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                description="Name of the column.",
            ),
        ),
        default="str",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Columns to join on from right dataframe",
    )

    # Either one float ("float", the default choice) or a list of floats.
    # The list choice is keyed "set" although it is a ``hyperparams.List``;
    # the key is kept unchanged so existing configurations stay valid.
    accuracy = hyperparams.Union[typing.Union[float, typing.Sequence[float]]](
        configuration=collections.OrderedDict(
            set=hyperparams.List(
                elements=hyperparams.Hyperparameter[float](-1),
                default=(),
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                description="A list of accuracies, corresponding respectively to the columns to join on.",
            ),
            float=hyperparams.Hyperparameter[float](0),
        ),
        default="float",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Required accuracy of join ranging from 0.0 to 1.0, where 1.0 is an exact match.",
    )

    # One of five join kinds; "left" is the default.
    join_type = hyperparams.Enumeration[str](
        default="left",
        values=("left", "right", "outer", "inner", "cross"),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The type of join between two dataframes.",
    )

    # Either one flag ("bool", the default choice) or a list of flags.
    # As with ``accuracy``, the list choice is keyed "set".
    absolute_accuracy = hyperparams.Union[typing.Union[bool, typing.Sequence[bool]]](
        configuration=collections.OrderedDict(
            set=hyperparams.List(
                elements=hyperparams.UniformBool(False),
                default=(),
                semantic_types=[
                    "https://metadata.datadrivendiscovery.org/types/ControlParameter"
                ],
                description="A list of flags for absolute values, corresponding respectively to the columns to join on.",
            ),
            bool=hyperparams.UniformBool(False),
        ),
        default="bool",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Used for numeric to use absolute comparison instead of percentage.",
    )