Esempio n. 1
0
def test_automl_time_series_classification_pickle_generated_pipeline(
        mock_binary_fit, mock_multi_fit, mock_binary_score,
        mock_multiclass_score, problem_type, X_y_binary, X_y_multi):
    """Check that ranked time series classification pipelines are generated classes and pickle.

    Runs an ``AutoMLSearch`` for ``problem_type`` and, for every row of the
    rankings table, asserts that the pipeline returned by ``get_pipeline`` has
    the expected generated class and that a pickle round trip of it yields a
    truthy object.

    NOTE(review): the ``mock_*`` arguments are not used in the body; presumably
    they are injected by patch decorators that are not visible here -- confirm.
    """
    # Any problem type other than time series binary takes the multiclass path.
    is_binary = problem_type == ProblemTypes.TIME_SERIES_BINARY
    X, y = X_y_binary if is_binary else X_y_multi
    expected_class = (GeneratedPipelineTimeSeriesBinary if is_binary
                      else GeneratedPipelineTimeSeriesMulticlass)

    time_series_config = dict(gap=0,
                              max_delay=0,
                              delay_target=False,
                              delay_features=True)
    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type=problem_type,
                          problem_configuration=time_series_config)
    automl.search()

    for _, ranking in automl.rankings.iterrows():
        pipeline_id = ranking['id']
        assert automl.get_pipeline(pipeline_id).__class__ == expected_class
        # The round-tripped object only needs to be truthy.
        assert pickle.loads(pickle.dumps(automl.get_pipeline(pipeline_id)))
def test_automl_time_series_regression_pickle_generated_pipeline(mock_fit, mock_score, X_y_regression):
    """Check that ranked time series regression pipelines are generated classes and pickle.

    Runs an ``AutoMLSearch`` with problem type ``"time series regression"``
    and, for every row of the rankings table, asserts that the pipeline is a
    ``GeneratedPipelineTimeSeriesRegression`` and that a pickle round trip of
    it yields a truthy object.

    NOTE(review): ``mock_fit`` and ``mock_score`` are not used in the body;
    presumably they come from patch decorators outside this view -- confirm.
    """
    X, y = X_y_regression
    time_series_config = dict(gap=0,
                              max_delay=0,
                              delay_target=False,
                              delay_features=True)
    automl = AutoMLSearch(
        X_train=X,
        y_train=y,
        problem_type="time series regression",
        problem_configuration=time_series_config,
    )
    automl.search()

    for _, ranking in automl.rankings.iterrows():
        pipeline_id = ranking['id']
        assert automl.get_pipeline(pipeline_id).__class__ == GeneratedPipelineTimeSeriesRegression
        assert pickle.loads(pickle.dumps(automl.get_pipeline(pipeline_id)))
Esempio n. 3
0
def test_init(X_y_regression):
    """Smoke-test a regression ``AutoMLSearch`` with raw inputs and with pandas inputs.

    Each search uses the "R2" objective, three iterations and a single job.
    After searching, the test checks the types of the exposed results and
    calls ``predict`` on the best pipeline.
    """
    X, y = X_y_regression
    search_options = dict(problem_type='regression',
                          objective="R2",
                          max_iterations=3,
                          n_jobs=1)

    # First pass: data handed over exactly as the fixture provides it.
    automl = AutoMLSearch(X_train=X, y_train=y, **search_options)
    automl.search()

    assert automl.n_jobs == 1
    assert isinstance(automl.rankings, pd.DataFrame)
    assert isinstance(automl.best_pipeline, PipelineBase)
    automl.best_pipeline.predict(X)

    # Second pass: the same data wrapped in a DataFrame / Series, passed positionally.
    features_frame = pd.DataFrame(X)
    target_series = pd.Series(y)
    automl = AutoMLSearch(features_frame, target_series, **search_options)
    automl.search()

    assert isinstance(automl.rankings, pd.DataFrame)
    assert isinstance(automl.full_rankings, pd.DataFrame)
    assert isinstance(automl.best_pipeline, PipelineBase)
    automl.best_pipeline.predict(X)
    assert isinstance(automl.get_pipeline(0), PipelineBase)
def test_init(X_y_binary):
    """Smoke-test a binary ``AutoMLSearch`` with raw inputs and with pandas inputs.

    Each search runs a single iteration with one job. After searching, the
    test checks the types of the exposed results, that the objective name is
    'Log Loss Binary' when none was passed, and calls ``predict`` on the best
    pipeline.
    """
    X, y = X_y_binary
    search_options = dict(problem_type='binary',
                          max_iterations=1,
                          n_jobs=1)

    # First pass: data handed over exactly as the fixture provides it.
    automl = AutoMLSearch(X_train=X, y_train=y, **search_options)
    automl.search()

    assert automl.n_jobs == 1
    assert isinstance(automl.rankings, pd.DataFrame)
    assert isinstance(automl.best_pipeline, PipelineBase)
    automl.best_pipeline.predict(X)

    # Second pass: the same data wrapped in a DataFrame / Series, passed positionally.
    features_frame = pd.DataFrame(X)
    target_series = pd.Series(y)
    automl = AutoMLSearch(features_frame, target_series, **search_options)
    automl.search()

    assert isinstance(automl.rankings, pd.DataFrame)
    assert isinstance(automl.full_rankings, pd.DataFrame)
    assert isinstance(automl.best_pipeline, PipelineBase)
    assert isinstance(automl.get_pipeline(0), PipelineBase)
    assert automl.objective.name == 'Log Loss Binary'
    automl.best_pipeline.predict(X)
Esempio n. 5
0
def test_automl_pickle_generated_pipeline(mock_binary_score, mock_binary_fit, mock_multi_score, mock_multi_fit,
                                          problem_type, X_y_binary, X_y_multi):
    """Check that ranked binary/multiclass pipelines are generated classes and pickle.

    Runs an ``AutoMLSearch`` for ``problem_type`` and, for every row of the
    rankings table, asserts that the pipeline returned by ``get_pipeline`` has
    the expected generated class and that a pickle round trip of it yields a
    truthy object.

    NOTE(review): the ``mock_*`` arguments are not used in the body; presumably
    they are injected by patch decorators that are not visible here -- confirm.

    Raises:
        ValueError: if ``problem_type`` is neither ``ProblemTypes.BINARY`` nor
            ``ProblemTypes.MULTICLASS``.
    """
    if problem_type == ProblemTypes.BINARY:
        X, y = X_y_binary
        pipeline = GeneratedPipelineBinary
    elif problem_type == ProblemTypes.MULTICLASS:
        X, y = X_y_multi
        pipeline = GeneratedPipelineMulticlass
    else:
        # Without this branch an unexpected problem type would leave X, y and
        # pipeline unbound and surface later as an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported problem_type for this test: {!r}".format(problem_type))

    a = AutoMLSearch(X_train=X, y_train=y, problem_type=problem_type)
    a.search()

    for _, row in a.rankings.iterrows():
        # Look the pipeline up once per row and reuse it for both checks.
        automl_pipeline = a.get_pipeline(row['id'])
        assert automl_pipeline.__class__ == pipeline
        assert pickle.loads(pickle.dumps(automl_pipeline))
def test_automl_pickle_generated_pipeline(mock_regression_score, mock_regression_fit, X_y_regression):
    """Check that ranked regression pipelines, including a custom one, are generated classes and pickle.

    After the search, an instance of a locally defined ``RegressionPipeline``
    subclass is added to the rankings. Every ranked pipeline must be a
    ``GeneratedPipelineRegression`` and pickle to a truthy object. The entry
    carrying the custom name must also keep the custom hyperparameters and
    component graph, and at least one such entry must be present.

    NOTE(review): ``mock_regression_score`` and ``mock_regression_fit`` are not
    used in the body; presumably they come from patch decorators outside this
    view -- confirm.
    """
    class RegressionPipelineCustoms(RegressionPipeline):
        custom_name = "Custom Regression Name"
        component_graph = ["Imputer", "Linear Regressor"]
        custom_hyperparameters = {
            "Imputer": {"numeric_impute_strategy": "most_frequent"},
        }

    X, y = X_y_regression
    expected_class = GeneratedPipelineRegression

    automl = AutoMLSearch(X_train=X, y_train=y, problem_type='regression')
    automl.search()
    automl.add_to_rankings(RegressionPipelineCustoms({}))

    found_custom = False
    for _, ranking in automl.rankings.iterrows():
        candidate = automl.get_pipeline(ranking['id'])
        assert candidate.__class__ == expected_class
        assert pickle.loads(pickle.dumps(candidate))
        if candidate.custom_name != RegressionPipelineCustoms.custom_name:
            continue
        # This ranked entry corresponds to the custom pipeline added above.
        found_custom = True
        assert candidate.custom_hyperparameters == RegressionPipelineCustoms.custom_hyperparameters
        assert candidate.component_graph == RegressionPipelineCustoms.component_graph
    assert found_custom
Esempio n. 7
0
def test_automl_pickle_generated_pipeline(mock_regression_fit,
                                          mock_regression_score,
                                          X_y_regression):
    """Check that ranked regression pipelines, including a custom one, are generated classes and pickle.

    The search is restricted to one pipeline per estimator returned by
    ``get_estimators('regression')`` plus a locally defined
    ``RegressionPipeline`` subclass. An instance of that subclass is also added
    to the rankings after the search. Every ranked pipeline must be a
    ``GeneratedPipelineRegression`` and pickle to a truthy object. The entry
    carrying the custom name must also keep the custom hyperparameters and
    component graph, and at least one such entry must be present.

    NOTE(review): ``mock_regression_fit`` is not used in the body; presumably
    both mocks come from patch decorators outside this view -- confirm.
    """
    # The score mock is configured to report a fixed R2 value.
    mock_regression_score.return_value = {"R2": 1.0}

    class RegressionPipelineCustom(RegressionPipeline):
        custom_name = "Custom Regression Name"
        component_graph = ["Imputer", "Linear Regressor"]
        custom_hyperparameters = {
            "Imputer": {"numeric_impute_strategy": "most_frequent"},
        }

    X, y = X_y_regression
    expected_class = GeneratedPipelineRegression

    # One pipeline per regression estimator, followed by the custom class.
    candidate_pipelines = []
    for estimator in get_estimators('regression'):
        candidate_pipelines.append(
            make_pipeline(X, y, estimator, problem_type='regression'))
    candidate_pipelines.append(RegressionPipelineCustom)

    automl = AutoMLSearch(X_train=X,
                          y_train=y,
                          problem_type='regression',
                          allowed_pipelines=candidate_pipelines)
    automl.search()
    automl.add_to_rankings(RegressionPipelineCustom({}))

    found_custom = False
    for _, ranking in automl.rankings.iterrows():
        candidate = automl.get_pipeline(ranking['id'])
        assert candidate.__class__ == expected_class
        assert pickle.loads(pickle.dumps(candidate))
        if candidate.custom_name != RegressionPipelineCustom.custom_name:
            continue
        # This ranked entry corresponds to the custom pipeline.
        found_custom = True
        assert candidate.custom_hyperparameters == RegressionPipelineCustom.custom_hyperparameters
        assert candidate.component_graph == RegressionPipelineCustom.component_graph
    assert found_custom