def test_automl_time_series_classification_pickle_generated_pipeline(mock_binary_fit, mock_multi_fit, mock_binary_score, mock_multiclass_score, problem_type, X_y_binary, X_y_multi):
    """Run AutoML on a time-series classification problem and check that every
    ranked pipeline is the generated time-series class for that problem type
    and survives a pickle round-trip.
    """
    if problem_type == ProblemTypes.TIME_SERIES_BINARY:
        X, y = X_y_binary
        expected_class = GeneratedPipelineTimeSeriesBinary
    else:
        X, y = X_y_multi
        expected_class = GeneratedPipelineTimeSeriesMulticlass

    # Time-series problems require an explicit problem configuration.
    problem_configuration = {
        "gap": 0,
        "max_delay": 0,
        "delay_target": False,
        "delay_features": True,
    }
    automl = AutoMLSearch(X_train=X, y_train=y, problem_type=problem_type,
                          problem_configuration=problem_configuration)
    automl.search()

    for _, ranking_row in automl.rankings.iterrows():
        retrieved = automl.get_pipeline(ranking_row['id'])
        assert retrieved.__class__ == expected_class
        # loads(dumps(...)) returning a truthy object proves picklability.
        assert pickle.loads(pickle.dumps(retrieved))
def test_automl_time_series_regression_pickle_generated_pipeline(mock_fit, mock_score, X_y_regression):
    """Run AutoML on a time-series regression problem and check that every
    ranked pipeline is the generated time-series regression class and can be
    pickled and unpickled.
    """
    X, y = X_y_regression

    # Time-series problems require an explicit problem configuration.
    problem_configuration = {
        "gap": 0,
        "max_delay": 0,
        "delay_target": False,
        "delay_features": True,
    }
    automl = AutoMLSearch(X_train=X, y_train=y,
                          problem_type="time series regression",
                          problem_configuration=problem_configuration)
    automl.search()

    for _, ranking_row in automl.rankings.iterrows():
        retrieved = automl.get_pipeline(ranking_row['id'])
        assert retrieved.__class__ == GeneratedPipelineTimeSeriesRegression
        # loads(dumps(...)) returning a truthy object proves picklability.
        assert pickle.loads(pickle.dumps(retrieved))
def test_init(X_y_regression):
    """Smoke-test AutoMLSearch on a regression problem.

    Verifies that search() completes, rankings/best_pipeline are populated,
    and the best pipeline can predict — both with raw arrays and with
    pandas DataFrame/Series inputs.
    """
    X, y = X_y_regression

    automl = AutoMLSearch(X_train=X, y_train=y, problem_type='regression',
                          objective="R2", max_iterations=3, n_jobs=1)
    automl.search()
    assert automl.n_jobs == 1
    assert isinstance(automl.rankings, pd.DataFrame)
    assert isinstance(automl.best_pipeline, PipelineBase)
    automl.best_pipeline.predict(X)

    # Test with dataframes. Pass X_train/y_train as keywords, consistent
    # with the call above (the original passed them positionally).
    automl = AutoMLSearch(X_train=pd.DataFrame(X), y_train=pd.Series(y),
                          problem_type='regression', objective="R2",
                          max_iterations=3, n_jobs=1)
    automl.search()
    assert isinstance(automl.rankings, pd.DataFrame)
    assert isinstance(automl.full_rankings, pd.DataFrame)
    assert isinstance(automl.best_pipeline, PipelineBase)
    automl.best_pipeline.predict(X)
    assert isinstance(automl.get_pipeline(0), PipelineBase)
def test_init(X_y_binary):
    """Smoke-test AutoMLSearch on a binary classification problem.

    Verifies that search() completes, rankings/best_pipeline are populated,
    the default objective is Log Loss Binary, and the best pipeline can
    predict — both with raw arrays and with pandas DataFrame/Series inputs.
    """
    X, y = X_y_binary

    automl = AutoMLSearch(X_train=X, y_train=y, problem_type='binary',
                          max_iterations=1, n_jobs=1)
    automl.search()
    assert automl.n_jobs == 1
    assert isinstance(automl.rankings, pd.DataFrame)
    assert isinstance(automl.best_pipeline, PipelineBase)
    automl.best_pipeline.predict(X)

    # Test with dataframes. Pass X_train/y_train as keywords, consistent
    # with the call above (the original passed them positionally).
    automl = AutoMLSearch(X_train=pd.DataFrame(X), y_train=pd.Series(y),
                          problem_type='binary', max_iterations=1, n_jobs=1)
    automl.search()
    assert isinstance(automl.rankings, pd.DataFrame)
    assert isinstance(automl.full_rankings, pd.DataFrame)
    assert isinstance(automl.best_pipeline, PipelineBase)
    assert isinstance(automl.get_pipeline(0), PipelineBase)
    assert automl.objective.name == 'Log Loss Binary'
    automl.best_pipeline.predict(X)
def test_automl_pickle_generated_pipeline(mock_binary_score, mock_binary_fit, mock_multi_score, mock_multi_fit, problem_type, X_y_binary, X_y_multi):
    """Run AutoML on a (parametrized) classification problem and check that
    every ranked pipeline is the generated class for that problem type and
    survives a pickle round-trip.
    """
    # Only BINARY and MULTICLASS are expected here (test parametrization).
    if problem_type == ProblemTypes.BINARY:
        X, y = X_y_binary
        expected_class = GeneratedPipelineBinary
    elif problem_type == ProblemTypes.MULTICLASS:
        X, y = X_y_multi
        expected_class = GeneratedPipelineMulticlass

    automl = AutoMLSearch(X_train=X, y_train=y, problem_type=problem_type)
    automl.search()

    for _, ranking_row in automl.rankings.iterrows():
        retrieved = automl.get_pipeline(ranking_row['id'])
        assert retrieved.__class__ == expected_class
        # loads(dumps(...)) returning a truthy object proves picklability.
        assert pickle.loads(pickle.dumps(retrieved))
def test_automl_pickle_generated_pipeline(mock_regression_score, mock_regression_fit, X_y_regression):
    """Run AutoML on regression, add a custom pipeline to the rankings, and
    check that every retrieved pipeline is the generated regression class,
    is picklable, and that the custom pipeline's name, hyperparameters and
    component graph are preserved through get_pipeline().
    """
    class RegressionPipelineCustoms(RegressionPipeline):
        custom_name = "Custom Regression Name"
        component_graph = ["Imputer", "Linear Regressor"]
        custom_hyperparameters = {"Imputer": {"numeric_impute_strategy": "most_frequent"}}

    X, y = X_y_regression
    expected_class = GeneratedPipelineRegression

    automl = AutoMLSearch(X_train=X, y_train=y, problem_type='regression')
    automl.search()
    automl.add_to_rankings(RegressionPipelineCustoms({}))

    found_custom = False
    for _, ranking_row in automl.rankings.iterrows():
        retrieved = automl.get_pipeline(ranking_row['id'])
        assert retrieved.__class__ == expected_class
        # loads(dumps(...)) returning a truthy object proves picklability.
        assert pickle.loads(pickle.dumps(retrieved))
        if retrieved.custom_name == RegressionPipelineCustoms.custom_name:
            found_custom = True
            # Custom attributes must survive the get_pipeline round-trip.
            assert retrieved.custom_hyperparameters == RegressionPipelineCustoms.custom_hyperparameters
            assert retrieved.component_graph == RegressionPipelineCustoms.component_graph
    assert found_custom
def test_automl_pickle_generated_pipeline(mock_regression_fit, mock_regression_score, X_y_regression):
    """Run AutoML on regression with an explicit allowed_pipelines list that
    includes a custom pipeline; check that every ranked pipeline comes back
    as the generated regression class, is picklable, and that the custom
    pipeline's name, hyperparameters and component graph are preserved.
    """
    mock_regression_score.return_value = {"R2": 1.0}

    class RegressionPipelineCustom(RegressionPipeline):
        custom_name = "Custom Regression Name"
        component_graph = ["Imputer", "Linear Regressor"]
        custom_hyperparameters = {
            "Imputer": {
                "numeric_impute_strategy": "most_frequent"
            }
        }

    X, y = X_y_regression
    expected_class = GeneratedPipelineRegression

    # Build the default regression pipelines, then add the custom one.
    allowed_pipelines = []
    for estimator in get_estimators('regression'):
        allowed_pipelines.append(make_pipeline(X, y, estimator, problem_type='regression'))
    allowed_pipelines.append(RegressionPipelineCustom)

    automl = AutoMLSearch(X_train=X, y_train=y, problem_type='regression',
                          allowed_pipelines=allowed_pipelines)
    automl.search()
    automl.add_to_rankings(RegressionPipelineCustom({}))

    found_custom = False
    for _, ranking_row in automl.rankings.iterrows():
        retrieved = automl.get_pipeline(ranking_row['id'])
        assert retrieved.__class__ == expected_class
        # loads(dumps(...)) returning a truthy object proves picklability.
        assert pickle.loads(pickle.dumps(retrieved))
        if retrieved.custom_name == RegressionPipelineCustom.custom_name:
            found_custom = True
            # Custom attributes must survive the get_pipeline round-trip.
            assert retrieved.custom_hyperparameters == RegressionPipelineCustom.custom_hyperparameters
            assert retrieved.component_graph == RegressionPipelineCustom.component_graph
    assert found_custom