def test_serialization(X_y_binary, ts_data, tmpdir, helper_functions):
    """Round-trip every component through ``save`` / ``ComponentBase.load``.

    Each class yielded by ``all_components()`` is instantiated, fitted, and then
    saved and reloaded once for every pickle protocol from 0 up to and including
    ``cloudpickle.DEFAULT_PROTOCOL``. After each reload the parameters and the
    ``describe(return_dict=True)`` output must match the original; for
    estimators other than the stacked ensembles the feature importances must
    match element-wise as well.

    Args:
        X_y_binary: fixture unpacked as ``(X, y)``; used for every component
            except time series regression estimators.
        ts_data: fixture unpacked as ``(X, y)``; used for estimators that list
            ``ProblemTypes.TIME_SERIES_REGRESSION`` as supported.
        tmpdir: temporary directory in which the pickle file is written.
        helper_functions: fixture exposing ``safe_init_component_with_njobs_1``.
    """
    path = os.path.join(str(tmpdir), 'component.pkl')
    for component_class in all_components():
        print(f'Testing serialization of component {component_class.name}')
        try:
            component = helper_functions.safe_init_component_with_njobs_1(component_class)
        except EnsembleMissingPipelinesError:
            # Stacked ensembles cannot be built without input pipelines, so
            # give each one a minimal single-estimator pipeline.
            if component_class == StackedEnsembleClassifier:
                component = component_class(
                    input_pipelines=[make_pipeline_from_components([RandomForestClassifier()], ProblemTypes.BINARY)],
                    n_jobs=1)
            elif component_class == StackedEnsembleRegressor:
                component = component_class(
                    input_pipelines=[make_pipeline_from_components([RandomForestRegressor()], ProblemTypes.REGRESSION)],
                    n_jobs=1)
            else:
                # Any other class raising this error used to fall through
                # silently, leaving `component` unbound or, worse, still bound
                # to the previous iteration's instance. Surface it instead.
                raise

        uses_time_series_data = (isinstance(component, Estimator)
                                 and ProblemTypes.TIME_SERIES_REGRESSION in component.supported_problem_types)
        X, y = ts_data if uses_time_series_data else X_y_binary
        component.fit(X, y)

        # The stacked ensembles are excluded from the feature importance check.
        compare_importance = (issubclass(component_class, Estimator)
                              and not isinstance(component, (StackedEnsembleClassifier, StackedEnsembleRegressor)))

        for pickle_protocol in range(cloudpickle.DEFAULT_PROTOCOL + 1):
            component.save(path, pickle_protocol=pickle_protocol)
            loaded_component = ComponentBase.load(path)
            assert component.parameters == loaded_component.parameters
            assert component.describe(return_dict=True) == loaded_component.describe(return_dict=True)
            if compare_importance:
                assert (component.feature_importance == loaded_component.feature_importance).all()
def test_stacked_different_input_pipelines_regression():
    """A regressor ensemble must reject input pipelines of mixed problem types.

    One regression pipeline and one binary classification pipeline are passed
    to ``StackedEnsembleRegressor``; construction is expected to raise
    ``ValueError``.
    """
    regression_pipeline = make_pipeline_from_components([RandomForestRegressor()], ProblemTypes.REGRESSION)
    binary_pipeline = make_pipeline_from_components([RandomForestClassifier()], ProblemTypes.BINARY)
    mixed_pipelines = [regression_pipeline, binary_pipeline]

    with pytest.raises(ValueError, match="All pipelines must have the same problem type."):
        StackedEnsembleRegressor(input_pipelines=mixed_pipelines)
def test_stacked_estimator_in_pipeline(problem_type, X_y_binary, X_y_multi, X_y_regression,
                                       stackable_classifiers,
                                       stackable_regressors,
                                       logistic_regression_binary_pipeline_class,
                                       logistic_regression_multiclass_pipeline_class,
                                       linear_regression_pipeline_class):
    """Fit a stacked ensemble inside a pipeline and compare it to a baseline.

    A two-component pipeline ('Simple Imputer' followed by the stacked ensemble
    matching ``problem_type``) is fitted alongside a logistic/linear regression
    comparison pipeline on the same data. Predictions (and, for classification,
    predicted probabilities) must contain no NaN. The ensemble's score must be
    no greater than the comparison's for the log loss objectives and no smaller
    for R2.

    ``problem_type`` is presumably supplied by a parametrize decorator that is
    not visible in this chunk -- confirm against the full file.
    """
    if problem_type == ProblemTypes.BINARY:
        X, y = X_y_binary
        pipeline_base = BinaryClassificationPipeline
        ensemble_name = StackedEnsembleClassifier.name
        input_pipelines = [make_pipeline_from_components([classifier], problem_type)
                           for classifier in stackable_classifiers]
        comparison_pipeline = logistic_regression_binary_pipeline_class(
            parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
        objective = 'Log Loss Binary'
    elif problem_type == ProblemTypes.MULTICLASS:
        X, y = X_y_multi
        pipeline_base = MulticlassClassificationPipeline
        ensemble_name = StackedEnsembleClassifier.name
        input_pipelines = [make_pipeline_from_components([classifier], problem_type)
                           for classifier in stackable_classifiers]
        comparison_pipeline = logistic_regression_multiclass_pipeline_class(
            parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
        objective = 'Log Loss Multiclass'
    elif problem_type == ProblemTypes.REGRESSION:
        X, y = X_y_regression
        pipeline_base = RegressionPipeline
        ensemble_name = StackedEnsembleRegressor.name
        input_pipelines = [make_pipeline_from_components([regressor], problem_type)
                           for regressor in stackable_regressors]
        comparison_pipeline = linear_regression_pipeline_class(
            parameters={"Linear Regressor": {"n_jobs": 1}})
        objective = 'R2'

    class StackedPipeline(pipeline_base):
        component_graph = ['Simple Imputer', ensemble_name]
        model_family = ModelFamily.ENSEMBLE

    ensemble_parameters = {"input_pipelines": input_pipelines, "n_jobs": 1}
    pipeline = StackedPipeline(parameters={ensemble_name: ensemble_parameters})
    pipeline.fit(X, y)
    comparison_pipeline.fit(X, y)

    predictions = pipeline.predict(X).to_series()
    assert not np.isnan(predictions).values.any()

    pipeline_score = pipeline.score(X, y, [objective])[objective]
    comparison_pipeline_score = comparison_pipeline.score(X, y, [objective])[objective]

    if problem_type in (ProblemTypes.BINARY, ProblemTypes.MULTICLASS):
        probabilities = pipeline.predict_proba(X).to_dataframe()
        assert not np.isnan(probabilities).values.any()
        # Log loss: smaller is better.
        assert pipeline_score <= comparison_pipeline_score
    else:
        # R2: larger is better.
        assert pipeline_score >= comparison_pipeline_score
def test_ensemble_data(mock_fit, mock_score, dummy_binary_pipeline_class, stackable_classifiers):
    """Check how many rows the engine hands to ``fit`` for each pipeline kind.

    With ``_ensembling_split_size=0.25`` the test reproduces the split via
    ``split_data`` and asserts that an ordinary pipeline is fitted on
    ``int(2 / 3 * len(training_indices))`` rows, while a stacked ensemble
    pipeline is fitted on ``int(2 / 3 * len(ensembling_indices))`` rows.

    ``mock_fit`` and ``mock_score`` are presumably injected by patch decorators
    that are not visible in this chunk -- confirm against the full file.
    """
    n_rows = 100
    X = pd.DataFrame({"a": list(range(n_rows))})
    y = pd.Series([i % 2 for i in range(n_rows)])
    automl = AutoMLSearch(X_train=X, y_train=y, problem_type='binary', max_batches=19,
                          ensembling=True, _ensembling_split_size=0.25)

    mock_should_continue_callback = MagicMock(return_value=True)
    mock_pre_evaluation_callback = MagicMock()
    mock_post_evaluation_callback = MagicMock()

    # Split a table of row positions with the same size and seed used above so
    # the expected index lists can be computed independently of the engine.
    row_positions = ww.DataTable(np.arange(X.shape[0]))
    train_part, ensemble_part, _, _ = split_data(row_positions, y, problem_type='binary',
                                                 test_size=0.25, random_seed=0)
    training_indices = train_part.to_dataframe()[0].tolist()
    ensembling_indices = ensemble_part.to_dataframe()[0].tolist()

    engine = SequentialEngine(X_train=infer_feature_types(X),
                              y_train=infer_feature_types(y),
                              ensembling_indices=ensembling_indices,
                              automl=automl,
                              should_continue_callback=mock_should_continue_callback,
                              pre_evaluation_callback=mock_pre_evaluation_callback,
                              post_evaluation_callback=mock_post_evaluation_callback)

    # Ordinary pipeline: the last fit call should see the training rows only.
    regular_batch = [dummy_binary_pipeline_class({'Mock Classifier': {'a': 1}})]
    engine.evaluate_batch(regular_batch)
    fitted_rows = len(mock_fit.call_args[0][0])
    assert fitted_rows == int(2 / 3 * len(training_indices))

    # Ensemble pipeline: the last fit call should see the ensembling rows only.
    input_pipelines = [make_pipeline_from_components([classifier], problem_type='binary')
                       for classifier in stackable_classifiers]
    ensemble_pipeline = make_pipeline_from_components(
        [StackedEnsembleClassifier(input_pipelines, n_jobs=1)],
        problem_type='binary',
        custom_name="Stacked Ensemble Classification Pipeline")
    engine.evaluate_batch([ensemble_pipeline])
    fitted_rows = len(mock_fit.call_args[0][0])
    assert fitted_rows == int(2 / 3 * len(ensembling_indices))
def test_stacked_fit_predict_classification(X_y_binary, X_y_multi, stackable_classifiers, problem_type):
    """Fit and predict with ``StackedEnsembleClassifier`` on classification data.

    The same checks run twice: once with the default final estimator and once
    with an explicit ``RandomForestClassifier`` final estimator. Each pass
    verifies the length and Woodwork type of ``predict`` output, the shape and
    Woodwork type of ``predict_proba`` output, and that neither contains NaN.

    ``problem_type`` is presumably supplied by a parametrize decorator that is
    not visible in this chunk -- confirm against the full file.
    """
    if problem_type == ProblemTypes.BINARY:
        X, y = X_y_binary
        num_classes = 2
    elif problem_type == ProblemTypes.MULTICLASS:
        X, y = X_y_multi
        num_classes = 3

    input_pipelines = [make_pipeline_from_components([classifier], problem_type)
                       for classifier in stackable_classifiers]

    for extra_kwargs in ({}, {"final_estimator": RandomForestClassifier()}):
        clf = StackedEnsembleClassifier(input_pipelines=input_pipelines, n_jobs=1, **extra_kwargs)
        clf.fit(X, y)

        y_pred = clf.predict(X)
        assert len(y_pred) == len(y)
        assert isinstance(y_pred, ww.DataColumn)
        # `.any()` rather than `.all()`: a single NaN prediction is a failure.
        # The previous `.all()` form only failed when *every* value was NaN.
        assert not np.isnan(y_pred.to_series()).any()

        y_pred_proba = clf.predict_proba(X)
        assert isinstance(y_pred_proba, ww.DataTable)
        assert y_pred_proba.shape == (len(y), num_classes)
        assert not np.isnan(y_pred_proba.to_dataframe()).any().any()
def test_scikit_learn_wrapper(X_y_binary, X_y_multi, X_y_regression):
    """Wrap every non-ensemble estimator for scikit-learn and exercise it.

    For each estimator from ``_all_estimators()`` whose model family is not
    ``ModelFamily.ENSEMBLE``, and for each of its supported non-time-series
    problem types, a single-component pipeline is wrapped with
    ``scikit_learn_wrapped_estimator``, fitted, and used for prediction.
    Predictions must have one entry per target value and contain no NaN;
    classification problems additionally check ``predict_proba`` shape and NaN.
    """
    time_series_types = [
        ProblemTypes.TIME_SERIES_REGRESSION,
        ProblemTypes.TIME_SERIES_MULTICLASS,
        ProblemTypes.TIME_SERIES_BINARY,
    ]
    non_ensemble_estimators = [estimator for estimator in _all_estimators()
                               if estimator.model_family != ModelFamily.ENSEMBLE]

    for estimator in non_ensemble_estimators:
        for problem_type in estimator.supported_problem_types:
            if problem_type == ProblemTypes.BINARY:
                X, y = X_y_binary
                num_classes = 2
            elif problem_type == ProblemTypes.MULTICLASS:
                X, y = X_y_multi
                num_classes = 3
            elif problem_type == ProblemTypes.REGRESSION:
                X, y = X_y_regression
            elif problem_type in time_series_types:
                # Skipping because make_pipeline_from_components does not yet work for time series.
                continue

            evalml_pipeline = make_pipeline_from_components([estimator()], problem_type)
            scikit_estimator = scikit_learn_wrapped_estimator(evalml_pipeline)
            scikit_estimator.fit(X, y)

            y_pred = scikit_estimator.predict(X)
            assert len(y_pred) == len(y)
            # `.any()` rather than `.all()`: a single NaN prediction is a
            # failure. The previous form only failed when every value was NaN.
            assert not np.isnan(y_pred).any()

            if problem_type in [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]:
                y_pred_proba = scikit_estimator.predict_proba(X)
                assert y_pred_proba.shape == (len(y), num_classes)
                assert not np.isnan(y_pred_proba).any().any()
def test_scikit_learn_wrapper_invalid_problem_type():
    """Wrapping a pipeline whose ``problem_type`` is ``None`` must fail.

    A multiclass pipeline is built, its ``problem_type`` attribute is
    overwritten with ``None``, and ``scikit_learn_wrapped_estimator`` is
    expected to raise ``ValueError``.
    """
    broken_pipeline = make_pipeline_from_components([RandomForestClassifier()], ProblemTypes.MULTICLASS)
    broken_pipeline.problem_type = None

    expected_message = "Could not wrap EvalML object in scikit-learn wrapper."
    with pytest.raises(ValueError, match=expected_message):
        scikit_learn_wrapped_estimator(broken_pipeline)
def test_stacked_feature_importance(mock_fit, X_y_regression, stackable_regressors):
    """``feature_importance`` is not implemented for the stacked regressor.

    ``mock_fit`` is presumably injected by a patch decorator that is not
    visible in this chunk -- confirm against the full file.

    NOTE(review): another function with this exact name is defined later in
    this source; if both live in the same module the later definition shadows
    this one and this test is never collected -- confirm and rename if so.
    """
    X, y = X_y_regression
    base_pipelines = [make_pipeline_from_components([regressor], ProblemTypes.REGRESSION)
                      for regressor in stackable_regressors]
    ensemble = StackedEnsembleRegressor(input_pipelines=base_pipelines, n_jobs=1)

    ensemble.fit(X, y)
    mock_fit.assert_called()

    # The fitted flag is set by hand before touching the property.
    ensemble._is_fitted = True
    with pytest.raises(NotImplementedError, match="feature_importance is not implemented"):
        ensemble.feature_importance
def test_stacked_ensemble_nonstackable_model_families():
    """A baseline regressor pipeline is rejected as an ensemble input.

    Constructing ``StackedEnsembleRegressor`` with a single pipeline built
    around ``BaselineRegressor`` is expected to raise ``ValueError``.
    """
    expected_message = "Pipelines with any of the following model families cannot be used as base pipelines"
    with pytest.raises(ValueError, match=expected_message):
        baseline_pipeline = make_pipeline_from_components([BaselineRegressor()], ProblemTypes.REGRESSION)
        StackedEnsembleRegressor(input_pipelines=[baseline_pipeline])
def test_generate_code_errors():
    """``generate_component_code`` rejects anything that is not a component instance.

    Four inputs are tried in turn -- a pipeline object and three component
    classes (not instances) -- and each must raise ``ValueError``.
    """
    # Each input is produced lazily so it is created inside the `raises`
    # context, exactly where the call under test happens.
    invalid_input_factories = (
        lambda: make_pipeline_from_components([RandomForestClassifier()], ProblemTypes.BINARY),
        lambda: LinearRegressor,
        lambda: Imputer,
        lambda: ComponentBase,
    )
    for make_invalid_input in invalid_input_factories:
        with pytest.raises(ValueError, match="Element must be a component instance"):
            generate_component_code(make_invalid_input())
def test_stacked_feature_importance(mock_fit, X_y_binary, X_y_multi, stackable_classifiers, problem_type):
    """``feature_importance`` is not implemented for the stacked classifier.

    ``mock_fit`` and ``problem_type`` are presumably injected by patch /
    parametrize decorators that are not visible in this chunk -- confirm
    against the full file.
    """
    if problem_type == ProblemTypes.BINARY:
        X, y = X_y_binary
    elif problem_type == ProblemTypes.MULTICLASS:
        X, y = X_y_multi

    base_pipelines = [make_pipeline_from_components([classifier], problem_type)
                      for classifier in stackable_classifiers]
    ensemble = StackedEnsembleClassifier(input_pipelines=base_pipelines, n_jobs=1)

    ensemble.fit(X, y)
    mock_fit.assert_called()

    # The fitted flag is set by hand before touching the property.
    ensemble._is_fitted = True
    with pytest.raises(NotImplementedError, match="feature_importance is not implemented"):
        ensemble.feature_importance
def test_evaluate_pipeline_handles_ensembling_indices(mock_fit, mock_score, dummy_binary_pipeline_class, stackable_classifiers):
    """Check how many rows ``evaluate_pipeline`` hands to ``fit``.

    With ``_ensembling_split_size=0.25`` the test reproduces the split via
    ``split_data`` and asserts that an ordinary pipeline is fitted on
    ``int(2 / 3 * len(training_indices))`` rows, while a stacked ensemble
    pipeline is fitted on ``int(2 / 3 * len(ensembling_indices))`` rows.

    ``mock_fit`` and ``mock_score`` are presumably injected by patch decorators
    that are not visible in this chunk -- confirm against the full file.
    """
    n_rows = 100
    X = ww.DataTable(pd.DataFrame({"a": list(range(n_rows))}))
    y = ww.DataColumn(pd.Series([i % 2 for i in range(n_rows)]))
    automl = AutoMLSearch(X_train=X, y_train=y, problem_type='binary', max_batches=19,
                          ensembling=True, _ensembling_split_size=0.25)

    # Split a table of row positions with the same size and seed used above so
    # the expected index lists can be computed independently.
    row_positions = ww.DataTable(np.arange(X.shape[0]))
    train_part, ensemble_part, _, _ = split_data(row_positions, y, problem_type='binary',
                                                 test_size=0.25, random_seed=0)
    training_indices = train_part.to_dataframe()[0].tolist()
    ensembling_indices = ensemble_part.to_dataframe()[0].tolist()

    # Ordinary pipeline: the last fit call should see the training rows only.
    regular_pipeline = dummy_binary_pipeline_class({'Mock Classifier': {'a': 1}})
    _ = evaluate_pipeline(regular_pipeline, automl, X, y, logger=MagicMock())
    fitted_rows = len(mock_fit.call_args[0][0])
    assert fitted_rows == int(2 / 3 * len(training_indices))

    # Ensemble pipeline: the last fit call should see the ensembling rows only.
    input_pipelines = [make_pipeline_from_components([classifier], problem_type='binary')
                       for classifier in stackable_classifiers]
    ensemble_pipeline = make_pipeline_from_components(
        [StackedEnsembleClassifier(input_pipelines, n_jobs=1)],
        problem_type='binary',
        custom_name="Stacked Ensemble Classification Pipeline")
    _ = evaluate_pipeline(ensemble_pipeline, automl, X, y, logger=MagicMock())
    fitted_rows = len(mock_fit.call_args[0][0])
    assert fitted_rows == int(2 / 3 * len(ensembling_indices))
def test_stacked_fit_predict_regression(X_y_regression, stackable_regressors):
    """Fit and predict with ``StackedEnsembleRegressor`` on regression data.

    The same checks run twice: once with the default final estimator and once
    with an explicit ``RandomForestRegressor`` final estimator. Each pass
    verifies that predictions have one entry per target value, are a
    ``pd.Series``, and contain no NaN.
    """
    X, y = X_y_regression
    input_pipelines = [make_pipeline_from_components([regressor], ProblemTypes.REGRESSION)
                       for regressor in stackable_regressors]

    for extra_kwargs in ({}, {"final_estimator": RandomForestRegressor()}):
        clf = StackedEnsembleRegressor(input_pipelines=input_pipelines, n_jobs=1, **extra_kwargs)
        clf.fit(X, y)

        y_pred = clf.predict(X)
        assert len(y_pred) == len(y)
        assert isinstance(y_pred, pd.Series)
        # `.any()` rather than `.all()`: a single NaN prediction is a failure.
        # The previous `.all()` form only failed when *every* value was NaN.
        assert not np.isnan(y_pred).any()
def test_make_pipeline_from_components(X_y_binary, logistic_regression_binary_pipeline_class):
    """Exercise ``make_pipeline_from_components``: validation, seeding, equivalence.

    The test has four parts:

    1. Invalid arguments raise the expected ``ValueError`` / ``KeyError`` /
       ``TypeError``.
    2. A pipeline built from an imputer and a random forest carries the
       requested name, problem type, parameters and seed, and every component
       reports the pipeline-level seed.
    3. A locally defined estimator with its own seed ends up with the
       pipeline-level seed after construction.
    4. A pipeline rebuilt from the component instances of an existing pipeline
       produces the same predictions, feature importance, parameters and
       description.
    """
    # --- 1. argument validation -------------------------------------------
    with pytest.raises(ValueError, match="Pipeline needs to have an estimator at the last position of the component list"):
        make_pipeline_from_components([Imputer()], problem_type='binary')

    with pytest.raises(KeyError, match="Problem type 'invalid_type' does not exist"):
        make_pipeline_from_components([RandomForestClassifier()], problem_type='invalid_type')

    with pytest.raises(TypeError, match="Custom pipeline name must be a string"):
        make_pipeline_from_components([RandomForestClassifier()], problem_type='binary', custom_name=True)

    with pytest.raises(TypeError, match="Every element of `component_instances` must be an instance of ComponentBase"):
        make_pipeline_from_components([RandomForestClassifier], problem_type='binary')

    with pytest.raises(TypeError, match="Every element of `component_instances` must be an instance of ComponentBase"):
        make_pipeline_from_components(['RandomForestClassifier'], problem_type='binary')

    # --- 2. pipeline built from real components ---------------------------
    imp = Imputer(numeric_impute_strategy='median', random_seed=5)
    est = RandomForestClassifier(random_seed=7)
    pipeline = make_pipeline_from_components([imp, est], ProblemTypes.BINARY, custom_name='My Pipeline', random_seed=15)
    assert [c.__class__ for c in pipeline] == [Imputer, RandomForestClassifier]
    # This used to assert a list of booleans, which is truthy whenever the
    # pipeline is non-empty and therefore could never fail. `all(...)` checks
    # each component's seed for real.
    assert all(c.random_seed == 15 for c in pipeline)
    assert pipeline.problem_type == ProblemTypes.BINARY
    assert pipeline.custom_name == 'My Pipeline'
    expected_parameters = {
        'Imputer': {
            'categorical_impute_strategy': 'most_frequent',
            'numeric_impute_strategy': 'median',
            'categorical_fill_value': None,
            'numeric_fill_value': None},
        'Random Forest Classifier': {
            'n_estimators': 100,
            'max_depth': 6,
            'n_jobs': -1}
    }
    assert pipeline.parameters == expected_parameters
    assert pipeline.random_seed == 15

    # --- 3. custom estimator: pipeline seed wins over component seed ------
    class DummyEstimator(Estimator):
        name = "Dummy!"
        model_family = "foo"
        supported_problem_types = [ProblemTypes.BINARY]
        parameters = {'bar': 'baz'}

    random_seed = 42
    pipeline = make_pipeline_from_components([DummyEstimator(random_seed=3)], ProblemTypes.BINARY, random_seed=random_seed)
    components_list = [c for c in pipeline]
    assert len(components_list) == 1
    assert isinstance(components_list[0], DummyEstimator)
    assert components_list[0].random_seed == random_seed
    expected_parameters = {'Dummy!': {'bar': 'baz'}}
    assert pipeline.parameters == expected_parameters
    assert pipeline.random_seed == random_seed

    # --- 4. rebuilt pipeline behaves like the original --------------------
    X, y = X_y_binary
    pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}},
                                                         random_seed=42)
    component_instances = [c for c in pipeline]
    new_pipeline = make_pipeline_from_components(component_instances, ProblemTypes.BINARY)
    pipeline.fit(X, y)
    predictions = pipeline.predict(X)
    new_pipeline.fit(X, y)
    new_predictions = new_pipeline.predict(X)
    assert np.array_equal(predictions, new_predictions)
    assert np.array_equal(pipeline.feature_importance, new_pipeline.feature_importance)
    assert new_pipeline.name == 'Templated Pipeline'
    assert pipeline.parameters == new_pipeline.parameters
    for component, new_component in zip(pipeline._component_graph, new_pipeline._component_graph):
        assert isinstance(new_component, type(component))
    assert pipeline.describe() == new_pipeline.describe()
def test_score_batch_works(mock_score, pipeline_score_side_effect, X_y_binary, dummy_binary_pipeline_class, stackable_classifiers, caplog):
    """``SequentialEngine.score_batch`` returns scores and logs scoring errors.

    ``pipeline_score_side_effect`` holds one entry per pipeline: either a
    return value or a ``PipelineScoreError``. For an error entry the expected
    scores are NaN for every objective, overlaid with whatever the error
    reports in ``scored_successfully``, and a "Score error for <name>" message
    must appear in the captured log. The batch is scored once before and once
    after ``automl.search()``.

    ``mock_score`` and ``pipeline_score_side_effect`` are presumably injected
    by patch / parametrize decorators that are not visible in this chunk --
    confirm against the full file.
    """
    last_position = len(pipeline_score_side_effect) - 1

    expected_log_fragments = []
    expected_scores = {}
    for position, outcome in enumerate(pipeline_score_side_effect):
        # The final pipeline is the ensemble, which carries the default name.
        pipeline_name = f"Pipeline {position}" if position < last_position else "Templated Pipeline"
        scores = no_exception_scores
        if isinstance(outcome, PipelineScoreError):
            scores = {"F1": np.nan, "AUC": np.nan, "Log Loss Binary": np.nan}
            scores.update(outcome.scored_successfully)
            expected_log_fragments.append(f"Score error for {pipeline_name}")
        expected_scores[pipeline_name] = scores

    X, y = X_y_binary
    automl = AutoMLSearch(X_train=X, y_train=y, problem_type='binary', max_iterations=1,
                          allowed_pipelines=[dummy_binary_pipeline_class])
    engine = SequentialEngine(X_train=automl.X_train, y_train=automl.y_train, automl=automl)

    def make_pipeline_name(index):
        """Return a dummy pipeline instance named after ``index``."""
        class DummyPipeline(dummy_binary_pipeline_class):
            custom_name = f"Pipeline {index}"
        return DummyPipeline({'Mock Classifier': {'a': index}})

    pipelines = [make_pipeline_name(position) for position in range(last_position)]

    ensemble_input_pipelines = [make_pipeline_from_components([classifier], problem_type="binary")
                                for classifier in stackable_classifiers[:2]]
    ensemble = make_pipeline_from_components(
        [StackedEnsembleClassifier(ensemble_input_pipelines, n_jobs=1)],
        problem_type="binary")
    pipelines.append(ensemble)

    def score_batch_and_check():
        """Score the whole batch with a patched ``score`` and verify the result."""
        caplog.clear()
        with patch('evalml.pipelines.BinaryClassificationPipeline.score') as patched_score:
            patched_score.side_effect = pipeline_score_side_effect
            actual_scores = engine.score_batch(pipelines, X, y,
                                               objectives=["Log Loss Binary", "F1", "AUC"])
            assert actual_scores == expected_scores
            for fragment in expected_log_fragments:
                assert fragment in caplog.text

    # Scoring must work on a fresh engine ...
    score_batch_and_check()

    # ... and again once a search has been run.
    automl.search()
    score_batch_and_check()
def test_train_batch_works(mock_score, pipeline_fit_side_effect, X_y_binary, dummy_binary_pipeline_class, stackable_classifiers, caplog):
    """``SequentialEngine.train_batch`` returns fitted pipelines and logs fit errors.

    ``pipeline_fit_side_effect`` holds one entry per pipeline; entries that are
    exceptions make the patched ``fit`` raise. The number of trained pipelines
    must equal the number of non-exception entries, ``fit`` must have been
    called once per entry, and the text of every exception must appear in the
    captured log. The batch is trained once before and once after
    ``automl.search()``.

    ``mock_score`` and ``pipeline_fit_side_effect`` are presumably injected by
    patch / parametrize decorators that are not visible in this chunk --
    confirm against the full file.
    """
    expected_log_fragments = [str(outcome) for outcome in pipeline_fit_side_effect
                              if isinstance(outcome, Exception)]

    X, y = X_y_binary
    automl = AutoMLSearch(X_train=X, y_train=y, problem_type='binary', max_time=1,
                          max_iterations=2, train_best_pipeline=False, n_jobs=1)
    engine = SequentialEngine(X_train=automl.X_train, y_train=automl.y_train, automl=automl)

    def make_pipeline_name(index):
        """Return a dummy pipeline instance named after ``index``."""
        class DummyPipeline(dummy_binary_pipeline_class):
            custom_name = f"Pipeline {index}"
        return DummyPipeline({'Mock Classifier': {'a': index}})

    # All but the last side-effect entry map to dummy pipelines; the last one
    # is consumed by the ensemble appended below.
    pipelines = [make_pipeline_name(position)
                 for position in range(len(pipeline_fit_side_effect) - 1)]

    ensemble_input_pipelines = [make_pipeline_from_components([classifier], problem_type="binary")
                                for classifier in stackable_classifiers[:2]]
    ensemble = make_pipeline_from_components(
        [StackedEnsembleClassifier(ensemble_input_pipelines, n_jobs=1)],
        problem_type="binary")
    pipelines.append(ensemble)

    def train_batch_and_check():
        """Train the whole batch with a patched ``fit`` and verify the result."""
        caplog.clear()
        with patch('evalml.pipelines.BinaryClassificationPipeline.fit') as patched_fit:
            patched_fit.side_effect = pipeline_fit_side_effect
            trained_pipelines = engine.train_batch(pipelines)
            expected_trained = len(pipeline_fit_side_effect) - len(expected_log_fragments)
            assert len(trained_pipelines) == expected_trained
            assert patched_fit.call_count == len(pipeline_fit_side_effect)
            for fragment in expected_log_fragments:
                assert fragment in caplog.text

    # Training must work on a fresh engine ...
    train_batch_and_check()

    # ... and again once a search has been run.
    automl.search()
    train_batch_and_check()