def test_all_estimators_check_fit(X_y_binary, ts_data, test_estimator_needs_fitting_false, helper_functions):
    """Check that estimators raise before ``fit`` and work after it.

    Every class from ``_all_estimators()`` except the stacked ensembles and
    the time series baseline is considered, together with the
    ``test_estimator_needs_fitting_false`` fixture. Classes whose
    ``needs_fitting`` attribute is falsy are skipped.

    For each remaining class, ``predict``, ``predict_proba`` (classification
    only) and ``feature_importance`` must raise ``ComponentNotYetFittedError``
    before ``fit`` and must run without raising afterwards.
    """
    excluded = [StackedEnsembleClassifier, StackedEnsembleRegressor, TimeSeriesBaselineEstimator]
    candidates = [est for est in _all_estimators() if est not in excluded]
    candidates.append(test_estimator_needs_fitting_false)

    def _is_classifier(comp):
        # predict_proba is only exercised for binary / multiclass components.
        return (ProblemTypes.BINARY in comp.supported_problem_types
                or ProblemTypes.MULTICLASS in comp.supported_problem_types)

    for component_class in candidates:
        if not component_class.needs_fitting:
            continue

        # Estimators supporting time series regression get the time series data.
        wants_time_series = ProblemTypes.TIME_SERIES_REGRESSION in component_class.supported_problem_types
        X, y = ts_data if wants_time_series else X_y_binary

        component = helper_functions.safe_init_component_with_njobs_1(component_class)
        not_fitted_message = f'You must fit {component_class.__name__}'

        # Before fitting: every access below must raise.
        with pytest.raises(ComponentNotYetFittedError, match=not_fitted_message):
            component.predict(X)
        if _is_classifier(component):
            with pytest.raises(ComponentNotYetFittedError, match=not_fitted_message):
                component.predict_proba(X)
        with pytest.raises(ComponentNotYetFittedError, match=not_fitted_message):
            component.feature_importance

        component.fit(X, y)

        # After fitting: the same accesses must succeed.
        if _is_classifier(component):
            component.predict_proba(X)
        component.predict(X)
        component.feature_importance
def test_scikit_learn_wrapper(X_y_binary, X_y_multi, X_y_regression, ts_data):
    """Fit and predict with each non-ensemble estimator via the scikit-learn wrapper.

    For every supported problem type that is not a time series type, a
    single-estimator pipeline is built, wrapped with
    ``scikit_learn_wrapped_estimator``, fitted, and its predictions are
    checked for length and for not being entirely NaN. Classification
    problem types additionally check the shape of ``predict_proba``.

    NOTE(review): the ``ts_data`` fixture is requested but never used here.
    NOTE(review): a later function in this file has the same name; the later
    definition would replace this one at import time -- confirm which version
    is intended to run.
    """
    time_series_types = [
        ProblemTypes.TIME_SERIES_REGRESSION,
        ProblemTypes.TIME_SERIES_MULTICLASS,
        ProblemTypes.TIME_SERIES_BINARY,
    ]
    classification_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]
    non_ensemble_estimators = [
        est for est in _all_estimators()
        if est.model_family != ModelFamily.ENSEMBLE
    ]

    for estimator in non_ensemble_estimators:
        for problem_type in estimator.supported_problem_types:
            # Time series problem types are not exercised by this test.
            if problem_type in time_series_types:
                continue

            if problem_type == ProblemTypes.BINARY:
                (X, y), num_classes, pipeline_class = X_y_binary, 2, BinaryClassificationPipeline
            elif problem_type == ProblemTypes.MULTICLASS:
                (X, y), num_classes, pipeline_class = X_y_multi, 3, MulticlassClassificationPipeline
            elif problem_type == ProblemTypes.REGRESSION:
                (X, y), pipeline_class = X_y_regression, RegressionPipeline

            wrapped = scikit_learn_wrapped_estimator(pipeline_class([estimator]))
            wrapped.fit(X, y)

            predictions = wrapped.predict(X)
            assert len(predictions) == len(y)
            assert not np.isnan(predictions).all()

            if problem_type in classification_types:
                probabilities = wrapped.predict_proba(X)
                assert probabilities.shape == (len(y), num_classes)
                assert not np.isnan(probabilities).all().all()
def test_scikit_learn_wrapper(X_y_binary, X_y_multi, X_y_regression):
    """Fit and predict with each non-ensemble estimator via the scikit-learn wrapper.

    For every supported problem type that is not a time series type, a
    pipeline is built with ``make_pipeline_from_components`` from one
    estimator instance, wrapped with ``scikit_learn_wrapped_estimator``,
    fitted, and its predictions are checked for length and for not being
    entirely NaN. Classification problem types additionally check the shape
    of ``predict_proba``.

    NOTE(review): an earlier function in this file has the same name; this
    later definition would replace it at import time -- confirm that is
    intended.
    """
    skipped_types = [
        ProblemTypes.TIME_SERIES_REGRESSION,
        ProblemTypes.TIME_SERIES_MULTICLASS,
        ProblemTypes.TIME_SERIES_BINARY,
    ]
    proba_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]
    candidates = [
        est for est in _all_estimators()
        if est.model_family != ModelFamily.ENSEMBLE
    ]

    for estimator in candidates:
        for problem_type in estimator.supported_problem_types:
            if problem_type in skipped_types:
                # Skipping because make_pipeline_from_components does not yet work for time series.
                continue

            if problem_type == ProblemTypes.BINARY:
                (X, y), num_classes = X_y_binary, 2
            elif problem_type == ProblemTypes.MULTICLASS:
                (X, y), num_classes = X_y_multi, 3
            elif problem_type == ProblemTypes.REGRESSION:
                X, y = X_y_regression

            pipeline = make_pipeline_from_components([estimator()], problem_type)
            wrapped = scikit_learn_wrapped_estimator(pipeline)
            wrapped.fit(X, y)

            predictions = wrapped.predict(X)
            assert len(predictions) == len(y)
            assert not np.isnan(predictions).all()

            if problem_type in proba_types:
                probabilities = wrapped.predict_proba(X)
                assert probabilities.shape == (len(y), num_classes)
                assert not np.isnan(probabilities).all().all()
def test_all_transformers_needs_fitting():
    """Check the ``needs_fitting`` flag on every transformer and estimator class.

    Only the classes named ``DropColumns``, ``SelectColumns`` and
    ``DelayedFeatureTransformer`` are expected to have a falsy
    ``needs_fitting``; every other class must have a truthy one.
    """
    no_fit_names = ['DropColumns', 'SelectColumns', 'DelayedFeatureTransformer']
    for component_class in _all_transformers() + _all_estimators():
        expected_needs_fitting = component_class.__name__ not in no_fit_names
        assert bool(component_class.needs_fitting) == expected_needs_fitting
def stackable_regressors(helper_functions):
    """Collect initialized regressors that are eligible for stacking.

    An estimator class qualifies when its supported problem types (after
    ``handle_problem_types``) are exactly regression and time series
    regression, and its model family is neither listed in
    ``_nonstackable_model_families`` nor ``ModelFamily.ENSEMBLE``.

    Returns:
        list: the result of
        ``helper_functions.safe_init_component_with_njobs_1`` for each
        qualifying estimator class, in ``_all_estimators()`` order.
    """
    regression_types = {ProblemTypes.REGRESSION, ProblemTypes.TIME_SERIES_REGRESSION}
    selected = []
    for estimator_class in _all_estimators():
        problem_types = {handle_problem_types(pt) for pt in estimator_class.supported_problem_types}
        if problem_types != regression_types:
            continue
        if (estimator_class.model_family not in _nonstackable_model_families
                and estimator_class.model_family != ModelFamily.ENSEMBLE):
            selected.append(helper_functions.safe_init_component_with_njobs_1(estimator_class))
    return selected
def stackable_classifiers(helper_functions):
    """Collect classifier classes that are eligible for stacking.

    An estimator class qualifies when its supported problem types (after
    ``handle_problem_types``) are exactly binary, multiclass, time series
    binary and time series multiclass, and its model family is neither
    listed in ``_nonstackable_model_families`` nor ``ModelFamily.ENSEMBLE``.

    ``helper_functions`` is accepted but not used in the body.

    Returns:
        list: the qualifying estimator classes (not instances), in
        ``_all_estimators()`` order.
    """
    classification_types = {
        ProblemTypes.BINARY,
        ProblemTypes.MULTICLASS,
        ProblemTypes.TIME_SERIES_BINARY,
        ProblemTypes.TIME_SERIES_MULTICLASS,
    }

    def _is_stackable(estimator_class):
        problem_types = {handle_problem_types(pt) for pt in estimator_class.supported_problem_types}
        return (problem_types == classification_types
                and estimator_class.model_family not in _nonstackable_model_families
                and estimator_class.model_family != ModelFamily.ENSEMBLE)

    return [estimator_class for estimator_class in _all_estimators() if _is_stackable(estimator_class)]
def all_pipeline_classes():
    """Build one mock pipeline per (estimator, supported problem type) pair.

    The stacked ensemble classifier and regressor are left out. Each
    remaining estimator from ``_all_estimators()`` is paired with every
    entry of its ``supported_problem_types`` and passed to
    ``create_mock_pipeline``.

    Returns:
        list: the ``create_mock_pipeline`` results, ordered by estimator and
        then by problem type.
    """
    return [
        create_mock_pipeline(estimator, problem_type)
        for estimator in _all_estimators()
        if estimator != StackedEnsembleClassifier and estimator != StackedEnsembleRegressor
        for problem_type in estimator.supported_problem_types
    ]
def test_serialization_protocol(mock_cloudpickle_dump, tmpdir): path = os.path.join(str(tmpdir), 'pipe.pkl') component = LogisticRegressionClassifier() component.save(path) assert len(mock_cloudpickle_dump.call_args_list) == 1 assert mock_cloudpickle_dump.call_args_list[0][1]['protocol'] == cloudpickle.DEFAULT_PROTOCOL mock_cloudpickle_dump.reset_mock() component.save(path, pickle_protocol=42) assert len(mock_cloudpickle_dump.call_args_list) == 1 assert mock_cloudpickle_dump.call_args_list[0][1]['protocol'] == 42 @pytest.mark.parametrize("estimator_class", _all_estimators()) def test_estimators_accept_all_kwargs(estimator_class, logistic_regression_binary_pipeline_class, linear_regression_pipeline_class): try: estimator = estimator_class() except EnsembleMissingPipelinesError: if estimator_class == StackedEnsembleClassifier: estimator = estimator_class(input_pipelines=[logistic_regression_binary_pipeline_class(parameters={})]) elif estimator_class == StackedEnsembleRegressor: estimator = estimator_class(input_pipelines=[linear_regression_pipeline_class(parameters={})]) if estimator._component_obj is None: pytest.skip(f"Skipping {estimator_class} because does not have component object.") if estimator_class.model_family == ModelFamily.ENSEMBLE: params = estimator.parameters else: