Python _all_estimators 예제들, evalml.pipelines.components.utils._all_estimators Python 예제들

예제 #1

0

파일 보기

def test_all_estimators_check_fit(X_y_binary, ts_data, test_estimator_needs_fitting_false, helper_functions):
    """Verify estimators reject prediction calls before ``fit`` and accept them after.

    Iterates over every class returned by ``_all_estimators()`` except the two
    stacked ensembles and the time series baseline estimator, plus the
    ``test_estimator_needs_fitting_false`` fixture class. Classes whose
    ``needs_fitting`` is falsy are skipped. For each remaining class,
    ``predict``, ``predict_proba`` (only when BINARY or MULTICLASS is
    supported) and ``feature_importance`` must raise
    ``ComponentNotYetFittedError`` before fitting; after ``fit`` the same
    members are exercised again and simply must not raise.
    """
    excluded = [StackedEnsembleClassifier, StackedEnsembleRegressor, TimeSeriesBaselineEstimator]
    candidates = [est for est in _all_estimators() if est not in excluded]
    candidates.append(test_estimator_needs_fitting_false)

    def _supports_classification(component):
        # Evaluated on the instance each time it is needed, mirroring the
        # separate pre-fit and post-fit checks.
        supported = component.supported_problem_types
        return ProblemTypes.BINARY in supported or ProblemTypes.MULTICLASS in supported

    for component_class in (cls for cls in candidates if cls.needs_fitting):
        # Time series regressors get the time series fixture; everything else
        # is driven with the binary classification data.
        uses_ts_data = ProblemTypes.TIME_SERIES_REGRESSION in component_class.supported_problem_types
        X, y = ts_data if uses_ts_data else X_y_binary

        component = helper_functions.safe_init_component_with_njobs_1(component_class)
        not_fitted = f'You must fit {component_class.__name__}'

        # --- before fit: every prediction-related member must raise ---
        with pytest.raises(ComponentNotYetFittedError, match=not_fitted):
            component.predict(X)
        if _supports_classification(component):
            with pytest.raises(ComponentNotYetFittedError, match=not_fitted):
                component.predict_proba(X)
        with pytest.raises(ComponentNotYetFittedError, match=not_fitted):
            component.feature_importance

        component.fit(X, y)

        # --- after fit: the same members must run without raising ---
        if _supports_classification(component):
            component.predict_proba(X)
        component.predict(X)
        component.feature_importance

예제 #2

0

파일 보기

def test_scikit_learn_wrapper(X_y_binary, X_y_multi, X_y_regression, ts_data):
    """Fit and predict through ``scikit_learn_wrapped_estimator`` for each estimator.

    Every non-ensemble class from ``_all_estimators()`` is wrapped in the
    pipeline class matching each of its supported problem types (time series
    problem types are skipped). The wrapped estimator is fitted, and the test
    asserts that predictions have one entry per target row and are not all
    NaN. For BINARY and MULTICLASS, ``predict_proba`` must additionally have
    shape ``(len(y), num_classes)`` and must not be all NaN.
    """
    time_series_types = [
        ProblemTypes.TIME_SERIES_REGRESSION,
        ProblemTypes.TIME_SERIES_MULTICLASS,
        ProblemTypes.TIME_SERIES_BINARY,
    ]
    classification_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]
    non_ensemble = [est for est in _all_estimators() if est.model_family != ModelFamily.ENSEMBLE]

    for estimator in non_ensemble:
        for problem_type in estimator.supported_problem_types:
            if problem_type in time_series_types:
                continue

            # Pick the dataset, class count and pipeline type for this problem.
            if problem_type == ProblemTypes.BINARY:
                pipeline_class = BinaryClassificationPipeline
                num_classes = 2
                X, y = X_y_binary
            elif problem_type == ProblemTypes.MULTICLASS:
                pipeline_class = MulticlassClassificationPipeline
                num_classes = 3
                X, y = X_y_multi
            elif problem_type == ProblemTypes.REGRESSION:
                pipeline_class = RegressionPipeline
                X, y = X_y_regression

            wrapped = scikit_learn_wrapped_estimator(pipeline_class([estimator]))
            wrapped.fit(X, y)

            predictions = wrapped.predict(X)
            assert len(predictions) == len(y)
            assert not np.isnan(predictions).all()

            if problem_type in classification_types:
                probabilities = wrapped.predict_proba(X)
                assert probabilities.shape == (len(y), num_classes)
                assert not np.isnan(probabilities).all().all()

예제 #3

0

파일 보기

파일: test_utils.py 프로젝트: actuarial-tools/evalml

def test_scikit_learn_wrapper(X_y_binary, X_y_multi, X_y_regression):
    """Fit and predict through ``scikit_learn_wrapped_estimator`` for each estimator.

    For every non-ensemble class from ``_all_estimators()`` and each of its
    supported problem types (time series ones are skipped), a pipeline is
    built with ``make_pipeline_from_components`` from a default-constructed
    estimator, wrapped, and fitted. Predictions must have one entry per
    target row and must not be all NaN; for BINARY and MULTICLASS the
    ``predict_proba`` output must have shape ``(len(y), num_classes)`` and
    must not be all NaN.
    """
    time_series_types = [
        ProblemTypes.TIME_SERIES_REGRESSION,
        ProblemTypes.TIME_SERIES_MULTICLASS,
        ProblemTypes.TIME_SERIES_BINARY,
    ]
    classification_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]
    non_ensemble = [
        est for est in _all_estimators()
        if est.model_family != ModelFamily.ENSEMBLE
    ]

    for estimator in non_ensemble:
        for problem_type in estimator.supported_problem_types:
            if problem_type in time_series_types:
                # Skipping because make_pipeline_from_components does not yet work for time series.
                continue

            # Select the fixture data (and class count) for this problem type.
            if problem_type == ProblemTypes.BINARY:
                num_classes = 2
                X, y = X_y_binary
            elif problem_type == ProblemTypes.MULTICLASS:
                num_classes = 3
                X, y = X_y_multi
            elif problem_type == ProblemTypes.REGRESSION:
                X, y = X_y_regression

            pipeline = make_pipeline_from_components([estimator()], problem_type)
            wrapped = scikit_learn_wrapped_estimator(pipeline)
            wrapped.fit(X, y)

            predictions = wrapped.predict(X)
            assert len(predictions) == len(y)
            assert not np.isnan(predictions).all()

            if problem_type in classification_types:
                probabilities = wrapped.predict_proba(X)
                assert probabilities.shape == (len(y), num_classes)
                assert not np.isnan(probabilities).all().all()

예제 #4

0

파일 보기

def test_all_transformers_needs_fitting():
    """Check the ``needs_fitting`` flag on every transformer and estimator class.

    Only the classes named ``DropColumns``, ``SelectColumns`` and
    ``DelayedFeatureTransformer`` are expected to have a falsy
    ``needs_fitting``; every other class must have a truthy one.
    """
    no_fit_required = ['DropColumns', 'SelectColumns', 'DelayedFeatureTransformer']
    for component_class in _all_transformers() + _all_estimators():
        expected_to_need_fit = component_class.__name__ not in no_fit_required
        # Compare truthiness so the check matches a plain ``assert`` /
        # ``assert not`` on the attribute.
        assert bool(component_class.needs_fitting) == expected_to_need_fit

예제 #5

0

파일 보기

def stackable_regressors(helper_functions):
    """Return initialized instances of every stackable regressor.

    An estimator class qualifies when its supported problem types (after
    ``handle_problem_types``) are exactly REGRESSION and
    TIME_SERIES_REGRESSION, and its model family is neither listed in
    ``_nonstackable_model_families`` nor ``ModelFamily.ENSEMBLE``. Each
    qualifying class is instantiated through
    ``helper_functions.safe_init_component_with_njobs_1``.
    """
    regression_types = {ProblemTypes.REGRESSION, ProblemTypes.TIME_SERIES_REGRESSION}

    def _is_stackable(estimator_class):
        problem_types = {handle_problem_types(pt) for pt in estimator_class.supported_problem_types}
        if problem_types != regression_types:
            return False
        if estimator_class.model_family in _nonstackable_model_families:
            return False
        return estimator_class.model_family != ModelFamily.ENSEMBLE

    return [
        helper_functions.safe_init_component_with_njobs_1(estimator_class)
        for estimator_class in _all_estimators()
        if _is_stackable(estimator_class)
    ]

예제 #6

0

파일 보기

파일: conftest.py 프로젝트: passion4energy/evalml

def stackable_classifiers(helper_functions):
    """Return every stackable classifier class (classes, not instances).

    An estimator class qualifies when its supported problem types (after
    ``handle_problem_types``) are exactly BINARY, MULTICLASS,
    TIME_SERIES_BINARY and TIME_SERIES_MULTICLASS, and its model family is
    neither listed in ``_nonstackable_model_families`` nor
    ``ModelFamily.ENSEMBLE``. ``helper_functions`` is accepted but not used.
    """
    classification_types = {
        ProblemTypes.BINARY,
        ProblemTypes.MULTICLASS,
        ProblemTypes.TIME_SERIES_BINARY,
        ProblemTypes.TIME_SERIES_MULTICLASS,
    }

    def _is_stackable(estimator_class):
        problem_types = {handle_problem_types(pt) for pt in estimator_class.supported_problem_types}
        if problem_types != classification_types:
            return False
        if estimator_class.model_family in _nonstackable_model_families:
            return False
        return estimator_class.model_family != ModelFamily.ENSEMBLE

    return [
        estimator_class
        for estimator_class in _all_estimators()
        if _is_stackable(estimator_class)
    ]

예제 #7

0

파일 보기

파일: conftest.py 프로젝트: stjordanis/evalml

def all_pipeline_classes():
    """Build one mock pipeline per (estimator, supported problem type) pair.

    The stacked ensemble classifier and regressor are left out; every other
    class from ``_all_estimators()`` contributes one
    ``create_mock_pipeline(estimator, problem_type)`` result for each entry
    in its ``supported_problem_types``.
    """
    stacked_ensembles = (StackedEnsembleClassifier, StackedEnsembleRegressor)
    return [
        create_mock_pipeline(estimator, problem_type)
        for estimator in _all_estimators()
        if estimator not in stacked_ensembles
        for problem_type in estimator.supported_problem_types
    ]

예제 #8

0

파일 보기

def test_serialization_protocol(mock_cloudpickle_dump, tmpdir):
    """Check which pickle protocol ``save`` forwards to the mocked cloudpickle dump.

    Without an explicit protocol, the single recorded dump call must carry
    ``protocol=cloudpickle.DEFAULT_PROTOCOL``; with ``pickle_protocol=42`` it
    must carry ``protocol=42``.
    """
    def _assert_single_dump_with(expected_protocol):
        # Each recorded call is indexed as (args, kwargs); the protocol is
        # read from the keyword arguments.
        recorded = mock_cloudpickle_dump.call_args_list
        assert len(recorded) == 1
        assert recorded[0][1]['protocol'] == expected_protocol

    component = LogisticRegressionClassifier()
    path = os.path.join(str(tmpdir), 'pipe.pkl')

    component.save(path)
    _assert_single_dump_with(cloudpickle.DEFAULT_PROTOCOL)

    # Clear the recorded calls so the second save is counted on its own.
    mock_cloudpickle_dump.reset_mock()

    component.save(path, pickle_protocol=42)
    _assert_single_dump_with(42)


@pytest.mark.parametrize("estimator_class", _all_estimators())
def test_estimators_accept_all_kwargs(estimator_class,
                                      logistic_regression_binary_pipeline_class,
                                      linear_regression_pipeline_class):
    try:
        estimator = estimator_class()
    except EnsembleMissingPipelinesError:
        if estimator_class == StackedEnsembleClassifier:
            estimator = estimator_class(input_pipelines=[logistic_regression_binary_pipeline_class(parameters={})])
        elif estimator_class == StackedEnsembleRegressor:
            estimator = estimator_class(input_pipelines=[linear_regression_pipeline_class(parameters={})])
    if estimator._component_obj is None:
        pytest.skip(f"Skipping {estimator_class} because does not have component object.")
    if estimator_class.model_family == ModelFamily.ENSEMBLE:
        params = estimator.parameters
    else: