Python SmartCorrelatedSelection.fit Examples

Programming Language: Python

Namespace/Package Name: feature_engine.selection

Method/Function: fit

Examples at hotexamples.com: 3

Python SmartCorrelatedSelection.fit - 3 examples found. These are the top-rated real-world Python examples of feature_engine.selection.SmartCorrelatedSelection.fit extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

SmartCorrelatedSelection(15)

fit_transform(7)

fit(3)

transform(3)

Example #1

Show file

File: test_smart_correlation_selection.py Project: solegalli/feature_engine

def test_error_if_select_model_performance_and_y_is_none(df_single):
    """Expect ``ValueError`` when ``fit`` receives no target.

    The selector is configured with ``selection_method="model_performance"``
    and then fitted on the features alone.
    """
    # Only the features are needed; the target half of the fixture is
    # deliberately left out of the fit call below.
    features, _ = df_single

    forest = RandomForestClassifier(n_estimators=10, random_state=1)
    selector = SmartCorrelatedSelection(
        selection_method="model_performance",
        estimator=forest,
        scoring="roc_auc",
    )

    # Construction happens outside the context manager, so the error has to
    # come from fit itself.
    with pytest.raises(ValueError):
        selector.fit(features)

Example #2

Show file

File: test_smart_correlation_selection.py Project: tajinderpalsingh61/feature_engine

def test_error_if_select_model_performance_and_y_is_none(df_single):
    """Expect ``ValueError`` when ``fit`` is called without ``y``.

    Every constructor argument is spelled out; the selection method is
    ``"model_performance"`` and the selector is fitted on the features only.
    """
    # The target from the fixture is intentionally not forwarded to fit.
    features, _ = df_single

    classifier = RandomForestClassifier(n_estimators=10, random_state=1)
    selector = SmartCorrelatedSelection(
        variables=None,
        method="pearson",
        threshold=0.8,
        missing_values="raise",
        selection_method="model_performance",
        estimator=classifier,
        scoring="roc_auc",
        cv=3,
    )

    # The selector is built before entering the context manager, so only
    # the fit call can satisfy the expected ValueError.
    with pytest.raises(ValueError):
        selector.fit(features)

Example #3

Show file

File: test_smart_correlation_selection.py Project: thibaultbl/feature_engine

def test_KFold_generators(df_test):
    """Check that ``SmartCorrelatedSelection`` works with several ``cv`` inputs.

    The selector is fitted and applied three times, once each with a
    ``KFold`` splitter, a ``StratifiedKFold`` splitter and ``cv=None``.
    The same checks on ``features_to_drop_`` and on the transformed frame
    run for every variant.
    """
    X, y = df_test

    # Kfold, Stratified, None
    cv_options = [KFold(n_splits=3), StratifiedKFold(n_splits=3), None]

    for cv in cv_options:
        sel = SmartCorrelatedSelection(
            variables=None,
            method="pearson",
            threshold=0.8,
            missing_values="raise",
            selection_method="model_performance",
            estimator=RandomForestClassifier(n_estimators=10, random_state=1),
            scoring="roc_auc",
            cv=cv,
        )
        sel.fit(X, y)
        Xtransformed = sel.transform(X)

        # test fit attrs
        assert isinstance(sel.features_to_drop_, list)
        # Membership is tested per element. The previous form,
        # all([x for x in ... if cond]), only filtered the names and then
        # checked their truthiness, so it could never fail on membership.
        assert all(x in X.columns for x in sel.features_to_drop_)
        assert len(sel.features_to_drop_) < X.shape[1]
        assert not Xtransformed.empty
        # No dropped feature may remain in the transformed frame.
        assert all(
            x not in sel.features_to_drop_ for x in Xtransformed.columns
        )