def test_non_fitted_error(df_single):
    """Check that calling ``transform`` before ``fit`` raises NotFittedError.

    NOTE(review): this file defines ``test_non_fitted_error`` a second time
    further down; the later definition rebinds the name at import time, so
    pytest only collects that one. Consider renaming or removing one of them.
    """
    X, y = df_single

    # Build the transformer outside the ``raises`` block so that only the
    # ``transform`` call is allowed to satisfy the expected exception.
    transformer = SmartCorrelatedSelection()

    # when fit is not called prior to transform
    with pytest.raises(NotFittedError):
        transformer.transform(X)

    # NOTE(review): this instance is built but never fitted or asserted on in
    # the visible code -- it looks like a leftover from another test. Kept so
    # the constructor call is still exercised; confirm and remove if unneeded.
    transformer = SmartCorrelatedSelection(
        variables=None,
        method="pearson",
        threshold=0.8,
        missing_values="raise",
        selection_method="model_performance",
        estimator=RandomForestClassifier(n_estimators=10, random_state=1),
        scoring="roc_auc",
        cv=3,
    )
def test_non_fitted_error(df_single):
    """Check that calling ``transform`` before ``fit`` raises NotFittedError."""
    # Only the feature frame is needed; the target is discarded.
    X, _ = df_single

    # Build the transformer outside the ``raises`` block so that only the
    # ``transform`` call is allowed to satisfy the expected exception.
    transformer = SmartCorrelatedSelection()

    # when fit is not called prior to transform
    with pytest.raises(NotFittedError):
        transformer.transform(X)
def test_KFold_generators(df_test):
    """Check model-performance selection with different ``cv`` arguments.

    The selector is fitted once with a ``KFold`` splitter, once with a
    ``StratifiedKFold`` splitter and once with ``cv=None``; the same
    fit/transform checks are applied after each fit.
    """
    X, y = df_test

    cv_options = (
        KFold(n_splits=3),  # Kfold
        StratifiedKFold(n_splits=3),  # Stratified
        None,  # None
    )

    for cv in cv_options:
        sel = SmartCorrelatedSelection(
            variables=None,
            method="pearson",
            threshold=0.8,
            missing_values="raise",
            selection_method="model_performance",
            estimator=RandomForestClassifier(n_estimators=10, random_state=1),
            scoring="roc_auc",
            cv=cv,
        )
        sel.fit(X, y)
        Xtransformed = sel.transform(X)

        # test fit attrs
        assert isinstance(sel.features_to_drop_, list), f"cv={cv!r}"

        # Test membership per element. Filtering first and then calling
        # all() on the surviving names only checks that the names are truthy,
        # so it cannot detect a feature that is missing from X.
        assert all(
            x in X.columns for x in sel.features_to_drop_
        ), f"cv={cv!r}"

        # At least one feature must survive the selection.
        assert len(sel.features_to_drop_) < X.shape[1], f"cv={cv!r}"
        assert not Xtransformed.empty, f"cv={cv!r}"

        # No dropped feature may remain in the transformed frame.
        assert all(
            x not in sel.features_to_drop_ for x in Xtransformed.columns
        ), f"cv={cv!r}"