Beispiel #1
0
def test_df_cat_and_num_variables_roc_auc(df_test_num_cat):
    """Check ROC-AUC based selection on a frame with mixed variable types.

    The selector is fitted with ``variables=None`` on the full fixture frame.
    The test then verifies the stored init parameters, the fitted attributes
    (categorical / numerical split, selected features, per-feature
    performance rounded to 3 decimals) and the output of ``transform``.

    Parameters
    ----------
    df_test_num_cat : fixture
        Unpacked as ``(X, y)``; ``X`` is expected to contain the columns
        ``var_A`` .. ``var_D`` referenced below.
    """
    X, y = df_test_num_cat

    init_kwargs = {
        "variables": None,
        "scoring": "roc_auc_score",
        "threshold": 0.6,
        "bins": 3,
        "strategy": "equal_width",
        "cv": 2,
        "random_state": 1,
    }
    selector = SelectByTargetMeanPerformance(**init_kwargs)
    selector.fit(X, y)

    # Expected outcome: only var_A and var_B are retained.
    expected_frame = X[["var_A", "var_B"]]
    expected_performance = {
        "var_A": 0.841,
        "var_B": 0.776,
        "var_C": 0.481,
        "var_D": 0.496,
    }
    all_columns = list(X.columns)

    # Init parameters as seen on the fitted selector.
    assert selector.variables == all_columns
    assert selector.scoring == "roc_auc_score"
    assert selector.threshold == 0.60
    assert selector.cv == 2
    assert selector.random_state == 1

    # Attributes learned during fit.
    assert selector.variables_categorical_ == ["var_A", "var_B"]
    assert selector.variables_numerical_ == ["var_C", "var_D"]
    assert selector.selected_features_ == ["var_A", "var_B"]
    for feature, score in selector.feature_performance_.items():
        # Compare at 3 decimals, the precision of the reference values.
        assert np.round(score, 3) == expected_performance[feature]

    # Transform must return exactly the retained columns.
    pd.testing.assert_frame_equal(selector.transform(X), expected_frame)
def test_categorical_variables_roc_auc(df_test_num_cat):
    """Check ROC-AUC based selection when only ``var_A`` and ``var_B`` are used.

    The fixture frame is reduced to ``var_A`` and ``var_B`` before fitting.
    With a threshold of 0.78 the test expects ``var_B`` to be dropped and
    ``transform`` to return a single-column frame holding ``var_A``.

    Parameters
    ----------
    df_test_num_cat : fixture
        Unpacked as ``(X, y)``; ``X`` must contain ``var_A`` and ``var_B``.
    """
    full_frame, y = df_test_num_cat
    X = full_frame[["var_A", "var_B"]]

    init_kwargs = {
        "variables": None,
        "scoring": "roc_auc_score",
        "threshold": 0.78,
        "cv": 2,
        "random_state": 1,
    }
    selector = SelectByTargetMeanPerformance(**init_kwargs)
    selector.fit(X, y)

    # Expected outcome: only var_A is retained.
    expected_frame = X["var_A"].to_frame()
    all_columns = list(X.columns)

    # Init parameters must be left untouched by fit.
    assert selector.variables is None
    assert selector.scoring == "roc_auc_score"
    assert selector.threshold == 0.78
    assert selector.cv == 2
    assert selector.random_state == 1

    # Attributes learned during fit.
    assert selector.variables_ == all_columns
    assert selector.variables_categorical_ == all_columns
    assert selector.variables_numerical_ == []
    assert selector.features_to_drop_ == ["var_B"]
    # NOTE: feature_performance_ is intentionally not asserted here; the
    # check was disabled in the original test (reference values were
    # var_A=0.841, var_B=0.776).

    # Transform must return exactly the retained column.
    pd.testing.assert_frame_equal(selector.transform(X), expected_frame)