コード例 #1
0
def test_default_model():
    """Run LOFOImportance twice without passing an explicit model.

    The first run uses the 'target' column with neg_mean_absolute_error and
    checks that one importance row is produced per feature. The second run
    uses the 'binary_target' column with roc_auc and checks that the input
    dataframe still equals its initial snapshot and that the first row of the
    importance table is feature "E".
    """
    frame = generate_unstructured_test_data(1000)
    snapshot = frame.copy()  # taken before any run; compared at the end
    feature_cols = ["A", "B", "C", "D", "E"]

    # Run 1: 'target' column, scored with neg_mean_absolute_error.
    target_lofo = LOFOImportance(
        frame, feature_cols, 'target', cv=4, scoring='neg_mean_absolute_error')
    target_importance = target_lofo.get_importance()
    row_count = target_importance.shape[0]
    assert len(feature_cols) == row_count, "Missing importance value for some features!"

    # Run 2: 'binary_target' column, scored with roc_auc.
    binary_lofo = LOFOImportance(
        frame, feature_cols, 'binary_target', cv=4, scoring='roc_auc')
    binary_importance = binary_lofo.get_importance()

    assert frame.equals(snapshot), "LOFOImportance mutated the dataframe!"
    first_feature = binary_importance["feature"].values[0]
    assert first_feature == "E", "Most important feature is different than E!"
コード例 #2
0
def test_default_model():
    """Run LOFOImportance on Dataset inputs without passing an explicit model.

    Both runs check that "E" appears in ``fit_params["categorical_feature"]``.
    The first run ('target', neg_mean_absolute_error) also checks that one
    importance row exists per feature. The second run ('binary_target',
    roc_auc) also checks that the dataframe is unchanged and that the first
    row of the importance table is feature "E".
    """
    frame = generate_unstructured_test_data(1000)
    feature_cols = ["A", "B", "C", "D", "E"]

    # Run 1: 'target' column, scored with neg_mean_absolute_error.
    target_dataset = Dataset(df=frame, target="target", features=feature_cols)
    target_lofo = LOFOImportance(
        target_dataset, cv=4, scoring='neg_mean_absolute_error')
    target_importance = target_lofo.get_importance()
    detected = target_lofo.fit_params["categorical_feature"]
    assert "E" in detected, "Categorical feature is not detected!"
    row_count = target_importance.shape[0]
    assert len(feature_cols) == row_count, "Missing importance value for some features!"

    # Snapshot is taken after run 1, so the mutation check below only covers
    # the second Dataset/LOFOImportance run.
    snapshot = frame.copy()

    # Run 2: 'binary_target' column, scored with roc_auc.
    binary_dataset = Dataset(
        df=frame, target="binary_target", features=feature_cols)
    binary_lofo = LOFOImportance(binary_dataset, cv=4, scoring='roc_auc')
    binary_importance = binary_lofo.get_importance()

    detected = binary_lofo.fit_params["categorical_feature"]
    assert "E" in detected, "Categorical feature is not detected!"
    assert frame.equals(snapshot), "LOFOImportance mutated the dataframe!"
    first_feature = binary_importance["feature"].values[0]
    assert first_feature == "E", "Most important feature is different than E!"
コード例 #3
0
def test_dataset():
    """Check Dataset's rejection of invalid feature groups and its auto-grouping.

    Three invalid ``feature_groups`` arguments are each expected to make the
    Dataset constructor raise. A valid construction with
    ``auto_group_threshold=0.5`` is then expected to expose a combined
    "D & D2" feature name in place of "D" and "D2", and to keep the
    user-supplied group "F".
    """
    df = generate_unstructured_test_data(1000, text=True)
    features = ["A", "B", "C", "D", "D2", "E"]

    # The constructor is called directly inside each ``pytest.raises`` block.
    # Wrapping it in ``assert`` would let an AssertionError satisfy the
    # expectation, and the whole call would be stripped under ``python -O``.
    # NOTE(review): ``Exception`` is broad; narrow it once the concrete
    # exception type raised by Dataset is confirmed.

    # Exception: feature group row count is not equal to the features' row count
    feature_groups = {
        "interactions": df[["A", "B"]].values[:10] * df[["C", "D"]].values[:10]
    }
    with pytest.raises(Exception):
        Dataset(df=df,
                target="binary_target",
                features=features,
                feature_groups=feature_groups)

    # Exception: Feature group name A is in use by other features
    feature_groups = {"A": df[["A", "B"]].values * df[["C", "D"]].values}
    with pytest.raises(Exception):
        Dataset(df=df,
                target="binary_target",
                features=features,
                feature_groups=feature_groups)

    # Exception: Feature group type is not numpy.ndarray or scipy.csr.csr_matrix
    feature_groups = {"F": df[["A", "B"]]}
    with pytest.raises(Exception):
        Dataset(df=df,
                target="binary_target",
                features=features,
                feature_groups=feature_groups)

    d = Dataset(df=df,
                target="binary_target",
                features=features,
                feature_groups={"F": df[["A", "B"]].values},
                auto_group_threshold=0.5)
    # One condition per assert so a failure names the exact expectation.
    assert "D" not in d.feature_names
    assert "D2" not in d.feature_names
    assert "D & D2" in d.feature_names
    assert "F" in d.feature_groups.keys()