def test_default_model():
    """Check LOFOImportance when no explicit model is passed.

    Runs once against the ``target`` column with MAE scoring and once
    against ``binary_target`` with ROC AUC scoring, then verifies that every
    feature received an importance value, that the input frame was left
    untouched, and that ``E`` is ranked first.
    """
    # NOTE(review): another ``test_default_model`` appears later in this
    # source; if both live in the same module this one is shadowed - confirm.
    df = generate_unstructured_test_data(1000)
    # Snapshot taken before any LOFO run so mutations can be detected later.
    df_checkpoint = df.copy()
    features = ["A", "B", "C", "D", "E"]

    # First run: scored with negative mean absolute error on 'target'.
    regression_lofo = LOFOImportance(
        df,
        features,
        'target',
        cv=4,
        scoring='neg_mean_absolute_error',
    )
    regression_importance = regression_lofo.get_importance()
    n_importance_rows = regression_importance.shape[0]
    assert len(features) == n_importance_rows, "Missing importance value for some features!"

    # Second run: scored with ROC AUC on 'binary_target'.
    binary_lofo = LOFOImportance(
        df,
        features,
        'binary_target',
        cv=4,
        scoring='roc_auc',
    )
    binary_importance = binary_lofo.get_importance()

    assert df.equals(df_checkpoint), "LOFOImportance mutated the dataframe!"

    top_feature = binary_importance["feature"].values[0]
    assert top_feature == "E", "Most important feature is different than E!"
def test_default_model():
    """Check LOFOImportance on a Dataset when no explicit model is passed.

    Runs once against the ``target`` column with MAE scoring and once
    against ``binary_target`` with ROC AUC scoring. Both runs must report
    ``E`` under ``fit_params["categorical_feature"]``; the first must yield
    one importance row per feature, and the second must leave the frame
    unchanged and rank ``E`` first.
    """
    df = generate_unstructured_test_data(1000)
    features = ["A", "B", "C", "D", "E"]

    # First run: scored with negative mean absolute error on "target".
    regression_dataset = Dataset(df=df, target="target", features=features)
    regression_lofo = LOFOImportance(
        regression_dataset,
        cv=4,
        scoring='neg_mean_absolute_error',
    )
    regression_importance = regression_lofo.get_importance()

    regression_categoricals = regression_lofo.fit_params["categorical_feature"]
    assert "E" in regression_categoricals, "Categorical feature is not detected!"
    n_importance_rows = regression_importance.shape[0]
    assert len(features) == n_importance_rows, "Missing importance value for some features!"

    # Snapshot is taken here, i.e. after the first run, so the mutation check
    # below only covers the second run.
    df_checkpoint = df.copy()

    # Second run: scored with ROC AUC on "binary_target".
    binary_dataset = Dataset(df=df, target="binary_target", features=features)
    binary_lofo = LOFOImportance(binary_dataset, cv=4, scoring='roc_auc')
    binary_importance = binary_lofo.get_importance()

    binary_categoricals = binary_lofo.fit_params["categorical_feature"]
    assert "E" in binary_categoricals, "Categorical feature is not detected!"
    assert df.equals(df_checkpoint), "LOFOImportance mutated the dataframe!"

    top_feature = binary_importance["feature"].values[0]
    assert top_feature == "E", "Most important feature is different than E!"
def test_dataset():
    """Check Dataset's feature-group validation and automatic grouping.

    Three invalid ``feature_groups`` arguments must each make the
    constructor raise. A valid construction with ``auto_group_threshold=0.5``
    must then replace ``D`` and ``D2`` with a combined ``"D & D2"`` feature
    and keep the user-supplied group ``"F"``.
    """
    df = generate_unstructured_test_data(1000, text=True)
    features = ["A", "B", "C", "D", "D2", "E"]

    # The constructor calls below are deliberately NOT wrapped in ``assert``:
    # an AssertionError from a falsy instance would itself satisfy
    # ``pytest.raises(Exception)`` and hide a constructor that never raised.

    # Exception: feature group row count is not equal to the features' row count
    feature_groups = {
        "interactions": df[["A", "B"]].values[:10] * df[["C", "D"]].values[:10]
    }
    with pytest.raises(Exception):
        Dataset(
            df=df,
            target="binary_target",
            features=features,
            feature_groups=feature_groups,
        )

    # Exception: Feature group name A is in use by other features
    feature_groups = {"A": df[["A", "B"]].values * df[["C", "D"]].values}
    with pytest.raises(Exception):
        Dataset(
            df=df,
            target="binary_target",
            features=features,
            feature_groups=feature_groups,
        )

    # Exception: Feature group type is not numpy.ndarray or scipy.csr.csr_matrix
    feature_groups = {"F": df[["A", "B"]]}
    with pytest.raises(Exception):
        Dataset(
            df=df,
            target="binary_target",
            features=features,
            feature_groups=feature_groups,
        )

    # Valid construction: ndarray-backed group plus automatic grouping.
    d = Dataset(
        df=df,
        target="binary_target",
        features=features,
        feature_groups={"F": df[["A", "B"]].values},
        auto_group_threshold=0.5,
    )

    # One condition per assert so a failure names the exact expectation.
    assert "D" not in d.feature_names
    assert "D2" not in d.feature_names
    assert "D & D2" in d.feature_names
    assert "F" in d.feature_groups.keys()