import numpy as np
from sklearn.base import is_classifier
from sklearn.linear_model import LogisticRegression
from eli5.sklearn import PermutationImportance


def test_classifier(iris_train):
    X, y, feature_names, target_names = iris_train
    clf = LogisticRegression().fit(X, y)
    assert is_classifier(clf)

    # PermutationImportance should wrap the estimator transparently:
    # it is itself a classifier and delegates all prediction methods to clf.
    perm = PermutationImportance(clf, random_state=42).fit(X, y)
    assert is_classifier(perm)
    assert (perm.classes_ == [0, 1, 2]).all()
    assert np.allclose(clf.predict(X), perm.predict(X))
    assert np.allclose(clf.predict_proba(X), perm.predict_proba(X))
    assert np.allclose(clf.predict_log_proba(X), perm.predict_log_proba(X))
    assert np.allclose(clf.decision_function(X), perm.decision_function(X))
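# Note: the iris_train fixture is defined elsewhere in the test suite. A
# minimal sketch of what it could look like, assuming it is built from
# scikit-learn's bundled iris dataset (the pytest wiring and dataset choice
# are assumptions; only the return tuple is fixed by the test above):

import pytest
from sklearn.datasets import load_iris


@pytest.fixture
def iris_train():
    # Return (X, y, feature_names, target_names) as the test unpacks them.
    data = load_iris()
    return data.data, data.target, data.feature_names, list(data.target_names)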
    top=None,  # show permutation importances for all features
    feature_names=feature_names
)

from sklearn.metrics import mean_squared_error

# Coefficient of determination r2 for the training set
pipeline_score = permuter.score(X_train_transformed, y_train)
print("Coefficient of determination r2 for the training set:", pipeline_score)

# Coefficient of determination r2 for the validation set
pipeline_score = permuter.score(X_val_transformed, y_val)
print("Coefficient of determination r2 for the validation set:", pipeline_score)

# Mean squared error on the validation set
y_pred = permuter.predict(X_val_transformed)
print("Mean squared error: %.2f" % mean_squared_error(y_val, y_pred))

# Thus, Density is more important according to permutation importance than
# according to the Random Forest's built-in feature importances.

# Use the importances for feature selection
print('Shape before removing features:', X_train.shape)

# Remove features with zero importance
zero_importance = 0.0
mask = permuter.feature_importances_ > zero_importance
features1 = X_train.columns[mask]
X_train = X_train[features1]
print('Shape after removing features:', X_train.shape)

# Refit the random forest on the 22 remaining features
X_val = X_val[features1]
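# Having reduced X_train and X_val to the columns with non-zero permutation
# importance, a natural next step is to refit the random forest on the smaller
# feature set and re-score it. A minimal sketch, assuming a
# RandomForestRegressor (consistent with the r2/MSE metrics above) and that
# the remaining columns can be fed to the model directly; in the original
# pipeline they pass through a transformer first.
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Compare these against the pre-selection validation metrics printed above.
print('Validation r2 after feature selection:', model.score(X_val, y_val))
y_pred = model.predict(X_val)
print('Mean squared error: %.2f' % mean_squared_error(y_val, y_pred))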