def test_permutation_importance_correlated_feature_regression(n_jobs):
    # A feature that is almost a copy of the target must receive a higher
    # permutation importance than every other feature.
    rng = np.random.RandomState(42)
    n_repeats = 5

    X, y = load_boston(return_X_y=True)

    # Append a barely perturbed copy of the target as an extra last column.
    noisy_target = y + rng.normal(scale=0.001, size=y.shape[0])
    X = np.column_stack([X, noisy_target])

    forest = RandomForestRegressor(n_estimators=10, random_state=42)
    forest.fit(X, y)

    result = permutation_importance(
        forest, X, y,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
    )

    # One row per feature, one column per permutation repeat.
    assert result.importances.shape == (X.shape[1], n_repeats)

    # The target-correlated feature sits in the last column, so its mean
    # importance has to exceed the mean importance of all other features.
    correlated_mean = result.importances_mean[-1]
    remaining_means = result.importances_mean[:-1]
    assert np.all(correlated_mean > remaining_means)
from mrex import datasets
from mrex.ensemble import GradientBoostingRegressor
from mrex.ensemble import RandomForestRegressor
from mrex.linear_model import LinearRegression
from mrex.ensemble import VotingRegressor

# Load the example regression data as a feature matrix and a target vector.
X, y = datasets.load_boston(return_X_y=True)

# Build three individual regressors and a voting ensemble combining them.
reg1 = GradientBoostingRegressor(n_estimators=10, random_state=1)
reg2 = RandomForestRegressor(n_estimators=10, random_state=1)
reg3 = LinearRegression()
ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])

# Fit every regressor (the ensemble last) on the full data set.
for model in (reg1, reg2, reg3, ereg):
    model.fit(X, y)

# Only the first 20 samples are predicted and plotted.
xt = X[:20]

plt.figure()
# One marker style per regressor so the predictions can be told apart.
for model, fmt, name in (
        (reg1, 'gd', 'GradientBoostingRegressor'),
        (reg2, 'b^', 'RandomForestRegressor'),
        (reg3, 'ys', 'LinearRegression'),
        (ereg, 'r*', 'VotingRegressor'),
):
    plt.plot(model.predict(xt), fmt, label=name)

# Hide the x-axis ticks and their labels.
plt.tick_params(axis='x', which='both', bottom=False, top=False,
                labelbottom=False)
y, train_size=400, test_size=200, random_state=4) max_depth = 30 regr_multirf = MultiOutputRegressor( RandomForestRegressor(n_estimators=100, max_depth=max_depth, random_state=0)) regr_multirf.fit(X_train, y_train) regr_rf = RandomForestRegressor(n_estimators=100, max_depth=max_depth, random_state=2) regr_rf.fit(X_train, y_train) # Predict on new data y_multirf = regr_multirf.predict(X_test) y_rf = regr_rf.predict(X_test) # Plot the results plt.figure() s = 50 a = 0.4 plt.scatter(y_test[:, 0], y_test[:, 1], edgecolor='k', c="navy", s=s, marker="s",