def test_feature_importances_w_subestimators():
    """Smoke test: an ensemble whose sub-estimators each expose
    feature_importances_ should be plottable without error."""
    per_tree_scores = [
        [0.12, 0.10, 0.8, 0.06, 0.03],
        [0.10, 0.10, 0.8, 0.06, 0.01],
        [0.09, 0.01, 0.9, 0.12, 0.02],
        [0.12, 0.10, 0.8, 0.06, 0.01],
    ]

    # Build one mock "tree" per score vector instead of four hand-rolled mocks.
    trees = []
    for scores in per_tree_scores:
        tree = Mock()
        tree.feature_importances_ = np.array(scores)
        trees.append(tree)

    rf = Mock()
    rf.estimators_ = trees
    rf.feature_importances_ = np.array([0.12, 0.10, 0.8, 0.06, 0.03])

    plot.feature_importances(rf)
u'seaborn-colorblind', u'seaborn-deep', u'seaborn-whitegrid', u'seaborn-bright', u'seaborn-poster', u'seaborn-muted', u'seaborn-paper', u'seaborn-white', u'seaborn-pastel', u'seaborn-dark', u'seaborn-dark-palette'] ''' my_dpi = 96 plt.style.use('ggplot') plt.rcParams["figure.figsize"] = (11.69, 8.27) plot.feature_importances(classifier, feature_names=feature_names) # plt.xlabel('Feature Names') plt.ylabel('Feature Importance Score (%)') # plt.title('Features Importance') plt.gca().xaxis.set_minor_formatter(ticker.NullFormatter()) plt.xticks(rotation=90) f = plt.gcf() f.subplots_adjust(bottom=0.4) plt.savefig("out-stats-graphs/RF_Feature_Importance1.pdf") plt.style.use('fivethirtyeight') plt.rcParams["figure.figsize"] = (1164 / my_dpi, 1024 / my_dpi) plot.feature_importances(classifier, feature_names=feature_names) plt.ylabel('Feature Importance Score (%)') plt.xlabel('Feature Names') plt.gca().xaxis.set_minor_formatter(ticker.NullFormatter())
def test_feature_importances_feature_names():
    """Smoke test: explicit feature names are accepted by the plot."""
    importances = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    names = ['thing_%s' % suffix for suffix in 'abcde']
    plot.feature_importances(importances, feature_names=names)
def test_feature_importances_top3():
    """Smoke test: restricting the plot to the top 3 features works."""
    scores = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(scores, top_n=3)
def test_feature_importances_from_array():
    """Smoke test: a bare numpy array of importances can be plotted
    directly, without wrapping it in an estimator."""
    scores = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(scores)
def test_feature_importances():
    """Smoke test: any object exposing feature_importances_ (duck-typed
    estimator) should be accepted by the plot."""
    estimator = Mock()
    estimator.feature_importances_ = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(estimator)
def test_feature_importances(self):
    """Passing no model/array must raise ValueError with a helpful message."""
    # assertRaisesRegexp was a deprecated alias and was removed in
    # Python 3.12; assertRaisesRegex is the supported spelling.
    with self.assertRaisesRegex(ValueError, "needed to plot"):
        plot.feature_importances(None)
""" Feature importances plot """ import matplotlib.pyplot as plt from sklearn import datasets from sklearn.ensemble import RandomForestClassifier from sklearn.model_selection import train_test_split from sklearn_evaluation import plot X, y = datasets.make_classification(200, 20, n_informative=5, class_sep=0.65) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) model = RandomForestClassifier(n_estimators=1) model.fit(X_train, y_train) # plot all features ax = plot.feature_importances(model) plt.show() # only top 5 plot.feature_importances(model, top_n=5) plt.show()
"""Example: plot the top-5 feature importances of a random forest."""
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
# BUG FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

from sklearn_evaluation import plot

# 200 samples, 10 features, 5 informative (third positional argument);
# class_sep controls how separable the two classes are.
X, y = datasets.make_classification(200, 10, 5, class_sep=0.65)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

est = RandomForestClassifier()
est.fit(X_train, y_train)

plot.feature_importances(est, top_n=5)
plt.show()