-
Notifications
You must be signed in to change notification settings - Fork 0
/
Random Forest.py
56 lines (47 loc) · 1.99 KB
/
Random Forest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 13 21:18:03 2018
@author: sherry
"""
import sklearn
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.cross_validation import train_test_split
from sklearn import preprocessing
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
df = pd.read_csv('/Users/sherry/Downloads/wine.csv')
#df.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium','Total phenols', 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 'Proline']
#print('Class labels', np.unique(df['Class label']))
df.head()
X, y = df.iloc[:, 1:].values,df.iloc[:, 13].values
X_train, X_test, y_train, y_test =train_test_split(X,y,test_size=0.1,stratify=y)
from sklearn.ensemble import RandomForestClassifier
forest = RandomForestClassifier( n_estimators=500)
for i in [1,2,5,10,25,50,100,500]:
forest.n_estimators=i
# y_pred=forest.predict(X_test)
score=cross_val_score(estimator=forest,X=X_train,y=y_train,cv=10,scoring='accuracy')
# score=metrics.accuracy_score(y_test, y_pred)
print(np.mean(score))
forest.fit(X_train, y_train)
feat_labels = df.columns[:-1]
from sklearn.model_selection import GridSearchCV
param_grid={'n_estimators':[1,2,5,10,25,50,100,500]}
gs = GridSearchCV(estimator=forest,param_grid=param_grid,scoring='accuracy',cv=10)
gs.fit(X_train,y_train)
gs.best_estimator_.fit(X_train, y_train)
print(gs.best_params_)
importances = forest.feature_importances_
indices = np.argsort(importances)[::-1]
for f in range(X_train.shape[1]):
print("%2d) %-*s %f" % (f + 1, 30, feat_labels[indices[f]], importances[indices[f]]))
plt.title('Feature Importance')
plt.bar(range(X_train.shape[1]), importances[indices], align='center')
plt.xticks(range(X_train.shape[1]),feat_labels[indices], rotation=90)
plt.xlim([-1, X_train.shape[1]])
plt.tight_layout()
plt.show()