/
AdaboostTest.py
49 lines (41 loc) · 2.01 KB
/
AdaboostTest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Third-party imports, grouped per PEP 8.
# NOTE: the original imported AdaBoostClassifier / GradientBoostingClassifier
# from the private submodules sklearn.ensemble.weight_boosting and
# sklearn.ensemble.gradient_boosting, which were removed in scikit-learn 0.24.
# The public sklearn.ensemble package is the supported import path.
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.ensemble import (
    AdaBoostClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
)
from sklearn.metrics import accuracy_score as AS
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
# Load the Pima Indians diabetes dataset and impute missing values.
# In these columns a value of 0 is physiologically impossible and marks a
# missing measurement; it is replaced with the median of the same column
# computed within the row's outcome class (0 or 1).
dataset = pd.read_csv('newPimaIndians.csv')
groups = dataset.groupby('outcome')
field = ['glucose', 'blood pressure', 'bmi', 'pedigree', 'age']
# this for loop is for adding missing values
for f in field:
    # Series of per-class medians, indexed by outcome label (0 and 1).
    temp = groups[f].median()
    # Iterate every row (the original hard-coded range(0, 768)).
    for i in range(len(dataset)):
        if dataset.loc[i, f] == 0:
            # Look the median up by the row's own outcome instead of
            # duplicating the branch for outcome==0 and outcome==1.
            dataset.loc[i, f] = temp[dataset.loc[i, 'outcome']]
dataset = dataset.values
# Every column except the last is a feature; the last column is the label.
X = dataset[:, 0:len(dataset[0]) - 1]
Y = dataset[:, (len(dataset[0]) - 1)]
# Grid search over AdaBoost(DecisionTree) hyper-parameters.  Each
# configuration is scored as the mean test accuracy over 20 random 70/30
# train/test splits; all results are printed and written to CSV.
# Rows are collected in a list because DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0; this also drops the bogus all-zero seed
# row the original wrote into the results file.
rows = []
for feats in range(2, 7):
    for dept in range(2, 6):
        for split in range(5, 40, 5):
            for leaf in range(7, 10):
                # BUG FIX: the accumulator must restart for every
                # (split, leaf) configuration.  The original reset it only
                # once per depth, so each later configuration started from
                # the previous configuration's average and every reported
                # accuracy after the first was wrong.
                acc = 0.0
                for i in range(20):
                    X_train, X_test, Y_train, Y_test = train_test_split(
                        X, Y, test_size=0.3)
                    classifier = AdaBoostClassifier(
                        DecisionTreeClassifier(max_depth=dept,
                                               max_features=feats,
                                               min_samples_split=split,
                                               splitter="best",
                                               criterion="entropy",
                                               max_leaf_nodes=leaf),
                        learning_rate=1.0)
                    classifier.fit(X_train, Y_train)
                    acc += classifier.score(X_test, Y_test)
                acc = acc / 20
                print('feats:', feats, 'Depth:', dept, 'split:', split,
                      'max_leaf', leaf, 'acc:', acc * 100)
                rows.append({'feats': feats, 'depth': dept, 'split': split,
                             'max_leaf': leaf, 'acc': acc})
df = pd.DataFrame(rows, columns=['feats', 'depth', 'split', 'max_leaf', 'acc'])
df.to_csv('Adaboost_result.csv', sep=',')