# NOTE(review): `f`, `groups` and `dataset` are bound before this point,
# outside the visible source. `f` looks like the loop variable of a
# per-column loop whose header is not shown -- confirm the indentation of the
# imputation step below when splicing this in.

# A stored 0 is treated as "missing" and replaced by a per-class median.
# `temp` is looked up with the keys 0 and 1, which the original paired with
# the row's 'outcome' value (presumably `groups` is grouped by 'outcome').
temp = groups[f].median()
zero_rows = dataset[f] == 0
for label in (0, 1):
    # Mask-based assignment touches every matching row, so the row count is
    # no longer hard-coded (the original walked labels 0..767 one by one).
    dataset.loc[zero_rows & (dataset['outcome'] == label), f] = temp[label]

# From here on `dataset` is a plain 2-D array: every column except the last
# is a feature, the last column is the label.
dataset = dataset.values
X = dataset[:, :-1]
Y = dataset[:, -1]

# Grid search over the decision tree used as the AdaBoost base estimator.
N_REPEATS = 20  # random 70/30 splits averaged per parameter combination

columns = ['feats', 'depth', 'split', 'max_leaf', 'acc']
# The original seeded the table with an all-zero row; it is kept so the row
# index written to the CSV is unchanged.
rows = [dict.fromkeys(columns, 0)]

for feats in range(2, 7):
    for dept in range(2, 6):
        for split in range(5, 40, 5):
            for leaf in range(7, 10):
                # Scores are collected per combination. The original reset
                # its accumulator only once per depth, so every average after
                # the first also contained the previous combination's mean.
                scores = []
                for _ in range(N_REPEATS):
                    X_train, X_test, Y_train, Y_test = train_test_split(
                        X, Y, test_size=0.3)
                    classifier = AdaBoostClassifier(
                        DecisionTreeClassifier(
                            max_depth=dept,
                            max_features=feats,
                            min_samples_split=split,
                            splitter="best",
                            criterion="entropy",
                            max_leaf_nodes=leaf,
                        ),
                        learning_rate=1.0,
                    )
                    classifier.fit(X_train, Y_train)
                    scores.append(classifier.score(X_test, Y_test))
                acc = sum(scores) / N_REPEATS
                print('feats:', feats, 'Depth:', dept, 'split:', split,
                      'max_leaf', leaf, 'acc:', acc * 100)
                rows.append({'feats': feats, 'depth': dept, 'split': split,
                             'max_leaf': leaf, 'acc': acc})

# Build the frame once: DataFrame.append was removed in pandas 2.0 and
# re-copied the whole table on every call.
df = pd.DataFrame(rows, columns=columns)
df.to_csv('Adaboost_result.csv', sep=',')
# NOTE(review): `field`, `groups` and `dataset` are defined outside the
# visible source; `groups` is presumably `dataset.groupby('_conds')`, since
# its per-column median is looked up by the row's '_conds' value -- confirm.

# Fill each missing value with the median of its column taken over the rows
# that share the same '_conds' value.
for f in field:
    print("field", f)
    temp = groups[f].median()
    # Visit only the rows that are actually null, taken from the frame's own
    # index, instead of probing a hard-coded 0..100944 label range.
    for i in dataset.index[dataset[f].isnull()]:
        condition = dataset.loc[i, '_conds']
        dataset.loc[i, f] = temp[condition]
        print("values: ", dataset.loc[i, f], " ; ", temp[condition])

# Whatever is still missing in these columns falls back to the column-wide
# median. Assign the result back: `dataset[col].fillna(..., inplace=True)`
# mutates a temporary selection, is deprecated in pandas 2.x, and has no
# effect on `dataset` under Copy-on-Write.
for column in ('_heatindexm', '_hum', '_tempm', '_vism'):
    dataset[column] = dataset[column].fillna(dataset[column].median())

# From here on `dataset` is a plain 2-D array: column 0 is the label, the
# remaining columns are the features.
dataset = dataset.values
X = dataset[:, 1:]
Y = dataset[:, 0]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

for dept in range(5, 8):
    for feats in range(5, 8):
        classifier = AdaBoostClassifier(
            DecisionTreeClassifier(
                max_depth=dept,
                max_features=feats,
                splitter="best",
                criterion="entropy",
            ),
            learning_rate=1.0,
        )
        classifier.fit(X_train, Y_train)
        print("depth: ", dept, "features: ", feats)
        # Score on the held-out 30%: the original scored the training rows,
        # leaving X_test / Y_test unused and reporting only training fit.
        print("Score", classifier.score(X_test, Y_test))