temp=groups[f].median()
    for i in range(0,768):
        if (dataset.loc[i,f]==0) & (dataset.loc[i,'outcome']==0):
            dataset.loc[i,f]=temp[0]
        if (dataset.loc[i,f]==0) & (dataset.loc[i,'outcome']==1):
            dataset.loc[i,f]=temp[1]


# Switch from the pandas object to its underlying array, then separate the
# columns: everything except the last column is the feature matrix X, and
# the last column is the target vector Y.
dataset = dataset.values
# Negative indexing avoids peeking at dataset[0] for the column count, so
# this also works when the array has zero rows.
X = dataset[:, :-1]
Y = dataset[:, -1]


# Grid search over decision-tree hyper-parameters for an AdaBoost ensemble.
# Every (feats, depth, split, max_leaf) combination is scored as the mean
# test-set score over N_REPEATS random 70/30 train/test splits, and all
# results are written to Adaboost_result.csv.
N_REPEATS = 20
RESULT_COLUMNS = ['feats', 'depth', 'split', 'max_leaf', 'acc']

# The all-zero first row is kept so the CSV layout matches earlier runs.
rows = [dict.fromkeys(RESULT_COLUMNS, 0)]
for feats in range(2, 7):
    for dept in range(2, 6):
        for split in range(5, 40, 5):
            for leaf in range(7, 10):
                # Reset the running total for every combination; carrying
                # it over would mix the previous combination's average
                # into this one.
                acc = 0
                for i in range(N_REPEATS):
                    X_train, X_test, Y_train, Y_test = train_test_split(
                        X, Y, test_size=0.3)
                    classifier = AdaBoostClassifier(
                        DecisionTreeClassifier(
                            max_depth=dept,
                            max_features=feats,
                            min_samples_split=split,
                            splitter="best",
                            criterion="entropy",
                            max_leaf_nodes=leaf,
                        ),
                        learning_rate=1.0,
                    )
                    classifier.fit(X_train, Y_train)
                    res = classifier.score(X_test, Y_test)
                    acc = acc + res
                acc = acc / N_REPEATS
                print('feats:', feats, 'Depth:', dept,'split:',split,'max_leaf',leaf, 'acc:', acc*100)
                rows.append({'feats': feats, 'depth': dept, 'split': split,
                             'max_leaf': leaf, 'acc': acc})

# Build the frame once instead of calling DataFrame.append() per row
# (removed in pandas 2.0 and quadratic in a loop). dtype=float keeps the
# numeric formatting of the row-by-row append path, which stored every
# column as float.
df = pd.DataFrame(rows, columns=RESULT_COLUMNS, dtype=float)
df.to_csv('Adaboost_result.csv', sep=',')
# Example #2
# 0
# For every column named in `field`, replace missing values with the median
# looked up by the row's '_conds' value.
# NOTE(review): presumably `groups` is `dataset` grouped by '_conds', so
# `temp` is indexed by condition label -- confirm against the definition.
for f in field:
    print("field", f)
    temp = groups[f].median()
    # Visit only the rows that are actually missing, using the frame's own
    # index, instead of scanning a hard-coded number of rows one by one.
    missing = isnull(dataset[f])
    for i in missing[missing].index:
        condition = dataset.loc[i, '_conds']
        dataset.loc[i, f] = temp[condition]
        print("values: ", dataset.loc[i, f], " ; ", temp[condition])

# Fill whatever is still missing in these columns with the column-wide
# median. The result is assigned back to the frame: calling
# fillna(inplace=True) on a column selection is chained assignment, which
# is deprecated and leaves `dataset` unchanged under pandas Copy-on-Write.
for column in ('_heatindexm', '_hum', '_tempm', '_vism'):
    dataset[column] = dataset[column].fillna(dataset[column].median())

# Switch from the pandas object to its underlying array, then separate the
# columns: column 0 is the target vector Y and every remaining column
# belongs to the feature matrix X.
dataset = dataset.values
# An open-ended slice avoids peeking at dataset[0] for the column count, so
# this also works when the array has zero rows.
X = dataset[:, 1:]
Y = dataset[:, 0]

# Sweep tree depth and max_features for an AdaBoost ensemble of entropy
# decision trees. One 70/30 split is made up front and reused for every
# configuration.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
for dept in range(5, 8):
    for feats in range(5, 8):
        classifier = AdaBoostClassifier(
            DecisionTreeClassifier(
                max_depth=dept,
                max_features=feats,
                splitter="best",
                criterion="entropy",
            ),
            learning_rate=1.0,
        )
        classifier.fit(X_train, Y_train)
        print("depth: ", dept, "features: ", feats)
        # "Score" is measured on the data the model was fitted on, so it
        # overstates how well the model generalizes.
        print("Score", classifier.score(X_train, Y_train))
        # Also report the score on the held-out split, which was otherwise
        # created but never evaluated.
        print("Test score", classifier.score(X_test, Y_test))