Esempio n. 1
0
def experiment():
    """Collect the value returned by ``main`` for every cost mode and depth.

    ``main(cost_mode=..., max_depth=...)`` is called first for "gini-index"
    and then for "cross-entropy", with ``max_depth`` running from 0 through 5
    for each mode. The collected results are printed before being returned.

    Returns:
        A two-element list: index 0 holds the six "gini-index" results and
        index 1 holds the six "cross-entropy" results, each ordered by depth.
    """
    cost_modes = ("gini-index", "cross-entropy")
    accuracy = [
        [main(cost_mode=mode, max_depth=depth) for depth in range(6)]
        for mode in cost_modes
    ]
    print(accuracy)
    return accuracy
# Persist each data split as a CSV file: the feature columns are named after
# feature_names and the labels are stored in an extra 'class' column.
df_train = pd.DataFrame(X_train)
df_train.columns = feature_names
df_train['class'] = y_train
df_train.to_csv(data_file_train)

df_valid = pd.DataFrame(X_valid)
df_valid.columns = feature_names
df_valid['class'] = y_valid
df_valid.to_csv(data_file_valid)

df_test = pd.DataFrame(X_test)
df_test.columns = feature_names
df_test['class'] = y_test
df_test.to_csv(data_file_test)

##### learning the tree and testing
# decision_tree.main() is called without arguments; per the original note its
# inputs are expected as command-line arguments set in the run/debug
# configuration, as described at https://github.com/ryanmadden/decision-tree
import decision_tree
decision_tree.main()

# the result -> results.csv (presumably written by decision_tree.main())
from sklearn import metrics
# Open the file in a context manager so the handle is closed once parsed,
# instead of leaking the object returned by a bare open() call.
with open('results.csv', 'r') as results_file:
    df_result = pd.read_csv(results_file)
y_pred = df_result['class'].values
accuracy = metrics.accuracy_score(y_test, y_pred)
print('accuracy of C4.5 tree: %.3f' % accuracy)

print(' - PY131 -')

Esempio n. 3
0
def main():
    """Invoke the ``main`` entry point of each model module, one after another.

    The modules run in this order: decision_tree, neural_network,
    k_nearest_neighbors, boosted_dtree, svm.
    """
    model_modules = (
        decision_tree,
        neural_network,
        k_nearest_neighbors,
        boosted_dtree,
        svm,
    )
    for module in model_modules:
        module.main()
Esempio n. 4
0
def train_decision_tree(X, y, tscv, grid_search=False):
    """Run the ``decision_tree`` module on ``X``/``y`` with splits from ``tscv``.

    Args:
        X: Passed through to ``decision_tree.main`` and to ``tscv.split``.
        y: Passed through to ``decision_tree.main``.
        tscv: Object exposing ``split(X)``; a fresh ``tscv.split(X)`` result
            is handed to each call below.
        grid_search: When truthy, ``decision_tree.grid_search`` is also
            called with the same kind of arguments after ``main``.
    """
    import decision_tree as tree_module

    tree_module.main(X, y, tscv.split(X))
    if not grid_search:
        return
    tree_module.grid_search(X, y, tscv.split(X))