コード例 #1
0
# -*- coding: utf-8 -*
import pandas as pd
import decision_tree

# Read the data set. The file is decoded as GB18030, so the encoding is
# passed to open() explicitly instead of relying on the platform default.
data_file_encode = "gb18030"
data_path = "../data/watermelon_3.csv"
with open(data_path, "r", encoding=data_file_encode) as csv_file:
    df = pd.read_csv(csv_file)

# Implementation of ID3: grow a tree from the complete data set.
root = decision_tree.TreeGenerate(df)

accuracy_scores = []

# k-fold cross prediction: the first k * m rows are cut into k contiguous
# test folds of m rows each; every fold is held out once while a tree is
# grown on all remaining rows.
n = len(df.index)
k = 5
m = n // k  # fold size; loop-invariant, so it is computed once up front
# NOTE(review): when n is not a multiple of k, the trailing n - k * m rows
# never land in a test fold -- they are only ever used for training.
for i in range(k):
    # positional row numbers of the i-th test fold
    test = list(range(i * m, i * m + m))

    # `test` holds positions, but DataFrame.drop() expects index *labels*.
    # Translating the positions through df.index keeps the train and test
    # rows disjoint even when df does not carry the default 0..n-1 index.
    df_train = df.drop(df.index[test])
    df_test = df.iloc[test]
    root = decision_tree.TreeGenerate(df_train)

    # accuracy
    pred_true = 0
コード例 #2
0
# Read the data set, decoding the file with the encoding configured in
# data_file_encode.
data_path = "../data/watermelon_2.csv"
with open(data_path, "r", encoding=data_file_encode) as csv_file:
    df = pd.read_csv(csv_file)

# Implementation of CART; relies on decision_tree.py.
import decision_tree

# Decision tree visualization using pydotplus.graphviz.
# Positional row numbers of the training samples; every other row of df
# ends up in df_test.
index_train = [0, 1, 2, 5, 6, 9, 13, 14, 15, 16]

df_train = df.iloc[index_train]
# index_train holds positions, but DataFrame.drop() expects index *labels*.
# Translating the positions through df.index keeps the two subsets disjoint
# even when df does not carry the default 0..n-1 index.
df_test = df.drop(df.index[index_train])

# generate a full tree
root = decision_tree.TreeGenerate(df_train)
decision_tree.DrawPNG(root, "decision_tree_full.png")
print("accuracy of full tree: %.3f" %
      decision_tree.PredictAccuracy(root, df_test))

# Pre-pruning: PrePurn receives both the training and the test frame and
# returns the tree that is drawn and scored below.
root = decision_tree.PrePurn(df_train, df_test)
decision_tree.DrawPNG(root, "decision_tree_pre.png")
pre_accuracy = decision_tree.PredictAccuracy(root, df_test)
print("accuracy of pre-purning tree: %.3f" % pre_accuracy)

# Post-pruning: grow a fresh full tree from the training rows, then hand it
# to PostPurn together with the test rows.
# NOTE(review): PostPurn's return value is discarded and the same `root`
# object is drawn afterwards, so it presumably prunes the tree in place --
# confirm against decision_tree.py.
root = decision_tree.TreeGenerate(df_train)
decision_tree.PostPurn(root, df_test)
decision_tree.DrawPNG(root, "decision_tree_post.png")
print("accuracy of post-purning tree: %.3f" %