# Demo driver for the decision-tree chapter: plots a canned tree fixture.
# NOTE(review): in the source this whole chunk was collapsed onto one
# physical line starting with '#', which turned every statement into a
# comment (nothing ran).  Restored to the intended multi-line layout.

# shannonEnt = Trees.calcShannonEnt(myDat)
# print shannonEnt
# retDataSet = Trees.splitDataSet(myDat, 0, 1)
# print retDataSet
# bestFeature = Trees.chooseBestFeatureToSplit(myDat)
# print bestFeature
# tree = Trees.createTree(myDat, labels)
# print tree

'''
# TreePlotter
'''
# TreePlotter.createPlot()
tree = TreePlotter.retrieveTree(0)  # canned tree fixture #0 (no training needed)
# numLeafs = TreePlotter.getNumLeafs(tree)
# depth = TreePlotter.getTreeDepth(tree)
# print "leafs nums: %d, depth: %d" % (numLeafs, depth)
TreePlotter.createPlot(tree)  # draw the tree with matplotlib

'''
# Classify
'''
# myDat, labels = Trees.createDataSet()
# myTree = TreePlotter.retrieveTree(0)
# NOTE(review): garbled extraction -- this chunk begins mid-way through a
# tree-dictionary literal (its opening "myTree = { ..." is missing) and ends
# on a "for px, py in zip(...):" header whose loop body is missing.  The
# visible middle plots six 2-D points with matplotlib and annotates each
# with its coordinates ("图与网格结构的可视化" = "visualization of the graph
# and grid structure").  Not safe to reformat without the missing head and
# tail; restore them from the original file before editing this chunk.
0: 'lef node', 1: { 'level 2': { 0: 'leaf node', 1: 'leaf node' } }, 2: { 'lead 2': { 0: 'leaf node', 1: 'leaf node' } } } } tp.createPlot(myTree) # 图与网格结构的可视化 data = np.mat([[0.1, 0.1], [0.9, 0.5], [0.3, 0.6], [0.7, 0.2], [0.1, 0.7], [0.5, 0.1]]) m, n = np.shape(data) fig = plt.figure() ax = fig.add_subplot(1, 1, 1) ax.scatter(data.T[0].tolist(), data.T[1].tolist(), color='blue', marker='o') for point in data.tolist(): plt.annotate("(" + str(point[0]) + "," + str(point[1]) + ")", xy=(point[0], point[1])) xList = [] yList = [] for px, py in zip(data.T.tolist()[0], data.T.tolist()[1]):
'''
Lenses example pipeline (docstring truncated in the source -- step 1 is
missing from the visible text):
2. Prepare: Parse tab-delimited lines.
3. Analyze: Quickly review data visually to make sure it was parsed
   properly. The final tree will be plotted with createPlot().
4. Train: Use createTree() from section 3.1.
5. Test: Write a function to descend the tree for a given instance.
6. Use: Persist the tree data structure so it can be recalled without
   building the tree; then use it in any application.
'''
from numpy import *

import LoadData as ld
import DicisionTree as dts
import TreePlotter as tplt

# Build the lenses decision tree from the raw data file and plot it.
dataSet, labels = ld.createDataSet('lenses.txt')
# testLabels = zeros(len(labels), 1)  # rejected approach
# testLabels = labels                 # rejected approach
lensesTree = dts.createTree(dataSet, labels)
print(lensesTree)
tplt.createPlot(lensesTree)
print(labels)

# Reuse the training data as test data: this measures resubstitution
# error, not generalization error.
dataSet, testLabels = ld.createDataSet('lenses.txt')
errorCount = 0
numTestVec = 0
for testVec in dataSet:
    numTestVec += 1.0
    classLabel = dts.classify(lensesTree, testLabels, testVec)
    if classLabel != testVec[-1]:  # last column is the true class label
        errorCount += 1
errorRate = float(errorCount) / numTestVec
print("the error rate of this test is: %f" % errorRate)
#encoding=utf-8
"""Build a decision tree for the contact-lenses data set and plot it."""
import TreePlotter
import Decide_Tree_library

# readlines() yields one string per line; strip() + split("\t") turns each
# line into a list of the four feature values plus the class label.
# 'with' closes the file handle (the original never closed it).
with open("lenses.txt") as fr:
    lenses = [line.strip().split("\t") for line in fr.readlines()]
lensesLabels = ["age", "prescript", "astigmatic", "tearrate"]
lensesTree = Decide_Tree_library.createTree(lenses, lensesLabels)
print(lensesTree)
TreePlotter.createPlot(lensesTree)
import os
import pickle


def saveDump(path, obj):
    """Pickle *obj* to the file at *path*.

    NOTE(review): the original definition was truncated in the source --
    only ``pickle.dump(obj, file)`` and ``file.close()`` were visible.
    The header and the open() call are reconstructed; confirm against
    the original file.
    """
    file = open(path, "wb")
    pickle.dump(obj, file)
    file.close()


def readDump(path):
    """Unpickle and return the object stored at *path*."""
    file = open(path, "rb")
    data = pickle.load(file)
    file.close()
    return data


# Guarded so importing this module does not run the training pipeline.
if __name__ == "__main__":
    dtree = ID3Tree()
    if not os.path.exists(ID3SavePath):  # no persisted model on disk yet
        print("生成数据")
        dtree.dataSet = loadDataSet(ID3LoadPath)
        dtree.labels = ["age", "revenue", "student", "credit"]
        # dtree.dataSet = dataSet
        # dtree.labels = labels
        print("训练数据")
        dtree.train()
        print("持久化数据")
        saveDump(ID3SavePath, dtree.getDumpData())
    else:
        print("读取持久化")
        dtree.loadDumpData(readDump(ID3SavePath))
    print("正在生成树")
    tp.createPlot(dtree.tree)
    print("预测结果为:", dtree.predict(dtree.tree, [0, 0, 0, 0]))
# Demo driver for the Trees module: entropy, splitting, best feature,
# tree construction, and plotting.
# NOTE(review): the source had this collapsed onto one physical line,
# which is a syntax error; restored to the intended multi-line layout.
import operator
import os
import Trees

# Entropy before and after corrupting one class label (should rise).
myData, labels = Trees.createDataSet()
print(myData)
print(labels)
print(Trees.calcShannonEnt(myData))

myData[0][-1] = 'maybe'  # introduce a third class value
print(myData)
print(labels)
print(Trees.calcShannonEnt(myData))

# Splitting and feature selection on a fresh, unmutated data set.
myDat, labels = Trees.createDataSet()
print(myDat)
print(Trees.splitDataSet(myDat, 0, 1))
# NOTE(review): this call uses the mutated myData, not myDat --
# possibly a typo for myDat; kept as-is to preserve behavior.
print(Trees.splitDataSet(myData, 0, 0))
print(Trees.chooseBestFeatureToSplit(myDat))

myTree = Trees.createTree(myDat, labels)
print(myTree)

import TreePlotter
TreePlotter.createPlot()
import pickle


def storeTree(inputTree, filename):
    """Serialize *inputTree* to *filename* with pickle.

    NOTE(review): the original definition was truncated in the source --
    only the trailing ``fw.close()`` was visible.  The header, open()
    and dump() calls are reconstructed; confirm against the original.
    """
    fw = open(filename, "wb")
    pickle.dump(inputTree, fw)
    fw.close()


def grabTree(filename):
    """Load and return a tree previously pickled by storeTree().

    The original leaked the file handle; 'with' guarantees it closes.
    """
    with open(filename, "rb") as fr:
        return pickle.load(fr)


# Guarded so importing this module does not run the demo script.
if __name__ == "__main__":
    dataSet, labels = createDataSet()
    # shannonEnt = calcShannoEnt(dataSet)
    # print(shannonEnt)
    # retDataSet = splitDataSet(dataSet, 0, 1)  # rows with feature 0 == 1, feature removed
    # print(dataSet)
    # print(retDataSet)
    # bestFeature = chooseBestFeatureToSplit(dataSet)
    # print(bestFeature)
    # myTree = createTree(dataSet, labels)
    # print(myTree)

    # Apply the algorithm to the lenses data set.
    fr = open('/Users/lixiwei-mac/Documents/IdeaProjects/MachineLearningInAction/DecisionTree/lenses.txt')
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRage']
    lensesTree = createTree(lenses, lensesLabels)
    print(lensesTree)
    treePlotter.createPlot(lensesTree)
# NOTE(review): garbled extraction -- this chunk is the tail of a
# pandas-based generate_decision_tree(data_set, eps) (its signature and
# base-case checks are missing) plus a complete __main__ driver.  Visible
# logic: pick the feature with the highest information gain; if the gain
# is below eps return the majority class; otherwise, for each value of
# that feature, filter the DataFrame, drop the column, and recurse,
# collecting subtrees under {feature: {value: subtree}}.  Restore the
# missing function head from the original file before reformatting --
# guessing the stopping conditions here risks infinite recursion.
# 获取信息增益最大的特征及其增益 highest_gain_feature, highest_gain = get_feature_with_highest_Gain( data_set) # 增益小于ε，单一节点，返回实例数最大的类 if highest_gain < eps: return get_most_common_class(data_set) # 构建树 decision_tree_dict = {highest_gain_feature: {}} # 对每个最高增益特征的取值进行分割数据集，并进行递归调用生成树 feature_values = set(data_set[highest_gain_feature]) for one_value in feature_values: # 分割D divided_data_set = data_set[data_set[highest_gain_feature] == one_value] # 去除列，A = A - {Ak}i divided_data_set = divided_data_set.drop(labels=highest_gain_feature, axis=1) # 生成子树 decision_tree_dict[highest_gain_feature][ one_value] = generate_decision_tree(divided_data_set, eps) return decision_tree_dict if __name__ == '__main__': data_set = init_data('resources/lenses.txt') decision_tree = generate_decision_tree(data_set, eps=0.0001) print(decision_tree) TreePlotter.createPlot(decision_tree) # print(data_set[(data_set['tearRate'] == 'normal') & (data_set['astigmatic'] == 'yes') & (data_set['prescript'] == 'myope')]) # print(data_set[data_set['tearRate'] == 'reduced'])
def lensesStudy(filepath):
    """Build a decision tree from the lenses data file and plot it.

    filepath: path to a tab-delimited file; each row holds the four
    feature values (age, prescript, astigmatic, tearRate) followed by
    the class label.
    """
    # 'with' guarantees the handle is closed (the original leaked it).
    with open(filepath) as fr:
        lenses = [inst.strip().split("\t") for inst in fr.readlines()]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    TreePlotter.createPlot(lensesTree)