def testClass():
    """Train a decision tree on the sample data set, persist it to disk,
    reload it, and classify two sample feature vectors.

    Relies on the module-level `tree` import (createDataSet/createTree/
    storeTree/grabTree/classify).
    """
    myDat, labels = tree.createDataSet()
    # NOTE(review): createTree is presumed to mutate `labels` in place (the
    # sibling script passes a copy) — the mutated list is then reused for
    # classify below, matching the original behavior; verify against tree.py.
    myTree = tree.createTree(myDat, labels)
    # Persist the trained tree so it can be reloaded without retraining.
    tree.storeTree(myTree, 'myTree.train')
    myTree2 = tree.grabTree('myTree.train')
    # Modernized from Python 2 `print` statements to print() calls for
    # consistency with the Python 3 code elsewhere in this file.
    testVec = [1, 0]
    print("Test ", testVec, " result: ", tree.classify(myTree2, labels, testVec))
    testVec = [1, 1]
    print("Test ", testVec, " result: ", tree.classify(myTree2, labels, testVec))
def classifyTest():
    """Build a tree from the sample data set, classify two vectors, plot it."""
    import tree as t
    import treePlotter as tp

    data_set, feature_labels = t.createDataSet()
    # A copy is passed here, presumably because createTree mutates its labels
    # argument in place — feature_labels itself stays intact for classify.
    decision_tree = t.createTree(data_set, feature_labels.copy())
    print(decision_tree)
    print(feature_labels)
    # NOTE(review): `classify` is called unqualified — presumably defined
    # elsewhere in this module (otherwise `t.classify` was intended); verify.
    print(classify(decision_tree, feature_labels, [1, 0]))
    print(classify(decision_tree, feature_labels, [1, 1]))
    tp.createPlot(decision_tree)
#!/usr/bin/python #encoding:utf-8 import tree from ScrolledText import example import plottree dataSet , labels = tree.createDataSet(); print tree.createTree(dataSet, labels) plottree.createPlot()
# -*- coding: utf-8 -*-
"""Demo: build a decision tree, classify one vector, persist and reload it."""
import tree
import copy

dataset, label = tree.createDataSet()
print(label)
# A plain `labels = label` would alias the same list object, and createTree
# mutates its labels argument in place (per the original author's note), so
# deep-copy first to keep `label` intact for the classify call below.
labels = copy.deepcopy(label)
myTree = tree.createTree(dataset, labels)
# print(myTree)
print(label)
testResult = tree.classify(myTree, label, [1, 1])
print(testResult)
# BUG FIX: the original literal "F:\NatureRecognition/tree.txt" is a
# SyntaxError in Python 3 — "\N" starts a \N{...} named-unicode escape.
# Raw strings keep the backslash literally; the path value is unchanged.
tree.storeTree(myTree, r"F:\NatureRecognition/tree.txt")
tt = tree.grabTree(r"F:\NatureRecognition/tree.txt")
print(tt)
import tree

# --- Shannon entropy of the sample data set --------------------------------
# myDat, labels = tree.createDataSet()
# print(myDat)
# print(tree.calcShannonEnt(myDat))
# myDat[0][-1] = 'maybe'
# print(myDat)
# print(tree.calcShannonEnt(myDat))
print("*****************************************************************")
# --- splitDataSet() on the simple sample data ------------------------------
# print(tree.splitDataSet(myDat, 0, 1))
# print(tree.splitDataSet(myDat, 0, 0))
print("*****************************************************************")
# --- choosing the best split feature ---------------------------------------
# myDat, labels = tree.createDataSet()
# print(myDat)
# print(tree.chooseBestFeatureToSplit(myDat))
print("*****************************************************************")
# myTree is a set of nested dicts encoding the tree structure.  The first
# key ('no surfacing') names the first splitting feature; its value is
# another dict keyed by that feature's values.  Each of those values is
# either a class label (a leaf node) or another dict (an internal decision
# node); repeating this structure yields the whole tree.  In this example
# the tree has 3 leaf nodes and 2 decision nodes.
myDat, labels = tree.createDataSet()
myTree = tree.createTree(myDat, labels)
print(myTree)
# -*- coding:utf-8 -*-
# REPL-style walkthrough of the tree module on the sample data set.
import tree

sample_data, feature_labels = tree.createDataSet()
# sample_data
# tree.calcShannonEnt(sample_data)            # entropy of the data set
# reload(tree)
# tree.splitDataSet(sample_data, 0, 1)        # rows whose feature 0 equals 1
# tree.splitDataSet(sample_data, 0, 0)
# tree.chooseBestFeatureToSplit(sample_data)  # index of the best split feature
decision_tree = tree.createTree(sample_data, feature_labels)  # build the tree
decision_tree  # bare expression: shows the tree in a REPL, no-op as a script
# -*- coding: utf-8 -*-
"""
Created on Wed May 23 09:56:25 2018

@author: 705family
"""
import os

os.chdir("C:/Users/705family/Desktop/decisiontree")

import tree

# Part 1: build a tree from the module's built-in sample data set.
data, y = tree.createDataSet()
myTree = tree.createTree(data, y)

# In[] test Part 2
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np

# Load the watermelon data set; drop the first (ID) and the last column.
data = pd.read_excel('西瓜数据集.xlsx')
data = data.iloc[:, 1:-1]

# Label-encode every remaining column, recording each column's class names.
clf = LabelEncoder()
label = [[] for _ in range(np.size(data, 1))]
for col in range(np.size(data, 1)):
    data.iloc[:, col] = clf.fit_transform(data.iloc[:, col])
    label[col][:] = list(clf.classes_)

# NOTE(review): y is taken from the already-sliced frame, so this is the
# last *feature* column rather than the original class column — confirm
# this is intended before relying on y downstream.
y = data.iloc[:, -1]
data = np.array(data)
data = data.tolist()
def testEntr():
    """Print the Shannon entropy of the module's sample data set.

    Relies on the module-level `tree` import (createDataSet/calcShannonEnt).
    """
    myDat, labels = tree.createDataSet()
    # Modernized from a Python 2 `print` statement to a print() call for
    # consistency with the Python 3 code elsewhere in this file; the printed
    # output is unchanged.
    print(tree.calcShannonEnt(myDat))