def createTree(atom_csv, undefined_percentage):
    """Build a decision tree from the CSV file named by ``atom_csv``.

    The file is loaded with ``csvOperation.readCsv``, which yields a title
    row and a list of data rows.  The last column of every row is used as
    the class value.

    Args:
        atom_csv: Passed unchanged to ``csvOperation.readCsv``.  The original
            note describes the input as the processed reachable set
            (TODO confirm against the caller).
        undefined_percentage: Forwarded to ``id3.createTree``.  According to
            the original note (dated 07/26) it is the priority criterion
            that ``id3.createTree`` was extended to accept.

    Returns:
        The result of ``id3.createTree`` when the rows carry more than one
        distinct class value (or when there are no rows at all).  When every
        row has the same class value, the string
        ``'(<last title> = <class value>)'`` is returned instead, so callers
        must be prepared for a string rather than a tree (special case added
        08/22).
    """
    title, dataSet = csvOperation.readCsv(atom_csv)
    class_values = [row[-1] for row in dataSet]

    # General case: hand the feature titles (everything but the class
    # column) and the rows over to the ID3 builder.
    if len(set(class_values)) != 1:
        return id3.createTree(dataSet, title[:-1], undefined_percentage)

    # Only one class is present, so there is nothing to split on.
    only_class = class_values[0]
    return '(' + title[-1] + ' = ' + only_class + ')'
import id3
import entropy


def createDataSet():
    """Return a small hard-coded sample data set together with its labels.

    Returns:
        A ``(dataSet, labels)`` tuple.  ``dataSet`` is a list of five rows,
        each holding two 0/1 values followed by a ``'yes'``/``'no'`` string.
        ``labels`` holds two names, presumably for the two leading columns
        (verify against ``id3.createTree``).
    """
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels


# Script body: build the tree for the sample data and show it.
myDat, labels = createDataSet()
# Debugging aids kept from the original:
# print(entropy.entropy(myDat))
# print(id3.chooseBestFeatureToSplit(myDat))
tree = id3.createTree(myDat, labels)
# Parenthesised single-argument print: same output on Python 2, and valid
# syntax on Python 3 (the bare ``print tree`` statement is not).
print(tree)
import id3
import entropy


def createDataSet():
    """Return a small hard-coded sample data set together with its labels.

    Returns:
        A ``(dataSet, labels)`` tuple.  ``dataSet`` is a list of five rows,
        each holding two 0/1 values followed by a ``'yes'``/``'no'`` string.
        ``labels`` holds two names, presumably for the two leading columns
        (verify against ``id3.createTree``).
    """
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels


# Script body: build the tree for the sample data and show it.
myDat, labels = createDataSet()
# Debugging aids kept from the original:
# print(entropy.entropy(myDat))
# print(id3.chooseBestFeatureToSplit(myDat))
tree = id3.createTree(myDat, labels)
# Parenthesised single-argument print: same output on Python 2, and valid
# syntax on Python 3 (the bare ``print tree`` statement is not).
print(tree)
def train(self):
    """Build the decision tree from this object's data and store it.

    When either ``self.dataSet`` or ``self.labels`` is empty/falsy, nothing
    is trained: an empty dict is returned and ``self.tree`` is left
    untouched.  Otherwise the result of ``id3.createTree`` is stored in
    ``self.tree`` and the method returns ``None`` implicitly.
    """
    # Guard clause: both the rows and the labels are required.
    if not self.dataSet or not self.labels:
        return {}

    self.tree = id3.createTree(self.dataSet, self.labels)
import id3
import id3plot
import pickle

if __name__ == "__main__":
    # Driver script: load a data set from "dataset.txt", print a few
    # intermediate ID3 quantities, then build and plot the decision tree.
    #
    # Earlier experiment with the built-in sample data, kept for reference:
    # myDat, labels = id3.createDataSet()
    # print(myDat)
    # shan = id3.calcShannonEnt(myDat)
    # print(shan)
    # myTree = id3.createTree(myDat, labels)
    # print(myTree)
    # id3plot.createPlot(myTree)

    data, labels = id3.createDataSetFromTXT("dataset.txt")

    # Every print below is a single-argument call so that the script emits
    # exactly the same text on Python 2 (where the original used print
    # statements) and is also valid syntax on Python 3.  The "%s %s" format
    # reproduces the single separating space the print statement inserted
    # between its two operands.
    print("%s %s" % ("data is ", data))
    print("%s %s" % ("label is", labels))

    # Compute the Shannon entropy of the given data set.
    shan = id3.calcShannonEnt(data)
    print(shan)

    # Choose the best way to split the data set.
    col = id3.chooseBestFeatureToSplit(data)
    print(col)

    Tree = id3.createTree(data, labels)
    print(Tree)
    id3plot.createPlot(Tree)
import id3
import id3plot
import pickle

if __name__ == "__main__":
    # Driver script: load a data set from "dataset.txt", print a few
    # intermediate ID3 quantities, then build and plot the decision tree.
    #
    # Earlier experiment with the built-in sample data, kept for reference:
    # myDat, labels = id3.createDataSet()
    # print(myDat)
    # shan = id3.calcShannonEnt(myDat)
    # print(shan)
    # myTree = id3.createTree(myDat, labels)
    # print(myTree)
    # id3plot.createPlot(myTree)

    data, labels = id3.createDataSetFromTXT("dataset.txt")

    # Every print below is a single-argument call so that the script emits
    # exactly the same text on Python 2 (where the original used print
    # statements) and is also valid syntax on Python 3.  The "%s %s" format
    # reproduces the single separating space the print statement inserted
    # between its two operands.
    print("%s %s" % ("data is ", data))
    print("%s %s" % ("label is", labels))

    # Compute the Shannon entropy of the given data set.
    shan = id3.calcShannonEnt(data)
    print(shan)

    # Choose the best way to split the data set.
    col = id3.chooseBestFeatureToSplit(data)
    print(col)

    Tree = id3.createTree(data, labels)
    print(Tree)
    id3plot.createPlot(Tree)
convert_list = getConvertList(origin_title, left_list) #print(convert_list) right_list = getRight(atom_list[:]) #print(right_list) atom_dataset = dataSetToAtomDataSet(origin_dataSet, convert_list, right_list, origin_title) #print(atom_dataset) newtitle, newdataset = creatAtomCsv(atom_list[:], atom_dataset[:], origin_dataSet) creatCsv(newtitle, newdataset) #decision tree part title, dataSet = readCsv('atom.csv') print(title[:-1]) myTree = id3.createTree(dataSet, title[:-1]) treePlotter.createPlot(myTree) ''' #recive some message like n[1] = T & n[2] = C title,dataSet = readCsv('atom.csv') message = 'n[1] = T' message_vec = toVec(title[:-1],message) print(message_vec) ''' ''' #test 18 5 11 title,dataSet = readCsv('atom.csv') myTree = id3.createTree(dataSet,title[:-1]) message = 'n[1] = T & n[2] = T & x = true' message_vec = toVec(title[:-1],message) result = id3.classify2(myTree,title[:-1],message_vec)