예제 #1
0
def createTree(atom_csv, undefined_percentage):
    """Build the decision tree for the data set read from ``atom_csv``.

    The data is loaded with ``csvOperation.readCsv``, which yields the column
    titles and the data rows. The last entry of every row is treated as that
    row's class value, and the last title as the name of the class column.

    NOTE(review): the original header comment described the inputs as "the
    processed reachable set and a priority dictionary", which does not
    obviously match the current parameters -- confirm with the callers.

    Args:
        atom_csv: Passed unchanged to ``csvOperation.readCsv``.
        undefined_percentage: Passed unchanged to ``id3.createTree``. Per the
            original note (07-26), ``id3.createTree`` was modified to take
            this value as its priority criterion.

    Returns:
        The string ``'(<class column title> = <class value>)'`` when all rows
        share one class value; otherwise the result of ``id3.createTree``.
        Callers must therefore handle a plain string as well (original note,
        08-22: the result may be a string rather than a dict).
    """
    title, dataSet = csvOperation.readCsv(atom_csv)
    outcomes = [row[-1] for row in dataSet]

    if len(set(outcomes)) != 1:
        # Anything other than exactly one distinct class goes through ID3,
        # using every column except the class column as a feature.
        return id3.createTree(dataSet, title[:-1], undefined_percentage)

    # Single-class shortcut: no tree is built, only a textual leaf.
    return '(' + title[-1] + ' = ' + outcomes[0] + ')'
예제 #2
0
import id3
import entropy

def createDataSet():
    """Return a small hard-coded sample data set and its feature names.

    Returns:
        A ``(dataSet, labels)`` pair. ``dataSet`` is a list of five rows of
        the form ``[feature, feature, class]`` with 0/1 feature values and a
        ``'yes'``/``'no'`` class; ``labels`` holds the names of the two
        feature columns. Fresh lists are created on every call.
    """
    feature_names = ['no surfacing', 'flippers']
    samples = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    return samples, feature_names

# Build the ID3 decision tree for the sample data set and print it.
myDat, labels = createDataSet()
# Optional diagnostics, left disabled:
# print(entropy.entropy(myDat))
# print(id3.chooseBestFeatureToSplit(myDat))
tree = id3.createTree(myDat, labels)
# The parenthesised single-argument form prints exactly what the Python 2
# statement ``print tree`` did, and is also valid on Python 3 (where the
# bare statement is a SyntaxError).
print(tree)
예제 #3
0
import id3
import entropy


def createDataSet():
    """Return the built-in sample data set together with its feature names.

    Returns:
        A ``(dataSet, labels)`` pair: ``dataSet`` is a list of five
        ``[feature, feature, class]`` rows (0/1 features, ``'yes'``/``'no'``
        class) and ``labels`` is the list of the two feature names. New list
        objects are built on each call.
    """
    raw_rows = (
        (1, 1, 'yes'),
        (1, 1, 'yes'),
        (1, 0, 'no'),
        (0, 1, 'no'),
        (0, 1, 'no'),
    )
    # Rows are handed out as lists, matching the original literal.
    dataSet = [list(row) for row in raw_rows]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels


# Create the sample data, grow the ID3 tree from it and print the result.
myDat, labels = createDataSet()
# Optional diagnostics, left disabled:
# print(entropy.entropy(myDat))
# print(id3.chooseBestFeatureToSplit(myDat))
tree = id3.createTree(myDat, labels)
# ``print(tree)`` with a single argument behaves like the Python 2 statement
# ``print tree`` and, unlike it, also parses on Python 3.
print(tree)
예제 #4
0
 def train(self):
     """Build the ID3 tree from ``self.dataSet`` and ``self.labels``.

     When either attribute is falsy, nothing is trained: an empty dict is
     returned and ``self.tree`` is left untouched. Otherwise the result of
     ``id3.createTree`` is stored in ``self.tree`` and the method returns
     ``None`` implicitly.
     """
     # Guard: both the data set and the labels are needed for training.
     if not self.dataSet or not self.labels:
         return {}
     self.tree = id3.createTree(self.dataSet, self.labels)
예제 #5
0
파일: main.py 프로젝트: gatieme/AderXCoding
import id3

import id3plot
import pickle
if __name__ == "__main__":
    # Alternative demo using the sample data set from id3 (disabled):
    #   myDat, labels = id3.createDataSet()
    #   print(myDat)
    #
    #   shan = id3.calcShannonEnt(myDat)
    #   print(shan)
    #
    #   myTree = id3.createTree(myDat, labels)
    #   print(myTree)
    #
    #   id3plot.createPlot(myTree)

    # Load the data set and its labels through id3.createDataSetFromTXT.
    data, labels = id3.createDataSetFromTXT("dataset.txt")
    # The original used Python 2 print statements, which do not parse on
    # Python 3. A multi-argument ``print(a, b)`` would print a tuple on
    # Python 2, so the two-part messages are %-formatted into one string;
    # the output is byte-identical to the old statements on both versions
    # (note the double space that ``print "data is ", data`` produced).
    print("data is  %s" % (data,))
    print("label is %s" % (labels,))

    # Compute the Shannon entropy of the given data set.
    shan = id3.calcShannonEnt(data)
    print(shan)

    # Choose the best way to split the data set.
    col = id3.chooseBestFeatureToSplit(data)
    print(col)
    Tree = id3.createTree(data, labels)
    print(Tree)

    # Hand the finished tree to the plotting module.
    id3plot.createPlot(Tree)
예제 #6
0
import id3

import id3plot
import pickle
if __name__ == "__main__":
    # Alternative demo using the sample data set from id3 (disabled):
    #   myDat, labels = id3.createDataSet()
    #   print(myDat)
    #
    #   shan = id3.calcShannonEnt(myDat)
    #   print(shan)
    #
    #   myTree = id3.createTree(myDat, labels)
    #   print(myTree)
    #
    #   id3plot.createPlot(myTree)

    # Read the data set and its labels via id3.createDataSetFromTXT.
    data, labels = id3.createDataSetFromTXT("dataset.txt")
    # Python 2 print statements are a SyntaxError on Python 3. The
    # two-argument prints are folded into a single %-formatted string
    # because ``print(a, b)`` would emit a tuple on Python 2; the text
    # written is unchanged, including the double space after "data is".
    print("data is  %s" % (data,))
    print("label is %s" % (labels,))

    # Compute the Shannon entropy of the given data set.
    shan = id3.calcShannonEnt(data)
    print(shan)

    # Choose the best way to split the data set.
    col = id3.chooseBestFeatureToSplit(data)
    print(col)
    Tree = id3.createTree(data, labels)
    print(Tree)

    # Hand the finished tree to the plotting module.
    id3plot.createPlot(Tree)
예제 #7
0
    convert_list = getConvertList(origin_title, left_list)
    #print(convert_list)
    right_list = getRight(atom_list[:])
    #print(right_list)
    atom_dataset = dataSetToAtomDataSet(origin_dataSet, convert_list,
                                        right_list, origin_title)
    #print(atom_dataset)
    newtitle, newdataset = creatAtomCsv(atom_list[:], atom_dataset[:],
                                        origin_dataSet)
    creatCsv(newtitle, newdataset)

    #decision tree part

    title, dataSet = readCsv('atom.csv')
    print(title[:-1])
    myTree = id3.createTree(dataSet, title[:-1])
    treePlotter.createPlot(myTree)
    '''
	#recive some message like n[1] = T & n[2] = C
	title,dataSet = readCsv('atom.csv')
	message = 'n[1] = T'
	message_vec = toVec(title[:-1],message)
	print(message_vec)
	'''
    '''
	#test 18 5 11
	title,dataSet = readCsv('atom.csv')
	myTree = id3.createTree(dataSet,title[:-1])
	message = 'n[1] = T & n[2] = T & x = true'
	message_vec = toVec(title[:-1],message)
	result = id3.classify2(myTree,title[:-1],message_vec)