# Demo driver for the CA helpers: prints the result of CA.calcShannonEnt,
# two CA.splitDataSet calls, CA.chooseBestFeatureToSplit and CA.createTree.
# `CA`, `dataSet` and `labels` are defined outside this section.
#
# Every print below passes exactly one pre-formatted argument, so the output
# is the same whether `print` is the Python 2 statement or the Python 3
# function (the original print statements are a SyntaxError on Python 3).
print("dataSet = %s, column count = %s" % (dataSet, len(dataSet[0])))
print(CA.calcShannonEnt(dataSet))


def printCallback(featVec, reducedFeatVec, featVec_t):
    """Print the three values handed to this callback.

    The format string labels them as the feature vector, the reduced
    feature vector and ``featVec[axis + 1 :]``. How the CA helpers compute
    these values, and when they invoke the callback, is not visible here.
    """
    print("featVec = %s, reducedFeatVec = %s, featVec[axis + 1 :] = %s" % (
        featVec, reducedFeatVec, featVec_t))


# Banner text (runtime string, kept verbatim): "ways of splitting the data".
print("============ >>> start 数据划分方式 ===================")
print(CA.splitDataSet(dataSet, 0, 1, printCallback))
print(CA.splitDataSet(dataSet, 0, 0, printCallback))

# Banner text (runtime string, kept verbatim): "best way to split the data".
print("============ >>> start 最好的数据划分方式 =================")
# Original author's note: "this line just for test invalid".
# NOTE(review): the source layout was lost, so it is unclear whether the
# assignment below was live code or part of the comment above. It is kept
# live here; comment it out if the override was meant to be disabled.
dataSet = [[1, 'yes'], [1, 'yes'], [0, 'no'], [0, 'no']]
features = CA.chooseBestFeatureToSplit(dataSet, printCallback)
# A result of 1 means the data is split on the feature at index 1, i.e. the
# rows are grouped by the second column.
# "%s %s" reproduces the two-item print statement: items joined by one space.
print("%s %s" % ("features = ", features))
print("%s %s" % ("raw dataSet = ", dataSet))

# Banner text (runtime string, kept verbatim): "create tree".
print("============ >>> start 创建树 =================")
myTree = CA.createTree(dataSet, labels, printCallback)
print("%s %s" % ("MyTree = ", myTree))

# Sample tree shapes recorded by the original author:
#   {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
#   {'flippers': {0: 'no', 1: {'no surfacing': {0: 'no', 1: 'yes'}}}}

# Draw annotations.
# CA.createPlot()  # old function works; left disabled so it does not block what follows