def createGlassesTree(filename, debug=False):
    """Build a tree from a tab-separated data file via ``CA.createTree``.

    Every line of the file is stripped of surrounding whitespace and split
    on tab characters to form one row. The rows are passed to
    ``CA.createTree`` together with the fixed feature labels ``age``,
    ``prescript``, ``astigmatic`` and ``tearRate``.

    Args:
        filename: Path of the tab-separated data file to read.
        debug: When true, print the parsed rows and the labels before the
            tree is built.

    Returns:
        The value returned by ``CA.createTree`` for the parsed rows.
    """
    # Read inside a context manager so the file handle is closed even when
    # reading or parsing raises; previously the handle was never closed.
    # The comprehension is equivalent to looping over the lines and
    # appending ``inst.strip().split('\t')`` for each one.
    with open(filename) as fr:
        lences = [inst.strip().split('\t') for inst in fr]

    lencesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']

    if debug:
        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("lences = %s, lencesLabels = %s" % (lences, lencesLabels))

    # The third argument is None here; other call sites pass a print
    # callback in that position -- presumably None disables it (TODO confirm
    # against CA.createTree).
    return CA.createTree(lences, lencesLabels, None)
print "featVec = %s, reducedFeatVec = %s, featVec[axis + 1 :] = %s" % ( featVec, reducedFeatVec, featVec_t) print "============ >>> start 数据划分方式 ===================" print CA.splitDataSet(dataSet, 0, 1, printCallback) print CA.splitDataSet(dataSet, 0, 0, printCallback) print "============ >>> start 最好的数据划分方式 =================" # this line just for test invalid dataSet = [[1, 'yes'], [1, 'yes'], [0, 'no'], [0, 'no']]; features = CA.chooseBestFeatureToSplit(dataSet, printCallback) print "features = ", features # result 1 means 按照index =1的特征划分。 也就是第2列特征分组 print "raw dataSet = ", dataSet print "============ >>> start 创建树 =================" myTree = CA.createTree(dataSet, labels, printCallback) print "MyTree = ", myTree ''''' {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}} {'flippers': {0: 'no', 1: {'no surfacing': {0: 'no', 1: 'yes'}}}} ''' ''''' 绘制注解 ''' # CA.createPlot() # old func ok . just make not blocking next # 返回一个 决策tree。 用于测试 def retrieveTree(i): listOfTrees = [{