예제 #1
0
# Show the data set with its column count (length of the first row), then the
# value CA.calcShannonEnt returns for it.
# A single parenthesised argument prints identically under the Python 2 print
# statement and also parses as a Python 3 print() call.
print("dataSet = %s, column count = %s" % (dataSet, len(dataSet[0])))
print(CA.calcShannonEnt(dataSet))


def printCallback(featVec, reducedFeatVec, featVec_t):
    """Print one trace line showing the three values handed to the callback.

    This function is passed as the callback argument to the CA.* calls in
    this script.

    Args:
        featVec: Value printed under the ``featVec`` label.
        reducedFeatVec: Value printed under the ``reducedFeatVec`` label.
        featVec_t: Value printed under the ``featVec[axis + 1 :]`` label
            (presumably the tail of featVec after the split axis -- confirm
            against the caller in CA).

    Returns:
        None.
    """
    # Build the message first so print receives exactly one argument: that
    # form behaves the same as the Python 2 print statement and is also a
    # valid Python 3 print() call.
    message = "featVec = %s, reducedFeatVec = %s, featVec[axis + 1 :] = %s" % (
        featVec, reducedFeatVec, featVec_t)
    print(message)


# All prints below use a single parenthesised argument so they produce the
# same output under the Python 2 print statement and also parse on Python 3.

# --- Data-set splitting ---
print("============ >>> start 数据划分方式 ===================")
# Two splits on index 0, for value 1 and then value 0 (argument meaning is
# presumed from the call shape -- confirm against CA.splitDataSet).
print(CA.splitDataSet(dataSet, 0, 1, printCallback))
print(CA.splitDataSet(dataSet, 0, 0, printCallback))

# --- Best feature to split on ---
print("============ >>> start 最好的数据划分方式 =================")
# Test-only alternative input, kept disabled:
#   dataSet = [[1, 'yes'], [1, 'yes'], [0, 'no'], [0, 'no']]
features = CA.chooseBestFeatureToSplit(dataSet, printCallback)
# A result of 1 means: split on the feature at index 1, i.e. group by the
# second column.
# "%s %s" reproduces the old two-item print statement exactly (items joined by
# one space) while leaving the label literal unchanged.
print("%s %s" % ("features = ", features))
print("%s %s" % ("raw dataSet = ", dataSet))

# --- Tree construction ---
print("============ >>> start 创建树  =================")
myTree = CA.createTree(dataSet, labels, printCallback)
print("%s %s" % ("MyTree = ", myTree))
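# Example tree dicts (presumably sample createTree output for the two possible
# feature orders -- confirm):
# {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
# {'flippers': {0: 'no', 1: {'no surfacing': {0: 'no', 1: 'yes'}}}}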
# Draw annotations (plotting section).

# CA.createPlot()  # Older function, still works; left disabled so it does not block the code that runs next.