def crossValidation(trainSet, testSet, labels, m):
    """Average the leaf error of pruned C4.5 trees over all folds.

    One C4.5 tree is grown and post-pruned with ``PostPruning_IMEP`` for
    every entry of ``trainSet``; each pruned tree is then scored with
    ``leafError`` against the entry of ``testSet`` at the same index.

    :param trainSet: indexable collection of training folds
    :param testSet: indexable collection of test folds, aligned by index
        with ``trainSet``
    :param labels: feature (attribute) labels
    :param m: factor controlling how strongly the prior probability
        influences the posterior probability (forwarded to the pruning step)
    :return: mean of the per-fold ``leafError`` values
    """
    fold_count = len(trainSet)

    # Phase 1: grow and prune one decision tree per training fold.
    # The tree builder gets fresh copies of the fold and of the labels,
    # whereas the pruning step receives the caller's own objects.
    pruned_trees = []
    for fold in range(fold_count):
        snapshot = tuple(trainSet[fold])
        grown = createC4_5Tree(list(snapshot), list(labels))
        pruned_trees.append(PostPruning_IMEP(grown, labels, trainSet[fold], m))

    # Phase 2: estimate the error of every pruned tree on its test fold.
    # Kept as a separate pass so that all pruning happens before any
    # error estimation, exactly as the calls were originally ordered.
    total_error = 0.0
    for fold, tree in enumerate(pruned_trees):
        total_error += leafError(tree, labels, testSet[fold])

    return total_error / fold_count
def showDT(dataSet, labels):
    """Build ID3, C4.5 and CART decision trees, print them and plot them.

    Each builder receives its own list copy of ``dataSet`` and ``labels``.

    :param dataSet: the data set used to grow the trees
    :param labels: the attribute labels
    """
    # Grow one classification tree per algorithm and report each in turn.
    id3_tree = createID3Tree(list(dataSet), list(labels))
    print('The ID3 Decision Tree is', id3_tree)

    c45_tree = createC4_5Tree(list(dataSet), list(labels))
    print('The C4.5 Decision Tree is', c45_tree)

    cart_tree = createCARTTree(list(dataSet), list(labels))
    print('The CART Decision Tree is', cart_tree)

    # Draw every tree under its own title, then display the figures.
    titled_trees = (
        (id3_tree, 'ID3 Decision Tree'),
        (c45_tree, 'C4.5 Decision Tree'),
        (cart_tree, 'CART Decision Tree'),
    )
    for tree, title in titled_trees:
        createPlot(tree, title)
    plt.show()
# 计算剪枝前后准确度 ID3Accuracy = testAccuracy(ID3TrainTree, labels, trainSet) print('The Accuracy of ID3 Decision Tree is', ID3Accuracy) PreID3Accuracy = testAccuracy(PreID3Tree, labels, trainSet) print('The Accuracy of Pre_Pruning_ID3 Decision Tree is', PreID3Accuracy) REPID3Accuracy = testAccuracy(REPID3Tree, labels, trainSet) print('The Accuracy of REP_Pruning_ID3 Decision Tree is', REPID3Accuracy) PEPID3Accuracy = testAccuracy(PEPID3Tree, labels, trainSet) print('The Accuracy of PEP_Pruning_ID3 Decision Tree is', PEPID3Accuracy) """对C4.5算法生成的决策树,进行剪枝""" '''''' print('===========================================') # 先用C4.5算法,生成决策树 C4_5Tree = createC4_5Tree(list(trainSet), list(labels)) print('The C4.5 Decision Tree:', 'Depth:', getTreeDepth(C4_5Tree), ';Leaf:', getNumLeaf(C4_5Tree)) print('The Node with largest number is', findKeyNode(C4_5Tree)) copyC4_5Tree = copy.deepcopy(C4_5Tree) # 将生成树拷贝 copyC4_5Tree1 = copy.deepcopy(C4_5Tree) # 将生成树拷贝 # 使用PEP方法,对决策树进行后剪枝 PEPC4_5Tree = PostPruning_PEP(C4_5Tree, list(labels), list(trainSet)) print('The PEP_Pruning_C4.5 Decision Tree:', 'Depth:', getTreeDepth(PEPC4_5Tree), ';Leaf:', getNumLeaf(PEPC4_5Tree)) print('The Node with largest number is', findKeyNode(PEPC4_5Tree)) # 使用MEP方法,对决策树进行后剪枝 MEPC4_5Tree = PostPruning_MEP(copyC4_5Tree, list(labels), list(trainSet), k) print('The MEP_Pruning_C4.5 Decision Tree:', 'Depth:', getTreeDepth(MEPC4_5Tree), ';Leaf:', getNumLeaf(MEPC4_5Tree)) print('The Node with largest number is', findKeyNode(MEPC4_5Tree))