Example #1
def crossValidation(trainSet, testSet, labels, m):
    """
    :param trainSet: training folds (one training set per cross-validation round)
    :param testSet: test folds (one test set per cross-validation round)
    :param labels: feature (attribute) names
    :param m: factor controlling how strongly the prior probability influences the posterior
    :return: the model's performance metric (mean leaf error over the K folds)
    """
    v = len(trainSet)
    TreeSet = []
    for i in range(v):  # K rounds: build a C4.5 decision tree, then post-prune it
        temData = tuple(trainSet[i])  # shallow copy so tree building cannot mutate the fold
        Tree = createC4_5Tree(list(temData), list(labels))
        purnTree = PostPruning_IMEP(Tree, labels, trainSet[i], m)
        # print('purnTree=', purnTree)
        TreeSet.append(purnTree)
    errorSum = 0.0
    for i in range(0, v):  # estimate the error of each pruned tree on its test fold
        error = leafError(TreeSet[i], labels, testSet[i])
        errorSum += error
    return errorSum / v
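Usage note: crossValidation expects trainSet and testSet to be parallel lists of K folds, one entry per round. The sketch below shows one way such folds could be produced; kFoldSplit, loadDataSet and the m value are illustrative assumptions, not part of the project.

import random

def kFoldSplit(dataSet, k):
    """Shuffle the samples and split them into k parallel (train, test) fold lists."""
    data = list(dataSet)
    random.shuffle(data)
    foldSize = len(data) // k          # leftover samples are simply dropped in this sketch
    folds = [data[i * foldSize:(i + 1) * foldSize] for i in range(k)]
    trainSet, testSet = [], []
    for i in range(k):
        testSet.append(folds[i])       # fold i is held out for testing
        trainSet.append([row for j, fold in enumerate(folds) if j != i for row in fold])
    return trainSet, testSet

# dataSet, labels = loadDataSet()                      # assumed data-loading helper
# trainSet, testSet = kFoldSplit(dataSet, k=5)
# meanError = crossValidation(trainSet, testSet, labels, m=2)   # m chosen arbitrarily here
# print('Mean leaf error over 5 folds:', meanError)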
Example #2
def showDT(dataSet, labels):
    """
    :param dataSet: data set
    :param labels: attribute (feature) labels
    """

    # Build a classification decision tree with the ID3 algorithm
    ID3Tree = createID3Tree(list(dataSet), list(labels))
    print('The ID3 Decision Tree is', ID3Tree)

    # Build a classification decision tree with the C4.5 algorithm
    C4_5Tree = createC4_5Tree(list(dataSet), list(labels))
    print('The C4.5 Decision Tree is', C4_5Tree)

    # Build a classification decision tree with the CART algorithm
    CARTTree = createCARTTree(list(dataSet), list(labels))
    print('The CART Decision Tree is', CARTTree)

    # Plot each decision tree
    createPlot(ID3Tree, 'ID3 Decision Tree')
    createPlot(C4_5Tree, 'C4.5 Decision Tree')
    createPlot(CARTTree, 'CART Decision Tree')
    plt.show()  # display the decision tree figures
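A minimal usage sketch for showDT, assuming the data layout used throughout this project (each row holds the feature values followed by the class label, and labels names the features); the tiny loan-style dataset below is made up for illustration.

dataSet = [
    ['youth',  'no',  'no',  'fair',      'refuse'],
    ['youth',  'no',  'no',  'good',      'refuse'],
    ['youth',  'yes', 'no',  'good',      'agree'],
    ['middle', 'yes', 'yes', 'excellent', 'agree'],
    ['old',    'no',  'yes', 'excellent', 'agree'],
]
labels = ['age', 'has job', 'owns house', 'credit']
showDT(dataSet, labels)   # prints the three trees and opens the matplotlib figures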
Example #3
# Compute accuracy before and after pruning
ID3Accuracy = testAccuracy(ID3TrainTree, labels, trainSet)
print('The Accuracy of ID3 Decision Tree is', ID3Accuracy)
PreID3Accuracy = testAccuracy(PreID3Tree, labels, trainSet)
print('The Accuracy of Pre_Pruning_ID3 Decision Tree is', PreID3Accuracy)
REPID3Accuracy = testAccuracy(REPID3Tree, labels, trainSet)
print('The Accuracy of REP_Pruning_ID3 Decision Tree is', REPID3Accuracy)
PEPID3Accuracy = testAccuracy(PEPID3Tree, labels, trainSet)
print('The Accuracy of PEP_Pruning_ID3 Decision Tree is', PEPID3Accuracy)


"""对C4.5算法生成的决策树,进行剪枝"""
print('===========================================')
# First, build a decision tree with the C4.5 algorithm
C4_5Tree = createC4_5Tree(list(trainSet), list(labels))
print('The C4.5 Decision Tree:', 'Depth:', getTreeDepth(C4_5Tree), ';Leaf:', getNumLeaf(C4_5Tree))
print('The Node with largest number is', findKeyNode(C4_5Tree))
copyC4_5Tree = copy.deepcopy(C4_5Tree)  # deep-copy the unpruned tree (used for MEP pruning below)
copyC4_5Tree1 = copy.deepcopy(C4_5Tree)  # keep a second copy of the unpruned tree

# Post-prune the decision tree with the PEP (Pessimistic Error Pruning) method
PEPC4_5Tree = PostPruning_PEP(C4_5Tree, list(labels), list(trainSet))
print('The PEP_Pruning_C4.5 Decision Tree:', 'Depth:', getTreeDepth(PEPC4_5Tree), ';Leaf:', getNumLeaf(PEPC4_5Tree))
print('The Node with largest number is', findKeyNode(PEPC4_5Tree))

# Post-prune the decision tree with the MEP (Minimum Error Pruning) method
MEPC4_5Tree = PostPruning_MEP(copyC4_5Tree, list(labels), list(trainSet), k)
print('The MEP_Pruning_C4.5 Decision Tree:', 'Depth:', getTreeDepth(MEPC4_5Tree), ';Leaf:', getNumLeaf(MEPC4_5Tree))
print('The Node with largest number is', findKeyNode(MEPC4_5Tree))
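As a follow-up sketch, the same testAccuracy comparison used for the ID3 trees above could be applied to the C4.5 trees built in this fragment. It reuses only names already defined here, and it presumes copyC4_5Tree1 was kept as the unpruned reference tree (an assumption, since this fragment never shows that copy being used).

C4_5Accuracy = testAccuracy(copyC4_5Tree1, labels, trainSet)  # unpruned C4.5 tree (assumed purpose of this copy)
print('The Accuracy of C4.5 Decision Tree is', C4_5Accuracy)
PEPC4_5Accuracy = testAccuracy(PEPC4_5Tree, labels, trainSet)
print('The Accuracy of PEP_Pruning_C4.5 Decision Tree is', PEPC4_5Accuracy)
MEPC4_5Accuracy = testAccuracy(MEPC4_5Tree, labels, trainSet)
print('The Accuracy of MEP_Pruning_C4.5 Decision Tree is', MEPC4_5Accuracy)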