Beispiel #1
0
 def leaf_err_sum(self, cur_node, err_set):
     '''
     Pessimistic pruning helper: gather the error count of every leaf
     in the subtree rooted at cur_node.

     One entry per leaf is appended to err_set (the list is mutated in
     place, nothing is returned), following the iteration order of each
     childNode mapping. A leaf whose dataset is empty contributes 0;
     any other leaf contributes get_err_sum(leaf.cls, leaf.dataset).
     '''
     children = cur_node.childNode
     if len(children) > 0:
         # Internal node: every child is handled the same way, whether
         # or not it is an empty leaf, so simply descend into each one.
         for child in children.values():
             self.leaf_err_sum(child, err_set)
         return

     # Leaf node: an empty dataset counts as zero errors.
     samples = cur_node.dataset
     leaf_errors = get_err_sum(cur_node.cls, samples) if len(samples) > 0 else 0
     err_set.append(leaf_errors)
Beispiel #2
0
 def leaf_err_sum(self, cur_node, err_set):
     '''
     Pessimistic pruning helper: gather the error count of every leaf
     in the subtree rooted at cur_node.

     One entry per leaf is appended to err_set (the list is mutated in
     place, nothing is returned), following the iteration order of each
     childNode mapping. A leaf whose dataset is empty contributes 0;
     any other leaf contributes get_err_sum(leaf.cls, leaf.dataset).
     '''
     children = cur_node.childNode
     if len(children) > 0:
         # Internal node: every child is handled the same way, whether
         # or not it is an empty leaf, so simply descend into each one.
         for child in children.values():
             self.leaf_err_sum(child, err_set)
         return

     # Leaf node: an empty dataset counts as zero errors.
     samples = cur_node.dataset
     leaf_errors = get_err_sum(cur_node.cls, samples) if len(samples) > 0 else 0
     err_set.append(leaf_errors)
Beispiel #3
0
    def __prun_tree(self, cur_node):
        '''Pessimistic-error pruning of the subtree rooted at cur_node.

        The node is (re)labelled with get_cls_from_data(cur_node.dataset)
        and two continuity-corrected error counts are compared:

        * the errors the node would make as a single leaf, plus 0.5;
        * the summed errors of all leaves below it, plus 0.5 per leaf,
          plus one standard error of that count.

        When the second quantity is strictly larger, the children are
        dropped and the node becomes a leaf; otherwise each child is
        pruned recursively. Leaves are left untouched. The tree is
        modified in place and nothing is returned.
        '''
        if len(cur_node.childNode) == 0:  # leaves have nothing to prune
            return

        # Label the node would carry if it were collapsed into a leaf.
        cur_node.cls = get_cls_from_data(cur_node.dataset)
        # Errors of the collapsed node, with the 0.5 continuity correction.
        cur_err_sum = get_err_sum(cur_node.cls, cur_node.dataset) + 0.5

        leaf_err_set = []
        self.leaf_err_sum(cur_node, leaf_err_set)
        # Corrected subtree error count: one 0.5 penalty per leaf.
        leaf_e_sum = sum(leaf_err_set) + 0.5 * len(leaf_err_set)

        sample_num = len(cur_node.dataset)
        if sample_num > 0:
            # Standard error of an error *count* over N samples:
            # sqrt(N * p * (1 - p)) with p = leaf_e_sum / N, written as
            # e * (N - e) / N. It has to be on the count scale because it
            # is added to leaf_e_sum below.
            variance = leaf_e_sum * (sample_num - leaf_e_sum) / sample_num
            # The per-leaf correction can push leaf_e_sum above N; clamp
            # so the square root cannot turn into NaN, which would make
            # the comparison below silently False.
            std_dev = np.sqrt(max(variance, 0.0))
        else:
            # No samples reached this node: avoid dividing by zero.
            std_dev = 0.0

        if leaf_e_sum + std_dev > cur_err_sum:
            # A single pre-formatted argument keeps the output layout the
            # same whether print is a statement or a function.
            print("%s %s %s" % (leaf_e_sum + std_dev, cur_err_sum, "  prun!!!!"))
            # Collapse into a leaf; cur_node.cls was already set above.
            cur_node.childNode = {}
        else:
            for c in cur_node.childNode.values():
                self.__prun_tree(c)
Beispiel #4
0
    def __prun_tree(self, cur_node):
        '''Pessimistic-error pruning of the subtree rooted at cur_node.

        The node is (re)labelled with get_cls_from_data(cur_node.dataset)
        and two continuity-corrected error counts are compared:

        * the errors the node would make as a single leaf, plus 0.5;
        * the summed errors of all leaves below it, plus 0.5 per leaf,
          plus one standard error of that count.

        When the second quantity is strictly larger, the children are
        dropped and the node becomes a leaf; otherwise each child is
        pruned recursively. Leaves are left untouched. The tree is
        modified in place and nothing is returned.
        '''
        if len(cur_node.childNode) == 0:  # leaves have nothing to prune
            return

        # Label the node would carry if it were collapsed into a leaf.
        cur_node.cls = get_cls_from_data(cur_node.dataset)
        # Errors of the collapsed node, with the 0.5 continuity correction.
        cur_err_sum = get_err_sum(cur_node.cls, cur_node.dataset) + 0.5

        leaf_err_set = []
        self.leaf_err_sum(cur_node, leaf_err_set)
        # Corrected subtree error count: one 0.5 penalty per leaf.
        leaf_e_sum = sum(leaf_err_set) + 0.5 * len(leaf_err_set)

        sample_num = len(cur_node.dataset)
        if sample_num > 0:
            # Standard error of an error *count* over N samples:
            # sqrt(N * p * (1 - p)) with p = leaf_e_sum / N, written as
            # e * (N - e) / N. It has to be on the count scale because it
            # is added to leaf_e_sum below.
            variance = leaf_e_sum * (sample_num - leaf_e_sum) / sample_num
            # The per-leaf correction can push leaf_e_sum above N; clamp
            # so the square root cannot turn into NaN, which would make
            # the comparison below silently False.
            std_dev = np.sqrt(max(variance, 0.0))
        else:
            # No samples reached this node: avoid dividing by zero.
            std_dev = 0.0

        if leaf_e_sum + std_dev > cur_err_sum:
            # A single pre-formatted argument keeps the output layout the
            # same whether print is a statement or a function.
            print("%s %s %s" % (leaf_e_sum + std_dev, cur_err_sum, "  prun!!!!"))
            # Collapse into a leaf; cur_node.cls was already set above.
            cur_node.childNode = {}
        else:
            for c in cur_node.childNode.values():
                self.__prun_tree(c)