예제 #1
0
    def buildStumpNode(self, now_root):
        """Recursively grow the subtree under ``now_root`` using one fixed feature.

        Every split is made on ``self.tree_divide_feature_index``. A node is
        turned into a leaf (and labelled through ``self.setClassfy``) when it
        holds at most ``self.max_leaf_data`` indices, when ``self.getCount``
        returns exactly one entry for it (presumably a single class -- confirm
        against ``getCount``), or when ``self.getSubIndex`` yields no subsets.

        Args:
            now_root: Node to expand. Its ``data_index`` is read; its
                ``is_leaf`` flag and ``son`` list are updated in place.

        Returns:
            None. A node for which ``self.getCount`` yields ``None`` or an
            empty result is left untouched.
        """
        now_counts = self.getCount(now_root.data_index)
        # Identity test: ``== None`` dispatches to ``__eq__``, which is
        # element-wise (and ambiguous inside ``or``) for array-like results.
        if now_counts is None or len(now_counts) == 0:
            return
        if len(now_root.data_index) <= self.max_leaf_data or len(now_counts) == 1:
            self._markStumpLeaf(now_root, now_counts)
            return

        feature_index = self.tree_divide_feature_index
        best_value, best_sub_data_indexs = self.getSubIndex(
            now_root.data_index, feature_index)

        # Without any subset the node would stay non-leaf, childless and
        # unlabelled; fall back to a leaf so it still carries a label.
        if len(best_sub_data_indexs) == 0:
            self._markStumpLeaf(now_root, now_counts)
            return

        # NOTE(review): if getSubIndex can place every index of this node in
        # a single subset, the recursive call below repeats with the same
        # data and the same feature -- confirm that subsets always shrink.
        for position, sub_data_index in enumerate(best_sub_data_indexs, start=1):
            new_son = Node(sub_data_index)
            # best_value is indexed as (base, step); the child at 0-based
            # position i is tagged with base + step * (i + 1).
            new_son.setDivideFeatureAndValue(
                feature_index, best_value[0] + best_value[1] * position)
            self.buildStumpNode(new_son)
            now_root.son.append(new_son)

    def _markStumpLeaf(self, node, counts):
        """Flag ``node`` as a leaf and label it from ``counts``."""
        node.is_leaf = True
        self.setClassfy(node, counts)
예제 #2
0
    def buildRandomNode(self, now_root):
        """Recursively grow the subtree under ``now_root`` with random feature sampling.

        At each node the feature indices ``0 .. self.feature_len - 1`` are
        shuffled and the first ``self.divide_feature_len`` of them are tried.
        The feature whose split gets the highest ``self.getGain`` value is
        used to create the children. A node is turned into a leaf (and
        labelled through ``self.setClassfy``) when it holds at most
        ``self.max_leaf_data`` indices, when ``self.getCount`` returns exactly
        one entry for it (presumably a single class -- confirm against
        ``getCount``), or when no candidate feature produced any subset.

        Args:
            now_root: Node to expand. Its ``data_index`` is read; its
                ``is_leaf`` flag and ``son`` list are updated in place.

        Returns:
            None. A node for which ``self.getCount`` yields ``None`` or an
            empty result is left untouched.
        """
        now_counts = self.getCount(now_root.data_index)
        # Identity test: ``== None`` dispatches to ``__eq__``, which is
        # element-wise (and ambiguous inside ``or``) for array-like results.
        if now_counts is None or len(now_counts) == 0:
            return
        if len(now_root.data_index) <= self.max_leaf_data or len(now_counts) == 1:
            now_root.is_leaf = True
            self.setClassfy(now_root, now_counts)
            return

        best_sub_data_indexs = []
        # -inf instead of a large negative constant, so that any comparable
        # gain can win rather than only gains above an arbitrary floor.
        best_gain = float("-inf")
        best_divide_feature_index = -1
        best_value = ()

        # Randomly pick the subset of features considered for this split.
        # Shuffle-then-slice is kept so seeded runs draw the same features.
        divide_list = list(range(self.feature_len))
        random.shuffle(divide_list)
        divide_list = divide_list[:self.divide_feature_len]

        for feature_index in divide_list:
            now_value, now_sub_data_indexs = self.getSubIndex(
                now_root.data_index, feature_index)
            now_gain = self.getGain(now_root.data_index, now_sub_data_indexs)
            # Strict comparison: on ties the first feature tried is kept.
            if best_gain < now_gain:
                best_divide_feature_index = feature_index
                best_gain = now_gain
                best_sub_data_indexs = now_sub_data_indexs
                best_value = now_value

        # No usable split (no feature sampled, no gain beat the sentinel, or
        # the winning split is empty): the node would otherwise stay non-leaf,
        # childless and unlabelled, so make it a leaf instead.
        if len(best_sub_data_indexs) == 0:
            now_root.is_leaf = True
            self.setClassfy(now_root, now_counts)
            return

        for position, sub_data_index in enumerate(best_sub_data_indexs, start=1):
            new_son = Node(sub_data_index)
            # best_value is indexed as (base, step); the child at 0-based
            # position i is tagged with base + step * (i + 1).
            new_son.setDivideFeatureAndValue(
                best_divide_feature_index,
                best_value[0] + best_value[1] * position)
            self.buildRandomNode(new_son)
            now_root.son.append(new_son)