def buildStumpNode(self, now_root):
    """Recursively grow the subtree below ``now_root`` using one fixed feature.

    Every split uses ``self.tree_divide_feature_index``. The node is turned
    into a leaf (``is_leaf = True`` followed by ``self.setClassfy``) when it
    holds at most ``self.max_leaf_data`` samples, when ``self.getCount``
    reports exactly one entry, or when ``self.getSubIndex`` yields no
    partitions to descend into.

    Args:
        now_root: Node to expand. Its ``data_index`` is read; its ``is_leaf``
            and ``son`` attributes are updated in place.

    Returns:
        None. If ``self.getCount`` returns ``None`` or an empty result the
        node is left untouched.
    """
    now_counts = self.getCount(now_root.data_index)
    # Identity check for None; len() is kept (instead of truthiness) so that
    # any sized container returned by getCount is handled the same way.
    if now_counts is None or len(now_counts) == 0:
        return

    if len(now_root.data_index) <= self.max_leaf_data or len(now_counts) == 1:
        now_root.is_leaf = True
        self.setClassfy(now_root, now_counts)
        return

    feature_index = self.tree_divide_feature_index
    best_value, best_sub_data_indexs = self.getSubIndex(
        now_root.data_index, feature_index)

    if len(best_sub_data_indexs) == 0:
        # No partition was produced: without this fallback the node would be
        # neither a leaf nor have any children.
        now_root.is_leaf = True
        self.setClassfy(now_root, now_counts)
        return

    for i, sub_data_index in enumerate(best_sub_data_indexs):
        new_son = Node(sub_data_index)
        # NOTE(review): best_value looks like a (base, step) pair, making
        # this the boundary value of the i-th partition - confirm against
        # getSubIndex.
        new_son.setDivideFeatureAndValue(
            feature_index, best_value[0] + best_value[1] * (i + 1))
        self.buildStumpNode(new_son)
        now_root.son.append(new_son)
def buildRandomNode(self, now_root):
    """Recursively grow the subtree below ``now_root`` with random feature subsets.

    At each node a random subset of ``self.divide_feature_len`` feature
    indexes (out of ``self.feature_len``) is drawn; each candidate is split
    with ``self.getSubIndex`` and scored with ``self.getGain``, and the
    candidate with the highest gain is used. The node is turned into a leaf
    (``is_leaf = True`` followed by ``self.setClassfy``) when it holds at
    most ``self.max_leaf_data`` samples, when ``self.getCount`` reports
    exactly one entry, or when no candidate yields any partitions.

    Args:
        now_root: Node to expand. Its ``data_index`` is read; its ``is_leaf``
            and ``son`` attributes are updated in place.

    Returns:
        None. If ``self.getCount`` returns ``None`` or an empty result the
        node is left untouched.
    """
    now_counts = self.getCount(now_root.data_index)
    # Identity check for None; len() is kept (instead of truthiness) so that
    # any sized container returned by getCount is handled the same way.
    if now_counts is None or len(now_counts) == 0:
        return

    if len(now_root.data_index) <= self.max_leaf_data or len(now_counts) == 1:
        now_root.is_leaf = True
        self.setClassfy(now_root, now_counts)
        return

    # Randomly select the subset of features considered for this split.
    # Slicing a shuffled list tolerates divide_feature_len > feature_len.
    divide_list = list(range(self.feature_len))
    random.shuffle(divide_list)
    divide_list = divide_list[:self.divide_feature_len]

    # -inf (rather than a large negative constant) lets any real gain win.
    best_gain = float("-inf")
    best_divide_feature_index = -1
    best_sub_data_indexs = []
    best_value = ()
    for feature_index in divide_list:
        now_value, now_sub_data_indexs = self.getSubIndex(
            now_root.data_index, feature_index)
        now_gain = self.getGain(now_root.data_index, now_sub_data_indexs)
        if best_gain < now_gain:
            best_divide_feature_index = feature_index
            best_gain = now_gain
            best_sub_data_indexs = now_sub_data_indexs
            best_value = now_value

    if len(best_sub_data_indexs) == 0:
        # No candidate produced a usable split: without this fallback the
        # node would be neither a leaf nor have any children.
        now_root.is_leaf = True
        self.setClassfy(now_root, now_counts)
        return

    for i, sub_data_index in enumerate(best_sub_data_indexs):
        new_son = Node(sub_data_index)
        # NOTE(review): best_value looks like a (base, step) pair, making
        # this the boundary value of the i-th partition - confirm against
        # getSubIndex.
        new_son.setDivideFeatureAndValue(
            best_divide_feature_index,
            best_value[0] + best_value[1] * (i + 1))
        self.buildRandomNode(new_son)
        now_root.son.append(new_son)