Example #1
    def train(self, num_trees=200, num_features=2, th=0.0):
        """
        Train the trees one at a time using the CART algorithm.
        :param num_trees: number of trees
        :param num_features: number of candidate features randomly drawn at each split
        :param th: threshold passed through to dts.train_cart
        :return:
        """
        for i in range(num_trees):
            # bootstrap: sample rows with replacement
            sample_indices = np.random.choice(range(self.m), size=self.m, replace=True)
            sampled = [self.rows[j] for j in sample_indices]
            # Per the setup of ml_decision_trees.train, when m equals the number of
            # features, bagging is still performed even with sample=True
            tree = dts.train_cart(sampled, th=th,
                                  target=self.target, m=num_features, sample=True)
            self.trees.append(tree)
        print("training complete on {0} trees".format(num_trees))
Example #2
    def train(self, d=1, k=20):
        """
        Boosting trees for regression.
        :param d: depth of each tree
        :param k: number of trees
        :return:
        """

        def update_y(rows, y_new):
            """
            Replace the y values of the dataset rows and return a new dataset.
            :param rows: the dataset
            :param y_new: new y values, list-like
            :return:
            """
            updated = []
            col_id = len(rows[0]) - 1  # index of the label column (the last column)
            n_rows = len(rows)  # number of rows
            for idx in range(n_rows):
                r_new = rows[idx][:col_id]
                r_new.append(y_new[idx])
                updated.append(r_new)
            return updated

        for i in range(k):
            # overwrite the training labels with the current residuals
            self.rows_updated = update_y(self.rows_updated, self.resid)
            # fit a new weak learner and append it to the ensemble
            t = dts.train_cart(self.rows_updated, target="regression", d=d)
            self.trees.append(t)
            # update the residuals
            pred = dts.predict(t, self.rows_updated)
            self.resid -= np.array(pred)
            # report the mean squared error for this boosting step
            mse = np.dot(self.resid, self.resid) / len(self.resid)
            print("training step {0} finished, current mse: {1}".format(i, mse))