def __init__(self, n_estimator=10, learning_rate=0.01, min_sample=2,
             min_gain=0.1, max_depth=10):
    """Set up a gradient-boosted regressor made of CART regression trees.

    Args:
        n_estimator: Forwarded to the parent initializer. The number of
            trees built here is read back from ``self._n_estimator``
            (presumably stored by the parent -- confirm).
        learning_rate: Forwarded to the parent initializer.
        min_sample: First positional argument for every
            ``CARTRegressionScratch``.
        min_gain: Second positional argument for every
            ``CARTRegressionScratch``.
        max_depth: Third positional argument for every
            ``CARTRegressionScratch``.
    """
    super(GBDTRegressionScratch, self).__init__(n_estimator, learning_rate)

    # Regression trees use the squared loss as their loss function.
    self._loss = SquareLoss()

    # Build every tree of the ensemble up front; ``self._trees`` is assumed
    # to be a list already created by the parent initializer.
    self._trees.extend(
        CARTRegressionScratch(min_sample, min_gain, max_depth)
        for _ in range(self._n_estimator)
    )
def fit(self, X, y):
    """Train the boosted ensemble, one regression tree per class per round.

    The labels are one-hot encoded with ``self._to_one_hot``. Each of the
    ``self._n_estimator`` boosting rounds then fits a fresh
    ``CARTRegressionScratch`` per class column to the loss gradient of that
    column, and moves the running prediction one learning-rate-scaled step
    against the outputs of the new trees.

    Calling ``fit`` again discards the trees of any earlier call, so the
    stored ensemble always matches the most recent training run.

    Args:
        X: Training samples, indexed row by row as ``X[i]``.
        y: Labels in whatever form ``self._to_one_hot`` accepts.
    """
    # One-hot encode the input labels first.
    y = self._to_one_hot(y)
    n_sample, self._n_class = y.shape

    # Start from an empty ensemble. The running prediction below restarts
    # from the class means on every call, so trees kept from an earlier
    # fit() would not belong to the sequence trained here.
    self._trees = []

    # The initial prediction is the mean of each class column.
    # NOTE(review): np.full_like inherits y's dtype, so this assumes
    # _to_one_hot returns a floating-point array -- confirm.
    residual_pred = np.full_like(y, np.mean(y, axis=0))

    for _ in range(self._n_estimator):
        label_trees = []
        residual_update = np.zeros_like(residual_pred)

        # Learn a separate tree for every class.
        for j in range(self._n_class):
            residual_gradient = self._loss.calc_gradient(
                y[:, j], residual_pred[:, j])

            # Each tree is trained with the loss gradient as its target.
            tree = CARTRegressionScratch(
                self._min_sample, self._min_gain, self._max_depth)
            tree.fit(X, residual_gradient)
            label_trees.append(tree)

            # The tree is queried one sample at a time.
            for i in range(n_sample):
                residual_update[i, j] = tree.predict(X[i])

        self._trees.append(label_trees)
        # Step against the gradient predicted by this round's trees.
        residual_pred -= self._lr * residual_update