예제 #1
0
 def __init__(self, n_estimator=10, learning_rate=0.01, min_sample=2,
              min_gain=0.1, max_depth=10):
     """Set up the regressor with a squared loss and its regression trees.

     ``n_estimator`` and ``learning_rate`` are forwarded to the parent
     initializer. ``min_sample``, ``min_gain`` and ``max_depth`` are passed
     positionally, in that order, to every ``CARTRegressionScratch`` built
     here.
     """
     super(GBDTRegressionScratch, self).__init__(n_estimator, learning_rate)
     # Regression trees are boosted with the squared loss.
     self._loss = SquareLoss()
     # Build one tree per boosting round up front. Both self._n_estimator
     # and self._trees are presumably created by the parent initializer;
     # verify against the base class.
     for _round in range(self._n_estimator):
         self._trees.append(
             CARTRegressionScratch(min_sample, min_gain, max_depth))
예제 #2
0
 def fit(self, X, y):
     """Train the model: boost one regression tree per class per round.

     For each of the ``self._n_estimator`` rounds, a fresh
     ``CARTRegressionScratch`` is fitted per class column to the gradient
     returned by ``self._loss.calc_gradient``, and that round's list of
     trees is appended to ``self._trees``. Also stores ``self._n_class``.
     Returns nothing.
     """
     # Encode the labels first (one-hot, per self._to_one_hot); the result
     # is used below as a 2-D array of shape (n_sample, n_class).
     y = self._to_one_hot(y)
     n_sample, self._n_class = y.shape
     # Every sample starts from the per-class column mean of the labels.
     class_means = np.mean(y, axis=0)
     current_pred = np.full_like(y, class_means)
     for _ in range(self._n_estimator):
         round_trees = []
         step = np.zeros_like(current_pred)
         # Each class column gets its own tree.
         for cls in range(self._n_class):
             target_col = y[:, cls]
             pred_col = current_pred[:, cls]
             gradient = self._loss.calc_gradient(target_col, pred_col)
             learner = CARTRegressionScratch(
                 self._min_sample, self._min_gain, self._max_depth)
             # The tree's training target is the gradient, not the labels.
             learner.fit(X, gradient)
             round_trees.append(learner)
             # The tree is queried one sample at a time to fill this
             # class's column of the update.
             for row in range(n_sample):
                 step[row, cls] = learner.predict(X[row])
         self._trees.append(round_trees)
         # Step against the fitted gradient, scaled by the learning rate.
         current_pred -= self._lr * step