def fit(self, X, y):
    """Fit the gradient-boosting ensemble.

    Inputs:
        X: train feature data, numpy array, shape (N, num_feature)
        y: train label data, numpy array, shape (N,)
    Updates self.estimators in this function and returns it.
    """
    # Stage 0: the constant model is the mean of the targets.
    self.f_0 = np.average(y)
    current = np.repeat(self.f_0, len(y))
    for stage in range(self.n_estimators):
        # Fit a tree to the residuals of the current ensemble prediction.
        pseudo_residual = y - current
        tree = MyDecisionTreeRegressor(
            min_samples_split=self.min_samples_split,
            max_depth=self.max_depth)
        tree.fit(X, pseudo_residual)
        # Shrink this stage's contribution by the learning rate.
        current = current + self.learning_rate * tree.predict(X)
        self.estimators[stage] = tree
    return self.estimators
def compute_cmjs(self, residual_dict):
    """For j = 1, 2, ..., J_m, compute the leaf values c_mj.

    Deep-copies residual_dict so the caller's structure is untouched,
    replaces its residuals with c_mj values via self.compute_cmj_dict,
    and returns a MyDecisionTreeRegressor whose root is that dict.
    """
    leaf_values = copy.deepcopy(residual_dict)
    self.compute_cmj_dict(leaf_values)
    value_tree = MyDecisionTreeRegressor(self.max_depth, self.min_samples_split)
    value_tree.root = leaf_values
    return value_tree
def fit(self, X, y):
    # Fit a gradient-boosting ensemble: start from the target mean and
    # repeatedly fit trees to the pseudo-residuals (negative gradient).
    average = np.mean(y)
    self.average = average  # constant initial prediction F_0
    y_predict = np.empty((X.shape[0]), dtype=np.float64)
    y_predict.fill(average)
    for k in range(self.n_estimators):
        # Pseudo-residuals of the current model; exact semantics depend on
        # self.negative_gradient, which is defined elsewhere in this class.
        residual = self.negative_gradient(y, y_predict)
        regression_tree = MyDecisionTreeRegressor(self.max_depth,
                                                  self.min_samples_split)
        regression_tree.fit(X, residual)
        # Learning-rate-shrunken update of the running prediction.
        y_predict += self.learning_rate * regression_tree.predict(
            X).ravel()
        self.estimators[k] = regression_tree
# NOTE(review): the stray triple-quote below appears to open a commented-out
# section that continues beyond this view — confirm it is closed later.
'''
def _fit_stage(self, i, X, y, y_pred):
    """Fit one boosting stage.

    Fits a regression tree to the residuals (y - y_pred), stores it at
    self.estimators[i], and returns the updated ensemble prediction.

    Inputs:
        i: stage index into self.estimators
        X: feature data, numpy array, shape (N, num_feature)
        y: label data, numpy array, shape (N,)
        y_pred: current ensemble prediction, shape (N,)
    Returns:
        y_pred + learning_rate * tree.predict(X)
    """
    # (Removed an unused local `N = X.shape[0]` from the original.)
    residual = y - y_pred
    # Induce a regression tree on the residuals.
    tree = MyDecisionTreeRegressor(
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split)
    tree.fit(X, residual)
    # Shrunken contribution of this stage (renamed from the misleading
    # `gamma`/"multiplier" — it is the scaled stage prediction).
    update = self.learning_rate * tree.predict(X)
    self.estimators[i] = tree
    return y_pred + update
def fit(self, X, y):
    """Train the boosted ensemble.

    Inputs:
        X: train feature data, numpy array, shape (N, num_feature)
        y: train label data, numpy array, shape (N,)
    Updates self.estimators in this function.
    """
    # Base prediction F_0: the target mean (scalar; broadcasts against y).
    prediction = np.mean(y)
    self.f0 = prediction
    for stage_idx in range(self.n_estimators):
        residuals = y - prediction
        tree = MyDecisionTreeRegressor(
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split)
        tree.fit(X, residuals)
        # Advance the running prediction by the shrunken tree output.
        prediction = prediction + self.learning_rate * np.array(tree.predict(X))
        self.estimators[stage_idx] = tree
def fit(self, X, y):
    """
    Inputs:
    X: Train feature data, numpy array, shape (N, num_feature)
    y: Train label data, numpy array, shape (N,)
    Populates self.estimators with one fitted tree per boosting round.
    """
    running_pred = np.mean(y)  # F_0: the target mean
    self.mean = running_pred
    for round_idx in range(self.n_estimators):
        weak_learner = MyDecisionTreeRegressor(
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split)
        # Fit the weak learner against the current residuals.
        weak_learner.fit(X, y - running_pred)
        running_pred = running_pred + self.learning_rate * weak_learner.predict(X)
        self.estimators[round_idx] = weak_learner
def fit(self, X, y):
    """
    Inputs:
    X: Train feature data, numpy array, shape (N, num_feature)
    y: Train label data, numpy array, shape (N,)
    Updates self.estimators in this function and returns it.
    """
    base = self.get_mean(y)
    self.min_y = base
    # Running residual: starts at y - F_0 and shrinks as trees are added.
    residual = y - base
    for idx in range(self.n_estimators):
        weak = MyDecisionTreeRegressor(self.max_depth, self.min_samples_split)
        weak.fit(X, residual)
        self.estimators[idx] = weak
        residual = residual - self.learning_rate * weak.predict(X)
    return self.estimators
def fit_regression_tree(self, X, y):
    """Fit a regression tree to the targets gamma_im, yielding the
    terminal regions R_mj, j = 1, 2, ..., J_m."""
    regressor = MyDecisionTreeRegressor(self.max_depth, self.min_samples_split)
    regressor.fit(X, y)
    return regressor