def __init__(self, max_depth=5, min_samples_split=2, min_samples_leaf=1,
             subsample=1.0, random_state=1234, n_jobs=-1, **kwarg):
    """Set up a tree that splits on Friedman's improvement score.

    Stores the stopping parameters, builds the split-selection and
    leaf-test closures, and hands them to ``Bonsai.__init__``.
    """
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf

    def find_split(avc):
        # AVC columns read here: 3 = left count, 4 = left sum(y),
        # 6 = right count, 7 = right sum(y)  (inferred from usage — confirm
        # against Bonsai's AVC layout).
        if avc.shape[0] == 0:
            return None
        # Keep only candidate splits whose children both exceed the leaf size.
        keep = np.logical_and(avc[:, 3] > self.min_samples_leaf,
                              avc[:, 6] > self.min_samples_leaf)
        avc = avc[keep, :]
        if avc.shape[0] == 0:
            return None
        cnt_l = avc[:, 3]
        cnt_r = avc[:, 6]
        mean_l = avc[:, 4] / cnt_l
        mean_r = avc[:, 7] / cnt_r
        gap = mean_l - mean_r
        gap_sq = gap * gap
        # Friedman score: n_l * n_r / (n_l + n_r) * (mean_l - mean_r)^2
        score = cnt_l * cnt_r / (cnt_l + cnt_r) * gap_sq
        winner = np.argsort(score)[-1]
        return {"selected": avc[winner, :]}

    def is_leaf(branch, branch_parent):
        # Stop when the depth cap is hit or the node is too small to split.
        return (branch["depth"] >= self.max_depth
                or branch["n_samples"] < self.min_samples_split)

    Bonsai.__init__(self, find_split, is_leaf,
                    subsample=subsample,
                    random_state=random_state,
                    n_jobs=n_jobs,
                    z_type="M2")
def __init__(self, max_depth=5, min_samples_split=2, min_samples_leaf=1,
             min_varsum_decrease=0.0, subsample=1.0, random_state=1234,
             **kwarg):
    """Set up a regression tree that minimizes the children's weighted
    variance sum.

    Stores the stopping parameters, builds the split-selection and
    leaf-test closures, and hands them to ``Bonsai.__init__``.

    Parameters
    ----------
    max_depth : maximum tree depth.
    min_samples_split : smallest node size that may still be split.
    min_samples_leaf : smallest allowed child size for a candidate split.
    min_varsum_decrease : minimum parent-to-child varsum drop required to
        keep growing (otherwise the branch becomes a leaf).
    subsample, random_state : forwarded to ``Bonsai.__init__``.
    """
    self.regression = True
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.min_varsum_decrease = min_varsum_decrease

    def find_split(avc):
        # AVC columns read here: 3 = left count, 4 = left sum(y),
        # 5 = left M2 (sum of y^2), 6 = right count, 7 = right sum(y),
        # 8 = right M2 — inferred from usage; confirm against Bonsai's
        # z_type="M2" layout.
        if avc.shape[0] == 0:
            return None
        valid_splits = np.logical_and(avc[:, 3] > self.min_samples_leaf,
                                      avc[:, 6] > self.min_samples_leaf)
        avc = avc[valid_splits, :]
        if avc.shape[0] == 0:
            return None
        n_l = avc[:, 3]
        n_r = avc[:, 6]
        mu_l = avc[:, 4] / n_l
        mu_r = avc[:, 7] / n_r
        M2_l = avc[:, 5]
        M2_r = avc[:, 8]
        # var = E[y^2] - (E[y])^2 per child.
        var_l = M2_l / n_l - mu_l * mu_l
        var_r = M2_r / n_r - mu_r * mu_r
        varsum = var_l * n_l + var_r * n_r
        # argsort(...)[0] == index of the minimum varsum (kept as argsort to
        # preserve the original tie-breaking behavior).
        best_idx = np.argsort(varsum)[0]
        # NOTE(review): the original also computed `best_varsum` here but
        # never used it — removed as dead code.
        ss = {
            "selected": avc[best_idx, :],   # required for Bonsai
            # NOTE(review): despite the "varsum" key names these store the
            # per-child *variance*, not variance * n — kept as-is since
            # RegTree elsewhere presumably consumes them in this form.
            "varsum@l": var_l[best_idx],    # required for RegTree
            "varsum@r": var_r[best_idx],    # required for RegTree
        }
        return ss

    def is_leaf(branch, branch_parent):
        # Default passes the decrease test when the parent has no recorded
        # varsum (e.g. the root).
        varsum_dec = 1.0 + self.min_varsum_decrease
        if "varsum" in branch_parent:
            varsum_dec = branch_parent["varsum"] - branch["varsum"]
        return (branch["depth"] >= self.max_depth
                or branch["n_samples"] < self.min_samples_split
                or varsum_dec < self.min_varsum_decrease)

    Bonsai.__init__(
        self,
        find_split,
        is_leaf,
        subsample=subsample,
        random_state=random_state,
        z_type="M2",
    )
def __init__(self, alpha=1.0, max_depth=5, min_samples_split=2,
             min_samples_leaf=1, **kwargs):
    """Set up an alpha-divergence classification tree.

    ``alpha == 1.0`` uses information gain (entropy), ``alpha == 0.0`` the
    log-based variant, and any other alpha the generalized power form.
    Builds the split/stop closures and forwards them to ``Bonsai``.
    """
    self.alpha = alpha
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf

    def find_split(avc):
        # AVC columns read here: 3 = left count, 4 = left positives,
        # 6 = right count, 7 = right positives (inferred from usage).
        if avc.shape[0] == 0:
            return None
        # Each child must contain strictly more than max(min_samples_leaf, 1)
        # samples.
        min_leaf = max(self.min_samples_leaf, 1)
        keep = np.logical_and(avc[:, 3] > min_leaf, avc[:, 6] > min_leaf)
        avc = avc[keep, :]
        if avc.shape[0] == 0:
            return None
        cnt_l = avc[:, 3]
        cnt_r = avc[:, 6]
        # Laplace-smoothed class probabilities per child.
        p_y_l = (avc[:, 4] + 1.0) / (cnt_l + 2.0)
        p_y_r = (avc[:, 7] + 1.0) / (cnt_r + 2.0)
        total = cnt_l + cnt_r
        p_x_l = cnt_l / total
        p_x_r = cnt_r / total
        # Clamp probabilities away from 0 and 1 before taking logs/powers.
        p_y_l = np.clip(p_y_l, PRECISION, 1.0 - PRECISION)
        p_y_r = np.clip(p_y_r, PRECISION, 1.0 - PRECISION)
        gain = np.zeros(avc.shape[0])
        if self.alpha == 1.0:
            # Information Gain (entropy criterion).
            gain += p_x_l * p_y_l * np.log(p_y_l)
            gain += p_x_l * (1.0 - p_y_l) * np.log(1.0 - p_y_l)
            gain += p_x_r * p_y_r * np.log(p_y_r)
            gain += p_x_r * (1.0 - p_y_r) * np.log(1.0 - p_y_r)
        elif self.alpha == 0.0:
            gain -= p_x_l * np.log(p_y_l)
            gain -= p_x_l * np.log(1.0 - p_y_l)
            gain -= p_x_r * np.log(p_y_r)
            gain -= p_x_r * np.log(1.0 - p_y_r)
        else:
            gain -= p_x_l * np.power(p_y_l, self.alpha)
            gain -= p_x_l * np.power(1.0 - p_y_l, self.alpha)
            gain -= p_x_r * np.power(p_y_r, self.alpha)
            gain -= p_x_r * np.power(1.0 - p_y_r, self.alpha)
            gain = gain / self.alpha / (1.0 - self.alpha)
        winner = np.argsort(gain)[-1]
        return {"selected": avc[winner, :]}

    def is_leaf(branch, branch_parent):
        # Stop when the depth cap is hit or the node is too small to split.
        return (branch["depth"] >= self.max_depth
                or branch["n_samples"] < self.min_samples_split)

    Bonsai.__init__(self, find_split, is_leaf, z_type="M2", **kwargs)
def __init__(
        self,
        max_depth=5,
        min_samples_split=2,
        min_samples_leaf=1,
        subsample=1.0,
        reg_lambda=0.1,  # regularization
        random_state=1234,
        distribution="gaussian",
        **kwarg):
    """Set up an XGBoost-style tree driven by gradient/hessian statistics.

    Maximizes the regularized objective g^2 / (h + lambda) summed over both
    children, and records each child's leaf value.  Builds the split/stop
    closures and forwards them to ``Bonsai`` with ``z_type="Hessian"``.
    """
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.reg_lambda = reg_lambda
    self.distribution = distribution

    def find_split(avc):
        # AVC columns read here: 3 = left count, 4 = left gradient sum,
        # 5 = left hessian sum, 6 = right count, 7 = right gradient sum,
        # 8 = right hessian sum (inferred from usage).
        if avc.shape[0] == 0:
            return None
        keep = np.logical_and(avc[:, 3] > self.min_samples_leaf,
                              avc[:, 6] > self.min_samples_leaf)
        avc = avc[keep, :]
        if avc.shape[0] == 0:
            return None
        if self.distribution == "bernoulli":
            # Bernoulli loss carries true hessians in the AVC.
            hess_l = avc[:, 5]
            hess_r = avc[:, 8]
        else:
            # Gaussian loss: hessian is constant 1 per sample, so the
            # per-child hessian sum equals the child count.
            hess_l = avc[:, 3]
            hess_r = avc[:, 6]
        grad_l = avc[:, 4]
        grad_r = avc[:, 7]
        score = grad_l * grad_l / (hess_l + self.reg_lambda)
        score = score + grad_r * grad_r / (hess_r + self.reg_lambda)
        # Leaf predictions for each side of the split.
        leaf_l = grad_l / (hess_l + self.reg_lambda)
        leaf_r = grad_r / (hess_r + self.reg_lambda)
        winner = np.argsort(score)[-1]
        return {
            "selected": avc[winner, :],
            "y@l": leaf_l[winner],
            "y@r": leaf_r[winner],
        }

    def is_leaf(branch, branch_parent):
        # Stop when the depth cap is hit or the node is too small to split.
        return (branch["depth"] >= self.max_depth
                or branch["n_samples"] < self.min_samples_split)

    Bonsai.__init__(
        self,
        find_split,
        is_leaf,
        subsample=subsample,
        random_state=random_state,
        z_type="Hessian",
    )