def train(X, y, max_depth, n_estimators, learning_rate, subsample=1.0, K=2, siGn=0):
    """Fit a gradient-boosted tree ensemble and return the list of trees.

    ``subsample`` < 1.0 enables stochastic row sampling each round;
    ``siGn == 1`` switches on influence trimming of the sampled rows.
    """
    trees = []
    n_rows = shape(X)[0]
    # Initial score: half the log-odds of the mean label.
    f0 = 0.5 * log((1 + mean(y)) / (1 - mean(y)))
    scores = ones(n_rows) * f0
    # Minimum number of samples a leaf node must contain.
    min_leaf = threN(n_rows, max_depth)
    for _ in range(n_estimators):
        # Refresh the pseudo-residuals from the current scores.
        residual = quasi_residual(y, scores)
        if subsample < 1.0:
            # Stochastic boosting: fit this round's tree on a random row subset.
            index = random.choice(range(n_rows), size=int(subsample * n_rows), replace=False)
        else:
            index = array(range(n_rows))
        if siGn == 1:
            # Influence trimming on the sampled residuals.
            index = influTrim(residual[index])
        tree = createTree(X[index], residual[index], max_depth, min_leaf, K)
        trees.append(tree)
        # This round's predictions (learning rate applied inside predict).
        round_pred = [predict(tree, X[i], learning_rate) for i in range(n_rows)]
        scores += array(round_pred)
    return trees
def predct(Tree, X_test, y_test, lr):
    """Return the lr-scaled ensemble prediction for every row of X_test.

    ``y_test`` is accepted for signature compatibility but is not used.
    """
    n_rows = shape(X_test)[0]
    pred = zeros(n_rows)
    for row in range(n_rows):
        # Sum every tree's scaled contribution for this row.
        pred[row] = sum(predict(tree, X_test[row]) * lr for tree in Tree)
    return pred
def validate(Tree, X_test, y_test):
    """Score the tree ensemble on (X_test, y_test) and return its R^2 value."""
    n_rows = shape(X_test)[0]
    pred = zeros(n_rows)
    for row in range(n_rows):
        # Unscaled sum of every tree's prediction for this row.
        pred[row] = sum(predict(tree, X_test[row]) for tree in Tree)
    return R2_score(pred, y_test)
def test(X, Forest, learning_rate, K):
    """Predict a class label for each row of X.

    Accumulates the per-class scores contributed by every boosting round's
    K trees, then returns the argmax over the K classes.
    """
    n_rows = shape(X)[0]
    scores = zeros((n_rows, K))
    for row in range(n_rows):
        for cls in range(K):
            # Total score for this (row, class) across all rounds.
            scores[row, cls] = sum(
                round_trees[cls] and predict(round_trees[cls], X[row], learning_rate)
                for round_trees in Forest
            ) if False else sum(
                predict(round_trees[cls], X[row], learning_rate)
                for round_trees in Forest
            )
    return argmax(scores, axis=1)
def test(X, Trees, learning_rate):
    """Predict {-1, +1} labels by thresholding the summed tree scores at 0."""
    n_rows = shape(X)[0]
    scores = zeros(n_rows)
    for row in range(n_rows):
        # Aggregate every tree's scaled contribution for this row.
        scores[row] = sum(predict(tree, X[row], learning_rate) for tree in Trees)
    # Positive total score maps to +1, zero or negative maps to -1.
    scores[scores > 0] = 1
    scores[scores <= 0] = -1
    return scores
def get_predict(trees_result, trees_feature, data_train):
    """Aggregate the votes of every tree in the forest over ``data_train``.

    For each tree, the rows are projected onto that tree's feature subset,
    each row's predicted label is collected, and the per-tree label lists
    are summed element-wise into the final vote array.

    Fix: the original reused the loop variable ``i`` for both the tree loop
    and the inner row loop, so the row loop clobbered the tree index — it
    only worked because ``trees_result[i]`` was read before the inner loop.
    Distinct index names remove the shadowing hazard.

    :param trees_result: list of fitted tree classifiers.
    :param trees_feature: per-tree feature subsets (parallel to trees_result).
    :param data_train: rows to predict; last column is assumed to be the label.
    :return: element-wise sum of each tree's predicted labels, shape (m,).
    """
    m = np.shape(data_train)[0]
    result = []
    for t in range(len(trees_result)):
        clf = trees_result[t]
        feature = trees_feature[t]
        # Restrict the data to the feature subset this tree was trained on.
        data = split_data(data_train, feature)
        # predict(...) returns a single-entry dict {label: ...}; take the label.
        result.append([list(predict(data[row][0:-1], clf).keys())[0]
                       for row in range(m)])
    # Element-wise sum of the per-tree label vectors.
    return np.sum(result, axis=0)
def bdt(X, y, eps=1000, depth=6, lr=0.1):
    """Boosted regression trees.

    Keeps fitting trees to the current residual until the residual
    sum-of-squares drops below ``eps``; returns the list of fitted trees.
    """
    trees = []
    n_rows, _ = shape(X)
    residual = y.copy()  # start from the raw targets
    while sum(residual ** 2) > eps:
        tree = createTree(X, residual, depth)
        trees.append(tree)
        step = [predict(tree, X[row]) for row in range(n_rows)]
        # Shrink this round's step by the learning rate before subtracting.
        residual -= array(step) * lr
        print(sum(residual ** 2))
    print('Train MSE:', sum(residual ** 2) / len(X))
    return trees
def train(X, y, max_depth, n_estimators, learning_rate, lamda, eta):
    """Boost ``n_estimators`` trees using per-round gradients and Hessians
    (second-order / XGBoost-style boosting); returns the fitted trees."""
    trees = []
    n_rows = shape(X)[0]
    scores = zeros(n_rows)
    for _ in range(n_estimators):
        # First- and second-order loss derivatives at the current scores.
        grad, hes = GraHes(y, scores)
        tree = createTree(X, max_depth, grad, hes, lamda, eta)
        trees.append(tree)
        # Learning rate is applied inside predict.
        scores += array([predict(tree, X[row], learning_rate) for row in range(n_rows)])
    return trees
def train(X, y, max_depth, n_estimators, learning_rate, K):
    """Multiclass gradient boosting: each round fits K trees, one per class,
    to the softmax residuals.

    Fix: the softmax now subtracts each row's maximum score before
    exponentiating. ``exp(Fx)`` alone overflows to inf once the scores grow,
    while the shifted form produces mathematically identical probabilities.

    :param X: training rows, shape (m, n_features).
    :param y: class-membership matrix, shape (m, K) — presumably one-hot;
        TODO confirm against the caller.
    :return: ``Forest`` — a list with one entry per round, each entry being
        the list of K trees fitted that round.
    """
    Forest = []
    m = shape(X)[0]
    Fx = zeros((m, K))  # per-row, per-class accumulated scores
    threNum = threN(m, max_depth)  # minimum samples a leaf must contain
    forecast = zeros((m, K))  # fully overwritten each round before use
    for _ in range(n_estimators):
        trees = []
        # Numerically stable softmax: shift by the row max before exp.
        eFx = exp(Fx - Fx.max(axis=1)[:, None])
        Px = eFx / sum(eFx, axis=1)[:, None]  # row-wise normalisation
        residual = y - Px
        for k in range(K):
            # One tree per class, fit to that class's residual column.
            tree = createTree(X, residual[:, k], max_depth, threNum, K)
            trees.append(tree)
            for i in range(m):
                forecast[i, k] = predict(tree, X[i], learning_rate)
        Fx += forecast
        Forest.append(trees)
    return Forest