def bootstrap(x, y, z, p_degree, method, n_bootstrap=100):
    """Estimate test MSE, R2, bias and variance of a polynomial fit by bootstrap.

    The rows ``(x, y, z)`` are shuffled, roughly one fifth of them is held out
    as a test set that is never used for fitting, and the chosen model is
    refitted ``n_bootstrap`` times on resamples of the remaining rows.

    Args:
        x, y: Predictor arrays (assumed 1-D and of equal length; they are
            stacked as columns with ``np.c_``).
        z: Response array, same length as ``x``.
        p_degree: Polynomial degree, forwarded to the fitting routine and to
            ``PolynomialFeatures``.
        method: One of ``'Ridge'``, ``'Lasso'`` or ``'OLS'``.
        n_bootstrap: Number of bootstrap resamples.

    Returns:
        Tuple ``(MSE_M, R2_M, bias, variance)`` averaged over the resamples,
        or ``0`` (after printing an error) when ``method`` is not recognised.
    """
    # Randomly shuffle the rows so the hold-out split is random.
    data_set = np.c_[x, y, z]
    np.random.shuffle(data_set)

    set_size = round(len(x) / 5)

    # Split by slicing.  The previous np.delete(..., np.linspace(...)) passed
    # float indices, which np.delete rejects with an IndexError.
    test_rows = data_set[:set_size]
    train_rows = data_set[set_size:]
    x_test, y_test, z_test = test_rows[:, 0], test_rows[:, 1], test_rows[:, 2]
    x_train, y_train, z_train = (train_rows[:, 0], train_rows[:, 1],
                                 train_rows[:, 2])

    # Validate once, before any fitting work is done.
    if method not in ('Ridge', 'Lasso', 'OLS'):
        print('ERROR: Cannot recognize method')
        return 0
    fitters = {'Ridge': RidgeRegression, 'Lasso': Lasso, 'OLS': ols}
    fit = fitters[method]

    # The test design matrix does not depend on the resample; build it once.
    M = PolynomialFeatures(p_degree).fit_transform(np.c_[x_test, y_test])

    Z_predict = []
    MSE = []
    R2s = []
    for _ in range(n_bootstrap):
        x_, y_, z_ = resample(x_train, y_train, z_train)
        beta = fit(x_, y_, z_, degree=p_degree)

        # Flatten so an (n, 1) prediction cannot broadcast against the 1-D
        # z_test into an (n, n) matrix in the error terms below.
        z_hat = np.ravel(M.dot(beta))
        Z_predict.append(z_hat)

        MSE.append(np.mean((z_test - z_hat)**2))
        R2s.append(R2(z_test, z_hat))

    # Aggregate over all bootstrap fits.
    MSE_M = np.mean(MSE)
    R2_M = np.mean(R2s)
    bias = np.mean((z_test - np.mean(Z_predict, axis=0, keepdims=True))**2)
    variance = np.mean(np.var(Z_predict, axis=0, keepdims=True))
    return MSE_M, R2_M, bias, variance
def hold_out2(X, y, percent, num_val):
    """Evaluate the normal-equation model with repeated hold-out validation.

    In each of ``num_val`` rounds the data is reshuffled with ``random_data``,
    the first ``int(len(y) * percent)`` rows are used for training and the
    remaining rows for validation.  The averaged theta and a pandas Series of
    the averaged metrics are printed.

    Args:
        X: Feature matrix (2-D, one row per sample).
        y: Label array (2-D; it is sliced as ``y[:q, :]``).
        percent: Fraction of the rows used for training.
        num_val: Number of validation rounds; must be at least 1.

    Returns:
        Theta averaged over all rounds, as a column vector.

    Raises:
        ValueError: If ``num_val`` is smaller than 1.
    """
    if num_val < 1:
        raise ValueError("num_val must be at least 1")

    m = len(y)
    q = int(m * percent)  # split index, identical for every round

    train_costs = []  # training cost of each round
    val_costs = []  # validation cost of each round
    thetas = []  # theta of each round
    mae = 0
    mape = 0
    mse = 0
    rmse = 0
    r2 = 0
    for _ in range(num_val):
        X1, y1 = random_data(X, y)
        # Split into training and validation parts by the given fraction.
        train_X = X1[:q, :]
        train_y = y1[:q, :]
        val_X = X1[q:, :]
        val_y = y1[q:, :]

        # Fit with the normal equation, then score on the validation part.
        theta, J_train = normalEqu(train_X, train_y)
        J_val = computeCost(val_X, val_y, theta)

        # Predict once and reuse the prediction for every metric.
        val_pred = np.dot(val_X, theta)
        mae += MAE(val_y, val_pred)
        mape += MAPE(val_y, val_pred)
        r2 += R2(val_y, val_pred)
        mse += MSE_RMSE.MSE(val_y, val_pred)
        rmse += MSE_RMSE.RMSE(val_y, val_pred)

        train_costs.append(J_train)
        val_costs.append(J_val)
        thetas.append(theta)

    # Average theta over the rounds.  Stacking the per-round thetas as columns
    # works for any number of parameters, not just two.
    theta = np.mean(np.column_stack(thetas), axis=1).reshape(-1, 1)
    print("theta")
    print(theta)

    J3 = np.mean(train_costs)  # mean training cost over the rounds
    J4 = np.mean(val_costs)  # mean validation cost over the rounds
    # Series holding the seven averaged evaluation figures.
    dr = pd.Series(
        [
            J3, J4, mae / num_val, mape / num_val, mse / num_val,
            rmse / num_val, r2 / num_val
        ],
        index=["J_train", "J_val", "MAE", "MAPE", "MSE", "RMSE", "R2"])
    print(dr)
    return theta
def hold_out3(X, y, percent, num_val, k):
    """Evaluate locally weighted linear regression with repeated hold-out.

    In each of ``num_val`` rounds the data is reshuffled with ``random_data``,
    the first ``int(len(y) * percent)`` rows are used for training and the
    remaining rows for validation.  A pandas Series with the averaged costs
    and metrics is printed.  No theta is produced and nothing is returned.

    Args:
        X: Feature matrix (2-D, one row per sample).
        y: Label array (2-D; it is sliced as ``y[:q, :]``).
        percent: Fraction of the rows used for training.
        num_val: Number of validation rounds; must be at least 1.
        k: Forwarded unchanged to ``lw.lwlrTest`` (presumably the kernel
            bandwidth -- confirm against that helper).

    Raises:
        ValueError: If ``num_val`` is smaller than 1.
    """
    if num_val < 1:
        raise ValueError("num_val must be at least 1")

    m = len(y)
    q = int(m * percent)  # split index, identical for every round

    train_costs = []  # training cost of each round
    val_costs = []  # validation cost of each round
    mae = 0
    mape = 0
    mse = 0
    rmse = 0
    r2 = 0
    for _ in range(num_val):
        X1, y1 = random_data(X, y)
        # Split into training and validation parts by the given fraction.
        train_X = X1[:q, :]
        train_y = y1[:q, :]
        val_X = X1[q:, :]
        val_y = y1[q:, :]

        # Predictions for the training rows and for the validation rows, both
        # fitted on the training part.
        y_pre1 = lw.lwlrTest(train_X, train_X, train_y, k)
        y_pre2 = lw.lwlrTest(val_X, train_X, train_y, k)

        J_train = comCost_lwlr(train_y, y_pre1)
        J_val = comCost_lwlr(val_y, y_pre2)

        # Accumulate the validation metrics; they are averaged below.
        mae += MAE(val_y, y_pre2)
        mape += MAPE(val_y, y_pre2)
        r2 += R2(val_y, y_pre2)
        mse += MSE_RMSE.MSE(val_y, y_pre2)
        rmse += MSE_RMSE.RMSE(val_y, y_pre2)

        train_costs.append(J_train)
        val_costs.append(J_val)

    J3 = np.mean(train_costs)  # mean training cost over the rounds
    J4 = np.mean(val_costs)  # mean validation cost over the rounds
    # Series holding the seven averaged evaluation figures.
    dr = pd.Series(
        [
            J3, J4, mae / num_val, mape / num_val, mse / num_val,
            rmse / num_val, r2 / num_val
        ],
        index=["J_train", "J_val", "MAE", "MAPE", "MSE", "RMSE", "R2"])
    print(dr)
learningRate = 0.01 # Training a Linear Regression weights = initalWeights cost = [] for i in range(maxIter): yp = np.matmul(X_train, weights.T).ravel() J = computeL2Cost(Y_train, yp) G = computeGradient(X_train, Y_train, yp) weights = gradientDescent(weights, G, learningRate) if i % 10 == 0: print("Cost of the model is {}".format(J)) cost.append(J) print("Weights after the training are : {}".format(weights)) # Plotting the training loss curve plt.plot(range(0, len(cost)), cost) plt.title('Cost per iterations') plt.show() # Prediction using the model yp = np.matmul(X_test, weights.T).ravel() print("MSE for the fitted model is {}".format(computeL2Cost(Y_test, yp))) R2_score = R2(Y_test, yp) print("The variance explained by the model is {}".format(R2_score))
# Raw-feature views of the split, used only for plotting: rows from q onward
# are treated as training data, rows before q as validation data.
X1_train = X1[q:, :]
X1_val = X1[:q, :]
print(PD.plot1(X1_val, y_val))
#print(X_train)
#print(X_val)

theta = np.zeros((n + 1, 1))  # theta starts as an (n + 1, 1) zero matrix
d = grad_learnning(X_train, y_train, theta)  # preliminary screening of learning rates
print(d)  # inspect the printed values to pick a good one
# Build the model with the chosen setting (presumably learning rate 0.1 and
# 1500 iterations -- confirm against gradientDesent's signature).
theta, J_history = gradientDesent(X_train, y_train, theta, 0.1, 1500)
print("theta")
print(theta)
print("代价函数的变化过程")
print(J_history)  # how the cost evolved during training

# Evaluate the fitted line on an even grid over [0, 1] to inspect the fit.
x_test1 = np.linspace(0, 1, 300).reshape(-1, 1)
ones2 = np.ones((300, 1)).reshape(-1, 1)
x_test2 = np.hstack([ones2, x_test1])  # prepend the constant x0 = 1 column
y_pre1 = np.dot(x_test2, theta)  # predictions from theta and the grid
print("可视化拟合效果")
print(PD.plot2(X1_train, y_train, x_test1, y_pre1))  # visualise the fit

# Score the model on the validation set.  The results are stored under
# lowercase names so the metric functions MAE, MAPE and R2 are not overwritten
# by their own return values and stay callable afterwards.
y_val_pre = np.dot(X_val, theta)
mae = MAE(y_val, y_val_pre)
mape = MAPE(y_val, y_val_pre)
r2 = R2(y_val, y_val_pre)
mse = MSE_RMSE.MSE(y_val, y_val_pre)
rmse = MSE_RMSE.RMSE(y_val, y_val_pre)
# Series holding the five evaluation metrics.
dr = pd.Series([mae, mape, mse, rmse, r2],
               index=["MAE", "MAPE", "MSE", "RMSE", "R2"])
print(dr)
#print("数据初步可视化")
# Preliminary visualisation of the data, to inspect the effect of the
# standardisation.
print(PD.plot3(X1, X2, y.ravel()))
X1 = X1.reshape(-1, 1)
X2 = X2.reshape(-1, 1)
X = X1 * X2  # product of the two features (original note: total volume of the house)
X = vc.Degree(4, X)  # polynomial feature selection with the argument 4
m = len(y)
#ones=np.ones(m).reshape(-1,1)
#X=np.hstack([ones,X])  # would prepend the constant x0 = 1 column
#print(X)

# Use the cross-validated cost of each penalty value to choose a suitable
# regularisation strength, then evaluate the performance of that model.
reg_choose(X, y, numval=10)  # per the original note: 10-fold CV cost table per penalty value
print("从函数表可以看出,J值均比较大,可能为欠拟合")
theta = normalEqu_Reg(X, y, 0.0)  # theta for the chosen penalty value, here zero

# Evaluate the fit on an even grid over [0, 1], using the same grid for both
# features, to inspect the fitted curve.
x_test1 = np.linspace(0, 1, 300).reshape(-1, 1)
x_test3 = x_test1 * x_test1
x_test3 = vc.Degree(4, x_test3)
#print(x_test3)
y_pre1 = np.dot(x_test3, theta)  # predictions from theta and the grid
print("可视化拟合效果")
print(PD.plot4(X1, X2, y.ravel(), x_test1, x_test1, y_pre1.ravel()))  # visualise the fit

# Score the chosen model on the full data set.  The results are stored under
# lowercase names so the metric functions MAE, MAPE and R2 are not overwritten
# by their own return values and stay callable afterwards.
y_pre = np.dot(X, theta)
mae = MAE(y, y_pre)
mape = MAPE(y, y_pre)
r2 = R2(y, y_pre)
mse = MSE_RMSE.MSE(y, y_pre)
rmse = MSE_RMSE.RMSE(y, y_pre)
# Series holding the five evaluation metrics.
dr = pd.Series([mae, mape, mse, rmse, r2],
               index=["MAE", "MAPE", "MSE", "RMSE", "R2"])
print(dr)