def bootstrap(x, y, z, p_degree, method, n_bootstrap=100):
    """Bootstrap estimate of test MSE, R2, bias and variance for a polynomial fit.

    The samples are stacked column-wise, shuffled, and split once: the first
    ``round(len(x) / 5)`` shuffled rows form a test set that is never used for
    fitting, the remaining rows form the training set. The training set is
    then resampled ``n_bootstrap`` times; each resample is fitted with the
    selected regression routine and evaluated on the fixed test set.

    Args:
        x, y: Predictor samples (presumably 1-D and of equal length -- they
            are combined with ``np.c_``; confirm against callers).
        z: Response samples, same length as ``x`` and ``y``.
        p_degree: Polynomial degree passed both to the fitting routine and to
            ``PolynomialFeatures`` when building the test design matrix.
        method: One of ``'Ridge'``, ``'Lasso'`` or ``'OLS'``.
        n_bootstrap: Number of bootstrap resamples.

    Returns:
        ``(MSE_M, R2_M, bias, variance)`` where ``MSE_M`` and ``R2_M`` are the
        averages over all resamples, ``bias`` is the mean squared difference
        between the test targets and the mean prediction, and ``variance`` is
        the mean per-point variance of the predictions. If ``method`` is not
        recognised an error is printed and ``0`` is returned.
    """
    # Pick the fitting routine once; the choice does not change per resample.
    if method == 'Ridge':
        fit = RidgeRegression
    elif method == 'Lasso':
        fit = Lasso
    elif method == 'OLS':
        fit = ols
    else:
        print('ERROR: Cannot recognize method')
        return 0

    # Shuffle a stacked copy so the caller's arrays are left untouched.
    data_set = np.c_[x, y, z]
    np.random.shuffle(data_set)
    set_size = round(len(x) / 5)

    # Split by slicing. The previous np.delete(..., np.linspace(...)) passed
    # float indices, which NumPy >= 1.19 rejects with an IndexError.
    x_test = data_set[:set_size, 0]
    y_test = data_set[:set_size, 1]
    z_test = data_set[:set_size, 2]
    x_train = data_set[set_size:, 0]
    y_train = data_set[set_size:, 1]
    z_train = data_set[set_size:, 2]

    # The test design matrix is identical for every resample: build it once.
    M = PolynomialFeatures(p_degree).fit_transform(np.c_[x_test, y_test])

    Z_predict = []
    MSE = []
    R2s = []
    for _ in range(n_bootstrap):
        x_, y_, z_ = resample(x_train, y_train, z_train)
        beta = fit(x_, y_, z_, degree=p_degree)

        # Flatten so a column-vector beta cannot silently broadcast against
        # the 1-D z_test in the error computations below.
        z_hat = np.ravel(M.dot(beta))
        Z_predict.append(z_hat)

        MSE.append(np.mean((z_test - z_hat) ** 2))
        R2s.append(R2(z_test, z_hat))

    # Aggregate over all bootstrap rounds.
    MSE_M = np.mean(MSE)
    R2_M = np.mean(R2s)
    bias = np.mean((z_test - np.mean(Z_predict, axis=0, keepdims=True)) ** 2)
    variance = np.mean(np.var(Z_predict, axis=0, keepdims=True))
    return MSE_M, R2_M, bias, variance
Exemple #2
0
def hold_out2(X, y, percent, num_val):
    """Evaluate the normal-equation fit with repeated hold-out validation.

    For each of ``num_val`` rounds the data are reshuffled with
    ``random_data``, the first ``int(len(y) * percent)`` rows are used to fit
    theta via ``normalEqu`` and the remaining rows are used for validation.
    The averaged theta and a table of averaged metrics are printed.

    Args:
        X: Feature matrix (2-D; rows are sliced with ``[:q, :]``).
        y: Label array (2-D; rows are sliced with ``[:q, :]``).
        percent: Fraction of the rows used for training.
        num_val: Number of hold-out rounds; must be at least 1.

    Returns:
        The theta averaged over all rounds, as a column vector.

    Raises:
        ValueError: If ``num_val`` is smaller than 1.
    """
    if num_val < 1:
        # Previously this surfaced late as a division by zero.
        raise ValueError("num_val must be at least 1")

    q = int(len(y) * percent)  # split index is the same in every round
    train_costs = []  # training cost of each round
    val_costs = []  # validation cost of each round
    thetas = []  # theta of each round, one column vector per entry
    mae = 0
    mape = 0
    mse = 0
    rmse = 0
    r2 = 0
    for _ in range(num_val):
        X1, y1 = random_data(X, y)
        train_X = X1[:q, :]  # split into training / validation by percentage
        train_y = y1[:q, :]
        val_X = X1[q:, :]
        val_y = y1[q:, :]
        theta, J_train = normalEqu(train_X, train_y)

        J_val = computeCost(val_X, val_y, theta)
        val_pred = np.dot(val_X, theta)  # computed once, shared by all metrics
        mae += MAE(val_y, val_pred)
        mape += MAPE(val_y, val_pred)
        r2 += R2(val_y, val_pred)
        mse += MSE_RMSE.MSE(val_y, val_pred)
        rmse += MSE_RMSE.RMSE(val_y, val_pred)
        train_costs.append(J_train)
        val_costs.append(J_val)
        thetas.append(theta)

    # Average theta across rounds. Works for any number of parameters instead
    # of the former hard-coded two-row accumulator and reshape(2, 1).
    theta = np.mean(np.hstack(thetas), axis=1)
    theta = theta.reshape(-1, 1)
    print("theta")
    print(theta)
    J3 = np.mean(train_costs)  # mean training cost over all rounds
    J4 = np.mean(val_costs)  # mean validation cost over all rounds

    # Table with the seven averaged evaluation figures.
    dr = pd.Series(
        [
            J3, J4, mae / num_val, mape / num_val, mse / num_val,
            rmse / num_val, r2 / num_val
        ],
        index=["J_train", "J_val", "MAE", "MAPE", "MSE", "RMSE", "R2"])
    print(dr)
    return theta
Exemple #3
0
def hold_out3(X, y, percent, num_val, k):
    """Repeated hold-out evaluation of locally weighted linear regression.

    Each of the ``num_val`` rounds reshuffles the data with ``random_data``,
    uses the first ``int(len(y) * percent)`` rows as the training part and the
    rest as the validation part, predicts both parts with ``lw.lwlrTest`` and
    accumulates the costs and metrics. A table of the averaged figures is
    printed; nothing is returned.

    Args:
        X: Feature matrix (rows are sliced with ``[:q, :]``).
        y: Label array (rows are sliced with ``[:q, :]``).
        percent: Fraction of the rows used for training.
        num_val: Number of hold-out rounds.
        k: Passed through to ``lw.lwlrTest`` (presumably the kernel bandwidth).
    """
    split = int(len(y) * percent)
    train_costs = []  # training cost per round
    val_costs = []  # validation cost per round
    sums = dict.fromkeys(("MAE", "MAPE", "MSE", "RMSE", "R2"), 0)

    for _ in range(num_val):
        shuffled_X, shuffled_y = random_data(X, y)
        fit_X, fit_y = shuffled_X[:split, :], shuffled_y[:split, :]
        held_X, held_y = shuffled_X[split:, :], shuffled_y[split:, :]

        # Predictions for the training part and for the held-out part.
        fit_pred = lw.lwlrTest(fit_X, fit_X, fit_y, k)
        held_pred = lw.lwlrTest(held_X, fit_X, fit_y, k)

        train_costs.append(comCost_lwlr(fit_y, fit_pred))
        val_costs.append(comCost_lwlr(held_y, held_pred))

        sums["MAE"] += MAE(held_y, held_pred)
        sums["MAPE"] += MAPE(held_y, held_pred)
        sums["R2"] += R2(held_y, held_pred)
        sums["MSE"] += MSE_RMSE.MSE(held_y, held_pred)
        sums["RMSE"] += MSE_RMSE.RMSE(held_y, held_pred)

    # Seven-entry summary: the two mean costs followed by the mean metrics.
    labels = ["J_train", "J_val", "MAE", "MAPE", "MSE", "RMSE", "R2"]
    values = [np.mean(train_costs), np.mean(val_costs)]
    for label in labels[2:]:
        values.append(sums[label] / num_val)
    dr = pd.Series(values, index=labels)
    print(dr)
Exemple #4
0
learningRate = 0.01

# Fit the linear model: repeatedly predict, measure the cost, compute the
# gradient and let gradientDescent produce the next weights.
weights = initalWeights
cost = []

for i in range(maxIter):
    yp = np.matmul(X_train, weights.T).ravel()
    J = computeL2Cost(Y_train, yp)
    G = computeGradient(X_train, Y_train, yp)
    weights = gradientDescent(weights, G, learningRate)

    # Record every cost; report only every tenth iteration.
    cost.append(J)
    if not i % 10:
        print("Cost of the model is {}".format(J))

print("Weights after the training are : {}".format(weights))

# Training-loss curve: cost value against iteration index.
plt.plot(range(len(cost)), cost)
plt.title('Cost per iterations')
plt.show()

# Evaluate the trained weights on the test split.
yp = np.matmul(X_test, weights.T).ravel()
test_cost = computeL2Cost(Y_test, yp)
print("MSE for the fitted model is {}".format(test_cost))

R2_score = R2(Y_test, yp)
print("The variance explained by the model is {}".format(R2_score))
Exemple #5
0
# Rows from q onwards are used for training, the first q rows for validation.
X1_train = X1[q:, :]
X1_val = X1[:q, :]
print(PD.plot1(X1_val, y_val))
theta = np.zeros((n + 1, 1))  # theta starts as a zero column with n + 1 rows
d = grad_learnning(X_train, y_train, theta)  # preliminary screening of learning rates
print(d)  # inspect the printed result to pick a good value
# Build the model with the chosen settings (0.1 and 1500 are passed on to
# gradientDesent; presumably learning rate and iteration count).
theta, J_history = gradientDesent(X_train, y_train, theta, 0.1, 1500)
print("theta")
print(theta)
print("代价函数的变化过程")
print(J_history)  # history of the cost function during training

# Evaluate the fitted line on 300 evenly spaced points in [0, 1].
x_test1 = np.linspace(0, 1, 300).reshape(-1, 1)
ones2 = np.ones((300, 1))
x_test2 = np.hstack([ones2, x_test1])  # prepend the x0 = 1 bias column
y_pre1 = np.dot(x_test2, theta)  # predicted y for the evenly spaced points
print("可视化拟合效果")
print(PD.plot2(X1_train, y_train, x_test1, y_pre1))  # visualise the fit

y_val_pre = np.dot(X_val, theta)
# Store the results under their own names: assigning them to MAE / MAPE / R2
# would replace the metric functions and break every later call to them.
val_mae = MAE(y_val, y_val_pre)
val_mape = MAPE(y_val, y_val_pre)
val_r2 = R2(y_val, y_val_pre)
val_mse = MSE_RMSE.MSE(y_val, y_val_pre)
val_rmse = MSE_RMSE.RMSE(y_val, y_val_pre)
# Table with the five evaluation figures.
dr = pd.Series([val_mae, val_mape, val_mse, val_rmse, val_r2],
               index=["MAE", "MAPE", "MSE", "RMSE", "R2"])
print(dr)
Exemple #6
0
# Preliminary visualisation of the data, to check the effect of standardisation.
print(PD.plot3(X1, X2, y.ravel()))
X1 = X1.reshape(-1, 1)
X2 = X2.reshape(-1, 1)
X = X1 * X2  # product of the two features (original note: overall house volume)
X = vc.Degree(4, X)  # polynomial expansion via vc.Degree
m = len(y)

# Use the cross-validated cost values to choose the regularisation penalty,
# then evaluate the performance of the chosen model.
reg_choose(X, y, numval=10)  # 10-fold cross-validation cost table per penalty
print("从函数表可以看出,J值均比较大,可能为欠拟合")
theta = normalEqu_Reg(X, y, 0.0)  # fit with the chosen penalty of zero

# Evaluate the fitted curve on 300 evenly spaced points in [0, 1].
x_test1 = np.linspace(0, 1, 300).reshape(-1, 1)
x_test3 = x_test1 * x_test1
x_test3 = vc.Degree(4, x_test3)
y_pre1 = np.dot(x_test3, theta)  # predicted y for the evenly spaced points
print("可视化拟合效果")
print(PD.plot4(X1, X2, y.ravel(), x_test1, x_test1, y_pre1.ravel()))  # visualise the fit

y_pre = np.dot(X, theta)
# Evaluate the chosen model. The results get their own names: assigning them
# to MAE / MAPE / R2 would replace the metric functions and break later calls.
fit_mae = MAE(y, y_pre)
fit_mape = MAPE(y, y_pre)
fit_r2 = R2(y, y_pre)
fit_mse = MSE_RMSE.MSE(y, y_pre)
fit_rmse = MSE_RMSE.RMSE(y, y_pre)
# Table with the five evaluation figures.
dr = pd.Series([fit_mae, fit_mape, fit_mse, fit_rmse, fit_r2],
               index=["MAE", "MAPE", "MSE", "RMSE", "R2"])
print(dr)