Example #1
import numpy as np

from setup import create_designmatrix, MakeData, OLS_reg, train_test_splitter
from assessment import bootstrap


def run_bootstrap_OLS():
    """Run a bootstrap bias-variance analysis of OLS for each polynomial order."""
    x, y, z, P, complexity = MakeData(p=7)[:5]
    BootVariances = np.zeros(complexity)
    BootBias = np.zeros(complexity)
    BootMSE = np.zeros(complexity)


    # Perform bootstrap resampling for each polynomial order,
    # with N bootstrap rounds per order
    for p in P:
        X = create_designmatrix(x, y, p)
        data = train_test_splitter(X, z, testsize=0.2)
        N = 1000
        BootBias[p-1], BootVariances[p-1], BootMSE[p-1] = bootstrap(data, N, OLS_reg)

    return BootVariances, BootBias, BootMSE, P, x, y, z
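
A minimal usage sketch (illustrative; the plotting code below is not part of the original and assumes matplotlib):

import matplotlib.pyplot as plt

BootVariances, BootBias, BootMSE, P, x, y, z = run_bootstrap_OLS()

# Bias-variance tradeoff across model complexity
plt.plot(P, BootBias, label="Bias")
plt.plot(P, BootVariances, label="Variance")
plt.plot(P, BootMSE, label="MSE")
plt.xlabel("Polynomial order")
plt.ylabel("Error")
plt.legend()
plt.show()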
Example #2
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model as skl
from sklearn.model_selection import train_test_split
from setup import create_designmatrix, MakeData, ridge_reg, OLS_reg, MSE

x, y, z, P, complexity = MakeData(N=10)[:5]

nlambdas = 100
_lambda = np.logspace(-4, 1, nlambdas)

MSEPredictLasso = np.zeros(nlambdas)
MSEPredictRidge = np.zeros(nlambdas)
MSEPredictOLS = np.zeros(nlambdas)
MSEPredictRidge_mine = np.zeros(nlambdas)

p = 10
X = create_designmatrix(x, y, p)
X_train, X_test, Y_train, Y_test = train_test_split(X, z, test_size=0.2)
for i in range(nlambdas):
    lam = _lambda[i]

    clf_ridge = skl.Ridge(alpha=lam, fit_intercept=False).fit(X_train, Y_train)
    ridge_beta_mine = ridge_reg(X_train, Y_train, lam)
    clf_lasso = skl.Lasso(alpha=lam, fit_intercept=False).fit(X_train, Y_train)
    ols_beta = OLS_reg(X_train, Y_train)

    yridge = clf_ridge.predict(X_test)
    yridge_mine = X_test @ ridge_beta_mine
    ylasso = clf_lasso.predict(X_test)
    yols = X_test @ ols_beta

    MSEPredictLasso[i] = MSE(Y_test, ylasso)
    MSEPredictRidge[i] = MSE(Y_test, yridge)
    MSEPredictRidge_mine[i] = MSE(Y_test, yridge_mine)
    MSEPredictOLS[i] = MSE(Y_test, yols)
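
A plotting sketch to compare the four error curves (illustrative, not part of the original snippet):

# Test MSE as a function of the regularisation strength
plt.semilogx(_lambda, MSEPredictLasso, label="Lasso")
plt.semilogx(_lambda, MSEPredictRidge, label="Ridge (sklearn)")
plt.semilogx(_lambda, MSEPredictRidge_mine, label="Ridge (own)")
plt.semilogx(_lambda, MSEPredictOLS, label="OLS", linestyle="--")
plt.xlabel(r"$\lambda$")
plt.ylabel("Test MSE")
plt.legend()
plt.show()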
Example #3
import numpy as np
import sklearn.linear_model as skl
from setup import create_designmatrix, MakeData, OLS_reg, MSE, train_test_splitter, R2score, plot_Franke
from Ridge import ridge_reg

np.random.seed(1)

lambdas = np.logspace(-4, 0, 25)

Ridge_lam = lambdas[8]

Lasso_lam = lambdas[1]

"Training data"
x, y, Y_train = MakeData(N=10, s=3)[:3]
"""OLS: polynomial degree = 4"""
X_OLS = create_designmatrix(x, y, 4)
X_OLS_train, Y_OLS_train = train_test_splitter(X_OLS, Y_train,
                                               testsize=0.2)[0:3:2]
OLS_betas = OLS_reg(X_OLS_train, Y_OLS_train)
"""Ridge: polynomial degree = 5"""
X_Ridge = create_designmatrix(x, y, 5)
X_Ridge_train, Y_Ridge_train = train_test_splitter(X_Ridge,
                                                   Y_train,
                                                   testsize=0.2)[0:3:2]
Ridge_betas = ridge_reg(X_Ridge_train, Y_Ridge_train, Ridge_lam)
"""Lasso: polynomial degree = 12"""
X_Lasso = create_designmatrix(x, y, 12)
X_Lasso_train, Y_Lasso_train = train_test_splitter(X_Lasso,
                                                   Y_train,
                                                   testsize=0.2)[0:3:2]
clf_lasso = skl.Lasso(alpha=Lasso_lam,
                      fit_intercept=False).fit(X_Lasso_train, Y_Lasso_train)
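
To compare the three fitted models, one can predict on the full design matrices (an illustrative sketch; MSE is the project's own helper, and the full dataset is used here for brevity):

z_OLS = X_OLS @ OLS_betas
z_Ridge = X_Ridge @ Ridge_betas
z_Lasso = clf_lasso.predict(X_Lasso)

print("OLS   MSE:", MSE(Y_train, z_OLS))
print("Ridge MSE:", MSE(Y_train, z_Ridge))
print("Lasso MSE:", MSE(Y_train, z_Lasso))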
Example #4
import numpy as np
import seaborn as sns
from matplotlib.patches import Rectangle
from setup import create_designmatrix, MakeData, OLS_reg, MSE, train_test_splitter, ridge_reg
from assessment import kfold_cross_validation

if __name__ == "__main__":

    lambdas = np.logspace(-4, 0, 25)

    x, y, z, P, complexity = MakeData(p=9)[:5]

    # Each column is a polynomial order, rows are lambda values
    KfoldMSE_Ridge = np.zeros((complexity, len(lambdas)))

    for j, p in enumerate(P):
        X = create_designmatrix(x, y, p, scale=False)
        for i, lam in enumerate(lambdas):
            KfoldMSE_Ridge[j,
                           i] = kfold_cross_validation(X, z, ridge_reg, lam)[0]

    #Find minimum MSE value:
    min_MSE = np.amin(KfoldMSE_Ridge)
    index1 = np.where(KfoldMSE_Ridge == np.amin(KfoldMSE_Ridge))

    print("Minimum MSE value for Lasso", min_MSE)

    #Find second and third minimum
    temp = np.copy(KfoldMSE_Ridge)
    temp[index1[0][0], index1[1][0]] += 10  # mask out the global minimum
    index2 = np.where(temp == np.amin(temp))
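
A compact alternative for ranking the smallest MSEs with plain NumPy (an illustrative sketch; the lines continue inside the __main__ block above):

    # Flat indices of the three smallest MSEs, smallest first
    order = np.argsort(KfoldMSE_Ridge, axis=None)[:3]
    for rank, flat in enumerate(order, start=1):
        j, i = np.unravel_index(flat, KfoldMSE_Ridge.shape)
        print(f"#{rank}: p = {P[j]}, lambda = {lambdas[i]:.2e}, "
              f"MSE = {KfoldMSE_Ridge[j, i]:.4f}")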
Example #5
import numpy as np
from terrain_setup import train, test, val, plot_terrain
from assessment import bootstrap, kfold_cross_validation
from setup import ridge_reg, create_designmatrix

PolyMax = 20
Polys = np.arange(1, PolyMax + 1)

Bias = np.zeros(PolyMax)
Variance = np.zeros(PolyMax)
MSE_boot = np.zeros(PolyMax)
MSE_cv = np.copy(Bias)
R2_cv = np.copy(Bias)
MSE_std_cv = np.copy(Bias)

for i, p in enumerate(Polys):
    X_train = create_designmatrix(train[0], train[1], p)
    Y_train = train[2].ravel()
    X_val = create_designmatrix(val[0], val[1], p)
    Y_val = val[2].ravel()

    data = [X_train, X_val, Y_train, Y_val]
    """Bootstrap"""
    N_boots = 100
    Bias[i], Variance[i], MSE_boot[i] = bootstrap(data, N_boots, ridge_reg)
    """Cross-validation"""
    X_cv = np.concatenate((X_train, X_val))
    Y_cv = np.concatenate((Y_train, Y_val))
    MSE_cv[i], R2_cv[i], MSE_std_cv[i] = kfold_cross_validation(X_cv,
                                                                Y_cv,
                                                                ridge_reg,
                                                                k=10)
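
A short follow-up sketch (illustrative, not part of the original): pick the degree with the lowest cross-validation MSE and plot the two error estimates against complexity.

import matplotlib.pyplot as plt

best_p = Polys[np.argmin(MSE_cv)]
print(f"Lowest cross-validation MSE at polynomial degree {best_p}")

plt.plot(Polys, MSE_boot, label="Bootstrap MSE")
plt.plot(Polys, MSE_cv, label="Cross-validation MSE")
plt.xlabel("Polynomial degree")
plt.ylabel("MSE")
plt.legend()
plt.show()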
Example #6
print(f"   OLS    |   {OLS_degree}   |   n/a")
print(f"  Ridge   |   {Ridge_model[0]}  |   {Ridge_model[1][1]}")
print(f"  Lasso   |   {Lasso_model[0]}   |   {Lasso_model[1][1]}")

np.random.seed(1)

Ridge_lam = Ridge_model[1][1]

Lasso_lam = Lasso_model[1][1]
"""Y train"""
Y_train = train[2].ravel()
Y_val = val[2].ravel()
Y_train = np.concatenate((Y_train, Y_val))
"""OLS"""

X_train = create_designmatrix(train[0], train[1], OLS_degree, scale=True)
X_val = create_designmatrix(val[0], val[1], OLS_degree, scale=True)
X_OLS_train = np.concatenate((X_train, X_val))

OLS_betas = OLS_reg(X_OLS_train, Y_train)
"""Ridge"""

X_train = create_designmatrix(train[0], train[1], Ridge_model[0], scale=True)
X_val = create_designmatrix(val[0], val[1], Ridge_model[0], scale=True)
X_Ridge_train = np.concatenate((X_train, X_val))

Ridge_betas = ridge_reg(X_Ridge_train, Y_train, Ridge_lam)
"""Lasso"""

X_train = create_designmatrix(train[0], train[1], Lasso_model[0], scale=True)
X_val = create_designmatrix(val[0], val[1], Lasso_model[0], scale=True)
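
The snippet cuts off before the Lasso fit; a sketch of the missing step, following the pattern of the OLS and Ridge branches above and the Lasso call in example #3 (assumes sklearn):

import sklearn.linear_model as skl

X_Lasso_train = np.concatenate((X_train, X_val))
clf_lasso = skl.Lasso(alpha=Lasso_lam,
                      fit_intercept=False).fit(X_Lasso_train, Y_train)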
Example #7
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import MaxNLocator
from matplotlib.patches import Rectangle
from sklearn.preprocessing import StandardScaler
from setup import create_designmatrix, OLS_reg, ridge_reg, R2score, train_test_splitter
from assessment import bootstrap, kfold_cross_validation
from terrain_setup import train, val


def OLS_analysis(x,
                 y,
                 P,
                 z,
                 R2=False,
                 confinf_beta=False,
                 print_error=False,
                 save_X=False):
    """
    Performs OLS regression on the dataset for each polynomial order in P.

    The data is scaled, split, and then the model is trained. The test mean
    squared error and the R2 score are printed for each polynomial order.
    """
    R2_list = []
    ConfInfB = []
    MSE_list = np.zeros((len(P), 2))
    Betas = []
    X_list = []

    if print_error:
        print("  P  |     MSE   |   R2   ")
        print("--------------------------")

    for i, p in enumerate(P):

        X = create_designmatrix(x, y, p)
        X_train, X_test, Y_train, Y_test = train_test_splitter(X,
                                                               z,
                                                               testsize=0.2)

        # Finding Beta
        B = OLS_reg(X_train, Y_train)
        Betas.append(B)

        # Predicting using OLS
        ytilde_train = X_train @ B
        ytilde_test = X_test @ B

        # Finding MSE of training and test data
        MSE_list[i, 1] = np.mean(np.square(Y_test - ytilde_test))  # Test MSE
        MSE_list[i, 0] = np.mean(np.square(Y_train -
                                           ytilde_train))  # Training MSE

        if save_X:
            X_list.append(X)

        if R2:
            R2_list.append(R2score(Y_test, ytilde_test))

        if confinf_beta:
            # ~95% confidence interval: beta +/- 2*sqrt(Var(beta)), with
            # Var(beta) = sigma^2 * diag((X^T X)^{-1})
            sig2 = np.mean(np.square(Y_train - ytilde_train))
            VarB = sig2 * np.linalg.inv(X_train.T @ X_train).diagonal()
            ConfInf = np.zeros((len(B), 2))
            ConfInf[:, 0] = B - 2 * np.sqrt(VarB)
            ConfInf[:, 1] = B + 2 * np.sqrt(VarB)
            ConfInfB.append(ConfInf)

        if print_error:
            # R2_list is only populated when R2=True
            r2_str = f"{R2_list[i]:.4f}" if R2 else "  n/a "
            print(f" {p:2.0f}  |   {MSE_list[i, 1]:.4f}  |  {r2_str}")

    return Betas, MSE_list, X_list, R2_list, ConfInfB
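
A usage sketch (illustrative; assumes x, y, z from MakeData as in the earlier examples):

P = np.arange(1, 6)
Betas, MSE_list, X_list, R2_list, ConfInfB = OLS_analysis(
    x, y, P, z, R2=True, confinf_beta=True, print_error=True)
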
def Model_selection_terrain_with_lambda(model,
                                        modelname,
                                        lambdas=np.logspace(-4, 0, 25),
                                        PolyMin=1,
                                        PolyMax=12,
                                        cv_model=kfold_cross_validation):

    Polys = np.arange(PolyMin, PolyMax + 1)

    # Each column is a polynomial order, rows are lambda values
    MSE_cv = np.zeros((len(Polys), len(lambdas)))
    MSE_std_cv = np.zeros((len(Polys), len(lambdas)))

    for j, p in enumerate(Polys):
        X_train = create_designmatrix(train[0], train[1], p, scale=False)
        Y_train = train[2].ravel()
        X_val = create_designmatrix(val[0], val[1], p, scale=False)
        Y_val = val[2].ravel()
        X_cv = np.concatenate((X_train, X_val))
        Y_cv = np.concatenate((Y_train, Y_val))
        for i, lam in enumerate(lambdas):
            MSE_cv[j, i], MSE_std_cv[j, i] = cv_model(X_cv,
                                                      Y_cv,
                                                      model,
                                                      lam,
                                                      k=7)[0:3:2]

    #Find minimum MSE value:
    min_MSE = np.amin(MSE_cv)
    index1 = np.where(MSE_cv == np.amin(MSE_cv))
    min_MSE_std = MSE_std_cv[index1[0][0], index1[1][0]]
    print("Minimum MSE value for Lasso", min_MSE)

    #Find simplest model within one standard error of the minimum
    print("Minimum MSE + std(Minimum MSE) = ", min_MSE + min_MSE_std)

    # Scan lambda values (columns of MSE_cv, rows after .T) from low to high;
    # for each, scan polynomial orders from low to high, and stop at the first
    # combination within one standard error of the minimum. The for/else
    # construct breaks out of both loops at once.
    for i, j in enumerate((MSE_cv < min_MSE + min_MSE_std).T):
        for k, l in enumerate(j):
            if l:
                break
        else:
            continue
        break

    chosenmodelpol = Polys[k]
    chosenmodellam = [i, lambdas[i]]

    # Plot the MSEs and the corresponding polynomial degree and lambda value
    Scaled_for_plot = 0.01 * MSE_cv
    Pplot = (PolyMin, PolyMax)
    Lamplot = (0, len(lambdas))
    Poly_labels = [str(x) for x in range(Pplot[0], Pplot[1] + 1)]
    Lam_labels = [str(x) for x in range(Lamplot[0], Lamplot[1])]

    f, ax = plt.subplots(figsize=(9, 6))
    ax.add_patch(
        Rectangle((index1[0][0], index1[1][0]),
                  1,
                  1,
                  fill=False,
                  edgecolor='pink',
                  lw=3))

    sns.set(font_scale=1)
    ax = sns.heatmap(
        Scaled_for_plot.T,
        cbar=False,
        annot=True,
        square=True,
        #yticklabels=Lam_labels,
        xticklabels=Poly_labels,
        fmt='3.0f')
    plt.ylabel(r"$\lambda$ values enumerated low->high")
    plt.xlabel("Polynomial order")
    plt.title(f'MSE of {modelname} regression, ' + r"scaled by $10^{-2}$")
    plt.tight_layout()
    plt.show()

    return chosenmodelpol, chosenmodellam
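
A usage sketch (illustrative; ridge_reg as imported above):

pol, (lam_idx, lam) = Model_selection_terrain_with_lambda(ridge_reg, "Ridge")
print(f"Chosen model: polynomial degree {pol}, lambda = {lam:.2e}")
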
def Model_selection_terrain(model, modelname, PolyMax=12, s=3):
    Polys = np.arange(1, PolyMax + 1)

    Bias = np.zeros(PolyMax)
    Variance = np.zeros(PolyMax)
    MSE_boot = np.zeros(PolyMax)
    MSE_cv = np.copy(Bias)
    R2_cv = np.copy(Bias)
    MSE_std_cv = np.copy(Bias)

    for i, p in enumerate(Polys):
        X_train = create_designmatrix(train[0], train[1], p, scale=False)
        Y_train = train[2].ravel()
        X_val = create_designmatrix(val[0], val[1], p, scale=False)
        Y_val = val[2].ravel()
        """Cross-validation"""
        X_cv = np.concatenate((X_train, X_val))
        Y_cv = np.concatenate((Y_train, Y_val))
        MSE_cv[i], R2_cv[i], MSE_std_cv[i] = kfold_cross_validation(X_cv,
                                                                    Y_cv,
                                                                    model,
                                                                    k=10,
                                                                    s=s)
        """Bootstrap"""
        scaler = StandardScaler()
        scaler.fit(X_train[:, 1:])
        X_train[:, 1:] = scaler.transform(X_train[:, 1:])
        X_val[:, 1:] = scaler.transform(X_val[:, 1:])
        data = [X_train, X_val, Y_train, Y_val]
        N_boots = 100
        Bias[i], Variance[i], MSE_boot[i] = bootstrap(data,
                                                      N_boots,
                                                      model,
                                                      s=s)

    MSE_min_boot = np.amin(MSE_boot)
    ind_min_boot = np.where(MSE_boot == MSE_min_boot)[0][0]
    print(f"Minimum MSE for bootstrap= {MSE_min_boot}, P = {ind_min_boot+1}")

    MSE_min_cv = np.amin(MSE_cv)
    ind_min_cv = np.where(MSE_cv == MSE_min_cv)[0][0]
    print(
        f"Minimum MSE for cross validation= {MSE_min_cv}, P = {ind_min_cv+1}")
    """Plot Bias-variance analysis"""
    plt.style.use('ggplot')
    ax = plt.figure().gca()  # gca() returns the Axes of a new figure
    plt.plot(Polys, Variance, label="Variance")
    plt.plot(Polys, Bias, label="Bias")
    plt.plot(Polys, MSE_boot, label="MSE")
    plt.legend()
    plt.xlabel("Complexity")
    plt.ylabel("Error")
    plt.title(f"Bias-Variance Tradeoff {modelname}")
    plt.ylim((-10, 30000))
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.show()

    STD_line = [MSE_std_cv[ind_min_cv] + MSE_cv[ind_min_cv]] * len(Polys)

    for i, error in enumerate(MSE_cv):
        if error < MSE_std_cv[ind_min_cv] + MSE_cv[ind_min_cv]:
            chosen_model = Polys[i]
            break
    """Plot MSE of boo
    tstrap and cross validation"""
    plt.style.use('ggplot')
    ax = plt.figure().gca()
    plt.plot(Polys, MSE_boot, label="Bootstrap MSE")
    plt.plot(Polys, MSE_cv, label="Cross validation MSE")
    plt.plot(Polys, STD_line, color="Orange", linestyle="--")
    plt.vlines(chosen_model, -10, 30000, colors='Orange', linestyles='--')
    plt.legend()
    plt.ylim((-10, 30000))
    plt.xlabel("Complexity")
    plt.ylabel("MSE")
    plt.title(f"MSE analysis of {modelname}")
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.show()

    return chosen_model
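
Usage sketch (illustrative):

chosen_p = Model_selection_terrain(OLS_reg, "OLS", PolyMax=12)
print(f"Chosen polynomial degree for OLS: {chosen_p}")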