def run_bootstrap_OLS():
    """Runs an instance of the bootstrap for OLS on each polynomial order."""
    x, y, z, P, complexity = MakeData(p=7)[:5]
    BootVariances = np.zeros(complexity)
    BootBias = np.zeros(complexity)
    BootMSE = np.zeros(complexity)
    for p in P:
        # Perform bootstrap resampling on each polynomial order with N bootstrap resamples
        X = create_designmatrix(x, y, p)
        data = train_test_splitter(X, z, testsize=0.2)
        N = 1000
        BootBias[p - 1], BootVariances[p - 1], BootMSE[p - 1] = bootstrap(data, N, OLS_reg)
    return BootVariances, BootBias, BootMSE, P, x, y, z
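# A minimal usage sketch (not part of the original scripts): assuming matplotlib is
# available and run_bootstrap_OLS() above returns arrays indexed by polynomial order,
# the bias-variance tradeoff can be visualised by plotting the returned arrays against
# model complexity.
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    BootVariances, BootBias, BootMSE, P = run_bootstrap_OLS()[:4]
    plt.plot(P, BootBias, label="Bias")
    plt.plot(P, BootVariances, label="Variance")
    plt.plot(P, BootMSE, label="MSE")
    plt.xlabel("Polynomial order")
    plt.ylabel("Error")
    plt.legend()
    plt.show()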
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model as skl
from sklearn.model_selection import train_test_split
from setup import create_designmatrix, MakeData, ridge_reg, OLS_reg, MSE

x, y, z, P, complexity = MakeData(N=10)[:5]

nlambdas = 100
_lambda = np.logspace(-4, 1, nlambdas)
MSEPredictLasso = np.zeros(nlambdas)
MSEPredictRidge = np.zeros(nlambdas)
MSEPredictOLS = np.zeros(nlambdas)
MSEPredictRidge_mine = np.zeros(nlambdas)

p = 10
X = create_designmatrix(x, y, p)
X_train, X_test, Y_train, Y_test = train_test_split(X, z, test_size=0.2)

for i in range(nlambdas):
    lam = _lambda[i]
    # Fit Ridge (scikit-learn and own implementation), Lasso and OLS for this lambda
    clf_ridge = skl.Ridge(alpha=lam, fit_intercept=False).fit(X_train, Y_train)
    ridge_beta_mine = ridge_reg(X_train, Y_train, lam)
    clf_lasso = skl.Lasso(alpha=lam, fit_intercept=False).fit(X_train, Y_train)
    ols_beta = OLS_reg(X_train, Y_train)

    # Predict on the test set
    yridge = clf_ridge.predict(X_test)
    yridge_mine = X_test @ ridge_beta_mine
    ylasso = clf_lasso.predict(X_test)
    yols = X_test @ ols_beta

    # Store the test MSE of each method (the OLS value does not depend on lambda)
    MSEPredictLasso[i] = MSE(Y_test, ylasso)
    MSEPredictRidge[i] = MSE(Y_test, yridge)
    MSEPredictRidge_mine[i] = MSE(Y_test, yridge_mine)
    MSEPredictOLS[i] = MSE(Y_test, yols)
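# Illustrative follow-up (an assumed plotting step, not the original code): visualise
# how the prediction MSE of Ridge (both implementations), Lasso and OLS varies with
# lambda on a logarithmic axis, using the arrays filled in the loop above.
plt.figure()
plt.semilogx(_lambda, MSEPredictRidge, label="Ridge (scikit-learn)")
plt.semilogx(_lambda, MSEPredictRidge_mine, "--", label="Ridge (own implementation)")
plt.semilogx(_lambda, MSEPredictLasso, label="Lasso")
plt.semilogx(_lambda, MSEPredictOLS, label="OLS")
plt.xlabel(r"$\lambda$")
plt.ylabel("Test MSE")
plt.legend()
plt.show()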
import numpy as np
import sklearn.linear_model as skl
from setup import create_designmatrix, MakeData, OLS_reg, MSE, train_test_splitter, R2score, plot_Franke
from Ridge import ridge_reg

np.random.seed(1)

lambdas = np.logspace(-4, 0, 25)
Ridge_lam = lambdas[8]
Lasso_lam = lambdas[1]

# Training data
x, y, Y_train = MakeData(N=10, s=3)[:3]

# OLS: polynomial degree = 4
X_OLS = create_designmatrix(x, y, 4)
X_OLS_train, Y_OLS_train = train_test_splitter(X_OLS, Y_train, testsize=0.2)[0:3:2]
OLS_betas = OLS_reg(X_OLS_train, Y_OLS_train)

# Ridge: polynomial degree = 5
X_Ridge = create_designmatrix(x, y, 5)
X_Ridge_train, Y_Ridge_train = train_test_splitter(X_Ridge, Y_train, testsize=0.2)[0:3:2]
Ridge_betas = ridge_reg(X_Ridge_train, Y_Ridge_train, Ridge_lam)

# Lasso: polynomial degree = 12
X_Lasso = create_designmatrix(x, y, 12)
X_Lasso_train, Y_Lasso_train = train_test_splitter(X_Lasso, Y_train, testsize=0.2)[0:3:2]
clf_lasso = skl.Lasso(alpha=Lasso_lam, fit_intercept=False).fit(X_Lasso_train, Y_Lasso_train)
import numpy as np
import seaborn as sns
from matplotlib.patches import Rectangle
from setup import create_designmatrix, MakeData, OLS_reg, MSE, train_test_splitter, ridge_reg
from assessment import kfold_cross_validation

if __name__ == "__main__":
    lambdas = np.logspace(-4, 0, 25)
    x, y, z, P, complexity = MakeData(p=9)[:5]

    # Rows are polynomial orders, columns are lambda values
    KfoldMSE_Ridge = np.zeros((complexity, len(lambdas)))
    for j, p in enumerate(P):
        X = create_designmatrix(x, y, p, scale=False)
        for i, lam in enumerate(lambdas):
            KfoldMSE_Ridge[j, i] = kfold_cross_validation(X, z, ridge_reg, lam)[0]

    # Find minimum MSE value:
    min_MSE = np.amin(KfoldMSE_Ridge)
    index1 = np.where(KfoldMSE_Ridge == np.amin(KfoldMSE_Ridge))
    print("Minimum MSE value for Ridge", min_MSE)

    # Find second and third minimum by masking out the current minimum
    temp = np.copy(KfoldMSE_Ridge)
    temp[index1[0][0], index1[1][0]] += 10
    index2 = np.where(temp == np.amin(temp))
import numpy as np
from terrain_setup import train, test, val, plot_terrain
from assessment import bootstrap, kfold_cross_validation
from setup import ridge_reg, create_designmatrix

PolyMax = 20
Polys = np.arange(1, PolyMax + 1)
Bias = np.zeros(PolyMax)
Variance = np.zeros(PolyMax)
MSE_boot = np.zeros(PolyMax)
MSE_cv = np.copy(Bias)
R2_cv = np.copy(Bias)
MSE_std_cv = np.copy(Bias)

for i, p in enumerate(Polys):
    X_train = create_designmatrix(train[0], train[1], p)
    Y_train = train[2].ravel()
    X_val = create_designmatrix(val[0], val[1], p)
    Y_val = val[2].ravel()
    data = [X_train, X_val, Y_train, Y_val]

    # Bootstrap
    N_boots = 100
    Bias[i], Variance[i], MSE_boot[i] = bootstrap(data, N_boots, ridge_reg)

    # Cross-validation
    X_cv = np.concatenate((X_train, X_val))
    Y_cv = np.concatenate((Y_train, Y_val))
    MSE_cv[i], R2_cv[i], MSE_std_cv[i] = kfold_cross_validation(X_cv, Y_cv, ridge_reg, k=10)
print(f" OLS   | {OLS_degree} | n/a")
print(f" Ridge | {Ridge_model[0]} | {Ridge_model[1][1]}")
print(f" Lasso | {Lasso_model[0]} | {Lasso_model[1][1]}")

np.random.seed(1)
Ridge_lam = Ridge_model[1][1]
Lasso_lam = Lasso_model[1][1]

# Y train
Y_train = train[2].ravel()
Y_val = val[2].ravel()
Y_train = np.concatenate((Y_train, Y_val))

# OLS
X_train = create_designmatrix(train[0], train[1], OLS_degree, scale=True)
X_val = create_designmatrix(val[0], val[1], OLS_degree, scale=True)
X_OLS_train = np.concatenate((X_train, X_val))
OLS_betas = OLS_reg(X_OLS_train, Y_train)

# Ridge
X_train = create_designmatrix(train[0], train[1], Ridge_model[0], scale=True)
X_val = create_designmatrix(val[0], val[1], Ridge_model[0], scale=True)
X_Ridge_train = np.concatenate((X_train, X_val))
Ridge_betas = ridge_reg(X_Ridge_train, Y_train, Ridge_lam)

# Lasso
X_train = create_designmatrix(train[0], train[1], Lasso_model[0], scale=True)
X_val = create_designmatrix(val[0], val[1], Lasso_model[0], scale=True)
def OLS_analysis(x, y, P, z, R2=False, confinf_beta=False, print_error=False, save_X=False):
    """
    Performs OLS regression on the dataset for each polynomial order in P.
    The data is split into training and test sets and the model is trained on the
    training set. The training and test MSE are stored for each polynomial; the R2
    score and the confidence intervals of the coefficients are computed on request,
    and an error table is printed when print_error is True.
    """
    R2_list = []
    ConfInfB = []
    MSE_list = np.zeros((len(P), 2))
    Betas = []
    X_list = []

    if print_error:
        print(" P | MSE | R2 ")
        print("--------------------------")

    for i, p in enumerate(P):
        X = create_designmatrix(x, y, p)
        X_train, X_test, Y_train, Y_test = train_test_splitter(X, z, testsize=0.2)

        # Finding beta
        B = OLS_reg(X_train, Y_train)
        Betas.append(B)

        # Predicting using OLS
        ytilde_train = X_train @ B
        ytilde_test = X_test @ B

        # Finding MSE of training and test data
        MSE_list[i, 1] = np.mean(np.square(Y_test - ytilde_test))    # Test MSE
        MSE_list[i, 0] = np.mean(np.square(Y_train - ytilde_train))  # Training MSE

        if save_X:
            X_list.append(X)
        if R2:
            R2_list.append(R2score(Y_test, ytilde_test))
        if confinf_beta:
            # Approximate 95% confidence interval for each coefficient:
            # Var(beta) = sigma^2 * diag((X^T X)^{-1}), with sigma^2 estimated from the
            # training residuals, giving the interval beta +/- 2*sqrt(Var(beta)).
            sig2 = np.mean(np.square(Y_train - ytilde_train))
            VarB = sig2 * np.linalg.inv(X_train.T @ X_train).diagonal()
            ConfInf = np.zeros((len(B), 2))
            ConfInf[:, 0] = B - 2 * np.sqrt(VarB)
            ConfInf[:, 1] = B + 2 * np.sqrt(VarB)
            ConfInfB.append(ConfInf)
        if print_error:
            # Print the training MSE; the R2 score is only available when R2=True
            r2 = R2_list[i] if R2 else float("nan")
            print(f" {p:2.0f} | {MSE_list[i, 0]:.4f} | {r2:.4f}")

    return Betas, MSE_list, X_list, R2_list, ConfInfB
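# Illustrative call (an assumed usage pattern, not taken from the original scripts):
# run OLS_analysis for the polynomial orders returned by MakeData, print the error
# table and inspect the confidence intervals of the lowest-order model's coefficients.
# MakeData(p=5) and the [:4] slice are assumed parameter choices based on how MakeData
# is used elsewhere in these scripts.
if __name__ == "__main__":
    x, y, z, P = MakeData(p=5)[:4]
    Betas, MSE_list, _, R2_list, ConfInfB = OLS_analysis(
        x, y, P, z, R2=True, confinf_beta=True, print_error=True)
    print(ConfInfB[0])  # approximate 95% confidence intervals, one row per coefficient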
def Model_selection_terrain_with_lambda(model, modelname, lambdas=np.logspace(-4, 0, 25),
                                        PolyMin=1, PolyMax=12, cv_model=kfold_cross_validation):
    Polys = np.arange(PolyMin, PolyMax + 1)

    # Rows are polynomial orders, columns are lambda values
    MSE_cv = np.zeros((len(Polys), len(lambdas)))
    MSE_std_cv = np.zeros((len(Polys), len(lambdas)))
    for j, p in enumerate(Polys):
        X_train = create_designmatrix(train[0], train[1], p, scale=False)
        Y_train = train[2].ravel()
        X_val = create_designmatrix(val[0], val[1], p, scale=False)
        Y_val = val[2].ravel()
        X_cv = np.concatenate((X_train, X_val))
        Y_cv = np.concatenate((Y_train, Y_val))
        for i, lam in enumerate(lambdas):
            MSE_cv[j, i], MSE_std_cv[j, i] = cv_model(X_cv, Y_cv, model, lam, k=7)[0:3:2]

    # Find minimum MSE value:
    min_MSE = np.amin(MSE_cv)
    index1 = np.where(MSE_cv == np.amin(MSE_cv))
    min_MSE_std = MSE_std_cv[index1[0][0], index1[1][0]]
    print(f"Minimum MSE value for {modelname}", min_MSE)

    # Find the simplest model within one standard error of the minimum
    temp = np.where(MSE_cv < min_MSE + min_MSE_std)
    print("Minimum MSE + std(Minimum MSE) = ", min_MSE + min_MSE_std)
    # Iterate over lambdas (columns of the transposed mask) and, within each lambda,
    # over polynomial orders; stop at the first combination inside the threshold.
    for i, j in enumerate((MSE_cv < min_MSE + min_MSE_std).T):
        for k, l in enumerate(j):
            if l:
                break
        else:
            continue
        break
    chosenmodelpol = Polys[k]
    chosenmodellam = [i, lambdas[i]]

    # Plot the MSEs and the corresponding polynomial degree and lambda value
    Scaled_for_plot = 0.01 * MSE_cv
    Poly_labels = [str(p) for p in Polys]
    Lam_labels = [str(i) for i in range(len(lambdas))]
    f, ax = plt.subplots(figsize=(9, 6))
    # Mark the cell with the minimum MSE (x = polynomial index, y = lambda index)
    ax.add_patch(Rectangle((index1[0][0], index1[1][0]), 1, 1,
                           fill=False, edgecolor='pink', lw=3))
    sns.set(font_scale=1)
    ax = sns.heatmap(
        Scaled_for_plot.T,
        cbar=False,
        annot=True,
        square=True,
        #yticklabels=Lam_labels,
        xticklabels=Poly_labels,
        fmt='3.0f')
    plt.ylabel(r"$\lambda$ values enumerated low->high")
    plt.xlabel("Polynomial order")
    plt.title(f'MSE of {modelname} regression, ' + r"scaled by $10^{-2}$")
    plt.tight_layout()
    plt.show()

    return chosenmodelpol, chosenmodellam
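# A compact restatement of the one-standard-error rule applied above (a sketch under
# the same array layout, not a drop-in replacement for the nested loops; numpy is
# assumed to be imported as np as in the surrounding module): among all
# (polynomial, lambda) cells whose CV MSE lies within one standard error of the global
# minimum, pick the one with the lowest polynomial order.
def one_standard_error_choice(MSE_cv, MSE_std_cv, Polys, lambdas):
    jmin, imin = np.unravel_index(np.argmin(MSE_cv), MSE_cv.shape)
    threshold = MSE_cv[jmin, imin] + MSE_std_cv[jmin, imin]
    candidates = np.argwhere(MSE_cv <= threshold)   # each row: (poly index, lambda index)
    j, i = candidates[np.argmin(candidates[:, 0])]  # simplest polynomial among candidates
    return Polys[j], lambdas[i]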
def Model_selection_terrain(model, modelname, PolyMax=12, s=3):
    Polys = np.arange(1, PolyMax + 1)
    Bias = np.zeros(PolyMax)
    Variance = np.zeros(PolyMax)
    MSE_boot = np.zeros(PolyMax)
    MSE_cv = np.copy(Bias)
    R2_cv = np.copy(Bias)
    MSE_std_cv = np.copy(Bias)

    for i, p in enumerate(Polys):
        X_train = create_designmatrix(train[0], train[1], p, scale=False)
        Y_train = train[2].ravel()
        X_val = create_designmatrix(val[0], val[1], p, scale=False)
        Y_val = val[2].ravel()

        # Cross-validation
        X_cv = np.concatenate((X_train, X_val))
        Y_cv = np.concatenate((Y_train, Y_val))
        MSE_cv[i], R2_cv[i], MSE_std_cv[i] = kfold_cross_validation(X_cv, Y_cv, model, k=10, s=s)

        # Bootstrap (scale the design matrices, leaving the intercept column untouched)
        scaler = StandardScaler()
        scaler.fit(X_train[:, 1:])
        X_train[:, 1:] = scaler.transform(X_train[:, 1:])
        X_val[:, 1:] = scaler.transform(X_val[:, 1:])
        data = [X_train, X_val, Y_train, Y_val]
        N_boots = 100
        Bias[i], Variance[i], MSE_boot[i] = bootstrap(data, N_boots, model, s=s)

    MSE_min_boot = np.amin(MSE_boot)
    ind_min_boot = np.where(MSE_boot == MSE_min_boot)[0][0]
    print(f"Minimum MSE for bootstrap = {MSE_min_boot}, P = {ind_min_boot + 1}")

    MSE_min_cv = np.amin(MSE_cv)
    ind_min_cv = np.where(MSE_cv == MSE_min_cv)[0][0]
    print(f"Minimum MSE for cross validation = {MSE_min_cv}, P = {ind_min_cv + 1}")

    # Plot bias-variance analysis
    plt.style.use('ggplot')
    ax = plt.figure().gca()
    plt.plot(Polys, Variance, label="Variance")
    plt.plot(Polys, Bias, label="Bias")
    plt.plot(Polys, MSE_boot, label="MSE")
    plt.legend()
    plt.xlabel("Complexity")
    plt.ylabel("Error")
    plt.title(f"Bias-Variance Tradeoff {modelname}")
    plt.ylim((-10, 30000))
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.show()

    # Choose the simplest model within one standard error of the minimum CV MSE
    STD_line = [MSE_std_cv[ind_min_cv] + MSE_cv[ind_min_cv]] * len(Polys)
    for i, error in enumerate(MSE_cv):
        if error < MSE_std_cv[ind_min_cv] + MSE_cv[ind_min_cv]:
            chosen_model = Polys[i]
            break

    # Plot MSE of bootstrap and cross validation
    plt.style.use('ggplot')
    ax = plt.figure().gca()
    plt.plot(Polys, MSE_boot, label="Bootstrap MSE")
    plt.plot(Polys, MSE_cv, label="Cross validation MSE")
    plt.plot(Polys, STD_line, color="Orange", linestyle="--")
    plt.vlines(chosen_model, -10, 30000, colors='Orange', linestyles='--')
    plt.legend()
    plt.ylim((-10, 30000))
    plt.xlabel("Complexity")
    plt.ylabel("MSE")
    plt.title(f"MSE analysis of {modelname}")
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.show()

    return chosen_model
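# Illustrative call (an assumed usage pattern, not taken from the original scripts):
# run the bias-variance and cross-validation analysis above for Ridge regression on
# the terrain data; ridge_reg is assumed to be imported from setup as in the other scripts.
if __name__ == "__main__":
    chosen_degree = Model_selection_terrain(ridge_reg, "Ridge", PolyMax=12)
    print("Chosen polynomial degree:", chosen_degree)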