def OLS_SVD_unit_test(min_deg=2, max_deg=5, tol=1e-6):
    """
    Tests our implementation of OLS with SVD against sci-kit learn up to a
    given tolerance.

    For every degree from min_deg to max_deg (inclusive) a design matrix is
    built with linear_regression.design_matrix_2D from noisy samples of
    FrankeFunction, and the coefficients from linear_regression.OLS_SVD_2D
    are compared element-wise with those of sklearn's LinearRegression
    (fit_intercept=False). A warning is printed for every coefficient whose
    absolute difference is not strictly below tol.

    Args:
        min_deg (int): lowest degree to test.
        max_deg (int): highest degree to test (inclusive).
        tol (float): absolute tolerance for each coefficient.

    Returns:
        None. Mismatches are only reported on stdout.
    """
    n = 100  # Number of data points

    # Prepare data set
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2

    degrees = np.arange(min_deg, max_deg + 1)
    for deg in degrees:
        # Set up design matrix for the degree under test (this used to be
        # hard-coded to 5, so every iteration tested the same model).
        X = linear_regression.design_matrix_2D(x, y, deg)

        # Compute optimal parameters using our homegrown OLS
        beta = linear_regression.OLS_SVD_2D(X=X, z=z)

        # Compute optimal parameters using sklearn
        skl_reg = LinearRegression(fit_intercept=False).fit(X, z)
        beta_skl = skl_reg.coef_

        for i in range(len(beta)):
            # "continue on match" keeps NaN differences reported as mismatches.
            if abs(beta[i] - beta_skl[i]) < tol:
                continue
            print(
                "Warning! mismatch with SKL in OLS_SVD_unit_test with tol = %.0e"
                % tol)
            print("Parameter no. %i for deg = %i" % (i, deg))
            print("-> (OUR) beta = %8.12f" % beta[i])
            print("-> (SKL) beta = %8.12f" % beta_skl[i])
    return
def bootstrap_all(X_train, X_test, z_train, z_test, n_bootstraps, lamb_lasso, lamb_ridge):
    """
    Bootstrapped bias-variance analysis for OLS, Ridge and Lasso.

    In each of the n_bootstraps rounds the training rows are resampled with
    replacement (same size as the training set), the three models are
    refitted on the resample and their predictions on X_test are stored as
    one column per round. The prediction matrices are then handed to
    stat_tools.compute_mse_bias_variance together with z_test.

    Args:
        X_train, X_test: design matrices for training and test data.
        z_train, z_test: corresponding response values.
        n_bootstraps (int): number of bootstrap rounds.
        lamb_lasso: penalty passed to skl.Lasso as alpha.
        lamb_ridge: penalty passed to linear_regression.Ridge_2D.

    Returns:
        Nine values: the (mse, bias, variance) triple from
        stat_tools.compute_mse_bias_variance for Ridge, then Lasso, then OLS.
    """
    n_test = len(z_test)
    predictions = {
        "ols": np.zeros((n_test, n_bootstraps)),
        "ridge": np.zeros((n_test, n_bootstraps)),
        "lasso": np.zeros((n_test, n_bootstraps)),
    }

    for round_no in range(n_bootstraps):
        # Draw row indices with replacement.
        resample = np.random.randint(0, len(z_train), len(z_train))
        X_resampled = X_train[resample]
        z_resampled = z_train[resample]

        ols_coeffs = linear_regression.OLS_SVD_2D(X_resampled, z_resampled)
        ridge_coeffs = linear_regression.Ridge_2D(X_resampled, z_resampled, lamb_ridge)
        lasso_model = skl.Lasso(alpha=lamb_lasso, fit_intercept=False)
        lasso_model = lasso_model.fit(X_resampled, z_resampled)

        predictions["lasso"][:, round_no] = lasso_model.predict(X_test)
        predictions["ridge"][:, round_no] = X_test @ ridge_coeffs
        predictions["ols"][:, round_no] = X_test @ ols_coeffs

    ridge_stats = stat_tools.compute_mse_bias_variance(z_test, predictions["ridge"])
    lasso_stats = stat_tools.compute_mse_bias_variance(z_test, predictions["lasso"])
    ols_stats = stat_tools.compute_mse_bias_variance(z_test, predictions["ols"])

    ridge_mse, ridge_bias, ridge_variance = ridge_stats
    lasso_mse, lasso_bias, lasso_variance = lasso_stats
    ols_mse, ols_bias, ols_variance = ols_stats
    return (ridge_mse, ridge_bias, ridge_variance,
            lasso_mse, lasso_bias, lasso_variance,
            ols_mse, ols_bias, ols_variance)
def k_fold_cv_all(X, z, n_lambdas, lambdas, k_folds):
    """
    Performs k-fold cross validation for Ridge, Lasso and OLS.

    The Lasso and Ridge MSE-values are computed for the first n_lambdas
    entries of the lambdas array. OLS is fitted once for each of the
    k_folds folds.

    Args:
        X (array): Design matrix
        z (array): Response values
        n_lambdas (int): number of lambda values to use for Lasso and Ridge.
        lambdas (array): The actual lambda-values to try.
        k_folds (int): The number of folds.

    Return:
        lasso_cv_mse (array): fold-averaged MSE for each lambda in Lasso
        ridge_cv_mse (array): fold-averaged MSE for each lambda in Ridge
        ols_cv_mse (float): fold-averaged MSE for OLS.
    """
    ridge_fold_score = np.zeros((n_lambdas, k_folds))
    lasso_fold_score = np.zeros((n_lambdas, k_folds))
    ols_fold_score = np.zeros(k_folds)

    test_list, train_list = k_fold_selection(z, k_folds)

    for j in range(k_folds):
        # Slice the fold once; it is shared by OLS and every lambda below.
        test_ind_cv = test_list[j]
        train_ind_cv = train_list[j]
        X_train_cv = X[train_ind_cv]
        z_train_cv = z[train_ind_cv]
        X_test_cv = X[test_ind_cv]
        z_test_cv = z[test_ind_cv]

        # OLS on this fold. The previous implementation indexed the folds
        # with a stale loop variable and scored only the last fold.
        ols_cv_betas = linear_regression.OLS_SVD_2D(X_train_cv, z_train_cv)
        z_ols_test = X_test_cv @ ols_cv_betas
        ols_fold_score[j] = stat_tools.MSE(z_test_cv, z_ols_test)

        for i in range(n_lambdas):
            lamb = lambdas[i]

            clf_Lasso = skl.Lasso(alpha=lamb, fit_intercept=False).fit(
                X_train_cv, z_train_cv)
            z_lasso_test = clf_Lasso.predict(X_test_cv)

            ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv, lamb)
            z_ridge_test = X_test_cv @ ridge_betas

            ridge_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_ridge_test)
            lasso_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_lasso_test)

    # Average over the folds.
    lasso_cv_mse = np.mean(lasso_fold_score, axis=1)
    ridge_cv_mse = np.mean(ridge_fold_score, axis=1)
    ols_cv_mse = np.mean(ols_fold_score)

    return lasso_cv_mse, ridge_cv_mse, ols_cv_mse
def franke_predictions(n=1000, noise_scale=0.2, degree=20, ridge_lambda=1e-2, lasso_lambda=1e-5, plot_grid_size=2000):
    """
    Fit OLS, Ridge and Lasso to noisy Franke samples and predict on a grid.

    n points are drawn uniformly on [0, 1]^2, FrankeFunction is evaluated
    there and normal noise scaled by noise_scale is added. The response is
    centred, the design matrix of the given degree is standardised with
    StandardScaler, and each model's prediction (with the response mean
    added back) is evaluated on a plot_grid_size x plot_grid_size meshgrid.

    output:
        x_plot_mesh: meshgrid of x-coordinates
        y_plot_mesh: meshgrid of y-coordinates
        z_predict_ols: ols prediction of z on the meshgrid
        z_predict_ridge: ridge prediction of z on the meshgrid
        z_predict_lasso: lasso prediction of z on the meshgrid
        z_plot_franke: Actual Franke values on the meshgrid.
    """
    # Sample points and the noisy, centred response.
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    franke_values = FrankeFunction(x, y)
    noisy_values = franke_values + noise_scale * np.random.normal(0, 1, len(franke_values))
    z_intercept = np.mean(noisy_values)
    z = noisy_values - z_intercept

    # Standardised design matrix for the samples.
    X = linear_regression.design_matrix_2D(x, y, degree)
    scaler = StandardScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)

    # Plotting grid, pushed through the same scaler as the training data.
    grid_axis_x = np.linspace(0, 1, plot_grid_size)
    grid_axis_y = np.linspace(0, 1, plot_grid_size)
    x_plot_mesh, y_plot_mesh = np.meshgrid(grid_axis_x, grid_axis_y)
    grid_design = linear_regression.design_matrix_2D(
        x_plot_mesh.flatten(), y_plot_mesh.flatten(), degree)
    grid_design_scaled = scaler.transform(grid_design)
    z_plot_franke = FrankeFunction(x_plot_mesh, y_plot_mesh)

    # OLS
    ols_coeffs = linear_regression.OLS_SVD_2D(X_scaled, z)
    ols_flat = (grid_design_scaled @ ols_coeffs) + z_intercept
    z_predict_ols = ols_flat.reshape(plot_grid_size, -1)

    # Ridge
    ridge_coeffs = linear_regression.Ridge_2D(X_scaled, z, ridge_lambda)
    ridge_flat = (grid_design_scaled @ ridge_coeffs) + z_intercept
    z_predict_ridge = ridge_flat.reshape(plot_grid_size, -1)

    # Lasso
    lasso_model = skl.Lasso(alpha=lasso_lambda, fit_intercept=False, max_iter=10000)
    lasso_model = lasso_model.fit(X_scaled, z)
    lasso_flat = lasso_model.predict(grid_design_scaled) + z_intercept
    z_predict_lasso = lasso_flat.reshape(plot_grid_size, -1)

    return (x_plot_mesh, y_plot_mesh, z_predict_ols, z_predict_ridge,
            z_predict_lasso, z_plot_franke)
def k_folds_cv_OLS_only(X, z, k_folds):
    """Computes the k-fold cross-validation MSE for OLS.

    Args:
        X (array): Design matrix.
        z (array): Response values.
        k_folds (int): Number of folds.

    Returns:
        The mean over the folds of the test MSE computed by stat_tools.MSE.
    """
    ols_fold_score = np.zeros(k_folds)
    test_list, train_list = k_fold_selection(z, k_folds)

    for i in range(k_folds):
        # Index the folds with the loop variable; the previous code used an
        # undefined name here and raised NameError.
        test_ind_cv = test_list[i]
        train_ind_cv = train_list[i]

        X_train_cv = X[train_ind_cv]
        z_train_cv = z[train_ind_cv]
        X_test_cv = X[test_ind_cv]
        z_test_cv = z[test_ind_cv]

        ols_cv_betas = linear_regression.OLS_SVD_2D(X_train_cv, z_train_cv)
        z_ols_test = X_test_cv @ ols_cv_betas
        ols_fold_score[i] = stat_tools.MSE(z_test_cv, z_ols_test)

    ols_cv_mse = np.mean(ols_fold_score)
    return ols_cv_mse
def terrain_analysis_plots(
        spacing=100,
        max_degree=20,
        n_lambdas=30,
        k_folds=5,
        n_bootstraps=50,
        do_boot=False,
        do_subset=False,
):
    """Run the OLS/Ridge/Lasso model-selection analysis on the terrain data and plot it.

    For every degree in range(max_degree) the function computes OLS train/test
    MSE, k-fold CV MSE for OLS and for Ridge/Lasso over a log-spaced lambda
    grid, and optionally bootstrap bias/variance estimates. It then shows
    three figures (CV MSE per degree, and CV MSE vs lambda for a few of the
    highest degrees for Ridge and Lasso) and prints the best lambdas.

    Args:
        spacing (int): stride used to downsample the terrain image.
        max_degree (int): number of degrees to analyse (0 .. max_degree - 1).
        n_lambdas (int): number of lambdas in np.logspace(-6, 0, n_lambdas).
        k_folds (int): number of CV folds.
        n_bootstraps (int): number of bootstrap rounds.
        do_boot (bool): bootstrap all three models at the CV-best lambdas.
        do_subset (bool): bootstrap Ridge/Lasso for every 12th lambda.

    Returns:
        None.
    """
    # Setting up the terrain data:
    # Note structure! X-coordinates are on the rows of terrain_data
    # Point_selection.flatten() moves most rapidly over the x-coordinates
    # Meshgrids flattened also move most rapidly over the x-coordinates. Thus
    # this should make z(x,y).reshape(length_y,length_x) be consistent with terrain_data
    terrain_data = imread("../../datafiles/SRTM_data_Norway_1.tif")
    point_selection = terrain_data[:1801:spacing, :1801:spacing]  # Make square and downsample
    x_terrain_selection = np.linspace(0, 1, point_selection.shape[1])
    y_terrain_selection = np.linspace(0, 1, point_selection.shape[0])
    X_coord_selection, Y_coord_selection = np.meshgrid(x_terrain_selection,
                                                       y_terrain_selection)
    z_terrain_selection = point_selection.flatten()  # the response values
    x_terrain_selection_flat = X_coord_selection.flatten()  # the first degree feature variables
    y_terrain_selection_flat = Y_coord_selection.flatten()  # the first degree feature variables

    lambdas = np.logspace(-6, 0, n_lambdas)
    subset_lambdas = lambdas[::12]

    x = x_terrain_selection_flat
    y = y_terrain_selection_flat
    z = z_terrain_selection

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Centering: the full data by its own mean, train and test by the
    # training mean.
    z_intercept = np.mean(z)
    z = z - z_intercept
    z_train_intercept = np.mean(z_train)
    z_train = z_train - z_train_intercept
    z_test = z_test - z_train_intercept

    ##### Setup of problem is completed above.

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = crossvalidation.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg

        if do_boot:
            # All regression bootstraps at once, at the CV-best lambdas.
            lamb_ridge = best_ridge_lambda[degree]
            lamb_lasso = best_lasso_lambda[degree]

            (
                ridge_mse,
                ridge_bias,
                ridge_variance,
                lasso_mse,
                lasso_bias,
                lasso_variance,
                ols_mse,
                ols_bias,
                ols_variance,
            ) = bootstrap.bootstrap_all(X_train_scaled, X_test_scaled, z_train,
                                        z_test, n_bootstraps, lamb_lasso,
                                        lamb_ridge)

            ridge_best_lambda_boot_mse[degree] = ridge_mse
            ridge_best_lambda_boot_bias[degree] = ridge_bias
            ridge_best_lambda_boot_variance[degree] = ridge_variance

            lasso_best_lambda_boot_mse[degree] = lasso_mse
            lasso_best_lambda_boot_bias[degree] = lasso_bias
            lasso_best_lambda_boot_variance[degree] = lasso_variance

            ols_boot_mse[degree] = ols_mse
            ols_boot_bias[degree] = ols_bias
            ols_boot_variance[degree] = ols_variance

        if do_subset:
            # Bootstrapping for a selection of lambdas for ridge and lasso.
            # Each round uses the subset lambda itself for both penalties;
            # the previous code passed the best-lambda variables from the
            # do_boot branch, which ignored `lamb` and failed with
            # NameError when do_boot was False.
            for subset_lambda_index, lamb in enumerate(subset_lambdas):
                (
                    ridge_mse,
                    ridge_bias,
                    ridge_variance,
                    lasso_mse,
                    lasso_bias,
                    lasso_variance,
                ) = bootstrap.bootstrap_ridge_lasso(
                    X_train_scaled,
                    X_test_scaled,
                    z_train,
                    z_test,
                    n_bootstraps,
                    lamb,
                    lamb,
                )

                ridge_subset_lambda_boot_mse[degree, subset_lambda_index] = ridge_mse
                ridge_subset_lambda_boot_bias[degree, subset_lambda_index] = ridge_bias
                ridge_subset_lambda_boot_variance[degree, subset_lambda_index] = ridge_variance

                lasso_subset_lambda_boot_mse[degree, subset_lambda_index] = lasso_mse
                lasso_subset_lambda_boot_bias[degree, subset_lambda_index] = lasso_bias
                lasso_subset_lambda_boot_variance[degree, subset_lambda_index] = lasso_variance

    # Plots go here.

    plt.figure()
    plt.semilogy(ols_cv_mse, label="ols")
    plt.semilogy(best_ridge_mse, label="ridge")
    plt.semilogy(best_lasso_mse, label="lasso")
    plt.title(
        "CV MSE for OLS, Ridge and Lasso, with the best lambdas for each degree"
    )
    plt.legend()
    plt.show()

    # Degrees shown in the "CV MSE vs lambda" figures: a few of the highest
    # ones. Negative rows are skipped so that a small max_degree neither
    # wraps around to a mislabelled row nor raises IndexError.
    shown_degrees = [
        max_degree - offset for offset in (1, 2, 3, 5, 7)
        if max_degree - offset >= 0
    ]

    # For a couple of degrees, plot cv mse vs lambda for ridge.
    plt.figure()
    for shown_degree in shown_degrees:
        plt.plot(
            np.log10(lambdas),
            ridge_lamb_deg_mse[shown_degree],
            label="degree = {}".format(shown_degree),
        )
    plt.legend()
    plt.show()

    # For a couple of degrees, plot cv mse vs lambda for lasso.
    plt.figure()
    for shown_degree in shown_degrees:
        plt.plot(
            np.log10(lambdas),
            lasso_lamb_deg_mse[shown_degree],
            label="degree = {}".format(shown_degree),
        )
    plt.legend()
    plt.show()

    print("best ridge lambdas:")
    print(best_ridge_lambda)
    print("best lasso lambda")
    print(best_lasso_lambda)

    return
def terrain_predictions(spacing=40, degree=20, ridge_lambda=1e-2, lasso_lambda=1e-5):
    """Fit OLS, Ridge and Lasso to downsampled terrain data and predict on the full grid.

    The terrain image is cropped to its first 1801 rows/columns and sampled
    with the given spacing; the samples are fitted on coordinates scaled to
    [0, 1] with a standardised design matrix of the given degree and a
    centred response. Predictions (with the response mean added back) are
    evaluated on an 1801 x 1801 meshgrid.

    output:
        x_plot_mesh: meshgrid of x-coordinates
        y_plot_mesh: meshgrid of y-coordinates
        z_predict_ols: ols prediction of z on the meshgrid
        z_predict_ridge: ridge prediction of z on the meshgrid
        z_predict_lasso: lasso prediction of z on the meshgrid
        z_true: Actual terrain values on the meshgrid.
    """
    # Setting up the terrain data:
    # Note structure! X-coordinates are on the rows of terrain_data
    # Point_selection.flatten() moves most rapidly over the x-coordinates
    # Meshgrids flattened also move most rapidly over the x-coordinates. Thus
    # this should make z(x,y).reshape(length_y,length_x) be consistent with terrain_data
    terrain_data = imread("../../datafiles/SRTM_data_Norway_1.tif")
    sampled = terrain_data[:1801:spacing, :1801:spacing]  # Make quadratic and downsample

    sample_axis_x = np.linspace(0, 1, sampled.shape[1])
    sample_axis_y = np.linspace(0, 1, sampled.shape[0])
    sample_mesh_x, sample_mesh_y = np.meshgrid(sample_axis_x, sample_axis_y)

    x = sample_mesh_x.flatten()  # the first degree feature variables
    y = sample_mesh_y.flatten()  # the first degree feature variables
    raw_z = sampled.flatten()  # the response values

    # Centering
    z_intercept = np.mean(raw_z)
    z = raw_z - z_intercept

    # Scaling
    X = linear_regression.design_matrix_2D(x, y, degree)
    scaler = StandardScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)

    # Full-resolution plotting grid, pushed through the same scaler.
    x_plot = np.linspace(0, 1, 1801)
    y_plot = np.linspace(0, 1, 1801)
    x_plot_mesh, y_plot_mesh = np.meshgrid(x_plot, y_plot)
    grid_design = linear_regression.design_matrix_2D(
        x_plot_mesh.flatten(), y_plot_mesh.flatten(), degree)
    grid_design_scaled = scaler.transform(grid_design)

    # Ground truth
    z_true = terrain_data[:1801, :1801]

    # OLS
    ols_coeffs = linear_regression.OLS_SVD_2D(X_scaled, z)
    ols_flat = (grid_design_scaled @ ols_coeffs) + z_intercept
    z_predict_ols = ols_flat.reshape(-1, 1801)

    # Ridge
    ridge_coeffs = linear_regression.Ridge_2D(X_scaled, z, ridge_lambda)
    ridge_flat = (grid_design_scaled @ ridge_coeffs) + z_intercept
    z_predict_ridge = ridge_flat.reshape(-1, 1801)

    # Lasso
    lasso_model = skl.Lasso(alpha=lasso_lambda, fit_intercept=False)
    lasso_model = lasso_model.fit(X_scaled, z)
    lasso_flat = lasso_model.predict(grid_design_scaled) + z_intercept
    z_predict_lasso = lasso_flat.reshape(-1, 1801)

    return (x_plot_mesh, y_plot_mesh, z_predict_ols, z_predict_ridge,
            z_predict_lasso, z_true)
# Scaling and feeding to bootstrap and OLS scaler_boot = StandardScaler() scaler_boot.fit(X_train) X_train_scaled = scaler_boot.transform(X_train) X_test_scaled = scaler_boot.transform(X_test) # X_train_scaled[:,0] = 1 #maybe not for ridge+lasso # X_test_scaled[:,0] = 1 #maybe not for ridge+lasso # OLS, get MSE for test and train set. betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train) z_test_model = X_test_scaled @ betas z_train_model = X_train_scaled @ betas mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model) mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model) # CV, find best lambdas and get mse vs lambda for given degree. Also, gets # ols_CV_MSE lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = stat_tools.k_fold_cv_all(X_scaled,z,n_lambdas,lambdas,k_folds) best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)] best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)] best_lasso_mse[degree] = np.min(lasso_cv_mse) best_ridge_mse[degree] = np.min(ridge_cv_mse) lasso_lamb_deg_mse[degree] = lasso_cv_mse
def crossvalidation_OLS(X, z, k_folds):
    """Wrapper function for OLS, see crossvalidation.

    Evaluates linear_regression.OLS_SVD_2D(X, z) and forwards the result to
    crossvalidation as the `regression` keyword argument.

    NOTE(review): `regression` receives the *result* of OLS_SVD_2D on the
    full data, not the function itself -- confirm against crossvalidation
    that this is what it expects.
    """
    ols_result = linear_regression.OLS_SVD_2D(X, z)
    return crossvalidation(X, z, k_folds, regression=ols_result)
def franke_analysis_plots(
        n=1000,
        noise_scale=0.2,
        max_degree=20,
        n_bootstraps=100,
        k_folds=5,
        n_lambdas=30,
        do_boot=True,
        do_subset=True,
):
    """Run the OLS/Ridge/Lasso model-selection analysis on noisy Franke data and plot it.

    n points are drawn uniformly on [0, 1]^2 and FrankeFunction plus normal
    noise scaled by noise_scale is used as the response. For every degree in
    range(max_degree) the function computes OLS train/test MSE, k-fold CV MSE
    for OLS and for Ridge/Lasso over np.logspace(-6, 0, n_lambdas), and
    optionally bootstrap bias/variance estimates. The results are shown in a
    sequence of figures and the best lambdas per degree are printed.

    Note that max_degree is the number of degrees, i.e. including 0.

    Args:
        n (int): number of sample points.
        noise_scale (float): scale of the added normal noise.
        max_degree (int): number of degrees to analyse (0 .. max_degree - 1).
        n_bootstraps (int): number of bootstrap rounds.
        k_folds (int): number of CV folds.
        n_lambdas (int): number of lambdas in the grid.
        do_boot (bool): bootstrap all three models at the CV-best lambdas.
        do_subset (bool): bootstrap Ridge/Lasso for every 12th lambda.

    Returns:
        None.
    """
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale * np.random.normal(0, 1, len(z))

    lambdas = np.logspace(-6, 0, n_lambdas)
    # Every 12th lambda (low / middle / high for the default n_lambdas=30).
    subset_lambda_indices = range(0, n_lambdas, 12)
    subset_lambdas = lambdas[::12]

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Centering the response: the full data by its own mean, train and test
    # by the training mean.
    z_intercept = np.mean(z)
    z = z - z_intercept
    z_train_intercept = np.mean(z_train)
    z_train = z_train - z_train_intercept
    z_test = z_test - z_train_intercept

    ########### Setup of problem is completed above.

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        # Also, gets ols_CV_MSE
        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = crossvalidation.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg

        if do_boot:
            # All regression bootstraps at once, at the CV-best lambdas.
            lamb_ridge = best_ridge_lambda[degree]
            lamb_lasso = best_lasso_lambda[degree]

            (
                ridge_mse,
                ridge_bias,
                ridge_variance,
                lasso_mse,
                lasso_bias,
                lasso_variance,
                ols_mse,
                ols_bias,
                ols_variance,
            ) = bootstrap.bootstrap_all(X_train_scaled, X_test_scaled, z_train,
                                        z_test, n_bootstraps, lamb_lasso,
                                        lamb_ridge)

            ridge_best_lambda_boot_mse[degree] = ridge_mse
            ridge_best_lambda_boot_bias[degree] = ridge_bias
            ridge_best_lambda_boot_variance[degree] = ridge_variance

            lasso_best_lambda_boot_mse[degree] = lasso_mse
            lasso_best_lambda_boot_bias[degree] = lasso_bias
            lasso_best_lambda_boot_variance[degree] = lasso_variance

            ols_boot_mse[degree] = ols_mse
            ols_boot_bias[degree] = ols_bias
            ols_boot_variance[degree] = ols_variance

        if do_subset:
            # Bootstrapping for a selection of lambdas for ridge and lasso.
            # Each round uses the subset lambda itself for both penalties;
            # the previous code passed the best-lambda variables from the
            # do_boot branch, so every column repeated the same result (or
            # raised NameError when do_boot was False).
            for subset_lambda_index, lamb in enumerate(subset_lambdas):
                (
                    ridge_mse,
                    ridge_bias,
                    ridge_variance,
                    lasso_mse,
                    lasso_bias,
                    lasso_variance,
                ) = bootstrap.bootstrap_ridge_lasso(
                    X_train_scaled,
                    X_test_scaled,
                    z_train,
                    z_test,
                    n_bootstraps,
                    lamb,
                    lamb,
                )

                ridge_subset_lambda_boot_mse[degree, subset_lambda_index] = ridge_mse
                ridge_subset_lambda_boot_bias[degree, subset_lambda_index] = ridge_bias
                ridge_subset_lambda_boot_variance[degree, subset_lambda_index] = ridge_variance

                lasso_subset_lambda_boot_mse[degree, subset_lambda_index] = lasso_mse
                lasso_subset_lambda_boot_bias[degree, subset_lambda_index] = lasso_bias
                lasso_subset_lambda_boot_variance[degree, subset_lambda_index] = lasso_variance

    # Plots go here.

    # CV MSE for OLS:
    plt.figure()
    plt.semilogy(ols_cv_mse)
    plt.title("OLS CV MSE")
    plt.show()

    # Bootstrap for OLS:
    plt.figure()
    plt.semilogy(ols_boot_mse, label="mse")
    plt.semilogy(ols_boot_bias, label="bias")
    plt.semilogy(ols_boot_variance, label="variance")
    plt.title("OLS bias-variance-MSE by bootstrap")
    plt.legend()
    plt.show()

    # CV for Ridge, best+low+middle+high lambdas
    plt.figure()
    plt.semilogy(best_ridge_mse, label="best for each degree")
    for lambda_index in subset_lambda_indices:
        plt.semilogy(ridge_lamb_deg_mse[:, lambda_index],
                     label="lambda={}".format(lambdas[lambda_index]))
    plt.title(
        "Ridge CV MSE for best lambda at each degree, plus for given lambdas across all degrees"
    )
    plt.legend()
    plt.show()

    # Bootstrap for the best ridge lambdas:
    plt.figure()
    plt.semilogy(ridge_best_lambda_boot_mse, label="mse")
    plt.semilogy(ridge_best_lambda_boot_bias, label="bias")
    plt.semilogy(ridge_best_lambda_boot_variance, label="variance")
    plt.title("Best ridge lambdas for each degree bootstrap")
    plt.legend()
    plt.show()

    # Bootstrap only bias and variance for low+middle+high ridge lambdas
    plt.figure()
    for column, lamb in enumerate(subset_lambdas):
        plt.semilogy(ridge_subset_lambda_boot_bias[:, column],
                     label="bias, lambda = {}".format(lamb))
        plt.semilogy(
            ridge_subset_lambda_boot_variance[:, column],
            label="variance, lambda = {}".format(lamb),
        )
    plt.title("Bias+variance for low, middle, high ridge lambdas")
    plt.legend()
    plt.show()

    # CV for lasso, best+low+middle+high lambdas
    plt.figure()
    plt.semilogy(best_lasso_mse, label="best lambda for each degree")
    for lambda_index in subset_lambda_indices:
        plt.semilogy(lasso_lamb_deg_mse[:, lambda_index],
                     label="lambda={}".format(lambdas[lambda_index]))
    plt.title(
        "Lasso CV MSE for best lambda at each degree, plus for given lambdas across all degrees"
    )
    plt.legend()
    plt.show()

    # Bootstrap for the best lasso lambdas:
    plt.figure()
    plt.semilogy(lasso_best_lambda_boot_mse, label="mse")
    plt.semilogy(lasso_best_lambda_boot_bias, label="bias")
    plt.semilogy(lasso_best_lambda_boot_variance, label="variance")
    plt.title("Best lasso lambdas for each degree bootstrap")
    plt.legend()
    plt.show()

    # Bootstrap only bias and variance for low+middle+high lasso lambdas
    plt.figure()
    for column, lamb in enumerate(subset_lambdas):
        plt.semilogy(lasso_subset_lambda_boot_bias[:, column],
                     label="bias, lambda = {}".format(lamb))
        plt.semilogy(
            lasso_subset_lambda_boot_variance[:, column],
            label="variance, lambda = {}".format(lamb),
        )
    plt.title("Bias+variance for low, middle, high lasso lambdas")
    plt.legend()
    plt.show()

    # Degrees shown in the "CV MSE vs lambda" figures: a few of the highest
    # ones. Negative rows are skipped so that a small max_degree neither
    # wraps around to a mislabelled row nor raises IndexError.
    shown_degrees = [
        max_degree - offset for offset in (1, 2, 3, 5, 7)
        if max_degree - offset >= 0
    ]

    # For a couple of degrees, plot cv mse vs lambda for ridge.
    plt.figure()
    for shown_degree in shown_degrees:
        plt.plot(
            np.log10(lambdas),
            ridge_lamb_deg_mse[shown_degree],
            label="degree = {}".format(shown_degree),
        )
    plt.legend()
    plt.show()

    # For a couple of degrees, plot cv mse vs lambda for lasso.
    plt.figure()
    for shown_degree in shown_degrees:
        plt.plot(
            np.log10(lambdas),
            lasso_lamb_deg_mse[shown_degree],
            label="degree = {}".format(shown_degree),
        )
    plt.legend()
    plt.show()

    print("best ridge lambda:")
    print(best_ridge_lambda)
    print("best lasso lambda:")
    print(best_lasso_lambda)

    return
def part_1a():
    """Fit a degree-5 OLS model to the Franke function and plot the result.

    Samples FrankeFunction at 100 random points, fits OLS with and without
    added noise, runs skeleton k-fold CV for Ridge/Lasso and a skeleton
    bootstrap for OLS, prints the MSE of both OLS fits and shows three 3D
    surfaces: the analytic function, the noise-free fit and the noisy fit.

    Returns:
        None.
    """
    # Sample the franke function n times at randomly chosen points
    n = 100
    deg = 5
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z_noisy = z + noise_scale * np.random.normal(0, 1, len(z))

    # Making the design matrix
    X = linear_regression.design_matrix_2D(x, y, deg)

    # Find the least-squares solution
    beta = linear_regression.OLS_2D(X, z)
    beta_noisy = linear_regression.OLS_2D(X, z_noisy)

    # Split into training and test data with ratio 0.2
    X_train, X_test, z_train, z_test = train_test_split(X, z, test_size=0.2)

    # Scale data according to sklearn, beware possible problems with intercept and std.
    # NOTE(review): the scaled matrices are not used by the skeletons below,
    # which work on the unscaled X / X_train / X_test.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # For ridge and lasso, lasso directly from sklearn.
    # For given polynomial degree, input X and z. X should be prescaled.
    n_lambdas = 100
    lambdas = np.logspace(-3, 0, n_lambdas)
    k_folds = 5

    # np.zeros takes the shape as a single tuple; a second positional
    # argument would be interpreted as the dtype.
    ridge_fold_score = np.zeros((n_lambdas, k_folds))
    lasso_fold_score = np.zeros((n_lambdas, k_folds))
    test_list, train_list = k_fold_selection(z, k_folds)
    for i in range(n_lambdas):
        lamb = lambdas[i]
        for j in range(k_folds):
            test_ind_cv = test_list[j]
            train_ind_cv = train_list[j]
            X_train_cv = X[train_ind_cv]
            z_train_cv = z[train_ind_cv]
            X_test_cv = X[test_ind_cv]
            z_test_cv = z[test_ind_cv]

            clf_Lasso = skl.Lasso(alpha=lamb).fit(X_train_cv, z_train_cv)
            z_lasso_test = clf_Lasso.predict(X_test_cv)
            ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv, lamb)
            z_ridge_test = X_test_cv @ ridge_betas

            # Score against the held-out fold, not the full response.
            ridge_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_ridge_test)
            lasso_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_lasso_test)

    lasso_cv_mse = np.mean(lasso_fold_score, axis=1, keepdims=True)
    ridge_cv_mse = np.mean(ridge_fold_score, axis=1, keepdims=True)
    best_lambda_lasso = lambdas[np.argmin(lasso_cv_mse)]
    best_lambda_ridge = lambdas[np.argmin(ridge_cv_mse)]

    # Bootstrap skeleton
    # For given polynomial degree, input X_train, z_train, X_test and z_test.
    # X_train and X_test should be scaled?
    n_bootstraps = 100
    z_boot_model = np.zeros((len(z_test), n_bootstraps))
    for bootstrap_number in range(n_bootstraps):
        # For the number of data value points (len_z) in the training set, pick a random
        # data value (z_train[random]) and its corresponding row in the design matrix
        shuffle = np.random.randint(0, len(z_train), len(z_train))
        X_boot, z_boot = X_train[shuffle], z_train[shuffle]
        betas_boot = linear_regression.OLS_SVD_2D(X_boot, z_boot)
        # One column of test predictions per bootstrap round.
        z_boot_model[:, bootstrap_number] = X_test @ betas_boot
    mse, bias, variance = stat_tools.compute_mse_bias_variance(
        z_test, z_boot_model)

    # Check MSE
    print("MSE = %.3f" %
          stat_tools.MSE(z, linear_regression.evaluate_poly_2D(x, y, beta, deg)))
    # And with noise
    print("Including standard normal noise scaled by {}, MSE = {:.3f}".format(
        noise_scale,
        stat_tools.MSE(z_noisy,
                       linear_regression.evaluate_poly_2D(x, y, beta_noisy, deg))))

    # Evaluate the Franke function & least-squares
    x = np.linspace(0, 1, 30)
    y = np.linspace(0, 1, 30)
    X, Y = np.meshgrid(x, y)

    z_analytic = FrankeFunction(X, Y)
    z_fit = linear_regression.evaluate_poly_2D(X, Y, beta, deg)
    z_fit_noisy = linear_regression.evaluate_poly_2D(X, Y, beta_noisy, deg)

    fig = plt.figure()

    # Analytic surface, fitted surface, and fit from noisy beta estimates.
    panels = (
        ("Franke Function", z_analytic),
        ("OLS", z_fit),
        ("OLS with noise", z_fit_noisy),
    )
    for position, (title, surface) in enumerate(panels, start=1):
        ax = fig.add_subplot(1, 3, position, projection="3d")
        ax.set_title(title)
        ax.view_init(azim=45)
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.set_zlabel("z")
        ax.plot_surface(X, Y, surface, cmap=cm.coolwarm)

    plt.show()
    return
def terrain_analysis():
    """Run the OLS / Ridge / Lasso study on a subsample of the terrain image.

    Reads '../datafiles/SRTM_data_Norway_1.tif', keeps every 20th element of
    the flattened image and its pixel coordinates, splits that subsample once
    into train/test parts (test_size=0.2) and, for each polynomial degree in
    ``range(max_degree)``:

    * fits OLS on the scaled training design matrix and stores the train and
      test MSE,
    * calls ``stat_tools.k_fold_cv_all`` on the scaled full design matrix and
      stores the CV MSE per lambda plus the best lambda for Ridge and Lasso,
    * bootstraps OLS, Ridge and Lasso at the best lambdas through
      ``stat_tools.bootstrap_all``,
    * bootstraps Ridge and Lasso for every value in ``subset_lambdas``
      through ``stat_tools.bootstrap_ridge_lasso``.

    All results are written to local arrays only; the plotting part that
    would consume them is not implemented, so the function returns ``None``.

    NOTE(review): this file contains more than one ``def terrain_analysis``;
    Python keeps only the definition that is executed last.
    """
    # Setting up the terrain data:
    terrain_data = imread('../datafiles/SRTM_data_Norway_1.tif')
    x_terrain = np.arange(terrain_data.shape[1])
    y_terrain = np.arange(terrain_data.shape[0])
    X_coord, Y_coord = np.meshgrid(x_terrain, y_terrain)
    z_terrain = terrain_data.flatten()  # the response values
    x_terrain_flat = X_coord.flatten()  # the first degree feature variables
    y_terrain_flat = Y_coord.flatten()  # the first degree feature variables

    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)
    subset_lambdas = lambdas[::5]
    n_subset = len(subset_lambdas)

    # TODO: select the subset of terrain points in a more principled manner.
    # TODO: probably rescale the feature variables to floats in [0, 1].
    x = x_terrain_flat[::20]
    y = y_terrain_flat[::20]
    z = z_terrain[::20]

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    ridge_subset_lambda_boot_mse = np.zeros((max_degree, n_subset))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, n_subset))
    ridge_subset_lambda_boot_variance = np.zeros((max_degree, n_subset))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, n_subset))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, n_subset))
    lasso_subset_lambda_boot_variance = np.zeros((max_degree, n_subset))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)
        # Standardisation centres every column, so column 0 is reset to 1
        # (presumably the constant/intercept column of design_matrix_2D --
        # confirm). Open question from the author: probably should not be
        # done before Ridge/Lasso.
        X_scaled[:, 0] = 1

        # Scaling and feeding to bootstrap and OLS. The scaler is fitted on
        # the training part only and reused for the test part.
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:, 0] = 1
        X_test_scaled[:, 0] = 1

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        lasso_cv_mse, ridge_cv_mse, ols_cv_value = stat_tools.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        # Store the OLS CV score per degree instead of overwriting the whole
        # ``ols_cv_mse`` array. np.mean leaves a scalar unchanged.
        # NOTE(review): assumes k_fold_cv_all returns a scalar (or per-fold
        # scores) for OLS -- confirm against stat_tools.
        ols_cv_mse[degree] = np.mean(ols_cv_value)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse

        # All regression bootstraps at once, using the best CV lambdas.
        lamb_ridge = best_ridge_lambda[degree]
        lamb_lasso = best_lasso_lambda[degree]
        (ridge_mse, ridge_bias, ridge_variance,
         lasso_mse, lasso_bias, lasso_variance,
         ols_mse, ols_bias, ols_variance) = stat_tools.bootstrap_all(
            X_train_scaled, X_test_scaled, z_train, z_test,
            n_bootstraps, lamb_lasso, lamb_ridge)

        ridge_best_lambda_boot_mse[degree] = ridge_mse
        ridge_best_lambda_boot_bias[degree] = ridge_bias
        ridge_best_lambda_boot_variance[degree] = ridge_variance
        lasso_best_lambda_boot_mse[degree] = lasso_mse
        lasso_best_lambda_boot_bias[degree] = lasso_bias
        lasso_best_lambda_boot_variance[degree] = lasso_variance
        ols_boot_mse[degree] = ols_mse
        ols_boot_bias[degree] = ols_bias
        ols_boot_variance[degree] = ols_variance

        # Bootstrapping for a selection of lambdas for ridge and lasso.
        for subset_lambda_index, lamb in enumerate(subset_lambdas):
            # Both models must be run at the loop's lambda; passing the best
            # lambdas here would repeat the same experiment in every column.
            (ridge_mse, ridge_bias, ridge_variance,
             lasso_mse, lasso_bias, lasso_variance) = \
                stat_tools.bootstrap_ridge_lasso(
                    X_train_scaled, X_test_scaled, z_train, z_test,
                    n_bootstraps, lamb, lamb)

            ridge_subset_lambda_boot_mse[degree, subset_lambda_index] = ridge_mse
            ridge_subset_lambda_boot_bias[degree, subset_lambda_index] = ridge_bias
            ridge_subset_lambda_boot_variance[degree, subset_lambda_index] = ridge_variance
            lasso_subset_lambda_boot_mse[degree, subset_lambda_index] = lasso_mse
            lasso_subset_lambda_boot_bias[degree, subset_lambda_index] = lasso_bias
            lasso_subset_lambda_boot_variance[degree, subset_lambda_index] = lasso_variance

    ################ All necessary computations should have been done above. Below follows
    ################ the plotting part.
    return
def franke_analysis():
    """Run the OLS / Ridge / Lasso study on noisy Franke-function samples.

    Draws ``n = 1000`` points uniformly on the unit square, evaluates
    ``FrankeFunction`` there and adds normal noise scaled by ``noise_scale``.
    The data is split once into train/test parts (test_size=0.2) and, for
    each polynomial degree in ``range(max_degree)``:

    * OLS is fitted on the scaled training design matrix and the train and
      test MSE are stored,
    * ``stat_tools.k_fold_cv_all`` is run on the scaled full design matrix;
      the CV MSE per lambda and the best lambda are stored for Ridge and
      Lasso,
    * OLS, Ridge and Lasso are bootstrapped at the best lambdas through
      ``stat_tools.bootstrap_all``,
    * Ridge and Lasso are bootstrapped for every value in ``subset_lambdas``
      through ``stat_tools.bootstrap_ridge_lasso``.

    All results are written to local arrays only and nothing is returned.
    """
    n = 1000
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale * np.random.normal(0, 1, len(z))

    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)
    subset_lambdas = lambdas[::5]
    n_subset = len(subset_lambdas)

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    ridge_subset_lambda_boot_mse = np.zeros((max_degree, n_subset))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, n_subset))
    ridge_subset_lambda_boot_variance = np.zeros((max_degree, n_subset))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, n_subset))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, n_subset))
    lasso_subset_lambda_boot_variance = np.zeros((max_degree, n_subset))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)
        # Standardisation centres every column, so column 0 is reset to 1
        # (presumably the constant/intercept column of design_matrix_2D --
        # confirm). Open question from the author: maybe not for Ridge/Lasso,
        # since the constant term should not be penalised.
        X_scaled[:, 0] = 1

        # Scaling and feeding to bootstrap and OLS. The scaler is fitted on
        # the training part only and reused for the test part.
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:, 0] = 1  # maybe not for ridge+lasso
        X_test_scaled[:, 0] = 1  # maybe not for ridge+lasso

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        # Also gets the OLS CV MSE.
        lasso_cv_mse, ridge_cv_mse, ols_cv_value = stat_tools.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        # Store the OLS CV score per degree instead of overwriting the whole
        # ``ols_cv_mse`` array. np.mean leaves a scalar unchanged.
        # NOTE(review): assumes k_fold_cv_all returns a scalar (or per-fold
        # scores) for OLS -- confirm against stat_tools.
        ols_cv_mse[degree] = np.mean(ols_cv_value)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse

        # All regressions bootstraps at once, using the best CV lambdas.
        lamb_ridge = best_ridge_lambda[degree]
        lamb_lasso = best_lasso_lambda[degree]
        (ridge_mse, ridge_bias, ridge_variance,
         lasso_mse, lasso_bias, lasso_variance,
         ols_mse, ols_bias, ols_variance) = stat_tools.bootstrap_all(
            X_train_scaled, X_test_scaled, z_train, z_test,
            n_bootstraps, lamb_lasso, lamb_ridge)

        ridge_best_lambda_boot_mse[degree] = ridge_mse
        ridge_best_lambda_boot_bias[degree] = ridge_bias
        ridge_best_lambda_boot_variance[degree] = ridge_variance
        lasso_best_lambda_boot_mse[degree] = lasso_mse
        lasso_best_lambda_boot_bias[degree] = lasso_bias
        lasso_best_lambda_boot_variance[degree] = lasso_variance
        ols_boot_mse[degree] = ols_mse
        ols_boot_bias[degree] = ols_bias
        ols_boot_variance[degree] = ols_variance

        # Bootstrapping for a selection of lambdas for ridge and lasso.
        for subset_lambda_index, lamb in enumerate(subset_lambdas):
            # Both models must be run at the loop's lambda; passing the best
            # lambdas here would repeat the same experiment in every column.
            (ridge_mse, ridge_bias, ridge_variance,
             lasso_mse, lasso_bias, lasso_variance) = \
                stat_tools.bootstrap_ridge_lasso(
                    X_train_scaled, X_test_scaled, z_train, z_test,
                    n_bootstraps, lamb, lamb)

            ridge_subset_lambda_boot_mse[degree, subset_lambda_index] = ridge_mse
            ridge_subset_lambda_boot_bias[degree, subset_lambda_index] = ridge_bias
            ridge_subset_lambda_boot_variance[degree, subset_lambda_index] = ridge_variance
            lasso_subset_lambda_boot_mse[degree, subset_lambda_index] = lasso_mse
            lasso_subset_lambda_boot_bias[degree, subset_lambda_index] = lasso_bias
            lasso_subset_lambda_boot_variance[degree, subset_lambda_index] = lasso_variance
def deprecated_franke_analysis_full():
    """Older, fully inlined Franke-function study of OLS, Ridge and Lasso.

    Samples 1000 uniform points on the unit square, evaluates
    ``FrankeFunction`` there and adds normal noise scaled by 0.2. For every
    polynomial degree in ``range(max_degree)`` the function:

    * builds the design matrix and draws a fresh train/test split of it,
    * stores the OLS train and test MSE on the scaled split,
    * cross-validates Ridge and Lasso over ``lambdas`` with the folds given by
      ``stat_tools.k_fold_selection`` and stores the per-lambda mean MSE, the
      best lambda and the best MSE,
    * bootstraps OLS, Ridge and Lasso at the best lambdas,
    * bootstraps Ridge and Lasso at every value of ``subset_lambdas``.

    Every result is written to a local array; nothing is returned.
    """
    n = 1000
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    franke_values = FrankeFunction(x, y)
    # Response = Franke surface + scaled standard normal noise.
    z = franke_values + noise_scale * np.random.normal(0, 1, len(franke_values))

    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)
    subset_lambdas = lambdas[::5]
    subset_shape = (max_degree, len(subset_lambdas))
    grid_shape = (max_degree, n_lambdas)

    # Result containers, one entry (or row) per polynomial degree.
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)  # allocated but never filled here

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros(grid_shape)
    lasso_lamb_deg_mse = np.zeros(grid_shape)

    ridge_subset_lambda_boot_mse = np.zeros(subset_shape)
    ridge_subset_lambda_boot_bias = np.zeros(subset_shape)
    ridge_subset_lambda_boot_variance = np.zeros(subset_shape)
    lasso_subset_lambda_boot_mse = np.zeros(subset_shape)
    lasso_subset_lambda_boot_bias = np.zeros(subset_shape)
    lasso_subset_lambda_boot_variance = np.zeros(subset_shape)

    for degree in range(max_degree):
        design = linear_regression.design_matrix_2D(x, y, degree)
        # The split is taken from the unscaled design matrix, once per degree.
        X_train, X_test, z_train, z_test = train_test_split(
            design, z, test_size=0.2)

        # Standardised full design matrix, used by the cross-validation.
        X = StandardScaler().fit(design).transform(design)
        X[:, 0] = 1

        # Standardised split (scaler fitted on the training rows only), used
        # by OLS and by the bootstraps.
        scaler_boot = StandardScaler().fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:, 0] = 1
        X_test_scaled[:, 0] = 1

        # Plain OLS: train and test error.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # K-fold CV over the lambda grid for Ridge and Lasso.
        ridge_fold_score = np.zeros((n_lambdas, k_folds))
        lasso_fold_score = np.zeros((n_lambdas, k_folds))
        test_list, train_list = stat_tools.k_fold_selection(z, k_folds)
        for i, lamb in enumerate(lambdas):
            for j in range(k_folds):
                held_out = test_list[j]
                kept = train_list[j]
                X_fit, z_fit = X[kept], z[kept]
                X_val, z_val = X[held_out], z[held_out]

                lasso_model = skl.Lasso(alpha=lamb, fit_intercept=False).fit(
                    X_fit, z_fit)
                z_lasso_test = lasso_model.predict(X_val)
                ridge_betas = linear_regression.Ridge_2D(X_fit, z_fit, lamb)
                z_ridge_test = X_val @ ridge_betas

                ridge_fold_score[i, j] = stat_tools.MSE(z_val, z_ridge_test)
                lasso_fold_score[i, j] = stat_tools.MSE(z_val, z_lasso_test)

        lasso_cv_mse = np.mean(lasso_fold_score, axis=1)
        ridge_cv_mse = np.mean(ridge_fold_score, axis=1)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse

        # Bootstrap of all three models at the best CV lambdas; each resample
        # is shared by the three fits.
        lamb_ridge = best_ridge_lambda[degree]
        lamb_lasso = best_lasso_lambda[degree]
        n_test = len(z_test)
        z_boot_ols = np.zeros((n_test, n_bootstraps))
        z_boot_ridge = np.zeros((n_test, n_bootstraps))
        z_boot_lasso = np.zeros((n_test, n_bootstraps))
        for b in range(n_bootstraps):
            draw = np.random.randint(0, len(z_train), len(z_train))
            X_boot = X_train_scaled[draw]
            z_boot = z_train[draw]
            betas_boot_ols = linear_regression.OLS_SVD_2D(X_boot, z_boot)
            betas_boot_ridge = linear_regression.Ridge_2D(
                X_boot, z_boot, lamb_ridge)
            lasso_model = skl.Lasso(
                alpha=lamb_lasso, fit_intercept=False).fit(X_boot, z_boot)
            z_boot_lasso[:, b] = lasso_model.predict(X_test_scaled)
            z_boot_ridge[:, b] = X_test_scaled @ betas_boot_ridge
            z_boot_ols[:, b] = X_test_scaled @ betas_boot_ols

        mse, bias, variance = stat_tools.compute_mse_bias_variance(
            z_test, z_boot_ridge)
        ridge_best_lambda_boot_mse[degree] = mse
        ridge_best_lambda_boot_bias[degree] = bias
        ridge_best_lambda_boot_variance[degree] = variance

        mse, bias, variance = stat_tools.compute_mse_bias_variance(
            z_test, z_boot_lasso)
        lasso_best_lambda_boot_mse[degree] = mse
        lasso_best_lambda_boot_bias[degree] = bias
        lasso_best_lambda_boot_variance[degree] = variance

        mse, bias, variance = stat_tools.compute_mse_bias_variance(
            z_test, z_boot_ols)
        ols_boot_mse[degree] = mse
        ols_boot_bias[degree] = bias
        ols_boot_variance[degree] = variance

        # Bootstrap of Ridge and Lasso at each lambda of the coarse subset.
        for col, lamb in enumerate(subset_lambdas):
            z_boot_ridge = np.zeros((n_test, n_bootstraps))
            z_boot_lasso = np.zeros((n_test, n_bootstraps))
            for b in range(n_bootstraps):
                draw = np.random.randint(0, len(z_train), len(z_train))
                X_boot = X_train_scaled[draw]
                z_boot = z_train[draw]
                betas_boot_ridge = linear_regression.Ridge_2D(
                    X_boot, z_boot, lamb)
                lasso_model = skl.Lasso(
                    alpha=lamb, fit_intercept=False).fit(X_boot, z_boot)
                z_boot_lasso[:, b] = lasso_model.predict(X_test_scaled)
                z_boot_ridge[:, b] = X_test_scaled @ betas_boot_ridge

            mse, bias, variance = stat_tools.compute_mse_bias_variance(
                z_test, z_boot_ridge)
            ridge_subset_lambda_boot_mse[degree, col] = mse
            ridge_subset_lambda_boot_bias[degree, col] = bias
            ridge_subset_lambda_boot_variance[degree, col] = variance

            mse, bias, variance = stat_tools.compute_mse_bias_variance(
                z_test, z_boot_lasso)
            lasso_subset_lambda_boot_mse[degree, col] = mse
            lasso_subset_lambda_boot_bias[degree, col] = bias
            lasso_subset_lambda_boot_variance[degree, col] = variance
def terrain_analysis():
    """Inlined OLS / Ridge / Lasso study on the full terrain image.

    Reads '../datafiles/SRTM_data_Norway_1.tif' and uses every pixel: the
    flattened pixel values are the response and the flattened column/row
    indices are the two features. For each polynomial degree in
    ``range(max_degree)`` the function:

    * builds the design matrix and draws a fresh train/test split of it,
    * stores the OLS train and test MSE on the scaled split,
    * cross-validates Ridge and Lasso over ``lambdas`` with the folds given by
      ``stat_tools.k_fold_selection`` and stores the per-lambda mean MSE, the
      best lambda and the best MSE,
    * bootstraps OLS, then Ridge at its best lambda, then Lasso at its best
      lambda, each with its own sequence of resamples.

    Results are written to local arrays only; the plotting part is not
    implemented and the function returns ``None``.

    NOTE(review): this file contains more than one ``def terrain_analysis``;
    Python keeps only the definition that is executed last.
    """

    def bootstrap_predictions(fit_predict, X_fit, z_fit, n_test, n_rounds):
        # Resample the training rows with replacement n_rounds times and
        # collect fit_predict(X_resampled, z_resampled) column by column.
        predictions = np.zeros((n_test, n_rounds))
        for b in range(n_rounds):
            draw = np.random.randint(0, len(z_fit), len(z_fit))
            predictions[:, b] = fit_predict(X_fit[draw], z_fit[draw])
        return predictions

    # Setting up the terrain data:
    terrain_data = imread('../datafiles/SRTM_data_Norway_1.tif')
    n_rows, n_cols = terrain_data.shape[0], terrain_data.shape[1]
    x_terrain = np.arange(n_cols)  # apparently, from the problem description.
    y_terrain = np.arange(n_rows)
    X_coord, Y_coord = np.meshgrid(x_terrain, y_terrain)
    z_terrain = terrain_data.flatten()  # the response values
    x_terrain_flat = X_coord.flatten()  # first feature variable
    y_terrain_flat = Y_coord.flatten()  # second feature variable

    max_degree = 10
    n_lambdas = 15
    n_bootstraps = 20
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)

    # Result containers, one entry (or row) per polynomial degree.
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    for degree in range(max_degree):
        X_terrain_design = linear_regression.design_matrix_2D(
            x_terrain_flat, y_terrain_flat, degree)
        # The split is taken from the unscaled design matrix, once per degree.
        X_train, X_test, z_train, z_test = train_test_split(
            X_terrain_design, z_terrain, test_size=0.2)

        # Standardised full design matrix, used by the cross-validation.
        z = z_terrain
        X = StandardScaler().fit(X_terrain_design).transform(X_terrain_design)
        X[:, 0] = 1

        # Standardised split (scaler fitted on the training rows only), used
        # by OLS and by the bootstraps.
        scaler_boot = StandardScaler().fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:, 0] = 1
        X_test_scaled[:, 0] = 1

        # Plain OLS: train and test error.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # K-fold CV over the lambda grid for Ridge and Lasso.
        ridge_fold_score = np.zeros((n_lambdas, k_folds))
        lasso_fold_score = np.zeros((n_lambdas, k_folds))
        test_list, train_list = stat_tools.k_fold_selection(z, k_folds)
        for i, cv_lamb in enumerate(lambdas):
            for j in range(k_folds):
                held_out = test_list[j]
                kept = train_list[j]
                X_fit, z_fit = X[kept], z[kept]
                X_val, z_val = X[held_out], z[held_out]

                lasso_model = skl.Lasso(
                    alpha=cv_lamb, fit_intercept=False).fit(X_fit, z_fit)
                z_lasso_test = lasso_model.predict(X_val)
                ridge_betas = linear_regression.Ridge_2D(X_fit, z_fit, cv_lamb)
                z_ridge_test = X_val @ ridge_betas

                ridge_fold_score[i, j] = stat_tools.MSE(z_val, z_ridge_test)
                lasso_fold_score[i, j] = stat_tools.MSE(z_val, z_lasso_test)

        lasso_cv_mse = np.mean(lasso_fold_score, axis=1)
        ridge_cv_mse = np.mean(ridge_fold_score, axis=1)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse

        n_test = len(z_test)

        # OLS bootstrap: mse, bias and variance for the given degree.
        z_boot_model = bootstrap_predictions(
            lambda Xb, zb: X_test_scaled @ linear_regression.OLS_SVD_2D(Xb, zb),
            X_train_scaled, z_train, n_test, n_bootstraps)
        mse, bias, variance = stat_tools.compute_mse_bias_variance(
            z_test, z_boot_model)
        ols_boot_mse[degree] = mse
        ols_boot_bias[degree] = bias
        ols_boot_variance[degree] = variance

        # Ridge bootstrap at the best CV lambda for this degree.
        ridge_lamb = best_ridge_lambda[degree]
        z_boot_model = bootstrap_predictions(
            lambda Xb, zb: X_test_scaled @ linear_regression.Ridge_2D(
                Xb, zb, ridge_lamb),
            X_train_scaled, z_train, n_test, n_bootstraps)
        mse, bias, variance = stat_tools.compute_mse_bias_variance(
            z_test, z_boot_model)
        ridge_best_lambda_boot_mse[degree] = mse
        ridge_best_lambda_boot_bias[degree] = bias
        ridge_best_lambda_boot_variance[degree] = variance

        # Lasso bootstrap at the best CV lambda for this degree.
        lasso_lamb = best_lasso_lambda[degree]
        z_boot_model = bootstrap_predictions(
            lambda Xb, zb: skl.Lasso(
                alpha=lasso_lamb, fit_intercept=False).fit(Xb, zb).predict(
                    X_test_scaled),
            X_train_scaled, z_train, n_test, n_bootstraps)
        mse, bias, variance = stat_tools.compute_mse_bias_variance(
            z_test, z_boot_model)
        lasso_best_lambda_boot_mse[degree] = mse
        lasso_best_lambda_boot_bias[degree] = bias
        lasso_best_lambda_boot_variance[degree] = variance

    ################ All necessary computations should have been done above. Below follows
    ################ the plotting part.
    return