def franke_predictions(n=1000, noise_scale=0.2, degree=20, ridge_lambda=1e-2, lasso_lambda=1e-5, plot_grid_size=2000):
    """Fit OLS, ridge and lasso to noisy Franke samples and predict on a grid.

    n points are drawn uniformly on the unit square, the Franke function is
    evaluated there and Gaussian noise scaled by noise_scale is added.  The
    response is centred, the polynomial design matrix of the given degree is
    standardised, and the three models are fitted and evaluated on a
    plot_grid_size x plot_grid_size meshgrid over the unit square.

    Returns, in this order:
        x_plot_mesh: meshgrid of x-coordinates
        y_plot_mesh: meshgrid of y-coordinates
        z_predict_ols: ols prediction of z on the meshgrid
        z_predict_ridge: ridge prediction of z on the meshgrid
        z_predict_lasso: lasso prediction of z on the meshgrid
        z_plot_franke: noise-free Franke values on the meshgrid
    """
    # Sample sites and noisy response.  The draw order (x, y, noise) is kept
    # so that seeded runs give the same data as before.
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    noiseless = FrankeFunction(x, y)
    noisy = noiseless + noise_scale * np.random.normal(0, 1, len(noiseless))

    # Centre the response; the mean is added back to every prediction below.
    z_intercept = np.mean(noisy)
    z = noisy - z_intercept

    # Standardise the training design matrix and keep the scaler so that the
    # plotting grid is transformed with the same statistics.
    X = linear_regression.design_matrix_2D(x, y, degree)
    scaler = StandardScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)

    # Plotting grid and its scaled design matrix.
    ticks = np.linspace(0, 1, plot_grid_size)
    x_plot_mesh, y_plot_mesh = np.meshgrid(ticks, ticks)
    X_plot_design = linear_regression.design_matrix_2D(
        x_plot_mesh.flatten(), y_plot_mesh.flatten(), degree
    )
    X_plot_design_scaled = scaler.transform(X_plot_design)
    z_plot_franke = FrankeFunction(x_plot_mesh, y_plot_mesh)

    def on_grid(flat_centred_prediction):
        # Undo the centring and fold the flat prediction back onto the grid.
        return (flat_centred_prediction + z_intercept).reshape(plot_grid_size, -1)

    # OLS
    ols_coefficients = linear_regression.OLS_SVD_2D(X_scaled, z)
    z_predict_ols = on_grid(X_plot_design_scaled @ ols_coefficients)

    # Ridge
    ridge_coefficients = linear_regression.Ridge_2D(X_scaled, z, ridge_lambda)
    z_predict_ridge = on_grid(X_plot_design_scaled @ ridge_coefficients)

    # Lasso (no intercept: the response has already been centred)
    lasso_model = skl.Lasso(alpha=lasso_lambda, fit_intercept=False, max_iter=10000)
    lasso_model.fit(X_scaled, z)
    z_predict_lasso = on_grid(lasso_model.predict(X_plot_design_scaled))

    return x_plot_mesh, y_plot_mesh, z_predict_ols, z_predict_ridge, z_predict_lasso, z_plot_franke
def Ridge_unit_test(min_deg=2, max_deg=5, tol=1e-6):
    """Compare linear_regression.Ridge_2D against scikit-learn's Ridge.

    A noisy Franke data set of 100 points is generated once.  For every
    polynomial degree in [min_deg, max_deg] and every penalty in
    np.linspace(0, 1, 10), the coefficients from Ridge_2D are compared
    element-wise with those from sklearn's Ridge(fit_intercept=False).
    Every coefficient that is not within tol of its sklearn counterpart is
    reported on stdout.

    Parameters:
        min_deg: lowest polynomial degree tested (inclusive).
        max_deg: highest polynomial degree tested (inclusive).
        tol: absolute tolerance for a coefficient to count as matching.

    Returns:
        None.  Mismatches are only printed, never raised.
    """
    n = 100  # Number of data points

    # Prepare data set
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2

    for deg in range(min_deg, max_deg + 1):
        # Build the design matrix for the degree under test.  (This used to
        # be hard-coded to 5, so the degree loop re-tested the same matrix.)
        X = linear_regression.design_matrix_2D(x, y, deg)

        for lamb in np.linspace(0, 1, 10):
            # Our homegrown Ridge regression
            beta = linear_regression.Ridge_2D(X=X, z=z, lamb=lamb)

            # Reference solution from sklearn
            beta_skl = Ridge(alpha=lamb, fit_intercept=False).fit(X, z).coef_

            for i, (ours, reference) in enumerate(zip(beta, beta_skl)):
                # Written as "not (<)" so that NaN differences are reported too.
                if not abs(ours - reference) < tol:
                    print(
                        "Warning! mismatch with SKL in Ridge_2D_unit_test with tol = %.0e"
                        % tol)
                    print("Parameter no. %i for deg = %i" % (i, deg))
                    print("-> (OUR) beta = %8.12f" % ours)
                    print("-> (SKL) beta = %8.12f" % reference)
    return
def OLS_unit_test(min_deg=2, max_deg=5, tol=1e-6):
    """Compare linear_regression.OLS_2D against scikit-learn's LinearRegression.

    A noisy Franke data set of 100 points is generated once.  For every
    polynomial degree in [min_deg, max_deg] the coefficients from OLS_2D are
    compared element-wise with those from sklearn's
    LinearRegression(fit_intercept=False).  Every coefficient that is not
    within tol of its sklearn counterpart is reported on stdout.

    Parameters:
        min_deg: lowest polynomial degree tested (inclusive).
        max_deg: highest polynomial degree tested (inclusive).
        tol: absolute tolerance for a coefficient to count as matching.

    Returns:
        None.  Mismatches are only printed, never raised.
    """
    n = 100  # Number of data points

    # Prepare data set
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2

    for deg in range(min_deg, max_deg + 1):
        # Build the design matrix for the degree under test.  (This used to
        # be hard-coded to 5, so the degree loop re-tested the same matrix.)
        X = linear_regression.design_matrix_2D(x, y, deg)

        # Our homegrown OLS
        beta = linear_regression.OLS_2D(X=X, z=z)

        # Reference solution from sklearn
        beta_skl = LinearRegression(fit_intercept=False).fit(X, z).coef_

        for i, (ours, reference) in enumerate(zip(beta, beta_skl)):
            # Written as "not (<)" so that NaN differences are reported too.
            if not abs(ours - reference) < tol:
                print(
                    "Warning! mismatch with SKL in OLS_unit_test with tol = %.0e"
                    % tol
                )
                print("Parameter no. %i for deg = %i" % (i, deg))
                print("-> (OUR) beta = %8.12f" % ours)
                print("-> (SKL) beta = %8.12f" % reference)
    return
def terrain_analysis_plots(
    spacing=100,
    max_degree=20,
    n_lambdas=30,
    k_folds=5,
    n_bootstraps=50,
    do_boot=False,
    do_subset=False,
):
    """Run the OLS / ridge / lasso model-selection study on the terrain data.

    The top-left 1801 x 1801 window of the SRTM image is downsampled with
    the given spacing and regressed on polynomial features of the
    normalised coordinates.  For every degree in range(max_degree) the
    function computes OLS train/test MSE, k-fold CV MSE for OLS and for
    ridge/lasso over a log-spaced penalty grid, and optionally bootstrap
    MSE/bias/variance.  It then plots the CV results and prints the best
    penalties per degree.

    Parameters:
        spacing: stride used when downsampling the terrain window.
        max_degree: number of polynomial degrees tried (0 .. max_degree - 1).
        n_lambdas: number of penalties in np.logspace(-6, 0, n_lambdas).
        k_folds: number of folds handed to crossvalidation.k_fold_cv_all.
        n_bootstraps: number of resamples handed to the bootstrap helpers.
        do_boot: if True, bootstrap OLS and ridge/lasso at their best CV
            penalty for every degree.
        do_subset: if True, bootstrap ridge/lasso for every penalty in
            lambdas[::12] for every degree.  This no longer depends on
            do_boot.

    Returns:
        None.  Results are shown as matplotlib figures and printed.
    """
    # Setting up the terrain data:
    # Note structure! X-coordinates are on the rows of terrain_data
    # Point_selection.flatten() moves most rapidly over the x-coordinates
    # Meshgrids flattened also move most rapidly over the x-coordinates. Thus
    # this should make z(x,y).reshape(length_y,length_x) be consistent with terrain_data
    terrain_data = imread("../../datafiles/SRTM_data_Norway_1.tif")
    # Make square and downsample
    point_selection = terrain_data[:1801:spacing, :1801:spacing]
    x_terrain_selection = np.linspace(0, 1, point_selection.shape[1])
    y_terrain_selection = np.linspace(0, 1, point_selection.shape[0])
    X_coord_selection, Y_coord_selection = np.meshgrid(x_terrain_selection,
                                                       y_terrain_selection)
    z = point_selection.flatten()  # the response values
    x = X_coord_selection.flatten()  # the first degree feature variables
    y = Y_coord_selection.flatten()  # the first degree feature variables

    lambdas = np.logspace(-6, 0, n_lambdas)
    subset_lambdas = lambdas[::12]

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Centering.  The full response is centred with its own mean (used by
    # CV); the split responses are both centred with the training mean.
    z = z - np.mean(z)
    z_train_intercept = np.mean(z_train)
    z_train = z_train - z_train_intercept
    z_test = z_test - z_train_intercept

    ##### Setup of problem is completed above.

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    subset_shape = (max_degree, len(subset_lambdas))
    ridge_subset_lambda_boot_mse = np.zeros(subset_shape)
    ridge_subset_lambda_boot_bias = np.zeros(subset_shape)
    ridge_subset_lambda_boot_variance = np.zeros(subset_shape)
    lasso_subset_lambda_boot_mse = np.zeros(subset_shape)
    lasso_subset_lambda_boot_bias = np.zeros(subset_shape)
    lasso_subset_lambda_boot_variance = np.zeros(subset_shape)

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = crossvalidation.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg

        if do_boot:
            # All regression bootstraps at once, at the best CV penalties.
            lamb_ridge = best_ridge_lambda[degree]
            lamb_lasso = best_lasso_lambda[degree]
            (
                ridge_best_lambda_boot_mse[degree],
                ridge_best_lambda_boot_bias[degree],
                ridge_best_lambda_boot_variance[degree],
                lasso_best_lambda_boot_mse[degree],
                lasso_best_lambda_boot_bias[degree],
                lasso_best_lambda_boot_variance[degree],
                ols_boot_mse[degree],
                ols_boot_bias[degree],
                ols_boot_variance[degree],
            ) = bootstrap.bootstrap_all(X_train_scaled, X_test_scaled,
                                        z_train, z_test, n_bootstraps,
                                        lamb_lasso, lamb_ridge)

        if do_subset:
            # Bootstrapping for a selection of lambdas for ridge and lasso.
            # The loop penalty itself is passed for both models; previously
            # the best-lambda values were passed, so every column held the
            # same result and the call failed when do_boot was False.
            for subset_lambda_index, lamb in enumerate(subset_lambdas):
                (
                    ridge_subset_lambda_boot_mse[degree, subset_lambda_index],
                    ridge_subset_lambda_boot_bias[degree, subset_lambda_index],
                    ridge_subset_lambda_boot_variance[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_mse[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_bias[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_variance[degree, subset_lambda_index],
                ) = bootstrap.bootstrap_ridge_lasso(
                    X_train_scaled,
                    X_test_scaled,
                    z_train,
                    z_test,
                    n_bootstraps,
                    lamb,
                    lamb,
                )

    # Plots go here.
    plt.figure()
    plt.semilogy(ols_cv_mse, label="ols")
    plt.semilogy(best_ridge_mse, label="ridge")
    plt.semilogy(best_lasso_mse, label="lasso")
    plt.title(
        "CV MSE for OLS, Ridge and Lasso, with the best lambdas for each degree"
    )
    plt.legend()
    plt.show()

    # For a couple of the highest degrees, plot cv mse vs lambda, first for
    # ridge and then for lasso.  Degrees that would be negative are skipped;
    # unguarded, they wrap around as negative indices and mislabel curves.
    plotted_degrees = [
        max_degree - offset for offset in (1, 2, 3, 5, 7)
        if max_degree - offset >= 0
    ]
    for lamb_deg_mse in (ridge_lamb_deg_mse, lasso_lamb_deg_mse):
        plt.figure()
        for plotted_degree in plotted_degrees:
            plt.plot(
                np.log10(lambdas),
                lamb_deg_mse[plotted_degree],
                label="degree = {}".format(plotted_degree),
            )
        plt.legend()
        plt.show()

    print("best ridge lambdas:")
    print(best_ridge_lambda)
    print("best lasso lambda")
    print(best_lasso_lambda)

    return
def terrain_predictions(spacing=40, degree=20, ridge_lambda=1e-2, lasso_lambda=1e-5):
    """Fit OLS, ridge and lasso to downsampled terrain and predict at full resolution.

    The top-left 1801 x 1801 window of the SRTM image is sampled every
    `spacing` pixels, regressed on standardised polynomial features of the
    normalised coordinates, and each fitted model is evaluated on the full
    1801 x 1801 grid.

    Returns, in this order:
        x_plot_mesh: meshgrid of x-coordinates
        y_plot_mesh: meshgrid of y-coordinates
        z_predict_ols: ols prediction of z on the meshgrid
        z_predict_ridge: ridge prediction of z on the meshgrid
        z_predict_lasso: lasso prediction of z on the meshgrid
        z_true: terrain values on the meshgrid (the 1801 x 1801 window)
    """
    side = 1801  # edge length, in pixels, of the square window that is used

    # Note structure! X-coordinates are on the rows of the terrain image.
    # Both the sampled image and the meshgrids, once flattened, move most
    # rapidly over the x-coordinates, so z(x, y).reshape(length_y, length_x)
    # should be consistent with the image layout.
    terrain_data = imread("../../datafiles/SRTM_data_Norway_1.tif")
    samples = terrain_data[:side:spacing, :side:spacing]

    sample_x_mesh, sample_y_mesh = np.meshgrid(
        np.linspace(0, 1, samples.shape[1]),
        np.linspace(0, 1, samples.shape[0]),
    )
    x = sample_x_mesh.flatten()  # first-degree feature
    y = sample_y_mesh.flatten()  # first-degree feature
    heights = samples.flatten()  # response values

    # Centre the response; the mean is added back to every prediction.
    z_intercept = np.mean(heights)
    z = heights - z_intercept

    # Standardise the features; the same scaler is reused for the plot grid.
    X = linear_regression.design_matrix_2D(x, y, degree)
    scaler = StandardScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)

    # Full-resolution prediction grid.
    x_plot_mesh, y_plot_mesh = np.meshgrid(
        np.linspace(0, 1, side),
        np.linspace(0, 1, side),
    )
    X_plot_design = linear_regression.design_matrix_2D(
        x_plot_mesh.flatten(), y_plot_mesh.flatten(), degree
    )
    X_plot_design_scaled = scaler.transform(X_plot_design)

    # Ground truth
    z_true = terrain_data[:side, :side]

    def on_grid(flat_centred_prediction):
        # Undo the centring and fold the flat prediction back onto the grid.
        return (flat_centred_prediction + z_intercept).reshape(-1, side)

    # OLS
    ols_coefficients = linear_regression.OLS_SVD_2D(X_scaled, z)
    z_predict_ols = on_grid(X_plot_design_scaled @ ols_coefficients)

    # Ridge
    ridge_coefficients = linear_regression.Ridge_2D(X_scaled, z, ridge_lambda)
    z_predict_ridge = on_grid(X_plot_design_scaled @ ridge_coefficients)

    # Lasso (no intercept: the response has already been centred)
    lasso_model = skl.Lasso(alpha=lasso_lambda, fit_intercept=False)
    lasso_model.fit(X_scaled, z)
    z_predict_lasso = on_grid(lasso_model.predict(X_plot_design_scaled))

    return x_plot_mesh, y_plot_mesh, z_predict_ols, z_predict_ridge, z_predict_lasso, z_true
lasso_best_lambda_boot_bias = np.zeros(max_degree) lasso_best_lambda_boot_variance = np.zeros(max_degree) ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas)) lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas)) ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas))) ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas))) ridge_subset_lambda_boot_variance = np.zeros((max_degree, len(subset_lambdas))) lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas))) lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas))) lasso_subset_lambda_boot_variance = np.zeros((max_degree, len(subset_lambdas))) # Actual computations for degree in range(max_degree): X = linear_regression.design_matrix_2D(x,y,degree) X_train = linear_regression.design_matrix_2D(x_train, y_train, degree) X_test = linear_regression.design_matrix_2D(x_test, y_test, degree) # Scaling and feeding to CV. scaler = StandardScaler() scaler.fit(X) X_scaled = scaler.transform(X) # X_scaled[:,0] = 1 # Maybe not for ridge+lasso. Don't want to penalize constants... # Scaling and feeding to bootstrap and OLS scaler_boot = StandardScaler() scaler_boot.fit(X_train) X_train_scaled = scaler_boot.transform(X_train)
def franke_analysis_plots(
    n=1000,
    noise_scale=0.2,
    max_degree=20,
    n_bootstraps=100,
    k_folds=5,
    n_lambdas=30,
    do_boot=True,
    do_subset=True,
):
    """Run the OLS / ridge / lasso model-selection study on noisy Franke data.

    n points are drawn uniformly on the unit square and the Franke function
    plus Gaussian noise scaled by noise_scale is used as the response.  For
    every degree in range(max_degree) the function computes OLS train/test
    MSE, k-fold CV MSE for OLS and for ridge/lasso over a log-spaced penalty
    grid, and optionally bootstrap MSE/bias/variance.  All results are then
    plotted and the best penalties per degree are printed.

    Parameters:
        n: number of sample points.
        noise_scale: standard deviation of the additive noise.
        max_degree: number of polynomial degrees tried, i.e. including
            degree 0 (0 .. max_degree - 1).
        n_bootstraps: number of resamples handed to the bootstrap helpers.
        k_folds: number of folds handed to crossvalidation.k_fold_cv_all.
        n_lambdas: number of penalties in np.logspace(-6, 0, n_lambdas).
        do_boot: if True, bootstrap OLS and ridge/lasso at their best CV
            penalty for every degree.
        do_subset: if True, bootstrap ridge/lasso for every penalty in
            lambdas[::12] for every degree.  This no longer depends on
            do_boot.

    Returns:
        None.  Results are shown as matplotlib figures and printed.
    """
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale * np.random.normal(0, 1, len(z))

    lambdas = np.logspace(-6, 0, n_lambdas)
    # Every 12th penalty is singled out for the "low / middle / high" plots.
    # The column indices are derived from the same stride, so they always
    # line up with subset_lambdas, whatever n_lambdas is.
    subset_stride = 12
    subset_columns = list(range(0, n_lambdas, subset_stride))
    subset_lambdas = lambdas[::subset_stride]

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Centering the response.  The full response is centred with its own
    # mean (used by CV); the split responses both use the training mean.
    z = z - np.mean(z)
    z_train_intercept = np.mean(z_train)
    z_train = z_train - z_train_intercept
    z_test = z_test - z_train_intercept

    ########### Setup of problem is completed above.

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    subset_shape = (max_degree, len(subset_lambdas))
    ridge_subset_lambda_boot_mse = np.zeros(subset_shape)
    ridge_subset_lambda_boot_bias = np.zeros(subset_shape)
    ridge_subset_lambda_boot_variance = np.zeros(subset_shape)
    lasso_subset_lambda_boot_mse = np.zeros(subset_shape)
    lasso_subset_lambda_boot_bias = np.zeros(subset_shape)
    lasso_subset_lambda_boot_variance = np.zeros(subset_shape)

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        # Also, gets ols_CV_MSE
        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = crossvalidation.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg

        if do_boot:
            # All regression bootstraps at once, at the best CV penalties.
            lamb_ridge = best_ridge_lambda[degree]
            lamb_lasso = best_lasso_lambda[degree]
            (
                ridge_best_lambda_boot_mse[degree],
                ridge_best_lambda_boot_bias[degree],
                ridge_best_lambda_boot_variance[degree],
                lasso_best_lambda_boot_mse[degree],
                lasso_best_lambda_boot_bias[degree],
                lasso_best_lambda_boot_variance[degree],
                ols_boot_mse[degree],
                ols_boot_bias[degree],
                ols_boot_variance[degree],
            ) = bootstrap.bootstrap_all(X_train_scaled, X_test_scaled,
                                        z_train, z_test, n_bootstraps,
                                        lamb_lasso, lamb_ridge)

        if do_subset:
            # Bootstrapping for a selection of lambdas for ridge and lasso.
            # The loop penalty itself is passed for both models; previously
            # the best-lambda values were passed, so every column held the
            # same result and the call failed when do_boot was False.
            for subset_lambda_index, lamb in enumerate(subset_lambdas):
                (
                    ridge_subset_lambda_boot_mse[degree, subset_lambda_index],
                    ridge_subset_lambda_boot_bias[degree, subset_lambda_index],
                    ridge_subset_lambda_boot_variance[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_mse[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_bias[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_variance[degree, subset_lambda_index],
                ) = bootstrap.bootstrap_ridge_lasso(
                    X_train_scaled,
                    X_test_scaled,
                    z_train,
                    z_test,
                    n_bootstraps,
                    lamb,
                    lamb,
                )

    # Plots go here.

    # CV MSE for OLS:
    plt.figure()
    plt.semilogy(ols_cv_mse)
    plt.title("OLS CV MSE")
    plt.show()

    # Bootstrap for OLS:
    plt.figure()
    plt.semilogy(ols_boot_mse, label="mse")
    plt.semilogy(ols_boot_bias, label="bias")
    plt.semilogy(ols_boot_variance, label="variance")
    plt.title("OLS bias-variance-MSE by bootstrap")
    plt.legend()
    plt.show()

    # The same three figures are drawn for ridge and then for lasso; only
    # the data and a few words in the labels differ.
    penalised_models = (
        (
            "ridge", "Ridge", "best for each degree",
            best_ridge_mse, ridge_lamb_deg_mse,
            ridge_best_lambda_boot_mse, ridge_best_lambda_boot_bias,
            ridge_best_lambda_boot_variance,
            ridge_subset_lambda_boot_bias, ridge_subset_lambda_boot_variance,
        ),
        (
            "lasso", "Lasso", "best lambda for each degree",
            best_lasso_mse, lasso_lamb_deg_mse,
            lasso_best_lambda_boot_mse, lasso_best_lambda_boot_bias,
            lasso_best_lambda_boot_variance,
            lasso_subset_lambda_boot_bias, lasso_subset_lambda_boot_variance,
        ),
    )
    for (name, title_name, best_label, best_mse, lamb_deg_mse, boot_mse,
         boot_bias, boot_variance, subset_bias,
         subset_variance) in penalised_models:
        # CV, best+low+middle+high lambdas
        plt.figure()
        plt.semilogy(best_mse, label=best_label)
        for column in subset_columns:
            plt.semilogy(lamb_deg_mse[:, column],
                         label="lambda={}".format(lambdas[column]))
        plt.title(
            "{} CV MSE for best lambda at each degree, plus for given lambdas across all degrees"
            .format(title_name))
        plt.legend()
        plt.show()

        # Bootstrap for the best lambdas:
        plt.figure()
        plt.semilogy(boot_mse, label="mse")
        plt.semilogy(boot_bias, label="bias")
        plt.semilogy(boot_variance, label="variance")
        plt.title("Best {} lambdas for each degree bootstrap".format(name))
        plt.legend()
        plt.show()

        # Bootstrap only bias and variance for low+middle+high lambdas
        plt.figure()
        for subset_index, subset_lambda in enumerate(subset_lambdas):
            plt.semilogy(subset_bias[:, subset_index],
                         label="bias, lambda = {}".format(subset_lambda))
            plt.semilogy(subset_variance[:, subset_index],
                         label="variance, lambda = {}".format(subset_lambda))
        plt.title(
            "Bias+variance for low, middle, high {} lambdas".format(name))
        plt.legend()
        plt.show()

    # For a couple of the highest degrees, plot cv mse vs lambda, first for
    # ridge and then for lasso.  Degrees that would be negative are skipped;
    # unguarded, they wrap around as negative indices and mislabel curves.
    plotted_degrees = [
        max_degree - offset for offset in (1, 2, 3, 5, 7)
        if max_degree - offset >= 0
    ]
    for lamb_deg_mse in (ridge_lamb_deg_mse, lasso_lamb_deg_mse):
        plt.figure()
        for plotted_degree in plotted_degrees:
            plt.plot(
                np.log10(lambdas),
                lamb_deg_mse[plotted_degree],
                label="degree = {}".format(plotted_degree),
            )
        plt.legend()
        plt.show()

    print("best ridge lambda:")
    print(best_ridge_lambda)
    print("best lasso lambda:")
    print(best_lasso_lambda)

    return
def part_1a():
    """Fit a degree-5 polynomial to the Franke function and plot the result.

    Samples the Franke function at 100 random points, fits OLS with and
    without added noise, runs a k-fold CV over ridge/lasso penalties and an
    OLS bootstrap on a train/test split, prints the OLS MSE for the clean
    and noisy fits, and shows three 3D surfaces: the analytic function, the
    clean OLS fit and the noisy OLS fit.

    Returns:
        None.  Results are printed and shown as a matplotlib figure.
    """
    # Sample the franke function n times at randomly chosen points
    n = 100
    deg = 5
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z_noisy = z + noise_scale * np.random.normal(0, 1, len(z))

    # Making the design matrix
    X = linear_regression.design_matrix_2D(x, y, deg)

    # Find the least-squares solution
    beta = linear_regression.OLS_2D(X, z)
    beta_noisy = linear_regression.OLS_2D(X, z_noisy)

    # Split into training and test data with ratio 0.2
    X_train, X_test, z_train, z_test = train_test_split(X, z, test_size=0.2)

    # Scale data according to sklearn, beware possible problems with
    # intercept and std.  The scaled matrices are not used further down; the
    # CV and bootstrap below work on the unscaled design matrix.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # K-fold CV for ridge and lasso (lasso directly from sklearn).
    n_lambdas = 100
    lambdas = np.logspace(-3, 0, n_lambdas)
    k_folds = 5
    # The shape must be one tuple; a second positional argument to np.zeros
    # is interpreted as the dtype.
    ridge_fold_score = np.zeros((n_lambdas, k_folds))
    lasso_fold_score = np.zeros((n_lambdas, k_folds))
    # NOTE(review): k_fold_selection is not defined in the visible part of
    # this file; it is presumably imported elsewhere - confirm it is in scope
    # and that it returns (test_list, train_list) in this order.
    test_list, train_list = k_fold_selection(z, k_folds)
    for i, lamb in enumerate(lambdas):
        for j in range(k_folds):
            test_ind_cv = test_list[j]
            train_ind_cv = train_list[j]
            X_train_cv = X[train_ind_cv]
            z_train_cv = z[train_ind_cv]
            X_test_cv = X[test_ind_cv]
            z_test_cv = z[test_ind_cv]

            clf_Lasso = skl.Lasso(alpha=lamb).fit(X_train_cv, z_train_cv)
            z_lasso_test = clf_Lasso.predict(X_test_cv)
            ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv,
                                                     lamb)
            z_ridge_test = X_test_cv @ ridge_betas

            # Score each fold against its own held-out responses.
            ridge_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_ridge_test)
            lasso_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_lasso_test)

    lasso_cv_mse = np.mean(lasso_fold_score, axis=1, keepdims=True)
    ridge_cv_mse = np.mean(ridge_fold_score, axis=1, keepdims=True)
    best_lambda_lasso = lambdas[np.argmin(lasso_cv_mse)]
    best_lambda_ridge = lambdas[np.argmin(ridge_cv_mse)]

    # Bootstrap skeleton: one column of test-set predictions per resample.
    # Only OLS is bootstrapped here; a ridge or lasso fit with a fixed
    # penalty could be substituted for the OLS_SVD_2D call.
    n_bootstraps = 100
    z_boot_model = np.zeros((len(z_test), n_bootstraps))
    for bootstrap_number in range(n_bootstraps):
        # For the number of data value points (len_z) in the training set, pick a random
        # data value (z_train[random]) and its corresponding row in the design matrix
        shuffle = np.random.randint(0, len(z_train), len(z_train))
        X_boot, z_boot = X_train[shuffle], z_train[shuffle]
        betas_boot = linear_regression.OLS_SVD_2D(X_boot, z_boot)
        z_boot_model[:, bootstrap_number] = X_test @ betas_boot
    mse, bias, variance = stat_tools.compute_mse_bias_variance(
        z_test, z_boot_model)

    # Check MSE
    print("MSE = %.3f" % stat_tools.MSE(
        z, linear_regression.evaluate_poly_2D(x, y, beta, deg)))
    # And with noise
    print("Including standard normal noise scaled by {}, MSE = {:.3f}".format(
        noise_scale,
        stat_tools.MSE(
            z_noisy,
            linear_regression.evaluate_poly_2D(x, y, beta_noisy, deg))))

    # Evaluate the Franke function & least-squares
    x = np.linspace(0, 1, 30)
    y = np.linspace(0, 1, 30)
    X, Y = np.meshgrid(x, y)
    z_analytic = FrankeFunction(X, Y)
    z_fit = linear_regression.evaluate_poly_2D(X, Y, beta, deg)
    z_fit_noisy = linear_regression.evaluate_poly_2D(X, Y, beta_noisy, deg)

    # One 3D panel each for the analytic surface, the fit, and the fit
    # obtained from the noisy responses.
    fig = plt.figure()
    panels = (
        ("Franke Function", z_analytic),
        ("OLS", z_fit),
        ("OLS with noise", z_fit_noisy),
    )
    for panel_number, (panel_title, surface) in enumerate(panels, start=1):
        ax = fig.add_subplot(1, 3, panel_number, projection="3d")
        ax.set_title(panel_title)
        ax.view_init(azim=45)
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.set_zlabel("z")
        ax.plot_surface(X, Y, surface, cmap=cm.coolwarm)

    plt.show()
    return
def terrain_analysis():
    """Run the OLS / ridge / lasso study on a subsample of the terrain data.

    Reads the SRTM GeoTIFF, keeps every 20th pixel and, for each polynomial
    degree in ``range(max_degree)``, computes:

    * OLS train and test MSE on one fixed 80/20 split,
    * k-fold CV MSE for every lambda (ridge and lasso) and for OLS,
    * bootstrap MSE / bias / variance for OLS and for ridge / lasso at their
      CV-optimal lambda,
    * bootstrap MSE / bias / variance for ridge / lasso on a coarse subset
      of the lambda grid.

    All results are collected in local arrays.  The plotting part has not
    been written yet, so the function returns ``None``.

    NOTE(review): a second ``terrain_analysis`` is defined further down in
    this file; if both live in the same module, the later one replaces this
    one at import time.
    """
    # Setting up the terrain data:
    terrain_data = imread('../datafiles/SRTM_data_Norway_1.tif')
    x_terrain = np.arange(terrain_data.shape[1])
    y_terrain = np.arange(terrain_data.shape[0])
    X_coord, Y_coord = np.meshgrid(x_terrain, y_terrain)
    z_terrain = terrain_data.flatten()  # the response values
    x_terrain_flat = X_coord.flatten()  # the first degree feature variables
    y_terrain_flat = Y_coord.flatten()  # the first degree feature variables

    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)
    subset_lambdas = lambdas[::5]

    # Keep every 20th pixel to make the problem tractable.
    # The coordinates come from np.arange and are therefore integers; they
    # are cast to float because high polynomial powers of integer pixel
    # indices would presumably overflow int64 inside design_matrix_2D
    # (franke_analysis already feeds it floats).
    # TODO: consider rescaling the coordinates to [0, 1] as well.
    x = x_terrain_flat[::20].astype(float)
    y = y_terrain_flat[::20].astype(float)
    z = z_terrain[::20]

    # One split shared by all degrees, so results are comparable.
    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Quantities of interest, indexed by polynomial degree:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)
    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    # Indexed by (degree, lambda):
    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    # Indexed by (degree, subset lambda):
    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)
        # Column 0 is reset to 1 after scaling (presumably the constant
        # column).  NOTE(review): the author doubted whether this should be
        # done for the penalised fits.
        X_scaled[:, 0] = 1

        # Scaling and feeding to bootstrap and OLS; the scaler is fitted on
        # the training rows only and then applied to the test rows.
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:, 0] = 1
        X_test_scaled[:, 0] = 1

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        # The OLS result is unpacked into its own name so that the
        # per-degree array `ols_cv_mse` is filled instead of overwritten.
        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_degree = stat_tools.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        # np.mean leaves a scalar unchanged and averages per-fold scores if
        # that is what k_fold_cv_all returns -- TODO confirm return shape.
        ols_cv_mse[degree] = np.mean(ols_cv_mse_degree)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse

        # All regression bootstraps at once, at the CV-optimal lambdas.
        lamb_ridge = best_ridge_lambda[degree]
        lamb_lasso = best_lasso_lambda[degree]
        (ridge_mse, ridge_bias, ridge_variance,
         lasso_mse, lasso_bias, lasso_variance,
         ols_mse, ols_bias, ols_variance) = stat_tools.bootstrap_all(
            X_train_scaled, X_test_scaled, z_train, z_test,
            n_bootstraps, lamb_lasso, lamb_ridge)

        ridge_best_lambda_boot_mse[degree] = ridge_mse
        ridge_best_lambda_boot_bias[degree] = ridge_bias
        ridge_best_lambda_boot_variance[degree] = ridge_variance
        lasso_best_lambda_boot_mse[degree] = lasso_mse
        lasso_best_lambda_boot_bias[degree] = lasso_bias
        lasso_best_lambda_boot_variance[degree] = lasso_variance
        ols_boot_mse[degree] = ols_mse
        ols_boot_bias[degree] = ols_bias
        ols_boot_variance[degree] = ols_variance

        # Bootstrapping for a selection of lambdas for ridge and lasso.
        # The current subset lambda is used for both penalties; previously
        # the best lambdas were passed, so every column held the same result.
        for subset_lambda_index, lamb in enumerate(subset_lambdas):
            (ridge_mse, ridge_bias, ridge_variance,
             lasso_mse, lasso_bias, lasso_variance) = stat_tools.bootstrap_ridge_lasso(
                X_train_scaled, X_test_scaled, z_train, z_test,
                n_bootstraps, lamb, lamb)

            ridge_subset_lambda_boot_mse[degree, subset_lambda_index] = ridge_mse
            ridge_subset_lambda_boot_bias[degree, subset_lambda_index] = ridge_bias
            ridge_subset_lambda_boot_variance[degree, subset_lambda_index] = ridge_variance
            lasso_subset_lambda_boot_mse[degree, subset_lambda_index] = lasso_mse
            lasso_subset_lambda_boot_bias[degree, subset_lambda_index] = lasso_bias
            lasso_subset_lambda_boot_variance[degree, subset_lambda_index] = lasso_variance

    ################ All necessary computations should have been done above. Below follows
    ################ the plotting part.
    return
def franke_analysis():
    """Run the OLS / ridge / lasso study on noisy Franke-function samples.

    Draws ``n = 1000`` uniform points in the unit square, evaluates
    ``FrankeFunction`` on them, adds normal noise scaled by ``noise_scale``
    and, for each polynomial degree in ``range(max_degree)``, computes:

    * OLS train and test MSE on one fixed 80/20 split,
    * k-fold CV MSE for every lambda (ridge and lasso) and for OLS,
    * bootstrap MSE / bias / variance for OLS and for ridge / lasso at their
      CV-optimal lambda,
    * bootstrap MSE / bias / variance for ridge / lasso on a coarse subset
      of the lambda grid.

    All results are collected in local arrays; nothing is returned.
    """
    n = 1000
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale * np.random.normal(0, 1, len(z))

    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)
    subset_lambdas = lambdas[::5]

    # One split shared by all degrees, so results are comparable.
    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Quantities of interest, indexed by polynomial degree:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)
    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    # Indexed by (degree, lambda):
    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    # Indexed by (degree, subset lambda):
    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)
        # Column 0 is reset to 1 after scaling (presumably the constant
        # column).  NOTE(review): maybe not wanted for ridge + lasso, since
        # the constant should not be penalised.
        X_scaled[:, 0] = 1

        # Scaling and feeding to bootstrap and OLS; the scaler is fitted on
        # the training rows only and then applied to the test rows.
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:, 0] = 1  # maybe not for ridge+lasso
        X_test_scaled[:, 0] = 1  # maybe not for ridge+lasso

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        # The OLS result is unpacked into its own name so that the
        # per-degree array `ols_cv_mse` is filled instead of overwritten.
        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_degree = stat_tools.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        # np.mean leaves a scalar unchanged and averages per-fold scores if
        # that is what k_fold_cv_all returns -- TODO confirm return shape.
        ols_cv_mse[degree] = np.mean(ols_cv_mse_degree)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse

        # All regressions bootstraps at once, at the CV-optimal lambdas.
        lamb_ridge = best_ridge_lambda[degree]
        lamb_lasso = best_lasso_lambda[degree]
        (ridge_mse, ridge_bias, ridge_variance,
         lasso_mse, lasso_bias, lasso_variance,
         ols_mse, ols_bias, ols_variance) = stat_tools.bootstrap_all(
            X_train_scaled, X_test_scaled, z_train, z_test,
            n_bootstraps, lamb_lasso, lamb_ridge)

        ridge_best_lambda_boot_mse[degree] = ridge_mse
        ridge_best_lambda_boot_bias[degree] = ridge_bias
        ridge_best_lambda_boot_variance[degree] = ridge_variance
        lasso_best_lambda_boot_mse[degree] = lasso_mse
        lasso_best_lambda_boot_bias[degree] = lasso_bias
        lasso_best_lambda_boot_variance[degree] = lasso_variance
        ols_boot_mse[degree] = ols_mse
        ols_boot_bias[degree] = ols_bias
        ols_boot_variance[degree] = ols_variance

        # Bootstrapping for a selection of lambdas for ridge and lasso.
        # The current subset lambda is used for both penalties; previously
        # the best lambdas were passed, so every column held the same result.
        for subset_lambda_index, lamb in enumerate(subset_lambdas):
            (ridge_mse, ridge_bias, ridge_variance,
             lasso_mse, lasso_bias, lasso_variance) = stat_tools.bootstrap_ridge_lasso(
                X_train_scaled, X_test_scaled, z_train, z_test,
                n_bootstraps, lamb, lamb)

            ridge_subset_lambda_boot_mse[degree, subset_lambda_index] = ridge_mse
            ridge_subset_lambda_boot_bias[degree, subset_lambda_index] = ridge_bias
            ridge_subset_lambda_boot_variance[degree, subset_lambda_index] = ridge_variance
            lasso_subset_lambda_boot_mse[degree, subset_lambda_index] = lasso_mse
            lasso_subset_lambda_boot_bias[degree, subset_lambda_index] = lasso_bias
            lasso_subset_lambda_boot_variance[degree, subset_lambda_index] = lasso_variance
def deprecated_franke_analysis_full():
    """Deprecated Franke study with the CV and bootstrap loops written inline.

    Samples 1000 uniform points in the unit square, evaluates
    ``FrankeFunction`` with added normal noise and, for every polynomial
    degree in ``range(max_degree)``:

    * draws a fresh 80/20 train/test split of the design matrix,
    * records OLS train and test MSE,
    * runs k-fold CV over the lambda grid for ridge and lasso and keeps the
      best lambda and its MSE,
    * bootstraps OLS, ridge and lasso (at their best lambdas) for
      MSE / bias / variance,
    * bootstraps ridge and lasso again for every lambda in a coarse subset.

    Results live in local arrays only; the function returns ``None``.
    """
    n = 1000
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    franke_values = FrankeFunction(x, y)
    # Noisy response: Franke surface plus scaled standard normal noise.
    z = franke_values + noise_scale * np.random.normal(0, 1, len(franke_values))

    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)
    subset_lambdas = lambdas[::5]
    n_subset = len(subset_lambdas)

    # Result containers with one entry per degree.
    mse_ols_test, mse_ols_train, ols_cv_mse = (
        np.zeros(max_degree) for _ in range(3))
    ols_boot_mse, ols_boot_bias, ols_boot_variance = (
        np.zeros(max_degree) for _ in range(3))

    best_ridge_lambda, best_ridge_mse = (
        np.zeros(max_degree) for _ in range(2))
    (ridge_best_lambda_boot_mse,
     ridge_best_lambda_boot_bias,
     ridge_best_lambda_boot_variance) = (np.zeros(max_degree) for _ in range(3))

    best_lasso_lambda, best_lasso_mse = (
        np.zeros(max_degree) for _ in range(2))
    (lasso_best_lambda_boot_mse,
     lasso_best_lambda_boot_bias,
     lasso_best_lambda_boot_variance) = (np.zeros(max_degree) for _ in range(3))

    # Result containers with one row per degree and one column per lambda.
    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    # Result containers with one row per degree, one column per subset lambda.
    (ridge_subset_lambda_boot_mse,
     ridge_subset_lambda_boot_bias,
     ridge_subset_lambda_boot_variance) = (
        np.zeros((max_degree, n_subset)) for _ in range(3))
    (lasso_subset_lambda_boot_mse,
     lasso_subset_lambda_boot_bias,
     lasso_subset_lambda_boot_variance) = (
        np.zeros((max_degree, n_subset)) for _ in range(3))

    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        # The split is redrawn for every degree, from the unscaled matrix.
        X_train, X_test, z_train, z_test = train_test_split(
            X, z, test_size=0.2)

        # Full data set, standardised, for cross-validation.
        X_cv = StandardScaler().fit(X).transform(X)
        X_cv[:, 0] = 1

        # Train/test matrices, standardised with training statistics only.
        train_scaler = StandardScaler().fit(X_train)
        X_train_scaled = train_scaler.transform(X_train)
        X_test_scaled = train_scaler.transform(X_test)
        X_train_scaled[:, 0] = 1
        X_test_scaled[:, 0] = 1

        # Plain OLS fit: train and test error.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        mse_ols_train[degree] = stat_tools.MSE(z_train, X_train_scaled @ betas)
        mse_ols_test[degree] = stat_tools.MSE(z_test, X_test_scaled @ betas)

        # Cross-validation over the lambda grid.
        ridge_fold_score = np.zeros((n_lambdas, k_folds))
        lasso_fold_score = np.zeros((n_lambdas, k_folds))
        test_list, train_list = stat_tools.k_fold_selection(z, k_folds)
        for lam_idx, lam in enumerate(lambdas):
            for fold in range(k_folds):
                held_out = test_list[fold]
                kept = train_list[fold]
                X_fit, z_fit = X_cv[kept], z[kept]
                X_val, z_val = X_cv[held_out], z[held_out]

                lasso_model = skl.Lasso(
                    alpha=lam, fit_intercept=False).fit(X_fit, z_fit)
                ridge_coefs = linear_regression.Ridge_2D(X_fit, z_fit, lam)

                ridge_fold_score[lam_idx, fold] = stat_tools.MSE(
                    z_val, X_val @ ridge_coefs)
                lasso_fold_score[lam_idx, fold] = stat_tools.MSE(
                    z_val, lasso_model.predict(X_val))

        # Average over folds, then pick the lambda with the lowest CV MSE.
        lasso_cv_mse = np.mean(lasso_fold_score, axis=1)
        ridge_cv_mse = np.mean(ridge_fold_score, axis=1)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse

        # Bootstrap all three regressions on the same resamples.
        lamb_ridge = best_ridge_lambda[degree]
        lamb_lasso = best_lasso_lambda[degree]
        n_train, n_test = len(z_train), len(z_test)
        z_boot_ols = np.zeros((n_test, n_bootstraps))
        z_boot_ridge = np.zeros((n_test, n_bootstraps))
        z_boot_lasso = np.zeros((n_test, n_bootstraps))
        for draw in range(n_bootstraps):
            # Sample training rows with replacement.
            picks = np.random.randint(0, n_train, n_train)
            X_boot, z_boot = X_train_scaled[picks], z_train[picks]

            ols_coefs = linear_regression.OLS_SVD_2D(X_boot, z_boot)
            ridge_coefs = linear_regression.Ridge_2D(X_boot, z_boot, lamb_ridge)
            lasso_model = skl.Lasso(
                alpha=lamb_lasso, fit_intercept=False).fit(X_boot, z_boot)

            z_boot_lasso[:, draw] = lasso_model.predict(X_test_scaled)
            z_boot_ridge[:, draw] = X_test_scaled @ ridge_coefs
            z_boot_ols[:, draw] = X_test_scaled @ ols_coefs

        (ridge_best_lambda_boot_mse[degree],
         ridge_best_lambda_boot_bias[degree],
         ridge_best_lambda_boot_variance[degree]) = \
            stat_tools.compute_mse_bias_variance(z_test, z_boot_ridge)
        (lasso_best_lambda_boot_mse[degree],
         lasso_best_lambda_boot_bias[degree],
         lasso_best_lambda_boot_variance[degree]) = \
            stat_tools.compute_mse_bias_variance(z_test, z_boot_lasso)
        (ols_boot_mse[degree],
         ols_boot_bias[degree],
         ols_boot_variance[degree]) = \
            stat_tools.compute_mse_bias_variance(z_test, z_boot_ols)

        # Ridge and lasso bootstraps for each lambda in the coarse subset.
        for col, lam in enumerate(subset_lambdas):
            z_boot_ridge = np.zeros((n_test, n_bootstraps))
            z_boot_lasso = np.zeros((n_test, n_bootstraps))
            for draw in range(n_bootstraps):
                picks = np.random.randint(0, n_train, n_train)
                X_boot, z_boot = X_train_scaled[picks], z_train[picks]

                ridge_coefs = linear_regression.Ridge_2D(X_boot, z_boot, lam)
                lasso_model = skl.Lasso(
                    alpha=lam, fit_intercept=False).fit(X_boot, z_boot)

                z_boot_lasso[:, draw] = lasso_model.predict(X_test_scaled)
                z_boot_ridge[:, draw] = X_test_scaled @ ridge_coefs

            (ridge_subset_lambda_boot_mse[degree, col],
             ridge_subset_lambda_boot_bias[degree, col],
             ridge_subset_lambda_boot_variance[degree, col]) = \
                stat_tools.compute_mse_bias_variance(z_test, z_boot_ridge)
            (lasso_subset_lambda_boot_mse[degree, col],
             lasso_subset_lambda_boot_bias[degree, col],
             lasso_subset_lambda_boot_variance[degree, col]) = \
                stat_tools.compute_mse_bias_variance(z_test, z_boot_lasso)
def terrain_analysis():
    """Run the inline OLS / ridge / lasso study on the full terrain raster.

    Loads the SRTM GeoTIFF, uses every pixel (column index, row index) as a
    feature pair with the pixel value as response and, for every polynomial
    degree in ``range(max_degree)``:

    * draws a fresh 80/20 train/test split of the design matrix,
    * records OLS train and test MSE,
    * runs k-fold CV over the lambda grid for ridge and lasso and keeps the
      best lambda and its MSE,
    * bootstraps OLS, then ridge, then lasso (the latter two at their best
      lambda) for MSE / bias / variance.

    Results live in local arrays only; plotting is not implemented and the
    function returns ``None``.

    NOTE(review): an earlier function in this file carries the same name;
    if both live in the same module, this later definition is the one that
    stays bound.
    """
    # Terrain raster and its pixel coordinate grid.
    terrain_data = imread('../datafiles/SRTM_data_Norway_1.tif')
    n_rows, n_cols = terrain_data.shape[0], terrain_data.shape[1]
    x_terrain = np.arange(n_cols)  # per the problem description
    y_terrain = np.arange(n_rows)
    X_coord, Y_coord = np.meshgrid(x_terrain, y_terrain)
    z_terrain = terrain_data.flatten()  # response values
    x_terrain_flat = X_coord.flatten()  # first-degree feature
    y_terrain_flat = Y_coord.flatten()  # first-degree feature

    max_degree = 10
    n_lambdas = 15
    n_bootstraps = 20
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)

    # Result containers with one entry per degree.
    mse_ols_test, mse_ols_train = (np.zeros(max_degree) for _ in range(2))
    ols_boot_mse, ols_boot_bias, ols_boot_variance = (
        np.zeros(max_degree) for _ in range(3))

    best_ridge_lambda, best_ridge_mse = (
        np.zeros(max_degree) for _ in range(2))
    (ridge_best_lambda_boot_mse,
     ridge_best_lambda_boot_bias,
     ridge_best_lambda_boot_variance) = (np.zeros(max_degree) for _ in range(3))

    best_lasso_lambda, best_lasso_mse = (
        np.zeros(max_degree) for _ in range(2))
    (lasso_best_lambda_boot_mse,
     lasso_best_lambda_boot_bias,
     lasso_best_lambda_boot_variance) = (np.zeros(max_degree) for _ in range(3))

    # Result containers with one row per degree and one column per lambda.
    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    for degree in range(max_degree):
        X_terrain_design = linear_regression.design_matrix_2D(
            x_terrain_flat, y_terrain_flat, degree)
        # The split is redrawn for every degree, from the unscaled matrix.
        X_train, X_test, z_train, z_test = train_test_split(
            X_terrain_design, z_terrain, test_size=0.2)

        # Full data set, standardised, for cross-validation.
        z = z_terrain
        X = StandardScaler().fit(X_terrain_design).transform(X_terrain_design)
        X[:, 0] = 1

        # Train/test matrices, standardised with training statistics only.
        train_scaler = StandardScaler().fit(X_train)
        X_train_scaled = train_scaler.transform(X_train)
        X_test_scaled = train_scaler.transform(X_test)
        X_train_scaled[:, 0] = 1
        X_test_scaled[:, 0] = 1

        # Plain OLS fit: train and test error.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        mse_ols_train[degree] = stat_tools.MSE(z_train, X_train_scaled @ betas)
        mse_ols_test[degree] = stat_tools.MSE(z_test, X_test_scaled @ betas)

        # Cross-validation over the lambda grid.
        ridge_fold_score = np.zeros((n_lambdas, k_folds))
        lasso_fold_score = np.zeros((n_lambdas, k_folds))
        test_list, train_list = stat_tools.k_fold_selection(z, k_folds)
        for lam_idx, lam in enumerate(lambdas):
            for fold in range(k_folds):
                held_out = test_list[fold]
                kept = train_list[fold]
                X_fit, z_fit = X[kept], z[kept]
                X_val, z_val = X[held_out], z[held_out]

                lasso_model = skl.Lasso(
                    alpha=lam, fit_intercept=False).fit(X_fit, z_fit)
                ridge_coefs = linear_regression.Ridge_2D(X_fit, z_fit, lam)

                ridge_fold_score[lam_idx, fold] = stat_tools.MSE(
                    z_val, X_val @ ridge_coefs)
                lasso_fold_score[lam_idx, fold] = stat_tools.MSE(
                    z_val, lasso_model.predict(X_val))

        # Average over folds, then pick the lambda with the lowest CV MSE.
        lasso_cv_mse = np.mean(lasso_fold_score, axis=1)
        ridge_cv_mse = np.mean(ridge_fold_score, axis=1)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse

        def bootstrap_stats(fit_and_predict):
            """Bootstrap one regression; return (mse, bias, variance).

            ``fit_and_predict(X_boot, z_boot)`` must return the model's
            predictions on ``X_test_scaled``.
            """
            predictions = np.zeros((len(z_test), n_bootstraps))
            for draw in range(n_bootstraps):
                # Sample training rows with replacement.
                picks = np.random.randint(0, len(z_train), len(z_train))
                predictions[:, draw] = fit_and_predict(
                    X_train_scaled[picks], z_train[picks])
            return stat_tools.compute_mse_bias_variance(z_test, predictions)

        # OLS bootstrap for this degree.
        mse, bias, variance = bootstrap_stats(
            lambda X_boot, z_boot:
                X_test_scaled @ linear_regression.OLS_SVD_2D(X_boot, z_boot))
        ols_boot_mse[degree] = mse
        ols_boot_bias[degree] = bias
        ols_boot_variance[degree] = variance

        # Ridge bootstrap at the CV-optimal ridge lambda.
        ridge_lamb = best_ridge_lambda[degree]
        mse, bias, variance = bootstrap_stats(
            lambda X_boot, z_boot:
                X_test_scaled @ linear_regression.Ridge_2D(
                    X_boot, z_boot, ridge_lamb))
        ridge_best_lambda_boot_mse[degree] = mse
        ridge_best_lambda_boot_bias[degree] = bias
        ridge_best_lambda_boot_variance[degree] = variance

        # Lasso bootstrap at the CV-optimal lasso lambda.
        lasso_lamb = best_lasso_lambda[degree]
        mse, bias, variance = bootstrap_stats(
            lambda X_boot, z_boot:
                skl.Lasso(alpha=lasso_lamb, fit_intercept=False)
                .fit(X_boot, z_boot)
                .predict(X_test_scaled))
        lasso_best_lambda_boot_mse[degree] = mse
        lasso_best_lambda_boot_bias[degree] = bias
        lasso_best_lambda_boot_variance[degree] = variance

    ################ All necessary computations should have been done above. Below follows
    ################ the plotting part.
    return
return w if __name__ == "__main__": """ Some very early testing """ np.random.seed(123) x = np.random.uniform(0, 1, 500) y = np.random.uniform(0, 1, 500) z = FrankeFunction(x, y) deg = 2 X = linear_regression.design_matrix_2D(x, y, deg) N_predictors = int((deg + 1) * (deg + 2) / 2) w_init = np.random.randn(N_predictors) w_SGD_OLS = SGD( X, z, M=250, init_w=w_init, n_epochs=100, learning_rate=0.01, cost_gradient=CostFunctions.OLS_cost_gradient, ) w_SGDM_OLS = SGDM( X, z,