def Ridge_unit_test(min_deg=2, max_deg=5, tol=1e-6):
    """Tests our implementation of Ridge against scikit-learn up to a given tolerance."""
    n = 100  # Number of data points
    # Prepare data set
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2
    degrees = np.arange(min_deg, max_deg + 1)
    for deg in degrees:
        # Set up design matrix for the current degree
        X = linear_regression.design_matrix_2D(x, y, deg)
        for lamb in np.linspace(0, 1, 10):
            # Compute optimal parameters using our homegrown Ridge regression
            beta = linear_regression.Ridge_2D(X=X, z=z, lamb=lamb)
            # Compute optimal parameters using sklearn
            skl_reg = Ridge(alpha=lamb, fit_intercept=False).fit(X, z)
            beta_skl = skl_reg.coef_
            for i in range(len(beta)):
                if abs(beta[i] - beta_skl[i]) > tol:
                    print("Warning! Mismatch with SKL in Ridge_unit_test with tol = %.0e" % tol)
                    print("Parameter no. %i for deg = %i" % (i, deg))
                    print("-> (OUR) beta = %8.12f" % beta[i])
                    print("-> (SKL) beta = %8.12f" % beta_skl[i])
    return
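# For reference, a minimal sketch of the closed-form solver the test above
# exercises: ridge regression via the normal equations,
# beta = (X^T X + lambda * I)^(-1) X^T z. This is an assumption about what
# linear_regression.Ridge_2D computes, not a copy of its implementation.
def ridge_2D_sketch(X, z, lamb):
    """Hypothetical stand-in for linear_regression.Ridge_2D."""
    p = X.shape[1]
    # Solve (X^T X + lambda I) beta = X^T z instead of inverting explicitly
    return np.linalg.solve(X.T @ X + lamb * np.eye(p), X.T @ z)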
def OLS_unit_test(min_deg=0, max_deg=15, tol=1e-6):
    """Tests our implementation of OLS against scikit-learn up to a given tolerance."""
    n = 100  # Number of data points
    # Prepare data set
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2
    degrees = np.arange(min_deg, max_deg + 1)
    for deg in degrees:
        # Set up design matrix for the current degree
        X = linear_regression.design_matrix_2D(x, y, deg)
        # Compute optimal parameters using our homegrown OLS
        beta = linear_regression.OLS(X=X, z=z)
        # Compute optimal parameters using sklearn
        skl_reg = LinearRegression(fit_intercept=False).fit(X, z)
        beta_skl = skl_reg.coef_
        for i in range(len(beta)):
            if abs(beta[i] - beta_skl[i]) > tol:
                print("Warning! Mismatch with SKL in OLS_unit_test with tol = %.0e" % tol)
                print("Parameter no. %i for deg = %i" % (i, deg))
                print("-> (OUR) beta = %8.12f" % beta[i])
                print("-> (SKL) beta = %8.12f" % beta_skl[i])
    return
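# Both unit tests lean on linear_regression.design_matrix_2D. A minimal sketch,
# assuming it builds all polynomial terms x^i * y^j with i + j <= degree,
# constant column included; the exact term ordering is an assumption:
def design_matrix_2D_sketch(x, y, degree):
    """Hypothetical 2D polynomial design matrix with columns x^(t-j) * y^j, t = 0..degree."""
    n_terms = (degree + 1) * (degree + 2) // 2  # number of monomials with i + j <= degree
    X = np.ones((len(x), n_terms))
    col = 0
    for t in range(degree + 1):
        for j in range(t + 1):
            X[:, col] = x ** (t - j) * y ** j
            col += 1
    return X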
import sys

sys.path.insert(0, "../")
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import linear_regression
import utils
import stat_tools
import crossvalidation
import bootstrap
from FrankeFunction import FrankeFunction

utils.plot_settings()  # LaTeX fonts in plots!

n = 500
noise_scale = 0.2
x = np.random.uniform(0, 1, n)
y = np.random.uniform(0, 1, n)
z = FrankeFunction(x, y)
# Adding standard normal noise:
z = z + noise_scale * np.random.normal(0, 1, len(z))

max_degree = 15
n_lambdas = 30
n_bootstraps = 100
k_folds = 5
lambdas = np.logspace(-5, 0, n_lambdas)
# Stride 12 gives three subset lambdas (indices 0, 12, 24) for n_lambdas = 30;
# the plotting code below assumes at least three.
subset_lambdas = lambdas[::12]

x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
    x, y, z, test_size=0.2)

# Centering the response
z_intercept = np.mean(z)
z = z - z_intercept
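# FrankeFunction is imported from a local module; for reference, the standard
# Franke test function (which that module presumably implements) is:
def franke_reference(x, y):
    """The standard Franke bivariate test function on [0, 1] x [0, 1]."""
    term1 = 0.75 * np.exp(-((9 * x - 2) ** 2) / 4 - ((9 * y - 2) ** 2) / 4)
    term2 = 0.75 * np.exp(-((9 * x + 1) ** 2) / 49 - (9 * y + 1) / 10)
    term3 = 0.5 * np.exp(-((9 * x - 7) ** 2) / 4 - ((9 * y - 3) ** 2) / 4)
    term4 = -0.2 * np.exp(-((9 * x - 4) ** 2) - ((9 * y - 7) ** 2))
    return term1 + term2 + term3 + term4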
def franke_analysis_plots(
    n=1000,
    noise_scale=0.2,
    max_degree=20,
    n_bootstraps=100,
    k_folds=5,
    n_lambdas=30,
    do_boot=True,
    do_subset=True,
):
    # Note that max_degree is the number of degrees, i.e. including 0.
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale * np.random.normal(0, 1, len(z))

    lambdas = np.logspace(-6, 0, n_lambdas)
    subset_lambdas = lambdas[::12]

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Centering the response
    z_intercept = np.mean(z)
    z = z - z_intercept
    z_train_intercept = np.mean(z_train)
    z_train = z_train - z_train_intercept
    z_test = z_test - z_train_intercept

    ########### Setup of problem is completed above.

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)
    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)
    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)
    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)
    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros((max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)
        # X_scaled[:, 0] = 1  # Maybe not for ridge+lasso; don't want to penalize constants.

        # Scaling and feeding to bootstrap and OLS.
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        # X_train_scaled[:, 0] = 1  # maybe not for ridge+lasso
        # X_test_scaled[:, 0] = 1   # maybe not for ridge+lasso

        # OLS: get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV: find the best lambdas and the MSE vs lambda for the given degree;
        # also gets the OLS CV MSE.
        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = crossvalidation.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg

        if do_boot:
            # All regression bootstraps at once
            lamb_ridge = best_ridge_lambda[degree]
            lamb_lasso = best_lasso_lambda[degree]
            (
                ridge_mse,
                ridge_bias,
                ridge_variance,
                lasso_mse,
                lasso_bias,
                lasso_variance,
                ols_mse,
                ols_bias,
                ols_variance,
            ) = bootstrap.bootstrap_all(X_train_scaled, X_test_scaled, z_train,
                                        z_test, n_bootstraps, lamb_lasso,
                                        lamb_ridge)
            (
                ridge_best_lambda_boot_mse[degree],
                ridge_best_lambda_boot_bias[degree],
                ridge_best_lambda_boot_variance[degree],
            ) = (ridge_mse, ridge_bias, ridge_variance)
            (
                lasso_best_lambda_boot_mse[degree],
                lasso_best_lambda_boot_bias[degree],
                lasso_best_lambda_boot_variance[degree],
            ) = (lasso_mse, lasso_bias, lasso_variance)
            (
                ols_boot_mse[degree],
                ols_boot_bias[degree],
                ols_boot_variance[degree],
            ) = (ols_mse, ols_bias, ols_variance)

        if do_subset:
            # Bootstrapping for a selection of lambdas for ridge and lasso.
            # Note: the loop variable lamb is passed for both penalties.
            for subset_lambda_index, lamb in enumerate(subset_lambdas):
                (
                    ridge_mse,
                    ridge_bias,
                    ridge_variance,
                    lasso_mse,
                    lasso_bias,
                    lasso_variance,
                ) = bootstrap.bootstrap_ridge_lasso(
                    X_train_scaled,
                    X_test_scaled,
                    z_train,
                    z_test,
                    n_bootstraps,
                    lamb,
                    lamb,
                )
                (
                    ridge_subset_lambda_boot_mse[degree, subset_lambda_index],
                    ridge_subset_lambda_boot_bias[degree, subset_lambda_index],
                    ridge_subset_lambda_boot_variance[degree, subset_lambda_index],
                ) = (ridge_mse, ridge_bias, ridge_variance)
                (
                    lasso_subset_lambda_boot_mse[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_bias[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_variance[degree, subset_lambda_index],
                ) = (lasso_mse, lasso_bias, lasso_variance)

    ########### Computations done; plots below.
    # CV MSE for OLS:
    plt.figure()
    plt.semilogy(ols_cv_mse)
    plt.title("OLS CV MSE")
    plt.show()

    # Bootstrap for OLS:
    plt.figure()
    plt.semilogy(ols_boot_mse, label="mse")
    plt.semilogy(ols_boot_bias, label="bias")
    plt.semilogy(ols_boot_variance, label="variance")
    plt.title("OLS bias-variance-MSE by bootstrap")
    plt.legend()
    plt.show()

    # CV for ridge: best lambda per degree, plus low/middle/high fixed lambdas.
    # The indices 0, 12, 24 assume n_lambdas = 30.
    plt.figure()
    plt.semilogy(best_ridge_mse, label="best for each degree")
    for j in (0, 12, 24):
        plt.semilogy(ridge_lamb_deg_mse[:, j], label="lambda={}".format(lambdas[j]))
    plt.title("Ridge CV MSE: best lambda per degree, and fixed lambdas across degrees")
    plt.legend()
    plt.show()

    # Bootstrap at the best ridge lambda for each degree:
    plt.figure()
    plt.semilogy(ridge_best_lambda_boot_mse, label="mse")
    plt.semilogy(ridge_best_lambda_boot_bias, label="bias")
    plt.semilogy(ridge_best_lambda_boot_variance, label="variance")
    plt.title("Bootstrap at the best ridge lambda for each degree")
    plt.legend()
    plt.show()

    # Bootstrap bias and variance only, for low/middle/high ridge lambdas
    plt.figure()
    for j in range(3):
        plt.semilogy(ridge_subset_lambda_boot_bias[:, j],
                     label="bias, lambda = {}".format(subset_lambdas[j]))
        plt.semilogy(ridge_subset_lambda_boot_variance[:, j],
                     label="variance, lambda = {}".format(subset_lambdas[j]))
    plt.title("Bias+variance for low, middle, high ridge lambdas")
    plt.legend()
    plt.show()

    # CV for lasso: best lambda per degree, plus low/middle/high fixed lambdas
    plt.figure()
    plt.semilogy(best_lasso_mse, label="best lambda for each degree")
    for j in (0, 12, 24):
        plt.semilogy(lasso_lamb_deg_mse[:, j], label="lambda={}".format(lambdas[j]))
    plt.title("Lasso CV MSE: best lambda per degree, and fixed lambdas across degrees")
    plt.legend()
    plt.show()

    # Bootstrap at the best lasso lambda for each degree:
    plt.figure()
    plt.semilogy(lasso_best_lambda_boot_mse, label="mse")
    plt.semilogy(lasso_best_lambda_boot_bias, label="bias")
    plt.semilogy(lasso_best_lambda_boot_variance, label="variance")
    plt.title("Bootstrap at the best lasso lambda for each degree")
    plt.legend()
    plt.show()

    # Bootstrap bias and variance only, for low/middle/high lasso lambdas
    plt.figure()
    for j in range(3):
        plt.semilogy(lasso_subset_lambda_boot_bias[:, j],
                     label="bias, lambda = {}".format(subset_lambdas[j]))
        plt.semilogy(lasso_subset_lambda_boot_variance[:, j],
                     label="variance, lambda = {}".format(subset_lambdas[j]))
    plt.title("Bias+variance for low, middle, high lasso lambdas")
    plt.legend()
    plt.show()
    # For a couple of degrees, plot CV MSE vs lambda for ridge.
    # The offsets below assume max_degree >= 8.
    plt.figure()
    for offset in (1, 2, 3, 5, 7):
        plt.plot(
            np.log10(lambdas),
            ridge_lamb_deg_mse[max_degree - offset],
            label="degree = {}".format(max_degree - offset),
        )
    plt.legend()
    plt.show()

    # For a couple of degrees, plot CV MSE vs lambda for lasso.
    plt.figure()
    for offset in (1, 2, 3, 5, 7):
        plt.plot(
            np.log10(lambdas),
            lasso_lamb_deg_mse[max_degree - offset],
            label="degree = {}".format(max_degree - offset),
        )
    plt.legend()
    plt.show()

    print("best ridge lambda:")
    print(best_ridge_lambda)
    print("best lasso lambda:")
    print(best_lasso_lambda)
    return
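# Example invocation. With the defaults max_degree=20 and n_lambdas=30, the
# hardcoded lambda indices (0, 12, 24) and degree offsets (up to 7) used in
# the plots above all stay in range:
# franke_analysis_plots(n=1000, noise_scale=0.2, max_degree=20,
#                       n_bootstraps=100, k_folds=5, n_lambdas=30)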
from sklearn.linear_model import Lasso
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

np.random.seed(16091995)

n_datapoints = 1000
bootstraps = 100
x = np.random.rand(n_datapoints)
y = np.random.rand(n_datapoints)
z = FrankeFunction(x, y) + 0.05 * np.random.normal(0, 1, n_datapoints)

p_min = 20
p_max = 33
polynomial_degrees = np.arange(p_min, p_max + 1, 1)
lambdas = np.logspace(-20, -6, 10)

x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
    x, y, z, test_size=0.2)
scaler = StandardScaler()
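# The setup above appears to precede a bootstrap study over high polynomial
# degrees. A minimal sketch of such a loop, assuming plain OLS fits on
# resampled training data; the loop body is an illustration, not the
# project's bootstrap implementation:
mse_boot = np.zeros(len(polynomial_degrees))
for k, deg in enumerate(polynomial_degrees):
    X_train = linear_regression.design_matrix_2D(x_train, y_train, deg)
    X_test = linear_regression.design_matrix_2D(x_test, y_test, deg)
    z_pred = np.zeros((len(z_test), bootstraps))
    for b in range(bootstraps):
        X_res, z_res = resample(X_train, z_train)  # sample training data with replacement
        beta = np.linalg.pinv(X_res) @ z_res       # OLS via the Moore-Penrose pseudoinverse
        z_pred[:, b] = X_test @ beta
    mse_boot[k] = np.mean((z_test[:, None] - z_pred) ** 2)  # mean over points and bootstraps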
def franke_predictions(n=1000,
                       noise_scale=0.2,
                       degree=20,
                       ridge_lambda=1e-2,
                       lasso_lambda=1e-5,
                       plot_grid_size=2000):
    """For a given sample size n, noise_scale, degree and penalty parameters:
    produces OLS, ridge and lasso predictions, as well as ground truth, on a
    plotting meshgrid with the given grid size.

    Output:
        x_plot_mesh: meshgrid of x-coordinates
        y_plot_mesh: meshgrid of y-coordinates
        z_predict_ols: OLS prediction of z on the meshgrid
        z_predict_ridge: ridge prediction of z on the meshgrid
        z_predict_lasso: lasso prediction of z on the meshgrid
        z_plot_franke: actual Franke values on the meshgrid
    """
    np.random.seed(2018)
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale * np.random.normal(0, 1, len(z))
    # Centering the response
    z_intercept = np.mean(z)
    z = z - z_intercept

    # Scaling; the constant column is dropped after scaling, since the
    # response is centered and we do not want to penalize the intercept.
    X = linear_regression.design_matrix_2D(x, y, degree)
    scaler = StandardScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)
    X_scaled = X_scaled[:, 1:]

    # Setting up plotting grid
    x_plot = np.linspace(0, 1, plot_grid_size)
    y_plot = np.linspace(0, 1, plot_grid_size)
    x_plot_mesh, y_plot_mesh = np.meshgrid(x_plot, y_plot)
    x_plot_mesh_flat, y_plot_mesh_flat = x_plot_mesh.flatten(), y_plot_mesh.flatten()

    X_plot_design = linear_regression.design_matrix_2D(x_plot_mesh_flat,
                                                       y_plot_mesh_flat, degree)
    X_plot_design_scaled = scaler.transform(X_plot_design)
    X_plot_design_scaled = X_plot_design_scaled[:, 1:]
    z_plot_franke = FrankeFunction(x_plot_mesh, y_plot_mesh)

    # OLS
    betas = linear_regression.OLS_SVD_2D(X_scaled, z)
    z_predict_flat_ols = (X_plot_design_scaled @ betas) + z_intercept
    z_predict_ols = z_predict_flat_ols.reshape(plot_grid_size, -1)

    # Ridge
    betas_ridge = linear_regression.Ridge_2D(X_scaled, z, ridge_lambda)
    z_predict_flat_ridge = (X_plot_design_scaled @ betas_ridge) + z_intercept
    z_predict_ridge = z_predict_flat_ridge.reshape(plot_grid_size, -1)

    # Lasso
    clf_Lasso = Lasso(alpha=lasso_lambda, fit_intercept=False,
                      max_iter=10000).fit(X_scaled, z)
    z_predict_flat_lasso = clf_Lasso.predict(X_plot_design_scaled) + z_intercept
    z_predict_lasso = z_predict_flat_lasso.reshape(plot_grid_size, -1)

    return (x_plot_mesh, y_plot_mesh, z_predict_ols, z_predict_ridge,
            z_predict_lasso, z_plot_franke)
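# Example usage: compare the ridge prediction surface to the true Franke
# surface. plot_grid_size is reduced from the default to keep the meshgrid
# small; the Axes3D import enables the 3D projection on older matplotlib.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

x_mesh, y_mesh, z_ols, z_ridge, z_lasso, z_franke = franke_predictions(
    n=1000, degree=10, plot_grid_size=200)
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
ax.plot_surface(x_mesh, y_mesh, z_ridge)
ax.plot_wireframe(x_mesh, y_mesh, z_franke, rstride=10, cstride=10, color="gray")
ax.set_title("Ridge prediction (surface) vs. Franke function (wireframe)")
plt.show()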