import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Project-local modules (names taken from the calls below)
import linear_regression
import cross_validation
import bootstrap
from franke import FrankeFunction  # assumed location of FrankeFunction


def cv_analysis(x, y, z, degrees, k,
                regression=linear_regression.OLS_SVD, lambd=None):
    scaler = StandardScaler()
    MSE_test = np.empty(degrees.size)
    for i, deg in enumerate(degrees):
        X = linear_regression.design_matrix(x, y, deg)
        # Normalize the design matrix, then restore the intercept column
        X = scaler.fit_transform(X)
        X[:, 0] = np.ones(X.shape[0])
        # Lasso requires special treatment because it is backed by SKL,
        # while the other regressions use our own code
        if regression != "LASSO":
            MSE_test[i] = cross_validation.cross_validation(
                X, z, k_folds=k, regression=regression)
        else:
            MSE_test[i] = cross_validation.cross_validation_lasso(
                X, z, k_folds=k, lambd=lambd)
    return MSE_test
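
# A minimal usage sketch for cv_analysis (illustration only, not part of the
# original analysis; assumes FrankeFunction and the project-local modules
# imported above are available, and the seed/sample size are arbitrary choices):
def _demo_cv_analysis():
    np.random.seed(2021)
    n = 200
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2
    degrees = np.arange(1, 11)
    MSE = cv_analysis(x, y, z, degrees, k=5)
    for deg, mse in zip(degrees, MSE):
        print("deg = %2i  CV MSE = %.5f" % (deg, mse))
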

def OLS_unit_test(min_deg=0, max_deg=15, tol=1e-6):
    n = 100  # Number of data points
    # Prepare data set
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2

    degrees = np.arange(min_deg, max_deg + 1)
    for deg in degrees:
        # Set up design matrix for the current degree
        X = linear_regression.design_matrix(x, y, deg)
        # Compute optimal parameters using our homegrown OLS
        beta = linear_regression.OLS(X=X, z=z)
        # Compute optimal parameters using sklearn
        skl_reg = LinearRegression(fit_intercept=False).fit(X, z)
        beta_skl = skl_reg.coef_

        for i in range(len(beta)):
            if abs(beta[i] - beta_skl[i]) >= tol:
                print("Warning! Mismatch with SKL in OLS_unit_test with tol = %.0e" % tol)
                print("Parameter no. %i for deg = %i" % (i, deg))
                print("-> (OUR) beta = %8.12f" % beta[i])
                print("-> (SKL) beta = %8.12f" % beta_skl[i])
    return
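
# For reference, a sketch of what an SVD-based OLS solver like
# linear_regression.OLS_SVD is assumed to compute: beta = pinv(X) @ z, with the
# pseudoinverse built from the singular value decomposition. This illustrates
# the technique only; it is not the project's actual implementation:
def _ols_svd_sketch(X, z):
    U, s, Vt = np.linalg.svd(X, full_matrices=False)
    # Invert only the non-negligible singular values; this keeps the solve
    # stable when the design matrix is (near-)rank-deficient
    s_inv = np.where(s > 1e-12 * s[0], 1.0 / s, 0.0)
    return Vt.T @ (s_inv * (U.T @ z))
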

def bootstrap_analysis(x, y, z, degrees, N_bootstraps,
                       regression=linear_regression.OLS_SVD, lambd=None):
    scaler = StandardScaler()
    MSE_test = np.empty(degrees.size)
    bias2_test = np.empty(degrees.size)
    variance_test = np.empty(degrees.size)
    for i, deg in enumerate(degrees):
        X = linear_regression.design_matrix(x, y, deg)
        # Split data, but don't shuffle. OK since the data is already randomly
        # sampled, and it facilitates a direct comparison of the clean & noisy data
        X_train, X_test, z_train, z_test = train_test_split(
            X, z, test_size=0.2, shuffle=False)

        # Normalize the data sets; the test set is scaled with the training-set
        # statistics to avoid leaking test information into the model
        X_train = scaler.fit_transform(X_train)
        X_train[:, 0] = np.ones(X_train.shape[0])
        X_test = scaler.transform(X_test)
        X_test[:, 0] = np.ones(X_test.shape[0])

        # Lasso requires special treatment due to the difference between SKL's
        # way of doing things and our code's
        if regression != "LASSO":
            MSE_test[i], bias2_test[i], variance_test[i] = bootstrap.bootstrap(
                X_train, X_test, z_train, z_test,
                bootstraps=N_bootstraps, regression=regression)
        else:
            MSE_test[i], bias2_test[i], variance_test[i] = bootstrap.bootstrap_lasso(
                X_train, X_test, z_train, z_test,
                bootstraps=N_bootstraps, lambd=lambd)
    return MSE_test, bias2_test, variance_test
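
# A minimal usage sketch for bootstrap_analysis (illustration only; the same
# assumptions as in the cv_analysis demo above apply). With the usual bootstrap
# decomposition, MSE should roughly satisfy MSE ~= bias^2 + variance per degree:
def _demo_bootstrap_analysis():
    np.random.seed(2021)
    n = 400
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2
    degrees = np.arange(1, 13)
    MSE, bias2, var = bootstrap_analysis(x, y, z, degrees, N_bootstraps=100)
    for deg, m, b2, v in zip(degrees, MSE, bias2, var):
        print("deg = %2i  MSE = %.4f  bias^2 = %.4f  var = %.4f" % (deg, m, b2, v))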