def CV_fit(X, z, k, f=None, alpha=0, method='OLS'):
    """k-fold cross-validated linear-regression fit.

    Fits coefficients on each training fold with the chosen model and
    scores the prediction on the held-out fold against the exact
    (noise-free) function values.

    Parameters:
        X: (n, p) design matrix.
        z: (n,) observed (noisy) response.
        k: number of folds.
        f: (n,) exact function values used for the validation error;
           defaults to `z` when not given.
        alpha: regularization strength for Ridge/Lasso (ignored for OLS).
        method: one of 'OLS', 'Ridge', 'Lasso'.

    Returns:
        beta: (k, p) fitted coefficients, one row per fold.
        errors: (k,) validation MSE of each fold against `f`.
        betasSigma: (k, p) standard-deviation estimates of the
            coefficients; stays all-zero for Lasso, where no analytic
            estimate is computed.

    Raises:
        ValueError: if `method` is not one of the three supported names.
    """
    # Validate the method once up front rather than blowing up inside
    # the first fold of the loop.
    if method not in ('OLS', 'Ridge', 'Lasso'):
        raise ValueError(
            'method has to be Lasso, OLS or Ridge, not {}'.format(method))

    if f is None:
        f = z  # fall back to scoring against the noisy data itself

    kf = oh.k_fold(k)
    kf.get_n_splits(X)

    beta = np.zeros((k, X.shape[1]))
    errors = np.zeros(k)
    betasSigma = np.zeros(beta.shape)

    for i, (train_index, test_index) in enumerate(kf.split()):
        X_train, X_validation = X[train_index], X[test_index]
        z_train, z_validation = z[train_index], z[test_index]
        f_train, f_validation = f[train_index], f[test_index]

        if method == 'OLS':
            beta[i, :] = oh.linFit(X_train, z_train, model='OLS', _lambda=0)
        elif method == 'Ridge':
            beta[i, :] = oh.linFit(X_train, z_train, model='Ridge',
                                   _lambda=alpha)
        else:  # Lasso — fit_intercept=False: the design matrix is assumed
            # to carry its own intercept column (TODO confirm with create_X)
            clf = skl.Lasso(alpha=alpha, fit_intercept=False,
                            max_iter=10**8,
                            precompute=True).fit(X_train, z_train)
            beta[i, :] = clf.coef_

        zPredicts = X_validation @ beta[i, :]
        errors[i] = np.mean((f_validation - zPredicts)**2)

        if method == 'OLS':
            # Residual-variance estimate on the validation fold.  The
            # original wrote `n - 0 * p`, i.e. the p degrees-of-freedom
            # correction is deliberately zeroed out — kept as plain / n.
            sigmaOLSSq = np.sum(
                (z_validation - zPredicts)**2) / X_validation.shape[0]
            sigmaBetaOLSSq = sigmaOLSSq * np.diag(
                np.linalg.pinv(X_validation.T @ X_validation))
            betasSigma[i, :] = np.sqrt(sigmaBetaOLSSq)
        elif method == 'Ridge':
            XInvRidge = np.linalg.pinv(
                X_validation.T @ X_validation
                + alpha * np.eye(beta.shape[1]))
            sigmaRidgeSq = np.sum(
                (z_validation - zPredicts)**2) / X_validation.shape[0]
            # Sandwich formula for the ridge estimator's covariance diag.
            sigmaBetaRidgeSq = sigmaRidgeSq * np.diag(
                XInvRidge @ X_validation.T @ X_validation @ XInvRidge.T)
            betasSigma[i, :] = np.sqrt(sigmaBetaRidgeSq)
        # Lasso: no analytic sigma; the betasSigma row stays zero.

    return beta, errors, betasSigma
# NOTE(review): fragment of a larger CV routine — the enclosing function
# definition and the tail of the final oh.linFit call are outside this view.
# Per-fold scratch buffers; presumably numBetas = number of model
# coefficients — TODO confirm against the enclosing function.
betasLassoTemp = np.empty((k, numBetas))
betasSigmaLassoTemp = np.empty((k, numBetas))
# One validation fold's worth of targets (assumes k divides z evenly).
zTests = np.empty((int(z.shape[0] / k)))
i = 0  # fold counter, advanced somewhere below this view
for train_index, test_index in kf.split():
    # Slice every array consistently into train / validation folds.
    X_train, X_validation = X_rest[train_index], X_rest[test_index]
    x_train, x_validation = x_rest[train_index], x_rest[test_index]
    y_train, y_validation = y_rest[train_index], y_rest[test_index]
    z_train, z_validation = z_rest[train_index], z_rest[test_index]
    f_train, f_validation = f_rest[train_index], f_rest[test_index]

    # OLS, Finding the best lambda
    betaOLS = oh.linFit(X_train, z_train, model='OLS', _lambda=_lambda)
    betasOLSTemp[i] = betaOLS.reshape(-1)
    zPredictsOLS = (X_validation @ betaOLS)
    # Validation MSE against the exact function values f.
    errorsOLS[i] = np.mean((f_validation - zPredictsOLS)**2)
    # Residual variance; note `- 0 * shape[1]` zeroes out the usual
    # degrees-of-freedom correction, so this is just / n_validation.
    sigmaOLSSq = 1 / (X_validation.shape[0] -
                      0 * X_validation.shape[1]) * np.sum(
                          (z_validation - zPredictsOLS)**2)
    # Coefficient variances from the diagonal of (X^T X)^-1.
    sigmaBetaOLSSq = sigmaOLSSq * np.diag(
        np.linalg.pinv(X_validation.T @ X_validation))
    betasSigmaOLSTemp[i] = np.sqrt(sigmaBetaOLSSq)

    # Ridge, Finding the best lambda
    # (call continues past the end of this fragment)
    betaRidge = oh.linFit(X_train, z_train, model='Ridge',
# NOTE(review): script fragment — the inner loop at the bottom continues
# past the end of this view.
# Sample n random points in [0,1)^2 and evaluate the Franke function
# with additive Gaussian noise (sigma = 0.1).
n = 1000
x_ = np.random.rand(n)
y_ = np.random.rand(n)
z = oh.frankeFunction(x_, y_) + 0.1 * np.random.randn(n)
# Set up the design matrix
MSE = []
R2_score = []
# Fit polynomial models of degree 1 through 5.
for grad in range(1, 6):
    X = oh.create_X(x_, y_, grad)
    invXTX = np.linalg.inv(X.T @ X)  # Need this anyway
    #beta = invXTX @ X.T @ z
    beta = oh.linFit(X, z)
    ztilde = X @ beta
    MSE.append(oh.mse(z, ztilde))
    R2_score.append(oh.R2_score(z, ztilde))
    #sigma = np.sqrt(np.var(ztilde))
    #print("Sigma numpy: ", sigma)
    # Residual standard deviation with n - p - 1 degrees of freedom.
    sigma = np.sqrt(1 / (X.shape[0] - X.shape[1] - 1) * np.sum(
        (z - ztilde)**2))
    #print("Sigma self: ", sigma)
    betaSigma = np.zeros(len(beta))
    # NOTE(review): `relative` aliases betaSigma (same array object, not a
    # copy) — writes to one are visible through the other; confirm intended.
    relative = betaSigma
    betaConf = np.zeros((len(beta), 2))  # per-coefficient confidence bounds
    # Per-coefficient uncertainty loop (body continues beyond this view).
    for i in range(len(beta)):
        #betaSigma[i] = sigma * np.sqrt(np.sqrt(invXTX[i][i]))
# Sanity checks: the bias-variance decomposition must satisfy
# bias + var == MSE exactly for these inputs.
z2 = z.reshape(2, 2)
assert oh.bias(z, z) + oh.var(z, z) == oh.mse(z, z)
assert oh.bias(z, z + 1) + oh.var(z, z + 1) == oh.mse(z, z + 1)
print("The function bias(z,ztilde) works as advertised")

# Test R2 score
print("Testing the R2 score")
z = np.arange(4) + 1
assert oh.R2_score(z, z) == 1
print("Testing a matrix")
z2 = z.reshape(2, 2)
# Fixed: actually exercise the matrix input (the original re-tested the
# 1-D z, leaving z2 unused).
assert oh.R2_score(z2, z2) == 1
print("The function R2_score(z,ztilde) works as advertised")

print("Testing linear Regression functions")
x = np.random.randn(11)
y = np.random.randn(11)
X = oh.create_X(x, y, 2)
np.set_printoptions(precision=2)
print(X)
# z = x + y lies exactly in the column span of X, so OLS can reproduce
# it to machine precision.
z = x + y
zTildeOLS = X @ oh.linFit(X, z, model='OLS')
assert oh.mse(z, zTildeOLS) < 10**(-28)
print("OLS works as advertised")
zTildeRidge = X @ oh.linFit(X, z, model='Ridge', _lambda=0.1)
# Fixed: test the Ridge prediction (the original re-tested zTildeOLS).
# Ridge shrinkage means the fit is close but never exact, hence the
# looser tolerance.
assert oh.mse(z, zTildeRidge) < 10**(-2)
print("Ridge works as advertised")
zTildeLasso = X @ oh.linFit(X, z, model='Lasso', _lambda=0.1)
# Fixed: test the Lasso prediction and print the right model name
# (the original re-tested zTildeOLS and printed "Ridge").  Lasso's L1
# penalty shrinks harder than Ridge, so the bound is looser still.
assert oh.mse(z, zTildeLasso) < 1
print("Lasso works as advertised")