def CV_fit(X, z, k, f=None, alpha=0, method='OLS'):
    """Fit a linear model on each of ``k`` cross-validation folds.

    For every fold the coefficients are fitted on the training part and the
    mean squared error is measured on the validation part.  For OLS and Ridge
    a standard-deviation estimate of every coefficient is computed as well.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Design matrix.
    z : ndarray, shape (n_samples,)
        Observed targets used for fitting.
    k : int
        Number of folds; passed to ``oh.k_fold``.
    f : ndarray, optional
        Exact (noise-free) target values used when scoring a fold.
        Defaults to ``z``.
    alpha : float, optional
        Regularisation strength for Ridge and Lasso; ignored for OLS.
    method : {'OLS', 'Ridge', 'Lasso'}
        Which estimator to fit.

    Returns
    -------
    beta : ndarray, shape (k, n_features)
        Fitted coefficients, one row per fold.
    errors : ndarray, shape (k,)
        Validation mean squared error (against ``f``) for each fold.
    betasSigma : ndarray, shape (k, n_features)
        Estimated standard deviation of each coefficient per fold.  Left at
        zero for Lasso, for which no estimate is computed.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.

    Notes
    -----
    ``oh.k_fold`` and ``oh.linFit`` are project helpers and ``skl`` is
    presumably ``sklearn.linear_model``; all three must be in scope at module
    level.
    """
    # Reject bad input before any splitting or fitting work is done.
    if method not in ('OLS', 'Ridge', 'Lasso'):
        raise ValueError(
            'method has to be Lasso, OLS or Ridge, not {}'.format(method))

    if f is None:
        f = z

    kf = oh.k_fold(k)
    kf.get_n_splits(X)

    n_features = X.shape[1]
    beta = np.zeros((k, n_features))
    errors = np.zeros(k)
    betasSigma = np.zeros(beta.shape)

    for i, (train_index, test_index) in enumerate(kf.split()):
        X_train, X_validation = X[train_index], X[test_index]
        z_train, z_validation = z[train_index], z[test_index]
        f_validation = f[test_index]

        if method == 'OLS':
            beta[i, :] = oh.linFit(X_train, z_train, model='OLS', _lambda=0)
        elif method == 'Ridge':
            beta[i, :] = oh.linFit(X_train, z_train, model='Ridge',
                                   _lambda=alpha)
        else:  # Lasso
            clf = skl.Lasso(alpha=alpha,
                            fit_intercept=False,
                            max_iter=10**8,
                            precompute=True).fit(X_train, z_train)
            beta[i, :] = clf.coef_

        zPredicts = X_validation @ beta[i, :]
        errors[i] = np.mean((f_validation - zPredicts)**2)

        if method == 'Lasso':
            # No coefficient-variance estimate is computed for Lasso.
            continue

        # Both the noise variance and the coefficient covariance are
        # deliberately estimated on the validation fold.  The residual sum is
        # divided by the plain sample count (no degrees-of-freedom correction).
        noise_var = 1 / X_validation.shape[0] * np.sum(
            (z_validation - zPredicts)**2)
        gram = X_validation.T @ X_validation
        if method == 'OLS':
            cov_diag = np.diag(np.linalg.pinv(gram))
        else:  # Ridge
            ridge_inv = np.linalg.pinv(gram + alpha * np.eye(n_features))
            cov_diag = np.diag(ridge_inv @ gram @ ridge_inv.T)
        betasSigma[i, :] = np.sqrt(noise_var * cov_diag)

    return beta, errors, betasSigma
def fit_terrain_data(terrain1,
                     degree=15,
                     reg_type='OLS',
                     k=5,
                     alpha=0,
                     x_splits=4,
                     y_splits=8,
                     plot_every_area=False):
    """Fit a polynomial surface to a terrain image, one rectangular tile at a time.

    The image is cropped so that it divides evenly into
    ``y_splits * x_splits`` tiles.  For every tile a hold-out split is made,
    ``CV_fit`` is run on the remaining samples, and the coefficients of the
    fold with the lowest validation error are used to predict the whole tile.
    The per-tile predictions are stitched into one image.

    Parameters
    ----------
    terrain1 : ndarray, shape (ny, nx)
        Terrain heights.  The fixed colour limits used for the summary
        figures (-2 to 2.5) suggest standardised data -- TODO confirm.
    degree : int
        Polynomial degree forwarded to ``create_X``.
    reg_type : {'OLS', 'Ridge', 'Lasso'}
        Estimator name forwarded to ``CV_fit`` as ``method``.
    k : int
        Number of cross-validation folds; also sets the hold-out size
        (``1/k`` of a tile).
    alpha : float
        Regularisation strength forwarded to ``CV_fit``.
    x_splits, y_splits : int
        Number of tiles along the second / first image axis.
    plot_every_area : bool
        If true, show the original and predicted image of every tile.

    Returns
    -------
    fig1, fig2 : matplotlib figures
        The stitched prediction and the (cropped) original terrain.

    Raises
    ------
    ValueError
        If the terrain has fewer rows/columns than ``y_splits``/``x_splits``.

    Notes
    -----
    Relies on ``create_X``, ``train_test_split`` (presumably scikit-learn's),
    ``CV_fit``, ``oh.R2_score`` and ``plt`` being available at module level.
    Prints the overall MSE, the mean tile CV error and the R2 score.
    """
    ny, nx = terrain1.shape
    mx = nx // x_splits
    my = ny // y_splits
    if mx == 0 or my == 0:
        raise ValueError(
            'terrain of shape {} is too small for {} x {} splits'.format(
                terrain1.shape, y_splits, x_splits))

    # Crop so every tile has exactly (my, mx) samples.
    terrain1 = terrain1[:my * y_splits, :mx * x_splits]
    z_final_predict = np.zeros(terrain1.shape)
    area_error = np.zeros(y_splits * x_splits)

    # All tiles share the same normalised coordinates, so the design matrix is
    # built once.  The grids use the tile's own (rows, cols) layout so that
    # their flattened order matches ``terrain.flatten()`` element for element,
    # also when the tiles are not square.
    axis0, axis1 = np.meshgrid(np.linspace(0, 1, my),
                               np.linspace(0, 1, mx),
                               indexing='ij')
    X = create_X(axis0.flatten(), axis1.flatten(), degree)
    n_test = int(my * mx / k)

    for i_x in range(x_splits):
        for j_y in range(y_splits):
            area = j_y + i_x * y_splits
            rows = slice(my * j_y, my * (j_y + 1))
            cols = slice(mx * i_x, mx * (i_x + 1))
            terrain = terrain1[rows, cols]
            z = terrain.flatten()

            # The hold-out part is set aside but not evaluated here; only the
            # remainder is used for cross-validation.
            X_rest, _, z_rest, _ = train_test_split(X,
                                                    z,
                                                    test_size=n_test,
                                                    shuffle=True)
            betas, errors, _ = CV_fit(X_rest,
                                      z_rest,
                                      k,
                                      alpha=alpha,
                                      method=reg_type)

            # Predict the tile with the coefficients of the best fold.
            beta = betas[np.argmin(errors), :]
            area_error[area] = np.mean(errors, axis=0)
            z_plot1 = X.dot(beta).reshape(terrain.shape)
            z_final_predict[rows, cols] = z_plot1

            if plot_every_area:
                plt.figure()
                plt.title('Terrain over Norway, area{}'.format(area))
                plt.imshow(terrain, cmap='gray')
                plt.xlabel('X')
                plt.ylabel('Y')

                plt.figure()
                plt.title(
                    'Terrain over Norway prediction, area{}'.format(area))
                plt.imshow(z_plot1, cmap='gray')
                plt.xlabel('X')
                plt.ylabel('Y')
                plt.show()

    # Same colour limits for both summary images so they can be compared.
    _min = -2
    _max = 2.5

    fig1 = plt.figure()
    plt.imshow(z_final_predict, cmap='gray', vmin=_min, vmax=_max)
    plt.xlabel('X')
    plt.ylabel('Y')

    fig2 = plt.figure()
    plt.imshow(terrain1, cmap='gray', vmin=_min, vmax=_max)
    plt.title('Terrain')
    plt.xlabel('X')
    plt.ylabel('Y')
    # The figures are returned without being shown; the caller decides.

    mse = np.mean((z_final_predict - terrain1)**2)
    print('mse over all points=', mse)
    print('area error', np.mean(area_error))
    print('R2 score', oh.R2_score(z_final_predict, terrain1))

    return fig1, fig2
# One zero-initialised slot per entry of `lambdas` for each of the three
# models (presumably filled with the cross-validated error later -- TODO confirm).
lamErrOLS = np.zeros(lambdas.shape[0])
lamErrRidge = np.zeros(lambdas.shape[0])
lamErrLasso = np.zeros(lambdas.shape[0])

# One coefficient per column of the design matrix.
numBetas = X_rest.shape[1]

# Per-lambda coefficient tables and their sigma counterparts, one row per
# lambda.  np.empty leaves them uninitialised, so every row must be written
# before it is read.
# NOTE(review): these use `numLambdas` while the error arrays above use
# `lambdas.shape[0]`; presumably the two are equal -- confirm.
betasOLS = np.empty((numLambdas, numBetas))
betasRidge = np.empty((numLambdas, numBetas))
betasLasso = np.empty((numLambdas, numBetas))
betasSigmaOLS = np.empty((numLambdas, numBetas))
betasSigmaRidge = np.empty((numLambdas, numBetas))
betasSigmaLasso = np.empty((numLambdas, numBetas))

# Shuffled k-fold splitter from the project helper module, primed with X_rest.
kf = oh.k_fold(n_splits=k, shuffle=True)
kf.get_n_splits(X_rest)

for nlam, _lambda in enumerate(lambdas):
    print(_lambda)
    ######### KFold! #############
    # Fresh (uninitialised) per-fold scratch buffers for this lambda:
    # one error per fold, one coefficient row per fold, and a prediction
    # buffer sized to 1/k of the samples in `z`.
    errorsOLS = np.empty(k)
    zPredictsOLS = np.empty((int(z.shape[0] / k)))
    betasOLSTemp = np.empty((k, numBetas))
    betasSigmaOLSTemp = np.empty((k, numBetas))
    errorsRidge = np.empty(k)
    zPredictsRidge = np.empty((int(z.shape[0] / k)))
    betasRidgeTemp = np.empty((k, numBetas))
    betasSigmaRidgeTemp = np.empty((k, numBetas))