def bootstrap(x, y, max_deg, boots_num):
    """Apply the bootstrap resampling algorithm to OLS fits of the Franke function.

    Args:
        x, y (np.array): initial datapoints (1D; meshgridded internally)
        max_deg (int): upper bound on polynomial degree (degrees 1..max_deg-1 are fitted)
        boots_num (int): number of bootstrap resamples per degree

    Returns:
        tuple: (mean train MSE per degree, mean test MSE per degree)
    """
    # NOTE: the docstring above was originally placed *after* the seed call,
    # making it a no-op string expression rather than a docstring.
    np.random.seed(130)
    x, y = np.meshgrid(x, y)
    # Noisy Franke-function response on the grid.
    z = np.ravel(f.FrankeFunction(x, y)
                 + 0.5 * np.random.randn(np.shape(x)[0], np.shape(y)[1]))

    MSE_degree_values = np.zeros(max_deg)
    MSE_test_degree_values = np.zeros(max_deg)
    MSE_train_values = np.zeros(boots_num)
    MSE_test_values = np.zeros(boots_num)

    # NOTE(review): range(1, max_deg) leaves the last entry of the degree
    # arrays at 0 — kept as-is for backward compatibility with callers.
    for k, deg in enumerate(range(1, max_deg)):
        X_design = f.X_make(x, y, deg)
        X_tr, X_te, z_tr, z_te = train_test_split(X_design, z, test_size=0.2)
        # Scale AFTER the train/test split, otherwise the test data
        # would leak into the training statistics.
        scaler = StandardScaler()
        scaler.fit(X_tr)
        X_train = scaler.transform(X_tr)
        X_test = scaler.transform(X_te)

        index_array = np.arange(len(z_tr))
        for i in range(boots_num):
            # BUGFIX: random_state=0 produced the *identical* bootstrap
            # sample on every iteration, defeating the bootstrap; use the
            # iteration number so draws differ but remain reproducible.
            indx = resample(index_array, random_state=i)
            z_bootstrap = z_tr[indx]
            # Fit once on the bootstrap sample (was computed twice).
            beta = f.OLS(X_train[indx, :], z_bootstrap)
            z_test = X_test.dot(beta)
            z_train = X_train.dot(beta)
            MSE_train_values[i] = f.MSE(z_tr, z_train)
            MSE_test_values[i] = f.MSE(z_te, z_test)

        MSE_degree_values[k] = np.mean(MSE_train_values)
        MSE_test_degree_values[k] = np.mean(MSE_test_values)

    return MSE_degree_values, MSE_test_degree_values
def sgdm(m, degrees, n_epochs, b, eta, noise=0, gamma=0):
    """Stochastic gradient descent with momentum on a noisy Franke-function fit.

    Args:
        m (int): number of datapoints per axis before meshgrid
        degrees (int): polynomial degree of the design matrix
        n_epochs (int): number of passes over the training data
        b (int): mini-batch size
        eta (float): learning rate
        noise (float): noise amplitude added to the Franke function
        gamma (float): momentum coefficient (0 disables momentum)

    Returns:
        tuple: (beta, mse_eval, mse_beta) — fitted SGD coefficients,
        per-epoch test MSE, and the closed-form OLS test MSE as baseline.
    """
    np.random.seed(1337)
    x = np.random.rand(m, degrees)
    y = np.random.rand(m, degrees)
    X_mesh, Y_mesh = np.meshgrid(x, y)
    z = f.FrankeFunction(X_mesh, Y_mesh) \
        + noise * np.random.randn(X_mesh.shape[0], Y_mesh.shape[0])
    z = np.ravel(z)
    X = f.X_make(X_mesh, Y_mesh, degrees)

    # Split, then scale: the scaler is fitted on training data only so the
    # test data does not leak into the scaling statistics.
    X_tr, X_te, z_tr, z_te = train_test_split(X, z, test_size=0.3)
    scaler = StandardScaler()
    scaler.fit(X_tr)                 # compute mean/std on the training data
    X_tr = scaler.transform(X_tr)    # standardize by centering and scaling
    X_te = scaler.transform(X_te)
    z_tr = z_tr.reshape(-1, 1)
    z_te = z_te.reshape(-1, 1)
    scaler.fit(z_tr)
    z_tr = scaler.transform(z_tr)
    z_te = scaler.transform(z_te)

    n_coeffs = int((degrees + 1) * (degrees + 2) / 2)  # design-matrix row length
    beta = np.random.randn(n_coeffs, 1)

    # BUGFIX: mini-batches must be drawn from the *training* set. The
    # original used np.arange(m) / int(m/b) although X_tr has ~0.7*len(X)
    # rows, and sliced X_tr between two shuffled index *values*
    # (X_tr[index_array[j]:index_array[j+1]]) instead of indexing with a
    # run of shuffled indices, yielding arbitrary-sized (often empty) batches.
    n_train = X_tr.shape[0]
    batch_num = n_train // b
    if n_train % b:
        print('warning; batch number and dataset not compatible')

    v = 0
    mse_eval = np.zeros(n_epochs)
    index_array = np.arange(n_train)          # row indices of the training set
    batch_starts = np.arange(batch_num) * b   # offset of each mini-batch

    for epoch in range(n_epochs):
        np.random.shuffle(index_array)
        for i in range(batch_num):
            batch_idx = index_array[batch_starts[i]:batch_starts[i] + b]
            xi = X_tr[batch_idx]
            zi = z_tr[batch_idx]
            # Gradient of the mini-batch OLS cost function.
            gradients = 2 / b * xi.T @ (xi @ beta - zi)
            v = gamma * v + eta * gradients   # momentum update
            beta = beta - v
        z_eval = X_te.dot(beta)
        mse_eval[epoch] = f.MSE(z_te, z_eval)

    # Closed-form OLS solution as a baseline for comparison.
    beta_ols = f.OLS(X_tr, z_tr)
    z_ols = X_te.dot(beta_ols)
    mse_beta = f.MSE(z_te, z_ols)

    return beta, mse_eval, mse_beta
def cross_validation(n, maxdegree, noise, n_folds, method=f.OLS, seed=130,
                     lmbda=0, datatype='Franke',
                     filename='SRTM_data_Minneapolis'):
    """
    cross_validation

    Input:
        n - number of datapoints before meshgrid
        maxdegree - max degree to iterate over
        noise - amount of noise
        n_folds - number of folds in cross validation
        method - regression method (f.OLS, f.Ridge, or the string 'Lasso')
        seed - seed to random number generator
        lmbda - lambda value to use in Ridge and Lasso
        datatype - datatype to fit (Franke, Terrain)
        filename - file with terrain data
    Output:
        polydegree - array with model complexity
        MSE_mean - array with mean MSE from each cross validation
        MSE_best - MSE for the best fit
        R2Score_skl - array with R2Score for Scikit Learn cross validation
        R2Score_mean - array with mean R2Score from each cross validation
        beta_best, best_degree - best fit from our own cross validation
        MSE_mean_sklearn, best_degree_sklearn, beta_best_sklearn -
            the scikit-learn counterparts
    """
    if n % n_folds != 0:
        raise Exception("Can't divide data set in n_folds equally sized folds")

    polydegree = np.zeros(maxdegree)
    MSE_mean = np.zeros(maxdegree)
    MSE_mean_sklearn = np.zeros(maxdegree)
    R2Score_mean = np.zeros(maxdegree)
    R2Score_skl = np.zeros(maxdegree)

    # Make data
    np.random.seed(int(seed))
    if datatype == 'Franke':
        x_train, x_test, y_train, y_test, z_train, z_test = f.FrankeData(
            n, noise, test_size=0.3)
    elif datatype == 'Terrain':
        x_train, x_test, y_train, y_test, z_train, z_test = f.TerrainData(
            n, filename)

    def _scaled_best_fit(best_deg):
        """Refit OLS on the full scaled training data for the given degree."""
        X_tr_best = f.design_matrix(x_train, y_train, best_deg)
        scaler = StandardScaler()
        scaler.fit(X_tr_best)
        X_tr_best_scaled = scaler.transform(X_tr_best)
        X_te_best = f.design_matrix(x_test, y_test, best_deg)
        X_te_best_scaled = scaler.transform(X_te_best)
        # StandardScaler zeroes the constant column; restore the intercept.
        X_tr_best_scaled[:, 0] = 1
        X_te_best_scaled[:, 0] = 1
        zscaler = StandardScaler()
        zscaler.fit(z_train.reshape(-1, 1))
        z_tr_scaled = zscaler.transform(z_train.reshape(-1, 1))
        return f.OLS(X_tr_best_scaled, z_tr_scaled)

    def _sklearn_cv(clf, X_design, degree):
        """scikit-learn CV for this degree; track the best degree seen so far.

        NOTE(review): np.argmin over MSE_mean_sklearn is only meaningful on
        the final degree iteration, once every entry has been filled in
        (earlier iterations see trailing zeros) — behavior kept from the
        original; only the final iteration's result is returned to callers.
        """
        scores = cross_val_score(clf, X_design, z_train, cv=n_folds,
                                 scoring='neg_mean_squared_error')
        MSE_mean_sklearn[degree] = np.abs(np.mean(scores))
        best = np.argmin(MSE_mean_sklearn)
        return best, _scaled_best_fit(best)

    for degree in range(0, maxdegree):
        polydegree[degree] = degree

        # Create design matrix
        X_train = f.design_matrix(x_train, y_train, degree)

        # Shuffle data to get random folds
        index = np.arange(0, np.shape(X_train)[0], 1)
        np.random.seed(int(seed))
        np.random.shuffle(index)
        X_train_random = X_train[index, :]
        z_train_random = z_train[index]

        # Split data in n_folds folds
        X_folds = np.array(np.array_split(X_train_random, n_folds))
        z_folds = np.array(np.array_split(z_train_random, n_folds))

        # scikit-learn reference cross validation (previously three nearly
        # identical ~40-line branches; deduplicated into _sklearn_cv).
        if method == f.OLS:
            best_degree_sklearn, beta_best_sklearn = _sklearn_cv(
                skl.LinearRegression(), X_train, degree)
        elif method == f.Ridge:
            # BUGFIX: skl.Ridge() was constructed without alpha, silently
            # using alpha=1.0 and ignoring the lmbda parameter (the Lasso
            # branch correctly passed alpha=lmbda).
            best_degree_sklearn, beta_best_sklearn = _sklearn_cv(
                skl.Ridge(alpha=lmbda), X_train, degree)
        elif method == 'Lasso':
            best_degree_sklearn, beta_best_sklearn = _sklearn_cv(
                skl.Lasso(alpha=lmbda, fit_intercept=False), X_train, degree)

        # Our own cross validation
        for k in range(n_folds):
            # Validation data
            X_val = X_folds[k]
            z_val = np.reshape(z_folds[k], (-1, 1))

            # Training data: all folds except fold k, combined
            idx = np.ones(n_folds, dtype=bool)
            idx[k] = False
            X_train_fold = X_folds[idx]
            X_train_fold = np.reshape(
                X_train_fold,
                (X_train_fold.shape[0] * X_train_fold.shape[1],
                 X_train_fold.shape[2]))
            z_train_fold = np.reshape(np.ravel(z_folds[idx]), (-1, 1))

            # Scaling data: remove the mean and scale each feature to unit
            # variance, fitted on this fold's training part only.
            scaler = StandardScaler()
            scaler.fit(X_train_fold)
            X_train_fold_scaled = scaler.transform(X_train_fold)
            X_val_scaled = scaler.transform(X_val)

            # Set first column to one as StandardScaler sets it to zero
            X_train_fold_scaled[:, 0] = 1
            X_val_scaled[:, 0] = 1

            # z is deliberately left unscaled here.
            z_train_fold_scaled = z_train_fold
            z_val_scaled = z_val

            # Choose method for calculating coefficients beta
            if method == f.OLS:
                beta_fold = method(X_train_fold_scaled, z_train_fold_scaled)
                z_tilde_fold = X_val_scaled @ beta_fold
            elif method == f.Ridge:
                beta_fold = method(X_train_fold_scaled, z_train_fold_scaled,
                                   lmbda, degree)
                z_tilde_fold = X_val_scaled @ beta_fold
            elif method == 'Lasso':
                clf_lasso = skl.Lasso(alpha=lmbda, fit_intercept=False).fit(
                    X_train_fold_scaled, z_train_fold_scaled)
                z_tilde_fold = clf_lasso.predict(X_val_scaled)

            MSE_mean[degree] += f.MSE(z_val_scaled, z_tilde_fold)
            R2Score_mean[degree] += f.R2Score(z_val_scaled, z_tilde_fold)

        MSE_mean[degree] /= n_folds
        R2Score_mean[degree] /= n_folds

    # Find the degree with smallest MSE from our own cross validation
    best_degree = np.argmin(MSE_mean)
    print(best_degree)

    # Make fit to the held-out ("holy") test data
    X_train_best = f.design_matrix(x_train, y_train, best_degree)
    scaler = StandardScaler()
    scaler.fit(X_train_best)
    X_train_best_scaled = scaler.transform(X_train_best)
    X_test_best = f.design_matrix(x_test, y_test, best_degree)
    X_test_best_scaled = scaler.transform(X_test_best)
    X_train_best_scaled[:, 0] = 1
    X_test_best_scaled[:, 0] = 1
    scaler.fit(z_train.reshape(-1, 1))
    z_train_scaled = scaler.transform(z_train.reshape(-1, 1))
    z_test_scaled = scaler.transform(z_test.reshape(-1, 1))
    beta_best = f.OLS(X_train_best_scaled, z_train_scaled)
    z_tilde_best = X_test_best_scaled @ beta_best
    MSE_best = f.MSE(z_test_scaled, z_tilde_best)
    print(MSE_best)

    return (polydegree, MSE_mean, MSE_best, R2Score_skl, R2Score_mean,
            beta_best, best_degree, MSE_mean_sklearn, best_degree_sklearn,
            beta_best_sklearn)
def no_resampling(n, maxdegree, noise, method=f.OLS, lmbda=0, seed=7053):
    """Fit polynomial models of increasing degree without resampling.

    Args:
        n (int): number of datapoints per axis before meshgrid
        maxdegree (int): degrees 0..maxdegree-1 are fitted
        noise (float): noise amplitude added to the Franke function
        method: regression method (f.OLS, f.Ridge, or the string 'Lasso')
        lmbda (float): regularization strength for Ridge/Lasso
        seed (int): seed for the random number generator

    Returns:
        tuple: (polydegree, MSE_train, MSE_test,
                MSE_train_scaled, MSE_test_scaled, R2Score_scaled)
    """
    # Arrays for plotting of error (unused MSE_OLS / R2Score_OLS removed).
    polydegree = np.zeros(maxdegree)
    MSE_test = np.zeros(maxdegree)
    MSE_train = np.zeros(maxdegree)
    MSE_train_scaled = np.zeros(maxdegree)
    MSE_test_scaled = np.zeros(maxdegree)
    R2Score_scaled = np.zeros(maxdegree)

    # Make data: noisy Franke function on a sorted uniform grid.
    np.random.seed(seed)
    x = np.sort(np.random.uniform(0, 1, n))
    y = np.sort(np.random.uniform(0, 1, n))
    x, y = np.meshgrid(x, y)
    z = np.ravel(f.FrankeFunction(x, y) + noise * np.random.randn(n, n))

    for degree in range(0, maxdegree):
        polydegree[degree] = degree

        # Create design matrix
        X = f.design_matrix(x, y, degree)

        # Split in training and test data
        X_train, X_test, z_train, z_test = train_test_split(
            X, z.reshape(-1, 1), test_size=0.3)

        # OLS estimate on the unscaled train/test data (reference curves).
        beta_OLS_train = f.OLS(X_train, z_train)
        ztilde_test = X_test @ beta_OLS_train
        ztilde_train = X_train @ beta_OLS_train
        MSE_train[degree] = f.MSE(z_train, ztilde_train)
        MSE_test[degree] = f.MSE(z_test, ztilde_test)

        # Scale data: remove the mean and scale each feature to unit
        # variance, fitted on the training data only.
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train_scaled = scaler.transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        # z is deliberately left unscaled (a dead scaler.fit(z_train)
        # call was removed).
        z_train_scaled = z_train
        z_test_scaled = z_test

        # Set the first column to 1 since StandardScaler sets it to 0
        X_train_scaled[:, 0] = 1
        X_test_scaled[:, 0] = 1

        if method == f.OLS:
            beta_train_scaled = method(X_train_scaled, z_train_scaled)
            z_tilde_test_scaled = X_test_scaled @ beta_train_scaled
            z_tilde_train_scaled = X_train_scaled @ beta_train_scaled
        elif method == f.Ridge:
            beta_train_scaled = method(X_train_scaled, z_train_scaled,
                                       lmbda, degree)
            z_tilde_test_scaled = X_test_scaled @ beta_train_scaled
            z_tilde_train_scaled = X_train_scaled @ beta_train_scaled
        elif method == 'Lasso':
            clf_lasso = skl.Lasso(alpha=lmbda, fit_intercept=False).fit(
                X_train_scaled, z_train_scaled)
            # NOTE(review): Lasso.predict returns a 1D array while
            # z_*_scaled are (m, 1) — confirm f.MSE handles the shape
            # mismatch without silent broadcasting.
            z_tilde_test_scaled = clf_lasso.predict(X_test_scaled)
            z_tilde_train_scaled = clf_lasso.predict(X_train_scaled)

        MSE_train_scaled[degree] = f.MSE(z_train_scaled, z_tilde_train_scaled)
        MSE_test_scaled[degree] = f.MSE(z_test_scaled, z_tilde_test_scaled)
        R2Score_scaled[degree] = f.R2Score(z_test_scaled, z_tilde_test_scaled)

    return (polydegree, MSE_train, MSE_test, MSE_train_scaled,
            MSE_test_scaled, R2Score_scaled)


# --- Leftover plotting snippet (runs at import time) -----------------------
# Start figure
fig = plt.figure()
# NOTE(review): fig.gca(projection='3d') was removed in matplotlib 3.6+;
# use fig.add_subplot(projection='3d') when upgrading.
ax = fig.gca(projection='3d')

# Plot the surface
# ztilde_plot = np.reshape(ztilde, (n, n))
# surf = ax.plot_surface(x, y, ztilde_plot, cmap=cm.coolwarm, linewidth=0, antialiased=False)

# Customize the z axis
ax.set_zlim(-0.10, 1.40)
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))

# BUGFIX: `surf` is only defined by the commented-out plot_surface call
# above, so this line always raised NameError at import time. Disabled
# until the surface plot is reinstated.
# fig.colorbar(surf, shrink=0.5, aspect=5)

plt.show()
# Script fragment: 95% confidence intervals for the OLS coefficients beta.
# NOTE(review): `seed`, `n`, `noise` and `maxdegree` are free names here —
# this block appears to have been pasted out of a function body and will
# raise NameError unless those names exist at module level; confirm intent.
np.random.seed(seed)
x = np.sort(np.random.uniform(0, 1, n))
y = np.sort(np.random.uniform(0, 1, n))
x, y = np.meshgrid(x, y)
# Franke Function with additive Gaussian noise, flattened to 1D.
z = np.ravel(f.FrankeFunction(x, y) + noise*np.random.randn(n, n))
for degree in range(maxdegree):
    #Create design matrix
    X = f.design_matrix(x, y, degree)
    # NOTE(review): the split result is never used below — the fit and the
    # variance estimate both use the full X/z. Either use the split or
    # remove this call (it also consumes global RNG state).
    X_train, X_test, z_train, z_test = train_test_split(X, z)
    # Ordinary least squares on the FULL data set (not the train split).
    beta_OLS = f.OLS(X, z)
    ztilde = X @ beta_OLS
    # Confidence interval as function of beta:
    # 1.96 * sqrt(var) gives the 95% interval assuming Gaussian errors.
    var_beta = f.variance_beta(X, noise)
    err_beta = 1.96*np.sqrt(var_beta)  # 95% confidence interval
    beta_idx = np.linspace(0, X.shape[1] - 1, X.shape[1])
    # Plot of confidence interval for OLS — one figure per degree,
    # each shown (blocking) before the next iteration.
    plt.style.use('ggplot')
    plt.errorbar(beta_idx, beta_OLS, err_beta, fmt='.')
    plt.xlabel("n", size=12)
    plt.ylabel('Confidence interval', size=12)
    plt.title("95 % confidence interval as function of $\\beta$", size=16)
    plt.show()
def OLS(self):
    """Fit ordinary least squares on the stored training data and record
    the resulting test-set mean squared error in ``self.MSE``."""
    coeffs = f.OLS(self.X_train, self.z_train)
    prediction = self.X_test @ coeffs
    self.MSE = f.MSE(self.z_test, prediction)