def __test_bootstrap_fit():
    """A small implementation of a test case."""
    from regression import OLSRegression
    import sklearn.preprocessing as sk_preproc

    # Test-case parameters
    deg = 2
    N_bs = 1000
    n = 100
    test_percent = 0.35
    noise = 0.3
    np.random.seed(1234)

    # Random sample points and a noisy quadratic/exponential target
    x = np.random.rand(n, 1)
    poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)
    y = (2 * x * x + np.exp(-2 * x)
         + noise * np.random.randn(x.shape[0], x.shape[1]))

    # Design matrix from polynomial features
    X = poly.fit_transform(x)

    # Plain OLS fit on the full data set
    reg = OLSRegression()
    reg.fit(X, y)
    y_predict = reg.predict(X).ravel()
    print("Regular linear regression")
    print("r2: {:-20.16f}".format(reg.score(X, y)))
    print("mse: {:-20.16f}".format(metrics.mse(y, reg.predict(X))))
    print("Beta: ", reg.coef_.ravel())
    print("var(Beta): ", reg.coef_var.ravel())
    print("")

    # Bootstrap resampling of the same regression
    print("Bootstrapping")
    bs_reg = BootstrapRegression(X, y, OLSRegression())
    bs_reg.bootstrap(N_bs, test_percent=test_percent)

    print("r2: {:-20.16f}".format(bs_reg.r2))
    print("mse: {:-20.16f}".format(bs_reg.mse))
    print("Bias^2:{:-20.16f}".format(bs_reg.bias))
    print("Var(y):{:-20.16f}".format(bs_reg.var))
    print("Beta: ", bs_reg.coef_.ravel())
    print("var(Beta): ", bs_reg.coef_var.ravel())
    print("mse = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(
        bs_reg.mse, bs_reg.bias, bs_reg.var, bs_reg.bias + bs_reg.var))
    print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var - bs_reg.mse)))

    # Visual comparison: data, OLS prediction, bootstrap prediction
    import matplotlib.pyplot as plt
    plt.plot(x.ravel(), y, "o", label="Data")
    plt.plot(x.ravel(), y_predict, "o",
             label=r"Pred, R^2={:.4f}".format(reg.score(X, y)))
    plt.errorbar(bs_reg.x_pred_test, bs_reg.y_pred,
                 yerr=np.sqrt(bs_reg.y_pred_var), fmt="o",
                 label=r"Bootstrap Prediction, $R^2={:.4f}$".format(bs_reg.r2))
    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$2x^2 + \sigma^2$")
    plt.legend()
    plt.show()
def task1b(pickle_fname, N_samples=10000, training_size=0.5, N_bs=200,
           L_system_size=20, figure_folder="../fig"):
    """Task b of project 2"""
    print("=" * 80)
    print("Task b")

    states, energies = generate_1d_ising_data(L_system_size, N_samples)

    X_train, X_test, y_train, y_test = sk_modsel.train_test_split(
        states, energies, test_size=1 - training_size, shuffle=False)

    lambda_values = np.logspace(-4, 4, 9)

    print("Train size: ", X_train.shape)
    print("Test size: ", X_test.shape)

    # Ordinary least squares baseline
    linreg = reg.OLSRegression()
    linreg.fit(cp.deepcopy(X_train), cp.deepcopy(y_train))
    y_pred_linreg = linreg.predict(cp.deepcopy(X_test))
    y_pred_linreg_train = linreg.predict(cp.deepcopy(X_train))

    linreg_general_results = {
        "test": {
            "r2": metrics.r2(y_test, y_pred_linreg),
            "mse": metrics.mse(y_test, y_pred_linreg),
            "bias": metrics.bias(y_test, y_pred_linreg)},
        "train": {
            "r2": metrics.r2(y_train, y_pred_linreg_train),
            "mse": metrics.mse(y_train, y_pred_linreg_train),
            "bias": metrics.bias(y_train, y_pred_linreg_train)},
    }

    print("LINREG:")
    print("R2: {:-20.16f}".format(linreg_general_results["test"]["r2"]))
    print("MSE: {:-20.16f}".format(linreg_general_results["test"]["mse"]))
    print("Bias: {:-20.16f}".format(linreg_general_results["test"]["bias"]))

    # Coupling matrix recovered by OLS
    J_leastsq = np.asarray(linreg.coef_).reshape(
        (L_system_size, L_system_size))

    linreg_bs_results = bs.BootstrapWrapper(
        X_train, y_train, sk_model.LinearRegression(fit_intercept=False),
        N_bs, X_test=X_test, y_test=y_test)

    linreg_cvkf_results = cv.kFoldCVWrapper(
        X_train, y_train, sk_model.LinearRegression(fit_intercept=False),
        k=4, X_test=X_test, y_test=y_test)

    ridge_general_results = []
    ridge_bs_results = []
    ridge_cvkf_results = []

    lasso_general_results = []
    lasso_bs_results = []
    lasso_cvkf_results = []

    heatmap_data = {}

    for i, lmbda in enumerate(lambda_values):
        # Ridge regression
        ridge_reg = reg.RidgeRegression(lmbda)
        ridge_reg.fit(cp.deepcopy(X_train), cp.deepcopy(y_train))
        y_pred_ridge = ridge_reg.predict(cp.deepcopy(X_test)).reshape(-1, 1)
        y_pred_ridge_train = ridge_reg.predict(
            cp.deepcopy(X_train)).reshape(-1, 1)

        ridge_general_results.append({
            "test": {
                "lambda": lmbda,
                "r2": metrics.r2(y_test, y_pred_ridge),
                "mse": metrics.mse(y_test, y_pred_ridge),
                "bias": metrics.bias(y_test, y_pred_ridge)},
            "train": {
                "lambda": lmbda,
                "r2": metrics.r2(y_train, y_pred_ridge_train),
                "mse": metrics.mse(y_train, y_pred_ridge_train),
                "bias": metrics.bias(y_train, y_pred_ridge_train)},
        })

        print("\nRIDGE (lambda={}):".format(lmbda))
        print("R2: {:-20.16f}".format(
            ridge_general_results[-1]["test"]["r2"]))
        print("MSE: {:-20.16f}".format(
            ridge_general_results[-1]["test"]["mse"]))
        print("Bias: {:-20.16f}".format(
            ridge_general_results[-1]["test"]["bias"]))

        ridge_bs_results.append(bs.BootstrapWrapper(
            X_train, y_train, reg.RidgeRegression(lmbda), N_bs,
            X_test=X_test, y_test=y_test))

        ridge_cvkf_results.append(cv.kFoldCVWrapper(
            X_train, y_train, reg.RidgeRegression(lmbda), k=4,
            X_test=X_test, y_test=y_test))

        # Lasso regression
        lasso_reg = sk_model.Lasso(alpha=lmbda)

        # Filtering out annoying convergence warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            lasso_reg.fit(cp.deepcopy(X_train), cp.deepcopy(y_train))
            y_pred_lasso = lasso_reg.predict(
                cp.deepcopy(X_test)).reshape(-1, 1)
            y_pred_lasso_train = lasso_reg.predict(
                cp.deepcopy(X_train)).reshape(-1, 1)

        lasso_general_results.append({
            "test": {
                "lambda": lmbda,
                "r2": metrics.r2(y_test, y_pred_lasso),
                "mse": metrics.mse(y_test, y_pred_lasso),
                "bias": metrics.bias(y_test, y_pred_lasso)},
            "train": {
                "lambda": lmbda,
                "r2": metrics.r2(y_train, y_pred_lasso_train),
                "mse": metrics.mse(y_train, y_pred_lasso_train),
                "bias": metrics.bias(y_train, y_pred_lasso_train)},
        })

        print("\nLASSO (lambda={}):".format(lmbda))
        print("R2: {:-20.16f}".format(
            lasso_general_results[-1]["test"]["r2"]))
        print("MSE: {:-20.16f}".format(
            lasso_general_results[-1]["test"]["mse"]))
        print("Bias: {:-20.16f}".format(
            lasso_general_results[-1]["test"]["bias"]))

        # Filtering out annoying convergence warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            lasso_bs_results.append(bs.BootstrapWrapper(
                cp.deepcopy(X_train), cp.deepcopy(y_train),
                sk_model.Lasso(lmbda), N_bs,
                X_test=X_test, y_test=y_test))
            lasso_cvkf_results.append(cv.kFoldCVWrapper(
                cp.deepcopy(X_train), cp.deepcopy(y_train),
                sk_model.Lasso(lmbda), k=4,
                X_test=X_test, y_test=y_test))

        # Coupling matrices for this lambda (heatmap plotting handled
        # elsewhere, e.g. plot_heatmap on the pickled data).
        J_ridge = np.asarray(ridge_reg.coef_).reshape(
            (L_system_size, L_system_size))
        J_lasso = np.asarray(lasso_reg.coef_).reshape(
            (L_system_size, L_system_size))
        heatmap_data[i] = [J_leastsq, J_ridge, J_lasso]

    # Persist all results for later analysis/plotting
    with open(pickle_fname, "wb") as f:
        pickle.dump(
            {
                "L_system_size": L_system_size,
                "ols": linreg_general_results,
                "ols_bs": linreg_bs_results,
                "ols_cv": linreg_cvkf_results,
                "ridge": ridge_general_results,
                "ridge_bs": ridge_bs_results,
                "ridge_cv": ridge_cvkf_results,
                "lasso": lasso_general_results,
                "lasso_bs": lasso_bs_results,
                "lasso_cv": lasso_cvkf_results,
                "heatmap_data": heatmap_data,
            }, f)
    print("Data pickled and dumped to: {:s}".format(pickle_fname))
def __init__(self, x, y, z, alpha, deg=5, N_bs=100, N_cv_bs=100,
             k_splits=4, test_percent=0.4, print_results=False):
    """Lasso method for scikit learn."""
    poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)
    X = poly.fit_transform(np.c_[cp.deepcopy(x).ravel(),
                                 cp.deepcopy(y).ravel()])

    lasso = sk_model.Lasso(alpha=alpha, fit_intercept=False)
    lasso.fit(X, z.ravel())

    # Prediction and in-sample scores
    z_predict = lasso.predict(X)
    bias = metrics.bias2(z.ravel(), z_predict)
    R2 = lasso.score(X, z.ravel())
    mse = metrics.mse(z.ravel(), z_predict)

    # Coefficients (sklearn's Lasso provides no variance estimate)
    beta = lasso.coef_

    self.data["regression"] = {
        "y_pred": z_predict,
        "r2": R2,
        "mse": mse,
        "bias": bias,
        "beta_coefs": lasso.coef_,
        "beta_coefs_var": None,
    }

    if print_results:
        print("Lambda: {:-e}".format(alpha))
        print("R2: {:-20.16f}".format(R2))
        print("MSE: {:-20.16f}".format(mse))
        print("Bias: {:-20.16f}".format(bias))
        print("Beta coefs: {}".format(beta))

    reg_kwargs = {"alpha": alpha, "fit_intercept": False}

    # k-fold cross validation via the sklearn-based resampling helper
    sk_results = sk_resampling.sk_learn_k_fold_cv(
        cp.deepcopy(x), cp.deepcopy(y), cp.deepcopy(z),
        sk_model.Lasso(**reg_kwargs), poly.transform,
        test_percent=test_percent, k_splits=k_splits,
        print_results=print_results)
    self.data["kfoldcv"] = sk_results

    # Bootstrap resampling
    bs_reg = bs.BootstrapRegression(
        cp.deepcopy(np.c_[x.ravel(), y.ravel()]), cp.deepcopy(z.ravel()),
        sk_model.Lasso(**reg_kwargs), poly.transform)
    bs_reg.reg = sk_model.Lasso(alpha=alpha, fit_intercept=False)
    bs_reg.bootstrap(N_bs, test_percent=test_percent)
    self._fill_data(bs_reg, "bootstrap")

    if print_results:
        print("R2: {:-20.16f}".format(bs_reg.R2))
        print("MSE: {:-20.16f}".format(bs_reg.MSE))
        print("Bias^2:{:-20.16f}".format(bs_reg.bias))
        print("Var(y):{:-20.16f}".format(bs_reg.var))
        print("Beta coefs: {}".format(bs_reg.coef_))
        print("Beta coefs variances: {}".format(bs_reg.coef_var))
        print("MSE = Bias^2 + Var(y) = ")
        print("{} = {} + {} = {}".format(
            bs_reg.MSE, bs_reg.bias, bs_reg.var, bs_reg.bias + bs_reg.var))
        print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var - bs_reg.MSE)))
def __init__(self, x, y, z, deg=1, N_bs=100, N_cv_bs=100,
             k_splits=4, test_percent=0.4, print_results=False):
    """Manual implementation of the OLS.

    Fits a polynomial OLS model on the (x, y) -> z surface and stores
    regression, k-fold CV, Monte Carlo CV and bootstrap results in
    ``self.data``.

    Args:
        x, y: coordinate arrays; raveled and column-stacked internally.
        z: response values.
        deg: polynomial degree of the design matrix.
        N_bs: number of bootstrap resamples.
        N_cv_bs: number of Monte Carlo cross-validation resamples.
        k_splits: number of k-fold splits.
        test_percent: held-out fraction for testing.
        print_results: if True, print summary statistics to stdout.
    """
    poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)
    # BUG FIX: fit_transform was previously also handed z.ravel() as a
    # (silently ignored) supervised target; the transformer only needs
    # the input coordinates.
    X = poly.fit_transform(cp.deepcopy(np.c_[x.ravel(), y.ravel()]))

    linreg = reg.OLSRegression()
    linreg.fit(X, cp.deepcopy(z.ravel()))
    z_predict_ = linreg.predict(X).ravel()

    if print_results:
        print("R2: {:-20.16f}".format(metrics.R2(z.ravel(), z_predict_)))
        print("MSE: {:-20.16f}".format(metrics.mse(z.ravel(), z_predict_)))
        print("Bias: {:-20.16f}".format(
            metrics.bias2(z.ravel(), z_predict_)))
        print("Beta coefs: {}".format(linreg.coef_))
        print("Beta coefs variances: {}".format(linreg.coef_var))

    self.data["regression"] = {
        "y_pred": z_predict_,
        "r2": metrics.R2(z.ravel(), z_predict_),
        "mse": metrics.mse(z.ravel(), z_predict_),
        "bias": metrics.bias2(z.ravel(), z_predict_),
        "beta_coefs": linreg.coef_,
        "beta_coefs_var": linreg.coef_var,
        # 2*sigma approximates the 95% confidence interval half-width
        "beta_95c": np.sqrt(linreg.coef_var) * 2,
    }

    # Resampling with k-fold cross validation
    kfcv = cv.kFoldCrossValidation(
        cp.deepcopy(np.c_[x.ravel(), y.ravel()]), cp.deepcopy(z.ravel()),
        reg.OLSRegression(), poly.transform)
    kfcv.cross_validate(k_splits=k_splits, test_percent=test_percent)

    if print_results:
        print("R2: {:-20.16f}".format(kfcv.R2))
        print("MSE: {:-20.16f}".format(kfcv.MSE))
        print("Bias^2:{:-20.16f}".format(kfcv.bias))
        print("Var(y):{:-20.16f}".format(kfcv.var))
        print("Beta coefs: {}".format(kfcv.coef_))
        print("Beta coefs variances: {}".format(kfcv.coef_var))
        print("MSE = Bias^2 + Var(y) = ")
        print("{} = {} + {} = {}".format(
            kfcv.MSE, kfcv.bias, kfcv.var, kfcv.bias + kfcv.var))
        print("Diff: {}".format(abs(kfcv.bias + kfcv.var - kfcv.MSE)))

    self._fill_data(kfcv, "kfoldcv")

    # Resampling with mc cross validation
    mccv = cv.MCCrossValidation(
        cp.deepcopy(np.c_[x.ravel(), y.ravel()]), cp.deepcopy(z.ravel()),
        reg.OLSRegression(), poly.transform)
    mccv.cross_validate(N_cv_bs, k_splits=k_splits,
                        test_percent=test_percent)

    if print_results:
        print("R2: {:-20.16f}".format(mccv.R2))
        print("MSE: {:-20.16f}".format(mccv.MSE))
        print("Bias^2:{:-20.16f}".format(mccv.bias))
        print("Var(y):{:-20.16f}".format(mccv.var))
        print("Beta coefs: {}".format(mccv.coef_))
        print("Beta coefs variances: {}".format(mccv.coef_var))
        print("MSE = Bias^2 + Var(y) = ")
        print("{} = {} + {} = {}".format(
            mccv.MSE, mccv.bias, mccv.var, mccv.bias + mccv.var))
        print("Diff: {}".format(abs(mccv.bias + mccv.var - mccv.MSE)))

    self._fill_data(mccv, "mccv")

    # Resampling with bootstrapping
    bs_reg = bs.BootstrapRegression(
        cp.deepcopy(np.c_[x.ravel(), y.ravel()]), cp.deepcopy(z.ravel()),
        reg.OLSRegression(), poly.transform)
    bs_reg.bootstrap(N_bs, test_percent=test_percent)

    if print_results:
        print("R2: {:-20.16f}".format(bs_reg.R2))
        print("MSE: {:-20.16f}".format(bs_reg.MSE))
        print("Bias^2:{:-20.16f}".format(bs_reg.bias))
        print("Var(y):{:-20.16f}".format(bs_reg.var))
        print("Beta coefs: {}".format(bs_reg.coef_))
        print("Beta coefs variances: {}".format(bs_reg.coef_var))
        print("MSE = Bias^2 + Var(y) = ")
        print("{} = {} + {} = {}".format(
            bs_reg.MSE, bs_reg.bias, bs_reg.var, bs_reg.bias + bs_reg.var))
        print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var - bs_reg.MSE)))

    self._fill_data(bs_reg, "bootstrap")
def __init__(self, x, y, z, alpha, deg=5, N_bs=100, N_cv_bs=100,
             k_splits=4, test_percent=0.4, print_results=False):
    """Ridge regression via scikit-learn, with k-fold CV and bootstrap."""
    poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)
    design = np.c_[cp.deepcopy(x).ravel(), cp.deepcopy(y).ravel()]
    X = poly.fit_transform(design)

    ridge = sk_model.Ridge(alpha=alpha, fit_intercept=False)
    ridge.fit(X, z.ravel())

    # Prediction and in-sample scores
    z_predict = ridge.predict(X)
    R2 = ridge.score(X, z.ravel())
    mse = metrics.mse(z.ravel(), z_predict)
    bias = metrics.bias2(z.ravel(), z_predict)

    # Residual-based noise variance estimate, then coefficient variances
    N, P = X.shape
    z_variance = np.sum((z.ravel() - z_predict)**2) / (N - P - 1)
    beta_variance = metrics.ridge_regression_variance(X, z_variance, alpha)

    self.data["regression"] = {
        "y_pred": z_predict,
        "r2": R2,
        "mse": mse,
        "bias": bias,
        "beta_coefs": ridge.coef_,
        "beta_coefs_var": beta_variance,
        # 2*sigma approximates the 95% confidence interval half-width
        "beta_95c": np.sqrt(beta_variance) * 2,
    }

    if print_results:
        print("Lambda: {:-e}".format(alpha))
        print("R2: {:-20.16f}".format(R2))
        print("MSE: {:-20.16f}".format(mse))
        print("Bias: {:-20.16f}".format(bias))
        print("Beta coefs: {}".format(ridge.coef_))
        print("Beta coefs variances: {}".format(beta_variance))

    reg_kwargs = {"alpha": alpha, "fit_intercept": False, "solver": "lsqr"}

    # k-fold cross validation via the sklearn-based resampling helper
    kfcf_results = sk_resampling.sk_learn_k_fold_cv(
        cp.deepcopy(x), cp.deepcopy(y), cp.deepcopy(z),
        sk_model.Ridge(**reg_kwargs), poly.transform,
        test_percent=test_percent, k_splits=k_splits,
        print_results=print_results)
    self.data["kfoldcv"] = kfcf_results

    # Resampling with bootstrapping
    bs_reg = bs.BootstrapRegression(
        cp.deepcopy(np.c_[x.ravel(), y.ravel()]), cp.deepcopy(z.ravel()),
        sk_model.Ridge(**reg_kwargs), poly.transform)
    bs_reg.bootstrap(N_bs, test_percent=test_percent)
    self._fill_data(bs_reg, "bootstrap")

    if print_results:
        print("R2: {:-20.16f}".format(bs_reg.R2))
        print("MSE: {:-20.16f}".format(bs_reg.MSE))
        print("Bias^2:{:-20.16f}".format(bs_reg.bias))
        print("Var(y):{:-20.16f}".format(bs_reg.var))
        print("Beta coefs: {}".format(bs_reg.coef_))
        print("Beta coefs variances: {}".format(bs_reg.coef_var))
        print("MSE = Bias^2 + Var(y) = ")
        print("{} = {} + {} = {}".format(
            bs_reg.MSE, bs_reg.bias, bs_reg.var, bs_reg.bias + bs_reg.var))
        print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var - bs_reg.MSE)))
def __init__(self, x, y, z, deg=1, N_bs=100, N_cv_bs=100,
             k_splits=4, test_percent=0.4, print_results=False):
    """SK-Learn implementation of OLS."""
    poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)
    X = poly.fit_transform(np.c_[cp.deepcopy(x).reshape(-1, 1),
                                 cp.deepcopy(y).reshape(-1, 1)])

    linreg = sk_model.LinearRegression(fit_intercept=False)
    linreg.fit(X, z.ravel())

    # Prediction and in-sample scores
    z_predict_ = linreg.predict(X)
    r2 = metrics.R2(z.ravel(), z_predict_)
    bias = metrics.bias2(z.ravel(), z_predict_)
    mse_error = metrics.mse(z.ravel(), z_predict_)

    # Residual-based noise variance, then OLS coefficient variances
    # from the diagonal of (X^T X)^-1.
    N, P = X.shape
    z_variance = np.sum((z.ravel() - z_predict_)**2) / (N - P - 1)
    linreg_coef_var = np.diag(np.linalg.inv(X.T @ X)) * z_variance

    self.data["regression"] = {
        "y_pred": z_predict_,
        "r2": r2,
        "mse": mse_error,
        "bias": bias,
        "beta_coefs": linreg.coef_,
        "beta_coefs_var": linreg_coef_var,
        # 2*sigma approximates the 95% confidence interval half-width
        "beta_95c": np.sqrt(linreg_coef_var) * 2,
    }

    if print_results:
        print("R2: {:-20.16f}".format(r2))
        print("MSE: {:-20.16f}".format(mse_error))
        print("Bias: {:-20.16f}".format(bias))
        print("Beta coefs: {}".format(linreg.coef_))
        print("Beta coefs variances: {}".format(linreg_coef_var))

    # k-fold cross validation via the sklearn-based resampling helper
    sk_kfold_res = sk_resampling.sk_learn_k_fold_cv(
        cp.deepcopy(x), cp.deepcopy(y), cp.deepcopy(z),
        sk_model.LinearRegression(fit_intercept=False), poly.transform,
        test_percent=test_percent, k_splits=k_splits,
        print_results=print_results)
    self.data["kfoldcv"] = sk_kfold_res

    # Bootstrap resampling
    bs_reg = bs.BootstrapRegression(
        cp.deepcopy(np.c_[x.ravel(), y.ravel()]), cp.deepcopy(z.ravel()),
        sk_model.LinearRegression(fit_intercept=False), poly.transform)
    bs_reg.bootstrap(N_bs, test_percent=test_percent)
    self._fill_data(bs_reg, "bootstrap")

    if print_results:
        print("R2: {:-20.16f}".format(bs_reg.R2))
        print("MSE: {:-20.16f}".format(bs_reg.MSE))
        print("Bias^2:{:-20.16f}".format(bs_reg.bias))
        print("Var(y):{:-20.16f}".format(bs_reg.var))
        print("Beta coefs: {}".format(bs_reg.coef_))
        print("Beta coefs variances: {}".format(bs_reg.coef_var))
        print("MSE = Bias^2 + Var(y) = ")
        print("{} = {} + {} = {}".format(
            bs_reg.MSE, bs_reg.bias, bs_reg.var, bs_reg.bias + bs_reg.var))
        print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var - bs_reg.MSE)))
def __test_cross_validation_methods():
    """A small implementation of a test case for the CV methods.

    Exercises kFoldCrossValidation, kkFoldCrossValidation and
    MCCrossValidation against a plain LinearRegression fit on noisy
    quadratic data, and plots the predictions.
    """
    from regression import LinearRegression
    import matplotlib.pyplot as plt

    # Initial values
    n = 100
    N_bs = 1000
    k_splits = 4
    test_percent = 0.2
    noise = 0.3
    np.random.seed(1234)

    # Sets up random matrices
    x = np.random.rand(n, 1)

    def func_excact(_x):
        return (2*_x*_x + np.exp(-2*_x)
                + noise * np.random.randn(_x.shape[0], _x.shape[1]))

    y = func_excact(x)

    def design_matrix(_x):
        return np.c_[np.ones(_x.shape), _x, _x * _x]

    # Sets up design matrix
    X = design_matrix(x)

    # Performs regression
    reg = LinearRegression()
    reg.fit(X, y)
    y = y.ravel()
    y_predict = reg.predict(X).ravel()
    print("Regular linear regression")
    print("R2: {:-20.16f}".format(reg.score(y, y_predict)))
    print("MSE: {:-20.16f}".format(metrics.mse(y, y_predict)))
    print("Bias^2:{:-20.16f}".format(metrics.bias2(y, y_predict)))

    # Small plotter (plt already imported above; duplicate import removed)
    plt.plot(x, y, "o", label="data")
    plt.plot(x, y_predict, "o",
             label=r"Pred, $R^2={:.4f}$".format(reg.score(y, y_predict)))

    print("k-fold Cross Validation")
    kfcv = kFoldCrossValidation(x, y, LinearRegression, design_matrix)
    # BUG FIX: the original referenced an undefined name `k_fold_size`
    # (NameError at runtime); the local variable is `k_splits`.
    kfcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    print("R2: {:-20.16f}".format(kfcv.R2))
    print("MSE: {:-20.16f}".format(kfcv.MSE))
    print("Bias^2:{:-20.16f}".format(kfcv.bias))
    print("Var(y):{:-20.16f}".format(kfcv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(
        kfcv.MSE, kfcv.bias, kfcv.var, kfcv.bias + kfcv.var))
    print("Diff: {}".format(abs(kfcv.bias + kfcv.var - kfcv.MSE)))

    plt.errorbar(kfcv.x_pred_test, kfcv.y_pred,
                 yerr=np.sqrt(kfcv.y_pred_var), fmt="o",
                 label=r"k-fold CV, $R^2={:.4f}$".format(kfcv.R2))

    print("kk Cross Validation")
    kkcv = kkFoldCrossValidation(x, y, LinearRegression, design_matrix)
    # BUG FIX: `k_fold_size` -> `k_splits` (see above).
    kkcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    print("R2: {:-20.16f}".format(kkcv.R2))
    print("MSE: {:-20.16f}".format(kkcv.MSE))
    print("Bias^2:{:-20.16f}".format(kkcv.bias))
    print("Var(y):{:-20.16f}".format(kkcv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(
        kkcv.MSE, kkcv.bias, kkcv.var, kkcv.bias + kkcv.var))
    print("Diff: {}".format(abs(kkcv.bias + kkcv.var - kkcv.MSE)))

    plt.errorbar(kkcv.x_pred_test.ravel(), kkcv.y_pred.ravel(),
                 yerr=np.sqrt(kkcv.y_pred_var.ravel()), fmt="o",
                 label=r"kk-fold CV, $R^2={:.4f}$".format(kkcv.R2))

    print("Monte Carlo Cross Validation")
    mccv = MCCrossValidation(x, y, LinearRegression, design_matrix)
    # BUG FIX: `k_fold_size` -> `k_splits` (see above).
    mccv.cross_validate(N_bs, k_splits=k_splits, test_percent=test_percent)
    print("R2: {:-20.16f}".format(mccv.R2))
    print("MSE: {:-20.16f}".format(mccv.MSE))
    print("Bias^2:{:-20.16f}".format(mccv.bias))
    print("Var(y):{:-20.16f}".format(mccv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(
        mccv.MSE, mccv.bias, mccv.var, mccv.bias + mccv.var))
    print("Diff: {}".format(abs(mccv.bias + mccv.var - mccv.MSE)))

    print("\nCross Validation methods tested.")

    plt.errorbar(mccv.x_pred_test, mccv.y_pred,
                 yerr=np.sqrt(mccv.y_pred_var), fmt="o",
                 label=r"MC CV, $R^2={:.4f}$".format(mccv.R2))

    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$y=2x^2$")
    plt.legend()
    plt.show()
def __test_cross_validation_methods():
    """A small implementation of a test case for the CV methods.

    Exercises kFoldCrossValidation, kkFoldCrossValidation and
    MCCrossValidation with an OLSRegression model on noisy quadratic
    data, and plots the predictions.
    """
    from regression import OLSRegression
    import sklearn.preprocessing as sk_preproc
    import matplotlib.pyplot as plt

    # Initial values
    n = 100
    N_bs = 200
    deg = 2
    k_splits = 4
    test_percent = 0.35
    noise = 0.3
    np.random.seed(1234)

    # Sets up random matrices
    x = np.random.rand(n, 1)
    y = 2 * x * x + np.exp(
        -2 * x) + noise * np.random.randn(x.shape[0], x.shape[1])

    # Sets up design matrix
    poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)
    X = poly.fit_transform(x)

    # Performs regression
    reg = OLSRegression()
    reg.fit(X, y)
    y_predict = reg.predict(X)
    print("Regular linear regression")
    print("R2: {:-20.16f}".format(reg.score(X, y)))
    print("MSE: {:-20.16f}".format(metrics.mse(y, y_predict)))
    print("Bias^2:{:-20.16f}".format(metrics.bias(y, y_predict)))

    # Small plotter
    plt.plot(x, y, "o", label="data")
    plt.plot(x, y_predict, "o",
             label=r"Pred, $R^2={:.4f}$".format(reg.score(X, y)))

    print("k-fold Cross Validation")
    kfcv = kFoldCrossValidation(X, y, OLSRegression())
    kfcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    print("R2: {:-20.16f}".format(kfcv.r2))
    print("MSE: {:-20.16f}".format(kfcv.mse))
    print("Bias^2:{:-20.16f}".format(kfcv.bias))
    print("Var(y):{:-20.16f}".format(kfcv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(
        kfcv.mse, kfcv.bias, kfcv.var, kfcv.bias + kfcv.var))
    print("Diff: {}".format(abs(kfcv.bias + kfcv.var - kfcv.mse)))

    plt.errorbar(kfcv.x_pred_test, kfcv.y_pred,
                 yerr=np.sqrt(kfcv.y_pred_var), fmt="o",
                 label=r"k-fold CV, $R^2={:.4f}$".format(kfcv.r2))

    print("kk Cross Validation")
    kkcv = kkFoldCrossValidation(X, y, OLSRegression())
    kkcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    print("R2: {:-20.16f}".format(kkcv.r2))
    print("MSE: {:-20.16f}".format(kkcv.mse))
    print("Bias^2:{:-20.16f}".format(kkcv.bias))
    print("Var(y):{:-20.16f}".format(kkcv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(
        kkcv.mse, kkcv.bias, kkcv.var, kkcv.bias + kkcv.var))
    print("Diff: {}".format(abs(kkcv.bias + kkcv.var - kkcv.mse)))

    plt.errorbar(kkcv.x_pred_test, kkcv.y_pred,
                 yerr=np.sqrt(kkcv.y_pred_var), fmt="o",
                 label=r"kk-fold CV, $R^2={:.4f}$".format(kkcv.r2))

    print("Monte Carlo Cross Validation")
    mccv = MCCrossValidation(X, y, OLSRegression())
    mccv.cross_validate(N_bs, k_splits=k_splits, test_percent=test_percent)
    print("R2: {:-20.16f}".format(mccv.r2))
    print("MSE: {:-20.16f}".format(mccv.mse))
    print("Bias^2:{:-20.16f}".format(mccv.bias))
    print("Var(y):{:-20.16f}".format(mccv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(
        mccv.mse, mccv.bias, mccv.var, mccv.bias + mccv.var))
    print("Diff: {}".format(abs(mccv.bias + mccv.var - mccv.mse)))

    print("\nCross Validation methods tested.")

    plt.errorbar(mccv.x_pred_test, mccv.y_pred,
                 yerr=np.sqrt(mccv.y_pred_var), fmt="o",
                 label=r"MC CV, $R^2={:.4f}$".format(mccv.r2))

    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$y=2x^2 + e^{-2x}$")
    # BUG FIX: removed a stray dead re-computation of `y` that sat between
    # plt.title and plt.legend — its result was never used and it only
    # advanced the global RNG state for no reason.
    plt.legend()
    plt.show()
def __test_bootstrap_fit():
    """A small implementation of a bootstrap test case."""
    from regression import LinearRegression

    # Test-case parameters
    N_bs = 1000
    n = 200
    noise = 0.2
    np.random.seed(1234)
    test_percent = 0.35

    # Random sample points
    x = np.random.rand(n, 1)

    def func_excact(_x):
        return (2*_x*_x + np.exp(-2*_x)
                + noise * np.random.randn(_x.shape[0], _x.shape[1]))

    y = func_excact(x)

    def design_matrix(_x):
        return np.c_[np.ones(_x.shape), _x, _x*_x]

    # Design matrix for a second-degree polynomial
    X = design_matrix(x)

    # Plain linear-regression fit on the full data set
    reg = LinearRegression()
    reg.fit(X, y)
    y = y.ravel()
    y_predict = reg.predict(X).ravel()
    print("Regular linear regression")
    # NOTE(review): argument order (y_predict, y) is the reverse of the
    # sibling CV test's reg.score(y, y_predict) — confirm which order
    # LinearRegression.score expects.
    print("R2: {:-20.16f}".format(reg.score(y_predict, y)))
    print("MSE: {:-20.16f}".format(metrics.mse(y, y_predict)))
    print("Beta: ", reg.coef_.ravel())
    print("var(Beta): ", reg.coef_var.ravel())
    print("")

    # Bootstrap resampling of the same regression
    print("Bootstrapping")
    bs_reg = BootstrapRegression(x, y, LinearRegression, design_matrix)
    bs_reg.bootstrap(N_bs, test_percent=test_percent)

    print("R2: {:-20.16f}".format(bs_reg.R2))
    print("MSE: {:-20.16f}".format(bs_reg.MSE))
    print("Bias^2:{:-20.16f}".format(bs_reg.bias))
    print("Var(y):{:-20.16f}".format(bs_reg.var))
    print("Beta: ", bs_reg.coef_.ravel())
    print("var(Beta): ", bs_reg.coef_var.ravel())
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(
        bs_reg.MSE, bs_reg.bias, bs_reg.var, bs_reg.bias + bs_reg.var))
    print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var - bs_reg.MSE)))

    # Visual comparison: data, plain prediction, bootstrap prediction
    import matplotlib.pyplot as plt
    plt.plot(x.ravel(), y, "o", label="Data")
    plt.plot(x.ravel(), y_predict, "o",
             label=r"Pred, R^2={:.4f}".format(reg.score(y_predict, y)))
    print(bs_reg.y_pred.shape, bs_reg.y_pred_var.shape)
    plt.errorbar(bs_reg.x_pred_test, bs_reg.y_pred,
                 yerr=np.sqrt(bs_reg.y_pred_var), fmt="o",
                 label=r"Bootstrap Prediction, $R^2={:.4f}$".format(bs_reg.R2))
    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$2x^2 + \sigma^2$")
    plt.legend()
    plt.show()