def find_minimum_MSE(x, y, z, hyperparams, degrees):
    """
    Use bootstrap resampling to estimate the test MSE of OLS, Ridge and
    Lasso regression, then return the minimum MSE for each method along
    with the corresponding polynomial degree (and hyperparameter).

    Arguments:
        x, y = coordinates (will generalise for arbitrary number of parameters)
        z = data
        hyperparams = list of hyperparameters (lambda) to test
        degrees = list of polynomial degrees to test; NOTE: the degree value
                  is used directly as an array index, so this assumes
                  degrees == 0, 1, ..., len(degrees)-1 (as elsewhere in file)

    Returns:
        ([ols_min_mse, ols_degree],
         [ridge_min_mse, ridge_degree, ridge_lambda],
         [lasso_min_mse, lasso_degree, lasso_lambda])
    """
    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    ols_error = np.zeros(len(degrees))
    ridge_error = np.zeros((len(degrees), len(hyperparams)))
    lasso_error = np.zeros((len(degrees), len(hyperparams)))

    # OLS (no hyperparameter, so hyperparam=0 is a placeholder)
    for degree in degrees:
        [ols_error[degree], r2, bias, var] = pf.bootstrap(
            x_train, x_test, y_train, y_test, z_train, z_test,
            pf.least_squares, degree=degree, hyperparam=0)

    # RIDGE
    for degree in degrees:
        for i in range(len(hyperparams)):
            [ridge_error[degree, i], r2, bias, var] = pf.bootstrap(
                x_train, x_test, y_train, y_test, z_train, z_test,
                pf.ridge_regression, degree=degree,
                hyperparam=hyperparams[i])

    # LASSO
    # Bug fix: the original stored the Lasso results into ridge_error,
    # overwriting the Ridge results and leaving lasso_error all zeros.
    for degree in degrees:
        for i in range(len(hyperparams)):
            [lasso_error[degree, i], r2, bias, var] = pf.bootstrap(
                x_train, x_test, y_train, y_test, z_train, z_test,
                pf.lasso_regression, degree=degree,
                hyperparam=hyperparams[i])

    # Bug fix: the docstring promised a return value, but the original
    # returned nothing.  Locate and return the minima.
    ols_idx = int(np.argmin(ols_error))
    ridge_idx = np.unravel_index(np.argmin(ridge_error), ridge_error.shape)
    lasso_idx = np.unravel_index(np.argmin(lasso_error), lasso_error.shape)

    return ([ols_error[ols_idx], degrees[ols_idx]],
            [ridge_error[ridge_idx], degrees[ridge_idx[0]],
             hyperparams[ridge_idx[1]]],
            [lasso_error[lasso_idx], degrees[lasso_idx[0]],
             hyperparams[lasso_idx[1]]])
def generate_test_vs_lambda(x, y, z, reg, degree, hyperparams, filename):
    """
    Append bootstrap test metrics (mse, r2, bias, variance) vs hyperparameter
    to a whitespace-separated text file, one row per hyperparameter.

    Arguments:
        x, y = coordinates
        z = data
        reg = regression function reg(X, data, hyperparam)
        degree = degree of polynomial
        hyperparams = hyperparameters to evaluate
        filename = output file; opened in append mode, so repeated calls
                   accumulate results
    """
    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # 'with' guarantees the file is closed even if pf.bootstrap raises.
    # (The original also allocated four boot_* arrays it never used.)
    with open(filename, "a") as outfile:
        outfile.write("lambda mse r2 bias var\n")
        for hyperparam in hyperparams:
            [mse, r2, bias, var] = pf.bootstrap(
                x_train, x_test, y_train, y_test, z_train, z_test,
                reg, degree=degree, hyperparam=hyperparam)
            outfile.write(f"{hyperparam} {mse} {r2} {bias} {var}\n")
def generate_test_vs_degree_boot(x, y, z, reg, degrees, hyperparam, filename,
                                 return_minimum=True):
    """
    Append bootstrap test metrics (mse, r2, bias, variance) vs polynomial
    degree to a whitespace-separated text file, one row per degree.

    Arguments:
        x, y = coordinates
        z = data
        reg = regression function reg(X, data, hyperparam)
        degrees = polynomial degrees to evaluate
        hyperparam = hyperparameter for the model
        filename = output file; opened in append mode
        return_minimum = if True, return the smallest mse and its degree

    Returns:
        [minimum mse, degree at the minimum] if return_minimum, else None
    """
    boot_error = np.zeros(len(degrees))

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # 'with' guarantees the file is closed even if pf.bootstrap raises.
    with open(filename, "a") as outfile:
        outfile.write("degree mse r2 bias var\n")
        for i, degree in enumerate(degrees):
            [mse, r2, bias, var] = pf.bootstrap(
                x_train, x_test, y_train, y_test, z_train, z_test,
                reg, degree=degree, hyperparam=hyperparam)
            outfile.write(f"{degree} {mse} {r2} {bias} {var}\n")
            # Bug fix: the original never filled boot_error, so the
            # returned minimum was always [0.0, 0].
            boot_error[i] = mse

    if return_minimum:
        best = int(np.argmin(boot_error))
        # Return the actual degree (not the array index) at the minimum,
        # so degree lists that do not start at 0 are handled correctly.
        return [boot_error[best], degrees[best]]
def generate_test_vs_degree_multiple_lambda(x, y, z, reg, degrees, hyperparams,
                                            filename, return_minimum=True):
    """
    Append bootstrap mse (plus r2, bias, variance) vs polynomial degree to
    one text file per hyperparameter.  Each output file is named
    filename minus its extension plus "_lambda<value>.txt".

    Arguments:
        x, y = coordinates
        z = data
        reg = regression function reg(X, data, hyperparam)
        degrees = polynomial degrees to evaluate
        hyperparams = list of hyperparameters for the model
        filename = base output filename (assumed to end in a 4-char
                   extension such as ".txt"); files opened in append mode
        return_minimum = if True, return the overall smallest mse together
                         with the degree and hyperparameter producing it

    Returns:
        [minimum mse, degree, hyperparam] if return_minimum, else None
    """
    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    if return_minimum:
        error = np.zeros((len(degrees), len(hyperparams)))

    for hyper_index, hyperparam in enumerate(hyperparams):
        # 'with' guarantees each file is closed even if pf.bootstrap raises.
        with open(filename[:-4] + f"_lambda{hyperparam:.0e}.txt",
                  "a") as outfile:
            outfile.write("degree mse r2 bias var\n")
            for degree_index, degree in enumerate(degrees):
                [mse, r2, bias, var] = pf.bootstrap(
                    x_train, x_test, y_train, y_test, z_train, z_test,
                    reg, degree=degree, hyperparam=hyperparam)
                outfile.write(f"{degree} {mse} {r2} {bias} {var}\n")
                if return_minimum:
                    # Bug fix: index by position rather than by the degree
                    # value, so degree lists not starting at 0 work.
                    error[degree_index, hyper_index] = mse

    if return_minimum:
        row, col = np.unravel_index(np.argmin(error), error.shape)
        return [error[row, col], degrees[row], hyperparams[col]]
def plot_test_vs_lambda(ax, x, y, z, reg, degree, hyperparams,
                        show_bias_var=False, **kwargs):
    """
    Plot the bootstrap-estimated test MSE (and optionally bias and variance)
    against the hyperparameter, at a fixed polynomial degree.

    Arguments:
        ax = matplotlib.axis object
        reg = regression function reg(X, data, hyperparam)
        degree = degree of polynomial
        hyperparams = hyperparameters to plot against
        show_bias_var = if True the bias and variance will also be plotted
    """
    n_lambdas = len(hyperparams)
    boot_mse = np.zeros(n_lambdas)
    boot_r2 = np.zeros(n_lambdas)
    boot_bias = np.zeros(n_lambdas)
    boot_var = np.zeros(n_lambdas)

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # One bootstrap evaluation per hyperparameter.
    for i, hyperparam in enumerate(hyperparams):
        boot_mse[i], boot_r2[i], boot_bias[i], boot_var[i] = pf.bootstrap(
            x_train, x_test, y_train, y_test, z_train, z_test,
            reg, degree=degree, hyperparam=hyperparam)

    # Plot mse
    ax.plot(hyperparams, boot_mse, label='MSE', **kwargs)

    # Plot bias and variance if requested
    if show_bias_var:
        ax.plot(hyperparams, boot_var, label='variance', ls='--', **kwargs)
        ax.plot(hyperparams, boot_bias, label='bias^2', ls='--', **kwargs)
def plot_test_vs_degree_multiple_lambda(ax, x, y, z, reg, max_degree,
                                        hyperparams, return_minimum=True,
                                        **kwargs):
    """
    Plot the bootstrap-estimated test MSE vs model complexity, one curve
    per hyperparameter.

    Arguments:
        ax = matplotlib.axis object
        reg = regression function reg(X, data, hyperparam)
        max_degree = maximum degree of polynomial (curves span 0..max_degree)
        hyperparams = list of hyperparameters for the model
        return_minimum = if True, return the overall smallest mse together
                         with the degree and hyperparameter producing it

    Returns:
        [minimum mse, degree, hyperparam] if return_minimum, else None
    """
    degrees = np.arange(0, max_degree + 1)
    # Renamed from the misleading k_fold_* of the original: the metric is
    # computed with bootstrap, not k-fold.  The unused r2/bias/var buffers
    # were dead stores and have been dropped.
    boot_mse = np.zeros(len(degrees))

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    if return_minimum:
        error = np.zeros((len(degrees), len(hyperparams)))

    for hyper_index, hyperparam in enumerate(hyperparams):
        for degree in degrees:
            [mse, r2, bias, var] = pf.bootstrap(
                x_train, x_test, y_train, y_test, z_train, z_test,
                reg, degree=degree, hyperparam=hyperparam)
            boot_mse[degree] = mse
            if return_minimum:
                error[degree, hyper_index] = mse
        # Bug fix: raw f-string so "\l" is not treated as an (invalid)
        # escape sequence; the rendered label text is unchanged.
        ax.plot(degrees, boot_mse, label=rf"$\lambda$={hyperparam:.2g}",
                **kwargs)

    if return_minimum:
        row, col = np.unravel_index(np.argmin(error), error.shape)
        return [error[row, col], degrees[row], hyperparams[col]]
def plot_test_vs_degree_boot(ax, x, y, z, reg, max_degree, hyperparam,
                             show_bias_var=False, plot_r2=False,
                             return_minimum=True, **kwargs):
    """
    Plot the bootstrap-estimated test error (MSE, or R2 if plot_r2) against
    model complexity, optionally alongside the bias^2/variance decomposition.

    Arguments:
        ax = matplotlib.axis object
        reg = regression function reg(X, data, hyperparam)
        max_degree = maximum degree of polynomial (curve spans 0..max_degree)
        hyperparam = hyperparameter for model
        show_bias_var = if True the bias and variance will also be plotted
        plot_r2 = if True plot the R2 score instead of the MSE
        return_minimum = if True return [minimum error, index of minimum]
    """
    degrees = np.arange(0, max_degree + 1)
    boot_error = np.zeros(len(degrees))
    boot_bias = np.zeros(len(degrees))
    boot_var = np.zeros(len(degrees))

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    for degree in degrees:
        mse, r2, bias, var = pf.bootstrap(x_train, x_test, y_train, y_test,
                                          z_train, z_test, reg,
                                          degree=degree, hyperparam=hyperparam)
        boot_bias[degree] = bias
        boot_var[degree] = var
        boot_error[degree] = r2 if plot_r2 else mse

    # Label depends on what is being drawn; a bias/variance plot
    # overrides the metric-specific label.
    if show_bias_var:
        label = 'MSE'
    elif plot_r2:
        label = 'r2 test'
    else:
        label = 'MSE test'

    # Plot the error curve
    ax.plot(degrees, boot_error, label=label, **kwargs)

    # Plot bias and variance if requested
    if show_bias_var:
        ax.plot(degrees, boot_var, label='variance', ls='--', **kwargs)
        ax.plot(degrees, boot_bias, label='bias$^2$', ls='--', **kwargs)

    if return_minimum:
        return [min(boot_error), np.argmin(boot_error)]