Example #1
0
def find_minimum_MSE(x, y, z, hyperparams, degrees):
    """
    Use bootstrap resampling to estimate the test MSE of OLS, Ridge and
    Lasso regression over the given polynomial degrees (and, for Ridge and
    Lasso, hyperparameters), then return the minimum MSE of each method
    together with the degree/hyperparameter attaining it.

    Arguments:
        x, y = coordinates (will generalise for arbitrary number of parameters)
        z = data
        hyperparams = list of hyperparameters to test
        degrees = list of polynomial degrees to test

    Returns:
        [[ols_min_mse, ols_degree],
         [ridge_min_mse, ridge_degree, ridge_hyperparam],
         [lasso_min_mse, lasso_degree, lasso_hyperparam]]
    """
    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)
    ols_error = zeros(len(degrees))
    ridge_error = zeros((len(degrees), len(hyperparams)))
    lasso_error = zeros((len(degrees), len(hyperparams)))

    # OLS: only the degree varies; hyperparam is unused by least squares.
    # Index by position (enumerate), not by the degree value itself, so
    # degree lists that do not start at 0 still work.
    for d, degree in enumerate(degrees):
        [ols_error[d], r2, bias, var] = pf.bootstrap(x_train,
                                                     x_test,
                                                     y_train,
                                                     y_test,
                                                     z_train,
                                                     z_test,
                                                     pf.least_squares,
                                                     degree=degree,
                                                     hyperparam=0)

    # RIDGE: grid over degree x hyperparameter.
    for d, degree in enumerate(degrees):
        for i, hyperparam in enumerate(hyperparams):
            [ridge_error[d, i], r2, bias,
             var] = pf.bootstrap(x_train,
                                 x_test,
                                 y_train,
                                 y_test,
                                 z_train,
                                 z_test,
                                 pf.ridge_regression,
                                 degree=degree,
                                 hyperparam=hyperparam)

    # LASSO: grid over degree x hyperparameter.
    # (Bug fix: this loop previously overwrote ridge_error, leaving
    # lasso_error all zeros.)
    for d, degree in enumerate(degrees):
        for i, hyperparam in enumerate(hyperparams):
            [lasso_error[d, i], r2, bias,
             var] = pf.bootstrap(x_train,
                                 x_test,
                                 y_train,
                                 y_test,
                                 z_train,
                                 z_test,
                                 pf.lasso_regression,
                                 degree=degree,
                                 hyperparam=hyperparam)

    # Locate the minima. ndarray methods are used so no extra imports are
    # needed; divmod converts a flat argmin into (row, column) indices.
    ols_idx = int(ols_error.argmin())
    ridge_idx = divmod(int(ridge_error.argmin()), ridge_error.shape[1])
    lasso_idx = divmod(int(lasso_error.argmin()), lasso_error.shape[1])

    return [[ols_error[ols_idx], degrees[ols_idx]],
            [ridge_error[ridge_idx], degrees[ridge_idx[0]],
             hyperparams[ridge_idx[1]]],
            [lasso_error[lasso_idx], degrees[lasso_idx[0]],
             hyperparams[lasso_idx[1]]]]
Example #2
0
def generate_test_vs_lambda(x, y, z, reg, degree, hyperparams, filename):
    """
    Append bootstrap test scores versus hyperparameter to a text file.

    Arguments:
        x, y = coordinates
        z = data
        reg = regression function reg(X, data, hyperparam)
        degree = degree of polynomial
        hyperparams = hyperparameters to evaluate
        filename = output file (opened in append mode)

    Each data line written is "lambda mse r2 bias var".
    """
    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # `with` guarantees the file is closed even if pf.bootstrap raises.
    with open(filename, "a") as outfile:
        outfile.write("lambda mse r2 bias var\n")
        for hyperparam in hyperparams:
            [mse, r2, bias, var] = pf.bootstrap(x_train, x_test,
                                                y_train, y_test,
                                                z_train, z_test,
                                                reg,
                                                degree=degree,
                                                hyperparam=hyperparam)
            outfile.write(f"{hyperparam} {mse} {r2} {bias} {var}\n")
Example #3
0
def generate_test_vs_degree_boot(x, y, z,  reg, degrees, hyperparam, filename, return_minimum=True):
    """
    Append bootstrap test scores versus polynomial degree to a text file.

    Arguments:
        x, y = coordinates
        z = data
        reg = regression function reg(X, data, hyperparam)
        degrees = polynomial degrees to evaluate
        hyperparam = hyperparameter for the model
        filename = output file (opened in append mode)
        return_minimum = if True, return [minimum mse, index of minimum]

    Each data line written is "degree mse r2 bias var".
    """
    boot_error = np.zeros(len(degrees))

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(x, y, z, test_size=0.2)

    # `with` guarantees the file is closed even if pf.bootstrap raises.
    with open(filename, "a") as outfile:
        outfile.write("degree mse r2 bias var\n")
        for i, degree in enumerate(degrees):
            [mse, r2, bias, var] = pf.bootstrap(x_train, x_test,
                                                y_train, y_test,
                                                z_train, z_test,
                                                reg,
                                                degree=degree,
                                                hyperparam=hyperparam)
            outfile.write(f"{degree} {mse} {r2} {bias} {var}\n")
            # Bug fix: boot_error was never filled before, so
            # return_minimum always reported [0.0, 0].
            boot_error[i] = mse

    if return_minimum:
        return [min(boot_error), np.argmin(boot_error)]
Example #4
0
def generate_test_vs_degree_multiple_lambda(x, y, z,  reg, degrees, hyperparams, filename, return_minimum=True):
    """
    Append bootstrap test scores versus degree, one file per hyperparameter.

    Arguments:
        x, y = coordinates
        z = data
        reg = regression function reg(X, data, hyperparam)
        degrees = polynomial degrees to evaluate
        hyperparams = list of hyperparameters for the model
        filename = base output name; "_lambda<value>.txt" is appended per
                   hyperparameter (assumes a 4-character extension)
        return_minimum = if True, return
                         [minimum mse, best degree, best hyperparam]

    Each data line written is "degree mse r2 bias var".
    """
    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(x, y, z, test_size=0.2)

    if return_minimum:
        error = np.zeros((len(degrees), len(hyperparams)))

    for hyper_index, hyperparam in enumerate(hyperparams):
        # `with` guarantees each file is closed even if bootstrap raises.
        with open(filename[:-4] + f"_lambda{hyperparam:.0e}.txt", "a") as outfile:
            outfile.write("degree mse r2 bias var\n")
            for deg_index, degree in enumerate(degrees):
                [mse, r2, bias, var] = pf.bootstrap(x_train, x_test,
                                                    y_train, y_test,
                                                    z_train, z_test,
                                                    reg,
                                                    degree=degree,
                                                    hyperparam=hyperparam)
                outfile.write(f"{degree} {mse} {r2} {bias} {var}\n")

                if return_minimum:
                    # Bug fix: index by position, not by the degree value,
                    # so degree lists that do not start at 0 still map to
                    # valid rows (and the returned degree is consistent).
                    error[deg_index, hyper_index] = mse

    if return_minimum:
        deg_index, lam_index = np.unravel_index(np.argmin(error), error.shape)
        return [error.min(), degrees[deg_index], hyperparams[lam_index]]
Example #5
0
def plot_test_vs_lambda(ax,
                        x,
                        y,
                        z,
                        reg,
                        degree,
                        hyperparams,
                        show_bias_var=False,
                        **kwargs):
    """
    Plot bootstrap MSE (and optionally bias^2 and variance) against the
    hyperparameter for a fixed polynomial degree, where
        ax = matplotlib.axis object
        reg = regression function reg(X, data, hyperparam)
        degree = degree of polynomial
        hyperparams = hyperparameters to plot against
        show_bias_var = if True the bias and variance will also be plotted
    """
    n_lambdas = len(hyperparams)
    boot_mse = np.zeros(n_lambdas)
    boot_bias = np.zeros(n_lambdas)
    boot_r2 = np.zeros(n_lambdas)
    boot_var = np.zeros(n_lambdas)

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    for idx, hyperparam in enumerate(hyperparams):
        mse, r2, bias, var = pf.bootstrap(x_train,
                                          x_test,
                                          y_train,
                                          y_test,
                                          z_train,
                                          z_test,
                                          reg,
                                          degree=degree,
                                          hyperparam=hyperparam)
        boot_mse[idx], boot_r2[idx] = mse, r2
        boot_bias[idx], boot_var[idx] = bias, var

    # MSE curve
    ax.plot(hyperparams, boot_mse, label='MSE', **kwargs)

    # Optional decomposition into variance and squared bias
    if show_bias_var:
        ax.plot(hyperparams, boot_var, label='variance', ls='--', **kwargs)
        ax.plot(hyperparams, boot_bias, label='bias^2', ls='--', **kwargs)
Example #6
0
def plot_test_vs_degree_multiple_lambda(ax,
                                        x,
                                        y,
                                        z,
                                        reg,
                                        max_degree,
                                        hyperparams,
                                        return_minimum=True,
                                        **kwargs):
    """
    Plot bootstrap MSE versus complexity, one curve per hyperparameter,
    where
        ax = matplotlib.axis object
        reg = regression function reg(X, data, hyperparam)
        max_degree = maximum degree of polynomial (degrees 0..max_degree)
        hyperparams = list of hyperparameters for the model
        return_minimum = if True, return
                         [minimum mse, best degree, best hyperparam]
    """
    degrees = np.arange(0, max_degree + 1)

    # Results come from bootstrap, not k-fold; the old k_fold_* names were
    # misleading, and the bias/r2/var arrays were never read, so only the
    # MSE buffer is kept.
    boot_mse = np.zeros(len(degrees))

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    if return_minimum:
        error = np.zeros((len(degrees), len(hyperparams)))

    for hyper_index, hyperparam in enumerate(hyperparams):
        # degrees run 0..max_degree, so the degree value is a valid index.
        for degree in degrees:
            [mse, r2, bias, var] = pf.bootstrap(x_train,
                                                x_test,
                                                y_train,
                                                y_test,
                                                z_train,
                                                z_test,
                                                reg,
                                                degree=degree,
                                                hyperparam=hyperparam)

            boot_mse[degree] = mse

            if return_minimum:
                error[degree, hyper_index] = mse

        # Plot mse. Raw f-string: "\l" in a non-raw literal is an invalid
        # escape sequence (SyntaxWarning on modern CPython); the rendered
        # label text is unchanged.
        ax.plot(degrees,
                boot_mse,
                label=rf"$\lambda$={hyperparam:.2g}",
                **kwargs)

    if return_minimum:
        deg_index, lam_index = np.unravel_index(np.argmin(error), error.shape)
        return [error.min(), degrees[deg_index], hyperparams[lam_index]]
Example #7
0
def plot_test_vs_degree_boot(ax,
                             x,
                             y,
                             z,
                             reg,
                             max_degree,
                             hyperparam,
                             show_bias_var=False,
                             plot_r2=False,
                             return_minimum=True,
                             **kwargs):
    """
    Plot the bootstrap test error (MSE, or R2 if plot_r2) versus model
    complexity, optionally together with the bias/variance decomposition,
    where
        ax = matplotlib.axis object
        reg = regression function reg(X, data, hyperparam)
        max_degree = maximum degree of polynomial (degrees 0..max_degree)
        hyperparam = hyperparameter for model
        show_bias_var = if True the bias and variance will also be plotted
        plot_r2 = if True the R2 score is plotted instead of the MSE
        return_minimum = if True, return [min error, index of minimum]
    """
    degrees = np.arange(0, max_degree + 1)

    n_degrees = len(degrees)
    boot_error = np.zeros(n_degrees)
    boot_bias = np.zeros(n_degrees)
    boot_var = np.zeros(n_degrees)

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # degrees run 0..max_degree, so each degree value is a valid index.
    for degree in degrees:
        mse, r2, bias, var = pf.bootstrap(x_train,
                                          x_test,
                                          y_train,
                                          y_test,
                                          z_train,
                                          z_test,
                                          reg,
                                          degree=degree,
                                          hyperparam=hyperparam)

        boot_bias[degree] = bias
        boot_var[degree] = var

        if plot_r2:
            boot_error[degree], label = r2, 'r2 test'
        else:
            boot_error[degree], label = mse, 'MSE test'

    if show_bias_var:
        label = 'MSE'

    # Error curve (MSE or R2)
    ax.plot(degrees, boot_error, label=label, **kwargs)

    # Optional decomposition into variance and squared bias
    if show_bias_var:
        ax.plot(degrees, boot_var, label='variance', ls='--', **kwargs)
        ax.plot(degrees, boot_bias, label='bias$^2$', ls='--', **kwargs)
    if return_minimum:
        return [min(boot_error), np.argmin(boot_error)]