def Ridge_unit_test(min_deg=2, max_deg=5, tol=1e-6):
    """
    Tests our implementation of Ridge against sci-kit learn up to a given tolerance
    """
    n = 100  # Number of data points
    # Prepare data set
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2
    degrees = np.arange(min_deg, max_deg + 1)
    for deg in degrees:
        # Set up design matrix
        X = linear_regression.design_matrix_2D(x, y, deg)
        for lamb in np.linspace(0, 1, 10):
            # Compute optimal parameters using our homegrown Ridge regression
            beta = linear_regression.Ridge_2D(X=X, z=z, lamb=lamb)
            # Compute optimal parameters using sklearn
            skl_reg = Ridge(alpha=lamb, fit_intercept=False).fit(X, z)
            beta_skl = skl_reg.coef_

            for i in range(len(beta)):
                if abs(beta[i] - beta_skl[i]) >= tol:
                    print(
                        "Warning! mismatch with SKL in Ridge_unit_test with tol = %.0e"
                        % tol)
                    print("Parameter no. %i for deg = %i" % (i, deg))
                    print("-> (OUR) beta = %.12f" % beta[i])
                    print("-> (SKL) beta = %.12f" % beta_skl[i])
    return
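
# For reference, a minimal sketch of the closed-form estimator that
# linear_regression.Ridge_2D is presumably computing (an assumption; its
# internals are not shown here). With fit_intercept=False, sklearn's Ridge
# minimizes ||z - X beta||^2 + lamb * ||beta||^2, whose minimizer is
# beta = (X^T X + lamb I)^{-1} X^T z:

import numpy as np


def ridge_closed_form(X, z, lamb):
    # Solve (X^T X + lamb * I) beta = X^T z rather than forming the inverse.
    p = X.shape[1]
    return np.linalg.solve(X.T @ X + lamb * np.identity(p), X.T @ z)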
Example #2
def OLS_unit_test(min_deg=0, max_deg=15, tol=1e-6):
    """
    Tests our implementation of OLS against scikit-learn up to a given tolerance.
    """
    n = 100  # Number of data points
    # Prepare data set
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2
    degrees = np.arange(min_deg, max_deg + 1)
    for deg in degrees:
        # Set up design matrix
        X = linear_regression.design_matrix_2D(x, y, deg)
        # Compute optimal parameters using our homegrown OLS
        beta = linear_regression.OLS(X=X, z=z)
        # Compute optimal parameters using sklearn
        skl_reg = LinearRegression(fit_intercept=False).fit(X, z)
        beta_skl = skl_reg.coef_

        for i in range(len(beta)):
            if abs(beta[i] - beta_skl[i]) >= tol:
                print("Warning! mismatch with SKL in OLS_unit_test with tol = %.0e" % tol)
                print("Parameter no. %i for deg = %i" % (i, deg))
                print("-> (OUR) beta = %.12f" % beta[i])
                print("-> (SKL) beta = %.12f" % beta_skl[i])
    return
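
# Similarly, a sketch of the least-squares solution that linear_regression.OLS
# is tested against (assumed; the pseudo-inverse form also handles
# rank-deficient design matrices):

import numpy as np


def ols_closed_form(X, z):
    # beta = (X^T X)^+ X^T z, the minimum-norm least-squares solution.
    return np.linalg.pinv(X.T @ X) @ X.T @ z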
import sys

import numpy as np
from sklearn.model_selection import train_test_split

sys.path.insert(0, "../")

import linear_regression
import utils
import stat_tools
import crossvalidation
import bootstrap
from FrankeFunction import FrankeFunction

utils.plot_settings()  # LaTeX fonts in Plots!

n = 500
noise_scale = 0.2
x = np.random.uniform(0, 1, n)
y = np.random.uniform(0, 1, n)
z = FrankeFunction(x, y)
# Adding standard normal noise:
z = z + noise_scale * np.random.normal(0, 1, len(z))
max_degree = 15
n_lambdas = 30
n_bootstraps = 100
k_folds = 5
lambdas = np.logspace(-5, 0, n_lambdas)
subset_lambdas = lambdas[::12]  # low, middle and high lambdas (indices 0, 12, 24)

x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
    x, y, z, test_size=0.2)

#   Centering the response
z_intercept = np.mean(z)
z = z - z_intercept
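
# All of these examples sample from the Franke function. For reference, a
# sketch of the standard definition that the FrankeFunction module presumably
# implements: a weighted sum of four Gaussian bumps on the unit square.

import numpy as np


def franke(x, y):
    term1 = 0.75 * np.exp(-((9 * x - 2) ** 2) / 4 - ((9 * y - 2) ** 2) / 4)
    term2 = 0.75 * np.exp(-((9 * x + 1) ** 2) / 49 - (9 * y + 1) / 10)
    term3 = 0.5 * np.exp(-((9 * x - 7) ** 2) / 4 - ((9 * y - 3) ** 2) / 4)
    term4 = -0.2 * np.exp(-((9 * x - 4) ** 2) - (9 * y - 7) ** 2)
    return term1 + term2 + term3 + term4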
Example #4
def franke_analysis_plots(
    n=1000,
    noise_scale=0.2,
    max_degree=20,
    n_bootstraps=100,
    k_folds=5,
    n_lambdas=30,
    do_boot=True,
    do_subset=True,
):

    # Note that max_degree is the number of degrees fitted, i.e. degrees 0
    # through max_degree - 1.

    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale * np.random.normal(0, 1, len(z))
    lambdas = np.logspace(-6, 0, n_lambdas)
    subset_lambdas = lambdas[::12]

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    #   Centering the response
    z_intercept = np.mean(z)
    z = z - z_intercept

    #   Centering the response
    z_train_intercept = np.mean(z_train)
    z_train = z_train - z_train_intercept
    z_test = z_test - z_train_intercept

    ########### Setup of problem is completed above.

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)
        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)
        #    X_scaled[:,0] = 1 # Maybe not for ridge+lasso. Don't want to penalize constants...

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        #    X_train_scaled[:,0] = 1 #maybe not for ridge+lasso
        #    X_test_scaled[:,0] = 1 #maybe not for ridge+lasso

        # OLS, get MSE for test and train set.

        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree. Also, gets
        # ols_CV_MSE

        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = crossvalidation.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg

        if do_boot:
            # All regression bootstraps at once
            lamb_ridge = best_ridge_lambda[degree]
            lamb_lasso = best_lasso_lambda[degree]

            (
                ridge_mse,
                ridge_bias,
                ridge_variance,
                lasso_mse,
                lasso_bias,
                lasso_variance,
                ols_mse,
                ols_bias,
                ols_variance,
            ) = bootstrap.bootstrap_all(X_train_scaled, X_test_scaled, z_train,
                                        z_test, n_bootstraps, lamb_lasso,
                                        lamb_ridge)

            (
                ridge_best_lambda_boot_mse[degree],
                ridge_best_lambda_boot_bias[degree],
                ridge_best_lambda_boot_variance[degree],
            ) = (ridge_mse, ridge_bias, ridge_variance)

            (
                lasso_best_lambda_boot_mse[degree],
                lasso_best_lambda_boot_bias[degree],
                lasso_best_lambda_boot_variance[degree],
            ) = (lasso_mse, lasso_bias, lasso_variance)

            (
                ols_boot_mse[degree],
                ols_boot_bias[degree],
                ols_boot_variance[degree],
            ) = (ols_mse, ols_bias, ols_variance)

        if do_subset:
            # Bootstrapping for a selection of lambdas for ridge and lasso
            for subset_lambda_index, lamb in enumerate(subset_lambdas):

                (
                    ridge_mse,
                    ridge_bias,
                    ridge_variance,
                    lasso_mse,
                    lasso_bias,
                    lasso_variance,
                ) = bootstrap.bootstrap_ridge_lasso(
                    X_train_scaled,
                    X_test_scaled,
                    z_train,
                    z_test,
                    n_bootstraps,
                    lamb,  # bootstrap at this subset lambda for lasso...
                    lamb,  # ...and for ridge
                )

                (
                    ridge_subset_lambda_boot_mse[degree, subset_lambda_index],
                    ridge_subset_lambda_boot_bias[degree, subset_lambda_index],
                    ridge_subset_lambda_boot_variance[degree,
                                                      subset_lambda_index],
                ) = (ridge_mse, ridge_bias, ridge_variance)

                (
                    lasso_subset_lambda_boot_mse[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_bias[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_variance[degree,
                                                      subset_lambda_index],
                ) = (lasso_mse, lasso_bias, lasso_variance)


    # Plots go here.

    # CV MSE for OLS:
    plt.figure()
    plt.semilogy(ols_cv_mse)
    plt.title("OLS CV MSE")
    plt.show()

    # Bootstrap for OLS:
    plt.figure()
    plt.semilogy(ols_boot_mse, label="mse")
    plt.semilogy(ols_boot_bias, label="bias")
    plt.semilogy(ols_boot_variance, label="variance")
    plt.title("OLS bias-variance-MSE by bootstrap")
    plt.legend()
    plt.show()

    # CV for Ridge, best+low+middle+high lambdas
    plt.figure()
    plt.semilogy(best_ridge_mse, label="best for each degree")
    plt.semilogy(ridge_lamb_deg_mse[:, 0],
                 label="lambda={}".format(lambdas[0]))
    plt.semilogy(ridge_lamb_deg_mse[:, 12],
                 label="lambda={}".format(lambdas[12]))
    plt.semilogy(ridge_lamb_deg_mse[:, 24],
                 label="lambda={}".format(lambdas[24]))
    plt.title(
        "Ridge CV MSE for best lambda at each degree, plus for given lambdas across all degrees"
    )
    plt.legend()
    plt.show()

    # Bootstrap for the best ridge lambdas:
    plt.figure()
    plt.semilogy(ridge_best_lambda_boot_mse, label="mse")
    plt.semilogy(ridge_best_lambda_boot_bias, label="bias")
    plt.semilogy(ridge_best_lambda_boot_variance, label="variance")
    plt.title("Best ridge lambdas for each degree bootstrap")
    plt.legend()
    plt.show()

    # Bootstrap only bias and variance for low+middle+high ridge lambdas

    plt.figure()
    plt.semilogy(ridge_subset_lambda_boot_bias[:, 0],
                 label="bias, lambda = {}".format(subset_lambdas[0]))
    plt.semilogy(
        ridge_subset_lambda_boot_variance[:, 0],
        label="variance, lambda = {}".format(subset_lambdas[0]),
    )
    plt.semilogy(ridge_subset_lambda_boot_bias[:, 1],
                 label="bias, lambda = {}".format(subset_lambdas[1]))
    plt.semilogy(
        ridge_subset_lambda_boot_variance[:, 1],
        label="variance, lambda = {}".format(subset_lambdas[1]),
    )
    plt.semilogy(ridge_subset_lambda_boot_bias[:, 2],
                 label="bias, lambda = {}".format(subset_lambdas[2]))
    plt.semilogy(
        ridge_subset_lambda_boot_variance[:, 2],
        label="variance, lambda = {}".format(subset_lambdas[2]),
    )
    plt.title("Bias+variance for low, middle, high ridge lambdas")
    plt.legend()
    plt.show()

    # CV for lasso, best+low+middle+high lambdas
    plt.figure()
    plt.semilogy(best_lasso_mse, label="best lambda for each degree")
    plt.semilogy(lasso_lamb_deg_mse[:, 0],
                 label="lambda={}".format(lambdas[0]))
    plt.semilogy(lasso_lamb_deg_mse[:, 12],
                 label="lambda={}".format(lambdas[12]))
    plt.semilogy(lasso_lamb_deg_mse[:, 24],
                 label="lambda={}".format(lambdas[24]))
    plt.title(
        "Lasso CV MSE for best lambda at each degree, plus for given lambdas across all degrees"
    )
    plt.legend()
    plt.show()

    # Bootstrap for the best lasso lambdas:
    plt.figure()
    plt.semilogy(lasso_best_lambda_boot_mse, label="mse")
    plt.semilogy(lasso_best_lambda_boot_bias, label="bias")
    plt.semilogy(lasso_best_lambda_boot_variance, label="variance")
    plt.title("Best lasso lambdas for each degree bootstrap")
    plt.legend()
    plt.show()

    # Bootstrap only bias and variance for low+middle+high lasso lambdas

    plt.figure()
    plt.semilogy(lasso_subset_lambda_boot_bias[:, 0],
                 label="bias, lambda = {}".format(subset_lambdas[0]))
    plt.semilogy(
        lasso_subset_lambda_boot_variance[:, 0],
        label="variance, lambda = {}".format(subset_lambdas[0]),
    )
    plt.semilogy(lasso_subset_lambda_boot_bias[:, 1],
                 label="bias, lambda = {}".format(subset_lambdas[1]))
    plt.semilogy(
        lasso_subset_lambda_boot_variance[:, 1],
        label="variance, lambda = {}".format(subset_lambdas[1]),
    )
    plt.semilogy(lasso_subset_lambda_boot_bias[:, 2],
                 label="bias, lambda = {}".format(subset_lambdas[2]))
    plt.semilogy(
        lasso_subset_lambda_boot_variance[:, 2],
        label="variance, lambda = {}".format(subset_lambdas[2]),
    )
    plt.title("Bias+variance for low, middle, high lasso lambdas")
    plt.legend()
    plt.show()

    # For a few degrees, plot CV MSE vs. lambda for ridge; the indexing below
    # assumes max_degree >= 8.

    plt.figure()
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 1],
        label="degree = {}".format(max_degree - 1),
    )
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 2],
        label="degree = {}".format(max_degree - 2),
    )
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 3],
        label="degree = {}".format(max_degree - 3),
    )
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 5],
        label="degree = {}".format(max_degree - 5),
    )
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 7],
        label="degree = {}".format(max_degree - 7),
    )
    plt.legend()
    plt.show()

    # For a few degrees, plot CV MSE vs. lambda for lasso; the indexing below
    # assumes max_degree >= 8.

    plt.figure()
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 1],
        label="degree = {}".format(max_degree - 1),
    )
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 2],
        label="degree = {}".format(max_degree - 2),
    )
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 3],
        label="degree = {}".format(max_degree - 3),
    )
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 5],
        label="degree = {}".format(max_degree - 5),
    )
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 7],
        label="degree = {}".format(max_degree - 7),
    )
    plt.legend()
    plt.show()

    print("best ridge lambda:")
    print(best_ridge_lambda)
    print("best lasso lambda:")
    print(best_lasso_lambda)
    return
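
# bootstrap.bootstrap_all and bootstrap.bootstrap_ridge_lasso are not shown
# here; a sketch of how such a bootstrap bias-variance-MSE decomposition is
# typically estimated (assumed, with a generic fit_predict callable):

import numpy as np
from sklearn.utils import resample


def bootstrap_bias_variance(X_train, X_test, z_train, z_test, n_bootstraps,
                            fit_predict):
    # fit_predict(X_tr, z_tr, X_te) should return predictions on X_te.
    z_pred = np.empty((z_test.shape[0], n_bootstraps))
    for b in range(n_bootstraps):
        X_re, z_re = resample(X_train, z_train)
        z_pred[:, b] = fit_predict(X_re, z_re, X_test)
    # MSE averaged over bootstraps and test points, squared bias of the mean
    # prediction, and variance of the predictions across bootstraps.
    mse = np.mean((z_test[:, None] - z_pred) ** 2)
    bias_sq = np.mean((z_test - np.mean(z_pred, axis=1)) ** 2)
    variance = np.mean(np.var(z_pred, axis=1))
    return mse, bias_sq, variance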
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

from FrankeFunction import FrankeFunction


np.random.seed(16091995)

n_datapoints = 1000
bootstraps = 100

x = np.random.rand(n_datapoints)
y = np.random.rand(n_datapoints)
z = FrankeFunction(x, y) + 0.05*np.random.normal(0, 1, n_datapoints)


p_min = 20
p_max = 33
polynomial_degrees = np.arange(p_min, p_max + 1, 1)

lambdas = np.logspace(-20, -6, 10)

x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
    x, y, z, test_size=0.2)



scaler = StandardScaler()

def franke_predictions(n=1000,
                       noise_scale=0.2,
                       degree=20,
                       ridge_lambda=1e-2,
                       lasso_lambda=1e-5,
                       plot_grid_size=2000):
    """For a given sample size n, noise_scale, max_degree and penalty parameters: produces ols,
    ridge and lasso predictions, as well as ground truth on a plotting meshgrid with input grid size.

    output:
        x_plot_mesh: meshgrid of x-coordinates
        y_plot_mesh: meshgrid of y-coordinates
        z_predict_ols: ols prediction of z on the meshgrid
        z_predict_ridge: ridge prediction of z on the meshgrid
        z_predict_lasso: lasso prediction of z on the meshgrid
        z_plot_franke: Actual Franke values on the meshgrid.

    """
    np.random.seed(2018)
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale * np.random.normal(0, 1, len(z))
    #   Centering the response
    z_intercept = np.mean(z)
    z = z - z_intercept
    # Scaling
    X = linear_regression.design_matrix_2D(x, y, degree)
    scaler = StandardScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)
    # Drop the constant column; the intercept is handled by centering z.
    X_scaled = X_scaled[:, 1:]

    # Setting up plotting grid
    x_plot = np.linspace(0, 1, plot_grid_size)
    y_plot = np.linspace(0, 1, plot_grid_size)
    x_plot_mesh, y_plot_mesh = np.meshgrid(x_plot, y_plot)
    x_plot_mesh_flat = x_plot_mesh.flatten()
    y_plot_mesh_flat = y_plot_mesh.flatten()

    X_plot_design = linear_regression.design_matrix_2D(x_plot_mesh_flat,
                                                       y_plot_mesh_flat,
                                                       degree)
    X_plot_design_scaled = scaler.transform(X_plot_design)
    # Drop the constant column to match the training design matrix.
    X_plot_design_scaled = X_plot_design_scaled[:, 1:]

    z_plot_franke = FrankeFunction(x_plot_mesh, y_plot_mesh)

    # OLS
    betas = linear_regression.OLS_SVD_2D(X_scaled, z)
    z_predict_flat_ols = (X_plot_design_scaled @ betas) + z_intercept
    z_predict_ols = z_predict_flat_ols.reshape(plot_grid_size, -1)

    # Ridge

    betas_ridge = linear_regression.Ridge_2D(X_scaled, z, ridge_lambda)
    z_predict_flat_ridge = (X_plot_design_scaled @ betas_ridge) + z_intercept
    z_predict_ridge = z_predict_flat_ridge.reshape(plot_grid_size, -1)
    # Lasso

    clf_Lasso = skl.Lasso(alpha=lasso_lambda,
                          fit_intercept=False,
                          max_iter=10000).fit(X_scaled, z)
    z_predict_flat_lasso = clf_Lasso.predict(
        X_plot_design_scaled) + z_intercept
    z_predict_lasso = z_predict_flat_lasso.reshape(plot_grid_size, -1)

    return x_plot_mesh, y_plot_mesh, z_predict_ols, z_predict_ridge, z_predict_lasso, z_plot_franke
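
# Hypothetical usage of franke_predictions: plot the OLS prediction surface
# next to the ground truth (a smaller plot_grid_size keeps this cheap).

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401, registers the 3d projection

x_mesh, y_mesh, z_ols, z_ridge, z_lasso, z_true = franke_predictions(
    n=1000, degree=10, plot_grid_size=200)

fig = plt.figure()
ax = fig.add_subplot(121, projection="3d")
ax.plot_surface(x_mesh, y_mesh, z_ols)
ax.set_title("OLS prediction")
ax = fig.add_subplot(122, projection="3d")
ax.plot_surface(x_mesh, y_mesh, z_true)
ax.set_title("Franke function")
plt.show()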