Example #1
def franke_predictions(n=1000, noise_scale=0.2, degree=20, ridge_lambda=1e-2, lasso_lambda=1e-5, plot_grid_size=2000):
    """ For a given sample size n, noise_scale, max_degree and penalty parameters: produces ols,
        ridge and lasso predictions, as well as ground truth on a plotting meshgrid with input grid size.

        output:
            x_plot_mesh: meshgrid of x-coordinates
            y_plot_mesh: meshgrid of y-coordinates
            z_predict_ols: ols prediction of z on the meshgrid
            z_predict_ridge: ridge prediction of z on the meshgrid
            z_predict_lasso: lasso prediction of z on the meshgrid
            z_plot_franke: Actual Franke values on the meshgrid.

    """

    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale*np.random.normal(0,1,len(z))
    #   Centering the response
    z_intercept = np.mean(z)
    z = z - z_intercept
    # Scaling
    X = linear_regression.design_matrix_2D(x,y,degree)
    scaler = StandardScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)

    #Setting up plotting grid
    x_plot = np.linspace(0,1,plot_grid_size)
    y_plot = np.linspace(0,1,plot_grid_size)
    x_plot_mesh, y_plot_mesh = np.meshgrid(x_plot,y_plot)
    x_plot_mesh_flat, y_plot_mesh_flat = x_plot_mesh.flatten(), y_plot_mesh.flatten()

    X_plot_design = linear_regression.design_matrix_2D(x_plot_mesh_flat,y_plot_mesh_flat,degree)
    X_plot_design_scaled = scaler.transform(X_plot_design)


    z_plot_franke = FrankeFunction(x_plot_mesh, y_plot_mesh)

    # OLS
    betas = linear_regression.OLS_SVD_2D(X_scaled, z)
    z_predict_flat_ols = (X_plot_design_scaled @ betas) + z_intercept
    z_predict_ols = z_predict_flat_ols.reshape(plot_grid_size,-1)

    # Ridge

    betas_ridge = linear_regression.Ridge_2D(X_scaled, z, ridge_lambda)
    z_predict_flat_ridge = (X_plot_design_scaled @ betas_ridge) + z_intercept
    z_predict_ridge = z_predict_flat_ridge.reshape(plot_grid_size,-1)
    # Lasso

    clf_Lasso = skl.Lasso(alpha=lasso_lambda,fit_intercept=False, max_iter=10000).fit(X_scaled,z)
    z_predict_flat_lasso = clf_Lasso.predict(X_plot_design_scaled) + z_intercept
    z_predict_lasso = z_predict_flat_lasso.reshape(plot_grid_size,-1)

    return x_plot_mesh, y_plot_mesh, z_predict_ols, z_predict_ridge, z_predict_lasso, z_plot_franke
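
# The examples in this file assume a FrankeFunction helper. A minimal sketch of
# the standard Franke test function on [0, 1]^2, which these snippets appear to
# rely on (the vectorized signature is an assumption):
import numpy as np

def FrankeFunction(x, y):
    # Standard Franke test function; accepts scalars or numpy arrays.
    term1 = 0.75 * np.exp(-(9 * x - 2) ** 2 / 4 - (9 * y - 2) ** 2 / 4)
    term2 = 0.75 * np.exp(-(9 * x + 1) ** 2 / 49 - (9 * y + 1) / 10)
    term3 = 0.5 * np.exp(-(9 * x - 7) ** 2 / 4 - (9 * y - 3) ** 2 / 4)
    term4 = -0.2 * np.exp(-(9 * x - 4) ** 2 - (9 * y - 7) ** 2)
    return term1 + term2 + term3 + term4
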
def Ridge_unit_test(min_deg=2, max_deg=5, tol=1e-6):
    """
    Tests our implementation of Ridge against scikit-learn up to a given tolerance
    """
    n = 100  # Number of data points
    # Prepare data set
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2
    degrees = np.arange(min_deg, max_deg + 1)
    for deg in degrees:
        # Set up design matrix
        X = linear_regression.design_matrix_2D(x, y, deg)  # use the loop's degree, not a fixed 5
        for lamb in np.linspace(0, 1, 10):
            # Compute optimal parameters using our homegrown Ridge regression
            beta = linear_regression.Ridge_2D(X=X, z=z, lamb=lamb)
            # Compute optimal parameters using sklearn
            skl_reg = Ridge(alpha=lamb, fit_intercept=False).fit(X, z)
            beta_skl = skl_reg.coef_

            for i in range(len(beta)):
                if abs(beta[i] - beta_skl[i]) >= tol:
                    print(
                        "Warning! Mismatch with SKL in Ridge_unit_test with tol = %.0e"
                        % tol)
                    print("Parameter no. %i for deg = %i" % (i, deg))
                    print("-> (OUR) beta = %8.12f" % beta[i])
                    print("-> (SKL) beta = %8.12f" % beta_skl[i])
    return
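
# Ridge_unit_test checks linear_regression.Ridge_2D. A minimal sketch of the
# closed-form ridge estimator it is presumably validating,
# beta = (X^T X + lambda * I)^(-1) X^T z (the name Ridge_2D_sketch is hypothetical):
import numpy as np

def Ridge_2D_sketch(X, z, lamb):
    # Solve the penalized normal equations rather than forming an explicit inverse.
    p = X.shape[1]
    return np.linalg.solve(X.T @ X + lamb * np.eye(p), X.T @ z)
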
def OLS_unit_test(min_deg=2, max_deg=5, tol=1e-6):
    """
    Tests our implementation of OLS against scikit-learn up to a given tolerance
    """
    n = 100  # Number of data points
    # Prepare data set
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2
    degrees = np.arange(min_deg, max_deg + 1)
    for deg in degrees:
        # Set up design matrix
        X = linear_regression.design_matrix_2D(x, y, deg)  # use the loop's degree, not a fixed 5
        # Compute optimal parameters using our homegrown OLS
        beta = linear_regression.OLS_2D(X=X, z=z)
        # Compute optimal parameters using sklearn
        skl_reg = LinearRegression(fit_intercept=False).fit(X, z)
        beta_skl = skl_reg.coef_

        for i in range(len(beta)):
            if abs(beta[i] - beta_skl[i]) >= tol:
                print(
                    "Warning! Mismatch with SKL in OLS_unit_test with tol = %.0e" % tol
                )
                print("Parameter no. %i for deg = %i" % (i, deg))
                print("-> (OUR) beta = %8.12f" % beta[i])
                print("-> (SKL) beta = %8.12f" % beta_skl[i])
    return
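
# OLS_SVD_2D, used throughout, presumably computes the least-squares solution
# via an SVD-based pseudo-inverse, which stays stable when high polynomial
# degrees make X nearly rank-deficient. A minimal sketch (hypothetical name):
import numpy as np

def OLS_SVD_2D_sketch(X, z):
    # np.linalg.pinv computes the Moore-Penrose pseudo-inverse through the SVD,
    # so beta = X^+ z minimizes ||X beta - z||_2 even when X^T X is singular.
    return np.linalg.pinv(X) @ z
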
Example #4
def terrain_analysis_plots(
    spacing=100,
    max_degree=20,
    n_lambdas=30,
    k_folds=5,
    n_bootstraps=50,
    do_boot=False,
    do_subset=False,
):

    # Setting up the terrain data:
    # Note structure! X-coordinates are on the rows of terrain_data
    # Point_selection.flatten() moves most rapidly over the x-coordinates
    # Meshgrids flattened also move most rapidly over the x-coordinates. Thus
    # this should make z(x,y).reshape(length_y,length_x) be consistent with terrain_data

    terrain_data = imread("../../datafiles/SRTM_data_Norway_1.tif")
    point_selection = terrain_data[:1801:spacing, :1801:spacing]  # Make square and downsample
    x_terrain_selection = np.linspace(0, 1, point_selection.shape[1])
    y_terrain_selection = np.linspace(0, 1, point_selection.shape[0])
    X_coord_selection, Y_coord_selection = np.meshgrid(x_terrain_selection,
                                                       y_terrain_selection)
    z_terrain_selection = point_selection.flatten()  # the response values
    x_terrain_selection_flat = X_coord_selection.flatten()  # the first degree feature variables
    y_terrain_selection_flat = Y_coord_selection.flatten()  # the first degree feature variables

    lambdas = np.logspace(-6, 0, n_lambdas)
    subset_lambdas = lambdas[::12]

    x = x_terrain_selection_flat
    y = y_terrain_selection_flat
    z = z_terrain_selection

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Centering
    z_intercept = np.mean(z)
    z = z - z_intercept

    z_train_intercept = np.mean(z_train)
    z_train = z_train - z_train_intercept
    z_test = z_test - z_train_intercept

    ##### Setup of problem is completed above.

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)
        # Scaling and feeding to CV.

        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)
        #    X_scaled[:,0] = 1 # Probably should not have this.

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        #    X_train_scaled[:,0] = 1 # Probably actually not
        #    X_test_scaled[:,0] = 1 # Have a bad feeling about how this might affect ridge/lasso.

        # OLS, get MSE for test and train set.

        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.

        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = crossvalidation.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg

        if do_boot:
            # All regression bootstraps at once

            lamb_ridge = best_ridge_lambda[degree]
            lamb_lasso = best_lasso_lambda[degree]

            (
                ridge_mse,
                ridge_bias,
                ridge_variance,
                lasso_mse,
                lasso_bias,
                lasso_variance,
                ols_mse,
                ols_bias,
                ols_variance,
            ) = bootstrap.bootstrap_all(X_train_scaled, X_test_scaled, z_train,
                                        z_test, n_bootstraps, lamb_lasso,
                                        lamb_ridge)

            (
                ridge_best_lambda_boot_mse[degree],
                ridge_best_lambda_boot_bias[degree],
                ridge_best_lambda_boot_variance[degree],
            ) = (ridge_mse, ridge_bias, ridge_variance)

            (
                lasso_best_lambda_boot_mse[degree],
                lasso_best_lambda_boot_bias[degree],
                lasso_best_lambda_boot_variance[degree],
            ) = (lasso_mse, lasso_bias, lasso_variance)

            (
                ols_boot_mse[degree],
                ols_boot_bias[degree],
                ols_boot_variance[degree],
            ) = (ols_mse, ols_bias, ols_variance)

        if do_subset:
            # Bootstrapping for a selection of lambdas for ridge and lasso
            subset_lambda_index = 0
            for lamb in subset_lambdas:

                (
                    ridge_mse,
                    ridge_bias,
                    ridge_variance,
                    lasso_mse,
                    lasso_bias,
                    lasso_variance,
                ) = bootstrap.bootstrap_ridge_lasso(
                    X_train_scaled,
                    X_test_scaled,
                    z_train,
                    z_test,
                    n_bootstraps,
                    lamb,  # use the current subset lambda for lasso
                    lamb,  # and for ridge; passing the best-lambda values here ignored the loop variable
                )

                (
                    ridge_subset_lambda_boot_mse[degree, subset_lambda_index],
                    ridge_subset_lambda_boot_bias[degree, subset_lambda_index],
                    ridge_subset_lambda_boot_variance[degree,
                                                      subset_lambda_index],
                ) = (ridge_mse, ridge_bias, ridge_variance)

                (
                    lasso_subset_lambda_boot_mse[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_bias[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_variance[degree,
                                                      subset_lambda_index],
                ) = (lasso_mse, lasso_bias, lasso_variance)

                subset_lambda_index += 1

    # Plots go here.

    plt.figure()
    plt.semilogy(ols_cv_mse, label="ols")
    plt.semilogy(best_ridge_mse, label="ridge")
    plt.semilogy(best_lasso_mse, label="lasso")
    plt.title(
        "CV MSE for OLS, Ridge and Lasso, with the best lambdas for each degree"
    )
    plt.legend()
    plt.show()

    # For a couple of degrees, plot cv mse vs lambda for ridge, will break program if max_degree < 8

    plt.figure()
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 1],
        label="degree = {}".format(max_degree - 1),
    )
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 2],
        label="degree = {}".format(max_degree - 2),
    )
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 3],
        label="degree = {}".format(max_degree - 3),
    )
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 5],
        label="degree = {}".format(max_degree - 5),
    )
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 7],
        label="degree = {}".format(max_degree - 7),
    )
    plt.legend()
    plt.show()

    # For a couple of degrees, plot cv mse vs lambda for lasso, will break program if max_degree < 8.

    plt.figure()
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 1],
        label="degree = {}".format(max_degree - 1),
    )
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 2],
        label="degree = {}".format(max_degree - 2),
    )
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 3],
        label="degree = {}".format(max_degree - 3),
    )
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 5],
        label="degree = {}".format(max_degree - 5),
    )
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 7],
        label="degree = {}".format(max_degree - 7),
    )
    plt.legend()
    plt.show()

    print("best ridge lambdas:")
    print(best_ridge_lambda)
    print("best lasso lambda")
    print(best_lasso_lambda)

    return
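
# terrain_analysis_plots leans on crossvalidation.k_fold_cv_all. Modeled on the
# inline CV loop in deprecated_franke_analysis_full further down, a sketch of
# what such a helper might compute; the fold construction and return order
# (per-lambda lasso MSEs, per-lambda ridge MSEs, scalar OLS MSE) are assumptions:
import numpy as np
import sklearn.linear_model as skl

def k_fold_cv_all_sketch(X, z, n_lambdas, lambdas, k_folds):
    indices = np.random.permutation(len(z))
    folds = np.array_split(indices, k_folds)
    ridge_score = np.zeros((n_lambdas, k_folds))
    lasso_score = np.zeros((n_lambdas, k_folds))
    ols_score = np.zeros(k_folds)
    for j in range(k_folds):
        test_ind = folds[j]
        train_ind = np.concatenate([folds[m] for m in range(k_folds) if m != j])
        X_tr, z_tr = X[train_ind], z[train_ind]
        X_te, z_te = X[test_ind], z[test_ind]
        # OLS on this fold via the pseudo-inverse.
        ols_score[j] = np.mean((z_te - X_te @ (np.linalg.pinv(X_tr) @ z_tr)) ** 2)
        for i in range(n_lambdas):
            lamb = lambdas[i]
            ridge_beta = np.linalg.solve(
                X_tr.T @ X_tr + lamb * np.eye(X.shape[1]), X_tr.T @ z_tr)
            ridge_score[i, j] = np.mean((z_te - X_te @ ridge_beta) ** 2)
            lasso = skl.Lasso(alpha=lamb, fit_intercept=False).fit(X_tr, z_tr)
            lasso_score[i, j] = np.mean((z_te - lasso.predict(X_te)) ** 2)
    return lasso_score.mean(axis=1), ridge_score.mean(axis=1), ols_score.mean()
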
Example #5
def terrain_predictions(spacing=40,
                        degree=20,
                        ridge_lambda=1e-2,
                        lasso_lambda=1e-5):
    """For a given sampling spacing, degree and penalty parameters: produces ols,
    ridge and lasso predictions, as well as ground truth on a plotting meshgrid.

    output:
        x_plot_mesh: meshgrid of x-coordinates
        y_plot_mesh: meshgrid of y-coordinates
        z_predict_ols: ols prediction of z on the meshgrid
        z_predict_ridge: ridge prediction of z on the meshgrid
        z_predict_lasso: lasso prediction of z on the meshgrid
        z_true: Actual terrain values on the meshgrid.

    """
    # #control variables, restricted to upper half of plot currently.
    # spacing = 10
    # degree = 25
    # ridge_lambda = 1e-2
    # lasso_lambda = 1e-5

    # Setting up the terrain data:
    # Note structure! X-coordinates are on the rows of terrain_data
    # Point_selection.flatten() moves most rapidly over the x-coordinates
    # Meshgrids flattened also move most rapidly over the x-coordinates. Thus
    # this should make z(x,y).reshape(length_y,length_x) be consistent with terrain_data
    terrain_data = imread("../../datafiles/SRTM_data_Norway_1.tif")
    point_selection = terrain_data[:1801:spacing, :1801:spacing]  # Make square and downsample
    x_terrain_selection = np.linspace(0, 1, point_selection.shape[1])
    y_terrain_selection = np.linspace(0, 1, point_selection.shape[0])
    X_coord_selection, Y_coord_selection = np.meshgrid(x_terrain_selection,
                                                       y_terrain_selection)
    z_terrain_selection = point_selection.flatten()  # the response values
    x_terrain_selection_flat = X_coord_selection.flatten()  # the first degree feature variables
    y_terrain_selection_flat = Y_coord_selection.flatten()  # the first degree feature variables

    x = x_terrain_selection_flat
    y = y_terrain_selection_flat
    z = z_terrain_selection

    # Centering
    z_intercept = np.mean(z)
    z = z - z_intercept
    # Scaling
    X = linear_regression.design_matrix_2D(x, y, degree)
    scaler = StandardScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)

    x_plot = np.linspace(0, 1, 1801)
    y_plot = np.linspace(0, 1, 1801)
    x_plot_mesh, y_plot_mesh = np.meshgrid(x_plot, y_plot)
    x_plot_mesh_flat, y_plot_mesh_flat = x_plot_mesh.flatten(), y_plot_mesh.flatten()

    X_plot_design = linear_regression.design_matrix_2D(x_plot_mesh_flat,
                                                       y_plot_mesh_flat,
                                                       degree)
    X_plot_design_scaled = scaler.transform(X_plot_design)

    # Ground truth

    z_true = terrain_data[:1801, :1801]

    # OLS
    betas = linear_regression.OLS_SVD_2D(X_scaled, z)
    z_predict_flat_ols = (X_plot_design_scaled @ betas) + z_intercept
    z_predict_ols = z_predict_flat_ols.reshape(-1, 1801)

    # Ridge
    betas_ridge = linear_regression.Ridge_2D(X_scaled, z, ridge_lambda)
    z_predict_flat_ridge = (X_plot_design_scaled @ betas_ridge) + z_intercept
    z_predict_ridge = z_predict_flat_ridge.reshape(-1, 1801)
    # Lasso

    clf_Lasso = skl.Lasso(alpha=lasso_lambda,
                          fit_intercept=False).fit(X_scaled, z)
    z_predict_flat_lasso = clf_Lasso.predict(
        X_plot_design_scaled) + z_intercept
    z_predict_lasso = z_predict_flat_lasso.reshape(-1, 1801)

    return x_plot_mesh, y_plot_mesh, z_predict_ols, z_predict_ridge, z_predict_lasso, z_true
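
# A minimal usage sketch for terrain_predictions, assuming matplotlib.pyplot is
# imported as plt (as the plotting code above does) and the data file exists:
if __name__ == "__main__":
    x_mesh, y_mesh, z_ols, z_ridge, z_lasso, z_true = terrain_predictions(
        spacing=40, degree=20, ridge_lambda=1e-2, lasso_lambda=1e-5)
    fig, axes = plt.subplots(1, 4, figsize=(16, 4))
    for ax, surface, title in zip(axes, (z_true, z_ols, z_ridge, z_lasso),
                                  ("Terrain", "OLS", "Ridge", "Lasso")):
        im = ax.imshow(surface, cmap="terrain")
        ax.set_title(title)
        fig.colorbar(im, ax=ax)
    plt.show()
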
Example #7
def franke_analysis_plots(
    n=1000,
    noise_scale=0.2,
    max_degree=20,
    n_bootstraps=100,
    k_folds=5,
    n_lambdas=30,
    do_boot=True,
    do_subset=True,
):

    # Note that max_degree is the number of degrees, i.e. degree 0 is included.

    # n = 500
    # noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale * np.random.normal(0, 1, len(z))
    # max_degree = 15
    # n_lambdas = 30
    # n_bootstraps = 100
    # k_folds = 5
    lambdas = np.logspace(-6, 0, n_lambdas)
    subset_lambdas = lambdas[::12]

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    #   Centering the response
    z_intercept = np.mean(z)
    z = z - z_intercept

    #   Centering the training response; the test set is shifted by the training mean
    z_train_intercept = np.mean(z_train)
    z_train = z_train - z_train_intercept
    z_test = z_test - z_train_intercept

    ########### Setup of problem is completed above.

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)
        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)
        #    X_scaled[:,0] = 1 # Maybe not for ridge+lasso. Don't want to penalize constants...

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        #    X_train_scaled[:,0] = 1 #maybe not for ridge+lasso
        #    X_test_scaled[:,0] = 1 #maybe not for ridge+lasso

        # OLS, get MSE for test and train set.

        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree. Also, gets
        # ols_CV_MSE

        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = crossvalidation.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg

        if do_boot:
            # All regression bootstraps at once
            lamb_ridge = best_ridge_lambda[degree]
            lamb_lasso = best_lasso_lambda[degree]

            (
                ridge_mse,
                ridge_bias,
                ridge_variance,
                lasso_mse,
                lasso_bias,
                lasso_variance,
                ols_mse,
                ols_bias,
                ols_variance,
            ) = bootstrap.bootstrap_all(X_train_scaled, X_test_scaled, z_train,
                                        z_test, n_bootstraps, lamb_lasso,
                                        lamb_ridge)

            (
                ridge_best_lambda_boot_mse[degree],
                ridge_best_lambda_boot_bias[degree],
                ridge_best_lambda_boot_variance[degree],
            ) = (ridge_mse, ridge_bias, ridge_variance)

            (
                lasso_best_lambda_boot_mse[degree],
                lasso_best_lambda_boot_bias[degree],
                lasso_best_lambda_boot_variance[degree],
            ) = (lasso_mse, lasso_bias, lasso_variance)

            (
                ols_boot_mse[degree],
                ols_boot_bias[degree],
                ols_boot_variance[degree],
            ) = (ols_mse, ols_bias, ols_variance)

        if do_subset:
            # Bootstrapping for a selection of lambdas for ridge and lasso
            subset_lambda_index = 0
            for lamb in subset_lambdas:

                (
                    ridge_mse,
                    ridge_bias,
                    ridge_variance,
                    lasso_mse,
                    lasso_bias,
                    lasso_variance,
                ) = bootstrap.bootstrap_ridge_lasso(
                    X_train_scaled,
                    X_test_scaled,
                    z_train,
                    z_test,
                    n_bootstraps,
                    lamb,  # use the current subset lambda for lasso
                    lamb,  # and for ridge; passing the best-lambda values here ignored the loop variable
                )

                (
                    ridge_subset_lambda_boot_mse[degree, subset_lambda_index],
                    ridge_subset_lambda_boot_bias[degree, subset_lambda_index],
                    ridge_subset_lambda_boot_variance[degree,
                                                      subset_lambda_index],
                ) = (ridge_mse, ridge_bias, ridge_variance)

                (
                    lasso_subset_lambda_boot_mse[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_bias[degree, subset_lambda_index],
                    lasso_subset_lambda_boot_variance[degree,
                                                      subset_lambda_index],
                ) = (lasso_mse, lasso_bias, lasso_variance)

                subset_lambda_index += 1

    # Plots go here.

    # CV MSE for OLS:
    plt.figure()
    plt.semilogy(ols_cv_mse)
    plt.title("OLS CV MSE")
    plt.show()

    # Bootstrap for OLS:
    plt.figure()
    plt.semilogy(ols_boot_mse, label="mse")
    plt.semilogy(ols_boot_bias, label="bias")
    plt.semilogy(ols_boot_variance, label="variance")
    plt.title("OLS bias-variance-MSE by bootstrap")
    plt.legend()
    plt.show()

    # CV for Ridge, best+low+middle+high lambdas
    plt.figure()
    plt.semilogy(best_ridge_mse, label="best for each degree")
    plt.semilogy(ridge_lamb_deg_mse[:, 0],
                 label="lambda={}".format(lambdas[0]))
    plt.semilogy(ridge_lamb_deg_mse[:, 12],
                 label="lambda={}".format(lambdas[12]))
    plt.semilogy(ridge_lamb_deg_mse[:, 24],
                 label="lambda={}".format(lambdas[24]))
    plt.title(
        "Ridge CV MSE for best lambda at each degree, plus for given lambdas across all degrees"
    )
    plt.legend()
    plt.show()

    # Bootstrap for the best ridge lambdas:
    plt.figure()
    plt.semilogy(ridge_best_lambda_boot_mse, label="mse")
    plt.semilogy(ridge_best_lambda_boot_bias, label="bias")
    plt.semilogy(ridge_best_lambda_boot_variance, label="variance")
    plt.title("Best ridge lambdas for each degree bootstrap")
    plt.legend()
    plt.show()

    # Bootstrap only bias and variance for low+middle+high ridge lambdas

    plt.figure()
    plt.semilogy(ridge_subset_lambda_boot_bias[:, 0],
                 label="bias, lambda = {}".format(subset_lambdas[0]))
    plt.semilogy(
        ridge_subset_lambda_boot_variance[:, 0],
        label="variance, lambda = {}".format(subset_lambdas[0]),
    )
    plt.semilogy(ridge_subset_lambda_boot_bias[:, 1],
                 label="bias, lambda = {}".format(subset_lambdas[1]))
    plt.semilogy(
        ridge_subset_lambda_boot_variance[:, 1],
        label="variance, lambda = {}".format(subset_lambdas[1]),
    )
    plt.semilogy(ridge_subset_lambda_boot_bias[:, 2],
                 label="bias, lambda = {}".format(subset_lambdas[2]))
    plt.semilogy(
        ridge_subset_lambda_boot_variance[:, 2],
        label="variance, lambda = {}".format(subset_lambdas[2]),
    )
    plt.title("Bias+variance for low, middle, high ridge lambdas")
    plt.legend()
    plt.show()

    # CV for lasso, best+low+middle+high lambdas
    plt.figure()
    plt.semilogy(best_lasso_mse, label="best lambda for each degree")
    plt.semilogy(lasso_lamb_deg_mse[:, 0],
                 label="lambda={}".format(lambdas[0]))
    plt.semilogy(lasso_lamb_deg_mse[:, 12],
                 label="lambda={}".format(lambdas[12]))
    plt.semilogy(lasso_lamb_deg_mse[:, 24],
                 label="lambda={}".format(lambdas[24]))
    plt.title(
        "Lasso CV MSE for best lambda at each degree, plus for given lambdas across all degrees"
    )
    plt.legend()
    plt.show()

    # Bootstrap for the best lasso lambdas:
    plt.figure()
    plt.semilogy(lasso_best_lambda_boot_mse, label="mse")
    plt.semilogy(lasso_best_lambda_boot_bias, label="bias")
    plt.semilogy(lasso_best_lambda_boot_variance, label="variance")
    plt.title("Best lasso lambdas for each degree bootstrap")
    plt.legend()
    plt.show()

    # Bootstrap only bias and variance for low+middle+high lasso lambdas

    plt.figure()
    plt.semilogy(lasso_subset_lambda_boot_bias[:, 0],
                 label="bias, lambda = {}".format(subset_lambdas[0]))
    plt.semilogy(
        lasso_subset_lambda_boot_variance[:, 0],
        label="variance, lambda = {}".format(subset_lambdas[0]),
    )
    plt.semilogy(lasso_subset_lambda_boot_bias[:, 1],
                 label="bias, lambda = {}".format(subset_lambdas[1]))
    plt.semilogy(
        lasso_subset_lambda_boot_variance[:, 1],
        label="variance, lambda = {}".format(subset_lambdas[1]),
    )
    plt.semilogy(lasso_subset_lambda_boot_bias[:, 2],
                 label="bias, lambda = {}".format(subset_lambdas[2]))
    plt.semilogy(
        lasso_subset_lambda_boot_variance[:, 2],
        label="variance, lambda = {}".format(subset_lambdas[2]),
    )
    plt.title("Bias+variance for low, middle, high lasso lambdas")
    plt.legend()
    plt.show()

    # For a couple of degrees, plot cv mse vs lambda for ridge, will break program if max_degree < 8

    plt.figure()
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 1],
        label="degree = {}".format(max_degree - 1),
    )
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 2],
        label="degree = {}".format(max_degree - 2),
    )
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 3],
        label="degree = {}".format(max_degree - 3),
    )
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 5],
        label="degree = {}".format(max_degree - 5),
    )
    plt.plot(
        np.log10(lambdas),
        ridge_lamb_deg_mse[max_degree - 7],
        label="degree = {}".format(max_degree - 7),
    )
    plt.legend()
    plt.show()

    # For a couple of degrees, plot cv mse vs lambda for lasso, will break program if max_degree < 8.

    plt.figure()
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 1],
        label="degree = {}".format(max_degree - 1),
    )
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 2],
        label="degree = {}".format(max_degree - 2),
    )
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 3],
        label="degree = {}".format(max_degree - 3),
    )
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 5],
        label="degree = {}".format(max_degree - 5),
    )
    plt.plot(
        np.log10(lambdas),
        lasso_lamb_deg_mse[max_degree - 7],
        label="degree = {}".format(max_degree - 7),
    )
    plt.legend()
    plt.show()

    print("best ridge lambda:")
    print(best_ridge_lambda)
    print("best lasso lambda:")
    print(best_lasso_lambda)
    return
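
# franke_analysis_plots relies on bootstrap.bootstrap_all. Modeled on the
# inline bootstrap loop in deprecated_franke_analysis_full below, a sketch of
# what it presumably does: resample training rows with replacement, refit each
# model, predict on the fixed test set, and decompose the test error. The name
# and return order (ridge, lasso, OLS triples) mirror how it is unpacked above:
import numpy as np
import sklearn.linear_model as skl

def bootstrap_all_sketch(X_train, X_test, z_train, z_test, n_bootstraps,
                         lamb_lasso, lamb_ridge):
    n_test, p = len(z_test), X_train.shape[1]
    z_ols = np.zeros((n_test, n_bootstraps))
    z_ridge = np.zeros((n_test, n_bootstraps))
    z_lasso = np.zeros((n_test, n_bootstraps))
    for i in range(n_bootstraps):
        rows = np.random.randint(0, len(z_train), len(z_train))
        X_boot, z_boot = X_train[rows], z_train[rows]
        z_ols[:, i] = X_test @ (np.linalg.pinv(X_boot) @ z_boot)
        ridge_beta = np.linalg.solve(
            X_boot.T @ X_boot + lamb_ridge * np.eye(p), X_boot.T @ z_boot)
        z_ridge[:, i] = X_test @ ridge_beta
        lasso = skl.Lasso(alpha=lamb_lasso, fit_intercept=False).fit(X_boot, z_boot)
        z_lasso[:, i] = lasso.predict(X_test)

    def decompose(z_model):
        mse = np.mean((z_test[:, None] - z_model) ** 2)
        bias = np.mean((z_test - np.mean(z_model, axis=1)) ** 2)
        variance = np.mean(np.var(z_model, axis=1))
        return mse, bias, variance

    return (*decompose(z_ridge), *decompose(z_lasso), *decompose(z_ols))
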
Example #8
def part_1a():
    # Sample the Franke function at n randomly chosen points
    n = 100
    deg = 5
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z_noisy = z + noise_scale * np.random.normal(0, 1, len(z))
    # Making the design matrix
    X = linear_regression.design_matrix_2D(x, y, deg)
    # Find the least-squares solution
    beta = linear_regression.OLS_2D(X, z)
    beta_noisy = linear_regression.OLS_2D(X, z_noisy)

    # Split into training and test data with ratio 0.2
    X_train, X_test, z_train, z_test = train_test_split(X, z, test_size=0.2)
    # Scale data according to sklearn, beware possible problems with intercept and std.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # For ridge and lasso, lasso directly from sklearn.
    # For given polynomial degree, input X and z. X should be prescaled.

    n_lambdas = 100
    lambdas = np.logspace(-3, 0, n_lambdas)
    k_folds = 5
    ridge_fold_score = np.zeros((n_lambdas, k_folds))  # shape must be passed as a tuple
    lasso_fold_score = np.zeros((n_lambdas, k_folds))
    test_list, train_list = k_fold_selection(z, k_folds)
    for i in range(n_lambdas):
        lamb = lambdas[i]  # lamb was previously undefined in this loop
        for j in range(k_folds):
            test_ind_cv = test_list[j]
            train_ind_cv = train_list[j]
            X_train_cv = X[train_ind_cv]
            z_train_cv = z[train_ind_cv]
            X_test_cv = X[test_ind_cv]
            z_test_cv = z[test_ind_cv]
            clf_Lasso = skl.Lasso(alpha=lamb).fit(X_train_cv, z_train_cv)
            z_lasso_test = clf_Lasso.predict(X_test_cv)
            ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv,
                                                     lamb)
            z_ridge_test = X_test_cv @ ridge_betas
            ridge_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_ridge_test)
            lasso_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_lasso_test)

    lasso_cv_mse = np.mean(lasso_fold_score, axis=1, keepdims=True)
    ridge_cv_mse = np.mean(ridge_fold_score, axis=1, keepdims=True)
    best_lambda_lasso = lambdas[np.argmin(lasso_cv_mse)]
    best_lambda_ridge = lambdas[np.argmin(ridge_cv_mse)]

    # Bootstrap skeleton
    # For given polynomial degree, input X_train, z_train, X_test and z_test.
    # X_train and X_test should be scaled?
    n_bootstraps = 100
    z_boot_model = np.zeros((len(z_test), n_bootstraps))  # shape must be passed as a tuple
    for bootstrap_number in range(n_bootstraps):
        # For the number of data value points (len_z) in the training set, pick a random
        # data value (z_train[random]) and its corresponding row in the design matrix
        shuffle = np.random.randint(0, len(z_train), len(z_train))
        X_boot, z_boot = X_train[shuffle], z_train[shuffle]
        betas_boot = linear_regression.OLS_SVD_2D(X_boot, z_boot)
        #betas_boot = linear_regression.Ridge_2D(X_boot, z_boot, lamb) #Ridge, given lambda
        #clf_Lasso = skl.Lasso(alpha=lamb).fit(X_boot,z_boot)
        #z_boot_model[:,bootstrap_number] = clf_Lasso.predict(X_test) #Lasso, given lambda
        z_boot_model[:, bootstrap_number] = X_test @ betas_boot
    mse, bias, variance = stat_tools.compute_mse_bias_variance(
        z_test, z_boot_model)

    # Check MSE
    print("MSE = %.3f" %
          MSE(z, linear_regression.evaluate_poly_2D(x, y, beta, deg)))
    # And with noise
    print("Including standard normal noise scaled by {}, MSE = {:.3f}".format(
        noise_scale,
        MSE(z_noisy, linear_regression.evaluate_poly_2D(x, y, beta_noisy,
                                                        deg))))
    # Evaluate the Franke function & least-squares
    x = np.linspace(0, 1, 30)
    y = np.linspace(0, 1, 30)
    X, Y = np.meshgrid(x, y)

    z_analytic = FrankeFunction(X, Y)
    z_fit = linear_regression.evaluate_poly_2D(X, Y, beta, deg)
    z_fit_noisy = linear_regression.evaluate_poly_2D(X, Y, beta_noisy, deg)

    fig = plt.figure()

    # Plot the analytic curve
    ax = fig.add_subplot(1, 3, 1, projection="3d")
    ax.set_title("Franke Function")
    ax.view_init(azim=45)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("z")
    surf = ax.plot_surface(X, Y, z_analytic, cmap=cm.coolwarm)

    # Plot the fitted curve
    ax = fig.add_subplot(1, 3, 2, projection="3d")
    ax.set_title("OLS")
    ax.view_init(azim=45)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("z")
    surf = ax.plot_surface(X, Y, z_fit, cmap=cm.coolwarm)

    # Plot fitted curve, with noisy beta estimates
    ax = fig.add_subplot(1, 3, 3, projection="3d")
    ax.set_title("OLS with noise")
    ax.view_init(azim=45)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("z")
    surf = ax.plot_surface(X, Y, z_fit_noisy, cmap=cm.coolwarm)

    plt.show()

    return
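
# part_1a uses stat_tools.compute_mse_bias_variance on a (len(z_test),
# n_bootstraps) prediction matrix. A sketch of the usual bootstrap
# decomposition it presumably implements: per test point, the mean squared
# error over bootstrap models splits into the squared bias of the mean
# prediction plus the variance of the predictions (hypothetical name):
import numpy as np

def compute_mse_bias_variance_sketch(z_test, z_model):
    mse = np.mean((z_test[:, None] - z_model) ** 2)
    bias = np.mean((z_test - np.mean(z_model, axis=1)) ** 2)  # squared bias
    variance = np.mean(np.var(z_model, axis=1))
    # Up to floating-point error, mse == bias + variance here.
    return mse, bias, variance
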
Example #9
def terrain_analysis():
    # Setting up the terrain data:
    terrain_data = imread('../datafiles/SRTM_data_Norway_1.tif')
    x_terrain = np.arange(terrain_data.shape[1])
    y_terrain = np.arange(terrain_data.shape[0])
    X_coord, Y_coord = np.meshgrid(x_terrain, y_terrain)
    z_terrain = terrain_data.flatten()  # the response values
    x_terrain_flat = X_coord.flatten()  # the first degree feature variables
    y_terrain_flat = Y_coord.flatten()  # the first degree feature variables

    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)
    subset_lambdas = lambdas[::5]

    #### Should select a subset in some manner of the terrain points
    #### Should probably also make the feature variables be float that range from [0,1]

    x = x_terrain_flat[::20]
    y = y_terrain_flat[::20]
    z = z_terrain[::20]

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)
        # Scaling and feeding to CV.

        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)
        X_scaled[:, 0] = 1  # Probably should not have this.

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:, 0] = 1  # Probably actually not
        X_test_scaled[:, 0] = 1  # Have a bad feeling about how this might affect ridge/lasso.

        # OLS, get MSE for test and train set.

        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.

        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = stat_tools.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg  # store per-degree value instead of clobbering the array

        # All regression bootstraps at once

        lamb_ridge = best_ridge_lambda[degree]
        lamb_lasso = best_lasso_lambda[degree]

        ridge_mse, ridge_bias, ridge_variance, lasso_mse, lasso_bias, lasso_variance, ols_mse, ols_bias, ols_variance = \
        stat_tools.bootstrap_all(X_train_scaled, X_test_scaled, z_train, z_test, n_bootstraps, lamb_lasso, lamb_ridge)

        ridge_best_lambda_boot_mse[degree], ridge_best_lambda_boot_bias[degree], \
        ridge_best_lambda_boot_variance[degree] = ridge_mse, ridge_bias, ridge_variance

        lasso_best_lambda_boot_mse[degree], lasso_best_lambda_boot_bias[degree], \
        lasso_best_lambda_boot_variance[degree] = lasso_mse, lasso_bias, lasso_variance

        ols_boot_mse[degree], ols_boot_bias[degree], \
        ols_boot_variance[degree] = ols_mse, ols_bias, ols_variance

        # Bootstrapping for a selection of lambdas for ridge and lasso
        subset_lambda_index = 0
        for lamb in subset_lambdas:

            ridge_mse, ridge_bias, ridge_variance, lasso_mse, lasso_bias, lasso_variance = \
            stat_tools.bootstrap_ridge_lasso(X_train_scaled, X_test_scaled, z_train, z_test, n_bootstraps, lamb_lasso, lamb_ridge)

            ridge_subset_lambda_boot_mse[degree, subset_lambda_index ], ridge_subset_lambda_boot_bias[degree, subset_lambda_index ], \
            ridge_subset_lambda_boot_variance[degree, subset_lambda_index ] = ridge_mse, ridge_bias, ridge_variance

            lasso_subset_lambda_boot_mse[degree, subset_lambda_index ], lasso_subset_lambda_boot_bias[degree, subset_lambda_index ], \
            lasso_subset_lambda_boot_variance[degree, subset_lambda_index ] = lasso_mse, lasso_bias, lasso_variance

            subset_lambda_index += 1


    ##### All necessary computations are done above; the plotting part would follow here.

    return
Example #10
def franke_analysis():
    n = 1000
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale * np.random.normal(0, 1, len(z))
    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)
    subset_lambdas = lambdas[::5]

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)
        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)
        X_scaled[:, 0] = 1  # Maybe not for ridge+lasso. Don't want to penalize constants...

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:, 0] = 1  #maybe not for ridge+lasso
        X_test_scaled[:, 0] = 1  #maybe not for ridge+lasso

        # OLS, get MSE for test and train set.

        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree. Also, gets
        # ols_CV_MSE

        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = stat_tools.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg  # store per-degree value instead of clobbering the array

        # All regressions bootstraps at once
        lamb_ridge = best_ridge_lambda[degree]
        lamb_lasso = best_lasso_lambda[degree]

        ridge_mse, ridge_bias, ridge_variance, lasso_mse, lasso_bias, lasso_variance, ols_mse, ols_bias, ols_variance = \
        stat_tools.bootstrap_all(X_train_scaled, X_test_scaled, z_train, z_test, n_bootstraps, lamb_lasso, lamb_ridge)

        ridge_best_lambda_boot_mse[degree], ridge_best_lambda_boot_bias[degree], \
        ridge_best_lambda_boot_variance[degree] = ridge_mse, ridge_bias, ridge_variance

        lasso_best_lambda_boot_mse[degree], lasso_best_lambda_boot_bias[degree], \
        lasso_best_lambda_boot_variance[degree] = lasso_mse, lasso_bias, lasso_variance

        ols_boot_mse[degree], ols_boot_bias[degree], \
        ols_boot_variance[degree] = ols_mse, ols_bias, ols_variance

        # Bootstrapping for a selection of lambdas for ridge and lasso
        subset_lambda_index = 0
        for lamb in subset_lambdas:

            ridge_mse, ridge_bias, ridge_variance, lasso_mse, lasso_bias, lasso_variance = \
            stat_tools.bootstrap_ridge_lasso(X_train_scaled, X_test_scaled, z_train, z_test, n_bootstraps, lamb_lasso, lamb_ridge)

            ridge_subset_lambda_boot_mse[degree, subset_lambda_index ], ridge_subset_lambda_boot_bias[degree, subset_lambda_index ], \
            ridge_subset_lambda_boot_variance[degree, subset_lambda_index ] = ridge_mse, ridge_bias, ridge_variance

            lasso_subset_lambda_boot_mse[degree, subset_lambda_index ], lasso_subset_lambda_boot_bias[degree, subset_lambda_index ], \
            lasso_subset_lambda_boot_variance[degree, subset_lambda_index ] = lasso_mse, lasso_bias, lasso_variance

            subset_lambda_index += 1
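
# Every example builds its features with linear_regression.design_matrix_2D. A
# sketch of a 2-D polynomial design matrix containing all monomials x^i * y^j
# with i + j <= degree, constant column first; the exact column ordering in the
# original helper is an assumption:
import numpy as np

def design_matrix_2D_sketch(x, y, degree):
    # (degree + 1)(degree + 2) / 2 columns for a full bivariate polynomial basis.
    n_cols = (degree + 1) * (degree + 2) // 2
    X = np.ones((len(x), n_cols))
    col = 1
    for total in range(1, degree + 1):
        for j in range(total + 1):
            X[:, col] = x ** (total - j) * y ** j
            col += 1
    return X
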
def deprecated_franke_analysis_full():
    n = 1000
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale*np.random.normal(0,1,len(z))
    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3,0,n_lambdas)
    subset_lambdas = lambdas[::5]

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros((max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x,y,degree)
        X_train, X_test, z_train, z_test = train_test_split(X, z, test_size = 0.2)
        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        X[:,0] = 1

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:,0] = 1
        X_test_scaled[:,0] = 1

        # OLS, get MSE for test and train set.

        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)


        # CV, find best lambdas and get mse vs lambda for given degree.

        ridge_fold_score = np.zeros((n_lambdas, k_folds))
        lasso_fold_score = np.zeros((n_lambdas, k_folds))
        test_list, train_list = stat_tools.k_fold_selection(z, k_folds)
        for i in range(n_lambdas):
            lamb = lambdas[i]
            for j in range(k_folds):
                test_ind_cv = test_list[j]
                train_ind_cv = train_list[j]
                X_train_cv = X[train_ind_cv]
                z_train_cv = z[train_ind_cv]
                X_test_cv = X[test_ind_cv]
                z_test_cv = z[test_ind_cv]
                clf_Lasso = skl.Lasso(alpha=lamb,fit_intercept=False).fit(X_train_cv,z_train_cv)
                z_lasso_test = clf_Lasso.predict(X_test_cv)
                ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv, lamb)
                z_ridge_test = X_test_cv @ ridge_betas
                ridge_fold_score[i,j] = stat_tools.MSE(z_test_cv, z_ridge_test)
                lasso_fold_score[i,j] = stat_tools.MSE(z_test_cv, z_lasso_test)

        lasso_cv_mse = np.mean(lasso_fold_score, axis=1)
        ridge_cv_mse = np.mean(ridge_fold_score, axis=1)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse

        # All regressions bootstraps at once
        lamb_ridge = best_ridge_lambda[degree]
        lamb_lasso = best_lasso_lambda[degree]
        z_boot_ols = np.zeros((len(z_test),n_bootstraps))
        z_boot_ridge = np.zeros((len(z_test),n_bootstraps))
        z_boot_lasso= np.zeros((len(z_test),n_bootstraps))
        for i in range(n_bootstraps):
            shuffle = np.random.randint(0,len(z_train),len(z_train))
            X_boot, z_boot = X_train_scaled[shuffle] , z_train[shuffle]
            betas_boot_ols = linear_regression.OLS_SVD_2D(X_boot, z_boot)
            betas_boot_ridge = linear_regression.Ridge_2D(X_boot, z_boot, lamb_ridge) #Ridge, given lambda
            clf_Lasso = skl.Lasso(alpha=lamb_lasso,fit_intercept=False).fit(X_boot,z_boot)
            z_boot_lasso[:,i] = clf_Lasso.predict(X_test_scaled) #Lasso, given lambda
            z_boot_ridge[:,i] = X_test_scaled @ betas_boot_ridge
            z_boot_ols[:,i] = X_test_scaled @ betas_boot_ols

        ridge_best_lambda_boot_mse[degree], ridge_best_lambda_boot_bias[degree], \
        ridge_best_lambda_boot_variance[degree] = stat_tools.compute_mse_bias_variance(z_test, z_boot_ridge)

        lasso_best_lambda_boot_mse[degree], lasso_best_lambda_boot_bias[degree], \
        lasso_best_lambda_boot_variance[degree] = stat_tools.compute_mse_bias_variance(z_test, z_boot_lasso)

        ols_boot_mse[degree], ols_boot_bias[degree], \
        ols_boot_variance[degree] = stat_tools.compute_mse_bias_variance(z_test, z_boot_ols)

        # Bootstrapping for a selection of lambdas for ridge and lasso
        subset_lambda_index = 0
        for lamb in subset_lambdas:
            z_boot_ridge = np.zeros((len(z_test),n_bootstraps))
            z_boot_lasso= np.zeros((len(z_test),n_bootstraps))
            for i in range(n_bootstraps):
                shuffle = np.random.randint(0,len(z_train),len(z_train))
                X_boot, z_boot = X_train_scaled[shuffle] , z_train[shuffle]
                betas_boot_ridge = linear_regression.Ridge_2D(X_boot, z_boot, lamb) #Ridge, given lambda
                clf_Lasso = skl.Lasso(alpha=lamb,fit_intercept=False).fit(X_boot,z_boot)
                z_boot_lasso[:,i] = clf_Lasso.predict(X_test_scaled) #Lasso, given lambda
                z_boot_ridge[:,i] = X_test_scaled @ betas_boot_ridge

            ridge_subset_lambda_boot_mse[degree, subset_lambda_index ], ridge_subset_lambda_boot_bias[degree, subset_lambda_index ], \
            ridge_subset_lambda_boot_variance[degree, subset_lambda_index ] = stat_tools.compute_mse_bias_variance(z_test, z_boot_ridge)

            lasso_subset_lambda_boot_mse[degree, subset_lambda_index ], lasso_subset_lambda_boot_bias[degree, subset_lambda_index ], \
            lasso_subset_lambda_boot_variance[degree, subset_lambda_index ] = stat_tools.compute_mse_bias_variance(z_test, z_boot_lasso)

            subset_lambda_index  += 1
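
# The deprecated analyses unpack test_list, train_list from
# stat_tools.k_fold_selection. A sketch of such an index-splitting helper,
# assuming it shuffles the indices of z and returns one test/train index pair
# per fold (hypothetical name):
import numpy as np

def k_fold_selection_sketch(z, k_folds):
    indices = np.random.permutation(len(z))
    folds = np.array_split(indices, k_folds)
    test_list = [folds[j] for j in range(k_folds)]
    train_list = [
        np.concatenate([folds[m] for m in range(k_folds) if m != j])
        for j in range(k_folds)
    ]
    return test_list, train_list
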
def terrain_analysis():
    # Setting up the terrain data:
    terrain_data = imread('../datafiles/SRTM_data_Norway_1.tif')
    x_terrain = np.arange(terrain_data.shape[1])  # column indices as x-coordinates, per the problem description
    y_terrain = np.arange(terrain_data.shape[0])  # row indices as y-coordinates
    X_coord, Y_coord = np.meshgrid(x_terrain, y_terrain)
    z_terrain = terrain_data.flatten()  # the response values
    x_terrain_flat = X_coord.flatten()  # the first-degree feature variables
    y_terrain_flat = Y_coord.flatten()  # the first-degree feature variables
    max_degree = 10
    n_lambdas = 15
    n_bootstraps = 20
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)  # log-spaced lambda grid from 1e-3 to 1

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    # Actual computations
    for degree in range(max_degree):
        X_terrain_design = linear_regression.design_matrix_2D(x_terrain_flat,y_terrain_flat,degree)
        X_train, X_test, z_train, z_test = train_test_split(X_terrain_design, z_terrain, test_size = 0.2)
        # Scaling the full design matrix, to be fed to CV.
        z = z_terrain
        X = X_terrain_design
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        X[:, 0] = 1  # restore the intercept column, which scaling collapses to zero

        # Scaling the train/test split, to be fed to bootstrap and OLS.
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:, 0] = 1  # restore the intercept column
        X_test_scaled[:, 0] = 1

        # OLS, get MSE for test and train set.

        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)


        # CV, find best lambdas and get mse vs lambda for given degree.

        ridge_fold_score = np.zeros((n_lambdas, k_folds))
        lasso_fold_score = np.zeros((n_lambdas, k_folds))
        test_list, train_list = stat_tools.k_fold_selection(z, k_folds)
        for i, lamb in enumerate(lambdas):
            for j in range(k_folds):
                test_ind_cv = test_list[j]
                train_ind_cv = train_list[j]
                X_train_cv = X[train_ind_cv]
                z_train_cv = z[train_ind_cv]
                X_test_cv = X[test_ind_cv]
                z_test_cv = z[test_ind_cv]
                clf_Lasso = skl.Lasso(alpha=lamb, fit_intercept=False, max_iter=10000).fit(X_train_cv, z_train_cv)
                z_lasso_test = clf_Lasso.predict(X_test_cv)
                ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv, lamb)
                z_ridge_test = X_test_cv @ ridge_betas
                ridge_fold_score[i,j] = stat_tools.MSE(z_test_cv, z_ridge_test)
                lasso_fold_score[i,j] = stat_tools.MSE(z_test_cv, z_lasso_test)

        lasso_cv_mse = np.mean(lasso_fold_score, axis=1)
        ridge_cv_mse = np.mean(ridge_fold_score, axis=1)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse


        # OLS bootstrap: get bootstrapped mse, bias and variance for given degree.
        z_boot_model = np.zeros((len(z_test), n_bootstraps))
        for i in range(n_bootstraps):
            # Resample the training set with replacement
            shuffle = np.random.randint(0, len(z_train), len(z_train))
            X_boot, z_boot = X_train_scaled[shuffle], z_train[shuffle]
            betas_boot = linear_regression.OLS_SVD_2D(X_boot, z_boot)
            z_boot_model[:, i] = X_test_scaled @ betas_boot
        mse, bias, variance = stat_tools.compute_mse_bias_variance(z_test, z_boot_model)
        ols_boot_mse[degree] = mse
        ols_boot_bias[degree] = bias
        ols_boot_variance[degree] = variance

        # Ridge bootstrap, get bootstrapped mse, bias and variance for given degree and lambda
        lamb = best_ridge_lambda[degree]
        z_boot_model = np.zeros((len(z_test),n_bootstraps))
        for i in range(n_bootstraps):
            # Resample the training set with replacement
            shuffle = np.random.randint(0, len(z_train), len(z_train))
            X_boot, z_boot = X_train_scaled[shuffle], z_train[shuffle]
            betas_boot = linear_regression.Ridge_2D(X_boot, z_boot, lamb)  # Ridge, given lambda
            z_boot_model[:, i] = X_test_scaled @ betas_boot
        mse, bias, variance = stat_tools.compute_mse_bias_variance(z_test, z_boot_model)
        ridge_best_lambda_boot_mse[degree] = mse
        ridge_best_lambda_boot_bias[degree] = bias
        ridge_best_lambda_boot_variance[degree] = variance

        # Lasso bootstrap, get bootstrapped mse, bias and variance for given degree and lambda.
        lamb = best_lasso_lambda[degree]
        z_boot_model = np.zeros((len(z_test),n_bootstraps))
        for i in range(n_bootstraps):
            # Resample the training set with replacement
            shuffle = np.random.randint(0, len(z_train), len(z_train))
            X_boot, z_boot = X_train_scaled[shuffle], z_train[shuffle]
            clf_Lasso = skl.Lasso(alpha=lamb, fit_intercept=False, max_iter=10000).fit(X_boot, z_boot)
            z_boot_model[:, i] = clf_Lasso.predict(X_test_scaled)  # Lasso, given lambda
        mse, bias, variance = stat_tools.compute_mse_bias_variance(z_test, z_boot_model)
        lasso_best_lambda_boot_mse[degree] = mse
        lasso_best_lambda_boot_bias[degree] = bias
        lasso_best_lambda_boot_variance[degree] = variance

    # All necessary computations are done above. The return must sit at function
    # level (in the original it was indented into the degree loop, which would
    # cut the loop off after the first degree). Plotting of the collected
    # quantities is left out of this listing; see the sketch below.
    return
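The plotting part itself is omitted from the listing. A minimal matplotlib sketch of what it might look like, assuming the MSE arrays computed in terrain_analysis are made available to the caller (as written they are local to the function, so they would have to be returned first):

import numpy as np
import matplotlib.pyplot as plt

def plot_mse_vs_degree(mse_ols_test, best_ridge_mse, best_lasso_mse):
    # Sketch: compare test MSE across polynomial degrees for the three methods.
    degrees = np.arange(len(mse_ols_test))
    plt.plot(degrees, mse_ols_test, label="OLS (test)")
    plt.plot(degrees, best_ridge_mse, label="Ridge (CV-best lambda)")
    plt.plot(degrees, best_lasso_mse, label="Lasso (CV-best lambda)")
    plt.xlabel("Polynomial degree")
    plt.ylabel("MSE")
    plt.legend()
    plt.show()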
Example #13
0
    return w


if __name__ == "__main__":
    """
    Some very early testing
    """

    np.random.seed(123)
    x = np.random.uniform(0, 1, 500)
    y = np.random.uniform(0, 1, 500)
    z = FrankeFunction(x, y)

    deg = 2

    X = linear_regression.design_matrix_2D(x, y, deg)
    N_predictors = int((deg + 1) * (deg + 2) / 2)  # number of monomials x^i y^j with i + j <= deg
    w_init = np.random.randn(N_predictors)

    w_SGD_OLS = SGD(
        X,
        z,
        M=250,
        init_w=w_init,
        n_epochs=100,
        learning_rate=0.01,
        cost_gradient=CostFunctions.OLS_cost_gradient,
    )
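    # Added sketch (an assumption, not part of the original test): compare the
    # SGD fit against the closed-form OLS solution from the listing's own
    # linear_regression.OLS_SVD_2D helper as a quick sanity check.
    w_closed_form = linear_regression.OLS_SVD_2D(X, z)
    print("max |w_SGD_OLS - w_closed_form| =",
          np.max(np.abs(w_SGD_OLS - w_closed_form)))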
    w_SGDM_OLS = SGDM(
        X,
        z,