예제 #1
0
def cv_analysis(x,
                y,
                z,
                degrees,
                k,
                regression=linear_regression.OLS_SVD,
                lambd=None):
    """Run k-fold cross-validation once per polynomial degree.

    For every entry of ``degrees`` a design matrix is built with
    ``linear_regression.design_matrix(x, y, deg)``, standardized with a
    ``StandardScaler`` and handed to the cross-validation routine.

    Parameters
    ----------
    x, y
        Inputs forwarded to ``linear_regression.design_matrix``.
    z
        Targets forwarded to the cross-validation routine.
    degrees
        Array of degrees to evaluate; must expose ``.size`` and be iterable.
    k
        Number of folds, forwarded as ``k_folds``.
    regression
        Forwarded to ``cross_validation.cross_validation`` unless it equals
        the string ``"LASSO"``, in which case
        ``cross_validation.cross_validation_lasso`` is called instead.
    lambd
        Forwarded to the LASSO routine only; ignored otherwise.

    Returns
    -------
    numpy.ndarray
        One value per degree, as returned by the cross-validation routine
        (presumably the test MSE -- confirm against ``cross_validation``).
    """
    standardizer = StandardScaler()
    scores = np.empty(degrees.size)
    use_lasso = regression == "LASSO"

    for idx, degree in enumerate(degrees):
        # Build and standardize the design matrix for this degree.
        design = standardizer.fit_transform(
            linear_regression.design_matrix(x, y, degree))
        # Column 0 is forced back to ones after scaling (presumably the
        # intercept column, which standardization would otherwise flatten).
        design[:, 0] = 1.0

        if use_lasso:
            scores[idx] = cross_validation.cross_validation_lasso(
                design, z, k_folds=k, lambd=lambd)
        else:
            scores[idx] = cross_validation.cross_validation(
                design, z, k_folds=k, regression=regression)

    return scores
예제 #2
0
def OLS_unit_test(min_deg=0, max_deg=15, tol=1e-6):
    """Compare the homegrown OLS solver against scikit-learn, degree by degree.

    A random data set of 100 points is drawn on the unit square, with
    ``FrankeFunction`` values plus Gaussian noise (standard deviation 0.2) as
    targets. For every degree in ``[min_deg, max_deg]`` the parameters from
    ``linear_regression.OLS`` are compared element-wise with the coefficients
    of ``LinearRegression(fit_intercept=False)``.

    Parameters
    ----------
    min_deg : int
        Lowest polynomial degree to test (inclusive).
    max_deg : int
        Highest polynomial degree to test (inclusive).
    tol : float
        Largest absolute difference between two parameters that still counts
        as agreement.

    Returns
    -------
    None
        Mismatches are reported by printing a warning; nothing is raised.
    """
    n = 100  # Number of data points
    # Prepare data set
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2
    degrees = np.arange(min_deg, max_deg + 1)
    for deg in degrees:
        # Set up the design matrix for the degree under test. The original
        # hard-coded degree 5 here, so every iteration tested the same matrix.
        X = linear_regression.design_matrix(x, y, deg)
        # Compute optimal parameters using our homegrown OLS
        beta = linear_regression.OLS(X=X, z=z)
        # Compute optimal parameters using sklearn
        beta_skl = LinearRegression(fit_intercept=False).fit(X, z).coef_

        for i, (b_ours, b_skl) in enumerate(zip(beta, beta_skl)):
            # "not (... < tol)" rather than ">= tol" so that NaN differences
            # are still reported as mismatches.
            if not abs(b_ours - b_skl) < tol:
                print("Warning! mismatch with SKL in OLS_unit_test with tol = %.0e" % tol)
                print("Parameter no. %i for deg = %i" % (i, deg))
                print("-> (OUR) beta = %8.12f" % b_ours)
                print("-> (SKL) beta = %8.12f" % b_skl)
    return
예제 #3
0
def bootstrap_analysis(x,
                       y,
                       z,
                       degrees,
                       N_bootstraps,
                       regression=linear_regression.OLS_SVD,
                       lambd=None):
    """Run a bootstrap analysis once per polynomial degree.

    For every entry of ``degrees`` a design matrix is built with
    ``linear_regression.design_matrix(x, y, deg)``, split 80/20 into train and
    test sets without shuffling, standardized, and handed to the bootstrap
    routine.

    Parameters
    ----------
    x, y
        Inputs forwarded to ``linear_regression.design_matrix``.
    z
        Targets, split alongside the design matrix.
    degrees
        Array of degrees to evaluate; must expose ``.size`` and be iterable.
    N_bootstraps
        Number of resamples, forwarded as ``bootstraps``.
    regression
        Forwarded to ``bootstrap.bootstrap`` unless it equals the string
        ``"LASSO"``, in which case ``bootstrap.bootstrap_lasso`` is called.
    lambd
        Forwarded to the LASSO routine only; ignored otherwise.

    Returns
    -------
    tuple of numpy.ndarray
        ``(MSE_test, bias2_test, variance_test)``, each with one entry per
        degree, filled from the three values the bootstrap routine returns.
    """
    scaler = StandardScaler()
    MSE_test = np.empty(degrees.size)
    bias2_test = np.empty(degrees.size)
    variance_test = np.empty(degrees.size)

    for i, deg in enumerate(degrees):
        X = linear_regression.design_matrix(x, y, deg)
        # Split data, but don't shuffle. OK since data is already randomly sampled!
        # Facilitates a direct comparison of the clean & noisy data
        X_train, X_test, z_train, z_test = train_test_split(X,
                                                            z,
                                                            test_size=0.2,
                                                            shuffle=False)

        # Normalize data sets. The scaler is fitted on the training split
        # only and then applied unchanged to the test split; refitting on the
        # test data would put the two splits on different scales.
        X_train = scaler.fit_transform(X_train)
        X_train[:, 0] = 1.0
        X_test = scaler.transform(X_test)
        # Column 0 is forced back to ones after scaling in both splits.
        X_test[:, 0] = 1.0

        # Lasso requires special treatment due to differences between SKL's
        # way of doing things and our code.
        if regression != "LASSO":
            MSE_test[i], bias2_test[i], variance_test[i] = bootstrap.bootstrap(
                X_train,
                X_test,
                z_train,
                z_test,
                bootstraps=N_bootstraps,
                regression=regression)
        else:
            MSE_test[i], bias2_test[i], variance_test[i] = bootstrap.bootstrap_lasso(
                X_train,
                X_test,
                z_train,
                z_test,
                bootstraps=N_bootstraps,
                lambd=lambd)

    return MSE_test, bias2_test, variance_test