Example #1
def bootstrap(x, y, max_deg, boots_num):
    """
    Applies the bootstrap resampling algorithm to an OLS fit of the
    Franke function and returns train/test MSE as functions of degree.

    Args:
        x, y (np.array): initial datapoints
        max_deg (int): maximum polynomial degree of the fit
        boots_num (int): number of bootstrap resamplings
    """
    np.random.seed(130)

    x, y = np.meshgrid(x, y)
    z = np.ravel(f.FrankeFunction(x, y) + 0.5 * np.random.randn(*x.shape))

    MSE_train_degree_values = np.zeros(max_deg)
    MSE_test_degree_values = np.zeros(max_deg)
    MSE_train_values = np.zeros(boots_num)
    MSE_test_values = np.zeros(boots_num)
    for k, deg in enumerate(range(1, max_deg + 1)):
        # Degree loop; each degree gets boots_num bootstrap fits
        X_design = f.X_make(x, y, deg)

        # Scale AFTER the train/test split, otherwise the test data
        # leaks into the scaling of the training data
        X_tr, X_te, z_tr, z_te = train_test_split(X_design, z, test_size=0.2)
        scaler = StandardScaler()
        scaler.fit(X_tr)
        X_train = scaler.transform(X_tr)
        X_test = scaler.transform(X_te)

        index_array = np.arange(len(z_tr))

        for i in range(boots_num):
            # resample with a different state each iteration so the
            # bootstrap samples actually differ
            indx = resample(index_array, random_state=i)
            z_bootstrap = z_tr[indx]

            beta = f.OLS(X_train[indx, :], z_bootstrap)
            z_pred_test = X_test.dot(beta)
            z_pred_train = X_train.dot(beta)
            MSE_train_values[i] = f.MSE(z_tr, z_pred_train)
            MSE_test_values[i] = f.MSE(z_te, z_pred_test)

        MSE_train_degree_values[k] = np.mean(MSE_train_values)
        MSE_test_degree_values[k] = np.mean(MSE_test_values)
    return MSE_train_degree_values, MSE_test_degree_values
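A minimal usage sketch for bootstrap(), plotting the resulting error curves. The data sizes are illustrative assumptions, and plt (matplotlib.pyplot) is assumed to be imported as in the later examples.

x = np.linspace(0, 1, 20)
y = np.linspace(0, 1, 20)
max_deg = 10
mse_train, mse_test = bootstrap(x, y, max_deg, boots_num=100)

plt.plot(np.arange(1, max_deg + 1), mse_train, label='train MSE')
plt.plot(np.arange(1, max_deg + 1), mse_test, label='test MSE')
plt.xlabel('polynomial degree')
plt.legend()
plt.show()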
Example #2
def sgdm(m, degrees, n_epochs, b, eta, noise=0, gamma=0):
    """Stochastic gradient descent with momentum for an OLS fit of the Franke function."""
    np.random.seed(1337)
    x = np.random.rand(m, degrees)
    y = np.random.rand(m, degrees)

    X_mesh, Y_mesh = np.meshgrid(x, y)

    z = f.FrankeFunction(X_mesh, Y_mesh) + noise * np.random.randn(X_mesh.shape[0], Y_mesh.shape[0])
    z = np.ravel(z)
    X = f.X_make(X_mesh, Y_mesh, degrees)

    # Split and scale: fit the scaler on the training data only,
    # then apply the same transform to both sets
    X_tr, X_te, z_tr, z_te = train_test_split(X, z, test_size=0.3)
    scaler = StandardScaler()
    scaler.fit(X_tr)
    X_tr = scaler.transform(X_tr)
    X_te = scaler.transform(X_te)
    z_tr = z_tr.reshape(-1, 1)
    z_te = z_te.reshape(-1, 1)
    scaler.fit(z_tr)
    z_tr = scaler.transform(z_tr)
    z_te = scaler.transform(z_te)

    l = int((degrees + 1) * (degrees + 2) / 2)  # number of columns in the design matrix
    beta = np.random.randn(l, 1)

    n_samples = X_tr.shape[0]
    batch_num = n_samples // b  # number of minibatches per epoch
    if n_samples % b:
        print('warning: batch size and dataset size not compatible')

    v = 0
    mse_eval = np.zeros(n_epochs)
    index_array = np.arange(n_samples)  # indices of the training rows
    for epoch in range(n_epochs):
        np.random.shuffle(index_array)
        for i in range(batch_num):
            # pick the next b shuffled indices as the minibatch
            idx = index_array[i * b:(i + 1) * b]
            xi = X_tr[idx]
            zi = z_tr[idx]

            # gradient of the cost C(beta) = (1/b) * ||xi @ beta - zi||^2
            gradients = 2 / b * xi.T @ (xi @ beta - zi)
            # momentum update: v <- gamma*v + eta*grad, beta <- beta - v
            v = gamma * v + eta * gradients
            beta = beta - v
        z_eval = X_te.dot(beta)
        mse_eval[epoch] = f.MSE(z_te, z_eval)

    # OLS solution on the same training data as a baseline
    beta_ols = f.OLS(X_tr, z_tr)
    z_ols = X_te.dot(beta_ols)
    mse_beta = f.MSE(z_te, z_ols)
    return beta, mse_eval, mse_beta
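A minimal sketch of calling sgdm() and comparing the epoch-wise test MSE against the OLS baseline; all hyperparameter values are illustrative assumptions.

beta, mse_eval, mse_ols = sgdm(m=20, degrees=5, n_epochs=50, b=10,
                               eta=0.01, noise=0.1, gamma=0.9)

plt.plot(mse_eval, label='SGD with momentum')
plt.axhline(mse_ols, linestyle='--', label='OLS baseline')
plt.xlabel('epoch')
plt.ylabel('test MSE')
plt.legend()
plt.show()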
Example #3
def cross_validation(n,
                     maxdegree,
                     noise,
                     n_folds,
                     method=f.OLS,
                     seed=130,
                     lmbda=0,
                     datatype='Franke',
                     filename='SRTM_data_Minneapolis'):
    """
    cross_validation

    Input:
        n           -   number of datapoints before meshgrid
        maxdegree   -   max degree to iterate over
        noise       -   amount of noise
        n_folds     -   number of folds in cross validation
        method      -   regression method (OLS, Ridge, Lasso)
        seed        -   seed to random number generator
        lmbda       -   lambda value to use in Ridge and Lasso
        datatype    -   datatype to fit (Franke, Terrain)
        filename    -   file with terrain data

    Output:
        polydegree          -   array with model complexity
        MSE_mean            -   array with mean MSE from each cross validation
        MSE_best            -   MSE for the best fit on the test data
        R2Score_skl         -   array with R2Score for the Scikit-Learn cross
                                validation (zeros unless the commented-out
                                block below is enabled)
        R2Score_mean        -   array with mean R2Score from each cross validation
        beta_best           -   coefficients of the best fit
        best_degree         -   degree with the smallest mean MSE
        MSE_mean_sklearn    -   array with mean MSE from the Scikit-Learn
                                cross validation
        best_degree_sklearn -   degree with the smallest Scikit-Learn MSE
        beta_best_sklearn   -   coefficients for the Scikit-Learn best degree
    """
    if n % n_folds != 0:
        raise ValueError("Can't divide data set into n_folds equally sized folds")

    polydegree = np.zeros(maxdegree)
    MSE_mean = np.zeros(maxdegree)
    MSE_mean_sklearn = np.zeros(maxdegree)
    R2Score_mean = np.zeros(maxdegree)
    R2Score_skl = np.zeros(maxdegree)

    # Make data
    np.random.seed(int(seed))

    if datatype == 'Franke':
        x_train, x_test, y_train, y_test, z_train, z_test = f.FrankeData(
            n, noise, test_size=0.3)

    elif datatype == 'Terrain':
        x_train, x_test, y_train, y_test, z_train, z_test = f.TerrainData(
            n, filename)

    for degree in range(0, maxdegree):
        polydegree[degree] = degree

        # Create design matrix
        X_train = f.design_matrix(x_train, y_train, degree)

        # Shuffle data to get random folds
        index = np.arange(0, np.shape(X_train)[0], 1)
        np.random.seed(int(seed))
        np.random.shuffle(index)
        X_train_random = X_train[index, :]
        z_train_random = z_train[index]

        # Split data in n_folds folds
        X_folds = np.array(np.array_split(X_train_random, n_folds))
        z_folds = np.array(np.array_split(z_train_random, n_folds))

        # Scikit-Learn cross validation for the chosen method; the best
        # degree is picked after the loop, once every degree has a score
        if method == f.OLS:
            clf = skl.LinearRegression()
        elif method == f.Ridge:
            clf = skl.Ridge(alpha=lmbda)
        elif method == 'Lasso':
            clf = skl.Lasso(alpha=lmbda, fit_intercept=False)
        scores = cross_val_score(clf,
                                 X_train,
                                 z_train,
                                 cv=n_folds,
                                 scoring='neg_mean_squared_error')
        MSE_mean_sklearn[degree] = np.abs(np.mean(scores))

        # cross validation
        for k in range(n_folds):
            # Validation data
            X_val = X_folds[k]
            z_val = np.reshape(z_folds[k], (-1, 1))

            # Training data
            idx = np.ones(n_folds, dtype=bool)
            idx[k] = False
            X_train_fold = X_folds[idx]

            # Combine folds
            X_train_fold = np.reshape(
                X_train_fold, (X_train_fold.shape[0] * X_train_fold.shape[1],
                               X_train_fold.shape[2]))
            z_train_fold = np.reshape(np.ravel(z_folds[idx]), (-1, 1))

            # Scaling: remove the mean and scale each feature to unit
            # variance, with mean and std computed on the training folds only
            scaler = StandardScaler()
            scaler.fit(X_train_fold)
            X_train_fold_scaled = scaler.transform(X_train_fold)
            X_val_scaled = scaler.transform(X_val)
            # Set first column to one as StandardScaler sets it to zero
            X_train_fold_scaled[:, 0] = 1
            X_val_scaled[:, 0] = 1

            # z is left unscaled here
            z_train_fold_scaled = z_train_fold
            z_val_scaled = z_val

            # Choose method for calculating coefficients beta
            if method == f.OLS:
                beta_fold = method(X_train_fold_scaled, z_train_fold_scaled)
                z_tilde_fold = X_val_scaled @ beta_fold
            elif method == f.Ridge:
                beta_fold = method(X_train_fold_scaled, z_train_fold_scaled,
                                   lmbda, degree)
                z_tilde_fold = X_val_scaled @ beta_fold

            elif method == 'Lasso':
                clf_lasso = skl.Lasso(alpha=lmbda, fit_intercept=False).fit(
                    X_train_fold_scaled, z_train_fold_scaled)
                z_tilde_fold = clf_lasso.predict(X_val_scaled)

            MSE_mean[degree] += f.MSE(z_val_scaled, z_tilde_fold)
            R2Score_mean[degree] += f.R2Score(z_val_scaled, z_tilde_fold)

        MSE_mean[degree] /= n_folds
        R2Score_mean[degree] /= n_folds

        # # Cross-validation using Scikit-Learn
        # clf = skl.LinearRegression()
        # R2Score_skl[degree] = np.mean(cross_val_score(clf, X_train, z_train, scoring='r2', cv=n_folds))

    # Find the degree with the smallest mean MSE, for both the own
    # cross validation and the Scikit-Learn one
    best_degree = np.argmin(MSE_mean)
    best_degree_sklearn = np.argmin(MSE_mean_sklearn)
    print("Best degree:", best_degree)

    # Fit the best model and evaluate it on the held-out test data
    X_train_best = f.design_matrix(x_train, y_train, best_degree)
    scaler = StandardScaler()
    scaler.fit(X_train_best)
    X_train_best_scaled = scaler.transform(X_train_best)
    X_test_best = f.design_matrix(x_test, y_test, best_degree)
    X_test_best_scaled = scaler.transform(X_test_best)

    # Restore the intercept column that StandardScaler zeroes out
    X_train_best_scaled[:, 0] = 1
    X_test_best_scaled[:, 0] = 1

    scaler.fit(z_train.reshape(-1, 1))
    z_train_scaled = scaler.transform(z_train.reshape(-1, 1))
    z_test_scaled = scaler.transform(z_test.reshape(-1, 1))

    beta_best = f.OLS(X_train_best_scaled, z_train_scaled)
    z_tilde_best = X_test_best_scaled @ beta_best
    MSE_best = f.MSE(z_test_scaled, z_tilde_best)
    print("MSE on test data:", MSE_best)

    # The corresponding OLS fit for the degree picked by the
    # Scikit-Learn cross validation
    X_train_skl = f.design_matrix(x_train, y_train, best_degree_sklearn)
    scaler = StandardScaler().fit(X_train_skl)
    X_train_skl_scaled = scaler.transform(X_train_skl)
    X_train_skl_scaled[:, 0] = 1
    beta_best_sklearn = f.OLS(X_train_skl_scaled, z_train_scaled)

    return polydegree, MSE_mean, MSE_best, R2Score_skl, R2Score_mean, beta_best, best_degree, MSE_mean_sklearn, best_degree_sklearn, beta_best_sklearn
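A minimal sketch of calling cross_validation() and plotting the validation curves; the argument values are illustrative, and f.FrankeData is assumed to behave as documented above.

(polydegree, MSE_mean, MSE_best, R2Score_skl, R2Score_mean,
 beta_best, best_degree, MSE_mean_sklearn, best_degree_sklearn,
 beta_best_sklearn) = cross_validation(n=20, maxdegree=10, noise=0.1,
                                       n_folds=5, method=f.OLS)

plt.plot(polydegree, MSE_mean, label='own k-fold CV')
plt.plot(polydegree, MSE_mean_sklearn, label='Scikit-Learn CV')
plt.xlabel('polynomial degree')
plt.ylabel('mean MSE')
plt.legend()
plt.show()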
Example #4
def no_resampling(n, maxdegree, noise, method=f.OLS, lmbda=0, seed=7053):
    """Fits the Franke function without resampling and returns train/test MSE with and without scaling."""
    # arrays for plotting of error
    polydegree = np.zeros(maxdegree)
    MSE_test = np.zeros(maxdegree)
    MSE_train = np.zeros(maxdegree)
    MSE_train_scaled = np.zeros(maxdegree)
    MSE_test_scaled = np.zeros(maxdegree)
    R2Score_scaled = np.zeros(maxdegree)

    # Make data
    np.random.seed(seed)
    x = np.sort(np.random.uniform(0, 1, n))
    y = np.sort(np.random.uniform(0, 1, n))
    x, y = np.meshgrid(x, y)

    # Franke Function
    z = np.ravel(f.FrankeFunction(x, y) + noise * np.random.randn(n, n))

    for degree in range(0, maxdegree):
        polydegree[degree] = degree

        #Create design matrix
        X = f.design_matrix(x, y, degree)

        # Split in training and test data
        X_train, X_test, z_train, z_test = train_test_split(X,
                                                            z.reshape(-1, 1),
                                                            test_size=0.3)

        # OLS estimate on train and test data without scaling
        beta_OLS_train = f.OLS(X_train, z_train)
        ztilde_test = X_test @ beta_OLS_train
        ztilde_train = X_train @ beta_OLS_train

        MSE_train[degree] = f.MSE(z_train, ztilde_train)
        MSE_test[degree] = f.MSE(z_test, ztilde_test)

        # Scale data: remove the mean and scale each feature to unit
        # variance, with mean and std computed on the training data only
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train_scaled = scaler.transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        # z is left unscaled here
        z_train_scaled = z_train
        z_test_scaled = z_test

        # Set the first column to 1 since StandardScaler sets it to 0
        X_train_scaled[:, 0] = 1
        X_test_scaled[:, 0] = 1

        if method == f.OLS:
            beta_train_scaled = method(X_train_scaled, z_train_scaled)
            z_tilde_test_scaled = X_test_scaled @ beta_train_scaled
            z_tilde_train_scaled = X_train_scaled @ beta_train_scaled

        elif method == f.Ridge:
            beta_train_scaled = method(X_train_scaled, z_train_scaled, lmbda,
                                       degree)
            z_tilde_test_scaled = X_test_scaled @ beta_train_scaled
            z_tilde_train_scaled = X_train_scaled @ beta_train_scaled

        elif method == 'Lasso':
            clf_lasso = skl.Lasso(alpha=lmbda, fit_intercept=False).fit(
                X_train_scaled, z_train_scaled)
            z_tilde_test_scaled = clf_lasso.predict(X_test_scaled)
            z_tilde_train_scaled = clf_lasso.predict(X_train_scaled)

        MSE_train_scaled[degree] = f.MSE(z_train_scaled, z_tilde_train_scaled)
        MSE_test_scaled[degree] = f.MSE(z_test_scaled, z_tilde_test_scaled)
        R2Score_scaled[degree] = f.R2Score(z_test_scaled, z_tilde_test_scaled)

    return polydegree, MSE_train, MSE_test, MSE_train_scaled, MSE_test_scaled, R2Score_scaled

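A minimal sketch of calling no_resampling() and comparing the scaled and unscaled test errors; the argument values are illustrative.

polydegree, MSE_train, MSE_test, MSE_train_scaled, MSE_test_scaled, R2 = \
    no_resampling(n=20, maxdegree=10, noise=0.1, method=f.OLS)

plt.plot(polydegree, MSE_test, label='test MSE (unscaled)')
plt.plot(polydegree, MSE_test_scaled, label='test MSE (scaled)')
plt.xlabel('polynomial degree')
plt.legend()
plt.show()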
Example #5
# This snippet uses n, noise, maxdegree and seed as free variables; the
# values below are illustrative placeholders, not from the original
n = 20
noise = 0.1
maxdegree = 5
seed = 7053

np.random.seed(seed)
x = np.sort(np.random.uniform(0, 1, n))
y = np.sort(np.random.uniform(0, 1, n))
x, y = np.meshgrid(x, y)

# Franke function with additive Gaussian noise
z = np.ravel(f.FrankeFunction(x, y) + noise * np.random.randn(n, n))

for degree in range(maxdegree):
    # Create design matrix
    X = f.design_matrix(x, y, degree)

    # Ordinary least squares on the full data set
    beta_OLS = f.OLS(X, z)
    ztilde = X @ beta_OLS

    # 95% confidence interval for each coefficient beta
    var_beta = f.variance_beta(X, noise)
    err_beta = 1.96 * np.sqrt(var_beta)
    beta_idx = np.arange(X.shape[1])


    # Plot of the confidence intervals for OLS
    plt.style.use('ggplot')
    plt.errorbar(beta_idx, beta_OLS, err_beta, fmt='.')
    plt.xlabel("coefficient index", size=12)
    plt.ylabel('Confidence interval', size=12)
    plt.title("95 % confidence interval as function of $\\beta$", size=16)
    plt.show()
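The helper f.variance_beta is not shown in this example. A minimal sketch consistent with its use above, assuming homoscedastic noise with standard deviation noise, could look like this:

def variance_beta(X, noise):
    # var(beta_hat) = sigma^2 * diag((X^T X)^(-1)) for OLS with
    # homoscedastic noise of standard deviation sigma
    XtX_inv = np.linalg.pinv(X.T @ X)
    return noise**2 * np.diag(XtX_inv)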
Example #6
def OLS(self):
    """Calculates the ordinary least squares fit and its mean squared error."""
    beta = f.OLS(self.X_train, self.z_train)
    z_tilde = self.X_test @ beta
    self.MSE = f.MSE(self.z_test, z_tilde)
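This method assumes a class holding pre-split data. A minimal sketch of a container it could belong to; the class name and constructor are assumptions for illustration only.

class Model:
    def __init__(self, X_train, X_test, z_train, z_test):
        # pre-split design matrices and targets, as the method expects
        self.X_train = X_train
        self.X_test = X_test
        self.z_train = z_train
        self.z_test = z_test
        self.MSE = None
    # def OLS(self): ...  (the method shown above)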