Esempio n. 1
0
def plot_MSE_Complexity(degree=5, graph=True):
    """
    Report (and optionally plot) train/test MSE of OLS fits of increasing polynomial degree.

    Synthetic data are n = 300 points of the Franke function with additive Gaussian
    noise (scale 0.1), split 80/20 into train and test sets. For every polynomial
    degree from 1 to `degree` the design matrix is standardized with statistics of the
    training set, an OLS model is fitted, and the MSE on both sets is recorded.
    The MSE values and the degree with the lowest test MSE are printed.

    Arguments:
    degree: integer type, >= 1. Highest polynomial degree (model complexity) to fit.
    graph: Binary type with inputs True/False. If True, plots the MSE on the train and
           test data and shows the figure. If False, nothing is drawn or shown.

    Returns: None

    Raises:
    ValueError: if degree < 1 (there would be no model to fit).
    """
    if degree < 1:
        raise ValueError(
            'degree must be at least 1, got {}'.format(degree))

    # Make synthetic data (fixed seeds for reproducible inputs)
    n = 300
    np.random.seed(18271)
    x1 = np.random.rand(n)
    np.random.seed(91837)
    x2 = np.random.rand(n)
    y = franke(x1, x2) + 0.1 * np.random.normal(0, 1, x1.size)

    # Split in train and test set
    x1_train, x1_test, x2_train, x2_test, y_train, y_test = train_test_split(
        x1, x2, y, test_size=0.2)

    degrees = range(1, degree + 1)
    MSE_train = []
    MSE_test = []

    # Fit OLS for every degree and compute the MSE on train and test data (with scaling)
    for degs in degrees:
        X_train_ = designMatrix(x1_train, x2_train, degs)
        scaler = StandardScaler()
        scaler.fit(X_train_)
        X_train_ = scaler.transform(X_train_)
        X_train_[:, 0] = 1  # restore the intercept column after scaling

        # The test set is scaled with the statistics of the training set
        X_test_ = scaler.transform(designMatrix(x1_test, x2_test, degs))
        X_test_[:, 0] = 1

        linreg = linregOwn()
        linreg.fit(X_train_, y_train)

        # MSE() is called right after predict() on the matching data set
        linreg.predict(X_train_)
        MSE_train.append(linreg.MSE(y_train))

        linreg.predict(X_test_)
        MSE_test.append(linreg.MSE(y_test))

    print('-------------------------------------------------')
    print('MSE_test: {}'.format(np.round(MSE_test, 4)))
    print('MSE_train: {}'.format(np.round(MSE_train, 4)))
    print('The polynomial fit of degree {} performs best'.format(
        MSE_test.index(min(MSE_test)) + 1))
    print('-------------------------------------------------')

    if not graph:
        # Nothing was drawn, so do not trigger plt.show() (it would display any
        # unrelated figures the caller still has open).
        return None

    plot, ax = plt.subplots()
    plt.xlabel('Complexity (Order of polynomial)')
    plt.ylabel('MSE')
    plt.title('Change in MSE depending on the complexity of the model')
    plt.plot(degrees, np.round(MSE_train, 4), 'k--', label='Training Sample')
    plt.plot(degrees, np.round(MSE_test, 4), 'r-', label='Test Sample')
    ax.axis([1, degree, 0, max(MSE_test)])
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
    plt.legend()
    plt.subplots_adjust(left=0.2, bottom=0.2, right=0.9)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'Plots', 'MSE_train_test.png'), transparent=True, bbox_inches='tight')

    return plt.show()
Esempio n. 2
0
def MSE_Ridge_Lasso(method='lasso'):
    """
    Plots the cross-validated mean squared error (MSE) on noisy Franke data as a
    function of the polynomial degree (complexity parameter p), one curve per value
    of the shrinkage parameter.

    Five shrinkage values np.logspace(-4, 0, 5) are iterated. The first curve is
    fitted with OLS as a reference and labelled "OLS"; the remaining four use `method`.
    Every point is the first element returned by a 10-fold CrossValidation.kFoldCV
    on n = 1000 points, for degrees 1 to 17.

    Arguments:
    method: character type, 'ridge' or 'lasso'. Passed to linregOwn for all but the
            first (OLS reference) curve.

    Returns: None (the figure is shown with plt.show()).
    """
    degrees = range(1, 18)
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    n = 1000

    fig, ax1 = plt.subplots()
    plt.rc('text', usetex=True)

    for ind, lambda_ in enumerate(np.logspace(-4, 0, 5)):  # 10 is the default base
        MSE_noise = []

        for deg in degrees:
            # The first shrinkage value is used for the OLS reference curve
            linreg = linregOwn(method='ols' if ind == 0 else method)

            # Reseeding before every fit keeps the data AND NumPy's global RNG state
            # identical for each (lambda, degree) pair, as in the original code.
            # NOTE(review): this matters only if kFoldCV draws from the global RNG
            # (not visible here); otherwise the data could be generated once.
            np.random.seed(18271)
            x1 = np.random.rand(n)
            np.random.seed(91837)
            x2 = np.random.rand(n)
            y_data_noise = franke(
                x1, x2) + 0.1 * np.random.standard_normal(size=n)

            # Only the noisy-data CV is needed for the plot; the former second CV
            # run on noiseless data was never used and doubled the runtime.
            CV_instance = CrossValidation(linreg, designMatrix)
            means_noise = CV_instance.kFoldCV(x1,
                                              x2,
                                              y_data_noise,
                                              10,
                                              lambda_=lambda_,
                                              degree=deg)
            MSE_noise.append(means_noise[0])

        if ind == 0:
            plt.plot(degrees,
                     np.array(MSE_noise),
                     colors[ind] + '-o',
                     markersize=5,
                     label=r"OLS")
        else:
            # round() before int(): int() alone truncates toward zero, so a
            # log10 result like -2.9999999999999996 would be labelled 10^-2.
            exponent = int(round(np.log10(lambda_)))
            plt.plot(degrees,
                     np.array(MSE_noise),
                     colors[ind] + '-o',
                     markersize=1,
                     label=r"$\lambda=10^{%d}$" % exponent)

    plt.ylabel(r"MSE", fontsize=10)
    plt.xlabel(r"Polynomial degree $p$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)

    ax1.legend()
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'Plots', 'MSE_lasso_noise_2.png'), transparent=True, bbox_inches='tight')
    plt.show()
Esempio n. 3
0
def plot_franke_noise():
    """
    Plots 1 - R2 and the mean squared error (MSE) of a degree-3 OLS fit as a function
    of the noise scalor eta (i.e. the parameter controlling the amount of added noise).

    Baseline targets y are n = 1000 Franke values with Gaussian noise of scale 0.1.
    For each of 50 values of eta in np.logspace(-4, 0, 50), extra noise eta * N(0, 1)
    is added to y. Two models are fitted on the same standardized design matrix:
      * one on the extra-noisy targets, evaluated on the extra-noisy test targets
        (solid lines with markers), and
      * one on the baseline targets, evaluated on the baseline test targets
        (dashed lines).

    Takes no arguments.

    Returns: None (the figure is shown with plt.show()).
    """
    # Make synthetic data
    n = 1000
    np.random.seed(18271)
    x1 = np.random.rand(n)
    np.random.seed(91837)
    x2 = np.random.rand(n)
    y = franke(x1, x2) + 0.1 * np.random.normal(0, 1, n)

    R2 = []
    MSE = []
    R2_noise = []
    MSE_noise = []

    noise = np.logspace(-4, 0, 50)

    for eta in noise:
        y_data_noise = y + eta * np.random.standard_normal(size=y.size)

        # random_state is fixed, so every eta uses the same train/test rows
        (x1_train, x1_test, x2_train, x2_test, y_train, y_test, y_noise_train,
         y_noise_test) = train_test_split(x1,
                                          x2,
                                          y,
                                          y_data_noise,
                                          test_size=0.35,
                                          random_state=42)

        X_train = designMatrix(x1_train, x2_train, 3)
        X_test = designMatrix(x1_test, x2_test, 3)
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_train[:, 0] = 1  # restore the intercept column after scaling
        X_test = scaler.transform(X_test)
        X_test[:, 0] = 1

        # Model trained and scored on the extra-noisy targets
        linreg = linregOwn(method='ols')
        linreg.fit(X_train, y_noise_train)
        linreg.predict(X_test)
        MSE_noise.append(linreg.MSE(y_noise_test))
        R2_noise.append(linreg.R2(y_noise_test))

        # Model trained and scored on the baseline targets. The scores must come
        # from this model; previously they were read from `linreg`, which left
        # this fit unused.
        linreg_NOnoise = linregOwn()
        linreg_NOnoise.fit(X_train, y_train)
        linreg_NOnoise.predict(X_test)
        MSE.append(linreg_NOnoise.MSE(y_test))
        R2.append(linreg_NOnoise.R2(y_test))

    fig, ax1 = plt.subplots()
    ax1.loglog(noise, 1 - np.array(R2_noise), 'k-o', markersize=2)
    ax1.loglog(noise, 1 - np.array(R2), 'k--', markersize=2)
    plt.xlabel(r"noise scalor $\eta$", fontsize=10)
    plt.ylabel(r"$1-R^2$", color='k', fontsize=10)

    ax2 = ax1.twinx()
    ax2.loglog(noise, np.array(MSE_noise), 'b-o', markersize=2)
    ax2.loglog(noise, np.array(MSE), 'b--', markersize=2)
    plt.ylabel(r"MSE", color='b', fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2, right=0.9)

    # Both axes share one y-range so the two quantities are visually comparable.
    # NOTE(review): the range is derived from R2_noise although 1 - R2 is what is
    # plotted on ax1 -- kept as in the original, confirm this is intended.
    y_limits = [
        0.95 * min(min(MSE_noise), min(R2_noise)),
        1.05 * (max(max(MSE_noise), max(R2_noise)))
    ]
    ax1.set_ylim(y_limits)
    ax2.set_ylim(y_limits)
    ax2.get_yaxis().set_ticks([])

    #plt.savefig(os.path.join(os.path.dirname(__file__), 'Plots', 'R2MSE_OLS_noise.png'), transparent=True, bbox_inches='tight')
    return plt.show()
Esempio n. 4
0
def plot_beta(method='ridge'):
    """
    Plots the coefficients (beta) of a degree-3 polynomial fit with error bars for
    different values of the shrinkage parameter (lambda).

    Twenty shrinkage values np.logspace(-3, 5, 20) are iterated. The fit for the
    first value is an OLS reference and is left out of the plot; all remaining
    values are fitted with `method`. Error bars are 2 * sqrt(var), where var is the
    first value returned by linregOwn.CI on the test targets.

    Arguments:
    method: character type, accepts arguments 'ridge' or 'lasso'

    Returns: None (the coefficient matrix is printed and the figure is shown).
    """
    beta = []
    beta_std = []

    fig, ax1 = plt.subplots()
    plt.rc('text', usetex=True)

    lam = np.logspace(-3, 5, 20)
    n = 500
    eta = 0.1

    for ind, lambda_ in enumerate(lam):
        # Index 0 is the OLS reference (skipped when plotting). The index has to be
        # known BEFORE the model is chosen; testing it before incrementing made the
        # second lambda an OLS fit that was then plotted as a `method` point.
        linreg = linregOwn(method='ols' if ind == 0 else method)

        # Same seeds every iteration -> identical data for every lambda
        np.random.seed(18271)
        x1 = np.random.rand(n)
        np.random.seed(91837)
        x2 = np.random.rand(n)
        y_data = franke(x1, x2)
        y_data_noise = y_data + eta * np.random.standard_normal(size=n)

        (x1_train, x1_test, x2_train, x2_test, _, y_test, y_noise_train,
         _) = train_test_split(x1,
                               x2,
                               y_data,
                               y_data_noise,
                               test_size=0.35,
                               random_state=42)

        X_train = designMatrix(x1_train, x2_train, 3)
        X_test = designMatrix(x1_test, x2_test, 3)
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_train[:, 0] = 1  # restore the intercept column after scaling
        # Assign back to X_test: the former `x_test = ...` typo left the test
        # design matrix unscaled.
        X_test = scaler.transform(X_test)
        X_test[:, 0] = 1

        # Fit once and keep the returned coefficients (the model used to be fitted
        # a second time just to obtain them).
        coefficients = linreg.fit(X_train, y_noise_train, lambda_)
        linreg.predict(X_test)
        var, low, up = linreg.CI(y_test)

        beta.append(coefficients)
        beta_std.append(np.sqrt(var))

    beta = np.array(beta)
    print(beta)
    beta_std = np.array(beta_std)

    # One label/colour per column of the degree-3 design matrix (10 columns)
    monomial = [
        '1', 'x', 'y', 'x^2', 'xy', 'y^2', 'x^3', 'x^2y', 'xy^2', 'y^3'
    ]
    colors = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
    ]

    for i in range(10):
        # [1:] drops the OLS reference fit
        plt.errorbar(lam[1:],
                     beta[1:, i],
                     yerr=2 * beta_std[1:, i],
                     fmt='-o',
                     markersize=2,
                     linewidth=1,
                     color=colors[i],
                     elinewidth=0.5,
                     capsize=2,
                     capthick=0.5,
                     label=r"$\beta_{%s}$" % (monomial[i]))
    plt.rc('text', usetex=True)
    plt.ylabel(r"$\beta_j$", fontsize=10)
    plt.xlabel(r"shrinkage parameter $\lambda$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    plt.legend(fontsize=6)

    fig.gca().set_xscale('log')
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'Plots', 'beta_lasso.png'), transparent=True, bbox_inches='tight')
    plt.show()
Esempio n. 5
0
def Bootstrap(x1,
              x2,
              y,
              N_boot=500,
              method='ols',
              degrees=5,
              random_state=42,
              lambda_=0.05):
    """
    Computes bias^2, variance and the mean squared error using bootstrap resampling
    for the provided data and the method.

    The data are split 80/20 into train and test sets. For each of the N_boot rounds
    the training set is resampled, the design matrix is standardized with the
    statistics of that resample, the model is fitted and a prediction for the fixed
    test set is stored. Bias^2, variance and MSE are computed from the predictions
    over all rounds and printed.

    Arguments:
    x1: 1D numpy array, covariate
    x2: 1D numpy array, covariate
    y: 1D numpy array, response
    N_boot: integer type, the number of bootstrap samples
    method: string type, accepts 'ols', 'ridge' or 'lasso' as arguments
    degrees: integer type, polynomial degree for generating the design matrix
    random_state: integer, ensures the same split when using the train_test_split functionality
    lambda_: float type, shrinkage parameter passed to the fit when method is 'ridge'
             or 'lasso' (default 0.05, the value that was previously hard-coded)

    Returns: Bias_vec, Var_vec, MSE_vec, betaVariance_vec
             one-element lists holding bias^2, variance, MSE (floats) and the
             per-coefficient variance of beta over the bootstrap rounds (numpy array)

    Raises:
    ValueError: if method is not 'ols', 'ridge' or 'lasso'.
    """
    # Fail early and clearly; an unknown method used to surface as an unbound
    # `beta` inside the resampling loop.
    if method not in ('ols', 'ridge', 'lasso'):
        raise ValueError(
            "method must be 'ols', 'ridge' or 'lasso', got {!r}".format(method))

    # Split x1, x2 and y arrays as train and test data
    x1_train, x1_test, x2_train, x2_test, y_train, y_test = train_test_split(
        x1, x2, y, test_size=0.2, random_state=random_state)

    # The unscaled test design matrix is the same in every round; only its scaling
    # (fitted on each resample) changes.
    X_test_raw = designMatrix(x1_test, x2_test, degrees)

    y_pred_test = np.zeros((y_test.shape[0], N_boot))
    betaMatrix = np.zeros((X_test_raw.shape[1], N_boot))

    # Resample and fit the corresponding method on the train data
    for i in range(N_boot):
        x1_, x2_, y_ = resample(x1_train, x2_train, y_train)
        X_train = designMatrix(x1_, x2_, degrees)
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_train[:, 0] = 1  # restore the intercept column after scaling
        X_test = scaler.transform(X_test_raw)  # returns a scaled copy
        X_test[:, 0] = 1

        manual_regression = linregOwn(method=method)
        if method == 'ols':
            beta = manual_regression.fit(X_train, y_)
        else:
            beta = manual_regression.fit(X_train, y_, lambda_=lambda_)

        # Predict on the same test data
        y_pred_test[:, i] = np.dot(X_test, beta)
        betaMatrix[:, i] = beta

    # Column vector so it broadcasts against the (n_test, N_boot) predictions
    y_test = y_test.reshape(len(y_test), 1)

    MSE = np.mean(np.mean((y_test - y_pred_test)**2, axis=1, keepdims=True))
    bias = np.mean((y_test - np.mean(y_pred_test, axis=1, keepdims=True))**2)
    variance = np.mean(np.var(y_pred_test, axis=1, keepdims=True))
    betaVariance = np.var(betaMatrix, axis=1)

    print("-------------------------------------------------------------")
    print("Degree: %d" % degrees)
    print('MSE:', np.round(MSE, 3))
    print('Bias^2:', np.round(bias, 3))
    print('Var:', np.round(variance, 3))
    print('{} >= {} + {} = {}'.format(MSE, bias, variance, bias + variance))
    print("-------------------------------------------------------------")

    return [bias], [variance], [MSE], [betaVariance]
Esempio n. 6
0
def plot_franke(x,
                y,
                franke_=False,
                noise=False,
                scalor=0.05,
                method='ols',
                seed1=8172,
                lambda_=0.005,
                absolute_error=False):
    """
    Plots the franke function and/or a polynomial model fit of it as a 3D surface.

    Franke's function has two Gaussian peaks of different heights, and a smaller dip.
    It is used as a test function in interpolation problems.
    The function is evaluated on the square xi ∈ [0, 1], for all i = 1, 2.

    Reference: Franke, R. (1979). A critical comparison of some methods for interpolation of
    scattered data (No. NPS53-79-003). NAVAL POSTGRADUATE SCHOOL MONTEREY CA.

    When `method` is 'ols', 'ridge' or 'lasso', the model is trained on 500 random
    points (seeded with seed1) and evaluated on a 500 x 500 grid over the unit square.
    Any other value of `method` (e.g. None) skips the fit, which is how the franke
    function alone is plotted together with franke_=True.

    Arguments:
    x:  1-dimensional numpy array (1D np.array)
    y:  1-dimensional numpy array (1D np.array)
    franke_: binary argument with inputs True/False. If 'True', plots the franke function
             on the meshgrid of x and y
    noise: binary argument with inputs True/False. If 'True', plots the franke function with added noise.
           Activated only when franke_ == True.
    scalor: float type,  controls the amount of noise to be added to the franke function. Activated only when
            noise == True.
    method: character input accepting 'ols', 'ridge', 'lasso'. Plots the corresponding model fit.
    seed1: seed passed to np.random.seed, used for reproducible training points
    lambda_: float type. Activated only when method = 'ridge' or 'lasso'. Controls the amount of shrinkage of the
             parameters. Higher number indicates higher shrinkage. (Previously ignored: 0.1 was hard-coded.)
    absolute_error: Binary type with inputs True/False. If 'True', plots the absolute deviation between the true
                    franke values and the fit of the corresponding model instead of the fit itself.
                    Activated only when method is either 'ols', 'ridge' or 'lasso'

    Returns: None (the figure is shown with plt.show()).
    """
    titles = {'ols': 'OLS Fit', 'ridge': 'Ridge Fit', 'lasso': 'Lasso Fit'}
    fit_model = method in titles

    x, y = np.meshgrid(x, y)
    f = franke(x, y)  # true franke values

    if noise:  # noisy franke values
        f = f + scalor * np.random.normal(0, 1, f.shape)

    if fit_model:
        # Training data: 500 random points in the unit square
        np.random.seed(seed1)
        x_new = np.random.rand(500)
        y_new = np.random.rand(500)
        fn = franke(x_new, y_new)

        X = designMatrix(x_new, y_new)
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        X[:, 0] = 1  # restore the intercept column after scaling

        linreg = linregOwn(method=method)
        if method == 'ols':
            beta = linreg.fit(X, fn)
        else:
            beta = linreg.fit(X, fn, lambda_=lambda_)

        # Evaluation grid
        grid = np.linspace(0, 1, np.size(x_new))
        Xnew, Ynew = np.meshgrid(grid, grid)
        F_true = franke(Xnew, Ynew)

        # Scale the grid with the TRAINING scaler: beta is only meaningful for
        # features standardized with the same mean/std it was fitted on
        # (a second scaler used to be fitted on the grid itself).
        xb_new = scaler.transform(designMatrix(Xnew.ravel(), Ynew.ravel()))
        xb_new[:, 0] = 1

        F_predict = np.dot(xb_new, beta).reshape(F_true.shape)

    # Plot
    fig = plt.figure()
    # fig.gca(projection='3d') is rejected by current Matplotlib releases
    ax = fig.add_subplot(111, projection='3d')
    surf = None

    if fit_model:
        values = abs(F_predict - F_true) if absolute_error else F_predict
        # antialiased=False as in the original surface settings
        surf = ax.plot_surface(Xnew,
                               Ynew,
                               values,
                               cmap=cm.coolwarm,
                               linewidth=0,
                               antialiased=False)

    if franke_:
        surf = ax.plot_surface(x,
                               y,
                               f,
                               cmap='coolwarm',
                               linewidth=0,
                               antialiased=False)
        if noise:
            ax.set_title('Franke function with noise')
        else:
            ax.set_title('Franke function without noise')

    # Customize z axis
    ax.set_zlim(-0.10, 1.4)
    ax.zaxis.set_major_locator(LinearLocator(5))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    ax.view_init(30, 45)

    # Labeling axes and title (a model title overrides the franke title)
    if fit_model:
        ax.set_title(titles[method])

    ax.set_xlabel('X')
    ax.set_ylabel('Y')

    # Add colour bar for the surface drawn last; skipped when nothing was drawn
    if surf is not None:
        fig.colorbar(surf, shrink=0.5, aspect=0.5)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'Plots', 'franke_abs_lasso.png'), transparent=True, bbox_inches='tight')
    return plt.show()
Esempio n. 7
0
def _plot_terrain_surface(XX, YY, ZZ):
    """Draws ZZ over the grid (XX, YY) as a 3D surface and shows the figure."""
    fig = plt.figure()
    # fig.gca(projection='3d') is rejected by current Matplotlib releases
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(XX,
                    YY,
                    ZZ,
                    cmap=cm.coolwarm,
                    linewidth=0,
                    antialiased=False)
    ax.set_zlim(-0.10, 1.40)
    ax.zaxis.set_major_locator(LinearLocator(5))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    ax.view_init(30, 45 + 90)
    plt.show()


def fit_terrain(plot=False):
    """
    Fits OLS, Ridge and Lasso on the terrain data, prints the test MSE of each
    method and optionally plots the fits.
    Before fitting, the design matrices are scaled with the statistics of the
    training data.

    Arguments:
    plot: Binary type, accepting arguments True/False. If True, plots the prediction
          of every method and finally the test data itself, each reshaped onto a
          60 x 60 grid.

    Returns: None
    """
    x1_train, x2_train, y_train, x1_test, x2_test, y_test = data(
        image_number=2, plotting=False)

    # Design matrices and scaler are identical for all three methods
    X_train_raw = designMatrix(x1_train, x2_train)
    X_test_raw = designMatrix(x1_test, x2_test)
    scaler = StandardScaler()
    scaler.fit(X_train_raw)

    if plot:
        x = np.linspace(0, 1, 60)
        y = np.copy(x)
        XX, YY = np.meshgrid(x, y)

    for method in ['ols', 'ridge', 'lasso']:
        lambda_ = 0.1
        linreg = linregOwn(method=method)

        # transform() returns a scaled copy, so every method gets fresh matrices
        X_train = scaler.transform(X_train_raw)
        X_train[:, 0] = 1  # restore the intercept column after scaling

        if method == 'ols':
            linreg.fit(X_train, y_train, lambda_=0)
        else:
            linreg.fit(X_train, y_train, lambda_=lambda_)

        # Assign back to X_test: the former `x_test = ...` typo left the test
        # design matrix unscaled.
        X_test = scaler.transform(X_test_raw)
        X_test[:, 0] = 1
        linreg.predict(X_test)
        print(linreg.MSE(y_test))

        if plot:
            print(XX.shape)
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'Plots', method+'terrain.png'), transparent=True, bbox_inches='tight')
            _plot_terrain_surface(XX, YY, np.reshape(linreg.yHat, XX.shape))

    if plot:  # Plots the extracted test data
        #plt.savefig(os.path.join(os.path.dirname(__file__), 'Plots', 'test_terrain.png'), transparent=True, bbox_inches='tight')
        _plot_terrain_surface(XX, YY, np.reshape(y_test, XX.shape))
Esempio n. 8
0
def _terrain_lambda_sweep(method, lambdas, scaler, X_train_raw, y_train,
                          X_test_raw, y_test):
    """Returns the test MSE of `method` for every shrinkage value in `lambdas`."""
    mse = []
    for lambda_ in lambdas:
        print(method + " " + str(lambda_))
        linreg = linregOwn(method=method)

        # transform() returns a scaled copy, so every fit gets fresh matrices
        X_train = scaler.transform(X_train_raw)
        X_train[:, 0] = 1  # restore the intercept column after scaling
        linreg.fit(X_train, y_train, lambda_)

        X_test = scaler.transform(X_test_raw)
        X_test[:, 0] = 1
        yHat = linreg.predict(X_test)
        mse.append(np.sum((y_test - yHat)**2) / len(y_test))
    return np.array(mse)


def MSE_terrain():
    """
    Plots the mean squared error (MSE) for OLS, Lasso and Ridge methods on the test
    data set for different values of the shrinkage parameter. The shrinkage parameter
    is ignored when OLS is used for fitting the data, so OLS appears as a horizontal
    reference line.

    All models use a degree-10 polynomial design matrix scaled with the statistics
    of the training data. Ridge and Lasso are evaluated for np.logspace(-4, 0, 10).

    Returns: None (the figure is shown with plt.show()).
    """
    x1_train, x2_train, y_train, x1_test, x2_test, y_test = data(
        image_number=2, plotting=False)

    degree = 10

    # Design matrices and scaler are the same for every model below
    X_train_raw = designMatrix(x1_train, x2_train, degree)
    X_test_raw = designMatrix(x1_test, x2_test, degree)
    scaler = StandardScaler()
    scaler.fit(X_train_raw)

    # Fit and predict ols
    linreg = linregOwn(method='ols')
    X_train = scaler.transform(X_train_raw)
    X_train[:, 0] = 1
    linreg.fit(X_train, y_train)

    X_test = scaler.transform(X_test_raw)
    X_test[:, 0] = 1
    linreg.predict(X_test)
    ols_MSE = linreg.MSE(y_test)
    ols_MSE = np.array([ols_MSE, ols_MSE])
    ols_lambda = np.array([1e-4, 1])  # lambda just for plotting

    # Sweep lambda for ridge and lasso and compute the MSE on the test data
    ridge_lambda = np.logspace(-4, 0, 10)
    ridge_MSE = _terrain_lambda_sweep('ridge', ridge_lambda, scaler,
                                      X_train_raw, y_train, X_test_raw, y_test)

    lasso_lambda = np.logspace(-4, 0, 10)
    lasso_MSE = _terrain_lambda_sweep('lasso', lasso_lambda, scaler,
                                      X_train_raw, y_train, X_test_raw, y_test)

    # Plot
    plt.rc('text', usetex=True)

    plt.loglog(ols_lambda,
               ols_MSE,
               'k--o',
               markersize=1,
               linewidth=1,
               label=r'OLS')
    plt.loglog(ridge_lambda,
               ridge_MSE,
               'r-o',
               markersize=1,
               linewidth=3,
               label=r'Ridge')
    plt.loglog(lasso_lambda,
               lasso_MSE,
               'b-o',
               markersize=1,
               linewidth=1,
               label=r'Lasso')

    plt.xlabel(r"$\lambda$", fontsize=10)
    plt.ylabel(r"MSE", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    plt.legend(fontsize=10)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'Plots', 'MSE_lambda_terrain.png'), transparent=True, bbox_inches='tight')
    plt.show()