Ejemplo n.º 1
0
def plot_bias_var_tradeoff(ndegree=5, method='ols'):
    """
    Plots the bias-variance tradeoff obtained from the project's Bootstrap
    routine as a function of the polynomial degree.

    Noisy Franke data is generated, Bootstrap is called once per degree from
    1 to ndegree, and the first three values it returns (treated here as bias,
    variance and MSE) are drawn as three curves in a single figure.

    Arguments:
    ndegree: integer type, highest polynomial degree that is evaluated
    method: character type, forwarded to Bootstrap. A plot title is only set
            for 'ols', 'ridge' or 'lasso'

    Returns:
    Whatever plt.show() returns.
    """
    # Synthetic data: each coordinate is drawn directly after its own seed;
    # the noise term continues from the state left by the second seed.
    sample_count = 500
    np.random.seed(18271)
    x1 = np.random.rand(sample_count)
    np.random.seed(91837)
    x2 = np.random.rand(sample_count)
    clean = franke(x1, x2)
    y = clean + 0.1 * np.random.normal(0, 1, x1.size)

    # One bootstrap run per polynomial order.
    orders = range(1, ndegree + 1)
    bias_curve, var_curve, mse_curve = [], [], []
    for order in orders:
        b, v, m, _ = Bootstrap(x1, x2, y, degrees=order, method=method)
        bias_curve.append(b)
        var_curve.append(v)
        mse_curve.append(m)

    _, ax = plt.subplots()
    plt.xlabel('Complexity (Order of polynomial)')
    plt.ylabel('MSE')

    # Only the three known methods get a title; anything else stays untitled.
    known_titles = (
        ('ols', 'Bias-Variance tradeoff using bootstrap (OLS)'),
        ('ridge', 'Bias-Variance tradeoff using bootstrap (Ridge)'),
        ('lasso', 'Bias-Variance tradeoff using bootstrap (Lasso)'),
    )
    for name, title in known_titles:
        if method == name:
            plt.title(title)

    series = (
        (mse_curve, 'k-o', 'MSE'),
        (bias_curve, 'b-o', 'Bias'),
        (var_curve, 'r-o', 'Variance'),
    )
    for values, fmt, label in series:
        plt.plot(orders, values, fmt, label=label)

    ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
    plt.legend()
    plt.subplots_adjust(left=0.2, bottom=0.2, right=0.9)

    return plt.show()
Ejemplo n.º 2
0
def plot_MSE_Complexity(degree=5, graph=True):
    """
    Plots mean squared error (MSE) as a function of the complexity (i.e. polynomial degree) parameter
    on the train and test data set. A default linregOwn() model is fitted on the train set for every
    degree from 1 to `degree` and scored on both sets; the MSE lists and the best degree are printed.

    Arguments:
    degree: integer type (>= 1). Highest polynomial degree that is fitted
    graph: Binary type with inputs True/False. If truthy, plots the MSE on the train and test data
           and shows the figure. If falsy, only the printed summary is produced.

    Returns:
    None when graph is falsy, otherwise whatever plt.show() returns.

    Raises:
    ValueError: if degree < 1 (no model would be fitted, so there is no best degree to report).
    """
    if degree < 1:
        raise ValueError(
            'degree must be a positive integer, got {}'.format(degree))

    n = 300  ##Make synthetic data
    np.random.seed(18271)
    x1 = np.random.rand(n)
    np.random.seed(91837)
    x2 = np.random.rand(n)
    y = franke(x1, x2) + 0.1 * np.random.normal(0, 1, x1.size)

    ##split in train and test set (no random_state is passed, so the split
    ##follows NumPy's global RNG state that was seeded above)
    x1_train, x1_test, x2_train, x2_test, y_train, y_test = train_test_split(
        x1, x2, y, test_size=0.2)
    MSE_train = []
    MSE_test = []

    ##fit with polynomials of different degree and compute MSE on the train and test data (with scaling)
    for degs in range(1, degree + 1):
        X_train_ = designMatrix(x1_train, x2_train, degs)
        scaler = StandardScaler()
        scaler.fit(X_train_)
        X_train_ = scaler.transform(X_train_)
        # Column 0 is reset to 1 after scaling (presumably the intercept
        # column, which standardisation would otherwise zero out).
        X_train_[:, 0] = 1
        # The test matrix is scaled with the statistics of the train matrix.
        X_test_ = scaler.transform(designMatrix(x1_test, x2_test, degs))
        X_test_[:, 0] = 1

        linreg = linregOwn()
        linreg.fit(X_train_, y_train)

        # NOTE(review): MSE() is called right after predict() and only gets
        # the targets, so it presumably scores the most recent prediction -
        # confirm against linregOwn.
        linreg.predict(X_train_)
        MSE_train.append(linreg.MSE(y_train))

        linreg.predict(X_test_)
        MSE_test.append(linreg.MSE(y_test))

    print('-------------------------------------------------')
    print('MSE_test: {}'.format(np.round(MSE_test, 4)))
    print('MSE_train: {}'.format(np.round(MSE_train, 4)))
    print('The polynomial fit of degree {} performs best'.format(
        MSE_test.index(min(MSE_test)) + 1))
    print('-------------------------------------------------')

    if not graph:
        # Nothing was drawn, so there is nothing to show.
        return None

    _, ax = plt.subplots()
    plt.xlabel('Complexity (Order of polynomial)')
    plt.ylabel('MSE')
    plt.title('Change in MSE depending on the complexity of the model')
    degrees = range(1, degree + 1)
    plt.plot(degrees, np.round(MSE_train, 4), 'k--', label='Training Sample')
    plt.plot(degrees, np.round(MSE_test, 4), 'r-', label='Test Sample')
    ax.axis([1, degree, 0, max(MSE_test)])
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
    plt.legend()
    plt.subplots_adjust(left=0.2, bottom=0.2, right=0.9)

    return plt.show()
Ejemplo n.º 3
0
def plot_franke_noise():
    """
    Plots 1 - R2 and mean squared error (MSE) of an OLS fit as a function of the noise scalor
    (i.e. parameter controlling the amount of extra noise added to the targets).

    For each of 50 log-spaced noise levels between 1e-4 and 1 an OLS model is fitted on the
    extra-noisy train targets. Solid lines score it against the extra-noisy test targets,
    dashed lines against the test targets without the extra noise. 1 - R2 is drawn in black
    on the left axis, MSE in blue on a twin axis; both axes share the same log-scale limits.

    Arguments:
    None.

    Returns:
    Whatever plt.show() returns.
    """
    ##Make synthetic data

    n = 1000
    np.random.seed(18271)
    x1 = np.random.rand(n)
    np.random.seed(91837)
    x2 = np.random.rand(n)
    # Note that the base targets already carry noise with scale 0.1.
    y = franke(x1, x2) + 0.1 * np.random.normal(0, 1, n)

    R2 = []
    MSE = []
    R2_noise = []
    MSE_noise = []

    noise = np.logspace(-4, 0, 50)

    for eta in noise:
        y_data_noise = y + eta * np.random.standard_normal(size=y.size)
        linreg = linregOwn(method='ols')
        # The fixed random_state gives the same train/test partition for
        # every noise level.
        x1_train, x1_test, x2_train, x2_test, y_train, y_test, y_noise_train, y_noise_test = train_test_split(
            x1, x2, y, y_data_noise, test_size=0.35, random_state=42)
        X_train = designMatrix(x1_train, x2_train, 3)
        X_test = designMatrix(x1_test, x2_test, 3)
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_train[:, 0] = 1
        X_test = scaler.transform(X_test)
        X_test[:, 0] = 1
        linreg.fit(X_train, y_noise_train)
        linreg.predict(X_test)
        MSE_noise.append(linreg.MSE(y_noise_test))
        R2_noise.append(linreg.R2(y_noise_test))

        # NOTE(review): linreg_NOnoise is fitted and used for a prediction,
        # but the two metrics below are read from `linreg` (the model trained
        # on the extra-noisy targets). Either the metrics were meant to come
        # from linreg_NOnoise, or this second model is dead code - confirm the
        # intended comparison before changing it. Behaviour is kept as-is.
        linreg_NOnoise = linregOwn()
        linreg_NOnoise.fit(X_train, y_train)
        linreg_NOnoise.predict(X_test)
        MSE.append(linreg.MSE(y_test))
        R2.append(linreg.R2(y_test))

    # The quantities that are actually drawn on the left axis.
    one_minus_r2_noise = 1 - np.array(R2_noise)
    one_minus_r2 = 1 - np.array(R2)

    fig, ax1 = plt.subplots()
    ax1.loglog(noise, one_minus_r2_noise, 'k-o', markersize=2)
    ax1.loglog(noise, one_minus_r2, 'k--', markersize=2)
    ax1.set_xlabel(r"noise scalor $\eta$", fontsize=10)
    ax1.set_ylabel(r"$1-R^2$", color='k', fontsize=10)

    ax2 = ax1.twinx()
    ax2.loglog(noise, np.array(MSE_noise), 'b-o', markersize=2)
    ax2.loglog(noise, np.array(MSE), 'b--', markersize=2)
    ax2.set_ylabel(r"MSE", color='b', fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2, right=0.9)

    # Shared limits are derived from the plotted 1 - R2 values (not from raw
    # R2, which can be negative and is not what the log axis shows).
    y_low = 0.95 * min(min(MSE_noise), one_minus_r2_noise.min())
    y_high = 1.05 * max(max(MSE_noise), one_minus_r2_noise.max())
    ax1.set_ylim([y_low, y_high])
    ax2.set_ylim([y_low, y_high])
    # Both axes share one scale, so the right-hand ticks are hidden.
    ax2.get_yaxis().set_ticks([])

    return plt.show()
Ejemplo n.º 4
0
def MSE_Ridge_Lasso(method='lasso'):
    """
    Plots cross-validated mean squared error (MSE) on noisy Franke data as a function of the
    polynomial degree (complexity parameter p, 1 to 17) for five log-spaced values of the
    shrinkage parameter between 1e-4 and 1.

    The curve for the first shrinkage value is computed with an OLS model and labelled "OLS";
    the remaining curves use `method`. The MSE is the first element of what
    CrossValidation.kFoldCV returns.

    Only the noisy-data cross-validation is run: the original additionally cross-validated
    the noise-free data for every (lambda, degree) pair but never used the result.

    Arguments:
    method: character type, model used for all curves except the OLS one ('ridge' or 'lasso')
    """
    fig, ax1 = plt.subplots()
    # Labels below use LaTeX markup; this requires a working LaTeX install.
    plt.rc('text', usetex=True)

    degrees = range(1, 18)
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']

    for ind, lambda_ in enumerate(np.logspace(-4, 0, 5)):  ##10 is a default base
        is_ols = ind == 0
        MSE_noise = []

        for deg in degrees:
            if is_ols:
                linreg = linregOwn(method='ols')
            else:
                linreg = linregOwn(method=method)

            ##Compute MSE using cross-validation. Might take some time before we get plots.
            # The data is regenerated (and the global RNG re-seeded) for every
            # fit, so each cross-validation starts from the same RNG state.
            n = 1000
            np.random.seed(18271)
            x1 = np.random.rand(n)
            np.random.seed(91837)
            x2 = np.random.rand(n)
            y_data = franke(x1, x2)
            y_data_noise = y_data + 0.1 * np.random.standard_normal(size=n)

            CV_instance = CrossValidation(linreg, designMatrix)
            means_noise = CV_instance.kFoldCV(x1,
                                              x2,
                                              y_data_noise,
                                              10,
                                              lambda_=lambda_,
                                              degree=deg)
            MSE_noise.append(means_noise[0])

        if is_ols:
            plt.plot(degrees,
                     np.array(MSE_noise),
                     colors[ind] + '-o',
                     markersize=5,
                     label=r"OLS")
        else:
            # round() instead of plain int(): truncation would turn e.g.
            # -2.9999999999999996 into -2 and mislabel the curve.
            exponent = int(round(np.log10(lambda_)))
            plt.plot(degrees,
                     np.array(MSE_noise),
                     colors[ind] + '-o',
                     markersize=1,
                     label=r"$\lambda=10^{%d}$" % exponent)

    plt.ylabel(r"MSE", fontsize=10)
    plt.xlabel(r"Polynomial degree $p$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)

    ax1.legend()
    plt.show()
Ejemplo n.º 5
0
def plot_beta(method='ridge'):
    """
    Plots coefficients (beta) for ridge and lasso methods for different values of the shrinkage parameter (lambda)

    A model is fitted on noisy Franke data (design matrix built with designMatrix(..., 3)) for
    each of 20 log-spaced lambda values between 1e-3 and 1e5. The first lambda is fitted with
    an OLS model and is left out of the figure; all others use `method`. Each coefficient is
    drawn with error bars of two times the square root of the first value returned by
    linregOwn.CI.

    Arguments:
    method: character type, accepts arguments 'ridge' or 'lasso'
    """
    fig, ax1 = plt.subplots()
    # Labels below use LaTeX markup; this requires a working LaTeX install.
    plt.rc('text', usetex=True)

    lam = np.logspace(-3, 5, 20)

    # Data, split and design matrices do not depend on lambda (fixed seeds and
    # a fixed random_state), so they are built once instead of once per fit.
    # NOTE(review): assumes linregOwn.fit/predict neither modify their inputs
    # nor draw from NumPy's global RNG - confirm against linregOwn.
    n = 500
    np.random.seed(18271)
    x1 = np.random.rand(n)
    np.random.seed(91837)
    x2 = np.random.rand(n)
    y_data = franke(x1, x2)
    eta = 0.1
    y_data_noise = y_data + eta * np.random.standard_normal(size=n)
    x1_train, x1_test, x2_train, x2_test, _, y_test, y_noise_train, _ = train_test_split(
        x1, x2, y_data, y_data_noise, test_size=0.35, random_state=42)

    X_train = designMatrix(x1_train, x2_train, 3)
    X_test = designMatrix(x1_test, x2_test, 3)
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_train[:, 0] = 1
    # The test matrix must be scaled with the train statistics as well (the
    # original assigned the result to a misspelled name and discarded it).
    X_test = scaler.transform(X_test)
    X_test[:, 0] = 1

    beta = []
    beta_variance = []

    for ind, lambda_ in enumerate(lam):
        # Entry 0 is the OLS fit; it is skipped by the [1:] slices when
        # plotting.
        if ind == 0:
            linreg = linregOwn(method='ols')
        else:
            linreg = linregOwn(method=method)

        # fit() returns the coefficient vector, so one fit per lambda suffices.
        beta_hat = linreg.fit(X_train, y_noise_train, lambda_)
        linreg.predict(X_test)
        var, _, _ = linreg.CI(y_test)

        beta.append(beta_hat)
        beta_variance.append(np.sqrt(var))

    beta = np.array(beta)
    print(beta)
    beta_variance = np.array(beta_variance)

    monomial = [
        '1', 'x', 'y', 'x^2', 'xy', 'y^2', 'x^3', 'x^2y', 'xy^2', 'y^3'
    ]
    colors = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
    ]

    for i, (term, color) in enumerate(zip(monomial, colors)):
        plt.errorbar(lam[1:],
                     beta[1:, i],
                     yerr=2 * beta_variance[1:, i],
                     fmt='-o',
                     markersize=2,
                     linewidth=1,
                     color=color,
                     elinewidth=0.5,
                     capsize=2,
                     capthick=0.5,
                     label=r"$\beta_{%s}$" % term)
    plt.ylabel(r"$\beta_j$", fontsize=10)
    plt.xlabel(r"shrinkage parameter $\lambda$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    plt.legend(fontsize=6)

    ax1.set_xscale('log')
    plt.show()
Ejemplo n.º 6
0
def _fit_franke_surface(method, seed1, lambda_):
    """
    Fits a linregOwn model to 500 randomly sampled Franke values and evaluates it on a
    regular 500 x 500 grid over the unit square.

    Arguments:
    method: character type, 'ols', 'ridge' or 'lasso'
    seed1: seed passed to np.random.seed before the sample points are drawn
    lambda_: float type, shrinkage parameter handed to fit() for 'ridge' and 'lasso'

    Returns:
    Tuple (Xnew, Ynew, F_true, F_predict): the grid coordinates, the true Franke values on
    the grid and the model prediction reshaped to the grid.
    """
    np.random.seed(seed1)
    x_new = np.random.rand(500)
    y_new = np.random.rand(500)
    fn = franke(x_new, y_new)

    X = designMatrix(x_new, y_new)
    scaler = StandardScaler()
    scaler.fit(X)
    X = scaler.transform(X)
    X[:, 0] = 1

    linreg = linregOwn(method=method)
    if method == 'ols':
        beta = linreg.fit(X, fn)
    else:
        beta = linreg.fit(X, fn, lambda_=lambda_)

    xnew = np.linspace(0, 1, np.size(x_new))
    ynew = np.linspace(0, 1, np.size(x_new))
    Xnew, Ynew = np.meshgrid(xnew, ynew)
    F_true = franke(Xnew, Ynew)

    # The grid features are scaled with the scaler fitted on the training
    # features, so the coefficients are applied on the scale they were
    # estimated on.
    xb_new = scaler.transform(designMatrix(Xnew.ravel(), Ynew.ravel()))
    xb_new[:, 0] = 1

    f_predict = np.dot(xb_new, beta)
    return Xnew, Ynew, F_true, f_predict.reshape(F_true.shape)


def plot_franke(x,
                y,
                franke_=False,
                noise=False,
                scalor=0.05,
                method='ols',
                seed1=8172,
                lambda_=0.005,
                absolute_error=False):
    """
    Plots the franke function and/or a model fit of it as a 3D surface. Franke's function has
    two Gaussian peaks of different heights, and a smaller dip. It is used as a test function in
    interpolation problems. The function is evaluated on the square xi ∈ [0, 1], for all i = 1, 2.

    Reference: Franke, R. (1979). A critical comparison of some methods for interpolation of
    scattered data (No. NPS53-79-003). NAVAL POSTGRADUATE SCHOOL MONTEREY CA.


    Arguments:
    x:  1-dimensional numpy array (1D np.array)
    y:  1-dimensional numpy array (1D np.array)
    franke_: binary argument with inputs True/False. If truthy, plots the franke function on the
             meshgrid of x and y
    noise: binary argument with inputs True/False. If truthy, noise is added to the plotted franke
           values. Only visible when franke_ is truthy.
    scalor: float type, controls the amount of noise to be added to the franke function. Used only when
            noise is truthy.
    method: character input accepting 'ols', 'ridge', 'lasso'. Plots the corresponding model fit.
            Any other value (e.g. None) skips the model fit.
    seed1: seed for np.random.seed, used for reproducible sample points of the model fit
    lambda_: float type. Used only when method = 'ridge' or 'lasso'. Controls the amount of shrinkage of the
             parameters. Higher number indicates higher shrinkage. (The original ignored this argument
             and always used 0.1.)
    absolute_error: Binary type with inputs True/False. If truthy, plots the absolute deviation between the true
                    franke values and the fit of the corresponding model instead of the fit itself. Used only
                    when method is either 'ols', 'ridge' or 'lasso'

    Returns:
    Whatever plt.show() returns.
    """
    fit_titles = {'ols': 'OLS Fit', 'ridge': 'Ridge Fit', 'lasso': 'Lasso Fit'}
    has_fit = method in fit_titles

    x, y = np.meshgrid(x, y)
    f = franke(x, y)  ##true franke values

    if noise:  ##noisy franke values (drawn before seed1 is applied)
        f = f + scalor * np.random.normal(0, 1, f.shape)

    if has_fit:  ##fit and predict the requested model
        Xnew, Ynew, F_true, F_predict = _fit_franke_surface(
            method, seed1, lambda_)

    fig = plt.figure()
    # Figure.gca() no longer accepts a projection keyword in current
    # matplotlib; add_subplot is the supported way to get 3D axes.
    ax = fig.add_subplot(111, projection='3d')

    surf = None
    if has_fit:
        heights = abs(F_predict - F_true) if absolute_error else F_predict
        surf = ax.plot_surface(Xnew,
                               Ynew,
                               heights,
                               cmap=cm.coolwarm,
                               linewidth=0,
                               antialiased=False)

    if franke_:
        surf = ax.plot_surface(x,
                               y,
                               f,
                               cmap='coolwarm',
                               linewidth=0,
                               antialiased=False)
        if noise:
            ax.set_title('Franke function with noise')
        else:
            ax.set_title('Franke function without noise')

    #Customize z axis
    ax.set_zlim(-0.10, 1.4)
    ax.zaxis.set_major_locator(LinearLocator(5))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    ax.view_init(30, 45)

    #Labeling axes and title (a model title replaces the franke title)
    if has_fit:
        ax.set_title(fit_titles[method])

    ax.set_xlabel('X')
    ax.set_ylabel('Y')

    #Add colour bar for the surface drawn last; skipped when nothing was drawn
    if surf is not None:
        fig.colorbar(surf, shrink=0.5, aspect=0.5)

    return plt.show()