Beispiel #1
0
def test_LeastSquares_R2() :
    """Tests the R2 score method of the LeastSquares class.

    A design matrix with columns 1, x, x^2 is built on five equidistant
    points in [0, 1] and fitted to y = 3x^2 - 9x - 2.4x^5 + 3.1. The R2
    score reported by the 'skl' and the 'manual' backend is compared against
    a reference value computed directly from the definition

        R2 = 1 - sum((y - yHat)^2) / sum((y - mean(y))^2),

    where yHat = X @ beta_skl. The beta of the 'manual' estimator is
    overwritten with beta_skl so that both backends score the same
    coefficients.
    """
    N = 5
    P = 3
    x = np.linspace(0,1,N)
    # No random numbers are drawn in this test; the seed call is kept so that
    # the global RNG state seen by code running afterwards is unchanged.
    random.seed(10)
    y = 3*x**2 - 9*x - 2.4*x**5 + 3.1

    # Design matrix with columns 1, x, ..., x^(P-1).
    X = np.zeros(shape=(N,P))
    X[:,0] = 1.0
    for j in range(1,P) :
        X[:,j] = x**j

    OLS = LeastSquares(backend='skl')
    beta_skl   = OLS.fit(X,y)
    R2_skl     = OLS.R2()

    OLS = LeastSquares(backend='manual')
    # The return value is not needed: the fit is only run so the estimator
    # is in a fitted state (presumably holding X and y) before beta is
    # replaced below -- TODO confirm against LeastSquares.fit.
    OLS.fit(X,y)

    # Ensure the manual and the skl fit both use the exact same beta
    # values.
    OLS.beta   = beta_skl
    R2_manual  = OLS.R2()

    yHat = np.dot(X, beta_skl)
    R2_true = 1.0 - np.sum((y - yHat)**2) / np.sum((y - np.mean(y))**2)

    # Reference values for this setup:
    #
    # beta:
    #    2.98147321428571299151
    #   -6.48616071428570872826
    #   -1.66071428571428914012
    #
    # yHat = beta0 + beta1 x + beta2 x^2
    #    2.98147321428571299151
    #    1.25613839285714279370
    #   -0.67678571428571365765
    #   -2.81729910714285569640
    #   -5.16540178571428487686
    #
    # y = 3x^2 - 9x -2.4x^5 + 3.1
    #    3.10000000000000008882
    #    1.03515625000000000000
    #   -0.72500000000000008882
    #   -2.53203124999999973355
    #   -5.30000000000000071054
    #
    # R2 = 1.0 - sum((y - yHat)**2) / sum((y - mean(y))**2)
    #    0.99605957942938250227

    assert R2_skl    == pytest.approx(R2_manual, abs=1e-15)
    assert R2_skl    == pytest.approx(R2_true, abs=1e-15)
    assert R2_manual == pytest.approx(R2_true, abs=1e-15)
Beispiel #2
0
def plot_MSE_R2() :
    """Plot 1 - R2 and the MSE of least-squares fits against the degree p.

    Draws 10^4 uniformly random points in the unit square, evaluates
    `franke` on them, and fits a 'polynomial2D' design matrix of every
    degree from 2 to 10 with the 'manual' LeastSquares backend. The R2 and
    MSE values are printed per degree and then shown in two semilog plots.
    """
    leastSquares = LeastSquares(backend='manual')

    n_samples = int(1e4)
    first_coord  = np.random.rand(n_samples)
    second_coord = np.random.rand(n_samples)

    # Column 0 holds the first coordinate, column 1 the second.
    x_data = np.zeros(shape=(n_samples,2))
    x_data[:,0] = first_coord
    x_data[:,1] = second_coord
    y_data = np.zeros(shape=(n_samples))

    @jit(nopython=True, cache=True)
    def computeFrankeValues(points, out) :
        # Fill `out` in place with the Franke value of every row of `points`.
        for row in range(points.shape[0]) :
            out[row] = franke(points[row,0], points[row,1])

    computeFrankeValues(x_data, y_data)

    p_max = 10
    degrees    = list(range(2, p_max+1))
    r2_scores  = []
    mse_values = []

    for degree in degrees :
        X = DesignMatrix('polynomial2D', degree).getMatrix(x_data)
        leastSquares.fit(X, y_data)
        # The prediction itself is not used here; the call is kept because
        # the scores below are taken right after it.
        leastSquares.predict()

        r2_scores.append(leastSquares.R2())
        mse_values.append(leastSquares.MSE())

        print(r2_scores[-1])
        print(mse_values[-1])

    degrees = np.array(degrees)

    # First plot: 1 - R2 on a logarithmic y axis.
    plt.semilogy(degrees, 1-np.array(r2_scores), 'b-o', markersize=3)
    plt.rc('text', usetex=True)
    plt.xlabel(r"$p$", fontsize=10)
    plt.ylabel(r"$1-(R^2$ score$)$", fontsize=10)
    plt.subplots_adjust(left=0.2,bottom=0.2)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS_R2.png'), transparent=True, bbox_inches='tight')
    plt.show()

    # Second plot: the MSE, in a fresh figure.
    plt.figure()
    plt.gca()
    plt.semilogy(degrees, mse_values, 'r-o', markersize=3)

    plt.rc('text', usetex=True)
    plt.xlabel(r"$p$", fontsize=10)
    plt.ylabel(r"MSE", fontsize=10)
    plt.subplots_adjust(left=0.2,bottom=0.2)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS_MSE.png'), transparent=True, bbox_inches='tight')
    plt.show()
Beispiel #3
0
def part_a(plotting=False) :
    """Fit the Franke function with polynomials of degree 2 to 5.

    For every degree, 10^4 uniformly random points in the unit square are
    drawn, `franke` is evaluated on them, and `Bootstrap.resample` is run
    with 1000 as its third argument on a 'manual' LeastSquares estimator
    with a 'polynomial2D' design matrix.

    Parameters
    ----------
    plotting : bool, optional
        When True, the scores and coefficients of each degree are printed
        and a 3D surface of |model - franke| on a 100 x 100 grid is shown.

    Returns
    -------
    tuple of lists
        (MSE_degree, R2_degree, betaVariance_degree), each holding one entry
        per degree in the order the degrees were processed.
    """
    MSE_degree          = []
    R2_degree           = []
    betaVariance_degree = []

    # Defined once, outside the loop: the helper does not depend on the
    # degree, so there is no need to re-create the jitted function each time.
    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y) :
        # Fill y in place with the Franke value of every row of x_data.
        N = x_data.shape[0]
        for i in range(N) :
            y[i] = franke(x_data[i,0], x_data[i,1])

    for degree in [2,3,4,5]: #,6,7,8,9] :
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(backend='manual')
        bootstrap    = Bootstrap(leastSquares, designMatrix)

        # Fresh random sample for every degree.
        N = int(1e4)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N,2))
        x_data[:,0] = x
        x_data[:,1] = y
        y_data = np.zeros(shape=(N))

        computeFrankeValues(x_data, y_data)
        bootstrap.resample(x_data, y_data, 1000)

        MSE_degree.         append(leastSquares.MSE())
        R2_degree.          append(leastSquares.R2())
        betaVariance_degree.append(bootstrap.betaVariance)
        if plotting :
            print("MSE: ", MSE_degree[-1])
            print("R2:  ", R2_degree[-1])
            print("Beta Variance: ")
            for b in betaVariance_degree[-1] : print(b)
            print("Beta: ")
            for b in leastSquares.beta : print(b)
            print(" ")

            M = 100
            fig = plt.figure()
            # fig.gca(projection='3d') is no longer accepted by current
            # Matplotlib; add_subplot creates the same single 3D axes.
            ax = fig.add_subplot(111, projection='3d')

            # Regular M x M evaluation grid on the unit square.
            x_grid = np.linspace(0, 1, M)
            y_grid = np.linspace(0, 1, M)
            X, Y = np.meshgrid(x_grid, y_grid)
            grid_points = np.vstack([X.ravel(), Y.ravel()]).T

            # Exact Franke values on the grid.
            franke_values = np.zeros(shape=(grid_points.shape[0]))
            computeFrankeValues(grid_points, franke_values)

            # Model prediction on the grid: the estimator's design matrix
            # is swapped for the grid's before predict() is called.
            leastSquares.X = designMatrix.getMatrix(grid_points)
            model_values = leastSquares.predict()

            Z  = np.reshape(model_values.T, X.shape)
            ZF = np.reshape(franke_values.T, X.shape)

            # Shown is the absolute difference between model and Franke
            # function; swap in the line below to show the model itself.
            #ax.plot_surface(X,Y,Z,cmap=cm.coolwarm,linewidth=0, antialiased=False)
            ax.plot_surface(X,Y,abs(Z-ZF),cmap=cm.coolwarm,linewidth=0, antialiased=False)
            ax.set_zlim(-0.10, 1.40)
            ax.zaxis.set_major_locator(LinearLocator(5))
            ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
            ax.view_init(30, 45)

            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'franke.png'), transparent=True, bbox_inches='tight')
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS'+str(degree)+'.png'), transparent=True, bbox_inches='tight')
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS'+str(degree)+'_diff.png'), transparent=True, bbox_inches='tight')
            plt.show()

            # Running summary of everything collected so far; printed after
            # every degree, not only once at the end.
            print("\nMSE :")
            print(MSE_degree)
            print("\nR2 :")
            print(R2_degree)
            print("\nσ²(β) :")
            print(betaVariance_degree)

    return MSE_degree, R2_degree, betaVariance_degree
Beispiel #4
0
def fit_franke_noise() :
    """Plot 1 - R2 and MSE of a degree-10 fit against the noise scale.

    For 50 noise scales eta, logarithmically spaced between 1e-4 and 1,
    10^5 uniformly random points in the unit square are drawn, `franke` is
    evaluated on them and Gaussian noise scaled by eta is added. A 'manual'
    LeastSquares estimator with a 'polynomial2D' design matrix of degree 10
    is run through `Bootstrap.resample` (third argument k = 1) on the noisy
    data. MSE and R2 are recorded twice: once as reported right after the
    resampling, and once after the estimator's `y` attribute has been
    replaced by the noise-free values.

    The resulting figure (solid: noisy targets, dashed: noise-free targets)
    is written to figures/R2MSE_OLS_noise.png next to this file and shown.
    """
    R2           = []
    MSE          = []

    R2_noise           = []
    MSE_noise          = []
    beta_noise         = []
    betaVariance_noise = []

    noise = np.logspace(-4,0,50)
    k = 1

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y) :
        # Fill y in place with the Franke value of every row of x_data.
        N = x_data.shape[0]
        for i in range(N) :
            y[i] = franke(x_data[i,0], x_data[i,1])

    for eta in noise :
        designMatrix = DesignMatrix('polynomial2D', 10)
        leastSquares = LeastSquares(backend='manual')
        bootstrap    = Bootstrap(leastSquares, designMatrix)

        N = int(1e5)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N,2))
        x_data[:,0] = x
        x_data[:,1] = y
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)
        y_data_noise = y_data +  eta * np.random.standard_normal(size=N)

        bootstrap.resample(x_data, y_data_noise, k)

        MSE_noise.         append(leastSquares.MSE())
        R2_noise.          append(leastSquares.R2())
        beta_noise.        append(bootstrap.beta)
        betaVariance_noise.append(bootstrap.betaVariance)

        # Score the same fit against the noise-free targets.
        leastSquares.y = y_data
        MSE.append(leastSquares.MSE())
        R2. append(leastSquares.R2())

    # Disabled alternative plot of the beta variances versus the noise scale.
    # It is kept as comments rather than inside a string literal: in a
    # non-raw string the LaTeX backslashes (\e, \s) are invalid escape
    # sequences and trigger a warning when the module is compiled.
    #
    # betaVariance_noise = np.array(betaVariance_noise)
    # for beta in betaVariance_noise :
    #     print(beta)
    #
    # colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k','#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
    #           '#9467bd', '#8c564b', '#e377c2', '#7f7f7f',
    #           '#bcbd22', '#17becf']
    # for i in range(6) :
    #     plt.loglog(noise, betaVariance_noise[:,i], colors[i]+'-o', markersize=2)
    #
    # plt.rc('text', usetex=True)
    # #plt.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
    # ## for Palatino and other serif fonts use:
    # #plt.rc('font',**{'family':'serif','serif':['Palatino']})    plt.xlabel(r"$p$", fontsize=16)
    # plt.xlabel(r"noise scale $\eta$", fontsize=10)
    # plt.ylabel(r"$ \sigma^2(\beta_j)$", fontsize=10)
    # plt.legend([r"intercept",
    #             r"$\beta_{x}$",
    #             r"$\beta_{y}$",
    #             r"$\beta_{x^2}$",
    #             r"$\beta_{xy}$",
    #             r"$\beta_{y^2}$"], fontsize=10)
    #
    # plt.subplots_adjust(left=0.2,bottom=0.2)
    # #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'beta_variance_OLS_noise.png'), transparent=True, bbox_inches='tight')
    # #plt.show()

    one_minus_R2_noise = 1-np.array(R2_noise)

    print(R2_noise)
    print(one_minus_R2_noise)
    fig, ax1 = plt.subplots()
    ax1.loglog(noise, one_minus_R2_noise,'k-o',markersize=2)
    ax1.loglog(noise, 1-np.array(R2),'k--',markersize=2)
    plt.xlabel(r"noise scale $\eta$", fontsize=10)
    plt.ylabel(r"$1-R^2$", color='k', fontsize=10)

    ax2 = ax1.twinx()
    ax2.loglog(noise, np.array(MSE_noise), 'b-o',markersize=2)
    ax2.loglog(noise, np.array(MSE), 'b--',markersize=2)
    plt.ylabel(r"MSE", color='b', fontsize=10)
    plt.subplots_adjust(left=0.2,bottom=0.2,right=0.9)

    # Both axes get the same limits so the two curves share one scale (the
    # ticks of the second axis are hidden). The limits are taken from the
    # quantities actually drawn, i.e. 1 - R2 rather than R2 itself.
    y_low  = 0.95*min(min(MSE_noise), one_minus_R2_noise.min())
    y_high = 1.05*max(max(MSE_noise), one_minus_R2_noise.max())
    ax1.set_ylim([y_low, y_high])
    ax2.set_ylim([y_low, y_high])
    ax2.get_yaxis().set_ticks([])

    # Make sure the output directory exists before writing the figure.
    figure_dir = os.path.join(os.path.dirname(__file__), 'figures')
    os.makedirs(figure_dir, exist_ok=True)
    plt.savefig(os.path.join(figure_dir, 'R2MSE_OLS_noise.png'), transparent=True, bbox_inches='tight')
    plt.show()
Beispiel #5
0
def part_b():
    """Plot the MSE of OLS and ridge fits of the Franke function vs. noise.

    Three values of lambda (1e-2, 1e-1, 1e0) are processed. The first one is
    fitted with method 'ols' and labelled "OLS"; the remaining ones use
    method 'ridge' and are labelled with their power of ten. `setLambda` is
    called for every estimator, including the 'ols' one.

    For each of 50 noise scales eta, linearly spaced in [0, 1], a degree-10
    'polynomial2D' model is run through `Bootstrap.resample` (third argument
    k = 1) on 1000 noisy samples, and the MSE reported afterwards is the
    value that gets plotted. A second, independent noisy sample of the same
    size is then scored as well; those values (and the R2 / beta
    accumulators) are collected but not plotted.

    NOTE(review): the first noise scale is exactly 0, which has no position
    on the logarithmic x axis used below.
    """
    R2 = []
    MSE = []

    R2_noise = []
    MSE_noise = []
    beta_noise = []
    betaVariance_noise = []

    noise = np.linspace(0, 1.0, 50)
    k = 1
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    fig, ax1 = plt.subplots()
    plt.rc('text', usetex=True)

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        # Fill y in place with the Franke value of every row of x_data.
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    def sampleNoisyFranke(N, eta):
        # Draw N uniform points in the unit square and return them together
        # with their Franke values plus Gaussian noise scaled by eta.
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)
        return x_data, y_data + eta * np.random.standard_normal(size=N)

    for ind, lambda_ in enumerate(np.logspace(-2, 0, 3)):
        # Only the curve of the current lambda is plotted, so start afresh.
        MSE_noise = []
        method = 'ols' if ind == 0 else 'ridge'

        for eta in noise:
            designMatrix = DesignMatrix('polynomial2D', 10)
            leastSquares = LeastSquares(backend='manual', method=method)
            leastSquares.setLambda(lambda_)
            bootstrap = Bootstrap(leastSquares, designMatrix)

            x_data, y_data_noise = sampleNoisyFranke(int(1000), eta)
            bootstrap.resample(x_data, y_data_noise, k)

            MSE_noise.append(leastSquares.MSE())
            R2_noise.append(leastSquares.R2())
            beta_noise.append(bootstrap.beta)
            betaVariance_noise.append(bootstrap.betaVariance)

            # Different noise, test data
            x_data, y_data_noise = sampleNoisyFranke(int(1000), eta)

            leastSquares.X = designMatrix.getMatrix(x_data)
            leastSquares.predict()
            leastSquares.y = y_data_noise

            MSE.append(leastSquares.MSE())
            R2.append(leastSquares.R2())

        if ind == 0:
            ax1.loglog(noise,
                       np.array(MSE_noise),
                       colors[ind] + '--',
                       markersize=1,
                       label=r"OLS")
        else:
            # Round instead of truncating: a log10 result that is off by
            # one ulp (e.g. -0.9999...) would otherwise be labelled 10^0.
            exponent = int(round(float(np.log10(lambda_))))
            ax1.loglog(noise,
                       np.array(MSE_noise),
                       colors[ind] + '-',
                       markersize=1,
                       label=r"$\lambda=10^{%d}$" % exponent)
        plt.ylabel(r"MSE", fontsize=10)
        plt.xlabel(r"noise scale $\eta$", fontsize=10)
        plt.subplots_adjust(left=0.2, bottom=0.2)

        #ax1.set_ylim([0.95*min(min(MSE_noise), min(R2_noise)), 1.05*(max(max(MSE_noise), max(R2_noise)))])

    ax1.legend()
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'MSE_ridge_noise.png'), transparent=True, bbox_inches='tight')
    plt.show()
Beispiel #6
0
def plot_beta_ridge():
    """Plot the fitted coefficients against the shrinkage parameter lambda.

    Twenty values of lambda, logarithmically spaced between 1e-3 and 1e5,
    are processed. For each one a degree-3 'polynomial2D' model is run
    through `Bootstrap.resample` (third argument k = 10000) on 10^4 Franke
    samples with unit-scale Gaussian noise, and `bootstrap.beta` and
    `bootstrap.betaVariance` are collected.

    The first lambda is fitted with method 'ols', all others with method
    'ridge'. The OLS result is drawn as a separate, unlabelled point at
    x = 1e-3; the ridge results are drawn as connected, labelled curves.
    """
    beta = []
    betaVariance = []
    MSE = []
    R2 = []

    k = 10000
    fig, _ = plt.subplots()
    plt.rc('text', usetex=True)

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        # Fill y in place with the Franke value of every row of x_data.
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    lam = np.logspace(-3, 5, 20)

    # enumerate() makes index 0 -- the entry plotted separately below -- the
    # OLS fit. With a counter that starts at -1 and is tested before being
    # incremented, the OLS fit would land on index 1 instead.
    for ind, lambda_ in enumerate(lam):
        if ind == 0:
            leastSquares = LeastSquares(backend='manual', method='ols')
        else:
            leastSquares = LeastSquares(backend='manual', method='ridge')

        designMatrix = DesignMatrix('polynomial2D', 3)
        bootstrap = Bootstrap(leastSquares, designMatrix)
        leastSquares.setLambda(lambda_)

        N = int(1e4)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)
        eta = 1.0
        y_data_noise = y_data + eta * np.random.standard_normal(size=N)

        bootstrap.resample(x_data, y_data_noise, k)

        MSE.append(leastSquares.MSE())
        R2.append(leastSquares.R2())
        beta.append(bootstrap.beta)
        betaVariance.append(bootstrap.betaVariance)

        # Second pair of scores, taken after the estimator's targets have
        # been replaced by the noise-free values. MSE and R2 therefore hold
        # two interleaved entries per lambda; neither list is plotted.
        leastSquares.y = y_data
        MSE.append(leastSquares.MSE())
        R2.append(leastSquares.R2())

    beta = np.array(beta)
    betaVariance = np.array(betaVariance)

    # Labels for the ten coefficients of the degree-3 model; presumably in
    # the column order of the 'polynomial2D' design matrix -- TODO confirm.
    monomial = [
        '1', 'x', 'y', 'x^2', 'xy', 'y^2', 'x^3', 'x^2y', 'xy^2', 'y^3'
    ]
    colors = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
    ]

    # Ridge results (every lambda except the first) as labelled curves.
    # NOTE(review): the error bars are 2 * betaVariance, i.e. twice the
    # variance, not two standard deviations -- confirm this is intended.
    for i in range(10):
        plt.errorbar(lam[1:],
                     beta[1:, i],
                     yerr=2 * betaVariance[1:, i],
                     fmt='-o',
                     markersize=2,
                     linewidth=1,
                     color=colors[i],
                     elinewidth=0.5,
                     capsize=2,
                     capthick=0.5,
                     label=r"$\beta_{%s}$" % (monomial[i]))
    plt.ylabel(r"$\beta_j$", fontsize=10)
    plt.xlabel(r"shrinkage parameter $\lambda$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    plt.legend(fontsize=8)

    # OLS result (index 0) as an isolated point at the left edge; drawn
    # after legend() so it does not get a legend entry.
    for i in range(10):
        plt.errorbar(1e-3,
                     beta[0, i],
                     yerr=2 * betaVariance[0, i],
                     fmt='-o',
                     markersize=2,
                     linewidth=1,
                     color=colors[i],
                     elinewidth=0.5,
                     capsize=2,
                     capthick=0.5)

    fig.gca().set_xscale('log')
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'beta_ridge.png'), transparent=True, bbox_inches='tight')

    plt.show()