예제 #1
0
def part_e(plotting=False):
    """Fit OLS, ridge and lasso models to the terrain data and print test MSE.

    The train/test split comes from ``real_data(file_number=2,
    plotting=False)``. Each method uses a degree-10 ``'polynomial2D'`` design
    matrix; lambda is 1e-4 for lasso and 1e-3 otherwise.

    Parameters
    ----------
    plotting : bool, optional
        When true, show a 3D surface of every model's prediction on the test
        points and, after all methods have run, of the test targets.
    """
    x_train, y_train, x_test, y_test = real_data(file_number=2, plotting=False)

    def show_surface(values):
        # Draws `values` over a 60 x 60 grid on the unit square, so `values`
        # must hold exactly 60 * 60 entries for the reshape to succeed.
        axis = np.linspace(0, 1, 60)
        XX, YY = np.meshgrid(axis, np.copy(axis))
        ZZ = np.reshape(values, XX.shape)

        fig = plt.figure()
        # Figure.gca() no longer accepts keyword arguments in recent
        # Matplotlib releases; add_subplot creates the 3D axes instead.
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(XX,
                        YY,
                        ZZ,
                        cmap=cm.coolwarm,
                        linewidth=0,
                        antialiased=False)
        ax.set_zlim(-0.10, 1.40)
        ax.zaxis.set_major_locator(LinearLocator(5))
        ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
        ax.view_init(30, 45 + 90)
        plt.show()

    for method in ['ols', 'ridge', 'lasso']:
        designMatrix = DesignMatrix('polynomial2D', 10)
        leastSquares = LeastSquares(backend='manual', method=method)
        leastSquares.setLambda(1e-4 if method == 'lasso' else 1e-3)

        X = designMatrix.getMatrix(x_train)
        leastSquares.fit(X, y_train)

        # predict() takes no argument; the test design matrix is handed over
        # through the X attribute so that the prediction (and hence the MSE
        # against y_test) refers to the test points. Previously X_test was
        # built but never used.
        # NOTE(review): assumes predict() reads leastSquares.X -- confirm.
        X_test = designMatrix.getMatrix(x_test)
        leastSquares.X = X_test
        leastSquares.predict()
        leastSquares.y = y_test
        print(leastSquares.MSE())

        if plotting:
            show_surface(leastSquares.yHat)

    if plotting:
        show_surface(y_test)
예제 #2
0
def test_bootstrap_resample():
    """Tests the resample method of the Bootstrap class

    Checks that resampling data with a larger noise level yields a larger
    mean variance of the fitted beta values.
    """

    # Constant data: the variance should be zero. The resample call and its
    # assertion stay disabled because they fail with
    # LinAlgError("Singular matrix") on TravisCI while passing locally.
    OLS = LeastSquares()
    DM = DesignMatrix('polynomial', 3)
    bootstrap = Bootstrap(OLS, DM)

    x = np.ones(10) * 2.25
    y = x**3

    #bootstrap.resample(x, y, 10)
    #assert bootstrap.betaVariance == pytest.approx(np.zeros(4), abs=1e-15)

    # Trigonometric basis, addressed by integer index.
    basis = [
        lambda x: np.sin(x),
        lambda x: np.cos(x),
        lambda x: np.sin(2 * x),
        lambda x: np.cos(2 * x),
        lambda x: np.sin(3 * x),
        lambda x: np.cos(3 * x),
        lambda x: np.sin(4 * x),
        lambda x: np.cos(4 * x),
        lambda x: np.sin(5 * x),
        lambda x: np.cos(5 * x),
        lambda x: np.sin(x)**2,
        lambda x: np.cos(x)**2,
        lambda x: np.sin(2 * x)**2,
        lambda x: np.cos(2 * x)**2,
        lambda x: np.sin(3 * x)**2,
        lambda x: np.cos(3 * x)**2,
    ]
    functions = dict(enumerate(basis))

    DM = DesignMatrix(lambda j, x: functions[j](x), 9)
    OLS = LeastSquares()
    bootstrap = Bootstrap(OLS, DM)

    N = 100
    x = np.linspace(0, 2 * np.pi, N)

    # Mean beta variance per noise level; each entry must exceed the one
    # before it.
    observedMeans = []
    for noiseScale in (0.0, 0.1, 1.0):
        perturbation = np.random.normal(0, noiseScale, N)
        y = np.sin(1.5 * x) - 0.5 * np.cos(2 * x)**2 + perturbation
        bootstrap.resample(x, y, 100)
        observedMeans.append(np.mean(bootstrap.betaVariance))
        if len(observedMeans) > 1:
            assert observedMeans[-2] < observedMeans[-1]
예제 #3
0
def plot_MSE_R2() :
    """Plot 1 - R2 and MSE of polynomial fits to the Franke function.

    Draws 10^4 uniform random points in the unit square, evaluates ``franke``
    on them and fits ``'polynomial2D'`` models of degree 2 through 10 with a
    single ``LeastSquares(backend='manual')`` instance. The R2 and MSE values
    are printed per degree and shown in two semilog plots.
    """
    leastSquares = LeastSquares(backend='manual')

    N = int(1e4)
    x = np.random.rand(N)
    y = np.random.rand(N)
    x_data = np.column_stack((x, y))
    y_data = np.zeros(shape=(N))

    @jit(nopython=True, cache=True)
    def computeFrankeValues(points, out) :
        # Fills `out` in place with franke(x, y) for every row of `points`.
        for row in range(points.shape[0]) :
            out[row] = franke(points[row,0], points[row,1])

    computeFrankeValues(x_data, y_data)

    p_max = 10
    degrees = list(range(2, p_max+1))
    R2  = []
    MSE = []

    for degree in degrees :
        X = DesignMatrix('polynomial2D', degree).getMatrix(x_data)
        leastSquares.fit(X, y_data)
        leastSquares.predict()

        R2.append(leastSquares.R2())
        MSE.append(leastSquares.MSE())

        print(R2[-1])
        print(MSE[-1])

    p = np.array(degrees)

    # First figure: 1 - R2 against the polynomial degree.
    plt.semilogy(p, 1-np.array(R2), 'b-o', markersize=3)
    plt.rc('text', usetex=True)
    plt.xlabel(r"$p$", fontsize=10)
    plt.ylabel(r"$1-(R^2$ score$)$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    plt.show()

    # Second figure: MSE against the polynomial degree.
    plt.figure()
    plt.semilogy(p, MSE, 'r-o', markersize=3)
    plt.rc('text', usetex=True)
    plt.xlabel(r"$p$", fontsize=10)
    plt.ylabel(r"MSE", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    plt.show()
예제 #4
0
def test_DesignMatrix_polynomial2D():
    """Tests the polynomial2D method of the DesignMatrix class

    A degree-2 design matrix for four (x, y) points is compared with rows
    built by hand as [1, x, y, x^2, x*y, y^2].
    """
    points = [(2.0, 3.0), (4.0, 5.0), (6.0, 7.0), (8.0, 9.0)]

    X = DesignMatrix('polynomial2D', 2).getMatrix(np.array(points))

    expected_rows = [[1.0, a, b, a**2, a * b, b**2] for a, b in points]
    X_true = np.array(expected_rows)

    assert X == pytest.approx(X_true, abs=1e-15)
예제 #5
0
def test_DesignMatrix_function():
    """Tests the function method of the DesignMatrix class

    Design matrices are built from a user-supplied callable ``f(i, x)`` with
    1, 2, 3, 4 and 6 functions and compared with manually evaluated matrices
    whose first column is 1.0 and whose remaining columns are cos, sin, tan,
    cosh, sinh and tanh of the sample points, in that order.
    """
    class f1:
        """Indexed family of functions; ``f(i, x)`` evaluates function i at x."""

        def __init__(self, degree):
            # Largest function index this instance accepts.
            self.degree = degree

        def __call__(self, i, x):
            if i > self.degree:
                raise ValueError(
                    "Specified function index is larger than the number of available functions."
                )
            functions = {
                0: self._f0,
                1: self._f1,
                2: self._f2,
                3: self._f3,
                4: self._f4,
                5: self._f5,
                6: self._f6,
            }
            # An index without a function used to fall through and return
            # None; report it explicitly instead.
            if i not in functions:
                raise ValueError(
                    "No function is defined for index {}.".format(i))
            return functions[i](x)

        def _f0(self, x):
            # Index 0 was dispatched to this method without it being defined.
            # NOTE(review): a constant 1 matches the leading 1.0 column of
            # the expected matrices below -- confirm whether DesignMatrix
            # ever requests index 0.
            return np.ones_like(x)

        def _f1(self, x):
            return np.cos(x)

        def _f2(self, x):
            return np.sin(x)

        def _f3(self, x):
            return np.tan(x)

        def _f4(self, x):
            return np.cosh(x)

        def _f5(self, x):
            return np.sinh(x)

        def _f6(self, x):
            return np.tanh(x)

    # Reference functions for columns 1..6 of the expected matrices.
    columns = (np.cos, np.sin, np.tan, np.cosh, np.sinh, np.tanh)

    # (number of functions, sample points) for every scenario.
    cases = [
        (1, [np.pi / 2]),
        (2, [np.pi / 2, np.pi / 3]),
        (3, [np.pi / 2, np.pi / 3, np.pi / 4, np.pi / 5, np.pi / 6]),
        (4, [np.pi / 2, np.pi / 3]),
        (6, [
            np.pi / 2, np.pi / 3, np.pi / 4, np.pi / 5, np.pi / 6, np.pi / 7,
            np.pi / 8, np.pi / 9
        ]),
    ]

    for numberOfFunctions, points in cases:
        f = f1(numberOfFunctions)
        x = np.array(points)
        DM = DesignMatrix(f, numberOfFunctions)
        X = DM.getMatrix(x)

        # Each expected entry is evaluated on a scalar, one point at a time,
        # independently of the callable handed to DesignMatrix.
        X_true = np.array([[1.0] +
                           [g(point) for g in columns[:numberOfFunctions]]
                           for point in points])
        assert X == pytest.approx(X_true, abs=1e-15)
예제 #6
0
def test_DesignMatrix_polynomial():
    """Tests the polynomial method of the DesignMatrix class

    For every degree d from 1 to 6 the design matrix of the points
    2.0, 3.0, ..., d + 1 is compared with rows [1, x, x^2, ..., x^d]
    built by hand.
    """
    for degree in range(1, 7):
        # A degree-d case uses d sample points: 2, 3, ..., d + 1.
        nodes = range(2, degree + 2)
        x = np.array([float(node) for node in nodes])

        X = DesignMatrix('polynomial', degree).getMatrix(x)

        # Integer powers are exact, so converting them to float reproduces
        # the literal values 1.0, 2.0, 4.0, 8.0, ...
        X_true = np.array([[float(node**power)
                            for power in range(degree + 1)]
                           for node in nodes])
        assert X == pytest.approx(X_true, abs=1e-15)
예제 #7
0
            np.mean(MSE_train)
        ]
        return averages


# Script entry point: for each polynomial degree, set up a ridge model and a
# random data set on the unit square.
# NOTE(review): this snippet appears to be cut off; train_MSE, test_MSE,
# crossvalidation and XT_X are assigned but not used in the visible part.
if __name__ == '__main__':

    # Polynomial degrees 1, 2, ..., 20.
    Degree = 1 + np.arange(20)

    train_MSE = []
    test_MSE = []
    print("#########################################################")
    for degree in Degree:

        # Degree holds NumPy integers; hand DesignMatrix a plain int.
        degree = int(degree)
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(method="ridge", backend='manual')
        # Fixed lambda passed to the ridge model for every degree.
        Lambda = 4
        leastSquares.setLambda(Lambda)
        crossvalidation = CrossValidation(leastSquares, designMatrix)

        # N uniform random points; column 0 holds x, column 1 holds y.
        N = int(1e4)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        # Target vector, still all zeros in the visible part of the script.
        y_data = np.zeros(shape=(N))

        # Design matrix of the sample points and its Gram matrix X^T X.
        X = designMatrix.getMatrix(x_data)
        XT_X = np.dot(X.T, X)
예제 #8
0
def part_a(plotting=False) :
    """Fit polynomials of degree 2 to 5 to the Franke function.

    For every degree, 10^4 uniform random points in the unit square are
    drawn, ``franke`` is evaluated on them and
    ``bootstrap.resample(x_data, y_data, 1000)`` is run on a
    ``LeastSquares(backend='manual')`` model.

    Parameters
    ----------
    plotting : bool, optional
        When true, print the scores and coefficients of each degree, show
        the absolute difference between the model and the Franke function on
        a 100 x 100 grid, and print the results accumulated so far.

    Returns
    -------
    tuple
        ``(MSE_degree, R2_degree, betaVariance_degree)``, three lists with
        one entry per degree.
    """
    MSE_degree          = []
    R2_degree           = []
    betaVariance_degree = []

    # Compiled once: the kernel does not depend on the polynomial degree, so
    # redefining it on every loop iteration was redundant.
    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y) :
        # Fills y in place with franke(x, y) for every row of x_data.
        N = x_data.shape[0]
        for i in range(N) :
            y[i] = franke(x_data[i,0], x_data[i,1])

    for degree in [2,3,4,5]:
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(backend='manual')
        bootstrap    = Bootstrap(leastSquares, designMatrix)

        N = int(1e4)
        x_data = np.zeros(shape=(N,2))
        x_data[:,0] = np.random.rand(N)
        x_data[:,1] = np.random.rand(N)
        y_data = np.zeros(shape=(N))

        computeFrankeValues(x_data, y_data)
        bootstrap.resample(x_data, y_data, 1000)

        MSE_degree.append(leastSquares.MSE())
        R2_degree.append(leastSquares.R2())
        betaVariance_degree.append(bootstrap.betaVariance)

        if not plotting :
            continue

        print("MSE: ", MSE_degree[-1])
        print("R2:  ", R2_degree[-1])
        print("Beta Variance: ")
        for b in betaVariance_degree[-1] : print(b)
        print("Beta: ")
        for b in leastSquares.beta : print(b)
        print(" ")

        M = 100
        fig = plt.figure()
        # Figure.gca() no longer accepts keyword arguments in recent
        # Matplotlib releases; add_subplot creates the 3D axes instead.
        ax = fig.add_subplot(111, projection='3d')

        grid_axis = np.linspace(0, 1, M)
        X, Y = np.meshgrid(grid_axis, np.linspace(0, 1, M))
        grid_points = np.vstack([X.ravel(), Y.ravel()]).T

        # Exact Franke values on the grid.
        franke_values = np.zeros(shape=(grid_points.shape[0]))
        computeFrankeValues(grid_points, franke_values)

        # Model prediction on the grid: the design matrix is handed over
        # through the X attribute before predict() is called.
        leastSquares.X = designMatrix.getMatrix(grid_points)
        model_values = leastSquares.predict()

        Z = np.reshape(model_values.T, X.shape)
        ZF = np.reshape(franke_values.T, X.shape)

        ax.plot_surface(X,Y,abs(Z-ZF),cmap=cm.coolwarm,linewidth=0, antialiased=False)
        ax.set_zlim(-0.10, 1.40)
        ax.zaxis.set_major_locator(LinearLocator(5))
        ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
        ax.view_init(30, 45)
        plt.show()

        # Results accumulated up to and including this degree.
        print("\nMSE :")
        print(MSE_degree)
        print("\nR2 :")
        print(R2_degree)
        print("\nσ²(β) :")
        print(betaVariance_degree)

    return MSE_degree, R2_degree, betaVariance_degree
예제 #9
0
def fit_franke_noise() :
    """Plot 1 - R2 and MSE of a degree-10 fit against the noise scale.

    For 50 noise scales between 1e-4 and 1, 10^5 uniform random points in
    the unit square are drawn, ``franke`` is evaluated on them, Gaussian
    noise scaled by eta is added and
    ``bootstrap.resample(x_data, y_data_noise, 1)`` is run. The scores are
    recorded twice: against the noisy targets (solid lines) and after
    replacing ``leastSquares.y`` by the noise-free targets (dashed lines).
    The figure is saved as ``figures/R2MSE_OLS_noise.png`` next to this file
    and then shown.
    """
    R2        = []
    MSE       = []
    R2_noise  = []
    MSE_noise = []

    noise = np.logspace(-4,0,50)
    k = 1

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y) :
        # Fills y in place with franke(x, y) for every row of x_data.
        N = x_data.shape[0]
        for i in range(N) :
            y[i] = franke(x_data[i,0], x_data[i,1])

    for eta in noise :
        designMatrix = DesignMatrix('polynomial2D', 10)
        leastSquares = LeastSquares(backend='manual')
        bootstrap    = Bootstrap(leastSquares, designMatrix)

        N = int(1e5)
        x_data = np.zeros(shape=(N,2))
        x_data[:,0] = np.random.rand(N)
        x_data[:,1] = np.random.rand(N)
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)
        y_data_noise = y_data + eta * np.random.standard_normal(size=N)

        bootstrap.resample(x_data, y_data_noise, k)

        MSE_noise.append(leastSquares.MSE())
        R2_noise.append(leastSquares.R2())

        # Score again with the noise-free targets.
        # NOTE(review): assumes MSE()/R2() compare the stored prediction
        # with leastSquares.y -- confirm.
        leastSquares.y = y_data
        MSE.append(leastSquares.MSE())
        R2.append(leastSquares.R2())

    one_minus_R2_noise = 1-np.array(R2_noise)
    print(R2_noise)
    print(one_minus_R2_noise)

    fig, ax1 = plt.subplots()
    ax1.loglog(noise, one_minus_R2_noise,'k-o',markersize=2)
    ax1.loglog(noise, 1-np.array(R2),'k--',markersize=2)
    plt.xlabel(r"noise scale $\eta$", fontsize=10)
    plt.ylabel(r"$1-R^2$", color='k', fontsize=10)

    ax2 = ax1.twinx()
    ax2.loglog(noise, np.array(MSE_noise), 'b-o',markersize=2)
    ax2.loglog(noise, np.array(MSE), 'b--',markersize=2)
    plt.ylabel(r"MSE", color='b', fontsize=10)
    plt.subplots_adjust(left=0.2,bottom=0.2,right=0.9)

    # Both axes share one range so the curves are directly comparable. The
    # range is taken from the plotted 1 - R2 values; the raw R2 values used
    # before do not describe the curve that is actually drawn.
    lower = 0.95*min(min(MSE_noise), one_minus_R2_noise.min())
    upper = 1.05*max(max(MSE_noise), one_minus_R2_noise.max())
    ax1.set_ylim([lower, upper])
    ax2.set_ylim([lower, upper])
    ax2.get_yaxis().set_ticks([])

    # savefig does not create missing directories.
    figures_dir = os.path.join(os.path.dirname(__file__), 'figures')
    os.makedirs(figures_dir, exist_ok=True)
    plt.savefig(os.path.join(figures_dir, 'R2MSE_OLS_noise.png'), transparent=True, bbox_inches='tight')
    plt.show()
예제 #10
0
def part_e_2():
    """Plot the terrain test MSE of OLS, ridge and lasso against lambda.

    The train/test split comes from ``real_data(file_number=2,
    plotting=False)`` and every model uses a degree-10 ``'polynomial2D'``
    design matrix. Ridge is evaluated for 20 lambdas in [1e-5, 1], lasso for
    20 lambdas in [1e-4, 1]; OLS is drawn as a horizontal reference line.
    The figure is saved as ``figures/lambda_terrain.png`` next to this file
    and then shown.
    """
    x_train, y_train, x_test, y_test = real_data(file_number=2, plotting=False)

    degree = 10

    def evaluate(method, lambda_=None):
        # Fit `method` on the training split and return the MSE on the test
        # split; setLambda is skipped when no lambda is given (OLS).
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(backend='manual', method=method)
        if lambda_ is not None:
            leastSquares.setLambda(lambda_)

        leastSquares.fit(designMatrix.getMatrix(x_train), y_train)

        # predict() takes no argument; the test design matrix is handed over
        # through the X attribute so that the MSE against y_test refers to
        # the test points. Previously X_test was built but never used.
        # NOTE(review): assumes predict() reads leastSquares.X -- confirm.
        leastSquares.X = designMatrix.getMatrix(x_test)
        leastSquares.predict()
        leastSquares.y = y_test
        return leastSquares.MSE()

    ols_MSE = evaluate('ols')
    print(ols_MSE)
    # OLS does not depend on lambda: two points spanning the lambda range
    # draw it as a horizontal line.
    ols_MSE = np.array([ols_MSE, ols_MSE])
    ols_lambda = np.array([1e-5, 1])

    ridge_lambda = np.logspace(-5, 0, 20)
    ridge_MSE = []
    for lambda_ in ridge_lambda:
        print("ridge " + str(lambda_))
        ridge_MSE.append(evaluate('ridge', lambda_))
        print(ridge_MSE[-1])
    ridge_MSE = np.array(ridge_MSE)

    lasso_lambda = np.logspace(-4, 0, 20)
    lasso_MSE = []
    for lambda_ in lasso_lambda:
        print("lasso " + str(lambda_))
        lasso_MSE.append(evaluate('lasso', lambda_))
    # Previously the ridge results were copied here, so the lasso curve was
    # a duplicate of the ridge curve.
    lasso_MSE = np.array(lasso_MSE)

    ########################################################
    plt.rc('text', usetex=True)

    plt.loglog(ols_lambda,
               ols_MSE,
               'k--o',
               markersize=1,
               linewidth=1,
               label=r'OLS')
    plt.loglog(ridge_lambda,
               ridge_MSE,
               'r-o',
               markersize=1,
               linewidth=1,
               label=r'Ridge')
    plt.loglog(lasso_lambda,
               lasso_MSE,
               'b-o',
               markersize=1,
               linewidth=1,
               label=r'Lasso')

    plt.xlabel(r"$\lambda$", fontsize=10)
    plt.ylabel(r"MSE", fontsize=10)
    plt.legend(fontsize=10)

    # savefig does not create missing directories.
    figures_dir = os.path.join(os.path.dirname(__file__), 'figures')
    os.makedirs(figures_dir, exist_ok=True)
    plt.savefig(os.path.join(figures_dir, 'lambda_terrain.png'),
                transparent=True,
                bbox_inches='tight')
    plt.show()
예제 #11
0
def part_b():
    """Plot the MSE of OLS and ridge fits to noisy Franke data.

    Three lambdas (1e-2, 1e-1, 1) are looped over; the first curve uses the
    'ols' method, the others 'ridge'. For each of 50 noise scales in [0, 1]
    a degree-10 ``'polynomial2D'`` model is resampled once on 1000 noisy
    random points and its MSE is recorded for the plot. The model is also
    scored on a second, independently drawn noisy sample; those scores are
    collected but not plotted.
    """
    R2 = []
    MSE = []

    R2_noise = []
    MSE_noise = []
    beta_noise = []
    betaVariance_noise = []

    noise = np.linspace(0, 1.0, 50)
    k = 1
    fig, ax1 = plt.subplots()
    plt.rc('text', usetex=True)

    @jit(nopython=True, cache=True)
    def computeFrankeValues(points, out):
        # Fills `out` in place with franke(x, y) for every row of `points`.
        for row in range(points.shape[0]):
            out[row] = franke(points[row, 0], points[row, 1])

    def draw_sample(eta):
        # Draws 1000 random points and returns them together with their
        # Franke values perturbed by Gaussian noise of scale eta.
        N = int(1000)
        x = np.random.rand(N)
        y = np.random.rand(N)
        points = np.column_stack((x, y))
        values = np.zeros(shape=(N))
        computeFrankeValues(points, values)
        return points, values + eta * np.random.standard_normal(size=N)

    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']

    for ind, lambda_ in enumerate(np.logspace(-2, 0, 3)):
        method = 'ols' if ind == 0 else 'ridge'
        MSE_noise = []

        for eta in noise:
            designMatrix = DesignMatrix('polynomial2D', 10)
            leastSquares = LeastSquares(backend='manual', method=method)
            leastSquares.setLambda(lambda_)
            bootstrap = Bootstrap(leastSquares, designMatrix)

            x_data, y_data_noise = draw_sample(eta)
            bootstrap.resample(x_data, y_data_noise, k)

            MSE_noise.append(leastSquares.MSE())
            R2_noise.append(leastSquares.R2())
            beta_noise.append(bootstrap.beta)
            betaVariance_noise.append(bootstrap.betaVariance)

            # Different noise, test data
            x_data, y_data_noise = draw_sample(eta)
            leastSquares.X = designMatrix.getMatrix(x_data)
            leastSquares.predict()
            leastSquares.y = y_data_noise

            MSE.append(leastSquares.MSE())
            R2.append(leastSquares.R2())

        if ind == 0:
            line_style = '--'
            curve_label = r"OLS"
        else:
            line_style = '-'
            curve_label = r"$\lambda=10^{%d}$" % (int(np.log10(lambda_)))

        ax1.loglog(noise,
                   np.array(MSE_noise),
                   colors[ind] + line_style,
                   markersize=1,
                   label=curve_label)
        plt.ylabel(r"MSE", fontsize=10)
        plt.xlabel(r"noise scale $\eta$", fontsize=10)
        plt.subplots_adjust(left=0.2, bottom=0.2)

    ax1.legend()
    plt.show()
예제 #12
0
def plot_beta_ridge():
    """Plot the fitted coefficients against the shrinkage parameter.

    For 20 lambdas between 1e-3 and 1e5 a degree-3 ``'polynomial2D'`` model
    is resampled 10000 times on 10^4 random points of the Franke function
    with unit-scale Gaussian noise. The first lambda is fitted with the
    'ols' method and drawn as an isolated point at 1e-3; all other lambdas
    use 'ridge'. Error bars are twice ``bootstrap.betaVariance``.
    """
    beta = []
    betaVariance = []

    k = 10000
    fig, ax1 = plt.subplots()
    plt.rc('text', usetex=True)

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        # Fills y in place with franke(x, y) for every row of x_data.
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    lam = np.logspace(-3, 5, 20)

    for ind, lambda_ in enumerate(lam):
        # Row 0 is plotted separately below as the OLS point, so the OLS fit
        # must be the first one. The index used to be tested before it was
        # incremented, which selected OLS for the second lambda instead.
        method = 'ols' if ind == 0 else 'ridge'
        leastSquares = LeastSquares(backend='manual', method=method)

        designMatrix = DesignMatrix('polynomial2D', 3)
        bootstrap = Bootstrap(leastSquares, designMatrix)
        leastSquares.setLambda(lambda_)

        N = int(1e4)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = np.random.rand(N)
        x_data[:, 1] = np.random.rand(N)
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)
        eta = 1.0
        y_data_noise = y_data + eta * np.random.standard_normal(size=N)

        bootstrap.resample(x_data, y_data_noise, k)

        beta.append(bootstrap.beta)
        betaVariance.append(bootstrap.betaVariance)

    beta = np.array(beta)
    betaVariance = np.array(betaVariance)

    # Legend subscripts, one per plotted coefficient.
    monomial = [
        '1', 'x', 'y', 'x^2', 'xy', 'y^2', 'x^3', 'x^2y', 'xy^2', 'y^3'
    ]
    colors = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
    ]

    # Ridge results: every lambda except the first.
    for i in range(10):
        plt.errorbar(lam[1:],
                     beta[1:, i],
                     yerr=2 * betaVariance[1:, i],
                     fmt='-o',
                     markersize=2,
                     linewidth=1,
                     color=colors[i],
                     elinewidth=0.5,
                     capsize=2,
                     capthick=0.5,
                     label=r"$\beta_{%s}$" % (monomial[i]))
    plt.ylabel(r"$\beta_j$", fontsize=10)
    plt.xlabel(r"shrinkage parameter $\lambda$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    plt.legend(fontsize=8)

    # OLS result: a single unlabelled point per coefficient at 1e-3.
    for i in range(10):
        plt.errorbar(1e-3,
                     beta[0, i],
                     yerr=2 * betaVariance[0, i],
                     fmt='-o',
                     markersize=2,
                     linewidth=1,
                     color=colors[i],
                     elinewidth=0.5,
                     capsize=2,
                     capthick=0.5)

    fig.gca().set_xscale('log')

    plt.show()
예제 #13
0
# Highest polynomial degree to examine.
max_degree = 20
# Degrees 1, 2, ..., max_degree as plain Python ints.
Degree = list(range(1, max_degree + 1))

# Accumulators, initially empty.
# NOTE(review): presumably filled once per degree by the loop that follows;
# that loop is not fully visible here -- confirm.
Bias_vec = []
Var_vec = []
MSE_vec = []

print("#########################################################")
for degree in Degree:

    degree = int(degree)
    designMatrix = DesignMatrix('polynomial2D', degree)

    X = designMatrix.getMatrix(X)
    XT_X = np.dot(X.T, X)
    print("det(X^T*X): %g" %
          (np.linalg.det(XT_X + Lambda * np.eye(XT_X.shape[0]))))

    X_test_deg = designMatrix.getMatrix(X_test)

    # The following (m x n_bootstraps) matrix holds the column vectors y_pred
    # for each bootstrap iteration.
    X_train = np.zeros(shape=(x1_train.shape[0], 2))
    n_bootstraps = 100
    y_pred = np.empty((y_test.shape[0], n_bootstraps))

    for i in range(n_bootstraps):