Example #1
def test_bootstrap_resample():
    """Tests the resample method of the Bootstrap class

    Tests comprise simple resampling cases for which we can evaluate
    the exact answer by hand.
    """

    # All data is the same, variance should be zero.
    OLS = LeastSquares()
    DM = DesignMatrix('polynomial', 3)
    bootstrap = Bootstrap(OLS, DM)

    x = np.ones(10) * 2.25
    y = x**3

    #bootstrap.resample(x, y, 10)

    # This fails with a LinAlgError("Singular matrix") on TravisCI,
    # but passes locally. Removing for now.
    #assert bootstrap.betaVariance == pytest.approx(np.zeros(4), abs=1e-15)
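    # (If re-enabled: the LinAlgError presumably stems from X^T X being
    # singular when every x value is identical, since all rows of the
    # design matrix coincide. A pseudo-inverse based fit would tolerate
    # this, e.g. beta = np.linalg.pinv(X.T @ X) @ (X.T @ y), or
    # np.linalg.lstsq(X, y, rcond=None).)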

    # Ensure that larger noise in the data set gives larger computed
    # variance in the beta values from resampling.
    functions = {
        0: lambda x: np.sin(x),
        1: lambda x: np.cos(x),
        2: lambda x: np.sin(2 * x),
        3: lambda x: np.cos(2 * x),
        4: lambda x: np.sin(3 * x),
        5: lambda x: np.cos(3 * x),
        6: lambda x: np.sin(4 * x),
        7: lambda x: np.cos(4 * x),
        8: lambda x: np.sin(5 * x),
        9: lambda x: np.cos(5 * x),
        10: lambda x: np.sin(x)**2,
        11: lambda x: np.cos(x)**2,
        12: lambda x: np.sin(2 * x)**2,
        13: lambda x: np.cos(2 * x)**2,
        14: lambda x: np.sin(3 * x)**2,
        15: lambda x: np.cos(3 * x)**2,
    }
    DM = DesignMatrix(lambda j, x: functions[j](x), 9)
    OLS = LeastSquares()
    bootstrap = Bootstrap(OLS, DM)
    N = 100
    x = np.linspace(0, 2 * np.pi, N)
    meanBetaVariance = np.zeros(3)

    for ind, noiseScale in enumerate([0.0, 0.1, 1.0]):
        y = np.sin(1.5 * x) - 0.5 * np.cos(2 * x)**2 + np.random.normal(
            0, noiseScale, N)
        bootstrap.resample(x, y, 100)
        meanBetaVariance[ind] = np.mean(bootstrap.betaVariance)
        if ind > 0:
            assert meanBetaVariance[ind - 1] < meanBetaVariance[ind]
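
For reference, a minimal sketch of the kind of resampling this test exercises, assuming Bootstrap refits on rows drawn with replacement and reports per-coefficient variances (bootstrap_beta_variance is a hypothetical helper, not part of the repo):

import numpy as np

def bootstrap_beta_variance(X, y, fit, n_resamples=100, seed=None):
    # Refit on resampled rows and estimate the variance of each fitted
    # coefficient across the resamples.
    rng = np.random.default_rng(seed)
    n = X.shape[0]
    betas = []
    for _ in range(n_resamples):
        idx = rng.integers(0, n, size=n)  # draw rows with replacement
        betas.append(fit(X[idx], y[idx]))
    return np.var(np.array(betas), axis=0)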
Example #2
def test_LeastSquares_predict() :
    """Tests the predict method of the Least Squares class

    The test is done with a known beta array, comparing the predictions
    of the two backends against each other.
    """
    N = 5
    P = 3
    x = np.linspace(0,1,N)
    random.seed(10)
    y = 3*x**2 - 9*x - 2.4*x**5 - 3.1
    X = np.zeros(shape=(N,P))
    X[:,0] = 1.0

    for j in range(1,P) :
        X[:,j] = x**j

    OLS = LeastSquares(backend='skl')
    beta_skl    = OLS.fit(X,y)
    predict_skl = OLS.predict()

    OLS = LeastSquares(backend='manual')
    beta_manual    = OLS.fit(X,y)

    # Ensure the exact same beta values are used by both backend versions.
    OLS.beta       = beta_skl
    predict_manual = OLS.predict()

    assert (predict_manual == pytest.approx(predict_skl, abs=1e-15))
Example #3
def test_LeastSquares_R2() :
    """Tests the R2 score method of the Least Squares class

    The test is done with a known beta array, comparing results to a known
    R2 value.
    """
    N = 5
    P = 3
    x = np.linspace(0,1,N)
    random.seed(10)
    y = 3*x**2 - 9*x - 2.4*x**5 + 3.1
    X = np.zeros(shape=(N,P))
    X[:,0] = 1.0

    for j in range(1,P) :
        X[:,j] = x**j

    OLS = LeastSquares(backend='skl')
    beta_skl   = OLS.fit(X,y)
    R2_skl     = OLS.R2()

    OLS = LeastSquares(backend='manual')
    beta_manual = OLS.fit(X,y)

    # Ensure the manual and the skl fit both use the exact same beta 
    # values.
    OLS.beta   = beta_skl
    R2_manual  = OLS.R2()

    yHat = np.dot(X, beta_skl)
    R2_true = 1.0 - np.sum((y - yHat)**2) / np.sum((y - np.mean(y))**2)
    
    # beta: 
    #    2.98147321428571299151
    #   -6.48616071428570872826
    #   -1.66071428571428914012
    #
    # yHat = beta0 + beta1 x + beta2 x^2
    #    2.98147321428571299151
    #    1.25613839285714279370
    #   -0.67678571428571365765
    #   -2.81729910714285569640
    #   -5.16540178571428487686
    #
    # y = 3x^2 - 9x -2.4x^5 + 3.1
    #    3.10000000000000008882
    #    1.03515625000000000000
    #   -0.72500000000000008882
    #   -2.53203124999999973355
    #   -5.30000000000000071054
    #
    # R2 = 1.0 - sum((y - yHat)**2) / sum((y - mean(y))**2)
    #    0.99605957942938250227

    assert R2_skl    == pytest.approx(R2_manual, abs=1e-15)
    assert R2_skl    == pytest.approx(R2_true, abs=1e-15)
    assert R2_manual == pytest.approx(R2_true, abs=1e-15)
Example #4
def test_LeastSquares_fit() :
    """Tests the fit method of the Least Squares class

    The tests comprise fitting of models to known data.
    """

    # Ensure that fitting polynomials of degree 1 through 4 to y(x) = x
    # results in the beta corresponding to the x term equal to 1.0 and all
    # other beta values zero.
    #
    # Second, we test on y(x) = x + 2 to make sure the intercept is also
    # calculated correctly.
    for intercept in [0, 2] :
        for method in ['manual', 'skl'] :
            N = 10
            x = np.linspace(0,1,N)
            y = x + intercept

            for i in range(2,5+1) :
                P = i
                X = np.zeros(shape=(N,P))
                X[:,0] = 1.0

                for j in range(1,P) :
                    X[:,j] = x**j

                OLS = LeastSquares(backend=method)
                beta = OLS.fit(X,y)
                
                assert beta[0] == pytest.approx(intercept, abs=1e-10)
                assert beta[1] == pytest.approx(1.0, abs=1e-10)
                for j in range(2,P) :
                    assert beta[j] == pytest.approx(0.0, abs=1e-10)



    # Ensure the backend='manual' and the backend='skl' versions of 
    # LeastSquares.fit(X,y) give the same result.
    N = 5
    P = 5
    x = np.linspace(0,1,N)
    y = x + x**2 - (1.0 - x)**5
    
    X = np.zeros(shape=(N,P))
    X[:,0] = 1.0
    for j in range(1,P) :
        X[:,j] = x**j

    OLS = LeastSquares(backend='manual')
    beta_manual = OLS.fit(X,y)

    OLS = LeastSquares(backend='skl')
    beta_skl    = OLS.fit(X,y)

    assert beta_manual == pytest.approx(beta_skl, abs=1e-10)
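
For context, the backend='manual' fit is presumably the usual normal-equations solve; a minimal self-contained sketch of that technique (not the repo's actual implementation):

import numpy as np

def ols_fit(X, y):
    # Solve the normal equations (X^T X) beta = X^T y.
    return np.linalg.solve(X.T @ X, X.T @ y)

def ols_predict(X, beta):
    # yHat = X beta, as used by the predict tests above.
    return X @ beta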
Example #5
def test_LeastSquares_meanSquaredError() :
    """Tests the meanSquaredError method of the Least Squares class

    The test is done with a known beta array, comparing results to a known
    MSE value.
    """
    N = 5
    P = 3
    x = np.linspace(0,1,N)
    random.seed(10)
    y = 3*x**2 - 9*x - 2.4*x**5 + 3.1
    X = np.zeros(shape=(N,P))
    X[:,0] = 1.0

    for j in range(1,P) :
        X[:,j] = x**j

    OLS = LeastSquares(backend='skl')
    beta_skl    = OLS.fit(X,y)
    MSE_skl     = OLS.meanSquaredError()

    OLS = LeastSquares(backend='manual')
    beta_manual = OLS.fit(X,y)

    # Ensure the manual and the skl fit both use the exact same beta 
    # values.
    OLS.beta    = beta_skl
    MSE_manual  = OLS.meanSquaredError()

    # beta: 
    #    2.98147321428571299151
    #   -6.48616071428570872826
    #   -1.66071428571428914012
    #
    # yHat = beta0 + beta1 x + beta2 x^2
    #    2.98147321428571299151
    #    1.25613839285714279370
    #   -0.67678571428571365765
    #   -2.81729910714285569640
    #   -5.16540178571428487686
    #
    # MSE = 1/5 * sum((yHat - y)**2)
    #    0.03294015066964287725

    MSE_true = 0.03294015066964287725

    assert MSE_skl    == pytest.approx(MSE_manual, abs=1e-15)
    assert MSE_skl    == pytest.approx(MSE_true, abs=1e-15)
    assert MSE_manual == pytest.approx(MSE_true, abs=1e-15)
Example #6
def test_LeastSquares_fit_lasso() :
    """Tests the fit method of the Least Squares class with method='lasso'
    """
    N = 500
    P = 5
    x = np.linspace(0,1,N)
    y = 2.0 + x + x**2
    
    X = np.zeros(shape=(N,P))
    X[:,0] = 1.0
    for j in range(1,P) :
        X[:,j] = x**j

    lasso = LeastSquares(method='lasso', backend='skl')
    lasso.setLambda(0.01)
    beta_lasso = lasso.fit(X,y)
    
    # Make sure lasso regression zeroes out the x**3 and x**4 beta terms.
    assert beta_lasso[-2:] == pytest.approx(np.zeros(2), abs=1e-15)
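
The 'skl' backend presumably wraps scikit-learn's Lasso; a rough self-contained equivalent of this test, assuming the class's lambda maps to Lasso's alpha:

import numpy as np
from sklearn.linear_model import Lasso

x = np.linspace(0, 1, 500)
y = 2.0 + x + x**2
X = np.vander(x, 5, increasing=True)  # columns 1, x, x**2, x**3, x**4

# X already contains an intercept column, hence fit_intercept=False.
model = Lasso(alpha=0.01, fit_intercept=False, max_iter=10000).fit(X, y)
print(model.coef_)  # the x**3 and x**4 coefficients come out (near) zero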
Example #7
def plot_MSE_R2() :
    leastSquares = LeastSquares(backend='manual')

    N = int(1e4)
    x = np.random.rand(N)
    y = np.random.rand(N)
    x_data = np.zeros(shape=(N,2))
    x_data[:,0] = x
    x_data[:,1] = y
    y_data = np.zeros(shape=(N))

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y) :    
        N = x_data.shape[0]
        for i in range(N) :
            y[i] = franke(x_data[i,0], x_data[i,1])

    computeFrankeValues(x_data, y_data)

    p_max = 10
    p   = [i for i in range(2, p_max+1)]
    R2  = [None for i in range(2, p_max+1)]
    MSE = [None for i in range(2, p_max+1)]

    for degree in p :
        designMatrix = DesignMatrix('polynomial2D', degree)
        X = designMatrix.getMatrix(x_data) 
        leastSquares.fit(X, y_data)
        _ = leastSquares.predict()

        R2[degree-2] = leastSquares.R2()
        MSE[degree-2] = leastSquares.MSE()

        print(R2[degree-2])
        print(MSE[degree-2])

    p = np.array(p)

    plt.semilogy(p, 1-np.array(R2),'b-o', markersize=3)
    plt.rc('text', usetex=True)
    plt.xlabel(r"$p$", fontsize=10)
    plt.ylabel(r"$1-(R^2$ score$)$", fontsize=10)
    plt.subplots_adjust(left=0.2,bottom=0.2)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS_R2.png'), transparent=True, bbox_inches='tight')
    plt.show()

    plt.figure()
    ax = plt.gca()
    plt.semilogy(p, MSE, 'r-o', markersize=3)

    plt.rc('text', usetex=True)
    plt.xlabel(r"$p$", fontsize=10)
    plt.ylabel(r"MSE", fontsize=10)
    plt.subplots_adjust(left=0.2,bottom=0.2)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS_MSE.png'), transparent=True, bbox_inches='tight')
    plt.show()
Example #8
def visualize_beta():
    L = 10
    nn = pickle.load(open('nn5_final.p', 'rb'))
    #for w in nn.weights[:1] :
    #    with np.printoptions(precision=2, suppress=True) :
    #        print(w.reshape((-1,L)))
    #plt.imshow(nn.weights[0].reshape((L,-1)))
    #plt.show()

    N = 5000
    training_fraction = 0.4
    ising = Ising(L, N)
    #X, y  = ising.generateTrainingData1D()
    D, ry = ising.generateDesignMatrix1D()
    #y    /= L

    ols = LeastSquares(method='ols', backend='manual')
    ols.setLambda(0.1)
    ols.fit(D, ry)

    print(ising.states.shape)
    #W = nn.weights[0].reshape((-1,L))*nn.weights[1]
    W = ols.beta.reshape((-1, L))
    J = ising.J
    for i in range(10):
        row = ising.states[i, :]
        des = D[i, :]
        E = ry[i]
        print(row.shape)
        row = np.expand_dims(row, 1)
        print("s W s:", row.T @ W @ row)
        print("s (W+W')/2 s:", row.T @ (W + W.T) / 2 @ row)
        print("s J s:", row.T @ J @ row)
        #print("D w:  ", W.T @ des * nn.weights[1])
        #print("pred: ", nn.predict(np.expand_dims(des.T,1)))
        print("E:    ", E)
        print("")

    for i in range(N):
        row = ising.states[i, :]
        des = D[i, :]
        E = ry[i]
        atol = 1e-14
        rtol = 1e-14
        #assert np.allclose(row.T @ W @ row, row.T @ (W+W.T)/2 @ row, atol=atol, rtol=rtol)
        #assert np.allclose(row.T @ (W+W.T)/2 @ row, row.T @ J @ row, atol=atol, rtol=rtol)

    with np.printoptions(precision=2, suppress=True):
        for a in np.linalg.eig(W):
            print(a)

    with np.printoptions(precision=2, suppress=True):

        print("det=", np.linalg.det(W))
        print("cond=", np.linalg.cond(W))
        print("")
        print("J:\n:", J)
        print("W+W'/2\n", (W + W.T) / 2)
        print("J+J'/2\n", (J + J.T) / 2)
        print("Tr D:", np.sum(np.diag((W + W.T) / 2)))
Example #9
import numpy as np
from numba import jit

from leastSquares import LeastSquares
from designMatrix import DesignMatrix
from franke import franke

np.random.seed(2018)


@jit(nopython=True, cache=True)
def computeFrankeValues(x_data, y, noise_strength=0.1):
    N = x_data.shape[0]
    for i in range(N):
        y[i] = franke(x_data[i, 0], x_data[i, 1]) + np.random.normal(
            0, noise_strength)


leastSquares = LeastSquares(method="ridge", backend='manual')
Lambda = 1
leastSquares.setLambda(Lambda)
#crossvalidation = CrossValidation(leastSquares, designMatrix)

N = int(1e4)
x1 = np.random.rand(N)
x2 = np.random.rand(N)

X = np.zeros(shape=(N, 2))
X[:, 0] = x1
X[:, 1] = x2

# Vector to hold y = franke(x1, x2)
y = np.zeros(shape=(N))
noise_strength = 0.3
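
For reference, the imported franke is presumably the standard Franke test function on [0,1]^2; a sketch consistent with that assumption:

import numpy as np

def franke(x, y):
    # Weighted sum of four Gaussian bumps; the usual regression benchmark.
    t1 = 0.75 * np.exp(-(9*x - 2)**2 / 4 - (9*y - 2)**2 / 4)
    t2 = 0.75 * np.exp(-(9*x + 1)**2 / 49 - (9*y + 1) / 10)
    t3 = 0.50 * np.exp(-(9*x - 7)**2 / 4 - (9*y - 3)**2 / 4)
    t4 = 0.20 * np.exp(-(9*x - 4)**2 - (9*y - 7)**2)
    return t1 + t2 + t3 - t4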
Example #10
def part_a(plotting=False) :
    MSE_degree          = []
    R2_degree           = []
    betaVariance_degree = []

    for degree in [2,3,4,5]: #,6,7,8,9] :
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(backend='manual')
        bootstrap    = Bootstrap(leastSquares, designMatrix)

        N = int(1e4)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N,2))
        x_data[:,0] = x
        x_data[:,1] = y
        y_data = np.zeros(shape=(N))

        @jit(nopython=True, cache=True)
        def computeFrankeValues(x_data, y) :    
            N = x_data.shape[0]
            for i in range(N) :
                y[i] = franke(x_data[i,0], x_data[i,1])

        computeFrankeValues(x_data, y_data)
        bootstrap.resample(x_data, y_data, 1000)
        
        MSE_degree.append(leastSquares.MSE())
        R2_degree.append(leastSquares.R2())
        betaVariance_degree.append(bootstrap.betaVariance)
        if plotting :
            print("MSE: ", MSE_degree[-1])
            print("R2:  ", R2_degree[-1])
            print("Beta Variance: ")
            for b in betaVariance_degree[-1] : print(b)
            print("Beta: ")
            for b in leastSquares.beta : print(b)
            print(" ")

            M = 100
            fig = plt.figure()
            ax = fig.gca(projection='3d')

            x = np.linspace(0, 1, M)
            y = np.linspace(0, 1, M)
            X, Y = np.meshgrid(x,y)
            x_data = np.vstack([X.ravel(), Y.ravel()]).T
            
            # When plotting the Franke function itself, we use these lines.
            yy_data = np.zeros(shape=(x_data.shape[0]))
            computeFrankeValues(x_data, yy_data)

            # When plotting the linear regression model:
            XX = designMatrix.getMatrix(x_data)
            leastSquares.X = XX
            y_data = leastSquares.predict()

            Z = np.reshape(y_data.T, X.shape)
            ZF = np.reshape(yy_data.T, X.shape)

            #ax.plot_surface(X,Y,Z,cmap=cm.coolwarm,linewidth=0, antialiased=False)
            ax.plot_surface(X,Y,abs(Z-ZF),cmap=cm.coolwarm,linewidth=0, antialiased=False)
            ax.set_zlim(-0.10, 1.40)
            ax.zaxis.set_major_locator(LinearLocator(5))
            ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
            ax.view_init(30, 45)

            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'franke.png'), transparent=True, bbox_inches='tight')
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS'+str(degree)+'.png'), transparent=True, bbox_inches='tight')
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS'+str(degree)+'_diff.png'), transparent=True, bbox_inches='tight')
            plt.show()

            print("\nMSE :")
            print(MSE_degree)
            print("\nR2 :")
            print(R2_degree)
            print("\nσ²(β) :")
            print(betaVariance_degree)

    return MSE_degree, R2_degree, betaVariance_degree
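
DesignMatrix('polynomial2D', degree) presumably builds one column per monomial x^a y^b with a + b <= degree; a hypothetical stand-in, consistent with the monomial labels listed in Example #19:

import numpy as np

def polynomial2D_matrix(x_data, degree):
    # Columns ordered 1, x, y, x^2, xy, y^2, x^3, ...
    x, y = x_data[:, 0], x_data[:, 1]
    cols = [x**(d - a) * y**a for d in range(degree + 1) for a in range(d + 1)]
    return np.column_stack(cols)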
Example #11
def fit_franke_noise() :

    R2           = []
    MSE          = []

    R2_noise           = []
    MSE_noise          = []
    beta_noise         = []
    betaVariance_noise = []

    noise = np.logspace(-4,0,50)
    k = 1

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y) :    
        N = x_data.shape[0]
        for i in range(N) :
            y[i] = franke(x_data[i,0], x_data[i,1])
    
    for eta in noise :
        designMatrix = DesignMatrix('polynomial2D', 10)
        leastSquares = LeastSquares(backend='manual')
        bootstrap    = Bootstrap(leastSquares, designMatrix)

        N = int(1e5)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N,2))
        x_data[:,0] = x
        x_data[:,1] = y
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)
        y_data_noise = y_data + eta * np.random.standard_normal(size=N)

        bootstrap.resample(x_data, y_data_noise, k)
        

        MSE_noise.append(leastSquares.MSE())
        R2_noise.append(leastSquares.R2())
        beta_noise.append(bootstrap.beta)
        betaVariance_noise.append(bootstrap.betaVariance)

        leastSquares.y = y_data
        MSE.append(leastSquares.MSE())
        R2.append(leastSquares.R2())
    """
    betaVariance_noise = np.array(betaVariance_noise)
    for beta in betaVariance_noise :
        print(beta)

    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k','#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
              '#9467bd', '#8c564b', '#e377c2', '#7f7f7f',
              '#bcbd22', '#17becf']
    for i in range(6) :
        plt.loglog(noise, betaVariance_noise[:,i], colors[i]+'-o', markersize=2)
        
    
    plt.rc('text', usetex=True)
    #plt.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
    ## for Palatino and other serif fonts use:
    #plt.rc('font',**{'family':'serif','serif':['Palatino']})
    plt.xlabel(r"noise scale $\eta$", fontsize=10)
    plt.ylabel(r"$ \sigma^2(\beta_j)$", fontsize=10)
    plt.legend([r"intercept", 
                r"$\beta_{x}$", 
                r"$\beta_{y}$", 
                r"$\beta_{x^2}$", 
                r"$\beta_{xy}$", 
                r"$\beta_{y^2}$"], fontsize=10)

    plt.subplots_adjust(left=0.2,bottom=0.2)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'beta_variance_OLS_noise.png'), transparent=True, bbox_inches='tight')
    #plt.show()

    """
    print(R2_noise)
    print(1-np.array(R2_noise))
    fig, ax1 = plt.subplots()
    ax1.loglog(noise, 1-np.array(R2_noise),'k-o',markersize=2)
    ax1.loglog(noise, 1-np.array(R2),'k--',markersize=2)
    plt.xlabel(r"noise scale $\eta$", fontsize=10)
    plt.ylabel(r"$1-R^2$", color='k', fontsize=10)

    ax2 = ax1.twinx()
    ax2.loglog(noise, np.array(MSE_noise), 'b-o',markersize=2)
    ax2.loglog(noise, np.array(MSE), 'b--',markersize=2)
    plt.ylabel(r"MSE", color='b', fontsize=10)
    plt.subplots_adjust(left=0.2,bottom=0.2,right=0.9)

    ax1.set_ylim([0.95*min(min(MSE_noise), min(R2_noise)), 1.05*(max(max(MSE_noise), max(R2_noise)))])
    ax2.set_ylim([0.95*min(min(MSE_noise), min(R2_noise)), 1.05*(max(max(MSE_noise), max(R2_noise)))])
    ax2.get_yaxis().set_ticks([])
    
    plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'R2MSE_OLS_noise.png'), transparent=True, bbox_inches='tight')
    plt.show()
Example #12
def part_e_2():
    x_train, y_train, x_test, y_test = real_data(file_number=2, plotting=False)

    degree = 10

    designMatrix = DesignMatrix('polynomial2D', degree)
    leastSquares = LeastSquares(backend='manual', method='ols')
    X = designMatrix.getMatrix(x_train)
    leastSquares.fit(X, y_train)

    X_test = designMatrix.getMatrix(x_test)
    leastSquares.X = X_test  # predict on the test design matrix
    leastSquares.predict()
    leastSquares.y = y_test
    ols_MSE = leastSquares.MSE()
    print(ols_MSE)
    ols_MSE = np.array([ols_MSE, ols_MSE])
    ols_lambda = np.array([1e-5, 1])

    ridge_lambda = np.logspace(-5, 0, 20)
    ridge_MSE = []
    for lambda_ in ridge_lambda:
        print("ridge " + str(lambda_))

        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(backend='manual', method='ridge')
        leastSquares.setLambda(lambda_)

        X = designMatrix.getMatrix(x_train)
        leastSquares.fit(X, y_train)

        X_test = designMatrix.getMatrix(x_test)
        leastSquares.X = X_test  # predict on the test design matrix
        leastSquares.predict()
        leastSquares.y = y_test
        ridge_MSE.append(leastSquares.MSE())
        print(leastSquares.MSE())

    ridge_MSE = np.array(ridge_MSE)

    lasso_lambda = np.logspace(-4, 0, 20)
    lasso_MSE = []
    for lambda_ in lasso_lambda:
        print("lasso " + str(lambda_))
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(backend='manual', method='lasso')
        leastSquares.setLambda(lambda_)

        X = designMatrix.getMatrix(x_train)
        leastSquares.fit(X, y_train)

        X_test = designMatrix.getMatrix(x_test)
        leastSquares.X = X_test  # predict on the test design matrix
        leastSquares.predict()
        leastSquares.y = y_test
        lasso_MSE.append(leastSquares.MSE())

    lasso_MSE = np.array(lasso_MSE)

    ########################################################
    plt.rc('text', usetex=True)

    plt.loglog(ols_lambda,
               ols_MSE,
               'k--o',
               markersize=1,
               linewidth=1,
               label=r'OLS')
    plt.loglog(ridge_lambda,
               ridge_MSE,
               'r-o',
               markersize=1,
               linewidth=1,
               label=r'Ridge')
    plt.loglog(lasso_lambda,
               lasso_MSE,
               'b-o',
               markersize=1,
               linewidth=1,
               label=r'Lasso')

    plt.xlabel(r"$\lambda$", fontsize=10)
    plt.ylabel(r"MSE", fontsize=10)
    #plt.subplots_adjust(left=0.2,bottom=0.2)
    plt.legend(fontsize=10)
    plt.savefig(os.path.join(os.path.dirname(__file__), 'figures',
                             'lambda_terrain.png'),
                transparent=True,
                bbox_inches='tight')
    plt.show()
Example #13
def part_e(plotting=False):
    x_train, y_train, x_test, y_test = real_data(file_number=2, plotting=False)

    for method in ['ols', 'ridge', 'lasso']:
        designMatrix = DesignMatrix('polynomial2D', 10)
        leastSquares = LeastSquares(backend='manual', method=method)
        leastSquares.setLambda(1e-3)
        if method == 'lasso':
            leastSquares.setLambda(1e-4)

        X = designMatrix.getMatrix(x_train)
        leastSquares.fit(X, y_train)

        X_test = designMatrix.getMatrix(x_test)
        leastSquares.X = X_test  # predict on the test design matrix
        leastSquares.predict()
        leastSquares.y = y_test
        print(leastSquares.MSE())

        if plotting:
            x = np.linspace(0, 1, 60)
            y = np.copy(x)
            XX, YY = np.meshgrid(x, y)
            ZZ = np.reshape(leastSquares.yHat, XX.shape)

            fig = plt.figure()
            ax = fig.gca(projection='3d')
            ax.plot_surface(XX,
                            YY,
                            ZZ,
                            cmap=cm.coolwarm,
                            linewidth=0,
                            antialiased=False)
            ax.set_zlim(-0.10, 1.40)
            ax.zaxis.set_major_locator(LinearLocator(5))
            ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
            ax.view_init(30, 45 + 90)
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', method+'terrain.png'), transparent=True, bbox_inches='tight')
            plt.show()
    if plotting:
        x = np.linspace(0, 1, 60)
        y = np.copy(x)
        XX, YY = np.meshgrid(x, y)
        ZZ = np.reshape(y_test, XX.shape)

        fig = plt.figure()
        ax = fig.gca(projection='3d')
        ax.plot_surface(XX,
                        YY,
                        ZZ,
                        cmap=cm.coolwarm,
                        linewidth=0,
                        antialiased=False)
        ax.set_zlim(-0.10, 1.40)
        ax.zaxis.set_major_locator(LinearLocator(5))
        ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
        ax.view_init(30, 45 + 90)
        #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'test_terrain.png'), transparent=True, bbox_inches='tight')
        plt.show()
Example #14
def part_b():

    R2 = []
    MSE = []

    R2_noise = []
    MSE_noise = []
    beta_noise = []
    betaVariance_noise = []

    noise = np.linspace(0, 1.0, 50)
    k = 1
    fig, ax1 = plt.subplots()
    plt.rc('text', usetex=True)

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    ind = -1
    for lambda_ in np.logspace(-2, 0, 3):
        ind += 1
        MSE_noise = []

        for eta in noise:
            designMatrix = DesignMatrix('polynomial2D', 10)
            if ind == 0:
                leastSquares = LeastSquares(backend='manual', method='ols')
            else:
                leastSquares = LeastSquares(backend='manual', method='ridge')

            leastSquares.setLambda(lambda_)
            bootstrap = Bootstrap(leastSquares, designMatrix)

            N = int(1000)
            x = np.random.rand(N)
            y = np.random.rand(N)
            x_data = np.zeros(shape=(N, 2))
            x_data[:, 0] = x
            x_data[:, 1] = y
            y_data = np.zeros(shape=(N))
            computeFrankeValues(x_data, y_data)
            y_data_noise = y_data + eta * np.random.standard_normal(size=N)

            bootstrap.resample(x_data, y_data_noise, k)

            MSE_noise.append(leastSquares.MSE())
            R2_noise.append(leastSquares.R2())
            beta_noise.append(bootstrap.beta)
            betaVariance_noise.append(bootstrap.betaVariance)

            # Different noise, test data
            N = int(1000)
            x = np.random.rand(N)
            y = np.random.rand(N)
            x_data = np.zeros(shape=(N, 2))
            x_data[:, 0] = x
            x_data[:, 1] = y
            y_data = np.zeros(shape=(N))
            computeFrankeValues(x_data, y_data)
            y_data_noise = y_data + eta * np.random.standard_normal(size=N)

            X = designMatrix.getMatrix(x_data)
            leastSquares.X = X
            leastSquares.predict()
            leastSquares.y = y_data_noise

            MSE.append(leastSquares.MSE())
            R2.append(leastSquares.R2())

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']

        if ind == 0:
            ax1.loglog(noise,
                       np.array(MSE_noise),
                       colors[ind] + '--',
                       markersize=1,
                       label=r"OLS")
        else:
            ax1.loglog(noise,
                       np.array(MSE_noise),
                       colors[ind] + '-',
                       markersize=1,
                       label=r"$\lambda=10^{%d}$" % (int(np.log10(lambda_))))
        plt.ylabel(r"MSE", fontsize=10)
        plt.xlabel(r"noise scale $\eta$", fontsize=10)
        plt.subplots_adjust(left=0.2, bottom=0.2)

        #ax1.set_ylim([0.95*min(min(MSE_noise), min(R2_noise)), 1.05*(max(max(MSE_noise), max(R2_noise)))])

    ax1.legend()
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'MSE_ridge_noise.png'), transparent=True, bbox_inches='tight')
    plt.show()
Example #15
def test_LeastSquares_fit_ridge() :
    """Tests the fit method of the Least Squares class with method='ridge'
    """
    N = 5
    P = 5
    x = np.linspace(0,1,N)
    y = x + x**2 - (1.0 - x)**5
    
    X = np.zeros(shape=(N,P))
    X[:,0] = 1.0
    for j in range(1,P) :
        X[:,j] = x**j

    OLS = LeastSquares(method='ols', backend='manual')
    beta_ols = OLS.fit(X,y)

    OLS = LeastSquares(method='ridge', backend='manual')
    OLS.setLambda(0.0)
    beta_lambda0 = OLS.fit(X,y)

    assert beta_lambda0 == pytest.approx(beta_ols, abs=1e-15)

    # Make sure the skl and the manual backends give the same result
    SKL = LeastSquares(method='ridge', backend='skl')
    SKL.setLambda(0.0)
    beta_skl = SKL.fit(X,y)
    
    assert beta_lambda0 == pytest.approx(beta_skl, abs=1e-10)

    SKL.setLambda(0.5)
    OLS.setLambda(0.5)
    beta_skl = SKL.fit(X,y)
    beta_manual = OLS.fit(X,y)
    print(beta_manual)
    print(beta_skl)

    assert beta_manual == pytest.approx(beta_skl, abs=1e-10)
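
The ridge fit under test presumably uses the standard closed form, which reduces to OLS at lambda = 0, which is exactly what the first assert checks; a minimal sketch:

import numpy as np

def ridge_fit(X, y, lam):
    # Solve (X^T X + lambda I) beta = X^T y.
    P = X.shape[1]
    return np.linalg.solve(X.T @ X + lam * np.eye(P), X.T @ y)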
Example #16
        ]
        return averages


if __name__ == '__main__':

    Degree = 1 + np.arange(20)

    train_MSE = []
    test_MSE = []
    print("#########################################################")
    for degree in Degree:

        degree = int(degree)
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(method="ridge", backend='manual')
        Lambda = 4
        leastSquares.setLambda(Lambda)
        crossvalidation = CrossValidation(leastSquares, designMatrix)

        N = int(1e4)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))

        X = designMatrix.getMatrix(x_data)
        XT_X = np.dot(X.T, X)
        print("det(X^T*X): %g" %
Example #17
def R2_versus_lasso():
    L = 3
    N = 10000
    training_fraction = 0.4
    ising = Ising(L, N)
    D, ry = ising.generateDesignMatrix1D()
    X, y = ising.generateTrainingData1D()
    y /= L

    D_train = D[int(training_fraction * N):, :]
    ry_train = ry[int(training_fraction * N):]
    D_validation = D[:int(training_fraction * N), :]
    ry_validation = ry[:int(training_fraction * N)]

    lasso = LeastSquares(method='lasso', backend='skl')
    lasso.setLambda(1e-2)
    lasso.fit(D_train, ry_train)
    lasso.y = ry_validation
    # Despite the function name, this is a mean squared error, not an R2.
    lasso_MSE = sklearn.metrics.mean_squared_error(
        ry_validation / L,
        lasso.predict(D_validation) / L)

    n_samples, n_features = X.shape

    nn = NeuralNetwork(inputs=L * L,
                       neurons=L,
                       outputs=1,
                       activations='identity',
                       cost='mse',
                       silent=False)
    nn.addLayer(neurons=1)
    nn.addOutputLayer(activations='identity')

    validation_skip = 100
    epochs = 50000
    nn.fit(D.T,
           ry,
           shuffle=True,
           batch_size=2000,
           validation_fraction=1 - training_fraction,
           learning_rate=0.0001,
           verbose=False,
           silent=False,
           epochs=epochs,
           validation_skip=validation_skip,
           optimizer='adam')

    plt.rc('text', usetex=True)
    validation_loss = nn.validation_loss_improving
    validation_ep = np.linspace(0, epochs, len(nn.validation_loss_improving))
    plt.semilogy(validation_ep, validation_loss, 'r-', label=r'NN')
    plt.semilogy([0, epochs],
                 np.array([lasso_MSE, lasso_MSE]),
                 'k--',
                 label=r'Lasso')
    plt.xlabel(r'Epoch', fontsize=10)
    plt.ylabel(r'Mean squared error', fontsize=10)
    plt.legend(fontsize=10)
    plt.xlim((0, epochs))
    ax = plt.gca()
    ymin, ymax = ax.get_ylim()
    if ymin > pow(10, -5):
        ymin = pow(10, -5)
    #plt.ylim((ymin,ymax))
    plt.savefig(os.path.join(os.path.dirname(__file__), 'figures',
                             'NN_compare_lasso.png'),
                transparent=True,
                bbox_inches='tight')
Example #18

if len(sys.argv) > 1:
    name = sys.argv[1]
    mode = sys.argv[2]

    if mode == "render_time":
        txt = Txt()
        txt.readTxt('txt/' + name + '.txt')
        renderTime = txt.getRenderTime()
        numberPolygons = txt.getNumberPolygons()

        plot = Plot()
        plot.setChartTile(name.capitalize())
        plot.setVertTitle('Render Time (nanoseconds)')

        lsq = LeastSquares()        

        #PLOT NORMAL
        plot.plotChartRT(numberPolygons,renderTime)

        #PLOT LINEAR
        lsq.linearLeastSquares(False,False,numberPolygons,renderTime)
        lsRenderTime = lsq.createLinearEquation("Vertex")
        lsRenderTimeFormula = lsq.getLinearEqFormula("Vertex")
        plot.plotLeastSquareChartRT(numberPolygons,lsRenderTime, renderTime,'Least Squares: Linear',lsRenderTimeFormula)
        linearRenderTimeError = lsq.calculateError(renderTime,lsRenderTime)

        #PLOT EXPONENTIAL
        lsq.linearLeastSquares(False,True,numberPolygons,renderTime)
        lsRenderTime = lsq.createExpEquation("Vertex")
        lsRenderTimeFormula = lsq.getExpEqFormula("Vertex")
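
The exponential fit above presumably works by linear least squares in log space; a minimal sketch of that standard trick (independent of this LeastSquares class, and assuming all y values are positive):

import numpy as np

def fit_exponential(x, y):
    # Fit y ~ a * exp(b x) by regressing log(y) on x.
    b, log_a = np.polyfit(x, np.log(y), 1)
    return np.exp(log_a), b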
Example #19
def plot_beta_ridge():
    beta = []
    betaVariance = []
    MSE = []
    R2 = []

    k = 10000
    fig, ax1 = plt.subplots()
    plt.rc('text', usetex=True)

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    ind = -1
    lam = np.logspace(-3, 5, 20)

    for lambda_ in lam:
        ind += 1
        # The first iteration (ind == 0) uses plain OLS; its beta is
        # plotted separately below as the lambda -> 0 reference point.
        if ind == 0:
            leastSquares = LeastSquares(backend='manual', method='ols')
        else:
            leastSquares = LeastSquares(backend='manual', method='ridge')

        designMatrix = DesignMatrix('polynomial2D', 3)
        bootstrap = Bootstrap(leastSquares, designMatrix)
        leastSquares.setLambda(lambda_)

        N = int(1e4)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)
        eta = 1.0
        y_data_noise = y_data + eta * np.random.standard_normal(size=N)

        bootstrap.resample(x_data, y_data_noise, k)

        MSE.append(leastSquares.MSE())
        R2.append(leastSquares.R2())
        beta.append(bootstrap.beta)
        betaVariance.append(bootstrap.betaVariance)

        leastSquares.y = y_data
        MSE.append(leastSquares.MSE())
        R2.append(leastSquares.R2())

    beta = np.array(beta)
    betaVariance = np.array(betaVariance)

    monomial = [
        '1', 'x', 'y', 'x^2', 'xy', 'y^2', 'x^3', 'x^2y', 'xy^2', 'y^3'
    ]
    colors = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
    ]

    for i in range(10):
        plt.errorbar(lam[1:],
                     beta[1:, i],
                     yerr=2 * betaVariance[1:, i],
                     fmt='-o',
                     markersize=2,
                     linewidth=1,
                     color=colors[i],
                     elinewidth=0.5,
                     capsize=2,
                     capthick=0.5,
                     label=r"$\beta_{%s}$" % (monomial[i]))
    plt.rc('text', usetex=True)
    plt.ylabel(r"$\beta_j$", fontsize=10)
    plt.xlabel(r"shrinkage parameter $\lambda$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    plt.legend(fontsize=8)

    for i in range(10):
        plt.errorbar(1e-3,
                     beta[0, i],
                     yerr=2 * betaVariance[0, i],
                     fmt='-o',
                     markersize=2,
                     linewidth=1,
                     color=colors[i],
                     elinewidth=0.5,
                     capsize=2,
                     capthick=0.5)

    fig.gca().set_xscale('log')
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'beta_ridge.png'), transparent=True, bbox_inches='tight')

    plt.show()
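
As a sanity check on the trend this plot should show, ridge coefficients shrink toward zero as the penalty grows; a tiny self-contained illustration using the standard closed form (not the repo's LeastSquares):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.normal(size=50)

for lam in [0.0, 1.0, 100.0]:
    beta = np.linalg.solve(X.T @ X + lam * np.eye(3), X.T @ y)
    print(lam, np.linalg.norm(beta))  # the norm decreases as lam grows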