Example #1
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std
def benchmark_pca():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = np.zeros((N, 10))
    for i in range(N):
        Ytrain_ind[i, Ytrain[i]] = 1

    Ntest = len(Ytest)
    Ytest_ind = np.zeros((Ntest, 10))
    for i in range(Ntest):
        Ytest_ind[i, Ytest[i]] = 1

    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL = []
    LLtest = []
    CRtest = []

    # D = 300 -> error = 0.07
    lr = 0.0001
    reg = 0.01
    for i in range(200):
        p_y = forward(Xtrain, W, b)
        # print "p_y:", p_y
        ll = cost(p_y, Ytrain_ind)
        LL.append(ll)

        p_y_test = forward(Xtest, W, b)
        lltest = cost(p_y_test, Ytest_ind)
        LLtest.append(lltest)

        err = error_rate(p_y_test, Ytest)
        CRtest.append(err)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)
        if i % 10 == 0:
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    iters = range(len(LL))
    plt.plot(iters, LL, label='train loss')
    plt.plot(iters, LLtest, label='test loss')
    plt.title('Loss')
    plt.legend()
    plt.show()
    plt.plot(CRtest)
    plt.title('Error')
    plt.show()
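
# NOTE: the examples on this page import get_transformed_data, forward, cost,
# gradW, gradb, error_rate and y2indicator from a shared utility module that is
# not shown here. Below is only a minimal sketch of what those helpers are
# assumed to look like for softmax logistic regression on MNIST; the names and
# signatures are inferred from the calls on this page, not copied from the
# original module. (Example #4 uses derivative_W / derivative_b instead, which
# follow the opposite sign convention, the gradient of the negative
# log-likelihood, hence its "-=" weight updates.)
import numpy as np

def forward(X, W, b):
    # row-wise softmax of the linear model
    A = X.dot(W) + b
    expA = np.exp(A - A.max(axis=1, keepdims=True))
    return expA / expA.sum(axis=1, keepdims=True)

def cost(p_y, T):
    # total cross-entropy between predictions p_y and one-hot targets T
    return -(T * np.log(p_y)).sum()

def gradW(T, p_y, X):
    # gradient of the log-likelihood w.r.t. W (ascent direction)
    return X.T.dot(T - p_y)

def gradb(T, p_y):
    # gradient of the log-likelihood w.r.t. b
    return (T - p_y).sum(axis=0)

def error_rate(p_y, t):
    # fraction of misclassified samples
    return np.mean(np.argmax(p_y, axis=1) != t)

def y2indicator(y):
    # one-hot encode integer labels 0..9
    ind = np.zeros((len(y), 10))
    ind[np.arange(len(y)), y.astype(np.int32)] = 1
    return ind
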
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(50):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)

    # 2. stochastic
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in range(50):  # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)):  # shortcut so it won't take so long...
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)

    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - t0)

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
Example #4
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    mu = X.mean(axis=0)
    std = X.std(axis=0)
    np.place(std, std == 0, 1)
    X = (X - mu) / std

    Xtrain, Ytrain = X[:-1000], Y[:-1000]
    Xtest, Ytest = X[-1000:], Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    #Full
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    learning_rate = 0.0001
    reg = 0.01
    t0 = datetime.now()

    for i in xrange(200):
        pY = forward(Xtrain, W, b)

        W -= learning_rate * (derivative_W(pY, Ytrain_ind, Xtrain) + reg * W)
        b -= learning_rate * (derivative_b(pY, Ytrain_ind) + reg * b)

        pYtest = forward(Xtest, W, b)
        ll = cost(pYtest, Ytest_ind)
        LL.append(ll)

        if i % 10 == 0:
            err = error_rate(pYtest, Ytest)
            print "Cost at iter %d: %.6f" % (i, ll)
            print "Error rate:", err

    pY = forward(Xtest, W, b)
    print "Final error rate:", error_rate(pY, pYtest)
    print "Elapsed time for full GD:", datetime.now() - t0

    #SGD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    learning_rate = 0.0001
    reg = 0.01
    t0 = datetime.now()

    for i in xrange(1):  # one epoch
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in xrange(min(N, 500)):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W -= learning_rate * (derivative_W(p_y, y, x) + reg * W)
            b -= learning_rate * (derivative_b(p_y, y) + reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % (N / 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final Error rate:", error_rate(p_y, Ytest)
    print "Elapsed time for SGD:", datetime.now() - t0

    #Batch

    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    learning_rate = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N / batch_sz
    t0 = datetime.now()

    for i in xrange(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in xrange(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W -= learning_rate * (derivative_W(p_y, y, x) + reg * W)
            b -= learning_rate * (derivative_b(p_y, y) + reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)

            if j % (n_batches / 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final Error rate:", error_rate(p_y, Ytest)
    print "Elapsed time for Batch GD:", datetime.now() - t0

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label='full')
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label='stochastic')
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label='batch')
    plt.legend()
    plt.show()
Example #5
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print("Performing logistic regression...")

    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # # 1. full
    # W = np.random.randn(D, 10) / 28
    # b = np.zeros(10)
    # LL = []
    # lr = 0.0001
    # reg = 0.01
    # t0 = datetime.now()
    # for i in xrange(200):
    #     p_y = forward(Xtrain, W, b)
    #
    #     W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
    #     b += lr * (gradb(Ytrain_ind, p_y) - reg * b)
    #
    #
    #     p_y_test = forward(Xtest, W, b)
    #     ll = cost(p_y_test, Ytest_ind)
    #     LL.append(ll)
    #     if i % 10 == 0:
    #         err = error_rate(p_y_test, Ytest)
    #         print("Cost at iteration %d: %.6f" % (i, ll))
    #         print("Error rate:", err)
    # p_y = forward(Xtest, W, b)
    # print("Final error rate:", error_rate(p_y, Ytest))
    # print("Elapsted time for full GD:", datetime.now() - t0)
    #
    #
    # # 2. stochastic
    # W = np.random.randn(D, 10) / 28
    # b = np.zeros(10)
    # LL_stochastic = []
    # lr = 0.0001
    # reg = 0.01
    #
    # t0 = datetime.now()
    # for i in range(1): # takes very long since we're computing cost for 41k samples
    #     tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
    #     for n in range(min(N, 500)): # shortcut so it won't take so long...
    #         x = tmpX[n,:].reshape(1,D)
    #         y = tmpY[n,:].reshape(1,10)
    #         p_y = forward(x, W, b)
    #
    #         W += lr*(gradW(y, p_y, x) - reg*W)
    #         b += lr*(gradb(y, p_y) - reg*b)
    #
    #         p_y_test = forward(Xtest, W, b)
    #         ll = cost(p_y_test, Ytest_ind)
    #         LL_stochastic.append(ll)
    #
    #         if n % (N/2) == 0:
    #             err = error_rate(p_y_test, Ytest)
    #             print("Cost at iteration %d: %.6f" % (i, ll))
    #             print("Error rate:", err)
    # p_y = forward(Xtest, W, b)
    # print("Final error rate:", error_rate(p_y, Ytest))
    # print("Elapsted time for SGD:", datetime.now() - t0)
    #
    #
    # # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j + 1) * batch_sz, :]
            y = tmpY[j * batch_sz:(j + 1) * batch_sz, :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % (n_batches / 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - t0)

    # sections 1 (full) and 2 (stochastic) are commented out above, so only the
    # batch cost history is available to plot here
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
Example #6
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    # 'epoch' is used below but never defined in this snippet; 50 passes is an
    # assumed value in line with the other examples on this page
    epoch = 50

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. Full GD
    W = np.random.randn(D, 10) / 28  # scale by the square root of the dimensionality: sqrt(28 * 28) = 28
    b = np.zeros(10)
    loss_batch = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()

    for i in range(epoch):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        p_y_test = forward(Xtest, W, b)
        temp_loss = cost(p_y_test, Ytest_ind)
        loss_batch.append(temp_loss)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, temp_loss))
                print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)
    print("=======================================================")

    # 2. Stochastic GD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    loss_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()

    for i in range(epoch):  # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)

        #for n in range(min(N, 500)): # shortcut so it won't take so long...
        for n in range(N):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            loss = cost(p_y_test, Ytest_ind)
            loss_stochastic.append(loss)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, loss))
                print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)
    print("=======================================================")

    # 3. Mini-batch GD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    loss_mini_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()

    for i in range(epoch):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)

        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            temp_loss = cost(p_y_test, Ytest_ind)
            loss_mini_batch.append(temp_loss)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, temp_loss))
                print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for mini-batch GD:", datetime.now() - t0)

    # Plot graph
    x1 = np.linspace(0, 1, len(loss_batch))
    plt.plot(x1, loss_batch, label="full(batch) GD")
    x2 = np.linspace(0, 1, len(loss_stochastic))
    plt.plot(x2, loss_stochastic, label="stochastic GD")
    x3 = np.linspace(0, 1, len(loss_mini_batch))
    plt.plot(x3, loss_mini_batch, label="mini-batch GD")
    plt.legend()
    plt.show()
def main():
    # get PCA transformed data
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]  # the first 300 features

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print "Performing logistic regression..."
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    # set the initial weights to be pretty small, proportional to 1/sqrt(D) (sqrt(784) = 28)
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in xrange(200):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        # do a forward pass on the test set so that we can calculate the cost on the test set and then plot that
        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:  # calculate the error rate on every 10 iterations
            err = error_rate(p_y_test, Ytest)
            print "Cost at iteration %d: %.6f" % (i, ll)
            print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for full GD:", datetime.now() - t0

    # 2. stochastic
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in xrange(1):  # takes very long since we're computing cost for 41k samples
        # on each pass, we typically want to shuffle through the training data and the labels
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        # we're actually only going to go through 500 samples because its slow
        for n in xrange(min(N, 500)):  # shortcut so it won't take so long...
            # reshape x into a 2 dimensional matrix
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            # forward pass to get the output
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % (N / 2) == 0:  # calculate the error rate once for every N/2 samples
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for SGD:", datetime.now() - t0

    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N / batch_sz

    t0 = datetime.now()
    for i in xrange(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in xrange(n_batches):
            # get the current batches input and targets
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            # forward pass to get the output predictions
            p_y = forward(x, W, b)

            # Gradient descent
            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % (n_batches / 2) == 0:  # print error rate at every (number of batches)/2 iterations
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for batch GD:", datetime.now() - t0

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
Example #8
def main():
    X_train, Y_train, X_test, Y_test, N_train, N_test = get_transformed_data()
    X_train = X_train[:32000, ]
    Y_train = Y_train[:32000]
    X_test = X_test[:12000, ]
    Y_test = Y_test[:12000]
    Y_test_for_comp = Y_test
    Y_train_ind = ylength2indicator(Y_train)
    Y_test_ind = ylength2indicator(Y_test)
    # Above is to compute the length indicator;
    Y_train_Q = ytoint(Y_train)
    Y_test_Q = ytoint(Y_test)
    # Above is to compute the length only;

    ## About the iterations
    max_iter = 150
    print_period = 1000
    N = X_train.shape[0]
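    # NOTE: batch_sz, keep_prob0, keep_prob and model_path, as well as the helpers
    # init_filter, conv1pool, conv2pool, ylength2indicator, ytoint and digit used
    # in this function, are never defined in this snippet; they are assumed to be
    # module-level definitions or imports in the original script (not shown here).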
    n_batches = N / batch_sz
    M = 2048
    K = [7, 11]
    KM = 3072
    poolsz = (2, 2)

    ##Placeholder and other variables.
    X = tf.placeholder(tf.float32, shape=(batch_sz, 40, 40, 3), name='X')
    T0 = tf.placeholder(tf.float32, shape=(batch_sz, K[0]), name='T')
    T1 = tf.placeholder(tf.float32, shape=(batch_sz, K[1]), name='T')

    ## About Optimization parameters.
    W1_shape = (
        5, 5, 3, 16
    )  # (filter_width, filter_height, num_color_channels, num_feature_maps)
    W1_init = init_filter(W1_shape, poolsz)
    b1_init = np.zeros(W1_shape[-1], dtype=np.float32)
    W2_shape = (
        5, 5, 16, 32
    )  # (filter_width, filter_height, old_num_feature_maps, num_feature_maps)
    W2_init = init_filter(W2_shape, poolsz)
    b2_init = np.zeros(W2_shape[-1], dtype=np.float32)
    W3_shape = (
        5, 5, 32, 48
    )  # (filter_width, filter_height, old_num_feature_maps, num_feature_maps)
    W3_init = init_filter(W3_shape, poolsz)
    b3_init = np.zeros(W3_shape[-1], dtype=np.float32)
    W4_shape = (
        3, 3, 48, 64
    )  # (filter_width, filter_height, old_num_feature_maps, num_feature_maps)
    W4_init = init_filter(W4_shape, poolsz)
    b4_init = np.zeros(W4_shape[-1], dtype=np.float32)
    W5_shape = (
        3, 3, 64, 128
    )  # (filter_width, filter_height, old_num_feature_maps, num_feature_maps)
    W5_init = init_filter(W5_shape, poolsz)
    b5_init = np.zeros(W5_shape[-1], dtype=np.float32)

    W6_init = np.random.randn(W5_shape[-1] * 2 * 2,
                              M) / np.sqrt(W4_shape[-1] * 2 * 2 + M)
    b6_init = np.zeros(M, dtype=np.float32)

    W7_init = np.random.randn(M, KM) / np.sqrt(M + KM)
    b7_init = np.zeros(KM, dtype=np.float32)
    W8_init = np.random.randn(KM, K[0]) / np.sqrt(KM + K[0])
    b8_init = np.zeros(K[0], dtype=np.float32)
    W8N_init = np.random.randn(KM, K[1]) / np.sqrt(KM + K[1])
    b8N_init = np.zeros(K[1], dtype=np.float32)

    W1_L = tf.Variable(W1_init.astype(np.float32))
    b1_L = tf.Variable(b1_init.astype(np.float32))
    W2_L = tf.Variable(W2_init.astype(np.float32))
    b2_L = tf.Variable(b2_init.astype(np.float32))
    W3_L = tf.Variable(W3_init.astype(np.float32))
    b3_L = tf.Variable(b3_init.astype(np.float32))
    W4_L = tf.Variable(W4_init.astype(np.float32))
    b4_L = tf.Variable(b4_init.astype(np.float32))
    W5_L = tf.Variable(W5_init.astype(np.float32))
    b5_L = tf.Variable(b5_init.astype(np.float32))
    W6_L = tf.Variable(W6_init.astype(np.float32))
    b6_L = tf.Variable(b6_init.astype(np.float32))
    W7_L = tf.Variable(W7_init.astype(np.float32))
    b7_L = tf.Variable(b7_init.astype(np.float32))
    W8_L = tf.Variable(W8_init.astype(np.float32))
    b8_L = tf.Variable(b8_init.astype(np.float32))

    Z1_L = conv1pool(X, W1_L, b1_L)
    Z2_L = conv2pool(Z1_L, W2_L, b2_L)
    Z3_L = conv2pool(Z2_L, W3_L, b3_L)
    Z4_L = conv2pool(Z3_L, W4_L, b4_L)

    print Z4_L

    Z5_L = conv2pool(Z4_L, W5_L, b5_L)

    print Z5_L

    Z5_shape_L = Z5_L.get_shape().as_list()
    Z5r_L = tf.reshape(Z5_L, [Z5_shape_L[0], np.prod(Z5_shape_L[1:])])
    Z5r_L = tf.nn.dropout(Z5r_L, keep_prob)

    print Z5r_L

    Z6_L = tf.nn.relu(tf.matmul(Z5r_L, W6_L) + b6_L)
    Z6_L = tf.nn.dropout(Z6_L, keep_prob)

    print Z6_L

    Z7_L = tf.nn.relu(tf.matmul(Z6_L, W7_L) + b7_L)
    Z7_L = tf.nn.dropout(Z7_L, keep_prob)

    Yish_L = tf.matmul(Z7_L, W8_L) + b8_L
    cost_L = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(Yish_L, T0))
    train_op_L = tf.train.RMSPropOptimizer(0.0001, decay=0.98,
                                           momentum=0.9).minimize(cost_L)
    predict_op_L = tf.argmax(Yish_L, 1)

    W1 = [0, 0, 0, 0, 0]
    b1 = [0, 0, 0, 0, 0]
    W2 = [0, 0, 0, 0, 0]
    b2 = [0, 0, 0, 0, 0]
    W3 = [0, 0, 0, 0, 0]
    b3 = [0, 0, 0, 0, 0]
    W4 = [0, 0, 0, 0, 0]
    b4 = [0, 0, 0, 0, 0]
    W5 = [0, 0, 0, 0, 0]
    b5 = [0, 0, 0, 0, 0]
    W6 = [0, 0, 0, 0, 0]
    b6 = [0, 0, 0, 0, 0]
    W7 = [0, 0, 0, 0, 0]
    b7 = [0, 0, 0, 0, 0]
    W8 = [0, 0, 0, 0, 0]
    b8 = [0, 0, 0, 0, 0]
    Yish = [0, 0, 0, 0, 0]
    cost = [0, 0, 0, 0, 0]
    train_op = [0, 0, 0, 0, 0]
    predict_op = [0, 0, 0, 0, 0]

    for h in range(5):
        W1[h] = tf.Variable(W1_init.astype(np.float32))
        b1[h] = tf.Variable(b1_init.astype(np.float32))
        W2[h] = tf.Variable(W2_init.astype(np.float32))
        b2[h] = tf.Variable(b2_init.astype(np.float32))
        W3[h] = tf.Variable(W3_init.astype(np.float32))
        b3[h] = tf.Variable(b3_init.astype(np.float32))
        W4[h] = tf.Variable(W4_init.astype(np.float32))
        b4[h] = tf.Variable(b4_init.astype(np.float32))
        W5[h] = tf.Variable(W5_init.astype(np.float32))
        b5[h] = tf.Variable(b5_init.astype(np.float32))
        W6[h] = tf.Variable(W6_init.astype(np.float32))
        b6[h] = tf.Variable(b6_init.astype(np.float32))
        W7[h] = tf.Variable(W7_init.astype(np.float32))
        b7[h] = tf.Variable(b7_init.astype(np.float32))
        W8[h] = tf.Variable(W8N_init.astype(np.float32))
        b8[h] = tf.Variable(b8N_init.astype(np.float32))

        Z1 = conv1pool(X, W1[h], b1[h])
        Z2 = conv2pool(Z1, W2[h], b2[h])
        Z3 = conv2pool(Z2, W3[h], b3[h])
        Z4 = conv2pool(Z3, W4[h], b4[h])
        Z5 = conv2pool(Z4, W5[h], b5[h])
        Z5_shape = Z5.get_shape().as_list()
        Z5r = tf.reshape(Z5, [Z5_shape[0], np.prod(Z5_shape[1:])])
        Z5r = tf.nn.dropout(Z5r, keep_prob)
        Z6 = tf.nn.relu(tf.matmul(Z5r, W6[h]) + b6[h])
        Z6 = tf.nn.dropout(Z6, keep_prob)
        Z7 = tf.nn.relu(tf.matmul(Z6, W7[h]) + b7[h])
        Z7 = tf.nn.dropout(Z7, keep_prob)

        Yish[h] = tf.matmul(Z7, W8[h]) + b8[h]
        cost[h] = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(Yish[h], T1))
        train_op[h] = tf.train.RMSPropOptimizer(0.0001,
                                                decay=0.98,
                                                momentum=0.9).minimize(cost[h])
        predict_op[h] = tf.argmax(Yish[h], 1)


    # Save all the variables.
    saver = tf.train.Saver()
    LL = []
    Error_Testing = []
    Yish_log = []
    init = tf.initialize_all_variables()

    with tf.Session() as session:
        session.run(init)
        print 'Computing the length of the number.'
        for i in xrange(max_iter):
            Yish_ = np.zeros((len(X_test), K[0]))
            for j in xrange(n_batches):
                Xbatch = X_train[j * batch_sz:(j * batch_sz + batch_sz), ]
                Ybatch = Y_train_ind[j * batch_sz:(j * batch_sz + batch_sz), ]
                if len(Xbatch) == batch_sz:
                    session.run(train_op_L,
                                feed_dict={
                                    X: Xbatch,
                                    T0: Ybatch,
                                    keep_prob0: 0.8,
                                    keep_prob: 0.5
                                })
                    if j % print_period == 0:
                        test_cost = 0
                        prediction_test = np.zeros(len(X_test))
                        prediction_train = np.zeros(len(X_train))

                        for k in xrange(len(X_test) / batch_sz):
                            Xtestbatch = X_test[k * batch_sz:(k * batch_sz +
                                                              batch_sz), ]
                            Ytestbatch = Y_test_ind[k *
                                                    batch_sz:(k * batch_sz +
                                                              batch_sz), ]
                            test_cost += session.run(cost_L,
                                                     feed_dict={
                                                         X: Xtestbatch,
                                                         T0: Ytestbatch,
                                                         keep_prob0: 1,
                                                         keep_prob: 1
                                                     })
                            prediction_test[k *
                                            batch_sz:(k * batch_sz +
                                                      batch_sz)] = session.run(
                                                          predict_op_L,
                                                          feed_dict={
                                                              X: Xtestbatch,
                                                              keep_prob0: 1,
                                                              keep_prob: 1
                                                          })
                            Yish_[k * batch_sz:(k * batch_sz +
                                                batch_sz)] = session.run(
                                                    Yish_L,
                                                    feed_dict={
                                                        X: Xtestbatch,
                                                        keep_prob0: 1,
                                                        keep_prob: 1
                                                    })
                        err_testing = error_rate(prediction_test, Y_test_Q)
                        print "Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (
                            i, j, test_cost, err_testing)
                        LL.append(test_cost)
                        Error_Testing.append(err_testing)
        Yish_ = session.run(tf.nn.log_softmax(Yish_))
        Yish_log = np.array([Yish_])
        Yish_log_length = Yish_log
        Yish_log_length = np.reshape(Yish_log_length, (len(X_test), 1, K[0]))

        # Evaluating the digit.
        Y_train_ = np.array(digit(Y_train))
        Y_test_ = np.array(digit(Y_test))
        Yish_log_ = np.array([]).reshape(len(X_test), 2, 0)

        for h in range(5):
            Y_train = Y_train_[:, h]
            Y_test = Y_test_[:, h]
            Y_train_ind = y2indicator(Y_train)
            Y_test_ind = y2indicator(Y_test)

            t0 = datetime.now()
            LL = []
            Error_Training = []
            Error_Testing = []
            Yish_log = []

            print 'Computing digit %d of the number.' % (h)
            for i in xrange(max_iter):
                for j in xrange(n_batches):
                    Xbatch = X_train[j * batch_sz:(j * batch_sz + batch_sz), ]
                    Ybatch = Y_train_ind[j * batch_sz:(j * batch_sz +
                                                       batch_sz), ]
                    if len(Xbatch) == batch_sz:
                        session.run(train_op[h],
                                    feed_dict={
                                        X: Xbatch,
                                        T1: Ybatch,
                                        keep_prob0: 0.8,
                                        keep_prob: 0.5
                                    })
                        if j % print_period == 0:
                            test_cost = 0
                            prediction_test = np.zeros(len(X_test))
                            prediction_train = np.zeros(len(X_train))
                            Yish_ = np.zeros((len(X_test), K[1]))

                            for k in xrange(len(X_test) / batch_sz):
                                Xtestbatch = X_test[k *
                                                    batch_sz:(k * batch_sz +
                                                              batch_sz), ]
                                Ytestbatch = Y_test_ind[k * batch_sz:(
                                    k * batch_sz + batch_sz), ]
                                test_cost += session.run(cost[h],
                                                         feed_dict={
                                                             X: Xtestbatch,
                                                             T1: Ytestbatch,
                                                             keep_prob0: 1,
                                                             keep_prob: 1
                                                         })
                                prediction_test_batch = session.run(
                                    predict_op[h],
                                    feed_dict={
                                        X: Xtestbatch,
                                        keep_prob0: 1,
                                        keep_prob: 1
                                    })
                                for n, item in enumerate(
                                        prediction_test_batch):
                                    if item == 10:
                                        prediction_test_batch[n] = 0
                                prediction_test[k * batch_sz:(
                                    k * batch_sz +
                                    batch_sz)] = prediction_test_batch
                                Yish_[k * batch_sz:(k * batch_sz +
                                                    batch_sz)] = session.run(
                                                        Yish[h],
                                                        feed_dict={
                                                            X: Xtestbatch,
                                                            keep_prob0: 1,
                                                            keep_prob: 1
                                                        })

                            Y_test_transformed = Y_test
                            for n, item in enumerate(Y_test):
                                if item == 10:
                                    Y_test_transformed[n] = 0
                            for n, item in enumerate(Yish_):
                                if np.argmax(item, axis=0) == 10:
                                    Yish_[n] = [0.0909] * 11

                            err_testing = error_rate(prediction_test,
                                                     Y_test_transformed)
                            print "Cost / err on digit h=%d at iteration i=%d, j=%d: %.3f / %.3f" % (
                                h, i, j, test_cost, err_testing)
                            LL.append(test_cost)
                            Error_Testing.append(err_testing)
                print(session.run(W1[h][0, 0, 0, 3]))

            Yish_ = session.run(tf.nn.log_softmax(Yish_))
            for itr in range(len(Yish_)):
                Yish_log.append([
                    prediction_test[itr], Yish_[itr,
                                                int(prediction_test[itr])]
                ])
            Yish_log = np.array(Yish_log)
            Yish_log_ = np.dstack((Yish_log_, Yish_log))
            # print np.shape(Yish_log_);
            save_path = saver.save(session, model_path)

    # To make an artificial form.
    b = np.zeros((len(X_test), 2, 1))
    Yish_log_ = np.concatenate((b, Yish_log_), axis=2)
    Yish_log_ = np.concatenate((Yish_log_, b), axis=2)

    Yish_log_whole = np.concatenate((Yish_log_, Yish_log_length), axis=1)
    # print np.shape(Yish_log_whole);
    # print Yish_log_whole[1:2,]
    # Inference of the whole number#############
    # Argmax statistics.
    Inf_digit = np.zeros((len(X_test), 7))
    Inf_num = np.zeros(len(X_test))
    Inf_digit[:, 0] = Yish_log_whole[:, 1, 0] + Yish_log_whole[:, 2, 0]

    for j in range(len(X_test)):
        for i in range(1, 7):
            Inf_digit[j, i] = sum(
                Yish_log_whole[j, 1, 1:i]) + Yish_log_whole[j, 2, i]

    Length_digit = np.argmax(Inf_digit, 1)
    # Inference
    for i in range(len(Length_digit)):
        if Length_digit[i] == 0:
            Inf_num[i] = 0
        else:
            Inf_num[i] = ''.join([
                str(int(x))
                for x in Yish_log_whole[i, 0, 1:Length_digit[i] + 1]
            ])

    Inf_num = [int(x) for x in Inf_num]
    print Inf_num[0:9]
    print Y_test_for_comp[0:9]
    #Evaluation of the Error rate:
    err_testing = error_rate(Y_test_for_comp, Inf_num)
    print err_testing
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print "Performing logistic regression..."
    Xtrain = X[:-1000,]
    Ytrain = Y[:-1000]
    Xtest  = X[-1000:,]
    Ytest  = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in xrange(200):
        p_y = forward(Xtrain, W, b)

        W += lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W)
        b += lr*(gradb(Ytrain_ind, p_y) - reg*b)
        

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print "Cost at iteration %d: %.6f" % (i, ll)
            print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for full GD:", datetime.now() - t0


    # 2. stochastic
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in xrange(1): # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in xrange(min(N, 500)): # shortcut so it won't take so long...
            x = tmpX[n,:].reshape(1,D)
            y = tmpY[n,:].reshape(1,10)
            p_y = forward(x, W, b)

            W += lr*(gradW(y, p_y, x) - reg*W)
            b += lr*(gradb(y, p_y) - reg*b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % (N/2) == 0:
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for SGD:", datetime.now() - t0


    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N / batch_sz

    t0 = datetime.now()
    for i in xrange(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in xrange(n_batches):
            x = tmpX[j*batch_sz:(j*batch_sz + batch_sz),:]
            y = tmpY[j*batch_sz:(j*batch_sz + batch_sz),:]
            p_y = forward(x, W, b)

            W += lr*(gradW(y, p_y, x) - reg*W)
            b += lr*(gradb(y, p_y) - reg*b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % (n_batches/2) == 0:
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for batch GD:", datetime.now() - t0



    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
Example #10
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print "Performing logistic regression..."
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    #1. Full GD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    # LL stores the loss value at each iteration

    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()

    for i in xrange(50):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)

        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print "Cost at iteration %d: %.6f" % (i, ll)
            print "Error rate:", err

    p_y = forward(Xtest, W, b)
    print "The lost sequence is given as:", LL
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for full GD:", datetime.now() - t0

    #2. Stochastic

    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()

    for i in xrange(1):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in xrange(min(N, 500)):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % (N / 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err

    p_y = forward(Xtest, W, b)

    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsed time for SGD:", datetime.now() - t0

    # x1 = np.linspace(0, 1, len(LL))
    # plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    plt.legend()
    plt.show()
    print LL

    #3. batch

    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N / batch_sz

    t0 = datetime.now()

    for i in xrange(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in xrange(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)
            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)

            LL_batch.append(ll)
            if j % (n_batches / 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for batch GD:", datetime.now() - t0

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(50):
        p_y = forward(Xtrain, W, b)

        W += lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W)
        b += lr*(gradb(Ytrain_ind, p_y) - reg*b)
        

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)


    # 2. stochastic
    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in range(50): # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)): # shortcut so it won't take so long...
            x = tmpX[n,:].reshape(1,D)
            y = tmpY[n,:].reshape(1,10)
            p_y = forward(x, W, b)

            W += lr*(gradW(y, p_y, x) - reg*W)
            b += lr*(gradb(y, p_y) - reg*b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)


    # 3. batch
    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j*batch_sz:(j*batch_sz + batch_sz),:]
            y = tmpY[j*batch_sz:(j*batch_sz + batch_sz),:]
            p_y = forward(x, W, b)

            W += lr*(gradW(y, p_y, x) - reg*W)
            b += lr*(gradb(y, p_y) - reg*b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - t0)



    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize the data:
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print('Performing logistic regression...')
    Xtrain, Ytrain = X[:-1000, :], Y[:-1000]
    Xtest, Ytest = X[-1000:, :], Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)
    K = len(set(Y))

    np.random.seed()

    # 1. Full Gradient Descent:
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    LL = []  # a storage for costs
    lr = 0.0001  # learning rate
    reg = 0.01  # L2-regularization term
    t0 = datetime.now()
    print('utilizing full GD...')
    for i in range(200):
        p_y = forward(Xtrain, W, b)

        W += lr * (grad_W(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (grad_b(Ytrain_ind, p_y).sum(axis=0) - reg * b)

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)

        if i % 10 == 0:
            error = error_rate(p_y_test, Ytest)
            print('i: %d, cost: %.6f, error: %.6f' % (i, ll, error))
    dt1 = datetime.now() - t0
    p_y_test = forward(Xtest, W, b)
    plt.plot(LL)
    plt.title('Cost for full GD')
    plt.savefig('Cost_full_GD.png')  # save before show(); show() closes the figure
    plt.show()
    print('Final error rate:', error_rate(p_y_test, Ytest))
    print('Elapsed time for full GD:', dt1)

    # 2. Stochastic Gradient Descent
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    LLstochastic = []  # a storage for costs
    lr = 0.0001  # learning rate
    reg = 0.01  # L2-regularization term
    t0 = datetime.now()
    print('utilizing stochastic GD...')
    for i in range(25):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        # go through every sample in the shuffled training set
        for n in range(N):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, K)
            p_y = forward(x, W, b)

            W += lr * (grad_W(y, p_y, x) - reg * W)
            b += lr * (grad_b(y, p_y).sum(axis=0) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LLstochastic.append(ll)

            if n % (N // 2) == 0:
                error = error_rate(p_y_test, Ytest)
                print('i: %d, cost: %.6f, error: %.6f' % (i, ll, error))

    dt2 = datetime.now() - t0
    p_y_test = forward(Xtest, W, b)
    plt.plot(LLstochastic)
    plt.title('Cost for stochastic GD')
    plt.savefig('Cost_stochastic_GD.png')  # save before show()
    plt.show()
    print('Final error rate:', error_rate(p_y_test, Ytest))
    print('Elapsed time for stochastic GD:', dt2)

    # 3. Batch Gradient Descent:
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    LLbatch = []
    lr = 0.0001  # learning rate
    reg = 0.01  # L2-regularization term
    batch_size = 500
    n_batches = N // batch_size
    t0 = datetime.now()
    print('utilizing batch GD...')
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_size:batch_size * (j + 1), :]
            y = tmpY[j * batch_size:batch_size * (j + 1), :]
            p_y = forward(x, W, b)

            W += lr * (grad_W(y, p_y, x) - reg * W)
            b += lr * (grad_b(y, p_y).sum(axis=0) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LLbatch.append(ll)

            if j % (n_batches // 2) == 0:
                error = error_rate(p_y_test, Ytest)
                print('i: %d, cost: %.6f, error: %.6f' % (i, ll, error))
    dt3 = datetime.now() - t0
    p_y_test = forward(Xtest, W, b)
    plt.plot(LLbatch)
    plt.title('Cost for batch GD')
    plt.savefig('Cost_batch_GD.png')  # save before show()
    plt.show()
    print('Final error rate:', error_rate(p_y_test, Ytest))
    print('Elapsed time for batch GD', dt3)

    # plot all costs together:
    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label='full')

    x2 = np.linspace(0, 1, len(LLstochastic))
    plt.plot(x2, LLstochastic, label='stochastic')

    x3 = np.linspace(0, 1, len(LLbatch))
    plt.plot(x3, LLbatch, label='batch')

    plt.legend()
    plt.savefig('Costs_together.png')  # save before show()
    plt.show()
Example #13
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()

    print('logistic regression')

    # randomly assign weights

    N, D = Xtrain.shape

    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    M = 10
    scale = 28

    # full grad descent
    W, b = initwb(D, M, scale)

    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        P_Y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, P_Y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, P_Y) - reg * b)

        P_Y_test = forward(Xtest, W, b)
        ll = cost(P_Y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(P_Y_test, Ytest)
            print("cost at iter:  %d:  %.6f" % (i, ll))
            print("error rate:  ", err, "\n")

    P_Y = forward(Xtest, W, b)
    print("final error:  ", error_rate(P_Y, Ytest))
    print("elapsed time for full GD:  ", datetime.now() - t0)

    # 2.  Stochastic
    W, b = initwb(D, M, scale)

    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in range(1):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            P_Y = forward(x, W, b)

            W += lr * (gradW(y, P_Y, x) - reg * W)
            b += lr * (gradb(y, P_Y) - reg * b)

            P_Y_test = forward(Xtest, W, b)

            ll = cost(P_Y_test, Ytest_ind)

            LL_stochastic.append(ll)

            if n % (N / 2) == 0:
                err = error_rate(P_Y_test, Ytest)
                print("Cost at iteration %d:  %6.f" % (i, ll))
                print("error rate:  ", err)

    P_Y = forward(Xtest, W, b)

    print("error rate:  ", error_rate(P_Y, Ytest))
    print("elapsed time for SGD:  ", datetime.now() - t0)

    # batch
    W, b = initwb(D, M, scale)

    LL_batch = []
    lr = 0.001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            P_Y = forward(x, W, b)

            W += lr * (gradW(y, P_Y, x) - reg * W)
            b += lr * (gradb(y, P_Y) - reg * b)
            P_Y_test = forward(Xtest, W, b)

            ll = cost(P_Y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % (n_batches / 2) == 0:
                err = error_rate(P_Y_test, Ytest)
                print("Cost at iteration %d:  %6.f" % (i, ll))
                print("error rate:  ", err)
    P_Y = forward(Xtest, W, b)

    print("error rate:  ", error_rate(P_Y, Ytest))
    print("elapsed time for SGD:  ", datetime.now() - t0)

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
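
# NOTE: initwb(D, M, scale) used in the example above is another helper that is
# not shown on this page; the following is only a minimal sketch consistent with
# how it is called (an assumption, not copied from the original module).
import numpy as np

def initwb(D, M, scale):
    # small random weights scaled down by 'scale' (28 = sqrt(784)), zero biases
    W = np.random.randn(D, M) / scale
    b = np.zeros(M)
    return W, b
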
Example #14
def main():
	X, Y, _, _  = get_transformed_data()
	# first 300 features
	X = X[:,:300]


	# normalize X first
	mu  = X.mean(axis=0)
	std = X.std(axis=0)
	X = (X-mu) / std 


	print("Performing logistic regression...")	
	
	Xtrain = X[:-1000,]
	Ytrain = Y[:-1000]
	Xtest = X[-1000:,]
	Ytest = Y[-1000:]
	

	N, D = Xtrain.shape
	Ytrain_ind = y2indicator(Ytrain)
	Ytest_ind = y2indicator(Ytest)
	

	#1. full gradient descent
	W = np.random.randn(D, 10) / 28
	b = np.zeros(10)
	LL = [] 
	

	lr = 0.0001
	reg = 0.01 
	t0 = datetime.now()
	for i in range(200):
		p_y = forward(Xtrain, W, b)
	

		W+=  lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W)
		b+=  lr*(gradb(Ytrain_ind, p_y) - reg*b)
		
		p_y_test = forward(Xtest, W, b)
		ll = cost(p_y_test, Ytest_ind)
		LL.append(ll)


		err = error_rate(p_y_test, Ytest)
		
		if i % 10 == 0:
			print("FULL Cost at iteration %d: %.6f" % (i, ll))
			print("FULL Error rate:", err)

	p_y = forward(Xtest, W, b)				
	print("FULL Final error rate", error_rate(p_y, Ytest))
	print("FULL GD time", (datetime.now() - t0))
	
	#2. Stochastic gradient descent
	W = np.random.randn(D, 10) / 28
	b = np.zeros(10)
	LL_stochastic = [] 

	lr = 0.0001
	reg = 0.01 
	t0 = datetime.now()
	for i in range(1):
		tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
		
		for n in range(min(N, 500)):
		
			x = tmpX[n, :].reshape(1, D)
			y = tmpY[n, :].reshape(1, 10)
			p_y = forward(x, W, b)

			
			W+=  lr*(gradW(y, p_y, x) - reg*W)
			b+=  lr*(gradb(y, p_y) - reg*b)
			p_y_test = forward(Xtest, W, b)
			ll = cost(p_y_test, Ytest_ind)
			LL_stochastic.append(ll)

			err = error_rate(p_y_test, Ytest)	
			if n % int(N / 2) == 0:
				print("STOCHASTIC Cost at iteration %d: %.6f" % (i, ll))
				print("STOCHASTIC Error rate:", err)
	
	p_y = forward(Xtest, W, b)				
	print("STOCHASTIC Final error rate", error_rate(p_y, Ytest))
	print("STOCHASTIC GD time", (datetime.now() - t0))	


	#3. batch
	W = np.random.randn(D, 10) / 28
	b = np.zeros(10)
	LL_batch = [] 

	lr = 0.0001
	reg = 0.01 
	
	batch_sz = 500
	n_batches = N // batch_sz

	t0 = datetime.now()
	for i in range(50):
		tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
		
		for j in range(n_batches):
		
			x = tmpX[j*batch_sz:((j+1)*batch_sz), :]
			y = tmpY[j*batch_sz:((j+1)*batch_sz), :]
			p_y = forward(x, W, b)

			
			W+=  lr*(gradW(y, p_y, x) - reg*W)
			b+=  lr*(gradb(y, p_y) - reg*b)
			p_y_test = forward(Xtest, W, b)
			ll = cost(p_y_test, Ytest_ind)
			LL_batch.append(ll)

		
			
			if j % int(n_batches / 2) == 0:
				err = error_rate(p_y_test, Ytest)
				print("BATCH Cost at iteration %d: %.6f" % (i, ll))
				print("BATCH Error rate:", err)
	
	p_y = forward(Xtest, W, b)				
	print("BATCH Final error rate", error_rate(p_y, Ytest))
	print("BATCH GD time", (datetime.now() - t0))	


	

	x1 = np.linspace(0, 1, len(LL))
	plt.plot(x1, LL, label='full')
	x2 = np.linspace(0, 1, len(LL_stochastic))
	plt.plot(x2, LL_stochastic, label='stochastic')
	x3 = np.linspace(0, 1, len(LL_batch))
	plt.plot(x3, LL_batch, label='batch')
	plt.legend()
	plt.show()