# Shared imports for the examples below. The data loaders
# (get_transformed_data, get_normalized_data, ...) and the model helpers
# (forward, cost, gradW, gradb, error_rate, y2indicator, ...) are not defined
# in these snippets; they are assumed to come from the course's utility module.
# A hedged sketch of the model helpers is given after the first example.
import sys
from datetime import datetime

import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle


def benchmark_pca():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = np.zeros((N, 10))
    for i in range(N):
        Ytrain_ind[i, Ytrain[i]] = 1

    Ntest = len(Ytest)
    Ytest_ind = np.zeros((Ntest, 10))
    for i in range(Ntest):
        Ytest_ind[i, Ytest[i]] = 1

    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL = []
    LLtest = []
    CRtest = []

    # D = 300 -> error = 0.07
    lr = 0.0001
    reg = 0.01
    for i in range(200):
        p_y = forward(Xtrain, W, b)
        # print "p_y:", p_y
        ll = cost(p_y, Ytrain_ind)
        LL.append(ll)

        p_y_test = forward(Xtest, W, b)
        lltest = cost(p_y_test, Ytest_ind)
        LLtest.append(lltest)

        err = error_rate(p_y_test, Ytest)
        CRtest.append(err)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)
        if i % 10 == 0:
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    iters = range(len(LL))
    plt.plot(iters, LL, label='train loss')
    plt.plot(iters, LLtest, label='test loss')
    plt.title('Loss')
    plt.legend()
    plt.show()
    plt.plot(CRtest)
    plt.title('Error')
    plt.show()
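

# ---------------------------------------------------------------------------
# The examples in this file call helper functions that are never defined here
# (forward, cost, gradW, gradb, error_rate, y2indicator, and in later examples
# T_indicator, J, accuracy, predict, initwb). Below is a minimal sketch of what
# those helpers are assumed to compute for a softmax classifier, written to
# match the `W += lr * (gradW(...) - reg * W)` update used by most snippets
# (i.e. gradW/gradb return the log-likelihood ascent direction). This is an
# illustration under those assumptions, not the original implementation. The
# data loaders are assumed to return MNIST pixel data (optionally
# PCA-transformed and/or normalized) split into train and test sets.
# ---------------------------------------------------------------------------
def forward(X, W, b):
    # softmax output: one row of class probabilities per sample
    a = X.dot(W) + b
    expa = np.exp(a - a.max(axis=1, keepdims=True))
    return expa / expa.sum(axis=1, keepdims=True)


def cost(p_y, t):
    # total cross-entropy between predictions p_y and one-hot targets t
    return -(t * np.log(p_y)).sum()


def gradW(t, y, X):
    # gradient of the log-likelihood w.r.t. W (ascent direction)
    return X.T.dot(t - y)


def gradb(t, y):
    # gradient of the log-likelihood w.r.t. b
    return (t - y).sum(axis=0)


def error_rate(p_y, t):
    # fraction of misclassified samples (t holds integer class labels)
    return np.mean(np.argmax(p_y, axis=1) != t)


def y2indicator(y):
    # one-hot encode integer class labels into an (N x 10) indicator matrix
    y = y.astype(np.int32)
    ind = np.zeros((len(y), 10))
    ind[np.arange(len(y)), y] = 1
    return ind


def initwb(D, M, scale):
    # small random weights and zero biases, as used by a later example
    return np.random.randn(D, M) / scale, np.zeros(M)
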
def benchmark_full():
    Xtrain, Xtest, Ytrain, Ytest = get_normalized_data()

    print("Performing logistic regression...")
    # lr = LogisticRegression(solver='lbfgs')

    # convert Ytrain and Ytest to (N x K) matrices of indicator variables
    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL = []
    LLtest = []
    CRtest = []

    # reg = 1
    # learning rate 0.0001 is too high, 0.00005 is also too high
    # 0.00003 / 2000 iterations => 0.363 error, -7630 cost
    # 0.00004 / 1000 iterations => 0.295 error, -7902 cost
    # 0.00004 / 2000 iterations => 0.321 error, -7528 cost

    # reg = 0.1, still around 0.31 error
    # reg = 0.01, still around 0.31 error
    lr = 0.00004
    reg = 0.01
    for i in range(500):
        p_y = forward(Xtrain, W, b)
        # print "p_y:", p_y
        ll = cost(p_y, Ytrain_ind)
        LL.append(ll)

        p_y_test = forward(Xtest, W, b)
        lltest = cost(p_y_test, Ytest_ind)
        LLtest.append(lltest)

        err = error_rate(p_y_test, Ytest)
        CRtest.append(err)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)
        if i % 10 == 0:
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    iters = range(len(LL))
    plt.plot(iters, LL, iters, LLtest)
    plt.show()
    plt.plot(CRtest)
    plt.show()
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(50):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)

    # 2. stochastic
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in range(50):  # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)):  # shortcut so it won't take so long...
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)

    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - t0)

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()

# Example 4
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print("Performing logistic regression...")

    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # # 1. full
    # W = np.random.randn(D, 10) / 28
    # b = np.zeros(10)
    # LL = []
    # lr = 0.0001
    # reg = 0.01
    # t0 = datetime.now()
    # for i in xrange(200):
    #     p_y = forward(Xtrain, W, b)
    #
    #     W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
    #     b += lr * (gradb(Ytrain_ind, p_y) - reg * b)
    #
    #
    #     p_y_test = forward(Xtest, W, b)
    #     ll = cost(p_y_test, Ytest_ind)
    #     LL.append(ll)
    #     if i % 10 == 0:
    #         err = error_rate(p_y_test, Ytest)
    #         print("Cost at iteration %d: %.6f" % (i, ll))
    #         print("Error rate:", err)
    # p_y = forward(Xtest, W, b)
    # print("Final error rate:", error_rate(p_y, Ytest))
    # print("Elapsted time for full GD:", datetime.now() - t0)
    #
    #
    # # 2. stochastic
    # W = np.random.randn(D, 10) / 28
    # b = np.zeros(10)
    # LL_stochastic = []
    # lr = 0.0001
    # reg = 0.01
    #
    # t0 = datetime.now()
    # for i in range(1): # takes very long since we're computing cost for 41k samples
    #     tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
    #     for n in range(min(N, 500)): # shortcut so it won't take so long...
    #         x = tmpX[n,:].reshape(1,D)
    #         y = tmpY[n,:].reshape(1,10)
    #         p_y = forward(x, W, b)
    #
    #         W += lr*(gradW(y, p_y, x) - reg*W)
    #         b += lr*(gradb(y, p_y) - reg*b)
    #
    #         p_y_test = forward(Xtest, W, b)
    #         ll = cost(p_y_test, Ytest_ind)
    #         LL_stochastic.append(ll)
    #
    #         if n % (N/2) == 0:
    #             err = error_rate(p_y_test, Ytest)
    #             print("Cost at iteration %d: %.6f" % (i, ll))
    #             print("Error rate:", err)
    # p_y = forward(Xtest, W, b)
    # print("Final error rate:", error_rate(p_y, Ytest))
    # print("Elapsted time for SGD:", datetime.now() - t0)
    #
    #
    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j + 1) * batch_sz, :]
            y = tmpY[j * batch_sz:(j + 1) * batch_sz, :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % (n_batches // 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - t0)

    # sections 1 and 2 are commented out above, so LL and LL_stochastic were never built
    # x1 = np.linspace(0, 1, len(LL))
    # plt.plot(x1, LL, label="full")
    # x2 = np.linspace(0, 1, len(LL_stochastic))
    # plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_normalized_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / np.sqrt(D)
    W0 = W.copy() # save for later
    b = np.zeros(10)
    test_losses_full = []
    lr = 0.9
    reg = 0.
    t0 = datetime.now()
    last_dt = 0
    intervals = []
    for i in range(50):
        p_y = forward(Xtrain, W, b)

        gW = gradW(Ytrain_ind, p_y, Xtrain) / N
        gb = gradb(Ytrain_ind, p_y) / N

        W += lr*(gW - reg*W)
        b += lr*(gb - reg*b)

        p_y_test = forward(Xtest, W, b)
        test_loss = cost(p_y_test, Ytest_ind)
        dt = (datetime.now() - t0).total_seconds()

        # save these
        dt2 = dt - last_dt
        last_dt = dt
        intervals.append(dt2)

        test_losses_full.append([dt, test_loss])
        if (i + 1) % 10 == 0:
            print("Cost at iteration %d: %.6f" % (i + 1, test_loss))
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)

    # save the max time so we don't surpass it in subsequent iterations
    max_dt = dt
    avg_interval_dt = np.mean(intervals)


    # 2. stochastic
    W = W0.copy()
    b = np.zeros(10)
    test_losses_sgd = []
    lr = 0.001
    reg = 0.

    t0 = datetime.now()
    last_dt_calculated_loss = 0
    done = False
    for i in range(50): # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(N):
            x = tmpX[n,:].reshape(1,D)
            y = tmpY[n,:].reshape(1,10)
            p_y = forward(x, W, b)

            gW = gradW(y, p_y, x)
            gb = gradb(y, p_y)

            W += lr*(gW - reg*W)
            b += lr*(gb - reg*b)

            dt = (datetime.now() - t0).total_seconds()
            dt2 = dt - last_dt_calculated_loss

            if dt2 > avg_interval_dt:
                last_dt_calculated_loss = dt
                p_y_test = forward(Xtest, W, b)
                test_loss = cost(p_y_test, Ytest_ind)
                test_losses_sgd.append([dt, test_loss])

            # time to quit
            if dt > max_dt:
                done = True
                break
        if done:
            break

        if (i + 1) % 1 == 0:
            print("Cost at iteration %d: %.6f" % (i + 1, test_loss))
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)


    # 3. mini-batch
    W = W0.copy()
    b = np.zeros(10)
    test_losses_batch = []
    batch_sz = 500
    lr = 0.08
    reg = 0.
    n_batches = int(np.ceil(N / batch_sz))


    t0 = datetime.now()
    last_dt_calculated_loss = 0
    done = False
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j*batch_sz:(j + 1)*batch_sz,:]
            y = tmpY[j*batch_sz:(j + 1)*batch_sz,:]
            p_y = forward(x, W, b)

            current_batch_sz = len(x)
            gW = gradW(y, p_y, x) / current_batch_sz
            gb = gradb(y, p_y) / current_batch_sz

            W += lr*(gW - reg*W)
            b += lr*(gb - reg*b)

            dt = (datetime.now() - t0).total_seconds()
            dt2 = dt - last_dt_calculated_loss

            if dt2 > avg_interval_dt:
                last_dt_calculated_loss = dt
                p_y_test = forward(Xtest, W, b)
                test_loss = cost(p_y_test, Ytest_ind)
                test_losses_batch.append([dt, test_loss])

            # time to quit
            if dt > max_dt:
                done = True
                break
        if done:
            break

        if (i + 1) % 10 == 0:
            print("Cost at iteration %d: %.6f" % (i + 1, test_loss))
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for mini-batch GD:", datetime.now() - t0)


    # convert to numpy arrays
    test_losses_full = np.array(test_losses_full)
    test_losses_sgd = np.array(test_losses_sgd)
    test_losses_batch = np.array(test_losses_batch)

    
    plt.plot(test_losses_full[:,0], test_losses_full[:,1], label="full")
    plt.plot(test_losses_sgd[:,0], test_losses_sgd[:,1], label="sgd")
    plt.plot(test_losses_batch[:,0], test_losses_batch[:,1], label="mini-batch")
    plt.legend()
    plt.show()

# Example 6
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. Full GD
    W = np.random.randn(D, 10) / 28  # scaled by the square root of the dimensionality: sqrt(28 * 28) = sqrt(784) = 28
    b = np.zeros(10)
    loss_batch = []
    lr = 0.0001
    reg = 0.01
    epoch = 50  # number of passes; the snippet uses `epoch` without defining it, so a value is assumed here
    t0 = datetime.now()

    for i in range(epoch):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        p_y_test = forward(Xtest, W, b)
        temp_loss = cost(p_y_test, Ytest_ind)
        loss_batch.append(temp_loss)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, temp_loss))
                print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)
    print("=======================================================")

    # 2. Stochastic GD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    loss_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()

    for i in range(epoch):  # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)

        #for n in range(min(N, 500)): # shortcut so it won't take so long...
        for n in range(N):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            loss = cost(p_y_test, Ytest_ind)
            loss_stochastic.append(loss)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, loss))
                print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)
    print("=======================================================")

    # 3. Mini-batch GD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    loss_mini_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()

    for i in range(epoch):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)

        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            temp_loss = cost(p_y_test, Ytest_ind)
            loss_mini_batch.append(temp_loss)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, temp_loss))
                print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for mini-batch GD:", datetime.now() - t0)

    # Plot graph
    x1 = np.linspace(0, 1, len(loss_batch))
    plt.plot(x1, loss_batch, label="full(batch) GD")
    x2 = np.linspace(0, 1, len(loss_stochastic))
    plt.plot(x2, loss_stochastic, label="stochastic GD")
    x3 = np.linspace(0, 1, len(loss_mini_batch))
    plt.plot(x3, loss_mini_batch, label="mini-batch GD")
    plt.legend()
    plt.show()
def main():
    # get PCA transformed data
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]  # the first 300 features

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print "Performing logistic regression..."
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / 28  # small initial weights: divided by the square root of the dimensionality, sqrt(784) = 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        # do a forward pass on the test set so that we can calculate the cost on the test set and then plot that
        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:  # calculate the error rate on every 10 iterations
            err = error_rate(p_y_test, Ytest)
            print "Cost at iteration %d: %.6f" % (i, ll)
            print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for full GD:", datetime.now() - t0

    # 2. stochastic
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in range(1):  # takes very long since we're computing cost for 41k samples
        # on each pass, we typically want to shuffle through the training data and the labels
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        # we're actually only going to go through 500 samples because its slow
        for n in range(min(N, 500)):  # shortcut so it won't take so long...
            # reshape x into a 2 dimensional matrix
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            # forward pass to get the output
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % (N // 2) == 0:  # calculate the error rate once for every N/2 samples
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for SGD:", datetime.now() - t0

    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            # get the current batches input and targets
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            # forward pass to get the output predictions
            p_y = forward(x, W, b)

            # Gradient descent
            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % (n_batches // 2) == 0:  # print the error rate every (number of batches)/2 iterations
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for batch GD:", datetime.now() - t0

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()

# Example 8
def main():
    X_train, X_test, t_train, t_test = get_pca_normalized_data()
    print("Performing multi-class logistic regression...\n")

    N, D = X_train.shape
    K = 10
    T_train = T_indicator(t_train)
    T_test = T_indicator(t_test)

    lr = float(sys.argv[1])
    reg = float(sys.argv[2])
    batch_size = int(sys.argv[3])

    ######## 1. FULL GRADIENT DESCENT ########
    print('Full Gradient Descent')
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    J_test_full = []
    t0 = datetime.now()
    for epoch in range(50):
        Y_train = forward(X_train, W, b)
        W -= lr * (gradW(T_train, Y_train, X_train) - reg * W)
        b -= lr * (gradb(T_train, Y_train) - reg * b)

        Y_test = forward(X_test, W, b)
        j_test = J(T_test, Y_test)
        J_test_full.append(j_test)

        if epoch % 1 == 0:
            acc = accuracy(predict(Y_test), t_test)
            if epoch % 10 == 0:
                print("Epoch {}:\tcost: {}\taccuracy: {}".format(
                    epoch, round(j_test, 4), acc))
    Y_test = forward(X_test, W, b)
    print("Final accuracy:", accuracy(predict(Y_test), t_test))
    print("Elapsted time for full GD: {}\n".format(datetime.now() - t0))

    ######## 2. STOCHASTIC GRADIENT DESCENT ########
    print('Stochastic Gradient Descent')
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    J_test_stochastic = []
    t0 = datetime.now()
    for epoch in range(50):  # takes very long since we're computing cost for 41k samples
        tmpX, tmpT = shuffle(X_train, T_train)
        for n in range(min(N, 500)):  # shortcut so it won't take so long...
            x = tmpX[n, :].reshape(1, D)
            t = tmpT[n, :].reshape(1, 10)
            Y_train = forward(x, W, b)

            W -= lr * (gradW(t, Y_train, x) - reg * W)
            b -= lr * (gradb(t, Y_train) - reg * b)

            Y_test = forward(X_test, W, b)
            j_test = J(T_test, Y_test)
            J_test_stochastic.append(j_test)

        if epoch % 1 == 0:
            acc = accuracy(predict(Y_test), t_test)
            if epoch % 10 == 0:
                print("Epoch {}:\tcost: {}\taccuracy: {}".format(
                    epoch, round(j_test, 4), acc))
    Y_test_final = forward(X_test, W, b)
    print("Final accuracy:", accuracy(predict(Y_test_final), t_test))
    print("Elapsted time for SGD: {}\n".format(datetime.now() - t0))

    ######## 3. BATCH GRADIENT DESCENT ########
    print('Batch Gradient Descent')
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    J_test_batch = []
    nb_batches = N // batch_size
    t0 = datetime.now()
    for epoch in range(50):
        tmpX, tmpT = shuffle(X_train, T_train)
        for batch_index in range(nb_batches):
            x = tmpX[batch_index * batch_size:(batch_index * batch_size +
                                               batch_size), :]
            t = tmpT[batch_index * batch_size:(batch_index * batch_size +
                                               batch_size), :]
            Y_train = forward(x, W, b)

            W -= lr * (gradW(t, Y_train, x) - reg * W)
            b -= lr * (gradb(t, Y_train) - reg * b)

            Y_test = forward(X_test, W, b)
            j_test = J(T_test, Y_test)
            J_test_batch.append(j_test)
        if epoch % 1 == 0:
            acc = accuracy(predict(Y_test), t_test)
            if epoch % 10 == 0:
                print("Epoch {}:\tcost: {}\taccuracy: {}".format(
                    epoch, round(j_test, 4), acc))
    Y_test_final = forward(X_test, W, b)
    print("Final accuracy:", accuracy(predict(Y_test_final), t_test))
    print("Elapsted time for batch GD:", datetime.now() - t0)

    ######## PLOTS ########
    x1 = np.linspace(0, 1, len(J_test_full))
    plt.plot(x1, J_test_full, label="full")
    x2 = np.linspace(0, 1, len(J_test_stochastic))
    plt.plot(x2, J_test_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(J_test_batch))
    plt.plot(x3, J_test_batch, label="batch")
    plt.legend()
    #plt.savefig('full_vs_stoch_vs_batch_lr={}_reg={}_batch_size={}.png'.format(lr, reg, batch_size))
    plt.show()
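

# The main() above reads its hyperparameters from the command line:
# sys.argv[1] = learning rate, sys.argv[2] = regularization strength,
# sys.argv[3] = batch size. A hedged usage sketch (the script name is assumed,
# and the guard is left commented out because this file redefines main()
# several times):
#
#     python full_vs_stochastic_vs_batch_gd.py 0.0001 0.01 500
#
# if __name__ == "__main__":
#     main()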
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print "Performing logistic regression..."
    Xtrain = X[:-1000,]
    Ytrain = Y[:-1000]
    Xtest  = X[-1000:,]
    Ytest  = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        p_y = forward(Xtrain, W, b)

        W += lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W)
        b += lr*(gradb(Ytrain_ind, p_y) - reg*b)
        

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print "Cost at iteration %d: %.6f" % (i, ll)
            print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for full GD:", datetime.now() - t0


    # 2. stochastic
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in range(1):  # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)):  # shortcut so it won't take so long...
            x = tmpX[n,:].reshape(1,D)
            y = tmpY[n,:].reshape(1,10)
            p_y = forward(x, W, b)

            W += lr*(gradW(y, p_y, x) - reg*W)
            b += lr*(gradb(y, p_y) - reg*b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % (N // 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for SGD:", datetime.now() - t0


    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j*batch_sz:(j*batch_sz + batch_sz),:]
            y = tmpY[j*batch_sz:(j*batch_sz + batch_sz),:]
            p_y = forward(x, W, b)

            W += lr*(gradW(y, p_y, x) - reg*W)
            b += lr*(gradb(y, p_y) - reg*b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % (n_batches // 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for batch GD:", datetime.now() - t0



    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()

# Example 10
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print "Performing logistic regression..."
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    #1. Full GD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []  # holds the loss at each iteration

    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()

    for i in range(50):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)

        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print "Cost at iteration %d: %.6f" % (i, ll)
            print "Error rate:", err

    p_y = forward(Xtest, W, b)
    print "The lost sequence is given as:", LL
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for full GD:", datetime.now() - t0

    #2. Stochastic

    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()

    for i in range(1):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % (N // 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)

    p_y = forward(Xtest, W, b)

    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsed time for SGD:", datetime.now() - t0

    # x1 = np.linspace(0, 1, len(LL))
    # plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    plt.legend()
    plt.show()
    print(LL)

    #3. batch

    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()

    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)
            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)

            LL_batch.append(ll)
            if j % (n_batches // 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for batch GD:", datetime.now() - t0

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(50):
        p_y = forward(Xtrain, W, b)

        W += lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W)
        b += lr*(gradb(Ytrain_ind, p_y) - reg*b)
        

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)


    # 2. stochastic
    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in range(50): # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)): # shortcut so it won't take so long...
            x = tmpX[n,:].reshape(1,D)
            y = tmpY[n,:].reshape(1,10)
            p_y = forward(x, W, b)

            W += lr*(gradW(y, p_y, x) - reg*W)
            b += lr*(gradb(y, p_y) - reg*b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)


    # 3. batch
    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j*batch_sz:(j*batch_sz + batch_sz),:]
            y = tmpY[j*batch_sz:(j*batch_sz + batch_sz),:]
            p_y = forward(x, W, b)

            W += lr*(gradW(y, p_y, x) - reg*W)
            b += lr*(gradb(y, p_y) - reg*b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - t0)



    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()

# Example 12
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()

    print('logistic regression')

    # randomly assign weights

    N, D = Xtrain.shape

    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    M = 10
    scale = 28

    # full grad descent
    W, b = initwb(D, M, scale)

    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        P_Y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, P_Y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, P_Y) - reg * b)

        P_Y_test = forward(Xtest, W, b)
        ll = cost(P_Y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(P_Y_test, Ytest)
            print("cost at iter:  %d:  %.6f" % (i, ll))
            print("error rate:  ", err, "\n")

    P_Y = forward(Xtest, W, b)
    print("final error:  ", error_rate(P_Y, Ytest))
    print("elapsed time for full GD:  ", datetime.now() - t0)

    # 2.  Stochastic
    W, b = initwb(D, M, scale)

    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in range(1):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            P_Y = forward(x, W, b)

            W += lr * (gradW(y, P_Y, x) - reg * W)
            b += lr * (gradb(y, P_Y) - reg * b)

            P_Y_test = forward(Xtest, W, b)

            ll = cost(P_Y_test, Ytest_ind)

            LL_stochastic.append(ll)

            if n % (N // 2) == 0:
                err = error_rate(P_Y_test, Ytest)
                print("Cost at iteration %d:  %.6f" % (i, ll))
                print("error rate:  ", err)

    P_Y = forward(Xtest, W, b)

    print("error rate:  ", error_rate(P_Y, Ytest))
    print("elapsed time for SGD:  ", datetime.now() - t0)

    # batch
    W, b = initwb(D, M, scale)

    LL_batch = []
    lr = 0.001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            P_Y = forward(x, W, b)

            W += lr * (gradW(y, P_Y, x) - reg * W)
            b += lr * (gradb(y, P_Y) - reg * b)
            P_Y_test = forward(Xtest, W, b)

            ll = cost(P_Y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % (n_batches // 2) == 0:
                err = error_rate(P_Y_test, Ytest)
                print("Cost at iteration %d:  %.6f" % (i, ll))
                print("error rate:  ", err)
    P_Y = forward(Xtest, W, b)

    print("error rate:  ", error_rate(P_Y, Ytest))
    print("elapsed time for SGD:  ", datetime.now() - t0)

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()

# Example 13
def main():
	X, Y, _, _  = get_transformed_data()
	#First 300 factors
	X = X[:,:300]


	# normalize X first
	mu  = X.mean(axis=0)
	std = X.std(axis=0)
	X = (X-mu) / std 


	print("Performing logistic regression...")	
	
	Xtrain = X[:-1000,]
	Ytrain = Y[:-1000]
	Xtest = X[-1000:,]
	Ytest = Y[-1000:]
	

	N, D = Xtrain.shape
	Ytrain_ind = y2indicator(Ytrain)
	Ytest_ind = y2indicator(Ytest)
	

	#1. full gradient descent
	W = np.random.randn(D, 10) / 28
	b = np.zeros(10)
	LL = [] 
	

	lr = 0.0001
	reg = 0.01 
	t0 = datetime.now()
	for i in range(200):
		p_y = forward(Xtrain, W, b)
	

		W+=  lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W)
		b+=  lr*(gradb(Ytrain_ind, p_y) - reg*b)
		
		p_y_test = forward(Xtest, W, b)
		ll = cost(p_y_test, Ytest_ind)
		LL.append(ll)


		err = error_rate(p_y_test, Ytest)
		
		if i % 10 ==0:
			print("FULL Cost a iteration %d: %.6f" %(i,ll))
			print("FULL Error rate:", err)

	p_y = forward(Xtest, W, b)				
	print("FULL Final error rate", error_rate(p_y, Ytest))
	print("FULL GD time", (datetime.now() - t0))
	
	#2. Stochastic gradient descent
	W = np.random.randn(D, 10) / 28
	b = np.zeros(10)
	LL_stochastic = [] 

	lr = 0.0001
	reg = 0.01 
	t0 = datetime.now()
	for i in range(1):
		tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
		
		for n in range(min(N, 500)):
		
			x = tmpX[n, :].reshape(1, D)
			y = tmpY[n, :].reshape(1, 10)
			p_y = forward(x, W, b)

			
			W+=  lr*(gradW(y, p_y, x) - reg*W)
			b+=  lr*(gradb(y, p_y) - reg*b)
			p_y_test = forward(Xtest, W, b)
			ll = cost(p_y_test, Ytest_ind)
			LL_stochastic.append(ll)

			err = error_rate(p_y_test, Ytest)	
			if n % int(N/2) ==0:
				print("STOCHASTIC Cost a iteration %d: %.6f" %(i,ll))
				print("STOCHASTIC Error rate:", err)
	
	p_y = forward(Xtest, W, b)				
	print("STOCHASTIC Final error rate", error_rate(p_y, Ytest))
	print("STOCHASTIC GD time", (datetime.now() - t0))	


	#3. batch
	W = np.random.randn(D, 10) / 28
	b = np.zeros(10)
	LL_batch = [] 

	lr = 0.0001
	reg = 0.01 
	
	batch_sz = 500
	n_batches = N // batch_sz

	t0 = datetime.now()
	for i in range(50):
		tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
		
		for j in range(n_batches):
		
			x = tmpX[j*batch_sz:((j+1)*batch_sz), :]
			y = tmpY[j*batch_sz:((j+1)*batch_sz), :]
			p_y = forward(x, W, b)

			
			W+=  lr*(gradW(y, p_y, x) - reg*W)
			b+=  lr*(gradb(y, p_y) - reg*b)
			p_y_test = forward(Xtest, W, b)
			ll = cost(p_y_test, Ytest_ind)
			LL_batch.append(ll)

		
			
			if j % int(n_batches/2) ==0:
				err = error_rate(p_y_test, Ytest)		
				print("BATCH Cost a iteration %d: %.6f" %(i,ll))
				print("BATCH Error rate:", err)
	
	p_y = forward(Xtest, W, b)				
	print("BATCH Final error rate", error_rate(p_y, Ytest))
	print("BATCH GD time", (datetime.now() - t0))	


	

	x1 = np.linspace(0, 1, len(LL))
	plt.plot(x1, LL, label='full')
	x2 = np.linspace(0, 1, len(LL_stochastic))
	plt.plot(x2, LL_stochastic, label='stochastic')
	x3 = np.linspace(0, 1, len(LL_batch))
	plt.plot(x3, LL_batch, label='batch')
	plt.legend()
	plt.show()

# Example 14
    # 1. full
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        p_y = forward(Xtrain, W, b)

        W += lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W)
        b += lr*(gradb(Ytrain_ind, p_y) - reg*b)
        

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsed time for full GD:", datetime.now() - t0)