Example #1
def tt_plot_comp(train_x, train_y, test_x, test_y):
    """Train, test and plot a comparison of update functions.

    Arguments:
        train_x, train_y, test_x, test_y: train and test data
    """
    funcs = [sgd, sgd_momentum, rms_prop]
    costs = []  # per-function training-cost histories
    accrs = []  # per-function test-accuracy histories
    for func in funcs:
        # train and test
        train, predict, _ = cnn(update_func=func)
        test_accr = []
        train_cost = []
        for _ in tqdm(range(NO_ITERS)):
            train_x, train_y = shuffle_data(train_x, train_y)
            test_x, test_y = shuffle_data(test_x, test_y)
            cost = 0.0
            train_length = len(train_x)

            starts = range(0, train_length, BATCH_SIZE)
            ends = range(BATCH_SIZE, train_length, BATCH_SIZE)
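            # note: zip stops at the shorter range, so the last batch is dropped each epoch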
            for start, end in zip(starts, ends):
                cost += train(train_x[start:end], train_y[start:end])

            # average out the cost for one epoch
            cost = cost / (train_length // BATCH_SIZE)
            train_cost.append(cost)
            test_accr.append(
                np.mean(np.argmax(test_y, axis=1) == predict(test_x)))

        # report the best test accuracy and the epoch at which it occurred
        print('%.1f%% accuracy at epoch %d' %
              (np.max(test_accr) * 100, np.argmax(test_accr) + 1))

        costs.append(train_cost)
        accrs.append(test_accr)

    # plot test accuracy
    pylab.figure()
    for label, series in zip([x.__name__ for x in funcs], accrs):
        pylab.plot(range(NO_ITERS), series, label=label)
    pylab.xlabel('epochs')
    pylab.ylabel('test accuracy')
    pylab.legend()
    pylab.savefig(os.path.join(CUR_DIR, 'project_2a_test.png'))

    # plot training cost
    pylab.figure()
    for label, series in zip([x.__name__ for x in funcs], costs):
        pylab.plot(range(NO_ITERS), series, label=label)
    pylab.xlabel('epochs')
    pylab.ylabel('training cost')
    pylab.legend()
    pylab.savefig(os.path.join(CUR_DIR, 'project_2a_train.png'))
    pylab.show()
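Every example in this listing calls a shuffle_data helper that is not shown. A minimal sketch of what it presumably does, permuting the sample and label arrays in unison (the name and signature come from the calls above; the body is an assumption):

import numpy as np

def shuffle_data(samples, labels):
    """Shuffle samples and labels with the same random permutation (assumed)."""
    idx = np.random.permutation(len(samples))
    return samples[idx], labels[idx]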
Example #2
def load_train_test():
    """Load training and testing data."""
    # read and divide data into test and train sets
    cal_housing = np.loadtxt(os.path.join(
        DATA_DIR, 'cal_housing.data'), delimiter=',')
    x_data, y_data = cal_housing[:, :8], cal_housing[:, -1]
    y_data = np.asmatrix(y_data).transpose()
    x_data, y_data = shuffle_data(x_data, y_data)

    # separate train and test data
    test_len = 3 * x_data.shape[0] // 10  # 3:7 test:train split
    test_x, test_y = x_data[:test_len], y_data[:test_len]
    train_x, train_y = x_data[test_len:], y_data[test_len:]

    # scale and normalize data
    train_x_max, train_x_min = np.max(train_x, axis=0), np.min(train_x, axis=0)
    test_x_max, test_x_min = np.max(test_x, axis=0), np.min(test_x, axis=0)
    train_x = scale(train_x, train_x_min, train_x_max)
    test_x = scale(test_x, test_x_min, test_x_max)
    train_x_mean, train_x_std = np.mean(
        train_x, axis=0), np.std(train_x, axis=0)
    test_x_mean, test_x_std = np.mean(test_x, axis=0), np.std(test_x, axis=0)
    train_x = normalize(train_x, train_x_mean, train_x_std)
    test_x = normalize(test_x, test_x_mean, test_x_std)

    return train_x, train_y, test_x, test_y
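The scale and normalize helpers are also defined elsewhere. Judging from the call sites above, they are min-max scaling and z-score standardization; a minimal sketch under that assumption:

def scale(x_raw, x_min, x_max):
    # map each feature to [0, 1] by min-max scaling (assumed implementation)
    return (x_raw - x_min) / (x_max - x_min)

def normalize(x_raw, x_mean, x_std):
    # standardize each feature to zero mean and unit variance (assumed implementation)
    return (x_raw - x_mean) / x_std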
Example #3
def train_test(batch_size=4, hl_neuron=10, decay=1e-6, layer_4=False):
    """Train and test the neural network with data.

    Arguments:
        batch_size: int - batch size for mini-batch gradient descent
        hl_neuron: int - number of neurons for hidden layer
        decay: float - decay parameter
        layer_4: bool - use the 4-layer network instead of the 3-layer one
    """
    # init functions and variables
    if layer_4:
        train, predict = nn_4_layer(hl_neuron, decay)
    else:
        train, predict = nn_3_layer(hl_neuron, decay)
    train_x, train_y, test_x, test_y = load_train_test()
    n_tr = len(train_x)
    test_accuracy = []
    train_cost = []
    timings = []

    # train and test
    for _ in tqdm(range(EPOCHS)):
        train_x, train_y = shuffle_data(train_x, train_y)
        cost = 0.0
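        # zip over these two ranges drops the last batch each epoch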
        for start, end in zip(range(0, n_tr, batch_size),
                              range(batch_size, n_tr, batch_size)):
            start_time = time.time()
            cost += train(train_x[start:end], train_y[start:end])
            timings.append((time.time() - start_time) * 1e6)
        train_cost = np.append(train_cost, cost / (n_tr // batch_size))
        test_accuracy = np.append(
            test_accuracy,
            np.mean(np.argmax(test_y, axis=1) == predict(test_x)))

    # print results
    print('%.1f%% accuracy at epoch %d' %
          (np.max(test_accuracy) * 100, np.argmax(test_accuracy) + 1))
    average_time = np.average(timings)
    print('average time per update: {:.2f} µs'.format(average_time))

    return (train_cost, test_accuracy, average_time)
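A hypothetical driver for this function, sweeping the batch size and plotting the resulting accuracy curves (the batch sizes and plotting here are illustrative, not part of the original project):

for size in [4, 8, 16, 32, 64]:
    train_cost, test_accuracy, avg_time = train_test(batch_size=size)
    print('batch size %d: %.2f µs per update' % (size, avg_time))
    pylab.plot(range(EPOCHS), test_accuracy, label='batch size %d' % size)
pylab.xlabel('epochs')
pylab.ylabel('test accuracy')
pylab.legend()
pylab.show()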
Example #4
def nn_4_layer(train_x,
               train_y,
               test_x,
               test_y,
               no_hidden=30,
               learning_rate=1e-4):
    """Train and test the 4-layer neural network."""
    no_features = train_x.shape[1]
    x_mat = T.matrix('x')  # data sample
    d_mat = T.matrix('d')  # desired output

    # initialize weights and biases for hidden layer(s) and output layer
    w_o = theano.shared(np.random.randn(20) * .01, FL_X)
    b_o = theano.shared(np.random.randn() * .01, FL_X)
    w_h1 = theano.shared(np.random.randn(no_features, no_hidden) * .01, FL_X)
    b_h1 = theano.shared(np.random.randn(no_hidden) * 0.01, FL_X)
    w_h2 = theano.shared(np.random.randn(no_hidden, 20) * .01, FL_X)
    b_h2 = theano.shared(np.random.randn(20) * 0.01, FL_X)

    # learning rate
    alpha = theano.shared(learning_rate, FL_X)

    # define mathematical expressions
    h1_out = T.nnet.sigmoid(T.dot(x_mat, w_h1) + b_h1)
    h2_out = T.nnet.sigmoid(T.dot(h1_out, w_h2) + b_h2)
    y_vec = T.dot(h2_out, w_o) + b_o
    cost = T.mean(T.sqr(d_mat - y_vec))  # mean squared error
    accuracy = T.mean(d_mat - y_vec)  # mean signed residual, despite the name

    # define gradients
    dw_o, db_o, dw_h1, db_h1, dw_h2, db_h2 = T.grad(
        cost, [w_o, b_o, w_h1, b_h1, w_h2, b_h2])

    # compile train and test functions
    train = theano.function(inputs=[x_mat, d_mat],
                            outputs=cost,
                            updates=[[w_o, w_o - alpha * dw_o],
                                     [b_o, b_o - alpha * db_o],
                                     [w_h1, w_h1 - alpha * dw_h1],
                                     [b_h1, b_h1 - alpha * db_h1],
                                     [w_h2, w_h2 - alpha * dw_h2],
                                     [b_h2, b_h2 - alpha * db_h2]],
                            allow_input_downcast=True)
    test = theano.function(inputs=[x_mat, d_mat],
                           outputs=[y_vec, cost, accuracy],
                           allow_input_downcast=True)

    # train and test
    train_cost = np.zeros(EPOCHS)
    test_cost = np.zeros(EPOCHS)
    test_accuracy = np.zeros(EPOCHS)

    min_error = 1e+15
    best_iter = 0
    best_w_o = np.zeros(20)
    best_w_h1 = np.zeros([no_features, no_hidden])
    best_w_h2 = np.zeros([no_hidden, 20])
    best_b_o = 0
    best_b_h1 = np.zeros(no_hidden)
    best_b_h2 = np.zeros(20)

    alpha.set_value(learning_rate)

    for i in tqdm(range(EPOCHS)):
        train_x, train_y = shuffle_data(train_x, train_y)
        train_cost[i] = train(train_x, np.transpose(train_y))
        _, test_cost[i], test_accuracy[i] = test(test_x, np.transpose(test_y))
        if test_cost[i] < min_error:
            best_iter = i
            min_error = test_cost[i]
            best_w_o = w_o.get_value()
            best_w_h1 = w_h1.get_value()
            best_w_h2 = w_h2.get_value()
            best_b_o = b_o.get_value()
            best_b_h1 = b_h1.get_value()
            best_b_h2 = b_h2.get_value()

    # set weights and biases to values at which performance was best
    w_o.set_value(best_w_o)
    b_o.set_value(best_b_o)
    w_h1.set_value(best_w_h1)
    b_h1.set_value(best_b_h1)
    w_h2.set_value(best_w_h2)
    b_h2.set_value(best_b_h2)

    _, best_cost, best_accuracy = test(test_x, np.transpose(test_y))

    print('Minimum error: %.1f, best accuracy: %.1f, at epoch %d' %
          (best_cost, best_accuracy, best_iter + 1))

    return train_cost, test_cost, test_accuracy
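A minimal sketch of how this function might be driven with the cal_housing data from load_train_test above (the plotting is illustrative):

train_x, train_y, test_x, test_y = load_train_test()
train_cost, test_cost, test_accuracy = nn_4_layer(
    train_x, train_y, test_x, test_y, no_hidden=30, learning_rate=1e-4)

pylab.figure()
pylab.plot(range(EPOCHS), train_cost, label='train cost')
pylab.plot(range(EPOCHS), test_cost, label='test cost')
pylab.xlabel('epochs')
pylab.ylabel('mean squared error')
pylab.legend()
pylab.show()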
Example #5
def tt_plot_func(train_x, train_y, test_x, test_y, func=sgd):
    """Train, test and plot using a particular update function.

    Arguments:
        train_x, train_y, test_x, test_y: train and test data
        func: update function to use, defaults to nn_cnn.sgd
    """
    # train and test
    train, predict, test = cnn(update_func=func)
    test_accr = []
    train_cost = []
    for _ in tqdm(range(NO_ITERS)):
        train_x, train_y = shuffle_data(train_x, train_y)
        test_x, test_y = shuffle_data(test_x, test_y)
        cost = 0.0
        train_length = len(train_x)

        starts = range(0, train_length, BATCH_SIZE)
        ends = range(BATCH_SIZE, train_length, BATCH_SIZE)
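        # note: zip stops at the shorter range, so the last batch is dropped each epoch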
        for start, end in zip(starts, ends):
            cost += train(train_x[start:end], train_y[start:end])

        # average out the cost for one epoch
        cost = cost / (train_length // BATCH_SIZE)
        train_cost.append(cost)
        test_accr.append(np.mean(np.argmax(test_y, axis=1) == predict(test_x)))

    # report the best test accuracy and the epoch at which it occurred
    print('%.1f%% accuracy at epoch %d' %
          (np.max(test_accr) * 100, np.argmax(test_accr) + 1))

    # plot test accuracy
    pylab.figure()
    pylab.plot(range(NO_ITERS), test_accr)
    pylab.xlabel('epochs')
    pylab.ylabel('test accuracy')
    pylab.savefig(os.path.join(CUR_DIR, 'project_2a_test.png'))

    # plot training cost
    pylab.figure()
    pylab.plot(range(NO_ITERS), train_cost)
    pylab.xlabel('epochs')
    pylab.ylabel('training cost')
    pylab.savefig(os.path.join(CUR_DIR, 'project_2a_train.png'))

    # pick a random test image (assumes at least 2000 test samples)
    ind = np.random.randint(low=0, high=2000)
    conv_1, pool_1, conv_2, pool_2 = test(test_x[ind:ind + 1, :])

    # show input image
    pylab.figure()
    pylab.gray()
    pylab.axis('off')
    pylab.imshow(test_x[ind, :].reshape(28, 28))
    pylab.title('input image')
    pylab.savefig(os.path.join(CUR_DIR, 'img_input.png'))

    # show convolved and pooled feature maps
    pylab.figure()
    pylab.gray()
    for i in range(15):
        pylab.subplot(3, 5, i + 1)
        pylab.axis('off')
        pylab.imshow(conv_1[0, i, :].reshape(20, 20))
    pylab.suptitle('layer 1 convolved feature maps')
    pylab.savefig(os.path.join(CUR_DIR, 'img_conv_1.png'))

    pylab.figure()
    pylab.gray()
    for i in range(15):
        pylab.subplot(3, 5, i + 1)
        pylab.axis('off')
        pylab.imshow(pool_1[0, i, :].reshape(10, 10))
    pylab.suptitle('layer 1 pooled feature maps')
    pylab.savefig(os.path.join(CUR_DIR, 'img_pooled_1.png'))

    pylab.figure()
    pylab.gray()
    for i in range(20):
        pylab.subplot(4, 5, i + 1)
        pylab.axis('off')
        pylab.imshow(conv_2[0, i, :].reshape(6, 6))
    pylab.suptitle('layer 2 convolved feature maps')
    pylab.savefig(os.path.join(CUR_DIR, 'img_conv_2.png'))

    pylab.figure()
    pylab.gray()
    for i in range(20):
        pylab.subplot(4, 5, i + 1)
        pylab.axis('off')
        pylab.imshow(pool_2[0, i, :].reshape(3, 3))
    pylab.suptitle('layer 2 pooled feature maps')
    pylab.savefig(os.path.join(CUR_DIR, 'img_pooled_2.png'))
    pylab.show()
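All five examples rely on module-level imports and constants that the listing omits. A plausible header is sketched below; every value is an assumption rather than the original configuration, and cnn, sgd, sgd_momentum, rms_prop and nn_3_layer come from elsewhere in the same project (not shown):

import os
import time

import numpy as np
import pylab
import theano
import theano.tensor as T
from tqdm import tqdm

CUR_DIR = os.path.dirname(os.path.abspath(__file__))  # assumed output directory
DATA_DIR = os.path.join(CUR_DIR, 'data')              # assumed data directory
FL_X = theano.config.floatX   # assumed; passed as the shared-variable name above
NO_ITERS = 100                # assumed epoch count for the CNN examples
EPOCHS = 1000                 # assumed epoch count for the regression examples
BATCH_SIZE = 128              # assumed mini-batch size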