Example #1
0
def test_matrix_elementwise_multiply():
    ctx = ndarray.gpu(0)
    shape = (500, 200)
    x = np.random.uniform(0, 10, size=shape).astype(np.float32)
    y = np.random.uniform(0, 10, size=shape).astype(np.float32)
    arr_x = ndarray.array(x, ctx=ctx)
    arr_y = ndarray.array(y, ctx=ctx)
    arr_z = ndarray.empty(shape, ctx=ctx)
    gpu_op.matrix_elementwise_multiply(arr_x, arr_y, arr_z)
    z = arr_z.asnumpy()
    np.testing.assert_allclose(x * y, z, rtol=1e-5)
Example #2
0
def test_relu_gradient():
    shape = (2000, 2500)
    ctx = ndarray.gpu(0)
    x = np.random.uniform(-1, 1, shape).astype(np.float32)
    grad_x = np.random.uniform(-5, 5, shape).astype(np.float32)
    arr_x = ndarray.array(x, ctx=ctx)
    arr_grad_x = ndarray.array(grad_x, ctx=ctx)
    arr_y = ndarray.empty(shape, ctx=ctx)
    gpu_op.relu_gradient(arr_x, arr_grad_x, arr_y)
    y = arr_y.asnumpy()
    np.testing.assert_allclose(((x > 0) * grad_x).astype(np.float32), y)
Example #3
0
def test_softmax_cross_entropy():
    ctx = ndarray.gpu(0)
    shape = (400, 1000)
    y = np.random.uniform(-5, 5, shape).astype(np.float32)
    y_ = np.random.uniform(-5, 5, shape).astype(np.float32)
    arr_y = ndarray.array(y, ctx=ctx)
    arr_y_ = ndarray.array(y_, ctx=ctx)
    arr_out = ndarray.empty((1, ), ctx=ctx)
    gpu_op.softmax_cross_entropy(arr_y, arr_y_, arr_out)
    out = arr_out.asnumpy()
    # numpy calculation
    cross_entropy = np.mean(
        -np.sum(y_ * np.log(autodiff.softmax_func(y)), axis=1), keepdims=True)
    np.testing.assert_allclose(cross_entropy, out, rtol=1e-5)
Example #4
0
def test_softmax():
    ctx = ndarray.gpu(0)
    shape = (400, 1000)
    x = np.random.uniform(-5, 5, shape).astype(np.float32)
    arr_x = ndarray.array(x, ctx=ctx)
    arr_y = ndarray.empty(shape, ctx=ctx)
    gpu_op.softmax(arr_x, arr_y)
    y = arr_y.asnumpy()
    np.testing.assert_allclose(autodiff.softmax_func(x), y, rtol=1e-5)
Example #5
0
def test_relu():
    shape = (2000, 2500)
    ctx = ndarray.gpu(0)
    x = np.random.uniform(-1, 1, shape).astype(np.float32)
    arr_x = ndarray.array(x, ctx=ctx)
    arr_y = ndarray.empty(shape, ctx=ctx)
    gpu_op.relu(arr_x, arr_y)
    y = arr_y.asnumpy()
    np.testing.assert_allclose(np.maximum(x, 0).astype(np.float32), y)
Example #6
0
def test_matrix_elementwise_multiply_by_const():
    shape = (2000, 3000)
    ctx = ndarray.gpu(0)
    x = np.random.uniform(0, 10, size=shape).astype(np.float32)
    val = np.random.uniform(-5, 5)
    arr_x = ndarray.array(x, ctx=ctx)
    arr_y = ndarray.empty(shape, ctx=ctx)
    gpu_op.matrix_elementwise_multiply_by_const(arr_x, val, arr_y)
    y = arr_y.asnumpy()
    np.testing.assert_allclose(x * val, y, rtol=1e-5)
Example #7
0
def test_broadcast_to():
    ctx = ndarray.gpu(0)
    shape = (200, 300)
    to_shape = (130, 200, 300)
    x = np.random.uniform(-1, 1, shape).astype(np.float32)
    arr_x = ndarray.array(x, ctx=ctx)
    arr_y = ndarray.empty(to_shape, ctx=ctx)
    gpu_op.broadcast_to(arr_x, arr_y)
    y = arr_y.asnumpy()
    np.testing.assert_allclose(np.broadcast_to(x, to_shape), y)
Example #8
0
def test_matrix_multiply():
    ctx = ndarray.gpu(0)
    x = np.random.uniform(0, 10, size=(500, 700)).astype(np.float32)
    y = np.random.uniform(0, 10, size=(700, 1000)).astype(np.float32)
    arr_x = ndarray.array(x, ctx=ctx)
    arr_y = ndarray.array(y, ctx=ctx)
    arr_z = ndarray.empty((500, 1000), ctx=ctx)
    gpu_op.matrix_multiply(arr_x, False, arr_y, False, arr_z)
    z = arr_z.asnumpy()
    np.testing.assert_allclose(np.dot(x, y), z, rtol=1e-5)

    x = np.random.uniform(0, 10, size=(1000, 500)).astype(np.float32)
    y = np.random.uniform(0, 10, size=(2000, 500)).astype(np.float32)
    arr_x = ndarray.array(x, ctx=ctx)
    arr_y = ndarray.array(y, ctx=ctx)
    arr_z = ndarray.empty((1000, 2000), ctx=ctx)
    gpu_op.matrix_multiply(arr_x, False, arr_y, True, arr_z)
    z = arr_z.asnumpy()
    np.testing.assert_allclose(np.dot(x, np.transpose(y)), z, rtol=1e-5)

    x = np.random.uniform(0, 10, size=(500, 1000)).astype(np.float32)
    y = np.random.uniform(0, 10, size=(2000, 500)).astype(np.float32)
    arr_x = ndarray.array(x, ctx=ctx)
    arr_y = ndarray.array(y, ctx=ctx)
    arr_z = ndarray.empty((1000, 2000), ctx=ctx)
    gpu_op.matrix_multiply(arr_x, True, arr_y, True, arr_z)
    z = arr_z.asnumpy()
    np.testing.assert_allclose(np.dot(np.transpose(x), np.transpose(y)),
                               z,
                               rtol=1e-5)
Example #9
0
def test_convolution_forward():
    ctx = ndarray.gpu(0)
    in_shape = (1, 1, 8)
    filter_shape = (1, 1, 5)
    out_shape = (1, 1, 8)
    input_arr_np = np.arange(8).reshape(in_shape)

    dinput_arr_np = np.arange(8).reshape(in_shape)
    filter_arr_np = np.arange(5).reshape(filter_shape)
    dfilter_arr_np = np.arange(5).reshape(filter_shape)
    dinput = ndarray.array(dinput_arr_np, ctx=ctx)
    dfilter = ndarray.array(dfilter_arr_np, ctx=ctx)
    arr_in = ndarray.array(input_arr_np, ctx=ctx)
    arr_filter = ndarray.array(filter_arr_np, ctx=ctx)
    arr_out = ndarray.empty(out_shape, ctx=ctx)
    gpu_op.convolution_1d_forward(arr_in, arr_filter, arr_out, "NCHW", "SAME",
                                  1)
    gpu_op.convolution_1d_backward(arr_in, arr_out, arr_filter, dfilter,
                                   dinput, "NCHW", "SAME", 1)
    print(arr_out.asnumpy())
    print(dfilter.asnumpy())
    print(dinput.asnumpy())
    print(arr_in.asnumpy())
Example #10
0
def test_reduce_sum_axis_zero():
    ctx = ndarray.gpu(0)
    shape = (500, 200, 100)
    to_shape = (200, 100)
    x = np.random.uniform(0, 20, shape).astype(np.float32)
    arr_x = ndarray.array(x, ctx=ctx)
    arr_y = ndarray.empty(to_shape, ctx=ctx)
    gpu_op.reduce_sum_axis_zero(arr_x, arr_y)
    y = arr_y.asnumpy()
    y_ = np.sum(x, axis=0)
    for index, _ in np.ndenumerate(y):
        v = y[index]
        v_ = y_[index]
        if abs((v - v_) / v_) > 1e-4:
            print(index, v, v_)
    np.testing.assert_allclose(np.sum(x, axis=0), y, rtol=1e-5)
Example #11
0
def mnist_logreg(executor_ctx=None, num_epochs=10, print_loss_val_each_epoch=False):
    print("Build logistic regression model...")

    W1 = ad.Variable(name="W1")
    b1 = ad.Variable(name="b1")
    X = ad.Variable(name="X")
    y_ = ad.Variable(name="y_")

    z1 = ad.matmul_op(X, W1)
    y = z1 + ad.broadcastto_op(b1, z1)

    loss = ad.softmaxcrossentropy_op(y, y_)

    grad_W1, grad_b1 = ad.gradients(loss, [W1, b1])
    executor = ad.Executor([loss, grad_W1, grad_b1, y], ctx=executor_ctx)

    # Read input data
    datasets = load_mnist_data("mnist.pkl.gz")
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Set up minibatch
    batch_size = 1000
    n_train_batches = train_set_x.shape[0] // batch_size
    n_valid_batches = valid_set_x.shape[0] // batch_size

    print("Start training loop...")

    # Initialize parameters
    W1_val = np.zeros((784, 10))
    b1_val = np.zeros((10))
    X_val = np.empty(shape=(batch_size, 784), dtype=np.float32)
    y_val = np.empty(shape=(batch_size, 10), dtype=np.float32)
    valid_X_val = np.empty(shape=(batch_size, 784), dtype=np.float32)
    valid_y_val = np.empty(shape=(batch_size, 10), dtype=np.float32)
    if ndarray.is_gpu_ctx(executor_ctx):
        W1_val = ndarray.array(W1_val, ctx=executor_ctx)
        b1_val = ndarray.array(b1_val, ctx=executor_ctx)
        X_val = ndarray.array(X_val, ctx=executor_ctx)
        y_val = ndarray.array(y_val, ctx=executor_ctx)

    lr = 1e-3
    for i in range(num_epochs):
        print("epoch %d" % i)
        for minibatch_index in range(n_train_batches):
            minibatch_start = minibatch_index * batch_size
            minibatch_end = (minibatch_index + 1) * batch_size
            X_val[:] = train_set_x[minibatch_start:minibatch_end]
            y_val[:] = convert_to_one_hot(
                train_set_y[minibatch_start:minibatch_end])
            loss_val, grad_W1_val, grad_b1_val, _ = executor.run(
                feed_dict = {X: X_val, y_: y_val, W1: W1_val, b1: b1_val})
            # SGD update
            if (executor_ctx is None):
                W1_val = W1_val - lr * grad_W1_val
                b1_val = b1_val - lr * grad_b1_val
            else:
                sgd_update_gpu(W1_val, grad_W1_val, lr)
                sgd_update_gpu(b1_val, grad_b1_val, lr)
        if print_loss_val_each_epoch:
            if isinstance(loss_val, ndarray.NDArray):
                print(loss_val.asnumpy())
            else:
                print(loss_val)

    correct_predictions = []
    for minibatch_index in range(n_valid_batches):
        minibatch_start = minibatch_index * batch_size
        minibatch_end = (minibatch_index + 1) * batch_size
        valid_X_val[:] = valid_set_x[minibatch_start:minibatch_end]
        valid_y_val[:] = convert_to_one_hot(
            valid_set_y[minibatch_start:minibatch_end])
        _, _, _, valid_y_predicted = executor.run(
            feed_dict={
                        X: valid_X_val,
                        y_: valid_y_val,
                        W1: W1_val,
                        b1: b1_val},
            convert_to_numpy_ret_vals=True)
        correct_prediction = np.equal(
            np.argmax(valid_y_val, 1),
            np.argmax(valid_y_predicted, 1)).astype(np.float)
        correct_predictions.extend(correct_prediction)
    accuracy = np.mean(correct_predictions)
    # validation set accuracy=0.928200
    print("validation set accuracy=%f" % accuracy)
Example #12
0
def mnist_mlp(executor_ctx=None, num_epochs=10, print_loss_val_each_epoch=False):
    print("Build 3-layer MLP model...")

    W1 = ad.Variable(name="W1")
    W2 = ad.Variable(name="W2")
    W3 = ad.Variable(name="W3")
    b1 = ad.Variable(name="b1")
    b2 = ad.Variable(name="b2")
    b3 = ad.Variable(name="b3")
    X = ad.Variable(name="X")
    y_ = ad.Variable(name="y_")

    # relu(X W1+b1)
    z1 = ad.matmul_op(X, W1)
    z2 = z1 + ad.broadcastto_op(b1, z1)
    z3 = ad.relu_op(z2)

    # relu(z3 W2+b2)
    z4 = ad.matmul_op(z3, W2)
    z5 = z4 + ad.broadcastto_op(b2, z4)
    z6 = ad.relu_op(z5)

    # softmax(z5 W2+b2)
    z7 = ad.matmul_op(z6, W3)
    y = z7 + ad.broadcastto_op(b3, z7)

    loss = ad.softmaxcrossentropy_op(y, y_)

    grad_W1, grad_W2, grad_W3, grad_b1, grad_b2, grad_b3 = ad.gradients(
        loss, [W1, W2, W3, b1, b2, b3])
    executor = ad.Executor(
        [loss, grad_W1, grad_W2, grad_W3, grad_b1, grad_b2, grad_b3, y],
        ctx=executor_ctx)

    # Read input data
    datasets = load_mnist_data("mnist.pkl.gz")
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    # Set up minibatch
    batch_size = 1000
    n_train_batches = train_set_x.shape[0] // batch_size
    n_valid_batches = valid_set_x.shape[0] // batch_size

    print("Start training loop...")

    # Initialize parameters
    rand = np.random.RandomState(seed=123)
    W1_val = rand.normal(scale=0.1, size=(784, 256))
    W2_val = rand.normal(scale=0.1, size=(256, 100))
    W3_val = rand.normal(scale=0.1, size=(100, 10))
    b1_val = rand.normal(scale=0.1, size=(256))
    b2_val = rand.normal(scale=0.1, size=(100))
    b3_val = rand.normal(scale=0.1, size=(10))
    X_val = np.empty(shape=(batch_size, 784), dtype=np.float32)
    y_val = np.empty(shape=(batch_size, 10), dtype=np.float32)
    valid_X_val = np.empty(shape=(batch_size, 784), dtype=np.float32)
    valid_y_val = np.empty(shape=(batch_size, 10), dtype=np.float32)
    if ndarray.is_gpu_ctx(executor_ctx):
        W1_val = ndarray.array(W1_val, ctx=executor_ctx)
        W2_val = ndarray.array(W2_val, ctx=executor_ctx)
        W3_val = ndarray.array(W3_val, ctx=executor_ctx)
        b1_val = ndarray.array(b1_val, ctx=executor_ctx)
        b2_val = ndarray.array(b2_val, ctx=executor_ctx)
        b3_val = ndarray.array(b3_val, ctx=executor_ctx)
        X_val = ndarray.array(X_val, ctx=executor_ctx)
        y_val = ndarray.array(y_val, ctx=executor_ctx)

    lr = 1.0e-3
    for i in range(num_epochs):
        print("epoch %d" % i)
        for minibatch_index in range(n_train_batches):
            minibatch_start = minibatch_index * batch_size
            minibatch_end = (minibatch_index + 1) * batch_size
            X_val[:] = train_set_x[minibatch_start:minibatch_end]
            y_val[:] = convert_to_one_hot(
                train_set_y[minibatch_start:minibatch_end])
            loss_val, grad_W1_val, grad_W2_val, grad_W3_val, \
                grad_b1_val, grad_b2_val, grad_b3_val, _ = executor.run(
                    feed_dict={
                        X: X_val,
                        y_: y_val,
                        W1: W1_val,
                        W2: W2_val,
                        W3: W3_val,
                        b1: b1_val,
                        b2: b2_val,
                        b3: b3_val})
            # SGD update
            if (executor_ctx is None):
                W1_val = W1_val - lr * grad_W1_val
                W2_val = W2_val - lr * grad_W2_val
                W3_val = W3_val - lr * grad_W3_val
                b1_val = b1_val - lr * grad_b1_val
                b2_val = b2_val - lr * grad_b2_val
                b3_val = b3_val - lr * grad_b3_val
            else:
                sgd_update_gpu(W1_val, grad_W1_val, lr)
                sgd_update_gpu(W2_val, grad_W2_val, lr)
                sgd_update_gpu(W3_val, grad_W3_val, lr)
                sgd_update_gpu(b1_val, grad_b1_val, lr)
                sgd_update_gpu(b2_val, grad_b2_val, lr)
                sgd_update_gpu(b3_val, grad_b3_val, lr)
        if print_loss_val_each_epoch:
            if isinstance(loss_val, ndarray.NDArray):
                print(loss_val.asnumpy())
            else:
                print(loss_val)

    correct_predictions = []
    for minibatch_index in range(n_valid_batches):
        minibatch_start = minibatch_index * batch_size
        minibatch_end = (minibatch_index + 1) * batch_size
        valid_X_val[:] = valid_set_x[minibatch_start:minibatch_end]
        valid_y_val[:] = convert_to_one_hot(
            valid_set_y[minibatch_start:minibatch_end])
        _, _, _, _, _, _, _, valid_y_predicted = executor.run(
            feed_dict={
                X: valid_X_val,
                y_: valid_y_val,
                W1: W1_val,
                W2: W2_val,
                W3: W3_val,
                b1: b1_val,
                b2: b2_val,
                b3: b3_val},
            convert_to_numpy_ret_vals=True)
        correct_prediction = np.equal(
            np.argmax(valid_y_val, 1),
            np.argmax(valid_y_predicted, 1)).astype(np.float)
        correct_predictions.extend(correct_prediction)
    accuracy = np.mean(correct_predictions)
    # validation set accuracy=0.970800
    print("validation set accuracy=%f" % accuracy)