def test_matrix_elementwise_multiply():
    """Check the GPU element-wise product of two matrices against NumPy."""
    device = ndarray.gpu(0)
    dims = (500, 200)
    lhs = np.random.uniform(0, 10, size=dims).astype(np.float32)
    rhs = np.random.uniform(0, 10, size=dims).astype(np.float32)

    lhs_dev = ndarray.array(lhs, ctx=device)
    rhs_dev = ndarray.array(rhs, ctx=device)
    out_dev = ndarray.empty(dims, ctx=device)
    gpu_op.matrix_elementwise_multiply(lhs_dev, rhs_dev, out_dev)

    # Compare the copied-back result with the host-side product.
    np.testing.assert_allclose(lhs * rhs, out_dev.asnumpy(), rtol=1e-5)
def test_relu_gradient():
    """Check the GPU ReLU gradient op against a masked NumPy reference."""
    device = ndarray.gpu(0)
    dims = (2000, 2500)
    inputs = np.random.uniform(-1, 1, dims).astype(np.float32)
    upstream = np.random.uniform(-5, 5, dims).astype(np.float32)

    inputs_dev = ndarray.array(inputs, ctx=device)
    upstream_dev = ndarray.array(upstream, ctx=device)
    out_dev = ndarray.empty(dims, ctx=device)
    gpu_op.relu_gradient(inputs_dev, upstream_dev, out_dev)
    result = out_dev.asnumpy()

    # Reference: the incoming gradient passes only where the input is positive.
    positive = inputs > 0
    expected = (positive * upstream).astype(np.float32)
    np.testing.assert_allclose(expected, result)
def test_softmax_cross_entropy():
    """Check the GPU softmax cross-entropy op against a NumPy computation."""
    device = ndarray.gpu(0)
    dims = (400, 1000)
    logits = np.random.uniform(-5, 5, dims).astype(np.float32)
    targets = np.random.uniform(-5, 5, dims).astype(np.float32)

    logits_dev = ndarray.array(logits, ctx=device)
    targets_dev = ndarray.array(targets, ctx=device)
    loss_dev = ndarray.empty((1, ), ctx=device)
    gpu_op.softmax_cross_entropy(logits_dev, targets_dev, loss_dev)
    result = loss_dev.asnumpy()

    # NumPy reference: per-row cross entropy, then the mean over all rows.
    probabilities = autodiff.softmax_func(logits)
    per_row = -np.sum(targets * np.log(probabilities), axis=1)
    expected = np.mean(per_row, keepdims=True)
    np.testing.assert_allclose(expected, result, rtol=1e-5)
def test_softmax():
    """Check the GPU softmax op against ``autodiff.softmax_func``."""
    dims = (400, 1000)
    device = ndarray.gpu(0)
    logits = np.random.uniform(-5, 5, dims).astype(np.float32)

    logits_dev = ndarray.array(logits, ctx=device)
    out_dev = ndarray.empty(dims, ctx=device)
    gpu_op.softmax(logits_dev, out_dev)

    expected = autodiff.softmax_func(logits)
    np.testing.assert_allclose(expected, out_dev.asnumpy(), rtol=1e-5)
def test_relu():
    """Check the GPU ReLU op against ``np.maximum(x, 0)``."""
    device = ndarray.gpu(0)
    dims = (2000, 2500)
    inputs = np.random.uniform(-1, 1, dims).astype(np.float32)

    inputs_dev = ndarray.array(inputs, ctx=device)
    out_dev = ndarray.empty(dims, ctx=device)
    gpu_op.relu(inputs_dev, out_dev)
    result = out_dev.asnumpy()

    expected = np.maximum(inputs, 0).astype(np.float32)
    np.testing.assert_allclose(expected, result)
def test_matrix_elementwise_multiply_by_const():
    """Check the GPU matrix-times-scalar op against NumPy."""
    device = ndarray.gpu(0)
    dims = (2000, 3000)
    matrix = np.random.uniform(0, 10, size=dims).astype(np.float32)
    scalar = np.random.uniform(-5, 5)

    matrix_dev = ndarray.array(matrix, ctx=device)
    out_dev = ndarray.empty(dims, ctx=device)
    gpu_op.matrix_elementwise_multiply_by_const(matrix_dev, scalar, out_dev)

    np.testing.assert_allclose(matrix * scalar, out_dev.asnumpy(), rtol=1e-5)
def test_broadcast_to():
    """Check the GPU broadcast op against ``np.broadcast_to``."""
    device = ndarray.gpu(0)
    src_shape = (200, 300)
    dst_shape = (130, 200, 300)
    source = np.random.uniform(-1, 1, src_shape).astype(np.float32)

    source_dev = ndarray.array(source, ctx=device)
    out_dev = ndarray.empty(dst_shape, ctx=device)
    gpu_op.broadcast_to(source_dev, out_dev)
    result = out_dev.asnumpy()

    expected = np.broadcast_to(source, dst_shape)
    np.testing.assert_allclose(expected, result)
def test_matrix_multiply():
    """Check the GPU matrix product against ``np.dot`` for three layouts.

    The scenarios run in order: no transposes, only the right operand
    transposed, and both operands transposed.
    """
    device = ndarray.gpu(0)

    # Each row: (left shape, transpose left, right shape, transpose right,
    #            output shape).
    scenarios = (
        ((500, 700), False, (700, 1000), False, (500, 1000)),
        ((1000, 500), False, (2000, 500), True, (1000, 2000)),
        ((500, 1000), True, (2000, 500), True, (1000, 2000)),
    )

    for left_shape, left_t, right_shape, right_t, out_shape in scenarios:
        left = np.random.uniform(0, 10, size=left_shape).astype(np.float32)
        right = np.random.uniform(0, 10, size=right_shape).astype(np.float32)

        left_dev = ndarray.array(left, ctx=device)
        right_dev = ndarray.array(right, ctx=device)
        out_dev = ndarray.empty(out_shape, ctx=device)
        gpu_op.matrix_multiply(left_dev, left_t, right_dev, right_t, out_dev)
        result = out_dev.asnumpy()

        # Build the host-side reference with the same transposes applied.
        left_ref = np.transpose(left) if left_t else left
        right_ref = np.transpose(right) if right_t else right
        np.testing.assert_allclose(
            np.dot(left_ref, right_ref), result, rtol=1e-5)
def test_convolution_forward():
    """Smoke-run the 1-D convolution forward and backward GPU ops.

    Nothing is asserted: the output, both gradient buffers and the input are
    printed for manual inspection.
    """
    device = ndarray.gpu(0)
    signal_shape = (1, 1, 8)
    kernel_shape = (1, 1, 5)
    result_shape = (1, 1, 8)

    signal_np = np.arange(8).reshape(signal_shape)
    signal_grad_np = np.arange(8).reshape(signal_shape)
    kernel_np = np.arange(5).reshape(kernel_shape)
    kernel_grad_np = np.arange(5).reshape(kernel_shape)

    # The gradient buffers start with the same contents as the signal and the
    # kernel; they are passed to the backward op below.
    signal_grad = ndarray.array(signal_grad_np, ctx=device)
    kernel_grad = ndarray.array(kernel_grad_np, ctx=device)
    signal = ndarray.array(signal_np, ctx=device)
    kernel = ndarray.array(kernel_np, ctx=device)
    result = ndarray.empty(result_shape, ctx=device)

    gpu_op.convolution_1d_forward(signal, kernel, result, "NCHW", "SAME", 1)
    gpu_op.convolution_1d_backward(
        signal, result, kernel, kernel_grad, signal_grad, "NCHW", "SAME", 1)

    for buffer in (result, kernel_grad, signal_grad, signal):
        print(buffer.asnumpy())
def test_reduce_sum_axis_zero():
    """Check the GPU sum over axis 0 against ``np.sum(x, axis=0)``."""
    device = ndarray.gpu(0)
    src_shape = (500, 200, 100)
    dst_shape = (200, 100)
    data = np.random.uniform(0, 20, src_shape).astype(np.float32)

    data_dev = ndarray.array(data, ctx=device)
    out_dev = ndarray.empty(dst_shape, ctx=device)
    gpu_op.reduce_sum_axis_zero(data_dev, out_dev)
    result = out_dev.asnumpy()
    expected = np.sum(data, axis=0)

    # Debugging aid: list every entry whose relative error exceeds 1e-4
    # before the stricter assertion below runs.
    for position, got in np.ndenumerate(result):
        want = expected[position]
        if abs((got - want) / want) > 1e-4:
            print(position, got, want)

    np.testing.assert_allclose(expected, result, rtol=1e-5)
def mnist_logreg(executor_ctx=None, num_epochs=10, print_loss_val_each_epoch=False):
    """Train a logistic-regression model on MNIST and print validation accuracy.

    The model is ``y = X W1 + broadcast(b1)`` with a softmax cross-entropy
    loss, trained with plain SGD on minibatches of 1000 examples read from
    ``mnist.pkl.gz``.

    Args:
        executor_ctx: Context passed to ``ad.Executor``. With ``None`` the
            parameters stay NumPy arrays and are updated on the host;
            otherwise they are updated through ``sgd_update_gpu``. When
            ``ndarray.is_gpu_ctx`` accepts the context, the parameters and
            the training batch buffers are moved to it first.
        num_epochs: Number of passes over the training set.
        print_loss_val_each_epoch: If true, print the loss of the last
            minibatch at the end of every epoch.
    """
    print("Build logistic regression model...")

    W1 = ad.Variable(name="W1")
    b1 = ad.Variable(name="b1")
    X = ad.Variable(name="X")
    y_ = ad.Variable(name="y_")

    z1 = ad.matmul_op(X, W1)
    y = z1 + ad.broadcastto_op(b1, z1)

    loss = ad.softmaxcrossentropy_op(y, y_)

    grad_W1, grad_b1 = ad.gradients(loss, [W1, b1])
    executor = ad.Executor([loss, grad_W1, grad_b1, y], ctx=executor_ctx)

    # Read input data. Only the training and validation splits are used here.
    datasets = load_mnist_data("mnist.pkl.gz")
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # Set up minibatch
    batch_size = 1000
    n_train_batches = train_set_x.shape[0] // batch_size
    n_valid_batches = valid_set_x.shape[0] // batch_size

    print("Start training loop...")

    # Initialize parameters
    W1_val = np.zeros((784, 10))
    b1_val = np.zeros(10)
    X_val = np.empty(shape=(batch_size, 784), dtype=np.float32)
    y_val = np.empty(shape=(batch_size, 10), dtype=np.float32)
    valid_X_val = np.empty(shape=(batch_size, 784), dtype=np.float32)
    valid_y_val = np.empty(shape=(batch_size, 10), dtype=np.float32)
    if ndarray.is_gpu_ctx(executor_ctx):
        W1_val = ndarray.array(W1_val, ctx=executor_ctx)
        b1_val = ndarray.array(b1_val, ctx=executor_ctx)
        X_val = ndarray.array(X_val, ctx=executor_ctx)
        y_val = ndarray.array(y_val, ctx=executor_ctx)

    lr = 1e-3
    for i in range(num_epochs):
        print("epoch %d" % i)
        for minibatch_index in range(n_train_batches):
            minibatch_start = minibatch_index * batch_size
            minibatch_end = (minibatch_index + 1) * batch_size
            # Refill the preallocated batch buffers in place.
            X_val[:] = train_set_x[minibatch_start:minibatch_end]
            y_val[:] = convert_to_one_hot(
                train_set_y[minibatch_start:minibatch_end])
            loss_val, grad_W1_val, grad_b1_val, _ = executor.run(
                feed_dict={X: X_val, y_: y_val, W1: W1_val, b1: b1_val})
            # SGD update
            if executor_ctx is None:
                W1_val = W1_val - lr * grad_W1_val
                b1_val = b1_val - lr * grad_b1_val
            else:
                sgd_update_gpu(W1_val, grad_W1_val, lr)
                sgd_update_gpu(b1_val, grad_b1_val, lr)
        if print_loss_val_each_epoch:
            if isinstance(loss_val, ndarray.NDArray):
                print(loss_val.asnumpy())
            else:
                print(loss_val)

    correct_predictions = []
    for minibatch_index in range(n_valid_batches):
        minibatch_start = minibatch_index * batch_size
        minibatch_end = (minibatch_index + 1) * batch_size
        valid_X_val[:] = valid_set_x[minibatch_start:minibatch_end]
        valid_y_val[:] = convert_to_one_hot(
            valid_set_y[minibatch_start:minibatch_end])
        _, _, _, valid_y_predicted = executor.run(
            feed_dict={
                X: valid_X_val,
                y_: valid_y_val,
                W1: W1_val,
                b1: b1_val},
            convert_to_numpy_ret_vals=True)
        # np.float64 replaces the np.float alias, which NumPy 1.24 removed;
        # the alias resolved to the builtin float, so the dtype is unchanged.
        correct_prediction = np.equal(
            np.argmax(valid_y_val, 1),
            np.argmax(valid_y_predicted, 1)).astype(np.float64)
        correct_predictions.extend(correct_prediction)
    accuracy = np.mean(correct_predictions)
    # Figure recorded in the original source: validation set accuracy=0.928200
    print("validation set accuracy=%f" % accuracy)
def mnist_mlp(executor_ctx=None, num_epochs=10, print_loss_val_each_epoch=False):
    """Train a 3-layer MLP on MNIST and print validation accuracy.

    The network is 784 -> 256 -> 100 -> 10 with ReLU after the first two
    affine layers and a softmax cross-entropy loss. It is trained with plain
    SGD on minibatches of 1000 examples read from ``mnist.pkl.gz``.

    Args:
        executor_ctx: Context passed to ``ad.Executor``. With ``None`` the
            parameters stay NumPy arrays and are updated on the host;
            otherwise they are updated through ``sgd_update_gpu``. When
            ``ndarray.is_gpu_ctx`` accepts the context, the parameters and
            the training batch buffers are moved to it first.
        num_epochs: Number of passes over the training set.
        print_loss_val_each_epoch: If true, print the loss of the last
            minibatch at the end of every epoch.
    """
    print("Build 3-layer MLP model...")

    W1 = ad.Variable(name="W1")
    W2 = ad.Variable(name="W2")
    W3 = ad.Variable(name="W3")
    b1 = ad.Variable(name="b1")
    b2 = ad.Variable(name="b2")
    b3 = ad.Variable(name="b3")
    X = ad.Variable(name="X")
    y_ = ad.Variable(name="y_")

    # relu(X W1+b1)
    z1 = ad.matmul_op(X, W1)
    z2 = z1 + ad.broadcastto_op(b1, z1)
    z3 = ad.relu_op(z2)

    # relu(z3 W2+b2)
    z4 = ad.matmul_op(z3, W2)
    z5 = z4 + ad.broadcastto_op(b2, z4)
    z6 = ad.relu_op(z5)

    # z6 W3+b3, fed to the softmax cross-entropy loss below
    z7 = ad.matmul_op(z6, W3)
    y = z7 + ad.broadcastto_op(b3, z7)

    loss = ad.softmaxcrossentropy_op(y, y_)

    grad_W1, grad_W2, grad_W3, grad_b1, grad_b2, grad_b3 = ad.gradients(
        loss, [W1, W2, W3, b1, b2, b3])
    executor = ad.Executor(
        [loss, grad_W1, grad_W2, grad_W3, grad_b1, grad_b2, grad_b3, y],
        ctx=executor_ctx)

    # Read input data. Only the training and validation splits are used here.
    datasets = load_mnist_data("mnist.pkl.gz")
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # Set up minibatch
    batch_size = 1000
    n_train_batches = train_set_x.shape[0] // batch_size
    n_valid_batches = valid_set_x.shape[0] // batch_size

    print("Start training loop...")

    # Initialize parameters from a fixed seed so runs start identically.
    rand = np.random.RandomState(seed=123)
    W1_val = rand.normal(scale=0.1, size=(784, 256))
    W2_val = rand.normal(scale=0.1, size=(256, 100))
    W3_val = rand.normal(scale=0.1, size=(100, 10))
    b1_val = rand.normal(scale=0.1, size=256)
    b2_val = rand.normal(scale=0.1, size=100)
    b3_val = rand.normal(scale=0.1, size=10)
    X_val = np.empty(shape=(batch_size, 784), dtype=np.float32)
    y_val = np.empty(shape=(batch_size, 10), dtype=np.float32)
    valid_X_val = np.empty(shape=(batch_size, 784), dtype=np.float32)
    valid_y_val = np.empty(shape=(batch_size, 10), dtype=np.float32)
    if ndarray.is_gpu_ctx(executor_ctx):
        W1_val = ndarray.array(W1_val, ctx=executor_ctx)
        W2_val = ndarray.array(W2_val, ctx=executor_ctx)
        W3_val = ndarray.array(W3_val, ctx=executor_ctx)
        b1_val = ndarray.array(b1_val, ctx=executor_ctx)
        b2_val = ndarray.array(b2_val, ctx=executor_ctx)
        b3_val = ndarray.array(b3_val, ctx=executor_ctx)
        X_val = ndarray.array(X_val, ctx=executor_ctx)
        y_val = ndarray.array(y_val, ctx=executor_ctx)

    lr = 1.0e-3
    for i in range(num_epochs):
        print("epoch %d" % i)
        for minibatch_index in range(n_train_batches):
            minibatch_start = minibatch_index * batch_size
            minibatch_end = (minibatch_index + 1) * batch_size
            # Refill the preallocated batch buffers in place.
            X_val[:] = train_set_x[minibatch_start:minibatch_end]
            y_val[:] = convert_to_one_hot(
                train_set_y[minibatch_start:minibatch_end])
            (loss_val, grad_W1_val, grad_W2_val, grad_W3_val,
             grad_b1_val, grad_b2_val, grad_b3_val, _) = executor.run(
                feed_dict={
                    X: X_val,
                    y_: y_val,
                    W1: W1_val,
                    W2: W2_val,
                    W3: W3_val,
                    b1: b1_val,
                    b2: b2_val,
                    b3: b3_val})
            # SGD update
            if executor_ctx is None:
                W1_val = W1_val - lr * grad_W1_val
                W2_val = W2_val - lr * grad_W2_val
                W3_val = W3_val - lr * grad_W3_val
                b1_val = b1_val - lr * grad_b1_val
                b2_val = b2_val - lr * grad_b2_val
                b3_val = b3_val - lr * grad_b3_val
            else:
                sgd_update_gpu(W1_val, grad_W1_val, lr)
                sgd_update_gpu(W2_val, grad_W2_val, lr)
                sgd_update_gpu(W3_val, grad_W3_val, lr)
                sgd_update_gpu(b1_val, grad_b1_val, lr)
                sgd_update_gpu(b2_val, grad_b2_val, lr)
                sgd_update_gpu(b3_val, grad_b3_val, lr)
        if print_loss_val_each_epoch:
            if isinstance(loss_val, ndarray.NDArray):
                print(loss_val.asnumpy())
            else:
                print(loss_val)

    correct_predictions = []
    for minibatch_index in range(n_valid_batches):
        minibatch_start = minibatch_index * batch_size
        minibatch_end = (minibatch_index + 1) * batch_size
        valid_X_val[:] = valid_set_x[minibatch_start:minibatch_end]
        valid_y_val[:] = convert_to_one_hot(
            valid_set_y[minibatch_start:minibatch_end])
        _, _, _, _, _, _, _, valid_y_predicted = executor.run(
            feed_dict={
                X: valid_X_val,
                y_: valid_y_val,
                W1: W1_val,
                W2: W2_val,
                W3: W3_val,
                b1: b1_val,
                b2: b2_val,
                b3: b3_val},
            convert_to_numpy_ret_vals=True)
        # np.float64 replaces the np.float alias, which NumPy 1.24 removed;
        # the alias resolved to the builtin float, so the dtype is unchanged.
        correct_prediction = np.equal(
            np.argmax(valid_y_val, 1),
            np.argmax(valid_y_predicted, 1)).astype(np.float64)
        correct_predictions.extend(correct_prediction)
    accuracy = np.mean(correct_predictions)
    # Figure recorded in the original source: validation set accuracy=0.970800
    print("validation set accuracy=%f" % accuracy)