def test_MatrixMult():
    X = ad.Variable(name="X")
    W1 = init.random_normal((10, 5), stddev=0.1, name='W1')
    y = ad.matmul_op(X, W1)
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(batch_size, 10)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])

    # test transpose_A
    X = ad.Variable(name="X")
    W1 = init.random_normal((10, 5), stddev=0.1, name='W1')
    y = ad.matmul_op(X, W1, True)
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(10, batch_size)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])

    # test transpose_B
    X = ad.Variable(name="X")
    W1 = init.random_normal((5, 10), stddev=0.1, name='W1')
    y = ad.matmul_op(X, W1, trans_B=True)
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(batch_size, 10)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])

    print(sys._getframe().f_code.co_name, 'pass!')
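# Illustrative sketch only (assumption): `Check` is defined elsewhere in this test
# file; these op tests presumably compare Hetu's analytic gradients against a
# numerical reference. A plain-NumPy central-difference check of the same flavor,
# using no Hetu API (`numeric_grad` is a hypothetical helper, not part of Hetu):
import numpy as np

def numeric_grad(f, x, eps=1e-4):
    # Central-difference estimate of d f(x) / d x for a scalar-valued f.
    grad = np.zeros_like(x)
    for idx in np.ndindex(x.shape):
        orig = x[idx]
        x[idx] = orig + eps
        plus = f(x)
        x[idx] = orig - eps
        minus = f(x)
        x[idx] = orig
        grad[idx] = (plus - minus) / (2 * eps)
    return grad

if __name__ == '__main__':
    W = np.random.normal(scale=0.1, size=(10, 5))
    X = np.random.normal(scale=0.1, size=(4, 10))
    # Analytic gradient of sum(X @ W) w.r.t. X is ones((4, 5)) @ W.T
    analytic = np.ones((4, 5)) @ W.T
    numeric = numeric_grad(lambda x: (x @ W).sum(), X)
    np.testing.assert_allclose(analytic, numeric, rtol=1e-4, atol=1e-6)
    print('finite-difference matmul gradient check pass!')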
def train_hetu(num_epoch):
    ctx = ndarray.gpu(0)

    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")

    if use_same_init:
        gcn1 = GCN(num_features, hidden_layer_size, custom_init=(init_w1, init_b1))
        gcn2 = GCN(hidden_layer_size, num_classes, custom_init=(init_w2, init_b2))
    else:
        gcn1 = GCN(num_features, hidden_layer_size)
        gcn2 = GCN(hidden_layer_size, num_classes)

    mp_val = mp_matrix(graph, ctx, use_original_gcn_norm=True)
    feed_dict = {
        gcn1.mp: mp_val,
        gcn2.mp: mp_val,
        x_: ndarray.array(graph.x, ctx=ctx),
        y_: ndarray.array(convert_to_one_hot(graph.y, max_val=num_classes), ctx=ctx)
    }

    x = gcn1(x_)
    x = ad.relu_op(x)
    y = gcn2(x)
    loss = ad.softmaxcrossentropy_op(y, y_)
    opt = optimizer.AdamOptimizer(0.01)
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, train_op], ctx=ctx)

    start_time = time.time()
    losses = []
    for i in range(num_epoch):
        loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
        y_predicted = y_predicted.asnumpy().argmax(axis=1)
        acc = (y_predicted == graph.y).sum()
        losses.append(loss_val.asnumpy().mean())
        if i == 0:
            # reset the timer after the first iteration (excludes warm-up)
            start_time = time.time()
        print("Train loss :", loss_val.asnumpy().mean())
        print("Train accuracy:", acc / len(y_predicted))
        print("Hetu time:", i, time.time() - start_time)
    print("Hetu time:", time.time() - start_time)

    mp_val = mp_matrix(graph_full, ctx)
    feed_dict = {
        gcn1.mp: mp_val,
        gcn2.mp: mp_val,
        x_: ndarray.array(graph_full.x, ctx=ctx),
    }
    executor_eval = ad.Executor([y], ctx=ctx)
    y_predicted, = executor_eval.run(feed_dict=feed_dict)
    y_predicted = y_predicted.asnumpy().argmax(axis=1)
    acc = (y_predicted == graph_full.y)[train_split:].sum()
    print("Test accuracy:", acc / len(y_predicted[train_split:]))
    return losses
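# Sketch (assumption): `mp_matrix` is expected to return a normalized message-passing
# matrix; with `use_original_gcn_norm=True` the usual choice is the symmetric GCN
# normalization D^{-1/2} (A + I) D^{-1/2} from Kipf & Welling. A plain-NumPy version
# for a small dense adjacency matrix, for reference only (not the Hetu implementation):
import numpy as np

def gcn_norm(adj):
    # adj: dense (N, N) adjacency matrix without self-loops
    a_hat = adj + np.eye(adj.shape[0])           # add self-loops
    deg = a_hat.sum(axis=1)                      # node degrees
    d_inv_sqrt = np.diag(1.0 / np.sqrt(deg))     # D^{-1/2}
    return d_inv_sqrt @ a_hat @ d_inv_sqrt       # symmetric normalization

if __name__ == '__main__':
    adj = np.array([[0, 1, 0],
                    [1, 0, 1],
                    [0, 1, 0]], dtype=np.float32)
    print(gcn_norm(adj))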
def onnx2hetu(logs, model_name='tf_cnn_model.onnx'):
    logs.append('loading onnx file to hetu! filename is {}'.format(model_name))
    print(logs[-1])
    x, y = hx.onnx2hetu.load_onnx(model_name)
    logs.append('loading onnx file to hetu PASS!')
    print(logs[-1])

    executor = ad.Executor([y], ctx=ctx)
    rand = np.random.RandomState(seed=123)
    datasets = load_mnist_data("mnist.pkl.gz")
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    # X_val = rand.normal(scale=0.1, size=(20, 784)).astype(np.float32)
    X_val = train_set_x[:20, :]

    logs.append('validating model with assert_allclose between hetu and onnxruntime!...')
    print(logs[-1])
    ath = executor.run(feed_dict={x: X_val})

    sess = rt.InferenceSession(model_name)
    input = sess.get_inputs()[0].name
    pre = sess.run(None, {input: X_val.astype(np.float32)})[0]
    np.testing.assert_allclose(ath[0].asnumpy(), pre, rtol=1e-2)
    logs.append('validating model (cnn) PASS!')
    print(logs[-1])
def test_Softmax():
    X = ad.Variable(name="X")
    y = ad.softmax_op(X)
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(128, 150)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def test_Transpose():
    X = ad.Variable(name="X")
    y = ad.transpose_op(X, [2, 0, 1])
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(3, 2, 5)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def test_ReduceSum():
    X = ad.Variable(name="X")
    y = ad.reduce_sum_op(X, 0, keepdims=False)
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(2, 23, 5)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def train_hetu(args):
    with open(os.path.join(args.path, "meta.yml"), 'rb') as f:
        meta = yaml.load(f.read(), Loader=yaml.FullLoader)
    hidden_layer_size = args.hidden_size
    num_epoch = args.num_epoch
    rank = int(os.environ["WORKER_ID"])
    nrank = int(os.environ["DMLC_NUM_WORKER"])
    ctx = ndarray.gpu(rank)

    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")
    mask_ = ad.Variable(name="mask_")
    gcn1 = GraphSage(meta["feature"], hidden_layer_size, activation="relu", dropout=0.1)
    gcn2 = GraphSage(2 * hidden_layer_size, hidden_layer_size, activation="relu", dropout=0.1)

    x = gcn1(x_)
    x = gcn2(x)
    W = initializers.xavier_uniform(shape=(2 * hidden_layer_size, meta["class"]))
    B = initializers.zeros(shape=(meta["class"],))
    x = ad.matmul_op(x, W)
    y = x + ad.broadcastto_op(B, x)

    loss = ad.softmaxcrossentropy_op(y, y_)
    loss = ad.mul_op(loss, mask_)
    loss = ad.reduce_mean_op(loss, [0])
    opt = optimizer.SGDOptimizer(0.1)
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, train_op], ctx=ctx, comm_mode='PS')
    distributed.ps_init(rank, nrank)

    batch_size = 4000
    with DistributedGraphSageSampler(args.path, batch_size, 2, 2, rank=rank, nrank=nrank) as sampler:
        epoch = 0
        nnodes = 0
        start = time.time()
        while True:
            g_sample, mask = sampler.sample()
            mp_val = mp_matrix(g_sample, ndarray.gpu(rank))
            feed_dict = {
                gcn1.mp: mp_val,
                gcn2.mp: mp_val,
                mask_: ndarray.array(mask, ctx=ctx),
                x_: ndarray.array(g_sample.x, ctx=ctx),
                y_: ndarray.array(convert_to_one_hot(g_sample.y, max_val=g_sample.num_classes), ctx=ctx)
            }
            loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
            y_predicted = y_predicted.asnumpy().argmax(axis=1)
            acc = ((y_predicted == g_sample.y) * mask).sum()
            distributed.ps_get_worker_communicator().BarrierWorker()
            nnodes += batch_size
            if nnodes > meta["partition"]["nodes"][rank]:
                nnodes = 0
                epoch += 1
                print("Epoch :", epoch, time.time() - start)
                print("Train accuracy:", acc / mask.sum())
                start = time.time()
                if epoch >= num_epoch:
                    break
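# Sketch (assumption): `convert_to_one_hot`, used when building the feed_dict above,
# turns integer class labels into a dense one-hot matrix with `max_val` columns.
# A minimal NumPy equivalent for reference; the actual helper lives elsewhere in the repo:
import numpy as np

def convert_to_one_hot_ref(labels, max_val):
    # labels: 1-D array of integer class ids in [0, max_val)
    one_hot = np.zeros((len(labels), max_val), dtype=np.float32)
    one_hot[np.arange(len(labels)), labels] = 1.0
    return one_hot

if __name__ == '__main__':
    print(convert_to_one_hot_ref(np.array([0, 2, 1]), max_val=3))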
def test_Reshape():
    X = ad.Variable(name="X")
    y = ad.array_reshape_op(X, [-1, 10 * 10 * 10])
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(batch_size, 10, 10, 10)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def test_AddElewise():
    X = ad.Variable(name="X")
    b3 = init.random_normal((10, ), stddev=0.1, name='b3')
    y = X + b3
    executor = ad.Executor([y], ctx=ctx, enable_lazy=False)
    X_val = rand.normal(scale=0.1, size=(batch_size, 10)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def test_MaxPool():
    X = ad.Variable(name="X")
    y = ad.max_pool2d_op(X, kernel_H=2, kernel_W=2, padding=0, stride=2)
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(batch_size, 10, 10, 10)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def test_Conv2d():
    X = ad.Variable(name="X")
    W1 = init.random_normal((32, 1, 5, 5), stddev=0.1, name='W1')
    y = ad.conv2d_op(X, W1, padding=2, stride=1)
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(batch_size, 1, 28, 28)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def test_AddConst():
    X = ad.Variable(name="X")
    val = 3.3
    y = X + val
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(batch_size, 10)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def test_DivConst():
    X = ad.Variable(name="X")
    const = 5.5
    y = ad.div_const_op(const, X)
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(2, 2)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def test_Div():
    X = ad.Variable(name="X")
    B = ad.Variable(name="B")
    y = ad.div_op(X, B)
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(2, 2)).astype(np.float32)
    B_val = rand.normal(scale=0.1, size=(2, 2)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val, B: B_val})
    Check(executor, res, [X, B], [y], [X_val, B_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def test_Pad():
    X = ad.Variable(name="X")
    paddings = [[1, 1], [1, 1], [2, 1], [1, 3]]
    y = ad.pad_op(X, paddings, constant_values=0)
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(1, 1, 1, 1)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def test_Concat():
    A = ad.Variable(name="A")
    B = ad.Variable(name="B")
    y = ad.concat_op(A, B, axis=1)
    executor = ad.Executor([y], ctx=ctx)
    A_val = rand.normal(scale=0.1, size=(2, 3)).astype(np.float32)
    B_val = rand.normal(scale=0.1, size=(2, 3)).astype(np.float32)
    res = executor.run(feed_dict={A: A_val, B: B_val})
    Check(executor, res, [A, B], [y], [A_val, B_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def train_hetu(args):
    with open(os.path.join(args.path, "meta.yml"), 'rb') as f:
        meta = yaml.load(f.read(), Loader=yaml.FullLoader)
    hidden_layer_size = args.hidden_size
    num_epoch = args.num_epoch
    rank = int(os.environ["WORKER_ID"])
    nrank = int(os.environ["DMLC_NUM_WORKER"])
    hosts, ports = load_ip_config(args.ip_config)
    ctx = ndarray.gpu(rank)
    distributed.grpc_init(hosts=hosts, ports=ports, rank=rank, nrank=nrank)

    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")
    gcn1 = GCN(meta["feature"], hidden_layer_size, activation="relu")
    gcn2 = GCN(hidden_layer_size, meta["class"])
    x = gcn1(x_)
    y = gcn2(x)
    loss = ad.softmaxcrossentropy_op(y, y_)
    loss = ad.reduce_mean_op(loss, [0])
    opt = optimizer.SGDOptimizer(0.1)
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, train_op], ctx=ctx, comm_mode='PS')

    def transform(graph):
        mp_val = mp_matrix(graph, ndarray.gpu(rank))
        return graph, mp_val

    with DistributedSubgraphSampler(args.path, 4000, 2, rank=rank, nrank=nrank,
                                    transformer=transform, backend="grpc") as sampler:
        epoch = 0
        nnodes = 0
        start = time.time()
        while True:
            g_sample, mp_val = sampler.sample()
            feed_dict = {
                gcn1.mp: mp_val,
                gcn2.mp: mp_val,
                x_: ndarray.array(g_sample.x, ctx=ctx),
                y_: ndarray.array(convert_to_one_hot(g_sample.y, max_val=g_sample.num_classes), ctx=ctx)
            }
            loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
            y_predicted = y_predicted.asnumpy().argmax(axis=1)
            acc = (y_predicted == g_sample.y).sum()
            distributed.ps_get_worker_communicator().BarrierWorker()
            nnodes += g_sample.num_nodes
            if nnodes > meta["partition"]["nodes"][rank]:
                nnodes = 0
                epoch += 1
                print("Epoch :", epoch, time.time() - start)
                print("Train accuracy:", acc / len(y_predicted))
                start = time.time()
                if epoch >= num_epoch:
                    break
def test_Where():
    cond = ad.Variable(name="Cond", dtype=np.bool)
    A = ad.Variable(name="A")
    B = ad.Variable(name="B")
    y = ad.where_op(cond, A, B)
    executor = ad.Executor([y], ctx=ctx)
    shape = [2, 2, 3]
    Cond_val = rand.randint(0, 2, size=shape, dtype=np.bool)
    A_val = rand.normal(scale=0.1, size=shape).astype(np.float32)
    B_val = rand.normal(scale=0.1, size=shape).astype(np.float32)
    res = executor.run(feed_dict={cond: Cond_val, A: A_val, B: B_val})
    Check(executor, res, [cond, A, B], [y], [Cond_val, A_val, B_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def test_Onehot():
    X = ad.Variable(name="X")
    classes = 10
    y = ad.one_hot_op(X, classes)
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.randint(0, 10, 20).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X], [y], [X_val])
    print(sys._getframe().f_code.co_name, 'pass!')
def test_BatchNorm():
    X = ad.Variable(name="X")
    bn_scale = init.random_normal((64, ), stddev=0.1, name='bn_scale')
    bn_bias = init.random_normal((64, ), stddev=0.1, name='bn_bias')
    y = ad.batch_normalization_op(X, bn_scale, bn_bias)
    executor = ad.Executor([y], ctx=ctx)
    X_val = rand.normal(scale=0.1, size=(batch_size, 64, 28, 28)).astype(np.float32)
    res = executor.run(feed_dict={X: X_val})
    Check(executor, res, [X, bn_scale, bn_bias], [y],
          [X_val, bn_scale.tensor_value, bn_bias.tensor_value])
    print(sys._getframe().f_code.co_name, 'pass!')
def eval():
    start = time.time()
    # switch dropout to inference mode for evaluation
    ad.Dropout.DropoutOp.phase = "eval"
    mp_val = mp_matrix(graph_full, ctx)
    feed_dict = {
        gcn1.mp: mp_val,
        gcn2.mp: mp_val,
        x_: ndarray.array(graph_full.x, ctx=ctx),
    }
    executor_eval = ad.Executor([y], ctx=ctx)
    y_predicted, = executor_eval.run(feed_dict=feed_dict)
    y_predicted = y_predicted.asnumpy().argmax(axis=1)
    acc = (y_predicted == graph_full.y)[train_split:].sum()
    print("Test accuracy:", acc / len(y_predicted[train_split:]))
    # restore dropout to training mode
    ad.Dropout.DropoutOp.phase = "training"
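# Sketch (assumption): the DropoutOp phase switch above matters because dropout is
# only applied in training mode; in "eval" mode the op presumably acts as an identity.
# A plain-NumPy picture of standard (inverted) dropout, for reference only:
import numpy as np

def dropout_ref(x, rate, phase="training", rng=np.random):
    if phase != "training" or rate == 0.0:
        return x                                  # identity at evaluation time
    mask = (rng.random_sample(x.shape) >= rate)   # keep each unit with prob. 1 - rate
    return x * mask / (1.0 - rate)                # rescale so the expected output matches the input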
def test_dense():
    npw = np.random.random((5, 10)).astype(np.float32)
    npx = np.random.random((7, 5)).astype(np.float32)
    cpuctx = ndarray.cpu(0)
    gpuctx = ndarray.gpu(0)

    X = ad.Variable(name="x")
    mid = X + 3
    W = ad.Variable(name='w', value=npw, ctx=cpuctx)
    y = ad.matmul_op(mid, W)
    opt = optimizer.SGDOptimizer(learning_rate=0.1)
    train_op = opt.minimize(y)
    executor = ad.Executor([y, train_op], ctx=gpuctx)

    pred_y, _ = executor.run(feed_dict={X: npx}, convert_to_numpy_ret_vals=True)
    nppred_y = np.matmul((npx + 3), npw)
    np.testing.assert_allclose(pred_y, nppred_y, rtol=1e-6)
    new_npw = npw - 0.1 * np.matmul((npx + 3).T, np.ones(nppred_y.shape).astype(np.float32))
    np.testing.assert_allclose(W.tensor_value.asnumpy(), new_npw, rtol=1e-10)
def test_add_lazy(shape1=(1, 4, 1), shape2=(2, 3, 4, 5), ctx=ndarray.gpu(1)):
    x = np.random.random(shape1).astype(np.float32)
    z = np.random.random(shape2).astype(np.float32)
    ath_x = ad.Variable(name='x', value=x)
    ath_z = ad.Variable(name='z', value=z)
    ath_y = ad.add_op(ad.broadcast_shape_op(ath_x, shape2), ath_z)
    executor = ad.Executor([ath_y], ctx=ctx)
    ath_results = [var.asnumpy() for var in executor.run()]

    import tensorflow as tf
    tf_x = tf.convert_to_tensor(x)
    tf_z = tf.convert_to_tensor(z)
    tf_y = tf_x + tf_z
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf_results = sess.run([tf_y])
    np.testing.assert_allclose(ath_results[0], tf_results[0])
    print('Passed add op test with shape ', shape1, shape2)
def test_slice(shape1=(7, 11, 13), shape2=(2, 3, 4), begin_pos=(0, 0, 0)):
    ctx = ndarray.gpu(1)
    x = np.random.random(shape1).astype(np.float32)
    ath_x = ad.Variable(name='x', value=x)
    ath_y = ad.slice_op(ath_x, begin_pos, shape2)
    ath_grad = ad.gradients(ath_y, [ath_x])[0]
    executor = ad.Executor([ath_y, ath_grad], ctx=ctx, enable_lazy=False)
    ath_results = [var.asnumpy() for var in executor.run()]

    import tensorflow as tf
    tf_x = tf.convert_to_tensor(x)
    tf_y = tf.slice(tf_x, begin_pos, shape2)
    tf_grad = tf.gradients(tf_y, tf_x)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf_results = sess.run([tf_y, tf_grad])
    np.testing.assert_allclose(ath_results[0], tf_results[0])
    np.testing.assert_allclose(ath_results[1], np.reshape(tf_results[1], ath_results[1].shape))
    print('Passed slice op test with shape ', shape1, shape2, ' and begin pos ', begin_pos)
def test_broadcast(shape1=(3, 1), shape2=(2, 3, 4)):
    ctx = ndarray.gpu(1)
    x = np.random.random(shape1).astype(np.float32)
    ath_x = ad.Variable(name='x', value=x)
    ath_y = ad.broadcast_shape_op(ath_x, shape2)
    ath_grad = ad.gradients(ath_y, [ath_x])[0]
    executor = ad.Executor([ath_y, ath_grad], ctx=ctx)
    ath_results = [var.asnumpy() for var in executor.run()]

    import tensorflow as tf
    tf_x = tf.convert_to_tensor(x)
    tf_y = tf.broadcast_to(tf_x, shape2)
    tf_grad = tf.gradients(tf_y, tf_x)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf_results = sess.run([tf_y, tf_grad])
    np.testing.assert_allclose(ath_results[0], tf_results[0])
    np.testing.assert_allclose(ath_results[1], np.reshape(tf_results[1], ath_results[1].shape))
    print('Passed broadcast shape op test with shape ', shape1, shape2)
def test_reduce_sum(shape=(2, 3, 4), axes=[2]):
    ctx = ndarray.gpu(1)
    x = np.random.random(shape).astype(np.float32)
    ath_x = ad.Variable(name='x', value=x)
    ath_y = ad.reduce_sum_op(ath_x, axes, keepdims=False)
    ath_grad = ad.gradients(ath_y, [ath_x])[0]
    executor = ad.Executor([ath_y, ath_grad], ctx=ctx)
    ath_results = [var.asnumpy() for var in executor.run()]

    import tensorflow as tf
    tf_x = tf.convert_to_tensor(x)
    tf_y = tf.reduce_sum(tf_x, axes)
    tf_grad = tf.gradients(tf_y, tf_x)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf_results = sess.run([tf_y, tf_grad])
    np.testing.assert_allclose(ath_results[0], np.reshape(tf_results[0], ath_results[0].shape), rtol=1e-6)
    np.testing.assert_allclose(ath_results[1], np.reshape(tf_results[1], ath_results[1].shape), rtol=1e-6)
    print('Passed reduce sum op test with shape and axes ', shape, axes)
def test_transpose(shape=(2, 3, 4, 5), perm=None):
    ctx = ndarray.gpu(1)
    x = np.random.random(shape).astype(np.float32)
    ath_x = ad.Variable(name='x', value=x)
    ath_y = ad.transpose_op(ath_x, perm)
    ath_grad = ad.gradients(ath_y, [ath_x])[0]
    executor = ad.Executor([ath_y, ath_grad], ctx=ctx, enable_lazy=False)
    ath_results = [var.asnumpy() for var in executor.run()]

    import tensorflow as tf
    tf_x = tf.convert_to_tensor(x)
    tf_y = tf.transpose(tf_x, perm)
    tf_grad = tf.gradients(tf_y, tf_x)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf_results = sess.run([tf_y, tf_grad])
    np.testing.assert_allclose(ath_results[0], tf_results[0])
    np.testing.assert_allclose(ath_results[1], np.reshape(tf_results[1], ath_results[1].shape))
    print('Passed transpose shape op test with shape ', shape, ' and perm ', perm)
def mnist_mlp(executor_ctx=None, num_epochs=10, print_loss_val_each_epoch=False):
    print("Build 3-layer MLP model...")

    W1 = init.random_normal((784, 256), stddev=0.1, name='W1')
    W2 = init.random_normal((256, 256), stddev=0.1, name='W2')
    W3 = init.random_normal((256, 10), stddev=0.1, name='W3')
    b1 = init.random_normal((256, ), stddev=0.1, name='b1')
    b2 = init.random_normal((256, ), stddev=0.1, name='b2')
    b3 = init.random_normal((10, ), stddev=0.1, name='b3')

    X = ad.Variable(name="X")

    # relu(X W1 + b1)
    z1 = ad.matmul_op(X, W1) + b1
    z2 = ad.relu_op(z1)
    # relu(z2 W2 + b2)
    z3 = ad.matmul_op(z2, W2) + b2
    z4 = ad.relu_op(z3)
    # logits: z4 W3 + b3
    y = ad.matmul_op(z4, W3) + b3

    executor = ad.Executor([y], ctx=executor_ctx)

    rand = np.random.RandomState(seed=123)
    X_val = rand.normal(scale=0.1, size=(batch_size, 784)).astype(np.float32)
    ath = executor.run(feed_dict={X: X_val})

    hx.hetu2onnx.export(executor, [X], [y], 'ath.onnx')

    sess = rt.InferenceSession("ath.onnx")
    input = sess.get_inputs()[0].name
    pre = sess.run(None, {input: X_val.astype(np.float32)})[0]
    np.testing.assert_allclose(pre, ath[0].asnumpy(), rtol=1e-2)
def cnn(executor_ctx=None, num_epochs=10, print_loss_val_each_epoch=False):
    print("Build CNN model...")

    W1 = init.random_normal((32, 1, 5, 5), stddev=0.1, name='W1')
    W2 = init.random_normal((64, 32, 5, 5), stddev=0.1, name='W2')
    W3 = init.random_normal((7 * 7 * 64, 10), stddev=0.1, name='W3')
    b3 = init.random_normal((10, ), stddev=0.1, name='b3')

    X = ad.Variable(name="X")
    z1 = ad.conv2d_op(X, W1, padding=2, stride=1)
    z2 = ad.relu_op(z1)
    z3 = ad.avg_pool2d_op(z2, kernel_H=2, kernel_W=2, padding=0, stride=2)
    z4 = ad.conv2d_op(z3, W2, padding=2, stride=1)
    z5 = ad.relu_op(z4)
    z6 = ad.avg_pool2d_op(z5, kernel_H=2, kernel_W=2, padding=0, stride=2)
    z6_flat = ad.array_reshape_op(z6, (-1, 7 * 7 * 64))
    y = ad.matmul_op(z6_flat, W3) + b3

    executor = ad.Executor([y], ctx=executor_ctx)

    rand = np.random.RandomState(seed=123)
    X_val = rand.normal(scale=0.1, size=(batch_size, 1, 28, 28)).astype(np.float32)
    ath = executor.run(feed_dict={X: X_val})

    hx.hetu2onnx.export(executor, [X], [y], 'ath.onnx')

    sess = rt.InferenceSession("ath.onnx")
    input = sess.get_inputs()[0].name
    pre = sess.run(None, {input: X_val.astype(np.float32)})[0]
    np.testing.assert_allclose(ath[0].asnumpy(), pre, rtol=1e-2)
def test_batch_matmul(shape1=(7, 4, 6), shape2=(7, 6, 5), transA=False, transB=False):
    executor_ctx = ndarray.gpu(1)
    if transA:
        shape1 = tuple(list(shape1)[:-2] + [shape1[-1], shape1[-2]])
    if transB:
        shape2 = tuple(list(shape2)[:-2] + [shape2[-1], shape2[-2]])
    data = np.random.normal(0.0, 0.2, shape1).astype(np.float32)
    weights = np.random.normal(0.0, 0.1, shape2).astype(np.float32)

    ath_data = ad.Variable(name='data')
    ath_weights = ad.Variable(name='weights')
    ath_output = ad.batch_matmul_op(ath_data, ath_weights, trans_A=transA, trans_B=transB)
    ath_grads = ad.gradients(ath_output, [ath_data, ath_weights])
    executor = ad.Executor([ath_output] + ath_grads, ctx=executor_ctx)
    ath_results = executor.run(feed_dict={ath_data: data, ath_weights: weights})
    ath_results = [res.asnumpy() for res in ath_results]

    import tensorflow as tf
    tf_data = tf.placeholder(name='data', dtype=tf.float32)
    tf_weights = tf.placeholder(name='weights', dtype=tf.float32)
    tf_output = tf.matmul(tf_data, tf_weights, transpose_a=transA, transpose_b=transB)
    tf_grads = tf.gradients(tf_output, [tf_data, tf_weights])
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf_results = sess.run([tf_output] + tf_grads, feed_dict={tf_data: data, tf_weights: weights})

    np.testing.assert_allclose(ath_results[0], tf_results[0], atol=1e-6)
    np.testing.assert_allclose(ath_results[1], tf_results[1], atol=1e-6)
    np.testing.assert_allclose(ath_results[2], tf_results[2], atol=1e-6)
    print('Pass batch matmul op test with shape ', shape1, shape2)