def dfm_criteo(dense_input, sparse_input, y_):
    """DeepFM for the Criteo CTR dataset.

    Combines a first-order FM term, a second-order FM interaction term, and
    a 3-layer DNN over the shared sparse embeddings.

    Returns (loss, y, y_, train_op).
    """
    feature_dimension = 33762577
    embedding_size = 128
    learning_rate = 0.01

    # --- first-order FM term: dense linear part + per-feature sparse weights ---
    emb_fst = init.random_normal([feature_dimension, 1], stddev=0.01,
                                 name="fst_order_embedding", ctx=ndarray.cpu(0))
    dense_w = init.random_normal([13, 1], stddev=0.01, name="dense_parameter")
    fst_sparse = ad.embedding_lookup_op(emb_fst, sparse_input, ctx=ndarray.cpu(0))
    y1 = ad.matmul_op(dense_input, dense_w) + ad.reduce_sum_op(fst_sparse, axes=1)

    # --- second-order FM term: 0.5 * ((sum v)^2 - sum v^2) ---
    emb_snd = init.random_normal([feature_dimension, embedding_size], stddev=0.01,
                                 name="snd_order_embedding", ctx=ndarray.cpu(0))
    snd_sparse = ad.embedding_lookup_op(emb_snd, sparse_input, ctx=ndarray.cpu(0))
    sum_vec = ad.reduce_sum_op(snd_sparse, axes=1)
    square_of_sum = ad.mul_op(sum_vec, sum_vec)
    sum_of_square = ad.reduce_sum_op(ad.mul_op(snd_sparse, snd_sparse), axes=1)
    cross = (square_of_sum + -1 * sum_of_square) * 0.5
    y2 = ad.reduce_sum_op(cross, axes=1, keepdims=True)

    # --- DNN branch over flattened second-order embeddings ---
    flat = ad.array_reshape_op(snd_sparse, (-1, 26 * embedding_size))
    w1 = init.random_normal([26 * embedding_size, 256], stddev=0.01, name="W1")
    w2 = init.random_normal([256, 256], stddev=0.01, name="W2")
    w3 = init.random_normal([256, 1], stddev=0.01, name="W3")
    hidden = ad.relu_op(ad.matmul_op(flat, w1))
    hidden = ad.relu_op(ad.matmul_op(hidden, w2))
    y3 = ad.matmul_op(hidden, w3)

    # Sum the three branches and squash to a click probability.
    y = ad.sigmoid_op((y1 + y2) + y3)
    loss = ad.reduce_mean_op(ad.binarycrossentropy_op(y, y_), [0])
    opt = optimizer.SGDOptimizer(learning_rate=learning_rate)
    train_op = opt.minimize(loss)
    return loss, y, y_, train_op
def neural_mf(user_input, item_input, y_, num_users, num_items):
    """Neural Matrix Factorization (NeuMF): a GMF branch plus an MLP branch.

    Each embedding table packs both the MF latent vector (first `embed_dim`
    columns) and the MLP latent vector (remaining `layers[0] // 2` columns),
    so a single lookup per side serves both branches.

    Improvements over the original: removed the unused local `batch_size`
    and the dead commented-out separate-embedding code path.

    Returns (loss, y, train_op).
    """
    embed_dim = 8
    layers = [64, 32, 16, 8]
    learning_rate = 0.01
    User_Embedding = init.random_normal(
        (num_users, embed_dim + layers[0] // 2), stddev=0.01, name="user_embed", ctx=ndarray.cpu(0))
    Item_Embedding = init.random_normal(
        (num_items, embed_dim + layers[0] // 2), stddev=0.01, name="item_embed", ctx=ndarray.cpu(0))
    user_latent = ad.embedding_lookup_op(User_Embedding, user_input, ctx=ndarray.cpu(0))
    item_latent = ad.embedding_lookup_op(Item_Embedding, item_input, ctx=ndarray.cpu(0))
    # Split each latent vector into its MF part and its MLP part.
    mf_user_latent = ad.slice_op(user_latent, (0, 0), (-1, embed_dim))
    mlp_user_latent = ad.slice_op(user_latent, (0, embed_dim), (-1, -1))
    mf_item_latent = ad.slice_op(item_latent, (0, 0), (-1, embed_dim))
    mlp_item_latent = ad.slice_op(item_latent, (0, embed_dim), (-1, -1))
    W1 = init.random_normal((layers[0], layers[1]), stddev=0.1, name='W1')
    W2 = init.random_normal((layers[1], layers[2]), stddev=0.1, name='W2')
    W3 = init.random_normal((layers[2], layers[3]), stddev=0.1, name='W3')
    W4 = init.random_normal((embed_dim + layers[3], 1), stddev=0.1, name='W4')
    # GMF branch: element-wise product of user/item factors.
    mf_vector = ad.mul_op(mf_user_latent, mf_item_latent)
    # MLP branch: concatenated latents through three ReLU layers.
    mlp_vector = ad.concat_op(mlp_user_latent, mlp_item_latent, axis=1)
    fc1 = ad.matmul_op(mlp_vector, W1)
    relu1 = ad.relu_op(fc1)
    fc2 = ad.matmul_op(relu1, W2)
    relu2 = ad.relu_op(fc2)
    fc3 = ad.matmul_op(relu2, W3)
    relu3 = ad.relu_op(fc3)
    # Fuse the two branches and project to a single probability.
    concat_vector = ad.concat_op(mf_vector, relu3, axis=1)
    y = ad.sigmoid_op(ad.matmul_op(concat_vector, W4))
    loss = ad.reduce_mean_op(ad.binarycrossentropy_op(y, y_), [0])
    opt = optimizer.SGDOptimizer(learning_rate=learning_rate)
    train_op = opt.minimize(loss)
    return loss, y, train_op
def wdl_criteo(dense, sparse, labels):
    """Wide & Deep model for the Criteo CTR dataset.

    Each of `dense`/`sparse`/`labels` is either a plain array (train only)
    or a (train, validate) tuple; dataloaders are built accordingly.

    Returns (loss, y, y_, train_op).
    """
    batch_size = 128
    feature_dimension = 33762577
    embedding_size = 128
    learning_rate = 0.01
    # A tuple input carries a validation split as well as the training split.
    if isinstance(dense, tuple):
        dense_input = dl.dataloader_op(
            [[dense[0], batch_size, 'train'], [dense[1], batch_size, 'validate']])
        sparse_input = dl.dataloader_op(
            [[sparse[0], batch_size, 'train'], [sparse[1], batch_size, 'validate']])
        y_ = dl.dataloader_op(
            [[labels[0], batch_size, 'train'], [labels[1], batch_size, 'validate']])
    else:
        dense_input = dl.dataloader_op([[dense, batch_size, 'train']])
        sparse_input = dl.dataloader_op([[sparse, batch_size, 'train']])
        y_ = dl.dataloader_op([[labels, batch_size, 'train']])
    print("Data loaded.")

    # Wide part: looked-up sparse embeddings, flattened to one row per sample.
    emb_table = init.random_normal([feature_dimension, embedding_size], stddev=0.01,
                                   name="snd_order_embedding", ctx=ndarray.cpu(0))
    sparse_input = ad.embedding_lookup_op(emb_table, sparse_input, ctx=ndarray.cpu(0))
    sparse_input = ad.array_reshape_op(sparse_input, (-1, 26 * embedding_size))

    # Deep part: 3-layer MLP over the 13 dense features.
    w1 = init.random_normal([13, 256], stddev=0.01, name="W1")
    w2 = init.random_normal([256, 256], stddev=0.01, name="W2")
    w3 = init.random_normal([256, 256], stddev=0.01, name="W3")
    w4 = init.random_normal([256 + 26 * embedding_size, 1], stddev=0.01, name="W4")
    hidden = ad.relu_op(ad.matmul_op(dense_input, w1))
    hidden = ad.relu_op(ad.matmul_op(hidden, w2))
    deep_out = ad.matmul_op(hidden, w3)

    # Join wide and deep branches, project to a single logit.
    joined = ad.concat_op(sparse_input, deep_out, axis=1)
    y = ad.sigmoid_op(ad.matmul_op(joined, w4))
    loss = ad.reduce_mean_op(ad.binarycrossentropy_op(y, y_), [0])
    opt = optimizer.SGDOptimizer(learning_rate=learning_rate)
    train_op = opt.minimize(loss)
    return loss, y, y_, train_op
def wdl_adult(X_deep, X_wide, y_):
    """Wide & Deep for the Adult dataset (inputs supplied by the caller).

    The first 8 entries of `X_deep` are categorical columns that get an
    8-dim embedding each; the remaining 4 are appended as raw scalars.
    The deep MLP output is concatenated with `X_wide` and projected to
    2 logits.

    Returns (loss, prediction, y_, train_op).
    """
    lr = 5 / 128
    dim_wide = 809
    dim_deep = 68
    W = init.random_normal([dim_wide+20, 2], stddev=0.1, name="W")
    W1 = init.random_normal([dim_deep, 50], stddev=0.1, name="W1")
    b1 = init.random_normal([50], stddev=0.1, name="b1")
    W2 = init.random_normal([50, 20], stddev=0.1, name="W2")
    b2 = init.random_normal([20], stddev=0.1, name="b2")

    # Deep branch: embed each of the 8 categorical columns, then widen with
    # the 4 remaining raw columns, concatenating along the feature axis.
    tables = []
    deep_parts = []
    for idx in range(8):
        tables.append(init.random_normal([50, 8], stddev=0.1,
                                         name="Embedding_deep_" + str(idx)))
        looked_up = ad.embedding_lookup_op(tables[idx], X_deep[idx])
        deep_parts.append(ad.array_reshape_op(looked_up, (-1, 8)))
    for idx in range(4):
        deep_parts.append(ad.array_reshape_op(X_deep[idx + 8], (-1, 1)))
    deep_in = deep_parts[0]
    for part in deep_parts[1:]:
        deep_in = ad.concat_op(deep_in, part, 1)

    # Two-layer MLP; dropout was disabled in the original (kept that way).
    h = ad.matmul_op(deep_in, W1)
    h = h + ad.broadcastto_op(b1, h)
    h = ad.relu_op(h)  # ad.dropout_op(..., 0.5) intentionally off
    h2 = ad.matmul_op(h, W2)
    h2 = h2 + ad.broadcastto_op(b2, h2)
    deep_out = ad.relu_op(h2)  # ad.dropout_op(..., 0.5) intentionally off

    # Wide branch: raw wide features joined with the deep output.
    combined = ad.concat_op(X_wide, deep_out, 1)
    prediction = ad.matmul_op(combined, W)
    loss = ad.reduce_mean_op(ad.softmaxcrossentropy_op(prediction, y_), [0])
    opt = optimizer.SGDOptimizer(learning_rate=lr)
    train_op = opt.minimize(loss)
    return loss, prediction, y_, train_op
def fc(x, shape, name, with_relu=True):
    """Fully-connected layer: x @ W + b, optionally followed by ReLU.

    `shape` is (in_features, out_features); the bias takes the last dim.
    """
    w = init.random_normal(shape=shape, stddev=0.1, name=name+'_weight')
    b = init.random_normal(shape=shape[-1:], stddev=0.1, name=name+'_bias')
    out = ad.matmul_op(x, w)
    out = out + ad.broadcastto_op(b, out)
    return ad.relu_op(out) if with_relu else out
def conv_pool(x, in_channel, out_channel, name):
    """5x5 conv (pad 2, stride 1) -> ReLU -> 2x2 max pool (stride 2)."""
    kernel = init.random_normal(shape=(out_channel, in_channel, 5, 5),
                                stddev=0.1, name=name + '_weight')
    out = ad.conv2d_op(x, kernel, padding=2, stride=1)
    out = ad.relu_op(out)
    return ad.max_pool2d_op(out, kernel_H=2, kernel_W=2, padding=0, stride=2)
def version_1(cls, node, tensor_dict, **kwargs):
    """ONNX Relu handler (opset 1): map the node's single input through relu_op.

    Stores the result under the node's first output name and returns it.
    """
    inputs = [tensor_dict.get(name, None) for name in node.input_tensor_names]
    assert len(inputs) == 1  # Relu takes exactly one input tensor
    out = ad.relu_op(inputs[0])
    tensor_dict[node.output_tensor_names[0]] = out
    return out
def train_hetu(num_epoch):
    """Train a 2-layer GCN on `graph` for `num_epoch` epochs, then evaluate
    on `graph_full`.

    Returns the list of per-epoch mean training losses.

    NOTE(review): relies on module-level globals — graph, graph_full,
    num_features, hidden_layer_size, num_classes, use_same_init,
    init_w1/init_b1/init_w2/init_b2, train_split — defined elsewhere.
    """
    ctx = ndarray.gpu(0)
    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")
    # Optionally use a fixed shared initialization (for reproducible runs).
    if use_same_init:
        gcn1 = GCN(num_features, hidden_layer_size, custom_init=(init_w1, init_b1))
        gcn2 = GCN(hidden_layer_size, num_classes, custom_init=(init_w2, init_b2))
    else:
        gcn1 = GCN(num_features, hidden_layer_size)
        gcn2 = GCN(hidden_layer_size, num_classes)
    # Message-passing matrix shared by both layers.
    mp_val = mp_matrix(graph, ctx, use_original_gcn_norm=True)
    feed_dict = {
        gcn1.mp: mp_val,
        gcn2.mp: mp_val,
        x_: ndarray.array(graph.x, ctx=ctx),
        y_: ndarray.array(convert_to_one_hot(graph.y, max_val=num_classes), ctx=ctx)
    }
    # Two-layer GCN with ReLU in between, softmax cross-entropy loss.
    x = gcn1(x_)
    x = ad.relu_op(x)
    y = gcn2(x)
    loss = ad.softmaxcrossentropy_op(y, y_)
    opt = optimizer.AdamOptimizer(0.01)
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, train_op], ctx=ctx)
    start_time = time.time()
    losses = []
    for i in range(num_epoch):
        loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
        y_predicted = y_predicted.asnumpy().argmax(axis=1)
        acc = (y_predicted == graph.y).sum()
        losses.append(loss_val.asnumpy().mean())
        # Restart the clock after the first epoch to exclude warm-up cost.
        if i == 0:
            start_time = time.time()
        print("Train loss :", loss_val.asnumpy().mean())
        print("Train accuracy:", acc/len(y_predicted))
        print("Hetu time:", i, time.time()-start_time)
    print("Hetu time:", time.time()-start_time)
    # Evaluation on the full graph; nodes after train_split are the test set.
    mp_val = mp_matrix(graph_full, ctx)
    feed_dict = {
        gcn1.mp: mp_val,
        gcn2.mp: mp_val,
        x_: ndarray.array(graph_full.x, ctx=ctx),
    }
    executor_eval = ad.Executor([y], ctx=ctx)
    y_predicted, = executor_eval.run(feed_dict=feed_dict)
    y_predicted = y_predicted.asnumpy().argmax(axis=1)
    acc = (y_predicted == graph_full.y)[train_split:].sum()
    print("Test accuracy:", acc/len(y_predicted[train_split:]))
    return losses
def rnn(x, y_):
    """Single-layer recurrent network for MNIST.

    The 784-pixel input is consumed as `nsteps` slices of `diminput` columns
    each; the hidden state flows through a concat -> matmul -> ReLU cell, and
    the final state is projected to `dimoutput` logits.

    Parameters:
        x: Variable(hetu.gpu_ops.Node.Node), shape (N, dims)
        y_: Variable(hetu.gpu_ops.Node.Node), shape (N, num_classes)
    Return:
        loss: Variable(hetu.gpu_ops.Node.Node), shape (1,)
        y: Variable(hetu.gpu_ops.Node.Node), shape (N, num_classes)
    """
    print("Building RNN model...")
    diminput = 28
    dimhidden = 128
    dimoutput = 10
    nsteps = 28

    w_in = init.random_normal(shape=(diminput, dimhidden), stddev=0.1, name='rnn_weight1')
    b_in = init.random_normal(shape=(dimhidden, ), stddev=0.1, name='rnn_bias1')
    w_rec = init.random_normal(shape=(dimhidden + dimhidden, dimhidden), stddev=0.1, name='rnn_weight2')
    b_rec = init.random_normal(shape=(dimhidden, ), stddev=0.1, name='rnn_bias2')
    w_out = init.random_normal(shape=(dimhidden, dimoutput), stddev=0.1, name='rnn_weight3')
    b_out = init.random_normal(shape=(dimoutput, ), stddev=0.1, name='rnn_bias3')

    # Initial state is a scalar zero, broadcast to the hidden shape at step 0.
    state = ad.Variable(value=np.zeros((1, )).astype(np.float32),
                        name='initial_state', trainable=False)
    for step in range(nsteps):
        frame = ad.slice_op(x, (0, step * diminput), (-1, diminput))
        projected = ad.matmul_op(frame, w_in)
        projected = projected + ad.broadcastto_op(b_in, projected)
        if step == 0:
            state = ad.broadcastto_op(state, projected)
        merged = ad.concat_op(projected, state, axis=1)
        merged = ad.matmul_op(merged, w_rec)
        merged = merged + ad.broadcastto_op(b_rec, merged)
        state = ad.relu_op(merged)

    logits = ad.matmul_op(state, w_out)
    y = logits + ad.broadcastto_op(b_out, logits)
    loss = ad.reduce_mean_op(ad.softmaxcrossentropy_op(y, y_), [0])
    return loss, y
def dcn_criteo(dense_input, sparse_input, y_):
    """Deep & Cross Network for Criteo: explicit cross branch + DNN branch.

    Returns (loss, y, y_, train_op).
    """
    feature_dimension = 33762577
    embedding_size = 128
    learning_rate = 0.003
    emb = init.random_normal([feature_dimension, embedding_size], stddev=0.01,
                             name="snd_order_embedding", ctx=ndarray.cpu(0))
    sparse_input = ad.embedding_lookup_op(emb, sparse_input, ctx=ndarray.cpu(0))
    sparse_input = ad.array_reshape_op(sparse_input, (-1, 26 * embedding_size))
    # Shared input: sparse embeddings followed by the 13 dense features.
    x = ad.concat_op(sparse_input, dense_input, axis=1)

    # Cross network branch: three explicit feature-crossing layers.
    cross_output = build_cross_layer(x, num_layers=3)

    # DNN branch over the same concatenated input.
    w1 = init.random_normal([26 * embedding_size + 13, 256], stddev=0.01, name="W1")
    w2 = init.random_normal([256, 256], stddev=0.01, name="W2")
    w3 = init.random_normal([256, 256], stddev=0.01, name="W3")
    w4 = init.random_normal([256 + 26 * embedding_size + 13, 1], stddev=0.01, name="W4")
    hidden = ad.relu_op(ad.matmul_op(x, w1))
    hidden = ad.relu_op(ad.matmul_op(hidden, w2))
    dnn_output = ad.matmul_op(hidden, w3)

    # Merge the two branches and project to a single logit.
    merged = ad.concat_op(cross_output, dnn_output, axis=1)
    y = ad.sigmoid_op(ad.matmul_op(merged, w4))
    loss = ad.reduce_mean_op(ad.binarycrossentropy_op(y, y_), [0])
    opt = optimizer.SGDOptimizer(learning_rate=learning_rate)
    train_op = opt.minimize(loss)
    return loss, y, y_, train_op
def test_Relu():
    """Run relu_op on random input and verify via the shared Check helper.

    Uses module-level globals: ctx, batch_size, rand, Check.
    """
    X = ad.Variable(name="X")
    y = ad.relu_op(X)
    executor = ad.Executor([y], ctx=ctx)
    sample = rand.normal(scale=0.1, size=(batch_size, 10, 10, 10)).astype(np.float32)
    outputs = executor.run(feed_dict={X: sample})
    Check(executor, outputs, [X], [y], [sample])
    print(sys._getframe().f_code.co_name, 'pass!')
def batch_norm_with_relu(x, hidden, name):
    """Batch-normalize `x` over `hidden` channels, then apply ReLU."""
    gamma = init.random_normal(shape=(1, hidden, 1, 1), stddev=0.1, name=name + '_scale')
    beta = init.random_normal(shape=(1, hidden, 1, 1), stddev=0.1, name=name + '_bias')
    normed = ad.batch_normalization_op(x, gamma, beta)
    return ad.relu_op(normed)
def mnist_mlp(executor_ctx=None, num_epochs=10, print_loss_val_each_epoch=False):
    """Build a 3-layer MLP on MNIST-shaped random input, export it to ONNX,
    and check ONNX-runtime output against hetu's forward pass.

    `num_epochs` and `print_loss_val_each_epoch` are kept for interface
    compatibility but are unused: this is an export/parity check, not training.

    Fix: the ONNX input name is now bound to `input_name` instead of `input`,
    which shadowed the builtin.
    """
    print("Build 3-layer MLP model...")
    W1 = init.random_normal((784, 256), stddev=0.1, name='W1')
    W2 = init.random_normal((256, 256), stddev=0.1, name='W2')
    W3 = init.random_normal((256, 10), stddev=0.1, name='W3')
    b1 = init.random_normal((256, ), stddev=0.1, name='b1')
    b2 = init.random_normal((256, ), stddev=0.1, name='b2')
    b3 = init.random_normal((10, ), stddev=0.1, name='b3')
    X = ad.Variable(name="X")
    # relu(X W1 + b1)
    z1 = ad.matmul_op(X, W1) + b1
    z2 = ad.relu_op(z1)
    # relu(z2 W2 + b2)
    z3 = ad.matmul_op(z2, W2) + b2
    z4 = ad.relu_op(z3)
    # output logits: z4 W3 + b3
    y = ad.matmul_op(z4, W3) + b3

    executor = ad.Executor([y], ctx=executor_ctx)
    rand = np.random.RandomState(seed=123)
    X_val = rand.normal(scale=0.1, size=(batch_size, 784)).astype(np.float32)
    ath = executor.run(feed_dict={X: X_val})

    # NOTE(review): the sibling `cnn` test calls `hx.hetu2onnx.export` — confirm
    # which alias this module actually imports; kept `ax` unchanged here.
    ax.hetu2onnx.export(executor, [X], [y], 'ath.onnx')

    sess = rt.InferenceSession("ath.onnx")
    input_name = sess.get_inputs()[0].name  # renamed: don't shadow builtin `input`
    pre = sess.run(None, {input_name: X_val.astype(np.float32)})[0]
    np.testing.assert_allclose(pre, ath[0], rtol=1e-2)
def cnn(executor_ctx=None, num_epochs=10, print_loss_val_each_epoch=False):
    """Build a small conv net on MNIST-shaped random input, export it to ONNX,
    and check ONNX-runtime output against hetu's forward pass.

    `num_epochs` and `print_loss_val_each_epoch` are kept for interface
    compatibility but are unused: this is an export/parity check, not training.

    Fix: the ONNX input name is now bound to `input_name` instead of `input`,
    which shadowed the builtin.
    """
    print("Build CNN model...")
    W1 = init.random_normal((32, 1, 5, 5), stddev=0.1, name='W1')
    W2 = init.random_normal((64, 32, 5, 5), stddev=0.1, name='W2')
    W3 = init.random_normal((7 * 7 * 64, 10), stddev=0.1, name='W3')
    b3 = init.random_normal((10, ), stddev=0.1, name='b3')
    X = ad.Variable(name="X")
    # conv -> relu -> avg pool, twice: 28x28 -> 14x14 -> 7x7
    z1 = ad.conv2d_op(X, W1, padding=2, stride=1)
    z2 = ad.relu_op(z1)
    z3 = ad.avg_pool2d_op(z2, kernel_H=2, kernel_W=2, padding=0, stride=2)
    z4 = ad.conv2d_op(z3, W2, padding=2, stride=1)
    z5 = ad.relu_op(z4)
    z6 = ad.avg_pool2d_op(z5, kernel_H=2, kernel_W=2, padding=0, stride=2)
    # flatten and project to 10 logits
    z6_flat = ad.array_reshape_op(z6, (-1, 7 * 7 * 64))
    y = ad.matmul_op(z6_flat, W3) + b3

    executor = ad.Executor([y], ctx=executor_ctx)
    rand = np.random.RandomState(seed=123)
    X_val = rand.normal(scale=0.1, size=(batch_size, 1, 28, 28)).astype(np.float32)
    ath = executor.run(feed_dict={X: X_val})

    hx.hetu2onnx.export(executor, [X], [y], 'ath.onnx')

    sess = rt.InferenceSession("ath.onnx")
    input_name = sess.get_inputs()[0].name  # renamed: don't shadow builtin `input`
    pre = sess.run(None, {input_name: X_val.astype(np.float32)})[0]
    np.testing.assert_allclose(ath[0].asnumpy(), pre, rtol=1e-2)
def residual_layer(x0, input_dim, hidden_dim):
    """Two-layer residual block: ReLU(x0 + W2 @ ReLU(W1 @ x0 + b1) + b2).

    Args:
        x0: input node, shape (batch, input_dim).
        input_dim: width of the input (and of the residual output).
        hidden_dim: width of the intermediate layer.

    Returns the activated residual output, shape (batch, input_dim).

    Fix: removed the unused local `embedding_len`.
    """
    weight_1 = init.random_normal(shape=(input_dim, hidden_dim), stddev=0.1, name='weight_1')
    bias_1 = init.random_normal(shape=(hidden_dim, ), stddev=0.1, name='bias_1')
    weight_2 = init.random_normal(shape=(hidden_dim, input_dim), stddev=0.1, name='weight_2')
    bias_2 = init.random_normal(shape=(input_dim, ), stddev=0.1, name='bias_2')
    x0w = ad.matmul_op(x0, weight_1)  # (batch, hidden_dim)
    x0w_b = x0w + ad.broadcastto_op(bias_1, x0w)
    relu1 = ad.relu_op(x0w_b)
    x1w = ad.matmul_op(relu1, weight_2)  # (batch, input_dim)
    x1w_b = x1w + ad.broadcastto_op(bias_2, x1w)
    residual = x1w_b + x0  # skip connection
    return ad.relu_op(residual)
def __call__(self, x):
    """Build the computation graph for this layer and return its output node.

    Pipeline: optional input dropout -> dense projection + bias ->
    sparse message passing (csrmm with self.mp) -> optional activation.
    """
    if self.dropout > 0:
        x = ad.dropout_op(x, 1 - self.dropout)
    projected = ad.matmul_op(x, self.weight)
    msg = projected + ad.broadcastto_op(self.bias, projected)
    out = ad.CuSparse.csrmm_op(self.mp, msg)
    if self.activation == "relu":
        out = ad.relu_op(out)
    elif self.activation is not None:
        # only "relu" or no activation are supported
        raise NotImplementedError
    return out
def conv_bn_relu(x, in_channel, out_channel, name):
    """3x3 conv (pad 1, stride 1) -> batch norm -> ReLU."""
    kernel = init.random_normal(shape=(out_channel, in_channel, 3, 3), stddev=0.1,
                                name=name + '_weight')
    scale = init.random_normal(shape=(1, out_channel, 1, 1), stddev=0.1,
                               name=name + '_bn_scale')
    shift = init.random_normal(shape=(1, out_channel, 1, 1), stddev=0.1,
                               name=name + '_bn_bias')
    out = ad.conv2d_op(x, kernel, padding=1, stride=1)
    out = ad.batch_normalization_op(out, scale, shift)
    return ad.relu_op(out)
def conv_bn_relu_pool(x, in_channel, out_channel, name, with_relu=True, with_pool=False):
    """3x3 conv -> batch norm, with optional ReLU and optional 2x2 max pool."""
    kernel = init.random_normal(shape=(out_channel, in_channel, 3, 3), stddev=0.1,
                                name=name + '_weight')
    scale = init.random_normal(shape=(1, out_channel, 1, 1), stddev=0.1,
                               name=name + '_bn_scale')
    shift = init.random_normal(shape=(1, out_channel, 1, 1), stddev=0.1,
                               name=name + '_bn_bias')
    out = ad.conv2d_op(x, kernel, stride=1, padding=1)
    out = ad.batch_normalization_op(out, scale, shift)
    if with_relu:
        out = ad.relu_op(out)
    if with_pool:
        out = ad.max_pool2d_op(out, kernel_H=2, kernel_W=2, stride=2, padding=0)
    return out
def conv_relu_avg(x, shape, name=None):
    """5x5 conv (pad 2, stride 1) -> ReLU -> 2x2 average pool (stride 2).

    `name` was added (backward-compatible, default None) for consistency with
    the other conv helpers in this file, which all name their weights. When
    None, the original unnamed behavior is preserved exactly.
    """
    if name is None:
        weight = init.random_normal(shape=shape, stddev=0.1)
    else:
        weight = init.random_normal(shape=shape, stddev=0.1, name=name + '_weight')
    x = ad.conv2d_op(x, weight, padding=2, stride=1)
    x = ad.relu_op(x)
    x = ad.avg_pool2d_op(x, kernel_H=2, kernel_W=2, padding=0, stride=2)
    return x
def train_hetu(num_epoch):
    """Train a 2-layer PCGCN on a 4-way partitioned graph.

    Builds one message-passing matrix per (source, destination) partition
    pair, feeds them to both layers, and trains with Adam for `num_epoch`
    epochs. Returns the list of per-epoch training losses.

    NOTE(review): relies on module-level globals — graph, convert_to_one_hot,
    PCGCN, pick_edges, sparse, time — defined elsewhere.
    """
    ctx = ndarray.gpu(0)
    feed_dict = {}
    nparts = 4
    graph.add_self_loop()
    norm = graph.gcn_norm(True)
    # Partition the graph; edge_list[i][j] indexes the edges from part i to j.
    graphs, edge_list, reindexed_edges = graph.part_graph(nparts)
    x_val = np.concatenate(list(map(lambda g: g.x, graphs)))
    y_concat = np.concatenate(list(map(lambda g: g.y, graphs)))
    y_val = convert_to_one_hot(
        y_concat, max_val=graph.num_classes)  # shape=(n, num_classes)
    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")
    feed_dict[x_] = ndarray.array(x_val, ctx=ctx)
    feed_dict[y_] = ndarray.array(y_val, ctx=ctx)
    gcn1 = PCGCN(graph.num_features, 16, npart=nparts)
    gcn2 = PCGCN(16, graph.num_classes, npart=nparts)
    # One message-passing matrix per (i, j) partition pair, shared by layers.
    mp_val = [[None for j in range(nparts)] for i in range(nparts)]
    use_sparse = [True for g in graphs]
    for i in range(nparts):
        for j in range(nparts):
            # Diagonal blocks use the subgraph's own edges; off-diagonal
            # blocks pick the cross-partition edges from the reindexed list.
            if i == j:
                edges = graphs[i].edge_index
            else:
                edges = pick_edges(reindexed_edges, edge_list[i][j])
            # Dense fallback only for diagonal blocks flagged non-sparse
            # (all partitions default to sparse here).
            if i == j and use_sparse[i] == False:
                mp_val[i][j] = sparse.csr_matrix(
                    (norm[edge_list[i][j]], (edges[1], edges[0])),
                    shape=(graphs[j].num_nodes, graphs[i].num_nodes)).toarray()
            else:
                mp_val[i][j] = ndarray.sparse_array(
                    values=norm[edge_list[i][j]],
                    indices=(edges[1], edges[0]),
                    shape=(graphs[j].num_nodes, graphs[i].num_nodes),
                    ctx=ctx)
            feed_dict[gcn1.mp[i][j]] = mp_val[i][j]
            feed_dict[gcn2.mp[i][j]] = mp_val[i][j]
    subgraph_size = list(map(lambda g: g.num_nodes, graphs))
    x = gcn1(x_, subgraph_size=subgraph_size, use_sparse=use_sparse)
    x = ad.relu_op(x)
    y = gcn2(x, subgraph_size=subgraph_size, use_sparse=use_sparse)
    # y_train = ad.slice_op(y, (0, 0), (train_split, graph.num_classes))
    # loss = ad.softmaxcrossentropy_op(y_train, y_)
    loss = ad.softmaxcrossentropy_op(y, y_)
    opt = optimizer.AdamOptimizer(0.01)
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, train_op], ctx=ctx)
    losses = []
    for i in range(num_epoch):
        loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
        y_predicted = y_predicted.asnumpy().argmax(axis=1)
        acc = (y_predicted == y_concat).sum()
        losses.append(loss_val.asnumpy()[0])
        # Start timing after the first (warm-up) epoch.
        if i == 0:
            start_time = time.time()
        print("Train loss :", loss_val.asnumpy().mean())
        print("Val accuracy:", acc / len(y_predicted))
    # NOTE(review): the hard-coded 199 divisor assumes num_epoch == 200
    # (199 timed epochs after the warm-up) — confirm against the caller.
    print("Hetu time:", (time.time() - start_time) / 199)
    return losses
def wdl_adult(whatever):
    """Wide & Deep for the Adult dataset, with dataloaders built internally.

    Loads train/validate splits, embeds the first 8 deep (categorical)
    columns, appends the remaining 4 as raw scalar features, runs a 2-layer
    MLP, concatenates with the wide features and projects to 2 logits.

    The `whatever` parameter is unused (kept for interface compatibility).

    Fix: removed the unused `X_deep_name` locals in both loops.

    Returns (loss, prediction, y_, train_op).
    """
    batch_size = 128
    lr = 5
    dim_wide = 809
    lr_ = lr / batch_size  # effective learning rate
    dim_deep = 68
    from .load_data import load_adult_data
    x_train_deep, x_train_wide, y_train, x_test_deep, x_test_wide, y_test = load_adult_data()
    W = init.random_normal([dim_wide+20, 2], stddev=0.1, name="W")
    W1 = init.random_normal([dim_deep, 50], stddev=0.1, name="W1")
    b1 = init.random_normal([50], stddev=0.1, name="b1")
    W2 = init.random_normal([50, 20], stddev=0.1, name="W2")
    b2 = init.random_normal([20], stddev=0.1, name="b2")
    X_wide = dl.dataloader_op([
        [x_train_wide, batch_size, 'train'],
        [x_test_wide, batch_size, 'validate'],
    ])
    y_ = dl.dataloader_op([
        [y_train, batch_size, 'train'],
        [y_test, batch_size, 'validate'],
    ])
    # Deep branch: embed each of the first 8 categorical columns.
    Embedding = []
    X_deep = []
    X_deep_input = None
    for i in range(8):
        X_deep.append(dl.dataloader_op([
            [x_train_deep[:, i], batch_size, 'train'],
            [x_test_deep[:, i], batch_size, 'validate'],
        ]))
        Embedding.append(init.random_normal([50, 8], stddev=0.1,
                                            name="Embedding_deep_" + str(i)))
        now = ad.embedding_lookup_op(Embedding[i], X_deep[i])
        now = ad.array_reshape_op(now, (-1, 8))
        if X_deep_input is None:
            X_deep_input = now
        else:
            X_deep_input = ad.concat_op(X_deep_input, now, 1)
    # Remaining 4 deep columns are appended as raw scalar features.
    for i in range(4):
        X_deep.append(dl.dataloader_op([
            [x_train_deep[:, 8+i], batch_size, 'train'],
            [x_test_deep[:, 8+i], batch_size, 'validate'],
        ]))
        # NOTE(review): the sibling wdl_adult variant reshapes with (-1, 1);
        # the fixed batch_size here assumes full batches — confirm.
        now = ad.array_reshape_op(X_deep[i + 8], (batch_size, 1))
        X_deep_input = ad.concat_op(X_deep_input, now, 1)
    mat1 = ad.matmul_op(X_deep_input, W1)
    add1 = mat1 + ad.broadcastto_op(b1, mat1)
    relu1 = ad.relu_op(add1)
    dropout1 = relu1  # ad.dropout_op(relu1, 0.5) intentionally disabled
    mat2 = ad.matmul_op(dropout1, W2)
    add2 = mat2 + ad.broadcastto_op(b2, mat2)
    relu2 = ad.relu_op(add2)
    dropout2 = relu2  # ad.dropout_op(relu2, 0.5) intentionally disabled
    dmodel = dropout2
    # Wide branch: raw wide features joined with the deep output.
    wmodel = ad.concat_op(X_wide, dmodel, 1)
    wmodel = ad.matmul_op(wmodel, W)
    prediction = wmodel
    loss = ad.softmaxcrossentropy_op(prediction, y_)
    loss = ad.reduce_mean_op(loss, [0])
    opt = optimizer.SGDOptimizer(learning_rate=lr_)
    train_op = opt.minimize(loss)
    return loss, prediction, y_, train_op
def test(args):
    """Distributed 1.5-D GCN training/evaluation over MPI + NCCL.

    Builds a 2-layer GCN using `distgcn_15d_op`, trains with Adam for 100
    epochs, all-reduces accuracy/loss across ranks each epoch, and reports
    per-epoch and average timings.

    NOTE(review): relies on module-level helpers — row_num, get_proc_groups,
    load_data, load_data_whole, convert_to_one_hot, ncclDataType_t,
    ncclRedOp_t — defined elsewhere.
    """
    comm, device_id = ad.mpi_nccl_init()
    rank = comm.localRank.value
    size = comm.nRanks.value
    # dataset -> [node_count, num_features, num_classes]
    dataset_info = {
        'Reddit': [232965, 602, 41],
        'Proteins': [132534, 602, 8],
        'Arch': [1644228, 602, 10],
        'Products': [2449029, 100, 47]
    }
    node_count, num_features, num_classes = dataset_info[args.dataset]
    hidden_layer_size = 128
    if num_features < 128:
        hidden_layer_size = 64
    replication = args.replication
    # Number of nodes owned by this rank under the 1.5-D partitioning.
    node_Count_Self = row_num(node_count, rank // replication,
                              size // replication)
    node_Count_All = node_count
    _, _, row_groups, col_groups = get_proc_groups(size, replication)
    executor_ctx = ndarray.gpu(device_id)
    if size > 1:
        adj_part, data_part, row_part, col_part, input_part, label_part = load_data(
            args, size, replication, rank)
    else:
        adj_part, data_part, row_part, col_part, input_part, label_part = load_data_whole(
            args)
    adj_matrix = ndarray.sparse_array(data_part, (row_part, col_part),
                                      shape=adj_part.shape, ctx=executor_ctx)
    # train:val:test=6:2:2
    # Our optimization on distributed GNN algorithm does NOT affect the correctness!
    # Here due to the limitation of current slice_op, data is split continuously.
    # Continuous split is unfriendly for reordered graph data where nodes are already clustered.
    # Specifically, training on some node clusters and testing on other clusters may cause poor test accuracy.
    # The better way is to split data randomly!
    train_split, test_split = 0.6, 0.8
    train_node = int(train_split * node_Count_Self)
    test_node = int(test_split * node_Count_Self)
    A = ad.Variable(name="A", trainable=False)
    H = ad.Variable(name="H")
    # Glorot-uniform initialization for both weight matrices.
    np.random.seed(123)
    bounds = np.sqrt(6.0 / (num_features + hidden_layer_size))
    W1_val = np.random.uniform(low=-bounds, high=bounds,
                               size=[num_features, hidden_layer_size]).astype(np.float32)
    W1 = ad.Variable(name="W1", value=W1_val)
    bounds = np.sqrt(6.0 / (num_classes + hidden_layer_size))
    np.random.seed(123)
    W2_val = np.random.uniform(low=-bounds, high=bounds,
                               size=[hidden_layer_size, num_classes]).astype(np.float32)
    W2 = ad.Variable(name="W2", value=W2_val)
    y_ = ad.Variable(name="y_")
    # Two distributed GCN layers with ReLU in between.
    z = ad.distgcn_15d_op(A, H, W1, node_Count_Self, node_Count_All, size,
                          replication, device_id, comm,
                          [row_groups, col_groups], True)
    H1 = ad.relu_op(z)
    y = ad.distgcn_15d_op(A, H1, W2, node_Count_Self, node_Count_All, size,
                          replication, device_id, comm,
                          [row_groups, col_groups], True)
    # Continuous split: first 60% of local rows train, last 20% test.
    y_train = ad.slice_op(y, (0, 0), (train_node, num_classes))
    label_train = ad.slice_op(y_, (0, 0), (train_node, num_classes))
    y_test = ad.slice_op(y, (test_node, 0),
                         (node_Count_Self - test_node, num_classes))
    label_test = ad.slice_op(y_, (test_node, 0),
                             (node_Count_Self - test_node, num_classes))
    loss = ad.softmaxcrossentropy_op(y_train, label_train)
    loss_test = ad.softmaxcrossentropy_op(y_test, label_test)
    opt = optimizer.AdamOptimizer()
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, loss_test, train_op], ctx=executor_ctx)
    feed_dict = {
        A: adj_matrix,
        H: ndarray.array(input_part, ctx=executor_ctx),
        y_: ndarray.array(convert_to_one_hot(label_part, max_val=num_classes),
                          ctx=executor_ctx),
    }
    epoch_num = 100
    epoch_all, epoch_0 = 0, 0
    for i in range(epoch_num):
        epoch_start_time = time.time()
        results = executor.run(feed_dict=feed_dict)
        loss = results[0].asnumpy().sum()
        y_out = results[1]
        loss_test = results[2].asnumpy().sum()
        epoch_end_time = time.time()
        epoch_time = epoch_end_time - epoch_start_time
        epoch_all += epoch_time
        # Remember the first-epoch (warm-up) time so it can be excluded later.
        if i == 0:
            epoch_0 = epoch_time
        print("[Epoch: %d, Rank: %d] Epoch time: %.3f, Total time: %.3f"
              % (i, rank, epoch_time, epoch_all))
        # Local accuracy counts over this rank's train/test slices.
        y_out_train, y_predict = y_out.asnumpy().argmax(
            axis=1)[:train_node], y_out.asnumpy().argmax(axis=1)[test_node:]
        label_train, label_test = label_part[:train_node], label_part[
            test_node:]
        train_acc = ndarray.array(np.array([(y_out_train == label_train).sum()
                                            ]), ctx=executor_ctx)
        test_acc = ndarray.array(np.array([(y_predict == label_test).sum()]),
                                 ctx=executor_ctx)
        train_loss = ndarray.array(np.array([loss]), ctx=executor_ctx)
        test_loss = ndarray.array(np.array([loss_test]), ctx=executor_ctx)
        # Sum the local counts across ranks: within the column group under
        # replication, otherwise across the whole communicator.
        if replication > 1:
            col_groups[rank % replication].dlarrayNcclAllReduce(
                test_acc, test_acc, ncclDataType_t.ncclFloat32,
                ncclRedOp_t.ncclSum)
            col_groups[rank % replication].dlarrayNcclAllReduce(
                test_loss, test_loss, ncclDataType_t.ncclFloat32,
                ncclRedOp_t.ncclSum)
            col_groups[rank % replication].dlarrayNcclAllReduce(
                train_acc, train_acc, ncclDataType_t.ncclFloat32,
                ncclRedOp_t.ncclSum)
            col_groups[rank % replication].dlarrayNcclAllReduce(
                train_loss, train_loss, ncclDataType_t.ncclFloat32,
                ncclRedOp_t.ncclSum)
        else:
            comm.dlarrayNcclAllReduce(test_acc, test_acc,
                                      ncclDataType_t.ncclFloat32,
                                      ncclRedOp_t.ncclSum)
            comm.dlarrayNcclAllReduce(test_loss, test_loss,
                                      ncclDataType_t.ncclFloat32,
                                      ncclRedOp_t.ncclSum)
            comm.dlarrayNcclAllReduce(train_acc, train_acc,
                                      ncclDataType_t.ncclFloat32,
                                      ncclRedOp_t.ncclSum)
            comm.dlarrayNcclAllReduce(train_loss, train_loss,
                                      ncclDataType_t.ncclFloat32,
                                      ncclRedOp_t.ncclSum)
        # Normalize the global counts to fractions of the full train/test sets.
        test_acc = float(
            test_acc.asnumpy()[0]) / (node_count - test_split * node_count)
        test_loss = test_loss.asnumpy()[0] / (node_count -
                                              test_split * node_count)
        train_acc = float(train_acc.asnumpy()[0]) / (train_split * node_count)
        train_loss = train_loss.asnumpy()[0] / (train_split * node_count)
        if rank == 0:
            print("[Epoch: %d] Train Loss: %.3f, Train Accuracy: %.3f, Test Loss: %.3f, Test Accuracy: %.3f"\
                %(i,train_loss, train_acc, test_loss, test_acc))
    # Average epoch time excludes the warm-up epoch; averaged across ranks.
    avg_epoch_time = (epoch_all - epoch_0) / (epoch_num - 1)
    results = ndarray.array(np.array([epoch_all, avg_epoch_time]),
                            ctx=executor_ctx)
    comm.dlarrayNcclAllReduce(results, results, ncclDataType_t.ncclFloat32,
                              reduceop=ncclRedOp_t.ncclSum)
    results = results.asnumpy() / size
    if rank == 0:
        print("\nAverage Total Time: %.3f, Average Epoch Time: %.3f" %
              (results[0], results[1]))