def __init__(self, in_features, out_features, activation=None, dropout=0,
             name="GCN", custom_init=None, mp_val=None):
    self.weight = initializers.xavier_uniform(
        shape=(in_features, out_features), name=name + "_Weight")
    self.bias = initializers.zeros(
        shape=(out_features,), name=name + "_Bias")
    # Second weight matrix for the aggregated neighbor features; named
    # "_Weight2" so it no longer collides with self.weight above.
    self.weight2 = initializers.xavier_uniform(
        shape=(in_features, out_features), name=name + "_Weight2")
    # self.mp is a sparse message-passing matrix and should appear in
    # feed_dict later.
    self.mp = ad.Variable("message_passing", trainable=False, value=mp_val)
    self.activation = activation
    self.dropout = dropout
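# A minimal sketch of the forward pass this layer implies, assuming a
# GraphSage-style design: own features go through self.weight, neighbor
# features aggregated by the sparse mp matrix go through self.weight2, and
# the two halves are concatenated (matching the 2*hidden_layer_size input
# expected by the second layer in the training scripts below). The ops
# csrmm_op and dropout_op, and the axis keyword on concat_op, are
# assumptions about the ad.* API, not confirmed signatures.
def __call__(self, x):
    if self.dropout > 0:
        x = ad.dropout_op(x, 1 - self.dropout)          # assumed keep-prob signature
    self_feat = ad.matmul_op(x, self.weight)            # transform own features
    self_feat = self_feat + ad.broadcastto_op(self.bias, self_feat)
    neigh = ad.csrmm_op(self.mp, x)                     # sparse neighbor aggregation
    neigh_feat = ad.matmul_op(neigh, self.weight2)
    out = ad.concat_op(self_feat, neigh_feat, axis=1)   # (N, 2*out_features)
    if self.activation == "relu":
        out = ad.relu_op(out)
    return out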
def logreg(x, y_):
    '''
    Logistic Regression model, for MNIST dataset.

    Parameters:
        x: Variable(hetu.gpu_ops.Node.Node), shape (N, dims)
        y_: Variable(hetu.gpu_ops.Node.Node), shape (N, num_classes)
    Return:
        loss: Variable(hetu.gpu_ops.Node.Node), shape (1,)
        y: Variable(hetu.gpu_ops.Node.Node), shape (N, num_classes)
    '''
    print("Build logistic regression model...")
    weight = init.zeros((784, 10), name='logreg_weight')
    bias = init.zeros((10,), name='logreg_bias')
    x = ad.matmul_op(x, weight)
    y = x + ad.broadcastto_op(bias, x)
    loss = ad.softmaxcrossentropy_op(y, y_)
    loss = ad.reduce_mean_op(loss, [0])
    return loss, y
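# A hedged usage sketch for logreg, following the Executor pattern used in
# the training loops later in this file; load_mnist_batches is a
# hypothetical loader, not part of this codebase.
x = ad.Variable(name="x")
y_ = ad.Variable(name="y_")
loss, y = logreg(x, y_)
train_op = optimizer.SGDOptimizer(0.1).minimize(loss)
executor = ad.Executor([loss, y, train_op], ctx=ndarray.gpu(0))
for batch_x, batch_y in load_mnist_batches():  # hypothetical: yields (N, 784) and one-hot (N, 10)
    loss_val, _, _ = executor.run(feed_dict={x: batch_x, y_: batch_y})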
def get_token_embeddings(vocab_size, num_units,
                         initializer=init.xavier_normal, zero_pad=True):
    if zero_pad:
        # Reserve row 0 as a constant, non-trainable all-zero vector so
        # padding tokens (id 0) contribute nothing to downstream layers.
        embedding_part = initializer(
            name='embedding_table', shape=(vocab_size - 1, num_units))
        padding_zero = init.zeros(
            name='padding_zero', shape=(1, num_units), trainable=False)
        embeddings = ad.concat_op(padding_zero, embedding_part)
    else:
        embeddings = initializer(
            name='embedding_table', shape=(vocab_size, num_units))
    return embeddings
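# Why zero_pad matters, illustrated in plain NumPy (not hetu code): with row
# 0 pinned to zeros and kept non-trainable, a lookup of padding token id 0
# returns an all-zero embedding, so padded positions carry no signal.
import numpy as np
table = np.concatenate([np.zeros((1, 4)), np.random.randn(9, 4)])  # vocab_size=10, num_units=4
ids = np.array([0, 3, 0])  # id 0 is the padding token
print(table[ids][0])       # -> [0. 0. 0. 0.]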
def __init__(self, in_features: int, out_features: int, npart: int,
             name="GCN", custom_init=None):
    if custom_init is not None:
        self.weight = ad.Variable(value=custom_init[0], name=name + "_Weight")
        self.bias = ad.Variable(value=custom_init[1], name=name + "_Bias")
    else:
        self.weight = initializers.xavier_uniform(
            shape=(in_features, out_features), name=name + "_Weight")
        self.bias = initializers.zeros(
            shape=(out_features,), name=name + "_Bias")
    self.in_features = in_features
    self.out_features = out_features
    self.npart = npart
    # npart * npart message_passing matrix, either dense or sparse;
    # each block is fed through feed_dict at run time.
    self.mp = [[ad.Variable("message_passing", trainable=False)
                for j in range(npart)] for i in range(npart)]
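# How the npart x npart grid could be consumed, sketched in NumPy under the
# assumption that node features are split row-wise into npart chunks: output
# partition i is the sum over j of mp[i][j] @ x[j], a block-partitioned
# version of the single sparse multiply in the non-partitioned layer.
import numpy as np
def block_message_passing(mp_blocks, x_parts):
    # mp_blocks[i][j] carries messages from partition j into partition i
    return [sum(mp_blocks[i][j] @ x_parts[j] for j in range(len(x_parts)))
            for i in range(len(mp_blocks))]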
def train_hetu(num_epoch):
    ctx = ndarray.gpu(0)
    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")
    mask_ = ad.Variable(name="mask_")

    gcn1 = GraphSage(graph.num_features, hidden_layer_size,
                     activation="relu", dropout=0.1)
    gcn2 = GraphSage(2 * hidden_layer_size, hidden_layer_size,
                     activation="relu", dropout=0.1)
    x = gcn1(x_)
    x = gcn2(x)
    W = initializers.xavier_uniform(
        shape=(2 * hidden_layer_size, graph.num_classes))
    B = initializers.zeros(shape=(graph.num_classes,))
    x = ad.matmul_op(x, W)
    y = x + ad.broadcastto_op(B, x)
    # Mask the per-node loss so nodes outside the current batch do not train.
    loss = ad.softmaxcrossentropy_op(y, y_)
    loss = ad.mul_op(loss, mask_)
    opt = optimizer.AdamOptimizer(0.01)
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, train_op], ctx=ctx)

    def eval():
        # Evaluate on the full graph with dropout disabled.
        ad.Dropout.DropoutOp.phase = "eval"
        mp_val = mp_matrix(graph_full, ctx)
        feed_dict = {
            gcn1.mp: mp_val,
            gcn2.mp: mp_val,
            x_: ndarray.array(graph_full.x, ctx=ctx),
        }
        executor_eval = ad.Executor([y], ctx=ctx)
        y_predicted, = executor_eval.run(feed_dict=feed_dict)
        y_predicted = y_predicted.asnumpy().argmax(axis=1)
        acc = (y_predicted == graph_full.y)[train_split:].sum()
        print("Test accuracy:", acc / len(y_predicted[train_split:]))
        ad.Dropout.DropoutOp.phase = "training"

    epoch = 0
    nnodes = 0
    batch_size = 1000
    with GraphSageSampler(graph, batch_size, depth=2,
                          num_sample_thread=4) as sampler:
        start = time.time()
        while True:
            g_sample, mask = sampler.sample()
            mp_val = mp_matrix(g_sample, ctx)
            feed_dict = {
                gcn1.mp: mp_val,
                gcn2.mp: mp_val,
                mask_: ndarray.array(mask, ctx=ctx),
                x_: ndarray.array(g_sample.x, ctx=ctx),
                y_: ndarray.array(convert_to_one_hot(
                    g_sample.y, max_val=graph.num_classes), ctx=ctx),
            }
            loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
            y_predicted = y_predicted.asnumpy().argmax(axis=1)
            acc = ((y_predicted == g_sample.y) * mask).sum()
            # Count one epoch once roughly every node has been sampled.
            nnodes += batch_size
            if nnodes > graph_full.num_nodes:
                nnodes = 0
                epoch += 1
                print("Epoch :", epoch, time.time() - start)
                print("Train accuracy:", acc / mask.sum())
                eval()
                start = time.time()
                if epoch >= num_epoch:
                    break
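# convert_to_one_hot is called above but defined elsewhere; a reference
# NumPy version consistent with the call site (labels plus max_val) might
# look like this. This is an assumption about the helper, not its actual
# definition, hence the _ref suffix.
import numpy as np
def convert_to_one_hot_ref(vals, max_val):
    one_hot = np.zeros((len(vals), max_val), dtype=np.float32)
    one_hot[np.arange(len(vals)), vals] = 1.0
    return one_hot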
def layer_norm(input_tensor, feature_size, eps=1e-8):
    # Learnable per-feature scale (gamma) and bias (beta).
    scale = init.ones(name='layer_norm_scale', shape=(feature_size,))
    bias = init.zeros(name='layer_norm_bias', shape=(feature_size,))
    return ad.layer_normalization_op(input_tensor, scale, bias, eps=eps)
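# What layer_normalization_op computes, written out in NumPy: each row is
# normalized to zero mean and unit variance over the feature axis, then the
# learned scale and bias are applied; eps keeps the division stable when
# the variance is near zero.
import numpy as np
def layer_norm_ref(x, scale, bias, eps=1e-8):
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return scale * (x - mean) / np.sqrt(var + eps) + bias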
def train_hetu(num_epoch):
    ctx = ndarray.gpu(0)
    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")

    gcn1 = GraphSage(graph.num_features, hidden_layer_size,
                     activation="relu", dropout=0.1)
    gcn2 = GraphSage(2 * hidden_layer_size, hidden_layer_size,
                     activation="relu", dropout=0.1)
    x = gcn1(x_)
    x = gcn2(x)
    W = initializers.xavier_uniform(
        shape=(2 * hidden_layer_size, graph.num_classes))
    B = initializers.zeros(shape=(graph.num_classes,))
    x = ad.matmul_op(x, W)
    y = x + ad.broadcastto_op(B, x)
    loss = ad.softmaxcrossentropy_op(y, y_)
    opt = optimizer.AdamOptimizer(0.01)
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, train_op], ctx=ctx)

    def eval():
        # Evaluate on the full graph with dropout disabled.
        ad.Dropout.DropoutOp.phase = "eval"
        mp_val = mp_matrix(graph_full, ctx)
        feed_dict = {
            gcn1.mp: mp_val,
            gcn2.mp: mp_val,
            x_: ndarray.array(graph_full.x, ctx=ctx),
        }
        executor_eval = ad.Executor([y], ctx=ctx)
        y_predicted, = executor_eval.run(feed_dict=feed_dict)
        y_predicted = y_predicted.asnumpy().argmax(axis=1)
        acc = (y_predicted == graph_full.y)[train_split:].sum()
        print("Test accuracy:", acc / len(y_predicted[train_split:]))
        ad.Dropout.DropoutOp.phase = "training"

    # The transform callback builds the message-passing matrix inside the
    # sampler threads, so sample() returns (subgraph, mp_val) directly.
    with RandomWalkSampler(graph, 4000, 2, transformer=transform,
                           num_sample_thread=3) as sampler:
        for i in range(num_epoch):
            start = time.time()
            g_sample, mp_val = sampler.sample()
            feed_dict = {
                gcn1.mp: mp_val,
                gcn2.mp: mp_val,
                x_: ndarray.array(g_sample.x, ctx=ctx),
                y_: ndarray.array(convert_to_one_hot(
                    g_sample.y, max_val=graph.num_classes), ctx=ctx),
            }
            loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
            y_predicted = y_predicted.asnumpy().argmax(axis=1)
            acc = (y_predicted == g_sample.y).sum()
            print(i, "Train loss :", loss_val.asnumpy().mean())
            print(i, "Train accuracy:", acc / len(y_predicted))
            if (i + 1) % 100 == 0:
                eval()
            print(time.time() - start)
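# mp_matrix and the transform callback passed to RandomWalkSampler are
# defined elsewhere; a common choice for the message-passing matrix is the
# symmetrically normalized adjacency D^-1/2 (A + I) D^-1/2. A SciPy sketch
# of that normalization, as an assumption about what mp_matrix computes:
import numpy as np
import scipy.sparse as sp
def normalized_adjacency(edge_index, num_nodes):
    src, dst = edge_index
    adj = sp.coo_matrix((np.ones(len(src)), (dst, src)),
                        shape=(num_nodes, num_nodes))
    adj = adj + sp.eye(num_nodes)                    # add self-loops
    deg = np.asarray(adj.sum(axis=1)).flatten()
    d_inv_sqrt = sp.diags(1.0 / np.sqrt(np.maximum(deg, 1)))
    return (d_inv_sqrt @ adj @ d_inv_sqrt).tocsr()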