Example #1
    def __init__(self, in_features, out_features, activation=None, dropout=0,
                 name="GCN", custom_init=None, mp_val=None):

        self.weight = initializers.xavier_uniform(shape=(in_features, out_features), name=name+"_Weight")
        self.bias = initializers.zeros(shape=(out_features,), name=name+"_Bias")
        self.weight2 = initializers.xavier_uniform(shape=(in_features, out_features), name=name+"_Weight2")
        # self.mp is a sparse matrix and should appear in feed_dict later
        self.mp = ad.Variable("message_passing", trainable=False, value=mp_val)
        self.activation = activation
        self.dropout = dropout
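This constructor is a method excerpt and assumes an enclosing layer class. Because self.mp is declared non-trainable, its (sparse) value can be supplied through feed_dict at run time, exactly as the training scripts in Examples #5 and #7 do. A minimal, hypothetical binding sketch, assuming a constructed layer gcn and a precomputed matrix mp_val:

# Hypothetical sketch: bind the non-trainable message-passing placeholder
# at run time; gcn and mp_val are assumed to already exist.
feed_dict = {gcn.mp: mp_val}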
Example #2
def logreg(x, y_):
    '''
    Logistic Regression model, for MNIST dataset.

    Parameters:
        x: Variable(hetu.gpu_ops.Node.Node), shape (N, dims)
        y_: Variable(hetu.gpu_ops.Node.Node), shape (N, num_classes)
    Return:
        loss: Variable(hetu.gpu_ops.Node.Node), shape (1,)
        y: Variable(hetu.gpu_ops.Node.Node), shape (N, num_classes)
    '''

    print("Build logistic regression model...")
    weight = init.zeros((784, 10), name='logreg_weight')
    bias = init.zeros((10, ), name='logreg_bias')
    x = ad.matmul_op(x, weight)
    y = x + ad.broadcastto_op(bias, x)
    loss = ad.softmaxcrossentropy_op(y, y_)
    loss = ad.reduce_mean_op(loss, [0])
    return loss, y
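logreg only builds the computation graph; running it requires an executor. A minimal training-step sketch, reusing the conventions of the later examples (ad.Executor, optimizer.AdamOptimizer, ndarray) with hypothetical NumPy batches x_np of shape (N, 784) and y_np of shape (N, 10):

# Sketch only: wire logreg into an executor and run one training step.
ctx = ndarray.gpu(0)
x_ = ad.Variable(name="x_")
y_ = ad.Variable(name="y_")
loss, y = logreg(x_, y_)
train_op = optimizer.AdamOptimizer(0.01).minimize(loss)
executor = ad.Executor([loss, y, train_op], ctx=ctx)
loss_val, y_val, _ = executor.run(feed_dict={
    x_: ndarray.array(x_np, ctx=ctx),  # x_np: (N, 784) pixel batch
    y_: ndarray.array(y_np, ctx=ctx),  # y_np: (N, 10) one-hot labels
})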
Example #3
def get_token_embeddings(vocab_size,
                         num_units,
                         initializer=init.xavier_normal,
                         zero_pad=True):
    if zero_pad:
        embedding_part = initializer(name='embedding_table',
                                     shape=(vocab_size - 1, num_units))
        padding_zero = init.zeros(name='padding_zero',
                                  shape=(1, num_units),
                                  trainable=False)
        embeddings = ad.concat_op(padding_zero, embedding_part)
    else:
        embeddings = initializer(name='embedding_table',
                                 shape=(vocab_size, num_units))
    return embeddings
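With zero_pad enabled, row 0 of the table is a separate non-trainable all-zero variable concatenated above the learned rows, so the conventional padding token (id 0) always embeds to a zero vector and receives no gradient. Building the table is then a one-liner; the sizes below are illustrative:

# Illustrative sizes: a 30000 x 512 table whose first row stays zero.
embeddings = get_token_embeddings(vocab_size=30000, num_units=512)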
Example #4
    def __init__(self,
                 in_features: int,
                 out_features: int,
                 npart: int,
                 name="GCN",
                 custom_init=None):
        if custom_init is not None:
            self.weight = ad.Variable(value=custom_init[0],
                                      name=name + "_Weight")
            self.bias = ad.Variable(value=custom_init[1], name=name + "_Bias")
        else:
            self.weight = initializers.xavier_uniform(shape=(in_features,
                                                             out_features),
                                                      name=name + "_Weight")
            self.bias = initializers.zeros(shape=(out_features, ),
                                           name=name + "_Bias")
        self.in_features = in_features
        self.out_features = out_features
        self.npart = npart
        # npart * npart message_passing matrices, either dense or sparse
        self.mp = [[
            ad.Variable("message_passing", trainable=False)
            for j in range(npart)
        ] for i in range(npart)]
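Since the npart x npart message-passing blocks are plain placeholders, every block must be bound before the executor runs. A hedged sketch, assuming a constructed layer `layer` and a matching npart x npart grid mp_vals of precomputed matrices:

# Hypothetical sketch: bind all npart * npart placeholder blocks.
feed_dict = {}
for i in range(layer.npart):
    for j in range(layer.npart):
        feed_dict[layer.mp[i][j]] = mp_vals[i][j]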
Example #5
def train_hetu(num_epoch):
    ctx = ndarray.gpu(0)

    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")
    mask_ = ad.Variable(name="mask_")

    gcn1 = GraphSage(graph.num_features, hidden_layer_size, activation="relu", dropout=0.1)
    gcn2 = GraphSage(2*hidden_layer_size, hidden_layer_size, activation="relu", dropout=0.1)

    x = gcn1(x_)
    x = gcn2(x)
    W = initializers.xavier_uniform(shape=(2*hidden_layer_size, graph.num_classes))
    B = initializers.zeros(shape=(graph.num_classes,))
    x = ad.matmul_op(x, W)
    y = x + ad.broadcastto_op(B, x)

    loss = ad.softmaxcrossentropy_op(y, y_)
    loss = ad.mul_op(loss, mask_)
    opt = optimizer.AdamOptimizer(0.01)
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, train_op], ctx=ctx)

    def eval():
        start = time.time()
        ad.Dropout.DropoutOp.phase = "eval"
        mp_val = mp_matrix(graph_full, ctx)

        feed_dict = {
            gcn1.mp: mp_val,
            gcn2.mp: mp_val,
            x_: ndarray.array(graph_full.x, ctx=ctx),
        }
        executor_eval = ad.Executor([y], ctx=ctx)
        y_predicted, = executor_eval.run(feed_dict=feed_dict)
        y_predicted = y_predicted.asnumpy().argmax(axis=1)
        acc = (y_predicted == graph_full.y)[train_split:].sum()
        print("Test accuracy:", acc/len(y_predicted[train_split:]))
        ad.Dropout.DropoutOp.phase = "training"

    epoch = 0
    nnodes = 0
    batch_size = 1000
    with GraphSageSampler(graph, batch_size, depth=2, num_sample_thread=4) as sampler:
        start = time.time()
        while True:
            g_sample, mask = sampler.sample()
            mp_val = mp_matrix(g_sample, ctx)
            #print(time.time() - start)
            feed_dict = {
                gcn1.mp: mp_val,
                gcn2.mp: mp_val,
                mask_: ndarray.array(mask, ctx=ctx),
                x_: ndarray.array(g_sample.x, ctx=ctx),
                y_: ndarray.array(convert_to_one_hot(g_sample.y, max_val=graph.num_classes), ctx=ctx)
            }
            loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
            y_predicted = y_predicted.asnumpy().argmax(axis=1)
            acc = ((y_predicted == g_sample.y) * mask).sum()
            # print(i, "Train loss :", loss_val.asnumpy().mean())
            # print(i, "Train accuracy:", acc/len(y_predicted))
            nnodes += batch_size
            if nnodes > graph_full.num_nodes:
                nnodes = 0
                epoch += 1
                print("Epoch :", epoch, time.time() - start)
                print("Train accuracy:", acc/mask.sum())
                eval()
                start = time.time()
                if epoch >= num_epoch:
                    break
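Note that an "epoch" here is counted in sampled nodes, not iterations: the loop keeps drawing 1000-node batches until roughly graph_full.num_nodes nodes have been consumed, then prints the metrics and evaluates. Assuming the globals used above (graph, graph_full, train_split, hidden_layer_size) are defined, invocation is simply:

train_hetu(num_epoch=10)  # stop after ten full passes' worth of sampled nodes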
Example #6
def layer_norm(input_tensor, feature_size, eps=1e-8):
    scale = init.ones(name='layer_norm_scale', shape=(feature_size, ))
    bias = init.zeros(name='layer_norm_bias', shape=(feature_size, ))
    return ad.layer_normalization_op(input_tensor, scale, bias, eps=eps)
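A usage sketch, assuming a graph node h whose last dimension has a static size of 512:

# Normalize h over its feature dimension with the learned per-feature
# scale and bias created inside layer_norm.
h = layer_norm(h, feature_size=512)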
Example #7
def train_hetu(num_epoch):
    ctx = ndarray.gpu(0)

    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")

    gcn1 = GraphSage(graph.num_features,
                     hidden_layer_size,
                     activation="relu",
                     dropout=0.1)
    gcn2 = GraphSage(2 * hidden_layer_size,
                     hidden_layer_size,
                     activation="relu",
                     dropout=0.1)

    x = gcn1(x_)
    x = gcn2(x)
    W = initializers.xavier_uniform(shape=(2 * hidden_layer_size,
                                           graph.num_classes))
    B = initializers.zeros(shape=(graph.num_classes, ))
    x = ad.matmul_op(x, W)
    y = x + ad.broadcastto_op(B, x)

    loss = ad.softmaxcrossentropy_op(y, y_)

    opt = optimizer.AdamOptimizer(0.01)
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, train_op], ctx=ctx)

    def eval():
        start = time.time()
        ad.Dropout.DropoutOp.phase = "eval"
        mp_val = mp_matrix(graph_full, ctx)

        feed_dict = {
            gcn1.mp: mp_val,
            gcn2.mp: mp_val,
            x_: ndarray.array(graph_full.x, ctx=ctx),
        }
        executor_eval = ad.Executor([y], ctx=ctx)
        y_predicted, = executor_eval.run(feed_dict=feed_dict)
        y_predicted = y_predicted.asnumpy().argmax(axis=1)
        acc = (y_predicted == graph_full.y)[train_split:].sum()
        print("Test accuracy:", acc / len(y_predicted[train_split:]))
        ad.Dropout.DropoutOp.phase = "training"

    with RandomWalkSampler(graph,
                           4000,
                           2,
                           transformer=transform,
                           num_sample_thread=3) as sampler:
        for i in range(num_epoch):
            start = time.time()
            g_sample, mp_val = sampler.sample()
            #mp_val = mp_matrix(g_sample, ctx)
            #print(time.time() - start)
            feed_dict = {
                gcn1.mp: mp_val,
                gcn2.mp: mp_val,
                x_: ndarray.array(g_sample.x, ctx=ctx),
                y_: ndarray.array(convert_to_one_hot(g_sample.y,
                                                     max_val=graph.num_classes),
                                  ctx=ctx)
            }
            loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
            y_predicted = y_predicted.asnumpy().argmax(axis=1)
            acc = (y_predicted == g_sample.y).sum()
            print(i, "Train loss :", loss_val.asnumpy().mean())
            print(i, "Train accuracy:", acc / len(y_predicted))
            if (i + 1) % 100 == 0:
                eval()
            print(time.time() - start)
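Unlike Example #5, this variant lets the sampler return the message-passing matrix alongside the sampled subgraph (presumably produced by the transform passed as transformer), trains one iteration per subgraph, and evaluates on the full graph every 100 iterations. The entry point is the same:

train_hetu(num_epoch=1000)  # one iteration per sample; full-graph eval every 100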