Esempio n. 1
0
def main(setting):
    for key, value in setting.items():
        os.environ[key] = str(value)
    if os.environ['DMLC_ROLE'] == "server":
        ad.server_init()
        ad.server_finish()
    elif os.environ['DMLC_ROLE'] == "worker":
        ad.worker_init()
        worker()
        ad.worker_finish()
    elif os.environ['DMLC_ROLE'] == "scheduler":
        ad.scheduler_init()
        ad.scheduler_finish()
    else:
        raise ValueError("Unknown role", os.environ['DMLC_ROLE'])
Esempio n. 2
0
def start_process(settings, args):
    for key, value in settings.items():
        os.environ[key] = str(value)
    if os.environ['DMLC_ROLE'] == "server":
        ad.server_init()
        ad.server_finish()
    elif os.environ['DMLC_ROLE'] == "worker":
        ad.worker_init()
        test()
        ad.worker_finish()
    elif os.environ['DMLC_ROLE'] == "scheduler":
        ad.scheduler_init()
        ad.scheduler_finish()
    else:
        raise ValueError("Unknown role", os.environ['DMLC_ROLE'])
Esempio n. 3
0
def start_process(settings, args):
    for key, value in settings.items():
        os.environ[key] = str(value)
    if os.environ['DMLC_ROLE'] == "server":
        ad.server_init()
        ad.server_finish()
    elif os.environ['DMLC_ROLE'] == "worker":
        ad.worker_init()
        test(args.func)
        # test_dense_n_draw(range(100000, 1000000, 100000), 'pushpull')
        # test_sparse_n_draw(range(100, 600, 100), range(1000, 6000, 1000), 'sparsepush')
        ad.worker_finish()
    elif os.environ['DMLC_ROLE'] == "scheduler":
        ad.scheduler_init()
        ad.scheduler_finish()
    else:
        raise ValueError("Unknown role", os.environ['DMLC_ROLE'])
Esempio n. 4
0
def start_process(settings, args, arr=None, push_arr=None, pull_arr=None):
    for key, value in settings.items():
        os.environ[key] = str(value)
    if os.environ['DMLC_ROLE'] == "server":
        ad.server_init()
        ad.server_finish()
    elif os.environ['DMLC_ROLE'] == "worker":
        ad.worker_init()
        test_api(arr, push_arr, pull_arr)
        test_init_ps(arr, 'constant', 1234.567)
        test_init_ps(arr, 'uniform', -0.5, 0.4)
        test_init_ps(arr, 'normal', 5.6, 2.0)
        test_init_ps(arr, 'truncated_normal', -2.3, 1.4)
        test_api(arr, push_arr, pull_arr, True)
        test_init_ps(arr, 'constant', 1234.567, True)
        test_init_ps(arr, 'uniform', -0.5, 0.4, True)
        test_init_ps(arr, 'normal', 5.6, 2.0, True)
        test_init_ps(arr, 'truncated_normal', -2.3, 1.4, True)
        ad.worker_finish()
    elif os.environ['DMLC_ROLE'] == "scheduler":
        ad.scheduler_init()
        ad.scheduler_finish()
    else:
        raise ValueError("Unknown role", os.environ['DMLC_ROLE'])
Esempio n. 5
0
def train_main(args):
    with open(os.path.join(args.path, "meta.yml"), 'rb') as f:
        meta = yaml.load(f.read(), Loader=yaml.FullLoader)
    hidden_layer_size = args.hidden_size
    num_epoch = args.num_epoch
    rank = ad.get_worker_communicate().rank()
    nrank = int(os.environ["DMLC_NUM_WORKER"])
    ctx = ndarray.gpu(rank % args.num_local_worker)
    embedding_width = args.hidden_size
    extract_width = embedding_width * (meta["feature"] - 1)

    y_ = dl.GNNDataLoaderOp(lambda g: ndarray.array(
        convert_to_one_hot(g.y, max_val=g.num_classes), ctx=ndarray.cpu()))
    mask_ = ad.Variable(name="mask_")
    gcn1 = GCN(extract_width, hidden_layer_size, activation="relu")
    gcn2 = GCN(hidden_layer_size, meta["class"])
    index = dl.GNNDataLoaderOp(
        lambda g: ndarray.array(g.x[:, 0:-1], ctx=ndarray.cpu()),
        ctx=ndarray.cpu())
    embedding = initializers.random_normal([meta["idx_max"], embedding_width],
                                           stddev=0.1)
    embed = ad.embedding_lookup_op(embedding, index)
    embed = ad.array_reshape_op(embed, (-1, extract_width))
    # embed = ad.reduce_mean_op(embed, axes=1)
    # x = ad.concat_op(x_, embed, axis=1)
    x = gcn1(embed)
    y = gcn2(x)
    loss = ad.softmaxcrossentropy_op(y, y_)
    train_loss = loss * mask_
    train_loss = ad.reduce_mean_op(train_loss, [0])
    opt = optimizer.SGDOptimizer(args.learning_rate)
    train_op = opt.minimize(train_loss)
    ad.worker_init()
    distributed.ps_init(rank, nrank)

    ngraph = meta["partition"]["nodes"][rank] // args.batch_size
    graphs = prepare_data(ngraph)
    idx = 0
    g_sample, mp_val, mask, mask_eval = graphs[idx]
    idx = (idx + 1) % ngraph
    dl.GNNDataLoaderOp.step(g_sample)
    dl.GNNDataLoaderOp.step(g_sample)
    epoch = 0
    nnodes = 0
    executor = ad.Executor([loss, y, train_op],
                           ctx=ctx,
                           comm_mode='PS',
                           use_sparse_pull=False,
                           cstable_policy=args.cache)
    while True:
        g_sample_nxt, mp_val_nxt, mask_nxt, mask_eval_nxt = graphs[idx]
        idx = (idx + 1) % ngraph
        dl.GNNDataLoaderOp.step(g_sample_nxt)
        feed_dict = {gcn1.mp: mp_val, gcn2.mp: mp_val, mask_: mask}
        loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
        y_predicted = y_predicted.asnumpy().argmax(axis=1)

        acc = np.sum((y_predicted == g_sample.y) * mask_eval)
        train_acc = np.sum((y_predicted == g_sample.y) * mask)
        stat.update(acc, mask_eval.sum(),
                    np.sum(loss_val.asnumpy() * mask_eval) / mask_eval.sum())
        stat.update_train(train_acc, mask.sum(),
                          np.sum(loss_val.asnumpy() * mask) / mask.sum())

        # distributed.ps_get_worker_communicator().BarrierWorker()
        nnodes += mask.sum() + mask_eval.sum()
        if nnodes > meta["partition"]["nodes"][rank]:
            nnodes = 0
            epoch += 1
            if rank == 0:
                stat.print(epoch)
            if epoch >= num_epoch:
                break
        g_sample, mp_val, mask, mask_eval = g_sample_nxt, mp_val_nxt, mask_nxt, mask_eval_nxt