def train_hetu(args):
    """Train a two-layer GraphSage classifier on sampled subgraphs (PS mode).

    Reads dataset metadata from ``<args.path>/meta.yml``, builds the symbolic
    graph, then loops over subgraphs from DistributedGraphSageSampler until
    ``args.num_epoch`` passes over this worker's partition are done.
    """
    # Dataset metadata: feature width, class count, per-worker partition sizes.
    with open(os.path.join(args.path, "meta.yml"), 'rb') as f:
        meta = yaml.load(f.read(), Loader=yaml.FullLoader)

    hidden = args.hidden_size
    num_epoch = args.num_epoch
    rank = int(os.environ["WORKER_ID"])
    nrank = int(os.environ["DMLC_NUM_WORKER"])
    ctx = ndarray.gpu(rank)

    # Symbolic inputs: node features, one-hot labels, and the train mask.
    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")
    mask_ = ad.Variable(name="mask_")

    # Two stacked GraphSage layers; the second consumes 2*hidden features,
    # matching the first layer's concatenated output width.
    sage_in = GraphSage(meta["feature"], hidden, activation="relu", dropout=0.1)
    sage_hid = GraphSage(2 * hidden, hidden, activation="relu", dropout=0.1)
    feat = sage_hid(sage_in(x_))

    # Linear classification head on top of the 2*hidden-wide representation.
    W = initializers.xavier_uniform(shape=(2 * hidden, meta["class"]))
    B = initializers.zeros(shape=(meta["class"],))
    lin = ad.matmul_op(feat, W)
    y = lin + ad.broadcastto_op(B, lin)

    # Masked mean cross-entropy: only nodes selected by mask_ contribute.
    masked_loss = ad.mul_op(ad.softmaxcrossentropy_op(y, y_), mask_)
    loss = ad.reduce_mean_op(masked_loss, [0])
    train_op = optimizer.SGDOptimizer(0.1).minimize(loss)

    executor = ad.Executor([loss, y, train_op], ctx=ctx, comm_mode='PS')
    distributed.ps_init(rank, nrank)

    batch_size = 4000
    with DistributedGraphSageSampler(args.path, batch_size, 2, 2,
                                     rank=rank, nrank=nrank) as sampler:
        epoch, nnodes = 0, 0
        start = time.time()
        while True:
            g_sample, mask = sampler.sample()
            mp_val = mp_matrix(g_sample, ndarray.gpu(rank))
            one_hot = convert_to_one_hot(g_sample.y, max_val=g_sample.num_classes)
            feed = {
                sage_in.mp: mp_val,
                sage_hid.mp: mp_val,
                mask_: ndarray.array(mask, ctx=ctx),
                x_: ndarray.array(g_sample.x, ctx=ctx),
                y_: ndarray.array(one_hot, ctx=ctx),
            }
            loss_val, y_predicted, _ = executor.run(feed_dict=feed)
            pred = y_predicted.asnumpy().argmax(axis=1)
            acc = ((pred == g_sample.y) * mask).sum()
            distributed.ps_get_worker_communicator().BarrierWorker()

            nnodes += batch_size
            # One "epoch" ~= one pass over this worker's node partition.
            if nnodes > meta["partition"]["nodes"][rank]:
                nnodes = 0
                epoch += 1
                print("Epoch :", epoch, time.time() - start)
                print("Train accuracy:", acc / mask.sum())
                start = time.time()
                if epoch >= num_epoch:
                    break
def train_main(args):
    """Single-process TF1 training loop over pre-sampled graph mini-batches.

    Cycles through ``ngraph`` cached sample graphs, tracking train accuracy
    (over ``mask``) and eval accuracy (over ``mask_eval``) per epoch, until
    ``args.num_epoch`` epochs complete.
    """
    with open(os.path.join(args.path, "meta.yml"), 'rb') as f:
        meta = yaml.load(f.read(), Loader=yaml.FullLoader)
    hidden_layer_size = args.hidden_size
    num_epoch = args.num_epoch
    rank = ad.get_worker_communicate().rank()
    device_id = rank % args.num_local_worker
    nrank = ad.get_worker_communicate().nrank()
    distributed.ps_init(rank, nrank)
    # Number of cached sample graphs covering this worker's node partition.
    ngraph = meta["partition"]["nodes"][rank] // args.batch_size
    graphs = prepare_data(ngraph)
    idx, epoch, nnodes = 0, 0, 0
    worker_device = "gpu:0"
    # All cached graphs share one fixed node count, so placeholders are static.
    graph_len = graphs[0][0].y.shape[0]
    with tf.device(worker_device):
        norm_adj = tf.compat.v1.sparse.placeholder(tf.float32, name="norm_adj")
        # Last feature column is excluded here (used as the label/index elsewhere
        # in this file — see the feed of g_sample.x[:, 0:-1] below).
        sparse_feature = tf.placeholder(tf.int32, [graph_len, meta["feature"] - 1])
        y_ = tf.placeholder(tf.int32, [graph_len], name="y_")
        train_mask = tf.placeholder(tf.float32, [graph_len], name="train_mask")
        loss, y, train_op = model(norm_adj, sparse_feature, y_, train_mask)
    init = tf.global_variables_initializer()
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    sess.run(init)
    acc_cnt, total_cnt = 0, 0
    train_acc, train_cnt = 0, 0
    start = time.time()
    while True:
        g_sample, mp_val, mask, mask_eval = graphs[idx]
        idx = (idx + 1) % ngraph
        feed_dict = {
            norm_adj: mp_val,
            sparse_feature: g_sample.x[:, 0:-1],
            y_: g_sample.y,
            train_mask: mask
        }
        loss_val = sess.run([loss, y, train_op], feed_dict=feed_dict)
        pred_val = loss_val[1]
        # FIX: np.float (alias of builtin float / float64) was removed in
        # NumPy 1.24; np.float64 preserves the original dtype exactly.
        acc_val = np.equal(np.argmax(pred_val, 1), g_sample.y).astype(np.float64)
        acc_cnt += (acc_val * mask_eval).sum()
        total_cnt += mask_eval.sum()
        nnodes += mask.sum() + mask_eval.sum()
        train_acc += (acc_val * mask).sum()
        train_cnt += mask.sum()
        # Report every ~1/10 of the partition (finer-grained than a full epoch).
        if nnodes > meta["partition"]["nodes"][rank] // 10:
            nnodes = 0
            epoch += 1
            print("Acc : ", acc_cnt / total_cnt, train_acc / train_cnt, "Time : ", time.time() - start)
            print(pred_val)
            start = time.time()
            acc_cnt, total_cnt = 0, 0
            train_acc, train_cnt = 0, 0
            if epoch >= num_epoch:
                break
def train_main(args):
    """AutoDist (Parallaxx strategy) TF training loop for one worker.

    Builds the model inside ``autodist.scope()`` and trains over cached
    sample graphs, printing mean accuracy once per epoch.
    """
    autodist = AutoDist(resource_spec_file, Parallaxx())
    with open(os.path.join(args.path, "meta.yml"), 'rb') as f:
        meta = yaml.load(f.read(), Loader=yaml.FullLoader)
    hidden_layer_size = args.hidden_size
    num_epoch = args.num_epoch
    rank = ad.get_worker_communicate().rank()
    device_id = rank % args.num_local_worker
    nrank = ad.get_worker_communicate().nrank()
    distributed.ps_init(rank, nrank)
    # Number of cached sample graphs covering this worker's node partition.
    ngraph = meta["partition"]["nodes"][rank] // args.batch_size
    graphs = prepare_data(ngraph)
    idx, epoch, nnodes = 0, 0, 0
    graph_len = graphs[0][0].y.shape[0]
    with tf.Graph().as_default() as g, autodist.scope():
        norm_adj = tf.compat.v1.sparse.placeholder(tf.float32, name="norm_adj")
        sparse_feature = tf.placeholder(tf.int32, [graph_len, meta["feature"] - 1])
        y_ = tf.placeholder(tf.int32, [graph_len], name="y_")
        train_mask = tf.placeholder(tf.float32, [graph_len], name="train_mask")
        loss, y, train_op = model(norm_adj, sparse_feature, y_, train_mask)
        sess = autodist.create_distributed_session()
        acc_stat = []
        start = time.time()
        while True:
            g_sample, mp_val, mask, mask_eval = graphs[idx]
            idx = (idx + 1) % ngraph
            feed_dict = {
                norm_adj: mp_val,
                sparse_feature: g_sample.x[:, 0:-1],
                y_: g_sample.y,
                train_mask: mask
            }
            print("Before training")
            loss_val = sess.run([loss, y, y_, train_op], feed_dict=feed_dict)
            print(loss_val)
            pred_val = loss_val[1]
            true_val = loss_val[2]
            # FIX: np.float (alias of builtin float / float64) was removed in
            # NumPy 1.24; np.float64 preserves the original dtype exactly.
            acc_val = np.equal(np.argmax(pred_val, 1), true_val).astype(np.float64)
            acc_stat.append(acc_val)
            nnodes += mask.sum() + mask_eval.sum()
            if nnodes > meta["partition"]["nodes"][rank]:
                nnodes = 0
                epoch += 1
                print("Acc : ", np.mean(acc_stat), "Time : ", time.time() - start)
                start = time.time()
                acc_stat = []
                if epoch >= num_epoch:
                    break
def train_main(args):
    """Hetu PS training loop with learned sparse-feature embeddings and
    GNNDataLoaderOp-based prefetching of the next sample graph.
    """
    with open(os.path.join(args.path, "meta.yml"), 'rb') as f:
        meta = yaml.load(f.read(), Loader=yaml.FullLoader)
    hidden_layer_size = args.hidden_size
    num_epoch = args.num_epoch
    rank = ad.get_worker_communicate().rank()
    nrank = int(os.environ["DMLC_NUM_WORKER"])
    ctx = ndarray.gpu(rank % args.num_local_worker)
    # Each of the (feature - 1) sparse columns is embedded to embedding_width,
    # then flattened, giving extract_width inputs to the first GCN layer.
    embedding_width = args.hidden_size
    extract_width = embedding_width * (meta["feature"] - 1)
    # Labels are produced lazily by the data loader as one-hot CPU arrays.
    y_ = dl.GNNDataLoaderOp(lambda g: ndarray.array(
        convert_to_one_hot(g.y, max_val=g.num_classes), ctx=ndarray.cpu()))
    mask_ = ad.Variable(name="mask_")
    gcn1 = GCN(extract_width, hidden_layer_size, activation="relu")
    gcn2 = GCN(hidden_layer_size, meta["class"])
    # Sparse feature ids: all columns of g.x except the last one.
    index = dl.GNNDataLoaderOp(
        lambda g: ndarray.array(g.x[:, 0:-1], ctx=ndarray.cpu()), ctx=ndarray.cpu())
    embedding = initializers.random_normal([meta["idx_max"], embedding_width], stddev=0.1)
    embed = ad.embedding_lookup_op(embedding, index)
    # Flatten per-column embeddings into one row vector per node.
    embed = ad.array_reshape_op(embed, (-1, extract_width))
    # embed = ad.reduce_mean_op(embed, axes=1)
    # x = ad.concat_op(x_, embed, axis=1)
    x = gcn1(embed)
    y = gcn2(x)
    loss = ad.softmaxcrossentropy_op(y, y_)
    # Only masked (training) nodes contribute to the optimized loss.
    train_loss = loss * mask_
    train_loss = ad.reduce_mean_op(train_loss, [0])
    opt = optimizer.SGDOptimizer(args.learning_rate)
    train_op = opt.minimize(train_loss)
    ad.worker_init()
    distributed.ps_init(rank, nrank)
    # Number of cached sample graphs covering this worker's node partition.
    ngraph = meta["partition"]["nodes"][rank] // args.batch_size
    graphs = prepare_data(ngraph)
    idx = 0
    g_sample, mp_val, mask, mask_eval = graphs[idx]
    idx = (idx + 1) % ngraph
    # Two step() calls prime the loader's double-buffered pipeline before the
    # first run: presumably one fills the "current" slot and one the
    # "next" slot — TODO confirm against dl.GNNDataLoaderOp semantics.
    dl.GNNDataLoaderOp.step(g_sample)
    dl.GNNDataLoaderOp.step(g_sample)
    epoch = 0
    nnodes = 0
    executor = ad.Executor([loss, y, train_op], ctx=ctx, comm_mode='PS',
                           use_sparse_pull=False, cstable_policy=args.cache)
    while True:
        # Prefetch pattern: push the NEXT graph into the loader while this
        # iteration trains on the CURRENT graph's mp_val/mask.
        g_sample_nxt, mp_val_nxt, mask_nxt, mask_eval_nxt = graphs[idx]
        idx = (idx + 1) % ngraph
        dl.GNNDataLoaderOp.step(g_sample_nxt)
        feed_dict = {gcn1.mp: mp_val, gcn2.mp: mp_val, mask_: mask}
        loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)
        y_predicted = y_predicted.asnumpy().argmax(axis=1)
        acc = np.sum((y_predicted == g_sample.y) * mask_eval)
        train_acc = np.sum((y_predicted == g_sample.y) * mask)
        # NOTE(review): `stat` is not defined in this function — presumably a
        # module-level accumulator; verify it exists where this file runs.
        stat.update(acc, mask_eval.sum(), np.sum(loss_val.asnumpy() * mask_eval) / mask_eval.sum())
        stat.update_train(train_acc, mask.sum(), np.sum(loss_val.asnumpy() * mask) / mask.sum())
        # distributed.ps_get_worker_communicator().BarrierWorker()
        nnodes += mask.sum() + mask_eval.sum()
        if nnodes > meta["partition"]["nodes"][rank]:
            nnodes = 0
            epoch += 1
            if rank == 0:
                stat.print(epoch)
            if epoch >= num_epoch:
                break
        # Rotate the prefetched graph into the "current" slot.
        g_sample, mp_val, mask, mask_eval = g_sample_nxt, mp_val_nxt, mask_nxt, mask_eval_nxt
def train_main(args):
    """Between-graph replicated TF1 training loop (tf.train.Supervisor) for one worker.

    Pins the process to a local GPU, builds the model on this worker's device,
    joins the cluster via tf.train.Server, and trains over cached sample
    graphs until ``args.num_epoch`` epochs complete.
    """
    with open(os.path.join(args.path, "meta.yml"), 'rb') as f:
        meta = yaml.load(f.read(), Loader=yaml.FullLoader)
    hidden_layer_size = args.hidden_size
    num_epoch = args.num_epoch
    rank = ad.get_worker_communicate().rank()
    device_id = rank % args.num_local_worker
    # Pin this process to a single local GPU before TF initializes devices.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(device_id)
    nrank = ad.get_worker_communicate().nrank()
    distributed.ps_init(rank, nrank)
    # Number of cached sample graphs covering this worker's node partition.
    ngraph = meta["partition"]["nodes"][rank] // args.batch_size
    graphs = prepare_data(ngraph)
    idx, epoch, nnodes = 0, 0, 0
    worker_device = "/job:worker/task:{}/gpu:0".format(rank)
    graph_len = graphs[0][0].y.shape[0]
    with tf.device(worker_device):
        norm_adj = tf.compat.v1.sparse.placeholder(tf.float32, name="norm_adj")
        sparse_feature = tf.placeholder(tf.int32, [graph_len, meta["feature"] - 1])
        y_ = tf.placeholder(tf.int32, [graph_len], name="y_")
        train_mask = tf.placeholder(tf.float32, [graph_len], name="train_mask")
        # NOTE(review): `cluster` is not defined in this function — presumably
        # a module-level tf.train.ClusterSpec; verify where this file runs.
        loss, y, train_op, global_step = model(norm_adj, sparse_feature, y_, train_mask, cluster, rank)
    with tf.device(
            tf.train.replica_device_setter(worker_device=worker_device, cluster=cluster)):
        server = tf.train.Server(cluster, job_name="worker", task_index=rank)
        init = tf.global_variables_initializer()
        sv = tf.train.Supervisor(is_chief=(rank == 0), init_op=init,
                                 recovery_wait_secs=1, global_step=global_step)
        sess_config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            device_filters=["/job:ps", "/job:worker/task:%d" % rank])
        sess = sv.prepare_or_wait_for_session(server.target, config=sess_config)
        # NOTE(review): the Supervisor already runs init_op on the chief;
        # re-running it here re-initializes variables from every worker —
        # kept for behavior compatibility, but confirm it is intended.
        sess.run(init)
        acc_stat = []
        start = time.time()
        while True:
            g_sample, mp_val, mask, mask_eval = graphs[idx]
            idx = (idx + 1) % ngraph
            feed_dict = {
                norm_adj: mp_val,
                sparse_feature: g_sample.x[:, 0:-1],
                y_: g_sample.y,
                train_mask: mask
            }
            loss_val = sess.run([loss, y, y_, train_op], feed_dict=feed_dict)
            pred_val = loss_val[1]
            true_val = loss_val[2]
            # FIX: np.float (alias of builtin float / float64) was removed in
            # NumPy 1.24; np.float64 preserves the original dtype exactly.
            acc_val = np.equal(np.argmax(pred_val, 1), true_val).astype(np.float64)
            acc_stat.append(acc_val)
            nnodes += mask.sum() + mask_eval.sum()
            if nnodes > meta["partition"]["nodes"][rank]:
                nnodes = 0
                epoch += 1
                print("Acc : ", np.mean(acc_stat), "Time : ", time.time() - start)
                start = time.time()
                acc_stat = []
                if epoch >= num_epoch:
                    break
def train_hetu(args):
    """Train a two-layer GCN on distributed subgraph samples (PS mode).

    Subgraphs arrive from DistributedSubgraphSampler already paired with
    their message-passing matrix (via the transformer callback); training
    continues until ``args.num_epoch`` passes over this worker's partition.
    """
    # Dataset metadata: feature width, class count, per-worker partition sizes.
    with open(os.path.join(args.path, "meta.yml"), 'rb') as f:
        meta = yaml.load(f.read(), Loader=yaml.FullLoader)

    hidden = args.hidden_size
    num_epoch = args.num_epoch
    rank = int(os.environ["WORKER_ID"])
    nrank = int(os.environ["DMLC_NUM_WORKER"])
    ctx = ndarray.gpu(rank)

    # Symbolic inputs: node features and one-hot labels.
    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")

    # Two-layer GCN: relu hidden layer, then a linear layer to class logits.
    layer_in = GCN(meta["feature"], hidden, activation="relu")
    layer_out = GCN(hidden, meta["class"])
    y = layer_out(layer_in(x_))

    loss = ad.reduce_mean_op(ad.softmaxcrossentropy_op(y, y_), [0])
    train_op = optimizer.SGDOptimizer(0.1).minimize(loss)

    executor = ad.Executor([loss, y, train_op], ctx=ctx, comm_mode='PS')
    distributed.ps_init(rank, nrank)

    def transform(graph):
        # Attach the message-passing matrix so sample() yields (graph, mp).
        return graph, mp_matrix(graph, ndarray.gpu(rank))

    with DistributedSubgraphSampler(args.path, 4000, 2, rank=rank, nrank=nrank,
                                    transformer=transform, cache_size_factor=0,
                                    reduce_nonlocal_factor=0.5) as sampler:
        epoch, nnodes = 0, 0
        start = time.time()
        while True:
            g_sample, mp_val = sampler.sample()
            one_hot = convert_to_one_hot(g_sample.y, max_val=g_sample.num_classes)
            feed = {
                layer_in.mp: mp_val,
                layer_out.mp: mp_val,
                x_: ndarray.array(g_sample.x, ctx=ctx),
                y_: ndarray.array(one_hot, ctx=ctx),
            }
            loss_val, y_predicted, _ = executor.run(feed_dict=feed)
            pred = y_predicted.asnumpy().argmax(axis=1)
            correct = (pred == g_sample.y).sum()
            distributed.ps_get_worker_communicator().BarrierWorker()

            nnodes += g_sample.num_nodes
            # One "epoch" ~= one pass over this worker's node partition.
            if nnodes > meta["partition"]["nodes"][rank]:
                nnodes = 0
                epoch += 1
                print("Epoch :", epoch, time.time() - start)
                print("Train accuracy:", correct / len(pred))
                start = time.time()
                if epoch >= num_epoch:
                    break