def run_client(graph_name, part_id, num_nodes, num_edges): gpb = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name), part_id, None) g = DistGraph("kv_ip_config.txt", graph_name, gpb=gpb) # Test API assert g.number_of_nodes() == num_nodes assert g.number_of_edges() == num_edges # Test reading node data nids = F.arange(0, int(g.number_of_nodes() / 2)) feats1 = g.ndata['features'][nids] feats = F.squeeze(feats1, 1) assert np.all(F.asnumpy(feats == nids)) # Test reading edge data eids = F.arange(0, int(g.number_of_edges() / 2)) feats1 = g.edata['features'][eids] feats = F.squeeze(feats1, 1) assert np.all(F.asnumpy(feats == eids)) # Test init node data new_shape = (g.number_of_nodes(), 2) g.init_ndata('test1', new_shape, F.int32) feats = g.ndata['test1'][nids] assert np.all(F.asnumpy(feats) == 0) # Test init edge data new_shape = (g.number_of_edges(), 2) g.init_edata('test1', new_shape, F.int32) feats = g.edata['test1'][eids] assert np.all(F.asnumpy(feats) == 0) # Test write data new_feats = F.ones((len(nids), 2), F.int32, F.cpu()) g.ndata['test1'][nids] = new_feats feats = g.ndata['test1'][nids] assert np.all(F.asnumpy(feats) == 1) # Test metadata operations. assert len(g.ndata['features']) == g.number_of_nodes() assert g.ndata['features'].shape == (g.number_of_nodes(), 1) assert g.ndata['features'].dtype == F.int64 assert g.node_attr_schemes()['features'].dtype == F.int64 assert g.node_attr_schemes()['test1'].dtype == F.int32 assert g.node_attr_schemes()['features'].shape == (1, ) selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30 # Test node split nodes = node_split(selected_nodes, g.get_partition_book(), g.rank()) nodes = F.asnumpy(nodes) # We only have one partition, so the local nodes are basically all nodes in the graph. local_nids = np.arange(g.number_of_nodes()) for n in nodes: assert n in local_nids # clean up dgl.distributed.shutdown_servers() dgl.distributed.finalize_client() print('end')
def start_hetero_etype_sample_client(rank, tmpdir, disable_shared_mem, fanout=3): gpb = None if disable_shared_mem: _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank) dgl.distributed.initialize("rpc_ip_config.txt") dist_graph = DistGraph("test_sampling", gpb=gpb) assert 'feat' in dist_graph.nodes['n1'].data assert 'feat' not in dist_graph.nodes['n2'].data assert 'feat' not in dist_graph.nodes['n3'].data if gpb is None: gpb = dist_graph.get_partition_book() try: nodes = {'n3': [0, 10, 99, 66, 124, 208]} sampled_graph = sample_etype_neighbors(dist_graph, nodes, dgl.ETYPE, fanout) block = dgl.to_block(sampled_graph, nodes) block.edata[dgl.EID] = sampled_graph.edata[dgl.EID] except Exception as e: print(e) block = None dgl.distributed.exit_client() return block, gpb
def start_bipartite_etype_sample_client(rank, tmpdir, disable_shared_mem, fanout=3, nodes={}): gpb = None if disable_shared_mem: _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank) dgl.distributed.initialize("rpc_ip_config.txt") dist_graph = DistGraph("test_sampling", gpb=gpb) assert 'feat' in dist_graph.nodes['user'].data assert 'feat' in dist_graph.nodes['game'].data if dist_graph.local_partition is not None: # Check whether etypes are sorted in dist_graph local_g = dist_graph.local_partition local_nids = np.arange(local_g.num_nodes()) for lnid in local_nids: leids = local_g.in_edges(lnid, form='eid') letids = F.asnumpy(local_g.edata[dgl.ETYPE][leids]) _, idices = np.unique(letids, return_index=True) assert np.all(idices[:-1] <= idices[1:]) if gpb is None: gpb = dist_graph.get_partition_book() sampled_graph = sample_etype_neighbors(dist_graph, nodes, dgl.ETYPE, fanout) block = dgl.to_block(sampled_graph, nodes) if sampled_graph.num_edges() > 0: block.edata[dgl.EID] = sampled_graph.edata[dgl.EID] dgl.distributed.exit_client() return block, gpb
def run_client_hierarchy(graph_name, part_id, server_count, node_mask, edge_mask, return_dict): os.environ['DGL_NUM_SERVER'] = str(server_count) dgl.distributed.initialize("kv_ip_config.txt") gpb, graph_name, _, _ = load_partition_book( '/tmp/dist_graph/{}.json'.format(graph_name), part_id, None) g = DistGraph(graph_name, gpb=gpb) node_mask = F.tensor(node_mask) edge_mask = F.tensor(edge_mask) nodes = node_split(node_mask, g.get_partition_book(), node_trainer_ids=g.ndata['trainer_id']) edges = edge_split(edge_mask, g.get_partition_book(), edge_trainer_ids=g.edata['trainer_id']) rank = g.rank() return_dict[rank] = (nodes, edges)
def run_client(graph_name, barrier, num_nodes, num_edges): barrier.wait() g = DistGraph(server_namebook, graph_name) # Test API assert g.number_of_nodes() == num_nodes assert g.number_of_edges() == num_edges # Test reading node data nids = F.arange(0, int(g.number_of_nodes() / 2)) feats1 = g.ndata['features'][nids] feats = F.squeeze(feats1, 1) assert np.all(F.asnumpy(feats == nids)) # Test reading edge data eids = F.arange(0, int(g.number_of_edges() / 2)) feats1 = g.edata['features'][eids] feats = F.squeeze(feats1, 1) assert np.all(F.asnumpy(feats == eids)) # Test init node data new_shape = (g.number_of_nodes(), 2) g.init_ndata('test1', new_shape, F.int32) feats = g.ndata['test1'][nids] assert np.all(F.asnumpy(feats) == 0) # Test init edge data new_shape = (g.number_of_edges(), 2) g.init_edata('test1', new_shape, F.int32) feats = g.edata['test1'][eids] assert np.all(F.asnumpy(feats) == 0) # Test write data new_feats = F.ones((len(nids), 2), F.int32, F.cpu()) g.ndata['test1'][nids] = new_feats feats = g.ndata['test1'][nids] assert np.all(F.asnumpy(feats) == 1) # Test metadata operations. assert len(g.ndata['features']) == g.number_of_nodes() assert g.ndata['features'].shape == (g.number_of_nodes(), 1) assert g.ndata['features'].dtype == F.int64 assert g.node_attr_schemes()['features'].dtype == F.int64 assert g.node_attr_schemes()['test1'].dtype == F.int32 assert g.node_attr_schemes()['features'].shape == (1, ) selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30 # Test node split nodes = node_split(selected_nodes, g.get_partition_book(), g.rank()) nodes = F.asnumpy(nodes) # We only have one partition, so the local nodes are basically all nodes in the graph. local_nids = np.arange(g.number_of_nodes()) for n in nodes: assert n in local_nids g.shut_down() print('end')
def run_client(graph_name, cli_id, part_id, server_count): device = F.ctx() time.sleep(5) os.environ['DGL_NUM_SERVER'] = str(server_count) dgl.distributed.initialize("optim_ip_config.txt") gpb, graph_name, _, _ = load_partition_book( '/tmp/dist_graph/{}.json'.format(graph_name), part_id, None) g = DistGraph(graph_name, gpb=gpb) policy = dgl.distributed.PartitionPolicy('node', g.get_partition_book()) num_nodes = g.number_of_nodes() emb_dim = 4 dgl_emb = NodeEmbedding(num_nodes, emb_dim, name='optim', init_func=initializer, part_policy=policy) dgl_emb_zero = NodeEmbedding(num_nodes, emb_dim, name='optim-zero', init_func=initializer, part_policy=policy) dgl_adam = SparseAdam(params=[dgl_emb, dgl_emb_zero], lr=0.01) dgl_adam._world_size = 1 dgl_adam._rank = 0 torch_emb = th.nn.Embedding(num_nodes, emb_dim, sparse=True) torch_emb_zero = th.nn.Embedding(num_nodes, emb_dim, sparse=True) th.manual_seed(0) th.nn.init.uniform_(torch_emb.weight, 0, 1.0) th.manual_seed(0) th.nn.init.uniform_(torch_emb_zero.weight, 0, 1.0) torch_adam = th.optim.SparseAdam(list(torch_emb.parameters()) + list(torch_emb_zero.parameters()), lr=0.01) labels = th.ones((4, )).long() idx = th.randint(0, num_nodes, size=(4, )) dgl_value = dgl_emb(idx, device).to(th.device('cpu')) torch_value = torch_emb(idx) torch_adam.zero_grad() torch_loss = th.nn.functional.cross_entropy(torch_value, labels) torch_loss.backward() torch_adam.step() dgl_adam.zero_grad() dgl_loss = th.nn.functional.cross_entropy(dgl_value, labels) dgl_loss.backward() dgl_adam.step() assert F.allclose(dgl_emb.weight[0:num_nodes // 2], torch_emb.weight[0:num_nodes // 2])
def start_bipartite_sample_client(rank, tmpdir, disable_shared_mem, nodes): gpb = None if disable_shared_mem: _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank) dgl.distributed.initialize("rpc_ip_config.txt") dist_graph = DistGraph("test_sampling", gpb=gpb) assert 'feat' in dist_graph.nodes['user'].data assert 'feat' in dist_graph.nodes['game'].data if gpb is None: gpb = dist_graph.get_partition_book() sampled_graph = sample_neighbors(dist_graph, nodes, 3) block = dgl.to_block(sampled_graph, nodes) if sampled_graph.num_edges() > 0: block.edata[dgl.EID] = sampled_graph.edata[dgl.EID] dgl.distributed.exit_client() return block, gpb
def start_hetero_etype_sample_client(rank, tmpdir, disable_shared_mem, fanout=3, nodes={'n3': [0, 10, 99, 66, 124, 208]}, etype_sorted=False): gpb = None if disable_shared_mem: _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank) dgl.distributed.initialize("rpc_ip_config.txt") dist_graph = DistGraph("test_sampling", gpb=gpb) assert 'feat' in dist_graph.nodes['n1'].data assert 'feat' not in dist_graph.nodes['n2'].data assert 'feat' not in dist_graph.nodes['n3'].data if dist_graph.local_partition is not None: # Check whether etypes are sorted in dist_graph local_g = dist_graph.local_partition local_nids = np.arange(local_g.num_nodes()) for lnid in local_nids: leids = local_g.in_edges(lnid, form='eid') letids = F.asnumpy(local_g.edata[dgl.ETYPE][leids]) _, idices = np.unique(letids, return_index=True) assert np.all(idices[:-1] <= idices[1:]) if gpb is None: gpb = dist_graph.get_partition_book() try: sampled_graph = sample_etype_neighbors(dist_graph, nodes, dgl.ETYPE, fanout, etype_sorted=etype_sorted) block = dgl.to_block(sampled_graph, nodes) block.edata[dgl.EID] = sampled_graph.edata[dgl.EID] except Exception as e: print(e) block = None dgl.distributed.exit_client() return block, gpb
def run_client(graph_name, part_id, num_nodes, num_edges): time.sleep(5) gpb = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name), part_id, None) g = DistGraph("kv_ip_config.txt", graph_name, gpb=gpb) # Test API assert g.number_of_nodes() == num_nodes assert g.number_of_edges() == num_edges # Test reading node data nids = F.arange(0, int(g.number_of_nodes() / 2)) feats1 = g.ndata['features'][nids] feats = F.squeeze(feats1, 1) assert np.all(F.asnumpy(feats == nids)) # Test reading edge data eids = F.arange(0, int(g.number_of_edges() / 2)) feats1 = g.edata['features'][eids] feats = F.squeeze(feats1, 1) assert np.all(F.asnumpy(feats == eids)) # Test init node data new_shape = (g.number_of_nodes(), 2) g.init_ndata('test1', new_shape, F.int32) feats = g.ndata['test1'][nids] assert np.all(F.asnumpy(feats) == 0) # Test init edge data new_shape = (g.number_of_edges(), 2) g.init_edata('test1', new_shape, F.int32) feats = g.edata['test1'][eids] assert np.all(F.asnumpy(feats) == 0) # Test sparse emb try: new_shape = (g.number_of_nodes(), 1) emb = SparseNodeEmbedding(g, 'emb1', new_shape, emb_init) lr = 0.001 optimizer = SparseAdagrad([emb], lr=lr) with F.record_grad(): feats = emb(nids) assert np.all(F.asnumpy(feats) == np.zeros((len(nids), 1))) loss = F.sum(feats + 1, 0) loss.backward() optimizer.step() feats = emb(nids) assert_almost_equal(F.asnumpy(feats), np.ones((len(nids), 1)) * -lr) rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids)) feats1 = emb(rest) assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1))) policy = dgl.distributed.PartitionPolicy('node', g.get_partition_book()) grad_sum = dgl.distributed.DistTensor(g, 'node:emb1_sum', policy) assert np.all(F.asnumpy(grad_sum[nids]) == np.ones((len(nids), 1))) assert np.all(F.asnumpy(grad_sum[rest]) == np.zeros((len(rest), 1))) emb = SparseNodeEmbedding(g, 'emb2', new_shape, emb_init) optimizer = SparseAdagrad([emb], lr=lr) with F.record_grad(): feats1 = emb(nids) feats2 = emb(nids) feats = F.cat([feats1, feats2], 0) assert np.all(F.asnumpy(feats) == np.zeros((len(nids) * 2, 1))) loss = F.sum(feats + 1, 0) loss.backward() optimizer.step() feats = emb(nids) assert_almost_equal(F.asnumpy(feats), np.ones((len(nids), 1)) * math.sqrt(2) * -lr) rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids)) feats1 = emb(rest) assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1))) except NotImplementedError as e: pass # Test write data new_feats = F.ones((len(nids), 2), F.int32, F.cpu()) g.ndata['test1'][nids] = new_feats feats = g.ndata['test1'][nids] assert np.all(F.asnumpy(feats) == 1) # Test metadata operations. assert len(g.ndata['features']) == g.number_of_nodes() assert g.ndata['features'].shape == (g.number_of_nodes(), 1) assert g.ndata['features'].dtype == F.int64 assert g.node_attr_schemes()['features'].dtype == F.int64 assert g.node_attr_schemes()['test1'].dtype == F.int32 assert g.node_attr_schemes()['features'].shape == (1, ) selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30 # Test node split nodes = node_split(selected_nodes, g.get_partition_book()) nodes = F.asnumpy(nodes) # We only have one partition, so the local nodes are basically all nodes in the graph. local_nids = np.arange(g.number_of_nodes()) for n in nodes: assert n in local_nids # clean up dgl.distributed.shutdown_servers() dgl.distributed.finalize_client() print('end')