def start_bipartite_etype_sample_client(rank, tmpdir, disable_shared_mem, fanout=3, nodes={}):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_sampling", gpb=gpb)
    assert 'feat' in dist_graph.nodes['user'].data
    assert 'feat' in dist_graph.nodes['game'].data

    if dist_graph.local_partition is not None:
        # Check whether etypes are sorted in dist_graph
        local_g = dist_graph.local_partition
        local_nids = np.arange(local_g.num_nodes())
        for lnid in local_nids:
            leids = local_g.in_edges(lnid, form='eid')
            letids = F.asnumpy(local_g.edata[dgl.ETYPE][leids])
            _, indices = np.unique(letids, return_index=True)
            assert np.all(indices[:-1] <= indices[1:])

    if gpb is None:
        gpb = dist_graph.get_partition_book()
    sampled_graph = sample_etype_neighbors(dist_graph, nodes, dgl.ETYPE, fanout)
    block = dgl.to_block(sampled_graph, nodes)
    if sampled_graph.num_edges() > 0:
        block.edata[dgl.EID] = sampled_graph.edata[dgl.EID]
    dgl.distributed.exit_client()
    return block, gpb

def start_hetero_etype_sample_client(rank, tmpdir, disable_shared_mem, fanout=3):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_sampling", gpb=gpb)
    assert 'feat' in dist_graph.nodes['n1'].data
    assert 'feat' not in dist_graph.nodes['n2'].data
    assert 'feat' not in dist_graph.nodes['n3'].data
    if gpb is None:
        gpb = dist_graph.get_partition_book()
    try:
        nodes = {'n3': [0, 10, 99, 66, 124, 208]}
        sampled_graph = sample_etype_neighbors(dist_graph, nodes, dgl.ETYPE, fanout)
        block = dgl.to_block(sampled_graph, nodes)
        block.edata[dgl.EID] = sampled_graph.edata[dgl.EID]
    except Exception as e:
        print(e)
        block = None
    dgl.distributed.exit_client()
    return block, gpb

def start_node_dataloader(rank, tmpdir, num_server, num_workers):
    import dgl
    import torch as th
    dgl.distributed.initialize("mp_ip_config.txt", 1, num_workers=num_workers)
    gpb = None
    disable_shared_mem = num_server > 1
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    num_nodes_to_sample = 202
    batch_size = 32
    train_nid = th.arange(num_nodes_to_sample)
    dist_graph = DistGraph("test_mp", gpb=gpb,
                           part_config=tmpdir / 'test_sampling.json')

    # Map the shuffled node/edge IDs back to the IDs in the original graph.
    orig_nid = F.zeros((dist_graph.number_of_nodes(),), dtype=F.int64)
    orig_eid = F.zeros((dist_graph.number_of_edges(),), dtype=F.int64)
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json', i)
        orig_nid[part.ndata[dgl.NID]] = part.ndata['orig_id']
        orig_eid[part.edata[dgl.EID]] = part.edata['orig_id']

    # Create sampler
    sampler = dgl.dataloading.MultiLayerNeighborSampler([5, 10])

    # We need to test creating DistDataLoader multiple times.
    for i in range(2):
        # Create DataLoader for constructing blocks
        dataloader = dgl.dataloading.NodeDataLoader(
            dist_graph, train_nid, sampler,
            batch_size=batch_size, shuffle=True, drop_last=False,
            num_workers=num_workers)

        groundtruth_g = CitationGraphDataset("cora")[0]
        max_nid = []
        for epoch in range(2):
            for idx, (_, _, blocks) in zip(
                    range(0, num_nodes_to_sample, batch_size), dataloader):
                block = blocks[-1]
                o_src, o_dst = block.edges()
                src_nodes_id = block.srcdata[dgl.NID][o_src]
                dst_nodes_id = block.dstdata[dgl.NID][o_dst]
                src_nodes_id = orig_nid[src_nodes_id]
                dst_nodes_id = orig_nid[dst_nodes_id]
                has_edges = groundtruth_g.has_edges_between(src_nodes_id, dst_nodes_id)
                assert np.all(F.asnumpy(has_edges))
                max_nid.append(np.max(F.asnumpy(dst_nodes_id)))
                # assert np.all(np.unique(np.sort(F.asnumpy(dst_nodes_id))) == np.arange(idx, batch_size))
        del dataloader
    # This is needed since there are two tests here in one process.
    dgl.distributed.exit_client()

def start_dist_dataloader(rank, tmpdir, num_server, drop_last):
    import dgl
    import torch as th
    dgl.distributed.initialize("mp_ip_config.txt")
    gpb = None
    disable_shared_mem = num_server > 0
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    num_nodes_to_sample = 202
    batch_size = 32
    train_nid = th.arange(num_nodes_to_sample)
    dist_graph = DistGraph("test_mp", gpb=gpb,
                           part_config=tmpdir / 'test_sampling.json')

    # Map the shuffled node/edge IDs back to the IDs in the original graph.
    orig_nid = F.arange(0, dist_graph.number_of_nodes())
    orig_eid = F.arange(0, dist_graph.number_of_edges())
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json', i)
        if 'orig_id' in part.ndata:
            orig_nid[part.ndata[dgl.NID]] = part.ndata['orig_id']
        if 'orig_id' in part.edata:
            orig_eid[part.edata[dgl.EID]] = part.edata['orig_id']

    # Create sampler
    sampler = NeighborSampler(dist_graph, [5, 10], dgl.distributed.sample_neighbors)

    # We need to test creating DistDataLoader multiple times.
    for i in range(2):
        # Create DataLoader for constructing blocks
        dataloader = DistDataLoader(
            dataset=train_nid.numpy(),
            batch_size=batch_size,
            collate_fn=sampler.sample_blocks,
            shuffle=False,
            drop_last=drop_last)

        groundtruth_g = CitationGraphDataset("cora")[0]
        max_nid = []
        for epoch in range(2):
            for idx, blocks in zip(range(0, num_nodes_to_sample, batch_size), dataloader):
                block = blocks[-1]
                o_src, o_dst = block.edges()
                src_nodes_id = block.srcdata[dgl.NID][o_src]
                dst_nodes_id = block.dstdata[dgl.NID][o_dst]
                max_nid.append(np.max(F.asnumpy(dst_nodes_id)))
                src_nodes_id = orig_nid[src_nodes_id]
                dst_nodes_id = orig_nid[dst_nodes_id]
                has_edges = groundtruth_g.has_edges_between(src_nodes_id, dst_nodes_id)
                assert np.all(F.asnumpy(has_edges))
                # assert np.all(np.unique(np.sort(F.asnumpy(dst_nodes_id))) == np.arange(idx, batch_size))
            if drop_last:
                assert np.max(max_nid) == num_nodes_to_sample - 1 - num_nodes_to_sample % batch_size
            else:
                assert np.max(max_nid) == num_nodes_to_sample - 1
        del dataloader
    # This is needed since there are two tests here in one process.
    dgl.distributed.exit_client()

def run_client_hierarchy(graph_name, part_id, server_count, node_mask, edge_mask, return_dict):
    os.environ['DGL_NUM_SERVER'] = str(server_count)
    dgl.distributed.initialize("kv_ip_config.txt")
    gpb, graph_name, _, _ = load_partition_book(
        '/tmp/dist_graph/{}.json'.format(graph_name), part_id, None)
    g = DistGraph(graph_name, gpb=gpb)
    node_mask = F.tensor(node_mask)
    edge_mask = F.tensor(edge_mask)
    nodes = node_split(node_mask, g.get_partition_book(),
                       node_trainer_ids=g.ndata['trainer_id'])
    edges = edge_split(edge_mask, g.get_partition_book(),
                       edge_trainer_ids=g.edata['trainer_id'])
    rank = g.rank()
    return_dict[rank] = (nodes, edges)

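# A minimal sketch of how run_client_hierarchy could be driven from a test.
# The launcher below is hypothetical (not part of this module) and assumes the
# servers are already running and kv_ip_config.txt exists; it illustrates the
# role of `return_dict`: a multiprocessing.Manager dict that collects the
# per-trainer node/edge splits keyed by client rank.
def _launch_hierarchy_clients(graph_name, num_parts, server_count, node_mask, edge_mask):
    import multiprocessing as mp
    manager = mp.Manager()
    return_dict = manager.dict()
    procs = []
    for part_id in range(num_parts):
        p = mp.Process(target=run_client_hierarchy,
                       args=(graph_name, part_id, server_count,
                             node_mask, edge_mask, return_dict))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()
    return dict(return_dict)
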
def start_find_edges_client(rank, tmpdir, disable_shared_mem, eids, etype=None):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_find_edges.json', rank)
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_find_edges", gpb=gpb)
    try:
        u, v = dist_graph.find_edges(eids, etype=etype)
    except Exception as e:
        print(e)
        u, v = None, None
    dgl.distributed.exit_client()
    return u, v

def run_client(graph_name, cli_id, part_id, server_count):
    device = F.ctx()
    time.sleep(5)
    os.environ['DGL_NUM_SERVER'] = str(server_count)
    dgl.distributed.initialize("optim_ip_config.txt")
    gpb, graph_name, _, _ = load_partition_book(
        '/tmp/dist_graph/{}.json'.format(graph_name), part_id, None)
    g = DistGraph(graph_name, gpb=gpb)
    policy = dgl.distributed.PartitionPolicy('node', g.get_partition_book())
    num_nodes = g.number_of_nodes()
    emb_dim = 4

    # Distributed embeddings with a sparse Adam optimizer.
    dgl_emb = NodeEmbedding(num_nodes, emb_dim, name='optim',
                            init_func=initializer, part_policy=policy)
    dgl_emb_zero = NodeEmbedding(num_nodes, emb_dim, name='optim-zero',
                                 init_func=initializer, part_policy=policy)
    dgl_adam = SparseAdam(params=[dgl_emb, dgl_emb_zero], lr=0.01)
    dgl_adam._world_size = 1
    dgl_adam._rank = 0

    # Reference: identically initialized torch embeddings with torch's SparseAdam.
    torch_emb = th.nn.Embedding(num_nodes, emb_dim, sparse=True)
    torch_emb_zero = th.nn.Embedding(num_nodes, emb_dim, sparse=True)
    th.manual_seed(0)
    th.nn.init.uniform_(torch_emb.weight, 0, 1.0)
    th.manual_seed(0)
    th.nn.init.uniform_(torch_emb_zero.weight, 0, 1.0)
    torch_adam = th.optim.SparseAdam(
        list(torch_emb.parameters()) + list(torch_emb_zero.parameters()), lr=0.01)

    # Run one optimizer step on both sides and check they stay in sync.
    labels = th.ones((4,)).long()
    idx = th.randint(0, num_nodes, size=(4,))
    dgl_value = dgl_emb(idx, device).to(th.device('cpu'))
    torch_value = torch_emb(idx)
    torch_adam.zero_grad()
    torch_loss = th.nn.functional.cross_entropy(torch_value, labels)
    torch_loss.backward()
    torch_adam.step()
    dgl_adam.zero_grad()
    dgl_loss = th.nn.functional.cross_entropy(dgl_value, labels)
    dgl_loss.backward()
    dgl_adam.step()
    assert F.allclose(dgl_emb.weight[0:num_nodes // 2],
                      torch_emb.weight[0:num_nodes // 2])

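# `initializer` above is defined elsewhere in this test module. For the final
# allclose check against the torch embeddings to hold, it has to produce the
# same values as th.nn.init.uniform_ under th.manual_seed(0). A plausible
# sketch of such a helper (hypothetical; named _example_initializer to avoid
# shadowing the real one) is:
def _example_initializer(shape, dtype):
    arr = th.zeros(shape, dtype=dtype)
    th.manual_seed(0)
    th.nn.init.uniform_(arr, 0, 1.0)
    return arr
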
def check_standalone_sampling(tmpdir):
    g = CitationGraphDataset("cora")[0]
    g.readonly()
    num_parts = 1
    num_hops = 1
    partition_graph(g, 'test_sampling', num_parts, tmpdir,
                    num_hops=num_hops, part_method='metis', reshuffle=False)

    dist_graph = DistGraph(None, "test_sampling",
                           conf_file=tmpdir / 'test_sampling.json')
    sampled_graph = sample_neighbors(dist_graph, [0, 10, 99, 66, 1024, 2008], 3)

    src, dst = sampled_graph.edges()
    assert sampled_graph.number_of_nodes() == g.number_of_nodes()
    assert np.all(F.asnumpy(g.has_edges_between(src, dst)))
    eids = g.edge_ids(src, dst)
    assert np.array_equal(F.asnumpy(sampled_graph.edata[dgl.EID]), F.asnumpy(eids))

def check_standalone_etype_sampling(tmpdir, reshuffle):
    hg = CitationGraphDataset('cora')[0]
    num_parts = 1
    num_hops = 1
    partition_graph(hg, 'test_sampling', num_parts, tmpdir,
                    num_hops=num_hops, part_method='metis', reshuffle=reshuffle)
    os.environ['DGL_DIST_MODE'] = 'standalone'
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_sampling", part_config=tmpdir / 'test_sampling.json')
    sampled_graph = sample_etype_neighbors(
        dist_graph, [0, 10, 99, 66, 1023], dgl.ETYPE, 3)

    src, dst = sampled_graph.edges()
    assert sampled_graph.number_of_nodes() == hg.number_of_nodes()
    assert np.all(F.asnumpy(hg.has_edges_between(src, dst)))
    eids = hg.edge_ids(src, dst)
    assert np.array_equal(F.asnumpy(sampled_graph.edata[dgl.EID]), F.asnumpy(eids))
    dgl.distributed.exit_client()

def run_client_empty(graph_name, part_id, server_count, num_clients, num_nodes, num_edges):
    os.environ['DGL_NUM_SERVER'] = str(server_count)
    dgl.distributed.initialize("kv_ip_config.txt")
    gpb, graph_name, _, _ = load_partition_book(
        '/tmp/dist_graph/{}.json'.format(graph_name), part_id, None)
    g = DistGraph(graph_name, gpb=gpb)
    check_dist_graph_empty(g, num_clients, num_nodes, num_edges)

def check_standalone_etype_sampling_heterograph(tmpdir, reshuffle):
    hg = CitationGraphDataset('cora')[0]
    num_parts = 1
    num_hops = 1
    src, dst = hg.edges()
    new_hg = dgl.heterograph({
        ('paper', 'cite', 'paper'): (src, dst),
        ('paper', 'cite-by', 'paper'): (dst, src)
    }, {'paper': hg.number_of_nodes()})
    partition_graph(new_hg, 'test_hetero_sampling', num_parts, tmpdir,
                    num_hops=num_hops, part_method='metis', reshuffle=reshuffle)
    os.environ['DGL_DIST_MODE'] = 'standalone'
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_hetero_sampling",
                           part_config=tmpdir / 'test_hetero_sampling.json')
    sampled_graph = sample_etype_neighbors(
        dist_graph, [0, 1, 2, 10, 99, 66, 1023, 1024, 2700, 2701], dgl.ETYPE, 1)
    src, dst = sampled_graph.edges(etype=('paper', 'cite', 'paper'))
    assert len(src) == 10
    src, dst = sampled_graph.edges(etype=('paper', 'cite-by', 'paper'))
    assert len(src) == 10
    assert sampled_graph.number_of_nodes() == new_hg.number_of_nodes()
    dgl.distributed.exit_client()

def run_client(graph_name, part_id, num_clients, num_nodes, num_edges):
    time.sleep(5)
    dgl.distributed.initialize("kv_ip_config.txt")
    gpb, graph_name = load_partition_book(
        '/tmp/dist_graph/{}.json'.format(graph_name), part_id, None)
    g = DistGraph("kv_ip_config.txt", graph_name, gpb=gpb)
    check_dist_graph(g, num_clients, num_nodes, num_edges)

def start_sample_client_shuffle(rank, tmpdir, disable_shared_mem, g, num_servers, group_id=0):
    os.environ['DGL_GROUP_ID'] = str(group_id)
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_sampling", gpb=gpb)
    sampled_graph = sample_neighbors(dist_graph, [0, 10, 99, 66, 1024, 2008], 3)

    # Map the shuffled IDs in the sampled graph back to the original IDs.
    orig_nid = F.zeros((g.number_of_nodes(),), dtype=F.int64, ctx=F.cpu())
    orig_eid = F.zeros((g.number_of_edges(),), dtype=F.int64, ctx=F.cpu())
    for i in range(num_servers):
        part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json', i)
        orig_nid[part.ndata[dgl.NID]] = part.ndata['orig_id']
        orig_eid[part.edata[dgl.EID]] = part.edata['orig_id']

    src, dst = sampled_graph.edges()
    src = orig_nid[src]
    dst = orig_nid[dst]
    assert sampled_graph.number_of_nodes() == g.number_of_nodes()
    assert np.all(F.asnumpy(g.has_edges_between(src, dst)))
    eids = g.edge_ids(src, dst)
    eids1 = orig_eid[sampled_graph.edata[dgl.EID]]
    assert np.array_equal(F.asnumpy(eids1), F.asnumpy(eids))

def start_bipartite_sample_client(rank, tmpdir, disable_shared_mem, nodes):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_sampling", gpb=gpb)
    assert 'feat' in dist_graph.nodes['user'].data
    assert 'feat' in dist_graph.nodes['game'].data
    if gpb is None:
        gpb = dist_graph.get_partition_book()
    sampled_graph = sample_neighbors(dist_graph, nodes, 3)
    block = dgl.to_block(sampled_graph, nodes)
    if sampled_graph.num_edges() > 0:
        block.edata[dgl.EID] = sampled_graph.edata[dgl.EID]
    dgl.distributed.exit_client()
    return block, gpb

def start_find_edges_client(rank, tmpdir, disable_shared_mem, eids):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _ = load_partition(tmpdir / 'test_find_edges.json', rank)
    dist_graph = DistGraph("rpc_ip_config.txt", "test_find_edges", gpb=gpb)
    u, v = find_edges(dist_graph, eids)
    dgl.distributed.exit_client()
    return u, v

def start_in_subgraph_client(rank, tmpdir, disable_shared_mem, nodes):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _ = load_partition(tmpdir / 'test_in_subgraph.json', rank)
    dist_graph = DistGraph("rpc_ip_config.txt", "test_in_subgraph", gpb=gpb)
    sampled_graph = dgl.distributed.in_subgraph(dist_graph, nodes)
    dgl.distributed.exit_client()
    return sampled_graph

def start_sample_client(rank, tmpdir, disable_shared_mem):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    dist_graph = DistGraph("rpc_ip_config.txt", "test_sampling", gpb=gpb)
    sampled_graph = sample_neighbors(dist_graph, [0, 10, 99, 66, 1024, 2008], 3)
    dgl.distributed.exit_client()
    return sampled_graph

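# A minimal sketch of how a client such as start_sample_client is typically
# launched from a test. The helper below is hypothetical and assumes the
# servers were started beforehand and rpc_ip_config.txt is in place; the
# sampled graph returned by each client is ignored here.
def _launch_sample_clients(tmpdir, num_clients, disable_shared_mem):
    import multiprocessing as mp
    procs = []
    for rank in range(num_clients):
        # Each client connects to the running servers from its own process.
        p = mp.Process(target=start_sample_client,
                       args=(rank, tmpdir, disable_shared_mem))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()
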
def start_in_subgraph_client(rank, tmpdir, disable_shared_mem, nodes):
    import dgl
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb = load_partition(tmpdir / 'test_in_subgraph.json', rank)
    dist_graph = DistGraph("rpc_ip_config.txt", "test_in_subgraph", gpb=gpb)
    sampled_graph = dgl.distributed.in_subgraph(dist_graph, nodes)
    dgl.distributed.shutdown_servers()
    dgl.distributed.finalize_client()
    return sampled_graph

def start_client(rank, tmpdir):
    import dgl
    _, _, _, gpb = load_partition(tmpdir / 'test_sampling.json', rank)
    dist_graph = DistGraph("rpc_sampling_ip_config.txt", "test_sampling", gpb=gpb)
    sampled_graph = sample_neighbors(dist_graph, [0, 10, 99, 66, 1024, 2008], 3)
    dgl.distributed.shutdown_servers()
    dgl.distributed.finalize_client()
    return sampled_graph

def start_hetero_etype_sample_client(rank, tmpdir, disable_shared_mem, fanout=3,
                                     nodes={'n3': [0, 10, 99, 66, 124, 208]},
                                     etype_sorted=False):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_sampling", gpb=gpb)
    assert 'feat' in dist_graph.nodes['n1'].data
    assert 'feat' not in dist_graph.nodes['n2'].data
    assert 'feat' not in dist_graph.nodes['n3'].data

    if dist_graph.local_partition is not None:
        # Check whether etypes are sorted in dist_graph
        local_g = dist_graph.local_partition
        local_nids = np.arange(local_g.num_nodes())
        for lnid in local_nids:
            leids = local_g.in_edges(lnid, form='eid')
            letids = F.asnumpy(local_g.edata[dgl.ETYPE][leids])
            _, indices = np.unique(letids, return_index=True)
            assert np.all(indices[:-1] <= indices[1:])

    if gpb is None:
        gpb = dist_graph.get_partition_book()
    try:
        sampled_graph = sample_etype_neighbors(
            dist_graph, nodes, dgl.ETYPE, fanout, etype_sorted=etype_sorted)
        block = dgl.to_block(sampled_graph, nodes)
        block.edata[dgl.EID] = sampled_graph.edata[dgl.EID]
    except Exception as e:
        print(e)
        block = None
    dgl.distributed.exit_client()
    return block, gpb

def test_standalone():
    os.environ['DGL_DIST_MODE'] = 'standalone'

    g = create_random_graph(10000)
    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_3'
    g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')

    dist_g = DistGraph("kv_ip_config.txt", graph_name,
                       part_config='/tmp/dist_graph/{}.json'.format(graph_name))
    check_dist_graph(dist_g, 1, g.number_of_nodes(), g.number_of_edges())

def start_in_subgraph_client(rank, tmpdir, disable_shared_mem, nodes):
    gpb = None
    dgl.distributed.initialize("rpc_ip_config.txt")
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_in_subgraph.json', rank)
    dist_graph = DistGraph("test_in_subgraph", gpb=gpb)
    try:
        sampled_graph = dgl.distributed.in_subgraph(dist_graph, nodes)
    except Exception as e:
        print(e)
        sampled_graph = None
    dgl.distributed.exit_client()
    return sampled_graph

def start_client(rank, tmpdir, disable_shared_mem, num_workers, drop_last):
    import dgl
    import torch as th
    os.environ['DGL_DIST_MODE'] = 'distributed'
    dgl.distributed.initialize("mp_ip_config.txt", num_workers=num_workers)
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    num_nodes_to_sample = 202
    batch_size = 32
    train_nid = th.arange(num_nodes_to_sample)
    dist_graph = DistGraph("mp_ip_config.txt", "test_mp", gpb=gpb)

    # Create sampler
    sampler = NeighborSampler(dist_graph, [5, 10], dgl.distributed.sample_neighbors)

    # We need to test creating DistDataLoader multiple times.
    for i in range(2):
        # Create DataLoader for constructing blocks
        dataloader = DistDataLoader(
            dataset=train_nid.numpy(),
            batch_size=batch_size,
            collate_fn=sampler.sample_blocks,
            shuffle=False,
            drop_last=drop_last)

        groundtruth_g = CitationGraphDataset("cora")[0]
        max_nid = []
        for epoch in range(2):
            for idx, blocks in zip(range(0, num_nodes_to_sample, batch_size), dataloader):
                block = blocks[-1]
                o_src, o_dst = block.edges()
                src_nodes_id = block.srcdata[dgl.NID][o_src]
                dst_nodes_id = block.dstdata[dgl.NID][o_dst]
                has_edges = groundtruth_g.has_edges_between(src_nodes_id, dst_nodes_id)
                assert np.all(F.asnumpy(has_edges))
                print(np.unique(np.sort(F.asnumpy(dst_nodes_id))))
                max_nid.append(np.max(F.asnumpy(dst_nodes_id)))
                # assert np.all(np.unique(np.sort(F.asnumpy(dst_nodes_id))) == np.arange(idx, batch_size))
    if drop_last:
        assert np.max(max_nid) == num_nodes_to_sample - 1 - num_nodes_to_sample % batch_size
    else:
        assert np.max(max_nid) == num_nodes_to_sample - 1
    # This is needed since there are two tests here in one process.
    dgl.distributed.exit_client()

def test_standalone():
    os.environ['DGL_DIST_MODE'] = 'standalone'

    g = create_random_graph(10000)
    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_3'
    g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')

    dgl.distributed.initialize("kv_ip_config.txt")
    dist_g = DistGraph(graph_name,
                       part_config='/tmp/dist_graph/{}.json'.format(graph_name))
    check_dist_graph(dist_g, 1, g.number_of_nodes(), g.number_of_edges())
    # This is needed since there are two tests here in one process.
    dgl.distributed.exit_client()

def start_get_degrees_client(rank, tmpdir, disable_shared_mem, nids=None):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_get_degrees.json', rank)
    dgl.distributed.initialize("rpc_ip_config.txt", 1)
    dist_graph = DistGraph("test_get_degrees", gpb=gpb)
    try:
        in_deg = dist_graph.in_degrees(nids)
        all_in_deg = dist_graph.in_degrees()
        out_deg = dist_graph.out_degrees(nids)
        all_out_deg = dist_graph.out_degrees()
    except Exception as e:
        print(e)
        in_deg, out_deg, all_in_deg, all_out_deg = None, None, None, None
    dgl.distributed.exit_client()
    return in_deg, out_deg, all_in_deg, all_out_deg

def start_dist_neg_dataloader(rank, tmpdir, num_server, num_workers, orig_nid, groundtruth_g):
    import dgl
    import torch as th
    dgl.distributed.initialize("mp_ip_config.txt")
    gpb = None
    disable_shared_mem = num_server > 1
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    num_edges_to_sample = 202
    batch_size = 32
    dist_graph = DistGraph("test_mp", gpb=gpb,
                           part_config=tmpdir / 'test_sampling.json')
    assert len(dist_graph.ntypes) == len(groundtruth_g.ntypes)
    assert len(dist_graph.etypes) == len(groundtruth_g.etypes)
    if len(dist_graph.etypes) == 1:
        train_eid = th.arange(num_edges_to_sample)
    else:
        train_eid = {dist_graph.etypes[0]: th.arange(num_edges_to_sample)}

    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json', i)

    num_negs = 5
    sampler = dgl.dataloading.MultiLayerNeighborSampler([5, 10])
    negative_sampler = dgl.dataloading.negative_sampler.Uniform(num_negs)
    dataloader = dgl.dataloading.DistEdgeDataLoader(
        dist_graph, train_eid, sampler,
        batch_size=batch_size,
        negative_sampler=negative_sampler,
        shuffle=True, drop_last=False,
        num_workers=num_workers)
    for _ in range(2):
        for _, (_, pos_graph, neg_graph, blocks) in zip(
                range(0, num_edges_to_sample, batch_size), dataloader):
            block = blocks[-1]
            for src_type, etype, dst_type in block.canonical_etypes:
                o_src, o_dst = block.edges(etype=etype)
                src_nodes_id = block.srcnodes[src_type].data[dgl.NID][o_src]
                dst_nodes_id = block.dstnodes[dst_type].data[dgl.NID][o_dst]
                src_nodes_id = orig_nid[src_type][src_nodes_id]
                dst_nodes_id = orig_nid[dst_type][dst_nodes_id]
                has_edges = groundtruth_g.has_edges_between(
                    src_nodes_id, dst_nodes_id, etype=etype)
                assert np.all(F.asnumpy(has_edges))
                assert np.all(
                    F.asnumpy(block.dstnodes[dst_type].data[dgl.NID]) ==
                    F.asnumpy(pos_graph.nodes[dst_type].data[dgl.NID]))
                assert np.all(
                    F.asnumpy(block.dstnodes[dst_type].data[dgl.NID]) ==
                    F.asnumpy(neg_graph.nodes[dst_type].data[dgl.NID]))
            assert pos_graph.num_edges() * num_negs == neg_graph.num_edges()
    del dataloader
    # This is needed since there are two tests here in one process.
    dgl.distributed.exit_client()

def start_edge_dataloader(rank, tmpdir, num_server, num_workers, orig_nid, orig_eid, groundtruth_g):
    import dgl
    import torch as th
    dgl.distributed.initialize("mp_ip_config.txt")
    gpb = None
    disable_shared_mem = num_server > 1
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    num_edges_to_sample = 202
    batch_size = 32
    dist_graph = DistGraph("test_mp", gpb=gpb,
                           part_config=tmpdir / 'test_sampling.json')
    assert len(dist_graph.ntypes) == len(groundtruth_g.ntypes)
    assert len(dist_graph.etypes) == len(groundtruth_g.etypes)
    if len(dist_graph.etypes) == 1:
        train_eid = th.arange(num_edges_to_sample)
    else:
        train_eid = {dist_graph.etypes[0]: th.arange(num_edges_to_sample)}

    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json', i)

    # Create sampler
    sampler = dgl.dataloading.MultiLayerNeighborSampler([5, 10])

    # We need to test creating DistDataLoader multiple times.
    for i in range(2):
        # Create DataLoader for constructing blocks
        dataloader = dgl.dataloading.EdgeDataLoader(
            dist_graph, train_eid, sampler,
            batch_size=batch_size, shuffle=True, drop_last=False,
            num_workers=num_workers)
        for epoch in range(2):
            for idx, (input_nodes, pos_pair_graph, blocks) in zip(
                    range(0, num_edges_to_sample, batch_size), dataloader):
                block = blocks[-1]
                for src_type, etype, dst_type in block.canonical_etypes:
                    o_src, o_dst = block.edges(etype=etype)
                    src_nodes_id = block.srcnodes[src_type].data[dgl.NID][o_src]
                    dst_nodes_id = block.dstnodes[dst_type].data[dgl.NID][o_dst]
                    src_nodes_id = orig_nid[src_type][src_nodes_id]
                    dst_nodes_id = orig_nid[dst_type][dst_nodes_id]
                    has_edges = groundtruth_g.has_edges_between(
                        src_nodes_id, dst_nodes_id, etype=etype)
                    assert np.all(F.asnumpy(has_edges))
                    assert np.all(
                        F.asnumpy(block.dstnodes[dst_type].data[dgl.NID]) ==
                        F.asnumpy(pos_pair_graph.nodes[dst_type].data[dgl.NID]))
                # assert np.all(np.unique(np.sort(F.asnumpy(dst_nodes_id))) == np.arange(idx, batch_size))
        del dataloader
    # This is needed since there are two tests here in one process.
    dgl.distributed.exit_client()

def run_client(graph_name, part_id, num_nodes, num_edges):
    time.sleep(5)
    gpb = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name),
                              part_id, None)
    g = DistGraph("kv_ip_config.txt", graph_name, gpb=gpb)

    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test reading node data
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats1 = g.ndata['features'][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))

    # Test reading edge data
    eids = F.arange(0, int(g.number_of_edges() / 2))
    feats1 = g.edata['features'][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))

    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.init_ndata('test1', new_shape, F.int32)
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test init edge data
    new_shape = (g.number_of_edges(), 2)
    g.init_edata('test1', new_shape, F.int32)
    feats = g.edata['test1'][eids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test sparse emb
    try:
        new_shape = (g.number_of_nodes(), 1)
        emb = SparseNodeEmbedding(g, 'emb1', new_shape, emb_init)
        lr = 0.001
        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats = emb(nids)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids), 1)))
            loss = F.sum(feats + 1, 0)
        loss.backward()
        optimizer.step()
        feats = emb(nids)
        assert_almost_equal(F.asnumpy(feats), np.ones((len(nids), 1)) * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))

        policy = dgl.distributed.PartitionPolicy('node', g.get_partition_book())
        grad_sum = dgl.distributed.DistTensor(g, 'node:emb1_sum', policy)
        assert np.all(F.asnumpy(grad_sum[nids]) == np.ones((len(nids), 1)))
        assert np.all(F.asnumpy(grad_sum[rest]) == np.zeros((len(rest), 1)))

        emb = SparseNodeEmbedding(g, 'emb2', new_shape, emb_init)
        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats1 = emb(nids)
            feats2 = emb(nids)
            feats = F.cat([feats1, feats2], 0)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids) * 2, 1)))
            loss = F.sum(feats + 1, 0)
        loss.backward()
        optimizer.step()
        feats = emb(nids)
        assert_almost_equal(F.asnumpy(feats),
                            np.ones((len(nids), 1)) * math.sqrt(2) * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
    except NotImplementedError:
        pass

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1,)

    selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book())
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes in the graph.
    local_nids = np.arange(g.number_of_nodes())
    for n in nodes:
        assert n in local_nids

    # clean up
    dgl.distributed.shutdown_servers()
    dgl.distributed.finalize_client()
    print('end')

def run_client(graph_name, barrier, num_nodes, num_edges):
    barrier.wait()
    g = DistGraph(server_namebook, graph_name)

    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test reading node data
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats1 = g.ndata['features'][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))

    # Test reading edge data
    eids = F.arange(0, int(g.number_of_edges() / 2))
    feats1 = g.edata['features'][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))

    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.init_ndata('test1', new_shape, F.int32)
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test init edge data
    new_shape = (g.number_of_edges(), 2)
    g.init_edata('test1', new_shape, F.int32)
    feats = g.edata['test1'][eids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1,)

    selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book(), g.rank())
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes in the graph.
    local_nids = np.arange(g.number_of_nodes())
    for n in nodes:
        assert n in local_nids

    g.shut_down()
    print('end')

def run_client(graph_name, part_id, num_nodes, num_edges):
    gpb = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name),
                              part_id, None)
    g = DistGraph("kv_ip_config.txt", graph_name, gpb=gpb)

    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test reading node data
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats1 = g.ndata['features'][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))

    # Test reading edge data
    eids = F.arange(0, int(g.number_of_edges() / 2))
    feats1 = g.edata['features'][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))

    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.init_ndata('test1', new_shape, F.int32)
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test init edge data
    new_shape = (g.number_of_edges(), 2)
    g.init_edata('test1', new_shape, F.int32)
    feats = g.edata['test1'][eids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1,)

    selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book(), g.rank())
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes in the graph.
    local_nids = np.arange(g.number_of_nodes())
    for n in nodes:
        assert n in local_nids

    # clean up
    dgl.distributed.shutdown_servers()
    dgl.distributed.finalize_client()
    print('end')