def run_client(graph_name, part_id, num_nodes, num_edges):
    """Run the client-side DistGraph checks for one partition.

    Loads the partition book from ``/tmp/dist_graph/<graph_name>.json``,
    builds a ``DistGraph`` from ``kv_ip_config.txt`` and asserts on graph
    sizes, node/edge feature reads, freshly initialised data, writes,
    metadata and ``node_split``. Ends by calling
    ``dgl.distributed.shutdown_servers()`` and
    ``dgl.distributed.finalize_client()``.

    Args:
        graph_name: Graph name; also selects the partition config file.
        part_id: Partition ID handed to ``load_partition_book``.
        num_nodes: Expected ``g.number_of_nodes()``.
        num_edges: Expected ``g.number_of_edges()``.
    """
    gpb = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name),
                              part_id, None)
    g = DistGraph("kv_ip_config.txt", graph_name, gpb=gpb)

    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test reading node data
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats1 = g.ndata['features'][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))

    # Test reading edge data
    eids = F.arange(0, int(g.number_of_edges() / 2))
    feats1 = g.edata['features'][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))

    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.init_ndata('test1', new_shape, F.int32)
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test init edge data
    new_shape = (g.number_of_edges(), 2)
    g.init_edata('test1', new_shape, F.int32)
    feats = g.edata['test1'][eids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1, )

    selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book(), g.rank())
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes in the graph.
    local_nids = np.arange(g.number_of_nodes())
    # One vectorised membership test instead of a Python loop that scans
    # ``local_nids`` once per returned node.
    assert np.all(np.isin(nodes, local_nids))

    # clean up
    dgl.distributed.shutdown_servers()
    dgl.distributed.finalize_client()
    print('end')
def test_random_choice():
    """Smoke-test ``dgl.random.choice`` over a population of 100 items.

    Covers sampling with replacement, without replacement (small and full
    size), an integer population argument, and a probability vector whose
    entries 37..39 are zero.
    """

    def _values(sample):
        # Pull every element out individually as a NumPy value.
        return [F.asnumpy(sample[k]) for k in range(len(sample))]

    # test 1
    population = F.arange(0, 100)
    drawn = dgl.random.choice(population, 10, replace=True, prob=None)
    assert len(drawn) == 10
    assert all(0 <= val < 100 for val in _values(drawn))

    # test 2, replace=False, small num
    population = F.arange(0, 100)
    drawn = dgl.random.choice(population, 10, replace=False, prob=None)
    assert len(drawn) == 10
    assert all(0 <= val < 100 for val in _values(drawn))

    # test 3, replace=False, large num
    population = F.arange(0, 100)
    drawn = dgl.random.choice(population, 100, replace=False, prob=None)
    assert len(drawn) == 100
    assert np.array_equal(np.sort(F.asnumpy(drawn)), F.asnumpy(population))

    # test 4, first arg is integer
    drawn = dgl.random.choice(100, 100, replace=False, prob=None)
    assert len(drawn) == 100
    assert np.array_equal(np.sort(F.asnumpy(drawn)), F.asnumpy(population))

    # test 5, with prob
    weights = np.ones((100, ))
    weights[37:40] = 0.
    weights -= weights.min()
    weights /= weights.sum()
    drawn = dgl.random.choice(100, 97, replace=False, prob=F.tensor(weights))
    assert len(drawn) == 97
    # Zero-probability entries must never be selected.
    assert all(val < 37 or val >= 40 for val in _values(drawn))
def test_edge_subgraph():
    """Check that data can be attached to an edge subgraph of a data-free graph."""
    # Test when the graph has no node data and edge data.
    parent = generate_graph(add_data=False)
    sub = parent.edge_subgraph([0, 2, 3, 6, 7, 9])
    node_ids = F.arange(0, sub.number_of_nodes())
    sub.ndata['h'] = node_ids
    edge_ids = F.arange(0, sub.number_of_edges())
    sub.edata['h'] = edge_ids
def test_segment_reduce(reducer):
    """Compare ``segment_reduce`` against ``gspmm`` on an equivalent bipartite graph.

    Both the forward results and the gradients w.r.t. the input values are
    required to match under ``F.allclose``.

    Args:
        reducer: Reducer name forwarded to ``gspmm`` and ``segment_reduce``.
    """
    device = F.ctx()
    value = F.tensor(np.random.rand(10, 5))
    v1 = F.attach_grad(F.clone(value))
    v2 = F.attach_grad(F.clone(value))
    seglen = F.tensor([2, 3, 0, 4, 1, 0, 0])

    # Edge list: row k of ``value`` points at the segment it belongs to.
    num_rows = F.shape(value)[0]
    u = F.copy_to(F.arange(0, num_rows, F.int32), device)
    segment_ids = F.copy_to(F.arange(0, len(seglen), F.int32), device)
    v = F.repeat(segment_ids, seglen, dim=0)

    g = dgl.convert.heterograph(
        {('_U', '_E', '_V'): (u, v)},
        num_nodes_dict={'_U': len(u), '_V': len(seglen)})

    with F.record_grad():
        rst1 = gspmm(g, 'copy_lhs', reducer, v1, None)
        if reducer in ('max', 'min'):
            rst1 = F.replace_inf_with_zero(rst1)
        F.backward(F.reduce_sum(rst1))
        grad1 = F.grad(v1)

    with F.record_grad():
        rst2 = segment_reduce(seglen, v2, reducer=reducer)
        F.backward(F.reduce_sum(rst2))
        assert F.allclose(rst1, rst2)
        print('forward passed')

        grad2 = F.grad(v2)
        assert F.allclose(grad1, grad2)
        print('backward passed')
def test_server_client():
    """Partition a random graph into one part and run one server and one client.

    The server and client are separate processes synchronised through a
    two-party barrier; the test waits for the client process to finish.
    """
    g = create_random_graph(10000)

    # Partition the graph
    num_parts = 1
    graph_name = 'test'
    node_count = g.number_of_nodes()
    edge_count = g.number_of_edges()
    g.ndata['features'] = F.unsqueeze(F.arange(0, node_count), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, edge_count), 1)
    partition_graph(g, graph_name, num_parts, '/tmp')

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    barrier = mp.Barrier(2)
    serv_ps = []
    for serv_id in range(1):
        server = Process(target=run_server,
                         args=(graph_name, serv_id, 1, barrier))
        serv_ps.append(server)
        server.start()

    cli_ps = []
    for cli_id in range(1):
        print('start client', cli_id)
        client = Process(target=run_client,
                         args=(graph_name, barrier,
                               g.number_of_nodes(), g.number_of_edges()))
        client.start()
        cli_ps.append(client)

    for client in cli_ps:
        client.join()
    print('clients have terminated')
def test_graph_partition_book():
    """Build a ``GraphPartitionBook`` for each of 4 METIS partitions and check its lookups."""
    g = create_random_graph(10000)
    g.ndata['labels'] = F.arange(0, g.number_of_nodes())
    g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10))
    num_parts = 4
    num_hops = 2

    partition_graph(g, 'gpb_test', num_parts, '/tmp/gpb',
                    num_hops=num_hops, part_method='metis')

    for part_id in range(num_parts):
        part_g, _node_feats, _edge_feats, meta = load_partition(
            '/tmp/gpb/gpb_test.json', part_id)
        _num_nodes, _num_edges, node_map, edge_map, num_partitions = meta
        gpb = GraphPartitionBook(part_id=part_id,
                                 num_parts=num_partitions,
                                 node_map=node_map,
                                 edge_map=edge_map,
                                 part_graph=part_g)
        assert gpb.num_partitions() == num_parts
        gpb_meta = gpb.metadata()
        assert len(gpb_meta) == num_parts

        # Global ID -> partition ID must reproduce the stored maps.
        all_nids = F.arange(0, len(node_map))
        assert np.all(F.asnumpy(gpb.nid2partid(all_nids)) == node_map)
        all_eids = F.arange(0, len(edge_map))
        assert np.all(F.asnumpy(gpb.eid2partid(all_eids)) == edge_map)

        # Per-partition sizes must agree with the metadata.
        assert len(gpb.partid2nids(part_id)) == gpb_meta[part_id]['num_nodes']
        assert len(gpb.partid2eids(part_id)) == gpb_meta[part_id]['num_edges']

        # Global -> local mapping of the partition's own IDs is 0..n-1.
        local_nid = gpb.nid2localnid(part_g.ndata[dgl.NID], part_id)
        expected_nid = F.asnumpy(F.arange(0, len(local_nid)))
        assert np.all(F.asnumpy(local_nid) == expected_nid)
        local_eid = gpb.eid2localeid(part_g.edata[dgl.EID], part_id)
        expected_eid = F.asnumpy(F.arange(0, len(local_eid)))
        assert np.all(F.asnumpy(local_eid) == expected_eid)
def check_server_client(shared_mem):
    """Run one spawned server and one spawned client over a single-partition graph.

    Args:
        shared_mem: Forwarded to ``run_server`` as its last argument.
    """
    prepare_dist()
    g = create_random_graph(10000)

    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_2'
    node_count = g.number_of_nodes()
    edge_count = g.number_of_edges()
    g.ndata['features'] = F.unsqueeze(F.arange(0, node_count), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, edge_count), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    serv_ps = []
    ctx = mp.get_context('spawn')
    for serv_id in range(1):
        server = ctx.Process(target=run_server,
                             args=(graph_name, serv_id, 1, shared_mem))
        serv_ps.append(server)
        server.start()

    cli_ps = []
    for cli_id in range(1):
        print('start client', cli_id)
        client = ctx.Process(target=run_client,
                             args=(graph_name, cli_id,
                                   g.number_of_nodes(), g.number_of_edges()))
        client.start()
        cli_ps.append(client)

    # Wait for the clients first, then for the servers.
    for proc in cli_ps:
        proc.join()
    for proc in serv_ps:
        proc.join()

    print('clients have terminated')
def _test_map_to_subgraph():
    """Check ``map_to_subgraph_nid`` on a node-induced subgraph of a 10-node chain."""
    chain = dgl.DGLGraph()
    chain.add_nodes(10)
    # Edges i -> i + 1 for i in 0..8.
    chain.add_edges(F.arange(0, 9), F.arange(1, 10))
    sub = chain.subgraph([0, 1, 2, 5, 8])
    mapped = sub.map_to_subgraph_nid([0, 8, 2])
    expected = np.array([0, 4, 2])
    assert np.array_equal(F.asnumpy(mapped), expected)
def _test_layer_sampler(prefetch=False):
    """Check the structure of the subgraphs yielded by ``LayerSampler``.

    For every sampled subgraph this verifies layer sizes, that layer node
    IDs and parent node/edge IDs are within range, that the last layer
    matches one of the seed batches, that every block edge maps to a parent
    edge with the same endpoints, and that layers/blocks together cover all
    subgraph nodes/edges.

    Args:
        prefetch: Forwarded to the ``LayerSampler`` constructor.
    """
    g = generate_rand_graph(100)
    nid = g.nodes()
    src, dst, eid = g.all_edges(form='all', order='eid')
    n_batches = 5
    batch_size = 50
    seed_batches = [
        np.sort(np.random.choice(F.asnumpy(nid), batch_size, replace=False))
        for _ in range(n_batches)
    ]
    seed_nodes = np.hstack(seed_batches)
    layer_sizes = [50] * 3
    LayerSampler = getattr(dgl.contrib.sampling, 'LayerSampler')
    sampler = LayerSampler(g, batch_size, layer_sizes, 'in',
                           seed_nodes=seed_nodes, num_workers=4,
                           prefetch=prefetch)
    for sub_g in sampler:
        assert all(
            sub_g.layer_size(i) < size for i, size in enumerate(layer_sizes))
        sub_nid = F.arange(0, sub_g.number_of_nodes())
        assert all(
            np.all(np.isin(F.asnumpy(sub_g.layer_nid(i)), F.asnumpy(sub_nid)))
            for i in range(sub_g.num_layers))
        assert np.all(
            np.isin(F.asnumpy(sub_g.map_to_parent_nid(sub_nid)),
                    F.asnumpy(nid)))
        sub_eid = F.arange(0, sub_g.number_of_edges())
        assert np.all(
            np.isin(F.asnumpy(sub_g.map_to_parent_eid(sub_eid)),
                    F.asnumpy(eid)))
        # The last layer must be exactly one of the seed batches.
        assert any(
            np.all(
                np.sort(F.asnumpy(sub_g.layer_parent_nid(-1))) == seed_batch)
            for seed_batch in seed_batches)

        sub_src, sub_dst = sub_g.all_edges(order='eid')
        for i in range(sub_g.num_blocks):
            block_eid = sub_g.block_eid(i)
            block_src = sub_g.map_to_parent_nid(sub_src[block_eid])
            block_dst = sub_g.map_to_parent_nid(sub_dst[block_eid])

            block_parent_eid = sub_g.block_parent_eid(i)
            block_parent_src = src[block_parent_eid]
            block_parent_dst = dst[block_parent_eid]

            assert np.all(F.asnumpy(block_src == block_parent_src))
            # The destination endpoints were computed but never compared;
            # verify both ends of each edge, not just the source.
            assert np.all(F.asnumpy(block_dst == block_parent_dst))

        n_layers = sub_g.num_layers
        sub_n = sub_g.number_of_nodes()
        assert sum(F.shape(sub_g.layer_nid(i))[0]
                   for i in range(n_layers)) == sub_n
        n_blocks = sub_g.num_blocks
        sub_m = sub_g.number_of_edges()
        assert sum(F.shape(sub_g.block_eid(i))[0]
                   for i in range(n_blocks)) == sub_m
def start_dist_dataloader(rank, tmpdir, num_server, drop_last):
    """Client process body: iterate a ``DistDataLoader`` and validate sampled blocks.

    Initialises the distributed client from ``mp_ip_config.txt``, opens the
    ``test_mp`` DistGraph, then builds a ``DistDataLoader`` twice. For each
    batch, the edges of the last block are mapped back to original node IDs
    and must exist in the Cora ground-truth graph. The largest destination
    node ID seen is compared against the value expected for ``drop_last``.
    Calls ``dgl.distributed.exit_client()`` at the end.

    Args:
        rank: Partition ID used when loading this client's partition book.
        tmpdir: Directory holding ``test_sampling.json``; combined with the
            file name via the ``/`` operator.
        num_server: Number of servers; also the number of partitions scanned
            for ``orig_id``. A positive value makes the client load its own
            partition book.
        drop_last: Forwarded to ``DistDataLoader``; selects the expected
            maximum node ID.
    """
    import dgl
    import torch as th
    dgl.distributed.initialize("mp_ip_config.txt")
    part_config = tmpdir / 'test_sampling.json'
    gpb = None
    disable_shared_mem = num_server > 0
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(part_config, rank)
    num_nodes_to_sample = 202
    batch_size = 32
    train_nid = th.arange(num_nodes_to_sample)
    dist_graph = DistGraph("test_mp", gpb=gpb, part_config=part_config)

    # Start from the identity mapping and overwrite it wherever a partition
    # carries 'orig_id' data.
    orig_nid = F.arange(0, dist_graph.number_of_nodes())
    orig_eid = F.arange(0, dist_graph.number_of_edges())
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(part_config, i)
        if 'orig_id' in part.ndata:
            orig_nid[part.ndata[dgl.NID]] = part.ndata['orig_id']
        if 'orig_id' in part.edata:
            orig_eid[part.edata[dgl.EID]] = part.edata['orig_id']

    # Create sampler
    sampler = NeighborSampler(dist_graph, [5, 10],
                              dgl.distributed.sample_neighbors)

    # The ground-truth graph does not depend on the loop below, so load it
    # once instead of once per DistDataLoader.
    groundtruth_g = CitationGraphDataset("cora")[0]

    # We need to test creating DistDataLoader multiple times.
    for i in range(2):
        # Create DataLoader for constructing blocks
        dataloader = DistDataLoader(
            dataset=train_nid.numpy(),
            batch_size=batch_size,
            collate_fn=sampler.sample_blocks,
            shuffle=False,
            drop_last=drop_last)

        max_nid = []

        for epoch in range(2):
            # zip() with the range caps the number of batches consumed.
            for idx, blocks in zip(range(0, num_nodes_to_sample, batch_size), dataloader):
                block = blocks[-1]
                o_src, o_dst = block.edges()
                src_nodes_id = block.srcdata[dgl.NID][o_src]
                dst_nodes_id = block.dstdata[dgl.NID][o_dst]
                max_nid.append(np.max(F.asnumpy(dst_nodes_id)))

                src_nodes_id = orig_nid[src_nodes_id]
                dst_nodes_id = orig_nid[dst_nodes_id]
                has_edges = groundtruth_g.has_edges_between(src_nodes_id, dst_nodes_id)
                assert np.all(F.asnumpy(has_edges))
                # assert np.all(np.unique(np.sort(F.asnumpy(dst_nodes_id))) == np.arange(idx, batch_size))
            if drop_last:
                assert np.max(max_nid) == num_nodes_to_sample - 1 - num_nodes_to_sample % batch_size
            else:
                assert np.max(max_nid) == num_nodes_to_sample - 1
    del dataloader
    dgl.distributed.exit_client()  # this is needed since there's two test here in one process
def run_client(graph_name, barrier, num_nodes, num_edges):
    """Run the client-side DistGraph checks after synchronising on ``barrier``.

    Waits on the barrier, connects to the graph through ``server_namebook``
    and asserts on graph sizes, node/edge feature reads, freshly initialised
    data, writes, metadata and ``node_split``. Ends by calling
    ``g.shut_down()``.

    Args:
        graph_name: Name of the distributed graph to open.
        barrier: Object whose ``wait()`` is called before connecting.
        num_nodes: Expected ``g.number_of_nodes()``.
        num_edges: Expected ``g.number_of_edges()``.
    """
    barrier.wait()
    g = DistGraph(server_namebook, graph_name)

    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test reading node data
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats1 = g.ndata['features'][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))

    # Test reading edge data
    eids = F.arange(0, int(g.number_of_edges() / 2))
    feats1 = g.edata['features'][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))

    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.init_ndata('test1', new_shape, F.int32)
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test init edge data
    new_shape = (g.number_of_edges(), 2)
    g.init_edata('test1', new_shape, F.int32)
    feats = g.edata['test1'][eids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1, )

    selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book(), g.rank())
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes in the graph.
    local_nids = np.arange(g.number_of_nodes())
    # One vectorised membership test instead of a Python loop that scans
    # ``local_nids`` once per returned node.
    assert np.all(np.isin(nodes, local_nids))

    g.shut_down()
    print('end')
def check_partition(reshuffle):
    """Partition a random graph into 4 METIS parts and validate each loaded part.

    Args:
        reshuffle: Forwarded to ``partition_graph``. When truthy, the
            node/edge-to-partition lookup is additionally checked against
            contiguous ID ranges derived from the partition sizes.
    """
    g = create_random_graph(10000)
    g.ndata['labels'] = F.arange(0, g.number_of_nodes())
    g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10))
    num_parts = 4
    num_hops = 2

    partition_graph(g, 'test', num_parts, '/tmp/partition',
                    num_hops=num_hops, part_method='metis',
                    reshuffle=reshuffle)

    part_sizes = []
    for part_id in range(num_parts):
        part_g, node_feats, edge_feats, gpb = load_partition(
            '/tmp/partition/test.json', part_id)

        # Check the metadata
        assert gpb._num_nodes() == g.number_of_nodes()
        assert gpb._num_edges() == g.number_of_edges()

        assert gpb.num_partitions() == num_parts
        gpb_meta = gpb.metadata()
        assert len(gpb_meta) == num_parts
        part_num_nodes = gpb_meta[part_id]['num_nodes']
        part_num_edges = gpb_meta[part_id]['num_edges']
        assert len(gpb.partid2nids(part_id)) == part_num_nodes
        assert len(gpb.partid2eids(part_id)) == part_num_edges
        part_sizes.append((part_num_nodes, part_num_edges))

        # Global IDs of the nodes/edges owned by this partition.
        inner_nids = F.boolean_mask(part_g.ndata[dgl.NID],
                                    part_g.ndata['inner_node'])
        inner_eids = F.boolean_mask(part_g.edata[dgl.EID],
                                    part_g.edata['inner_edge'])

        local_nid = gpb.nid2localnid(inner_nids, part_id)
        assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid)))
        local_eid = gpb.eid2localeid(inner_eids, part_id)
        assert np.all(F.asnumpy(local_eid) == np.arange(0, len(local_eid)))

        # Check the node map.
        local_nodes = F.asnumpy(inner_nids)
        local_nodes1 = F.asnumpy(gpb.partid2nids(part_id))
        assert np.all(np.sort(local_nodes) == np.sort(local_nodes1))

        # Check the edge map.
        local_edges = F.asnumpy(inner_eids)
        local_edges1 = F.asnumpy(gpb.partid2eids(part_id))
        assert np.all(np.sort(local_edges) == np.sort(local_edges1))

        for name in ('labels', 'feats'):
            assert name in node_feats
            assert node_feats[name].shape[0] == len(local_nodes)
            assert len(local_nodes) == len(node_feats[name])
            expected = F.asnumpy(g.ndata[name])[local_nodes]
            assert np.all(expected == F.asnumpy(node_feats[name]))
        assert len(edge_feats) == 0

    if reshuffle:
        # Expected maps: partition k owns one contiguous run of IDs whose
        # length is that partition's size. Uses the partition book loaded
        # for the last partition.
        node_map = np.concatenate(
            [np.ones(n_nodes) * k for k, (n_nodes, _) in enumerate(part_sizes)])
        edge_map = np.concatenate(
            [np.ones(n_edges) * k for k, (_, n_edges) in enumerate(part_sizes)])
        all_nids = F.arange(0, len(node_map))
        assert np.all(F.asnumpy(gpb.nid2partid(all_nids)) == node_map)
        all_eids = F.arange(0, len(edge_map))
        assert np.all(F.asnumpy(gpb.eid2partid(all_eids)) == edge_map)
def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
    """Run servers and clients, then check the clients jointly return the masked IDs.

    Random node and edge masks covering a tenth of the graph are handed to
    every client; the IDs the clients report through a shared dict are
    concatenated, sorted and compared with the selected IDs.

    Args:
        shared_mem: Forwarded to ``run_server``.
        num_servers: Number of server processes to spawn.
        num_clients: Number of client processes to spawn; also passed to
            ``partition_graph`` as ``num_trainers_per_machine``.
    """
    prepare_dist()
    g = create_random_graph(10000)

    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_2'
    total_nodes = g.number_of_nodes()
    total_edges = g.number_of_edges()
    g.ndata['features'] = F.unsqueeze(F.arange(0, total_nodes), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, total_edges), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph',
                    num_trainers_per_machine=num_clients)

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    serv_ps = []
    ctx = mp.get_context('spawn')
    for serv_id in range(num_servers):
        server = ctx.Process(
            target=run_server,
            args=(graph_name, serv_id, num_servers, num_clients, shared_mem))
        serv_ps.append(server)
        server.start()

    cli_ps = []
    manager = mp.Manager()
    return_dict = manager.dict()
    node_mask = np.zeros((g.number_of_nodes(),), np.int32)
    edge_mask = np.zeros((g.number_of_edges(),), np.int32)
    nodes = np.random.choice(g.number_of_nodes(), g.number_of_nodes() // 10,
                             replace=False)
    edges = np.random.choice(g.number_of_edges(), g.number_of_edges() // 10,
                             replace=False)
    node_mask[nodes] = 1
    edge_mask[edges] = 1
    nodes = np.sort(nodes)
    edges = np.sort(edges)
    for cli_id in range(num_clients):
        print('start client', cli_id)
        client = ctx.Process(
            target=run_client_hierarchy,
            args=(graph_name, 0, num_servers, node_mask, edge_mask,
                  return_dict))
        client.start()
        cli_ps.append(client)

    for proc in cli_ps:
        proc.join()
    for proc in serv_ps:
        proc.join()

    # Gather what every client reported and compare with the selection.
    reported = list(return_dict.values())
    nodes1, _ = F.sort_1d(F.cat([n for n, _ in reported], 0))
    edges1, _ = F.sort_1d(F.cat([e for _, e in reported], 0))
    assert np.all(F.asnumpy(nodes1) == nodes)
    assert np.all(F.asnumpy(edges1) == edges)

    print('clients have terminated')
def test_standalone():
    """Run ``check_dist_graph`` on a single-partition graph with DGL_DIST_MODE=standalone."""
    os.environ['DGL_DIST_MODE'] = 'standalone'
    g = create_random_graph(10000)

    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_3'
    node_count = g.number_of_nodes()
    edge_count = g.number_of_edges()
    g.ndata['features'] = F.unsqueeze(F.arange(0, node_count), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, edge_count), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')

    part_config = '/tmp/dist_graph/{}.json'.format(graph_name)
    dist_g = DistGraph("kv_ip_config.txt", graph_name, part_config=part_config)
    check_dist_graph(dist_g, 1, g.number_of_nodes(), g.number_of_edges())
def check_dist_emb_server_client(shared_mem, num_servers, num_clients, num_groups=1):
    """Spawn servers and embedding clients and require every client to exit with code 0.

    Args:
        shared_mem: Forwarded to ``run_server``.
        num_servers: Number of server processes.
        num_clients: Number of clients started per group.
        num_groups: Number of client groups. With more than one group the
            servers are started with ``keep_alive`` set and are shut down
            explicitly once all clients have finished.
    """
    prepare_dist(num_servers)
    g = create_random_graph(10000)

    # Partition the graph
    num_parts = 1
    graph_name = f'check_dist_emb_{shared_mem}_{num_servers}_{num_clients}_{num_groups}'
    node_count = g.number_of_nodes()
    edge_count = g.number_of_edges()
    g.ndata['features'] = F.unsqueeze(F.arange(0, node_count), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, edge_count), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    serv_ps = []
    ctx = mp.get_context('spawn')
    keep_alive = num_groups > 1
    for serv_id in range(num_servers):
        server = ctx.Process(
            target=run_server,
            args=(graph_name, serv_id, num_servers, num_clients,
                  shared_mem, keep_alive))
        serv_ps.append(server)
        server.start()

    cli_ps = []
    for cli_id in range(num_clients):
        for group_id in range(num_groups):
            print('start client[{}] for group[{}]'.format(cli_id, group_id))
            client = ctx.Process(
                target=run_emb_client,
                args=(graph_name, 0, num_servers, num_clients,
                      g.number_of_nodes(), g.number_of_edges(), group_id))
            client.start()
            time.sleep(1)  # avoid race condition when instantiating DistGraph
            cli_ps.append(client)

    for client in cli_ps:
        client.join()
        assert client.exitcode == 0

    if keep_alive:
        for server in serv_ps:
            assert server.is_alive()
        # force shutdown server
        dgl.distributed.shutdown_servers("kv_ip_config.txt", num_servers)
    for server in serv_ps:
        server.join()

    print('clients have terminated')
def test_standalone():
    """Run ``check_dist_graph`` on a single-partition graph in standalone mode.

    Sets ``DGL_DIST_MODE=standalone``, partitions a random graph into one
    part, initialises the distributed client from ``kv_ip_config.txt`` and
    checks the resulting ``DistGraph``. The client is always torn down with
    ``dgl.distributed.exit_client()``, even when a check fails, so a failure
    here does not leave client state behind for other tests running in the
    same process.
    """
    os.environ['DGL_DIST_MODE'] = 'standalone'

    g = create_random_graph(10000)
    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_3'
    g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')

    dgl.distributed.initialize("kv_ip_config.txt")
    try:
        dist_g = DistGraph(graph_name,
                           part_config='/tmp/dist_graph/{}.json'.format(graph_name))
        check_dist_graph(dist_g, 1, g.number_of_nodes(), g.number_of_edges())
    finally:
        # this is needed since there's two test here in one process
        dgl.distributed.exit_client()
def create_random_hetero():
    """Build a random heterograph with node types n1/n2/n3 and relations r1/r2/r3.

    Each relation's edges come from a sparse random matrix of density 0.001
    generated with ``random_state=100``. A ``'feat'`` column holding
    consecutive IDs is attached to node type ``n1`` and edge type ``r1``.

    Returns:
        The constructed heterograph.
    """
    num_nodes = {'n1': 10000, 'n2': 10010, 'n3': 10020}
    etypes = [('n1', 'r1', 'n2'),
              ('n1', 'r2', 'n3'),
              ('n2', 'r3', 'n3')]

    edges = {}
    for src_ntype, rel, dst_ntype in etypes:
        adj = spsp.random(num_nodes[src_ntype], num_nodes[dst_ntype],
                          density=0.001, format='coo', random_state=100)
        edges[(src_ntype, rel, dst_ntype)] = (adj.row, adj.col)

    g = dgl.heterograph(edges, num_nodes)
    n1_ids = F.arange(0, g.number_of_nodes('n1'))
    g.nodes['n1'].data['feat'] = F.unsqueeze(n1_ids, 1)
    r1_ids = F.arange(0, g.number_of_edges('r1'))
    g.edges['r1'].data['feat'] = F.unsqueeze(r1_ids, 1)
    return g
def run_client(graph_name, barrier, num_nodes, num_edges):
    """Client process body: wait on ``barrier``, then exercise the DistGraph API.

    Asserts on graph sizes, node/edge feature reads, freshly initialised
    node/edge data, a write round trip and metadata, then calls
    ``g.shut_down()``.

    Args:
        graph_name: Name of the distributed graph to open.
        barrier: Object whose ``wait()`` is called before connecting.
        num_nodes: Expected ``g.number_of_nodes()``.
        num_edges: Expected ``g.number_of_edges()``.
    """
    barrier.wait()
    g = DistGraph(server_namebook, graph_name)

    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test reading node data
    node_ids = F.arange(0, int(g.number_of_nodes() / 2))
    node_feats = F.squeeze(g.ndata['features'][node_ids], 1)
    assert np.all(F.asnumpy(node_feats == node_ids))

    # Test reading edge data
    edge_ids = F.arange(0, int(g.number_of_edges() / 2))
    edge_feats = F.squeeze(g.edata['features'][edge_ids], 1)
    assert np.all(F.asnumpy(edge_feats == edge_ids))

    # Test init node data
    g.init_ndata('test1', (g.number_of_nodes(), 2), F.int32)
    fetched = g.ndata['test1'][node_ids]
    assert np.all(F.asnumpy(fetched) == 0)

    # Test init edge data
    g.init_edata('test1', (g.number_of_edges(), 2), F.int32)
    fetched = g.edata['test1'][edge_ids]
    assert np.all(F.asnumpy(fetched) == 0)

    # Test write data
    ones = F.ones((len(node_ids), 2), F.int32, F.cpu())
    g.ndata['test1'][node_ids] = ones
    fetched = g.ndata['test1'][node_ids]
    assert np.all(F.asnumpy(fetched) == 1)

    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1, )

    g.shut_down()
    print('end')
def test_edge_removal(idtype):
    """Check edge data and stored IDs across remove/add/remove on a 5-node graph.

    Args:
        idtype: ID dtype the graph is cast to before edges are added.
    """
    g = dgl.DGLGraph()
    g = g.astype(idtype).to(F.ctx())
    g.add_nodes(5)
    # Fully connect the graph, self-loops included, in row-major order.
    for src, dst in ((i, j) for i in range(5) for j in range(5)):
        g.add_edge(src, dst)
    g.edata['id'] = F.arange(0, 25)

    # remove edges
    g.remove_edges(range(13, 20))
    survivors = [*range(13), *range(20, 25)]
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 18
    assert F.array_equal(g.edata['id'], F.tensor(survivors))
    assert dgl.NID not in g.ndata
    assert dgl.EID not in g.edata

    # add edges
    g.add_edge(3, 3)
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 19
    assert F.array_equal(g.edata['id'], F.tensor(survivors + [0]))

    # remove edges
    g.remove_edges(range(2, 10), store_ids=True)
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 11
    expected = [0, 1, 10, 11, 12, 20, 21, 22, 23, 24, 0]
    assert F.array_equal(g.edata['id'], F.tensor(expected))
    assert dgl.NID in g.ndata
    assert dgl.EID in g.edata
def test_basic():
    """Check basic NodeFlow properties for full and mini-batch NodeFlows."""
    num_layers = 2
    expected_layers = num_layers + 1

    g = generate_rand_graph(100, connect_more=True)
    nf = create_full_nodeflow(g, num_layers)
    graph_nodes = g.number_of_nodes()
    assert nf.number_of_nodes() == graph_nodes * expected_layers
    assert nf.number_of_edges() == g.number_of_edges() * num_layers
    assert nf.num_layers == expected_layers
    assert nf.layer_size(0) == g.number_of_nodes()
    assert nf.layer_size(1) == g.number_of_nodes()
    check_basic(g, nf)

    parent_nids = F.copy_to(F.arange(0, g.number_of_nodes()), F.cpu())
    nids = nf.map_from_parent_nid(0, parent_nids, remap_local=True)
    assert_array_equal(F.asnumpy(nids), F.asnumpy(parent_nids))

    # should also work for negative layer ids
    for layer_id in range(-1, -num_layers, -1):
        from_negative = nf.map_from_parent_nid(
            layer_id, parent_nids, remap_local=True)
        from_shifted = nf.map_from_parent_nid(
            layer_id + num_layers, parent_nids, remap_local=True)
        assert_array_equal(F.asnumpy(from_negative), F.asnumpy(from_shifted))

    g = generate_rand_graph(100)
    nf = create_mini_batch(g, num_layers)
    assert nf.num_layers == expected_layers
    check_basic(g, nf)

    g = generate_rand_graph(100, add_self_loop=True)
    nf = create_mini_batch(g, num_layers, add_self_loop=True)
    assert nf.num_layers == expected_layers
    check_basic(g, nf)
def test_partition():
    """Partition a random graph into 4 METIS parts and validate each part's maps and features."""
    g = create_random_graph(10000)
    g.ndata['labels'] = F.arange(0, g.number_of_nodes())
    g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10))
    num_parts = 4
    num_hops = 2

    partition_graph(g, 'test', num_parts, '/tmp',
                    num_hops=num_hops, part_method='metis')

    for part_id in range(num_parts):
        part_g, node_feats, edge_feats, meta = load_partition(
            '/tmp/test.json', part_id)
        num_nodes, num_edges, node_map, edge_map = meta

        # Check the metadata
        assert num_nodes == g.number_of_nodes()
        assert num_edges == g.number_of_edges()

        # Check the node map.
        part_nids = F.asnumpy(part_g.ndata[dgl.NID])
        local_nodes = np.nonzero(node_map == part_id)[0]
        local_nodes1 = part_nids[node_map[part_nids] == part_id]
        assert np.all(local_nodes == local_nodes1)

        # Check the edge map.
        assert np.all(edge_map >= 0)
        part_eids = F.asnumpy(part_g.edata[dgl.EID])
        local_edges = np.nonzero(edge_map == part_id)[0]
        local_edges1 = part_eids[edge_map[part_eids] == part_id]
        assert np.all(local_edges == np.sort(local_edges1))

        for name in ('labels', 'feats'):
            assert name in node_feats
            assert node_feats[name].shape[0] == len(local_nodes)
            assert len(local_nodes) == len(node_feats[name])
            expected = F.asnumpy(g.ndata[name])[local_nodes]
            assert np.all(expected == F.asnumpy(node_feats[name]))
        assert len(edge_feats) == 0
def test_edge_coarsening(idtype, g, weight, relabel):
    """Validate the labels produced by ``neighbor_matching`` on a bidirected graph.

    Args:
        idtype: ID dtype the graph is cast to.
        g: Input graph; converted with ``dgl.to_bidirected`` first.
        weight: When truthy, random non-negative edge weights are supplied.
        relabel: Forwarded as ``relabel_idx``.
    """
    num_nodes = g.num_nodes()
    g = dgl.to_bidirected(g)
    g = g.astype(idtype).to(F.ctx())
    if weight:
        edge_weight = F.abs(F.randn((g.num_edges(),))).to(F.ctx())
    else:
        edge_weight = None
    node_labels = neighbor_matching(g, edge_weight, relabel_idx=relabel)
    unique_ids, counts = th.unique(node_labels, return_counts=True)
    num_result_ids = unique_ids.size(0)

    # shape correct
    assert node_labels.shape == (g.num_nodes(),)

    # all nodes marked
    assert F.reduce_sum(node_labels < 0).item() == 0

    # number of unique node ids correct.
    assert num_nodes // 2 <= num_result_ids <= num_nodes

    # each unique id has <= 2 nodes
    assert F.reduce_sum(counts > 2).item() == 0

    # if two nodes have the same id, they must be neighbors
    all_nodes = F.arange(0, num_nodes, idtype)
    for label in unique_ids:
        members = all_nodes[(node_labels == label.item())]
        if members.size(0) != 2:
            continue
        u, v = members[0].item(), members[1].item()
        assert g.has_edges_between(u, v)
def test_edge_removal():
    """Check edge data across remove/add/remove on a fully connected 5-node graph."""
    g = dgl.DGLGraph()
    g.add_nodes(5)
    # Fully connect the graph, self-loops included, in row-major order.
    for src, dst in ((i, j) for i in range(5) for j in range(5)):
        g.add_edge(src, dst)
    g.edata['id'] = F.arange(0, 25)

    # remove edges
    g.remove_edges(range(13, 20))
    survivors = [*range(13), *range(20, 25)]
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 18
    assert F.array_equal(g.edata['id'], F.tensor(survivors))

    # add edges
    g.add_edge(3, 3)
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 19
    assert F.array_equal(g.edata['id'], F.tensor(survivors + [0]))

    # remove edges
    g.remove_edges(range(2, 10))
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 11
    expected = [0, 1, 10, 11, 12, 20, 21, 22, 23, 24, 0]
    assert F.array_equal(g.edata['id'], F.tensor(expected))
def check_dist_graph_empty(g, num_clients, num_nodes, num_edges):
    """Exercise DistTensor creation, deletion and writes on a DistGraph.

    Args:
        g: The distributed graph under test.
        num_clients: Not used by this check.
        num_nodes: Expected ``g.number_of_nodes()``.
        num_edges: Expected ``g.number_of_edges()``.
    """
    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test init node data
    two_col_shape = (g.number_of_nodes(), 2)
    g.ndata['test1'] = dgl.distributed.DistTensor(two_col_shape, F.int32)
    first_half = F.arange(0, int(g.number_of_nodes() / 2))
    fetched = g.ndata['test1'][first_half]
    assert np.all(F.asnumpy(fetched) == 0)

    # create a tensor and destroy a tensor and create it again.
    test3 = dgl.distributed.DistTensor(two_col_shape, F.float32, 'test3',
                                       init_func=rand_init)
    del test3
    test3 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32,
                                       'test3')
    del test3

    # Test write data
    ones = F.ones((len(first_half), 2), F.int32, F.cpu())
    g.ndata['test1'][first_half] = ones
    fetched = g.ndata['test1'][first_half]
    assert np.all(F.asnumpy(fetched) == 1)

    # Test metadata operations.
    assert g.node_attr_schemes()['test1'].dtype == F.int32

    print('end')
def check_dist_emb(g, num_clients, num_nodes, num_edges):
    """Train two ``DistEmbedding`` tables for one ``SparseAdagrad`` step each and check results.

    ``NotImplementedError`` is treated as "not supported" and ignored. Any
    other exception, failed assertions included, has its traceback printed
    and terminates the process via ``sys.exit(-1)``.

    Args:
        g: The distributed graph under test.
        num_clients: Number of clients; value checks that depend on a single
            writer only run when this is 1.
        num_nodes: Not used by this check.
        num_edges: Not used by this check.
    """
    import traceback

    from dgl.distributed.optim import SparseAdagrad
    from dgl.distributed import DistEmbedding
    # Test sparse emb
    try:
        emb = DistEmbedding(g.number_of_nodes(), 1, 'emb1', emb_init)
        nids = F.arange(0, int(g.number_of_nodes()))
        lr = 0.001
        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats = emb(nids)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids), 1)))
            loss = F.sum(feats + 1, 0)
        loss.backward()
        optimizer.step()
        feats = emb(nids)
        if num_clients == 1:
            assert_almost_equal(F.asnumpy(feats), np.ones((len(nids), 1)) * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))

        policy = dgl.distributed.PartitionPolicy('node', g.get_partition_book())
        grad_sum = dgl.distributed.DistTensor((g.number_of_nodes(), 1), F.float32,
                                              'emb1_sum', policy)
        if num_clients == 1:
            assert np.all(
                F.asnumpy(grad_sum[nids]) == np.ones((len(nids), 1)) * num_clients)
        assert np.all(F.asnumpy(grad_sum[rest]) == np.zeros((len(rest), 1)))

        emb = DistEmbedding(g.number_of_nodes(), 1, 'emb2', emb_init)
        with F.no_grad():
            feats1 = emb(nids)
        assert np.all(F.asnumpy(feats1) == 0)

        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            # Looking the same IDs up twice is part of the scenario.
            feats1 = emb(nids)
            feats2 = emb(nids)
            feats = F.cat([feats1, feats2], 0)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids) * 2, 1)))
            loss = F.sum(feats + 1, 0)
        loss.backward()
        optimizer.step()
        with F.no_grad():
            feats = emb(nids)
        if num_clients == 1:
            assert_almost_equal(F.asnumpy(feats), np.ones((len(nids), 1)) * 1 * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
    except NotImplementedError:
        pass
    except Exception:
        # The assertions above carry no message, so printing the exception
        # alone yields an empty line. Print the traceback so the failing
        # check can be located, then signal failure via the exit code.
        traceback.print_exc()
        sys.exit(-1)
def check_hetero_partition(hg, part_method):
    """Partition a heterograph into 4 parts and verify ID mappings and features.

    For each loaded partition, edges are translated from partition-local IDs
    to per-type IDs and then to original IDs via the mappings returned by
    ``partition_graph``; the original edge IDs must match what ``hg`` reports
    for the same endpoints.

    Args:
        hg: Heterograph to partition; feature columns are added to it.
        part_method: Forwarded to ``partition_graph``.
    """
    hg.nodes['n1'].data['labels'] = F.arange(0, hg.number_of_nodes('n1'))
    hg.nodes['n1'].data['feats'] = F.tensor(
        np.random.randn(hg.number_of_nodes('n1'), 10), F.float32)
    hg.edges['r1'].data['feats'] = F.tensor(
        np.random.randn(hg.number_of_edges('r1'), 10), F.float32)
    num_parts = 4
    num_hops = 1

    orig_nids, orig_eids = partition_graph(
        hg, 'test', num_parts, '/tmp/partition',
        num_hops=num_hops, part_method=part_method,
        reshuffle=True, return_mapping=True)
    assert len(orig_nids) == len(hg.ntypes)
    assert len(orig_eids) == len(hg.etypes)
    for ntype in hg.ntypes:
        assert len(orig_nids[ntype]) == hg.number_of_nodes(ntype)
    for etype in hg.etypes:
        assert len(orig_eids[etype]) == hg.number_of_edges(etype)

    parts = []
    for part_id in range(num_parts):
        part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition(
            '/tmp/partition/test.json', part_id)

        # Verify the mapping between the reshuffled IDs and the original IDs.
        # These are partition-local IDs.
        part_src_ids, part_dst_ids = part_g.edges()
        # These are reshuffled global homogeneous IDs.
        global_nids = part_g.ndata[dgl.NID]
        part_src_ids = F.gather_row(global_nids, part_src_ids)
        part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids)
        part_eids = part_g.edata[dgl.EID]
        # These are reshuffled per-type IDs.
        src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids)
        dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids)
        etype_ids, part_eids = gpb.map_to_per_etype(part_eids)
        # These are original per-type IDs.
        for etype_id, etype in enumerate(hg.etypes):
            # Select the edges of this edge type only.
            part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id)
            src_ntype_ids1 = F.boolean_mask(src_ntype_ids, etype_ids == etype_id)
            part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id)
            dst_ntype_ids1 = F.boolean_mask(dst_ntype_ids, etype_ids == etype_id)
            part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id)

            # All edges of one type share one source and one destination type.
            first_src_type = src_ntype_ids1[0]
            first_dst_type = dst_ntype_ids1[0]
            assert np.all(F.asnumpy(src_ntype_ids1 == first_src_type))
            assert np.all(F.asnumpy(dst_ntype_ids1 == first_dst_type))
            src_ntype = hg.ntypes[F.as_scalar(src_ntype_ids1[0])]
            dst_ntype = hg.ntypes[F.as_scalar(dst_ntype_ids1[0])]

            orig_src_ids1 = F.gather_row(orig_nids[src_ntype], part_src_ids1)
            orig_dst_ids1 = F.gather_row(orig_nids[dst_ntype], part_dst_ids1)
            orig_eids1 = F.gather_row(orig_eids[etype], part_eids1)
            orig_eids2 = hg.edge_ids(orig_src_ids1, orig_dst_ids1, etype=etype)
            assert len(orig_eids1) == len(orig_eids2)
            assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))

        parts.append(part_g)
        verify_graph_feats(hg, part_g, node_feats)
    verify_hetero_graph(hg, parts)
def test_node_removal(idtype):
    """Check node data and stored IDs across remove/add/remove on a 10-node graph.

    Args:
        idtype: ID dtype the graph is cast to before nodes are added.
    """
    g = dgl.DGLGraph()
    g = g.astype(idtype).to(F.ctx())
    g.add_nodes(10)
    g.add_edge(0, 0)
    assert g.number_of_nodes() == 10
    g.ndata['id'] = F.arange(0, 10)

    # remove nodes
    g.remove_nodes(range(4, 7))
    remaining = [0, 1, 2, 3, 7, 8, 9]
    assert g.number_of_nodes() == 7
    assert F.array_equal(g.ndata['id'], F.tensor(remaining))
    assert dgl.NID not in g.ndata
    assert dgl.EID not in g.edata

    # add nodes
    g.add_nodes(3)
    assert g.number_of_nodes() == 10
    assert F.array_equal(g.ndata['id'], F.tensor(remaining + [0, 0, 0]))

    # remove nodes
    g.remove_nodes(range(1, 4), store_ids=True)
    assert g.number_of_nodes() == 7
    assert F.array_equal(g.ndata['id'], F.tensor([0, 7, 8, 9, 0, 0, 0]))
    assert dgl.NID in g.ndata
    assert dgl.EID in g.edata
def test_view():
    """Check node/edge data views and ID views on the test heterograph."""
    # test data view
    g = create_test_heterograph()

    user_feat = F.randn((3, 6))
    g.nodes['user'].data['h'] = user_feat  # ok
    assert F.array_equal(user_feat, g.nodes['user'].data['h'])
    assert F.array_equal(F.tensor(g.nodes('user')), F.arange(0, 3))

    follows_feat = F.randn((2, 4))
    g.edges['user', 'follows', 'user'].data['h'] = follows_feat
    # The same data must be reachable by canonical type and by short name.
    via_canonical = g.edges['user', 'follows', 'user'].data['h']
    via_name = g.edges['follows'].data['h']
    assert F.array_equal(follows_feat, via_canonical)
    assert F.array_equal(follows_feat, via_name)
    follows_eids = F.tensor(g.edges(etype='follows', form='eid'))
    assert F.array_equal(follows_eids, F.arange(0, 2))
def test_group_apply_edges2():
    """Check that ``group_apply_edges('dst', ...)`` groups each node's in-edges together.

    Inside the callback, the edge IDs delivered for every destination node
    are compared, order-insensitively, with the IDs returned by
    ``g.in_edges`` for the same nodes.
    """
    m = ssp.random(10, 10, 0.2)
    g = DGLGraph(m, readonly=True)
    g.ndata['deg'] = g.in_degrees()
    g.ndata['id'] = F.arange(0, g.number_of_nodes())
    g.edata['id'] = F.arange(0, g.number_of_edges())

    def apply(edges):
        w = edges.data['id']
        n_nodes, deg = w.shape

        dst = edges.dst['id'][:, 0]
        # np.sort returns a sorted copy. ndarray.sort() sorts in place and
        # returns None, which previously reduced this check to
        # ``np.array_equal(None, None)``.
        eid1 = np.sort(
            F.asnumpy(g.in_edges(dst, 'eid')).reshape(n_nodes, deg), axis=1)
        eid2 = np.sort(F.asnumpy(edges.data['id']), axis=1)
        assert np.array_equal(eid1, eid2)
        return {'id2': w}

    g.group_apply_edges('dst', apply, inplace=True)
def test_edge_prediction_sampler(idtype):
    """Smoke-test an edge-prediction sampler wrapped around ``NeighborSampler``.

    Args:
        idtype: ID dtype for the test graph and the seed edge IDs.
    """
    g = create_test_graph(idtype)
    base_sampler = NeighborSampler([10, 10])
    sampler = as_edge_prediction_sampler(
        base_sampler, negative_sampler=negative_sampler.Uniform(1))

    seed_eids = F.arange(0, 2, dtype=idtype)
    seeds = F.copy_to(seed_eids, ctx=F.ctx())
    # just a smoke test to make sure we don't fail internal assertions
    result = sampler.sample(g, {'follows': seeds})