def test_row1(): # test row getter/setter data = create_test_data() f = FrameRef(Frame(data)) # getter # test non-duplicate keys rowid = Index(F.tensor([0, 2])) rows = f[rowid] for k, v in rows.items(): assert tuple(F.shape(v)) == (len(rowid), D) assert F.allclose(v, F.gather_row(data[k], F.tensor(rowid.tousertensor()))) # test duplicate keys rowid = Index(F.tensor([8, 2, 2, 1])) rows = f[rowid] for k, v in rows.items(): assert tuple(F.shape(v)) == (len(rowid), D) assert F.allclose(v, F.gather_row(data[k], F.tensor(rowid.tousertensor()))) # setter rowid = Index(F.tensor([0, 2, 4])) vals = {'a1' : F.zeros((len(rowid), D)), 'a2' : F.zeros((len(rowid), D)), 'a3' : F.zeros((len(rowid), D)), } f[rowid] = vals for k, v in f[rowid].items(): assert F.allclose(v, F.zeros((len(rowid), D))) # setting rows with new column should raise error with error initializer f.set_initializer(lambda shape, dtype : assert_(False)) def failed_update_rows(): vals['a4'] = F.ones((len(rowid), D)) f[rowid] = vals assert check_fail(failed_update_rows)
def test_column_subcolumn(): data = F.copy_to( F.tensor([[1., 1., 1., 1.], [0., 2., 9., 0.], [3., 2., 1., 0.], [1., 1., 1., 1.], [0., 2., 4., 0.]]), F.ctx()) original = Column(data) # subcolumn from cpu context i1 = F.tensor([0, 2, 1, 3], dtype=F.int64) l1 = original.subcolumn(i1) assert len(l1) == i1.shape[0] assert F.array_equal(l1.data, F.gather_row(data, i1)) # next subcolumn from target context i2 = F.copy_to(F.tensor([0, 2], dtype=F.int64), F.ctx()) l2 = l1.subcolumn(i2) assert len(l2) == i2.shape[0] i1i2 = F.copy_to(F.gather_row(i1, F.copy_to(i2, F.context(i1))), F.ctx()) assert F.array_equal(l2.data, F.gather_row(data, i1i2)) # next subcolumn also from target context i3 = F.copy_to(F.tensor([1], dtype=F.int64), F.ctx()) l3 = l2.subcolumn(i3) assert len(l3) == i3.shape[0] i1i2i3 = F.copy_to(F.gather_row(i1i2, F.copy_to(i3, F.context(i1i2))), F.ctx()) assert F.array_equal(l3.data, F.gather_row(data, i1i2i3))
def test_partition_with_halo(): g = dgl.DGLGraph(create_large_graph_index(1000), readonly=True) node_part = np.random.choice(4, g.number_of_nodes()) subgs = dgl.transform.partition_graph_with_halo(g, node_part, 2) for part_id, subg in subgs.items(): node_ids = np.nonzero(node_part == part_id)[0] lnode_ids = np.nonzero(F.asnumpy(subg.ndata['inner_node']))[0] nf = get_nodeflow(g, node_ids, 2) lnf = get_nodeflow(subg, lnode_ids, 2) for i in range(nf.num_layers): layer_nids1 = F.asnumpy(nf.layer_parent_nid(i)) layer_nids2 = lnf.layer_parent_nid(i) layer_nids2 = F.asnumpy(F.gather_row(subg.parent_nid, layer_nids2)) assert np.all(np.sort(layer_nids1) == np.sort(layer_nids2)) for i in range(nf.num_blocks): block_eids1 = F.asnumpy(nf.block_parent_eid(i)) block_eids2 = lnf.block_parent_eid(i) block_eids2 = F.asnumpy(F.gather_row(subg.parent_eid, block_eids2)) assert np.all(np.sort(block_eids1) == np.sort(block_eids2)) subgs = dgl.transform.partition_graph_with_halo(g, node_part, 2, reshuffle=True) for part_id, subg in subgs.items(): node_ids = np.nonzero(node_part == part_id)[0] lnode_ids = np.nonzero(F.asnumpy(subg.ndata['inner_node']))[0] assert np.all(np.sort(F.asnumpy(subg.ndata['orig_id'])[lnode_ids]) == node_ids)
def sort_edges(edges): edges = [e.tousertensor() for e in edges] if np.prod(edges[2].shape) > 0: val, idx = F.sort_1d(edges[2]) return (F.gather_row(edges[0], idx), F.gather_row(edges[1], idx), F.gather_row(edges[2], idx)) else: return (edges[0], edges[1], edges[2])
def check_rpc_hetero_etype_sampling_shuffle(tmpdir, num_server): generate_ip_config("rpc_ip_config.txt", num_server, num_server) g = create_random_hetero(dense=True) num_parts = num_server num_hops = 1 partition_graph(g, 'test_sampling', num_parts, tmpdir, num_hops=num_hops, part_method='metis', reshuffle=True) pserver_list = [] ctx = mp.get_context('spawn') for i in range(num_server): p = ctx.Process(target=start_server, args=(i, tmpdir, num_server > 1, 'test_sampling')) p.start() time.sleep(1) pserver_list.append(p) fanout = 3 block, gpb = start_hetero_etype_sample_client(0, tmpdir, num_server > 1, fanout, nodes={'n3': [0, 10, 99, 66, 124, 208]}) print("Done sampling") for p in pserver_list: p.join() src, dst = block.edges(etype=('n1', 'r2', 'n3')) assert len(src) == 18 src, dst = block.edges(etype=('n2', 'r3', 'n3')) assert len(src) == 18 orig_nid_map = {ntype: F.zeros((g.number_of_nodes(ntype),), dtype=F.int64) for ntype in g.ntypes} orig_eid_map = {etype: F.zeros((g.number_of_edges(etype),), dtype=F.int64) for etype in g.etypes} for i in range(num_server): part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json', i) ntype_ids, type_nids = gpb.map_to_per_ntype(part.ndata[dgl.NID]) for ntype_id, ntype in enumerate(g.ntypes): idx = ntype_ids == ntype_id F.scatter_row_inplace(orig_nid_map[ntype], F.boolean_mask(type_nids, idx), F.boolean_mask(part.ndata['orig_id'], idx)) etype_ids, type_eids = gpb.map_to_per_etype(part.edata[dgl.EID]) for etype_id, etype in enumerate(g.etypes): idx = etype_ids == etype_id F.scatter_row_inplace(orig_eid_map[etype], F.boolean_mask(type_eids, idx), F.boolean_mask(part.edata['orig_id'], idx)) for src_type, etype, dst_type in block.canonical_etypes: src, dst = block.edges(etype=etype) # These are global Ids after shuffling. shuffled_src = F.gather_row(block.srcnodes[src_type].data[dgl.NID], src) shuffled_dst = F.gather_row(block.dstnodes[dst_type].data[dgl.NID], dst) shuffled_eid = block.edges[etype].data[dgl.EID] orig_src = F.asnumpy(F.gather_row(orig_nid_map[src_type], shuffled_src)) orig_dst = F.asnumpy(F.gather_row(orig_nid_map[dst_type], shuffled_dst)) orig_eid = F.asnumpy(F.gather_row(orig_eid_map[etype], shuffled_eid)) # Check the node Ids and edge Ids. orig_src1, orig_dst1 = g.find_edges(orig_eid, etype=etype) assert np.all(F.asnumpy(orig_src1) == orig_src) assert np.all(F.asnumpy(orig_dst1) == orig_dst)
def check_hetero_partition(hg, part_method): hg.nodes['n1'].data['labels'] = F.arange(0, hg.number_of_nodes('n1')) hg.nodes['n1'].data['feats'] = F.tensor( np.random.randn(hg.number_of_nodes('n1'), 10), F.float32) hg.edges['r1'].data['feats'] = F.tensor( np.random.randn(hg.number_of_edges('r1'), 10), F.float32) num_parts = 4 num_hops = 1 orig_nids, orig_eids = partition_graph(hg, 'test', num_parts, '/tmp/partition', num_hops=num_hops, part_method=part_method, reshuffle=True, return_mapping=True) assert len(orig_nids) == len(hg.ntypes) assert len(orig_eids) == len(hg.etypes) for ntype in hg.ntypes: assert len(orig_nids[ntype]) == hg.number_of_nodes(ntype) for etype in hg.etypes: assert len(orig_eids[etype]) == hg.number_of_edges(etype) parts = [] for i in range(num_parts): part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition( '/tmp/partition/test.json', i) # Verify the mapping between the reshuffled IDs and the original IDs. # These are partition-local IDs. part_src_ids, part_dst_ids = part_g.edges() # These are reshuffled global homogeneous IDs. part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids) part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids) part_eids = part_g.edata[dgl.EID] # These are reshuffled per-type IDs. src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids) dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids) etype_ids, part_eids = gpb.map_to_per_etype(part_eids) # These are original per-type IDs. for etype_id, etype in enumerate(hg.etypes): part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id) src_ntype_ids1 = F.boolean_mask(src_ntype_ids, etype_ids == etype_id) part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id) dst_ntype_ids1 = F.boolean_mask(dst_ntype_ids, etype_ids == etype_id) part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) src_ntype = hg.ntypes[F.as_scalar(src_ntype_ids1[0])] dst_ntype = hg.ntypes[F.as_scalar(dst_ntype_ids1[0])] orig_src_ids1 = F.gather_row(orig_nids[src_ntype], part_src_ids1) orig_dst_ids1 = F.gather_row(orig_nids[dst_ntype], part_dst_ids1) orig_eids1 = F.gather_row(orig_eids[etype], part_eids1) orig_eids2 = hg.edge_ids(orig_src_ids1, orig_dst_ids1, etype=etype) assert len(orig_eids1) == len(orig_eids2) assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) parts.append(part_g) verify_graph_feats(hg, part_g, node_feats) verify_hetero_graph(hg, parts)
def check_mapping(g, fg): if len(fg.ntypes) == 1: SRC = DST = fg.ntypes[0] else: SRC = fg.ntypes[0] DST = fg.ntypes[1] etypes = F.asnumpy(fg.edata[dgl.ETYPE]).tolist() eids = F.asnumpy(fg.edata[dgl.EID]).tolist() for i, (etype, eid) in enumerate(zip(etypes, eids)): src_g, dst_g = g.find_edges([eid], g.canonical_etypes[etype]) src_fg, dst_fg = fg.find_edges([i]) # TODO(gq): I feel this code is quite redundant; can we just add new members (like # "induced_srcid") to returned heterograph object and not store them as features? assert src_g == F.gather_row(fg.nodes[SRC].data[dgl.NID], src_fg)[0] tid = F.asnumpy(F.gather_row(fg.nodes[SRC].data[dgl.NTYPE], src_fg)).item() assert g.canonical_etypes[etype][0] == g.ntypes[tid] assert dst_g == F.gather_row(fg.nodes[DST].data[dgl.NID], dst_fg)[0] tid = F.asnumpy(F.gather_row(fg.nodes[DST].data[dgl.NTYPE], dst_fg)).item() assert g.canonical_etypes[etype][2] == g.ntypes[tid]
def test_split_even(): prepare_dist() g = create_random_graph(10000) num_parts = 4 num_hops = 2 partition_graph(g, 'dist_graph_test', num_parts, '/tmp/dist_graph', num_hops=num_hops, part_method='metis') node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30 edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30 selected_nodes = np.nonzero(node_mask)[0] selected_edges = np.nonzero(edge_mask)[0] all_nodes1 = [] all_nodes2 = [] all_edges1 = [] all_edges2 = [] for i in range(num_parts): dgl.distributed.set_num_client(num_parts) part_g, node_feats, edge_feats, gpb = load_partition('/tmp/dist_graph/dist_graph_test.json', i) local_nids = F.nonzero_1d(part_g.ndata['inner_node']) local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids) nodes = node_split(node_mask, gpb, i, force_even=True) all_nodes1.append(nodes) subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(local_nids)) print('part {} get {} nodes and {} are in the partition'.format(i, len(nodes), len(subset))) dgl.distributed.set_num_client(num_parts * 2) nodes1 = node_split(node_mask, gpb, i * 2, force_even=True) nodes2 = node_split(node_mask, gpb, i * 2 + 1, force_even=True) nodes3 = F.cat([nodes1, nodes2], 0) all_nodes2.append(nodes3) subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(nodes3)) print('intersection has', len(subset)) dgl.distributed.set_num_client(num_parts) local_eids = F.nonzero_1d(part_g.edata['inner_edge']) local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids) edges = edge_split(edge_mask, gpb, i, force_even=True) all_edges1.append(edges) subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(local_eids)) print('part {} get {} edges and {} are in the partition'.format(i, len(edges), len(subset))) dgl.distributed.set_num_client(num_parts * 2) edges1 = edge_split(edge_mask, gpb, i * 2, force_even=True) edges2 = edge_split(edge_mask, gpb, i * 2 + 1, force_even=True) edges3 = F.cat([edges1, edges2], 0) all_edges2.append(edges3) subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(edges3)) print('intersection has', len(subset)) all_nodes1 = F.cat(all_nodes1, 0) all_edges1 = F.cat(all_edges1, 0) all_nodes2 = F.cat(all_nodes2, 0) all_edges2 = F.cat(all_edges2, 0) all_nodes = np.nonzero(node_mask)[0] all_edges = np.nonzero(edge_mask)[0] assert np.all(all_nodes == F.asnumpy(all_nodes1)) assert np.all(all_edges == F.asnumpy(all_edges1)) assert np.all(all_nodes == F.asnumpy(all_nodes2)) assert np.all(all_edges == F.asnumpy(all_edges2))
def test_spmm(idtype, g, shp, msg, reducer): g = g.astype(idtype).to(F.ctx()) if dgl.backend.backend_name == 'tensorflow' and (reducer in ['min', 'max']): pytest.skip() # tensorflow dlpack has problem writing into int32 arrays on GPU. print(g) print(g.idtype) hu = F.tensor(np.random.rand(*((g.number_of_src_nodes(),) + shp[0])) + 1) he = F.tensor(np.random.rand(*((g.number_of_edges(),) + shp[1])) + 1) print('u shape: {}, e shape: {}'.format(F.shape(hu), F.shape(he))) g.srcdata['x'] = F.attach_grad(F.clone(hu)) g.edata['w'] = F.attach_grad(F.clone(he)) print('SpMM(message func: {}, reduce func: {})'.format(msg, reducer)) u = F.attach_grad(F.clone(hu)) e = F.attach_grad(F.clone(he)) with F.record_grad(): v = gspmm(g, msg, reducer, u, e) non_degree_indices = F.tensor( np.nonzero(F.asnumpy(g.in_degrees()) != 0)[0]) v = F.gather_row(v, non_degree_indices) if g.number_of_edges() > 0: F.backward(F.reduce_sum(v)) if msg != 'copy_rhs': grad_u = F.grad(u) if msg != 'copy_lhs': grad_e = F.grad(e) with F.record_grad(): g.update_all(udf_msg[msg], udf_reduce[reducer]) if g.number_of_edges() > 0: v1 = F.gather_row(g.dstdata['v'], non_degree_indices) assert F.allclose(v, v1) print('forward passed') F.backward(F.reduce_sum(v1)) if msg != 'copy_rhs': if reducer in ['min', 'max']: # there might be some numerical errors rate = F.reduce_sum(F.abs(F.grad(g.srcdata['x']) - grad_u)) /\ F.reduce_sum(F.abs(grad_u)) assert F.as_scalar(rate) < 1e-2, rate else: assert F.allclose(F.grad(g.srcdata['x']), grad_u) if msg != 'copy_lhs': if reducer in ['min', 'max']: rate = F.reduce_sum(F.abs(F.grad(g.edata['w']) - grad_e)) /\ F.reduce_sum(F.abs(grad_e)) assert F.as_scalar(rate) < 1e-2, rate else: assert F.allclose(F.grad(g.edata['w']), grad_e) print('backward passed') g.srcdata.pop('x') g.edata.pop('w') if 'v' in g.dstdata: g.dstdata.pop('v')
def test_split(): #prepare_dist() g = create_random_graph(10000) num_parts = 4 num_hops = 2 partition_graph(g, 'dist_graph_test', num_parts, '/tmp/dist_graph', num_hops=num_hops, part_method='metis') node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30 edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30 selected_nodes = np.nonzero(node_mask)[0] selected_edges = np.nonzero(edge_mask)[0] # The code now collects the roles of all client processes and use the information # to determine how to split the workloads. Here is to simulate the multi-client # use case. def set_roles(num_clients): dgl.distributed.role.CUR_ROLE = 'default' dgl.distributed.role.GLOBAL_RANK = {i:i for i in range(num_clients)} dgl.distributed.role.PER_ROLE_RANK['default'] = {i:i for i in range(num_clients)} for i in range(num_parts): set_roles(num_parts) part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition('/tmp/dist_graph/dist_graph_test.json', i) local_nids = F.nonzero_1d(part_g.ndata['inner_node']) local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids) nodes1 = np.intersect1d(selected_nodes, F.asnumpy(local_nids)) nodes2 = node_split(node_mask, gpb, rank=i, force_even=False) assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes2))) local_nids = F.asnumpy(local_nids) for n in nodes1: assert n in local_nids set_roles(num_parts * 2) nodes3 = node_split(node_mask, gpb, rank=i * 2, force_even=False) nodes4 = node_split(node_mask, gpb, rank=i * 2 + 1, force_even=False) nodes5 = F.cat([nodes3, nodes4], 0) assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes5))) set_roles(num_parts) local_eids = F.nonzero_1d(part_g.edata['inner_edge']) local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids) edges1 = np.intersect1d(selected_edges, F.asnumpy(local_eids)) edges2 = edge_split(edge_mask, gpb, rank=i, force_even=False) assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges2))) local_eids = F.asnumpy(local_eids) for e in edges1: assert e in local_eids set_roles(num_parts * 2) edges3 = edge_split(edge_mask, gpb, rank=i * 2, force_even=False) edges4 = edge_split(edge_mask, gpb, rank=i * 2 + 1, force_even=False) edges5 = F.cat([edges3, edges4], 0) assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges5)))
def test_split(): prepare_dist() g = create_random_graph(10000) num_parts = 4 num_hops = 2 partition_graph(g, 'dist_graph_test', num_parts, '/tmp/dist_graph', num_hops=num_hops, part_method='metis') node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30 edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30 selected_nodes = np.nonzero(node_mask)[0] selected_edges = np.nonzero(edge_mask)[0] for i in range(num_parts): dgl.distributed.set_num_client(num_parts) part_g, node_feats, edge_feats, gpb, _ = load_partition( '/tmp/dist_graph/dist_graph_test.json', i) local_nids = F.nonzero_1d(part_g.ndata['inner_node']) local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids) nodes1 = np.intersect1d(selected_nodes, F.asnumpy(local_nids)) nodes2 = node_split(node_mask, gpb, i, force_even=False) assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes2))) local_nids = F.asnumpy(local_nids) for n in nodes1: assert n in local_nids dgl.distributed.set_num_client(num_parts * 2) nodes3 = node_split(node_mask, gpb, i * 2, force_even=False) nodes4 = node_split(node_mask, gpb, i * 2 + 1, force_even=False) nodes5 = F.cat([nodes3, nodes4], 0) assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes5))) dgl.distributed.set_num_client(num_parts) local_eids = F.nonzero_1d(part_g.edata['inner_edge']) local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids) edges1 = np.intersect1d(selected_edges, F.asnumpy(local_eids)) edges2 = edge_split(edge_mask, gpb, i, force_even=False) assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges2))) local_eids = F.asnumpy(local_eids) for e in edges1: assert e in local_eids dgl.distributed.set_num_client(num_parts * 2) edges3 = edge_split(edge_mask, gpb, i * 2, force_even=False) edges4 = edge_split(edge_mask, gpb, i * 2 + 1, force_even=False) edges5 = F.cat([edges3, edges4], 0) assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges5)))
def verify_graph_feats(g, part, node_feats): for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) for name in g.nodes[ntype].data: if name in [dgl.NID, 'inner_node']: continue inner_node_mask = _get_inner_node_mask(part, ntype_id) inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) min_nids = F.min(inner_nids, 0) orig_id = F.boolean_mask(part.ndata['orig_id'], inner_node_mask) true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) ndata = F.gather_row(node_feats[ntype + '/' + name], inner_nids - min_nids) assert np.all(F.asnumpy(ndata == true_feats))
def test_node_batch(): g = dgl.DGLGraph(nx.path_graph(20)) feat = F.randn((g.number_of_nodes(), 10)) g.ndata['x'] = feat # test all v = ALL n_repr = g.get_n_repr(v) nbatch = NodeBatch(g, v, n_repr) assert F.allclose(nbatch.data['x'], feat) assert nbatch.mailbox is None assert F.allclose(nbatch.nodes(), g.nodes()) assert nbatch.batch_size() == g.number_of_nodes() assert len(nbatch) == g.number_of_nodes() # test partial v = utils.toindex(F.tensor([0, 3, 5, 7, 9])) n_repr = g.get_n_repr(v) nbatch = NodeBatch(g, v, n_repr) assert F.allclose(nbatch.data['x'], F.gather_row(feat, F.tensor([0, 3, 5, 7, 9]))) assert nbatch.mailbox is None assert F.allclose(nbatch.nodes(), F.tensor([0, 3, 5, 7, 9])) assert nbatch.batch_size() == 5 assert len(nbatch) == 5
def test_mean_zero_degree(g, idtype): g = g.astype(idtype).to(F.ctx()) g.ndata['h'] = F.ones((g.number_of_nodes(), 3)) g.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'x')) deg = F.asnumpy(g.in_degrees()) v = F.tensor(np.where(deg == 0)[0]) assert F.allclose(F.gather_row(g.ndata['x'], v), F.zeros((len(v), 3)))
def check_metis_partition(g, extra_hops): # partitions with 1-hop HALO nodes subgs = dgl.transform.metis_partition(g, 4, extra_cached_hops=extra_hops) num_inner_nodes = 0 num_inner_edges = 0 if subgs is not None: for part_id, subg in subgs.items(): lnode_ids = np.nonzero(F.asnumpy(subg.ndata['inner_node']))[0] ledge_ids = np.nonzero(F.asnumpy(subg.edata['inner_edge']))[0] num_inner_nodes += len(lnode_ids) num_inner_edges += len(ledge_ids) assert np.sum(F.asnumpy(subg.ndata['part_id']) == part_id) == len(lnode_ids) assert num_inner_nodes == g.number_of_nodes() print(g.number_of_edges() - num_inner_edges) if extra_hops == 0: return # partitions with 1-hop HALO nodes and reshuffling nodes subgs = dgl.transform.metis_partition(g, 4, extra_cached_hops=extra_hops, reshuffle=True) num_inner_nodes = 0 num_inner_edges = 0 edge_cnts = np.zeros((g.number_of_edges(),)) if subgs is not None: for part_id, subg in subgs.items(): lnode_ids = np.nonzero(F.asnumpy(subg.ndata['inner_node']))[0] ledge_ids = np.nonzero(F.asnumpy(subg.edata['inner_edge']))[0] num_inner_nodes += len(lnode_ids) num_inner_edges += len(ledge_ids) assert np.sum(F.asnumpy(subg.ndata['part_id']) == part_id) == len(lnode_ids) nids = F.asnumpy(subg.ndata[dgl.NID]) # ensure the local node Ids are contiguous. parent_ids = F.asnumpy(subg.ndata[dgl.NID]) parent_ids = parent_ids[:len(lnode_ids)] assert np.all(parent_ids == np.arange(parent_ids[0], parent_ids[-1] + 1)) # count the local edges. parent_ids = F.asnumpy(subg.edata[dgl.EID])[ledge_ids] edge_cnts[parent_ids] += 1 orig_ids = subg.ndata['orig_id'] inner_node = F.asnumpy(subg.ndata['inner_node']) for nid in range(subg.number_of_nodes()): neighs = subg.predecessors(nid) old_neighs1 = F.gather_row(orig_ids, neighs) old_nid = F.asnumpy(orig_ids[nid]) old_neighs2 = g.predecessors(old_nid) # If this is an inner node, it should have the full neighborhood. if inner_node[nid]: assert np.all(np.sort(F.asnumpy(old_neighs1)) == np.sort(F.asnumpy(old_neighs2))) # Normally, local edges are only counted once. assert np.all(edge_cnts == 1) assert num_inner_nodes == g.number_of_nodes() print(g.number_of_edges() - num_inner_edges)
def test_apply_nodes(): def _upd(nodes): return {'h' : nodes.data['h'] * 2} g = generate_graph() old = g.ndata['h'] g.apply_nodes(_upd) assert F.allclose(old * 2, g.ndata['h']) u = F.tensor([0, 3, 4, 6]) g.apply_nodes(lambda nodes : {'h' : nodes.data['h'] * 0.}, u) assert F.allclose(F.gather_row(g.ndata['h'], u), F.zeros((4, D)))
def check_features(g, bg): for ntype in bg.srctypes: for key in g.nodes[ntype].data: assert F.array_equal( bg.srcnodes[ntype].data[key], F.gather_row(g.nodes[ntype].data[key], bg.srcnodes[ntype].data[dgl.NID])) for ntype in bg.dsttypes: for key in g.nodes[ntype].data: assert F.array_equal( bg.dstnodes[ntype].data[key], F.gather_row(g.nodes[ntype].data[key], bg.dstnodes[ntype].data[dgl.NID])) for etype in bg.canonical_etypes: for key in g.edges[etype].data: assert F.array_equal( bg.edges[etype].data[key], F.gather_row(g.edges[etype].data[key], bg.edges[etype].data[dgl.EID]))
def test_basics(): g = generate_graph() h = g.ndata['h'] l = g.edata['l'] nid = [0, 2, 3, 6, 7, 9] sg = g.subgraph(nid) eid = {2, 3, 4, 5, 10, 11, 12, 13, 16} assert set(F.zerocopy_to_numpy(sg.parent_eid)) == eid eid = F.tensor(sg.parent_eid) # the subgraph is empty initially assert len(sg.ndata) == 0 assert len(sg.edata) == 0 # the data is copied after explict copy from sg.copy_from_parent() assert len(sg.ndata) == 1 assert len(sg.edata) == 1 sh = sg.ndata['h'] assert F.allclose(F.gather_row(h, F.tensor(nid)), sh) ''' s, d, eid 0, 1, 0 1, 9, 1 0, 2, 2 1 2, 9, 3 1 0, 3, 4 1 3, 9, 5 1 0, 4, 6 4, 9, 7 0, 5, 8 5, 9, 9 3 0, 6, 10 1 6, 9, 11 1 3 0, 7, 12 1 7, 9, 13 1 3 0, 8, 14 8, 9, 15 3 9, 0, 16 1 ''' assert F.allclose(F.gather_row(l, eid), sg.edata['l']) # update the node/edge features on the subgraph should NOT # reflect to the parent graph. sg.ndata['h'] = F.zeros((6, D)) assert F.allclose(h, g.ndata['h'])
def test_split(): prepare_dist() g = create_random_graph(10000) num_parts = 4 num_hops = 2 partition_graph(g, 'test', num_parts, '/tmp', num_hops=num_hops, part_method='metis') node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30 edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30 selected_nodes = np.nonzero(node_mask)[0] selected_edges = np.nonzero(edge_mask)[0] for i in range(num_parts): part_g, node_feats, edge_feats, meta = load_partition( '/tmp/test.json', i) num_nodes, num_edges, node_map, edge_map, num_partitions = meta gpb = GraphPartitionBook(part_id=i, num_parts=num_partitions, node_map=node_map, edge_map=edge_map, part_graph=part_g) local_nids = F.nonzero_1d(part_g.ndata['local_node']) local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids) nodes1 = np.intersect1d(selected_nodes, F.asnumpy(local_nids)) nodes2 = node_split(node_mask, gpb, i) assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes2))) local_nids = F.asnumpy(local_nids) for n in nodes1: assert n in local_nids local_eids = F.nonzero_1d(part_g.edata['local_edge']) local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids) edges1 = np.intersect1d(selected_edges, F.asnumpy(local_eids)) edges2 = edge_split(edge_mask, gpb, i) assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges2))) local_eids = F.asnumpy(local_eids) for e in edges1: assert e in local_eids
def test_nccl_sparse_pull_single_remainder(): nccl_id = nccl.UniqueId() comm = nccl.Communicator(1, 0, nccl_id) req_index = F.randint([10000], F.int64, F.ctx(), 0, 100000) value = F.uniform([100000, 100], F.float32, F.ctx(), -1.0, 1.0) part = NDArrayPartition(100000, 1, 'remainder') rv = comm.sparse_all_to_all_pull(req_index, value, part) exp_rv = F.gather_row(value, req_index) assert F.array_equal(rv, exp_rv)
def test_apply_edges(): def _upd(edges): return {'w' : edges.data['w'] * 2} g = generate_graph() old = g.edata['w'] g.apply_edges(_upd) assert F.allclose(old * 2, g.edata['w']) u = F.tensor([0, 0, 0, 4, 5, 6]) v = F.tensor([1, 2, 3, 9, 9, 9]) g.apply_edges(lambda edges : {'w' : edges.data['w'] * 0.}, (u, v)) eid = F.tensor(g.edge_ids(u, v)) assert F.allclose(F.gather_row(g.edata['w'], eid), F.zeros((6, D)))
def test_subgraph(): g = generate_graph() h = g.ndata['h'] l = g.edata['l'] nid = [0, 2, 3, 6, 7, 9] sg = g.subgraph(nid) eid = {2, 3, 4, 5, 10, 11, 12, 13, 16} assert set(F.asnumpy(sg.edata[dgl.EID])) == eid eid = sg.edata[dgl.EID] # the subgraph is empty initially except for NID/EID field assert len(sg.ndata) == 2 assert len(sg.edata) == 2 sh = sg.ndata['h'] assert F.allclose(F.gather_row(h, F.tensor(nid)), sh) ''' s, d, eid 0, 1, 0 1, 9, 1 0, 2, 2 1 2, 9, 3 1 0, 3, 4 1 3, 9, 5 1 0, 4, 6 4, 9, 7 0, 5, 8 5, 9, 9 3 0, 6, 10 1 6, 9, 11 1 3 0, 7, 12 1 7, 9, 13 1 3 0, 8, 14 8, 9, 15 3 9, 0, 16 1 ''' assert F.allclose(F.gather_row(l, eid), sg.edata['l']) # update the node/edge features on the subgraph should NOT # reflect to the parent graph. sg.ndata['h'] = F.zeros((6, D)) assert F.allclose(h, g.ndata['h'])
def check(g, bg, ntype, etype, dst_nodes): if dst_nodes is not None: assert F.array_equal(bg.dstnodes[ntype].data[dgl.NID], dst_nodes) n_dst_nodes = bg.number_of_nodes('DST/' + ntype) assert F.array_equal(bg.srcnodes[ntype].data[dgl.NID][:n_dst_nodes], bg.dstnodes[ntype].data[dgl.NID]) g = g[etype] bg = bg[etype] induced_src = bg.srcdata[dgl.NID] induced_dst = bg.dstdata[dgl.NID] induced_eid = bg.edata[dgl.EID] bg_src, bg_dst = bg.all_edges(order='eid') src_ans, dst_ans = g.all_edges(order='eid') induced_src_bg = F.gather_row(induced_src, bg_src) induced_dst_bg = F.gather_row(induced_dst, bg_dst) induced_src_ans = F.gather_row(src_ans, induced_eid) induced_dst_ans = F.gather_row(dst_ans, induced_eid) assert F.array_equal(induced_src_bg, induced_src_ans) assert F.array_equal(induced_dst_bg, induced_dst_ans)
def verify_graph_feats(g, gpb, part, node_feats, edge_feats): for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) inner_node_mask = _get_inner_node_mask(part, ntype_id) inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) partid = gpb.nid2partid(inner_type_nids, ntype) assert np.all(F.asnumpy(ntype_ids) == ntype_id) assert np.all(F.asnumpy(partid) == gpb.partid) orig_id = F.boolean_mask(part.ndata['orig_id'], inner_node_mask) local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) for name in g.nodes[ntype].data: if name in [dgl.NID, 'inner_node']: continue true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) ndata = F.gather_row(node_feats[ntype + '/' + name], local_nids) assert np.all(F.asnumpy(ndata == true_feats)) for etype in g.etypes: etype_id = g.get_etype_id(etype) inner_edge_mask = _get_inner_edge_mask(part, etype_id) inner_eids = F.boolean_mask(part.edata[dgl.EID], inner_edge_mask) etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) partid = gpb.eid2partid(inner_type_eids, etype) assert np.all(F.asnumpy(etype_ids) == etype_id) assert np.all(F.asnumpy(partid) == gpb.partid) orig_id = F.boolean_mask(part.edata['orig_id'], inner_edge_mask) local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) for name in g.edges[etype].data: if name in [dgl.EID, 'inner_edge']: continue true_feats = F.gather_row(g.edges[etype].data[name], orig_id) edata = F.gather_row(edge_feats[etype + '/' + name], local_eids) assert np.all(F.asnumpy(edata == true_feats))
def test_sort_with_tag(idtype): num_nodes, num_adj, num_tags = 200, [20, 50], 5 g = create_test_heterograph(num_nodes, num_adj, idtype=idtype) tag = F.tensor(np.random.choice(num_tags, g.number_of_nodes())) src, dst = g.edges() edge_tag_dst = F.gather_row(tag, F.tensor(dst)) edge_tag_src = F.gather_row(tag, F.tensor(src)) for tag_type in ['node', 'edge']: new_g = dgl.sort_csr_by_tag( g, tag if tag_type == 'node' else edge_tag_dst, tag_type=tag_type) old_csr = g.adjacency_matrix(scipy_fmt='csr') new_csr = new_g.adjacency_matrix(scipy_fmt='csr') assert(check_sort(new_csr, tag, new_g.dstdata["_TAG_OFFSET"])) assert(not check_sort(old_csr, tag)) # Check the original csr is not modified. for tag_type in ['node', 'edge']: new_g = dgl.sort_csc_by_tag( g, tag if tag_type == 'node' else edge_tag_src, tag_type=tag_type) old_csc = g.adjacency_matrix(transpose=True, scipy_fmt='csr') new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt='csr') assert(check_sort(new_csc, tag, new_g.srcdata["_TAG_OFFSET"])) assert(not check_sort(old_csc, tag))
def test_nccl_sparse_pull_single_range(): nccl_id = nccl.UniqueId() comm = nccl.Communicator(1, 0, nccl_id) req_index = F.randint([10000], F.int64, F.ctx(), 0, 100000) value = F.uniform([100000, 100], F.float32, F.ctx(), -1.0, 1.0) part_ranges = F.copy_to(F.tensor([0, value.shape[0]], dtype=F.int64), F.ctx()) part = NDArrayPartition(100000, 1, 'range', part_ranges=part_ranges) rv = comm.sparse_all_to_all_pull(req_index, value, part) exp_rv = F.gather_row(value, req_index) assert F.array_equal(rv, exp_rv)
def test_out_subgraph(idtype): hg = dgl.heterograph( { ('user', 'follow', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]), ('user', 'play', 'game'): ([0, 0, 1, 3], [0, 1, 2, 2]), ('game', 'liked-by', 'user'): ([2, 2, 2, 1, 1, 0], [0, 1, 2, 0, 3, 0]), ('user', 'flips', 'coin'): ([0, 1, 2, 3], [0, 0, 0, 0]) }, idtype=idtype) subg = dgl.out_subgraph(hg, {'user': [0, 1], 'game': 0}) assert subg.idtype == idtype assert len(subg.ntypes) == 3 assert len(subg.etypes) == 4 u, v = subg['follow'].edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert edge_set == {(1, 0), (0, 1), (0, 2)} assert F.array_equal(hg['follow'].edge_ids(u, v), subg['follow'].edata[dgl.EID]) u, v = subg['play'].edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert edge_set == {(0, 0), (0, 1), (1, 2)} assert F.array_equal(hg['play'].edge_ids(u, v), subg['play'].edata[dgl.EID]) u, v = subg['liked-by'].edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert edge_set == {(0, 0)} assert F.array_equal(hg['liked-by'].edge_ids(u, v), subg['liked-by'].edata[dgl.EID]) u, v = subg['flips'].edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert edge_set == {(0, 0), (1, 0)} assert F.array_equal(hg['flips'].edge_ids(u, v), subg['flips'].edata[dgl.EID]) for ntype in subg.ntypes: assert dgl.NID not in subg.nodes[ntype].data # Test store_ids subg = dgl.out_subgraph(hg, {'user': [0, 1], 'game': 0}, store_ids=False) for etype in subg.canonical_etypes: assert dgl.EID not in subg.edges[etype].data for ntype in subg.ntypes: assert dgl.NID not in subg.nodes[ntype].data # Test relabel nodes subg = dgl.out_subgraph(hg, {'user': [1], 'game': 0}, relabel_nodes=True) assert subg.idtype == idtype assert len(subg.ntypes) == 3 assert len(subg.etypes) == 4 u, v = subg['follow'].edges() old_u = F.gather_row(subg.nodes['user'].data[dgl.NID], u) old_v = F.gather_row(subg.nodes['user'].data[dgl.NID], v) edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v)))) assert edge_set == {(1, 0)} assert F.array_equal(hg['follow'].edge_ids(old_u, old_v), subg['follow'].edata[dgl.EID]) u, v = subg['play'].edges() old_u = F.gather_row(subg.nodes['user'].data[dgl.NID], u) old_v = F.gather_row(subg.nodes['game'].data[dgl.NID], v) edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v)))) assert edge_set == {(1, 2)} assert F.array_equal(hg['play'].edge_ids(old_u, old_v), subg['play'].edata[dgl.EID]) u, v = subg['liked-by'].edges() old_u = F.gather_row(subg.nodes['game'].data[dgl.NID], u) old_v = F.gather_row(subg.nodes['user'].data[dgl.NID], v) edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v)))) assert edge_set == {(0, 0)} assert F.array_equal(hg['liked-by'].edge_ids(old_u, old_v), subg['liked-by'].edata[dgl.EID]) u, v = subg['flips'].edges() old_u = F.gather_row(subg.nodes['user'].data[dgl.NID], u) old_v = F.gather_row(subg.nodes['coin'].data[dgl.NID], v) edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v)))) assert edge_set == {(1, 0)} assert F.array_equal(hg['flips'].edge_ids(old_u, old_v), subg['flips'].edata[dgl.EID]) assert subg.num_nodes('user') == 2 assert subg.num_nodes('game') == 2 assert subg.num_nodes('coin') == 1
def test_split_even(): g = create_random_graph(10000) num_parts = 4 num_hops = 2 partition_graph(g, 'dist_graph_test', num_parts, '/tmp/dist_graph', num_hops=num_hops, part_method='metis') node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30 edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30 selected_nodes = np.nonzero(node_mask)[0] selected_edges = np.nonzero(edge_mask)[0] all_nodes1 = [] all_nodes2 = [] all_edges1 = [] all_edges2 = [] # The code now collects the roles of all client processes and use the information # to determine how to split the workloads. Here is to simulate the multi-client # use case. def set_roles(num_clients): dgl.distributed.role.CUR_ROLE = 'default' dgl.distributed.role.GLOBAL_RANK = {i: i for i in range(num_clients)} dgl.distributed.role.PER_ROLE_RANK['default'] = { i: i for i in range(num_clients) } for i in range(num_parts): set_roles(num_parts) part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( '/tmp/dist_graph/dist_graph_test.json', i) local_nids = F.nonzero_1d(part_g.ndata['inner_node']) local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids) nodes = node_split(node_mask, gpb, rank=i, force_even=True) all_nodes1.append(nodes) subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(local_nids)) print('part {} get {} nodes and {} are in the partition'.format( i, len(nodes), len(subset))) set_roles(num_parts * 2) nodes1 = node_split(node_mask, gpb, rank=i * 2, force_even=True) nodes2 = node_split(node_mask, gpb, rank=i * 2 + 1, force_even=True) nodes3, _ = F.sort_1d(F.cat([nodes1, nodes2], 0)) all_nodes2.append(nodes3) subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(nodes3)) print('intersection has', len(subset)) set_roles(num_parts) local_eids = F.nonzero_1d(part_g.edata['inner_edge']) local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids) edges = edge_split(edge_mask, gpb, rank=i, force_even=True) all_edges1.append(edges) subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(local_eids)) print('part {} get {} edges and {} are in the partition'.format( i, len(edges), len(subset))) set_roles(num_parts * 2) edges1 = edge_split(edge_mask, gpb, rank=i * 2, force_even=True) edges2 = edge_split(edge_mask, gpb, rank=i * 2 + 1, force_even=True) edges3, _ = F.sort_1d(F.cat([edges1, edges2], 0)) all_edges2.append(edges3) subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(edges3)) print('intersection has', len(subset)) all_nodes1 = F.cat(all_nodes1, 0) all_edges1 = F.cat(all_edges1, 0) all_nodes2 = F.cat(all_nodes2, 0) all_edges2 = F.cat(all_edges2, 0) all_nodes = np.nonzero(node_mask)[0] all_edges = np.nonzero(edge_mask)[0] assert np.all(all_nodes == F.asnumpy(all_nodes1)) assert np.all(all_edges == F.asnumpy(all_edges1)) assert np.all(all_nodes == F.asnumpy(all_nodes2)) assert np.all(all_edges == F.asnumpy(all_edges2))
def check_partition(part_method, reshuffle): g = create_random_graph(10000) g.ndata['labels'] = F.arange(0, g.number_of_nodes()) g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10)) g.edata['feats'] = F.tensor(np.random.randn(g.number_of_edges(), 10)) g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h')) g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh')) num_parts = 4 num_hops = 2 partition_graph(g, 'test', num_parts, '/tmp/partition', num_hops=num_hops, part_method=part_method, reshuffle=reshuffle) part_sizes = [] for i in range(num_parts): part_g, node_feats, edge_feats, gpb, _ = load_partition('/tmp/partition/test.json', i) # Check the metadata assert gpb._num_nodes() == g.number_of_nodes() assert gpb._num_edges() == g.number_of_edges() assert gpb.num_partitions() == num_parts gpb_meta = gpb.metadata() assert len(gpb_meta) == num_parts assert len(gpb.partid2nids(i)) == gpb_meta[i]['num_nodes'] assert len(gpb.partid2eids(i)) == gpb_meta[i]['num_edges'] part_sizes.append((gpb_meta[i]['num_nodes'], gpb_meta[i]['num_edges'])) local_nid = gpb.nid2localnid(F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node']), i) assert F.dtype(local_nid) in (F.int64, F.int32) assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) local_eid = gpb.eid2localeid(F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge']), i) assert F.dtype(local_eid) in (F.int64, F.int32) assert np.all(F.asnumpy(local_eid) == np.arange(0, len(local_eid))) # Check the node map. local_nodes = F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node']) llocal_nodes = F.nonzero_1d(part_g.ndata['inner_node']) local_nodes1 = gpb.partid2nids(i) assert F.dtype(local_nodes1) in (F.int32, F.int64) assert np.all(np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(local_nodes1))) # Check the edge map. local_edges = F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge']) local_edges1 = gpb.partid2eids(i) assert F.dtype(local_edges1) in (F.int32, F.int64) assert np.all(np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(local_edges1))) if reshuffle: part_g.ndata['feats'] = F.gather_row(g.ndata['feats'], part_g.ndata['orig_id']) part_g.edata['feats'] = F.gather_row(g.edata['feats'], part_g.edata['orig_id']) # when we read node data from the original global graph, we should use orig_id. local_nodes = F.boolean_mask(part_g.ndata['orig_id'], part_g.ndata['inner_node']) local_edges = F.boolean_mask(part_g.edata['orig_id'], part_g.edata['inner_edge']) else: part_g.ndata['feats'] = F.gather_row(g.ndata['feats'], part_g.ndata[dgl.NID]) part_g.edata['feats'] = F.gather_row(g.edata['feats'], part_g.edata[dgl.NID]) part_g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h')) part_g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh')) assert F.allclose(F.gather_row(g.ndata['h'], local_nodes), F.gather_row(part_g.ndata['h'], llocal_nodes)) assert F.allclose(F.gather_row(g.ndata['eh'], local_nodes), F.gather_row(part_g.ndata['eh'], llocal_nodes)) for name in ['labels', 'feats']: assert name in node_feats assert node_feats[name].shape[0] == len(local_nodes) assert np.all(F.asnumpy(g.ndata[name])[F.asnumpy(local_nodes)] == F.asnumpy(node_feats[name])) for name in ['feats']: assert name in edge_feats assert edge_feats[name].shape[0] == len(local_edges) assert np.all(F.asnumpy(g.edata[name])[F.asnumpy(local_edges)] == F.asnumpy(edge_feats[name])) if reshuffle: node_map = [] edge_map = [] for i, (num_nodes, num_edges) in enumerate(part_sizes): node_map.append(np.ones(num_nodes) * i) edge_map.append(np.ones(num_edges) * i) node_map = np.concatenate(node_map) edge_map = np.concatenate(edge_map) nid2pid = gpb.nid2partid(F.arange(0, len(node_map))) assert F.dtype(nid2pid) in (F.int32, F.int64) assert np.all(F.asnumpy(nid2pid) == node_map) eid2pid = gpb.eid2partid(F.arange(0, len(edge_map))) assert F.dtype(eid2pid) in (F.int32, F.int64) assert np.all(F.asnumpy(eid2pid) == edge_map)
def verify_hetero_graph(g, parts): num_nodes = {ntype: 0 for ntype in g.ntypes} num_edges = {etype: 0 for etype in g.etypes} for part in parts: assert len(g.ntypes) == len(F.unique(part.ndata[dgl.NTYPE])) assert len(g.etypes) == len(F.unique(part.edata[dgl.ETYPE])) for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) inner_node_mask = _get_inner_node_mask(part, ntype_id) num_inner_nodes = F.sum(F.astype(inner_node_mask, F.int64), 0) num_nodes[ntype] += num_inner_nodes for etype in g.etypes: etype_id = g.get_etype_id(etype) inner_edge_mask = _get_inner_edge_mask(part, etype_id) num_inner_edges = F.sum(F.astype(inner_edge_mask, F.int64), 0) num_edges[etype] += num_inner_edges # Verify the number of nodes are correct. for ntype in g.ntypes: print('node {}: {}, {}'.format(ntype, g.number_of_nodes(ntype), num_nodes[ntype])) assert g.number_of_nodes(ntype) == num_nodes[ntype] # Verify the number of edges are correct. for etype in g.etypes: print('edge {}: {}, {}'.format(etype, g.number_of_edges(etype), num_edges[etype])) assert g.number_of_edges(etype) == num_edges[etype] nids = {ntype: [] for ntype in g.ntypes} eids = {etype: [] for etype in g.etypes} for part in parts: src, dst, eid = part.edges(form='all') orig_src = F.gather_row(part.ndata['orig_id'], src) orig_dst = F.gather_row(part.ndata['orig_id'], dst) orig_eid = F.gather_row(part.edata['orig_id'], eid) etype_arr = F.gather_row(part.edata[dgl.ETYPE], eid) eid_type = F.gather_row(part.edata[dgl.EID], eid) for etype in g.etypes: etype_id = g.get_etype_id(etype) src1 = F.boolean_mask(orig_src, etype_arr == etype_id) dst1 = F.boolean_mask(orig_dst, etype_arr == etype_id) eid1 = F.boolean_mask(orig_eid, etype_arr == etype_id) exist = g.has_edges_between(src1, dst1, etype=etype) assert np.all(F.asnumpy(exist)) eid2 = g.edge_ids(src1, dst1, etype=etype) assert np.all(F.asnumpy(eid1 == eid2)) eids[etype].append(F.boolean_mask(eid_type, etype_arr == etype_id)) # Make sure edge Ids fall into a range. inner_edge_mask = _get_inner_edge_mask(part, etype_id) inner_eids = np.sort( F.asnumpy(F.boolean_mask(part.edata[dgl.EID], inner_edge_mask))) assert np.all( inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1)) for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) # Make sure inner nodes have Ids fall into a range. inner_node_mask = _get_inner_node_mask(part, ntype_id) inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) assert np.all( F.asnumpy( inner_nids == F.arange(F.as_scalar(inner_nids[0]), F.as_scalar(inner_nids[-1]) + 1))) nids[ntype].append(inner_nids) for ntype in nids: nids_type = F.cat(nids[ntype], 0) uniq_ids = F.unique(nids_type) # We should get all nodes. assert len(uniq_ids) == g.number_of_nodes(ntype) for etype in eids: eids_type = F.cat(eids[etype], 0) uniq_ids = F.unique(eids_type) assert len(uniq_ids) == g.number_of_edges(etype)