# Imports these checks rely on (create_random_hetero, create_random_bipartite,
# get_degrees, start_server, and the start_*_sample_client helpers are defined
# elsewhere in this test module; the `backend` and `utils` module paths are
# assumed from DGL's test layout).
import multiprocessing as mp
import time

import numpy as np

import backend as F
import dgl
from dgl.distributed import load_partition, partition_graph
from utils import generate_ip_config, get_local_usable_addr


def check_rpc_hetero_etype_sampling_shuffle(tmpdir, num_server):
    """Sample via sample_etype_neighbors() on a partitioned hetero graph and
    verify that shuffled node/edge IDs map back to the original graph."""
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)
    g = create_random_hetero(dense=True)
    num_parts = num_server
    num_hops = 1
    partition_graph(g, 'test_sampling', num_parts, tmpdir,
                    num_hops=num_hops, part_method='metis', reshuffle=True)
    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        p = ctx.Process(target=start_server,
                        args=(i, tmpdir, num_server > 1, 'test_sampling'))
        p.start()
        time.sleep(1)
        pserver_list.append(p)

    fanout = 3
    block, gpb = start_hetero_etype_sample_client(
        0, tmpdir, num_server > 1, fanout,
        nodes={'n3': [0, 10, 99, 66, 124, 208]})
    print("Done sampling")
    for p in pserver_list:
        p.join()

    # 6 seed nodes x fanout 3 = 18 sampled edges per edge type (the graph is
    # dense, so every seed has at least `fanout` neighbors).
    src, dst = block.edges(etype=('n1', 'r2', 'n3'))
    assert len(src) == 18
    src, dst = block.edges(etype=('n2', 'r3', 'n3'))
    assert len(src) == 18

    # Rebuild the shuffled->original ID lookup tables from the partition files.
    orig_nid_map = {ntype: F.zeros((g.number_of_nodes(ntype),), dtype=F.int64)
                    for ntype in g.ntypes}
    orig_eid_map = {etype: F.zeros((g.number_of_edges(etype),), dtype=F.int64)
                    for etype in g.etypes}
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(
            tmpdir / 'test_sampling.json', i)
        ntype_ids, type_nids = gpb.map_to_per_ntype(part.ndata[dgl.NID])
        for ntype_id, ntype in enumerate(g.ntypes):
            idx = ntype_ids == ntype_id
            F.scatter_row_inplace(orig_nid_map[ntype],
                                  F.boolean_mask(type_nids, idx),
                                  F.boolean_mask(part.ndata['orig_id'], idx))
        etype_ids, type_eids = gpb.map_to_per_etype(part.edata[dgl.EID])
        for etype_id, etype in enumerate(g.etypes):
            idx = etype_ids == etype_id
            F.scatter_row_inplace(orig_eid_map[etype],
                                  F.boolean_mask(type_eids, idx),
                                  F.boolean_mask(part.edata['orig_id'], idx))

    for src_type, etype, dst_type in block.canonical_etypes:
        src, dst = block.edges(etype=etype)
        # These are global IDs after shuffling.
        shuffled_src = F.gather_row(block.srcnodes[src_type].data[dgl.NID], src)
        shuffled_dst = F.gather_row(block.dstnodes[dst_type].data[dgl.NID], dst)
        shuffled_eid = block.edges[etype].data[dgl.EID]
        orig_src = F.asnumpy(F.gather_row(orig_nid_map[src_type], shuffled_src))
        orig_dst = F.asnumpy(F.gather_row(orig_nid_map[dst_type], shuffled_dst))
        orig_eid = F.asnumpy(F.gather_row(orig_eid_map[etype], shuffled_eid))
        # Check the node IDs and edge IDs against the original graph.
        orig_src1, orig_dst1 = g.find_edges(orig_eid, etype=etype)
        assert np.all(F.asnumpy(orig_src1) == orig_src)
        assert np.all(F.asnumpy(orig_dst1) == orig_dst)
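# A minimal NumPy sketch of the inverse-mapping idea used above (made-up
# partition data, not DGL API): each partition pairs the shuffled IDs it owns
# with the pre-shuffle originals, and scattering those pairs into one table
# yields a global shuffled->original lookup, which is what the
# F.scatter_row_inplace() loop builds.
import numpy as np

_demo_map = np.zeros(8, dtype=np.int64)
_demo_partitions = [
    # (shuffled IDs owned by this partition, their original IDs)
    (np.array([0, 1, 2]), np.array([5, 2, 7])),
    (np.array([3, 4, 5, 6, 7]), np.array([0, 1, 3, 4, 6])),
]
for _shuffled, _orig in _demo_partitions:
    _demo_map[_shuffled] = _orig  # plays the role of F.scatter_row_inplace()
assert _demo_map[2] == 7  # shuffled ID 2 maps back to original ID 7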
def check_rpc_bipartite_etype_sampling_shuffle(tmpdir, num_server):
    """Sample on a bipartite graph via sample_etype_neighbors(), which yields
    non-empty sample results."""
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)
    g = create_random_bipartite()
    num_parts = num_server
    num_hops = 1
    orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
                                   num_hops=num_hops, part_method='metis',
                                   reshuffle=True, return_mapping=True)
    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        p = ctx.Process(target=start_server,
                        args=(i, tmpdir, num_server > 1, 'test_sampling'))
        p.start()
        time.sleep(1)
        pserver_list.append(p)

    # Seed only 'game' nodes with at least one neighbor so the sample result
    # is guaranteed to be non-empty.
    fanout = 3
    deg = get_degrees(g, orig_nids['game'], 'game')
    nids = F.nonzero_1d(deg > 0)
    block, gpb = start_bipartite_etype_sample_client(
        0, tmpdir, num_server > 1, fanout,
        nodes={'game': nids, 'user': [0]})
    print("Done sampling")
    for p in pserver_list:
        p.join()

    # Rebuild the shuffled->original ID lookup tables from the partition files.
    orig_nid_map = {ntype: F.zeros((g.number_of_nodes(ntype),), dtype=F.int64)
                    for ntype in g.ntypes}
    orig_eid_map = {etype: F.zeros((g.number_of_edges(etype),), dtype=F.int64)
                    for etype in g.etypes}
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(
            tmpdir / 'test_sampling.json', i)
        ntype_ids, type_nids = gpb.map_to_per_ntype(part.ndata[dgl.NID])
        for ntype_id, ntype in enumerate(g.ntypes):
            idx = ntype_ids == ntype_id
            F.scatter_row_inplace(orig_nid_map[ntype],
                                  F.boolean_mask(type_nids, idx),
                                  F.boolean_mask(part.ndata['orig_id'], idx))
        etype_ids, type_eids = gpb.map_to_per_etype(part.edata[dgl.EID])
        for etype_id, etype in enumerate(g.etypes):
            idx = etype_ids == etype_id
            F.scatter_row_inplace(orig_eid_map[etype],
                                  F.boolean_mask(type_eids, idx),
                                  F.boolean_mask(part.edata['orig_id'], idx))

    for src_type, etype, dst_type in block.canonical_etypes:
        src, dst = block.edges(etype=etype)
        # These are global IDs after shuffling.
        shuffled_src = F.gather_row(block.srcnodes[src_type].data[dgl.NID], src)
        shuffled_dst = F.gather_row(block.dstnodes[dst_type].data[dgl.NID], dst)
        shuffled_eid = block.edges[etype].data[dgl.EID]
        orig_src = F.asnumpy(F.gather_row(orig_nid_map[src_type], shuffled_src))
        orig_dst = F.asnumpy(F.gather_row(orig_nid_map[dst_type], shuffled_dst))
        orig_eid = F.asnumpy(F.gather_row(orig_eid_map[etype], shuffled_eid))
        # Check the node IDs and edge IDs against the original graph.
        orig_src1, orig_dst1 = g.find_edges(orig_eid, etype=etype)
        assert np.all(F.asnumpy(orig_src1) == orig_src)
        assert np.all(F.asnumpy(orig_dst1) == orig_dst)
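# The per-etype verification loop is duplicated verbatim in the two checks
# above. A possible shared helper using the same DGL/backend APIs (the helper
# name is hypothetical, not part of the test suite):
def _verify_block_against_orig(g, block, orig_nid_map, orig_eid_map):
    """Check every sampled edge in `block` against the original graph `g`."""
    for src_type, etype, dst_type in block.canonical_etypes:
        src, dst = block.edges(etype=etype)
        # Map shuffled global IDs back to IDs in the original graph.
        shuffled_src = F.gather_row(block.srcnodes[src_type].data[dgl.NID], src)
        shuffled_dst = F.gather_row(block.dstnodes[dst_type].data[dgl.NID], dst)
        shuffled_eid = block.edges[etype].data[dgl.EID]
        orig_src = F.asnumpy(F.gather_row(orig_nid_map[src_type], shuffled_src))
        orig_dst = F.asnumpy(F.gather_row(orig_nid_map[dst_type], shuffled_dst))
        orig_eid = F.asnumpy(F.gather_row(orig_eid_map[etype], shuffled_eid))
        # Each sampled edge ID must connect the same endpoints as in g.
        orig_src1, orig_dst1 = g.find_edges(orig_eid, etype=etype)
        assert np.all(F.asnumpy(orig_src1) == orig_src)
        assert np.all(F.asnumpy(orig_dst1) == orig_dst)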
def check_rpc_hetero_sampling_shuffle(tmpdir, num_server):
    """Sample on a partitioned hetero graph through its homogeneous ID space
    and verify that shuffled node/edge IDs and their types map back to the
    original graph."""
    with open("rpc_ip_config.txt", "w") as ip_config:
        for _ in range(num_server):
            ip_config.write('{}\n'.format(get_local_usable_addr()))
    g = create_random_hetero()
    num_parts = num_server
    num_hops = 1
    partition_graph(g, 'test_sampling', num_parts, tmpdir,
                    num_hops=num_hops, part_method='metis', reshuffle=True)
    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        p = ctx.Process(target=start_server,
                        args=(i, tmpdir, num_server > 1, 'test_sampling'))
        p.start()
        time.sleep(1)
        pserver_list.append(p)
    time.sleep(3)

    block, gpb = start_hetero_sample_client(0, tmpdir, num_server > 1)
    print("Done sampling")
    for p in pserver_list:
        p.join()

    # The sampled block uses homogeneous (shuffled) IDs, so flat lookup tables
    # over all nodes/edges suffice here.
    orig_nid_map = F.zeros((g.number_of_nodes(),), dtype=F.int64)
    orig_eid_map = F.zeros((g.number_of_edges(),), dtype=F.int64)
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(
            tmpdir / 'test_sampling.json', i)
        F.scatter_row_inplace(orig_nid_map, part.ndata[dgl.NID],
                              part.ndata['orig_id'])
        F.scatter_row_inplace(orig_eid_map, part.edata[dgl.EID],
                              part.edata['orig_id'])

    src, dst = block.edges()
    # These are global IDs after shuffling.
    shuffled_src = F.gather_row(block.srcdata[dgl.NID], src)
    shuffled_dst = F.gather_row(block.dstdata[dgl.NID], dst)
    shuffled_eid = block.edata[dgl.EID]

    # Recover node/edge types from the homogeneous IDs.
    etype, _ = gpb.map_to_per_etype(shuffled_eid)
    src_type, _ = gpb.map_to_per_ntype(shuffled_src)
    dst_type, _ = gpb.map_to_per_ntype(shuffled_dst)
    etype = F.asnumpy(etype)
    src_type = F.asnumpy(src_type)
    dst_type = F.asnumpy(dst_type)

    # These are global IDs in the original graph.
    orig_src = F.asnumpy(F.gather_row(orig_nid_map, shuffled_src))
    orig_dst = F.asnumpy(F.gather_row(orig_nid_map, shuffled_dst))
    orig_eid = F.asnumpy(F.gather_row(orig_eid_map, shuffled_eid))

    etype_map = {g.get_etype_id(etype): etype for etype in g.etypes}
    etype_to_eptype = {g.get_etype_id(etype): (src_ntype, dst_ntype)
                       for src_ntype, etype, dst_ntype in g.canonical_etypes}
    for e in np.unique(etype):
        src_t = src_type[etype == e]
        dst_t = dst_type[etype == e]
        # All edges of one edge type must connect a single pair of node types.
        assert np.all(src_t == src_t[0])
        assert np.all(dst_t == dst_t[0])

        # Check the node IDs and edge IDs against the original graph.
        orig_src1, orig_dst1 = g.find_edges(orig_eid[etype == e],
                                            etype=etype_map[e])
        assert np.all(F.asnumpy(orig_src1) == orig_src[etype == e])
        assert np.all(F.asnumpy(orig_dst1) == orig_dst[etype == e])

        # Check the node types.
        src_ntype, dst_ntype = etype_to_eptype[e]
        assert np.all(src_t == g.get_ntype_id(src_ntype))
        assert np.all(dst_t == g.get_ntype_id(dst_ntype))
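# These checks are typically driven by pytest wrappers that provision a fresh
# temporary directory per run. A minimal sketch (the wrapper name and the
# parametrization values are illustrative, not taken from this file):
import tempfile
from pathlib import Path

import pytest


@pytest.mark.parametrize("num_server", [1, 2])
def test_rpc_hetero_sampling_shuffle(num_server):
    with tempfile.TemporaryDirectory() as tmpdirname:
        check_rpc_hetero_sampling_shuffle(Path(tmpdirname), num_server)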