Example No. 1
0
def check_rpc_hetero_etype_sampling_shuffle(tmpdir, num_server):
    """Run per-etype distributed sampling on a dense heterograph and verify
    that sampled edges map back to the original (pre-shuffle) graph.

    Partitions the graph with ID reshuffling, spawns one server process per
    partition, samples neighbors of a fixed 'n3' seed set, then rebuilds the
    shuffled-ID -> original-ID tables from the saved partitions and checks
    every sampled edge against ``g.find_edges``.
    """
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)

    g = create_random_hetero(dense=True)
    num_parts = num_server
    num_hops = 1

    partition_graph(g, 'test_sampling', num_parts, tmpdir,
                    num_hops=num_hops, part_method='metis', reshuffle=True)

    ctx = mp.get_context('spawn')
    pserver_list = []
    for server_id in range(num_server):
        proc = ctx.Process(target=start_server,
                           args=(server_id, tmpdir, num_server > 1, 'test_sampling'))
        proc.start()
        # Stagger server start-up so each one can bind its port.
        time.sleep(1)
        pserver_list.append(proc)

    fanout = 3
    block, gpb = start_hetero_etype_sample_client(
        0, tmpdir, num_server > 1, fanout,
        nodes={'n3': [0, 10, 99, 66, 124, 208]})
    print("Done sampling")
    for proc in pserver_list:
        proc.join()

    # Dense graph + fanout 3 over 6 seeds => exactly 18 edges per relation.
    for rel in (('n1', 'r2', 'n3'), ('n2', 'r3', 'n3')):
        src, dst = block.edges(etype=rel)
        assert len(src) == 18

    # Rebuild shuffled-ID -> original-ID maps from the on-disk partitions.
    orig_nid_map = {ntype: F.zeros((g.number_of_nodes(ntype),), dtype=F.int64)
                    for ntype in g.ntypes}
    orig_eid_map = {etype: F.zeros((g.number_of_edges(etype),), dtype=F.int64)
                    for etype in g.etypes}
    for part_id in range(num_server):
        part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json',
                                                part_id)
        ntype_ids, per_type_nids = gpb.map_to_per_ntype(part.ndata[dgl.NID])
        for tid, ntype in enumerate(g.ntypes):
            mask = ntype_ids == tid
            F.scatter_row_inplace(orig_nid_map[ntype],
                                  F.boolean_mask(per_type_nids, mask),
                                  F.boolean_mask(part.ndata['orig_id'], mask))
        etype_ids, per_type_eids = gpb.map_to_per_etype(part.edata[dgl.EID])
        for tid, etype in enumerate(g.etypes):
            mask = etype_ids == tid
            F.scatter_row_inplace(orig_eid_map[etype],
                                  F.boolean_mask(per_type_eids, mask),
                                  F.boolean_mask(part.edata['orig_id'], mask))

    for src_type, etype, dst_type in block.canonical_etypes:
        src, dst = block.edges(etype=etype)
        # Block-local IDs -> global shuffled IDs.
        shuffled_src = F.gather_row(block.srcnodes[src_type].data[dgl.NID], src)
        shuffled_dst = F.gather_row(block.dstnodes[dst_type].data[dgl.NID], dst)
        shuffled_eid = block.edges[etype].data[dgl.EID]

        # Global shuffled IDs -> IDs in the original graph.
        orig_src = F.asnumpy(F.gather_row(orig_nid_map[src_type], shuffled_src))
        orig_dst = F.asnumpy(F.gather_row(orig_nid_map[dst_type], shuffled_dst))
        orig_eid = F.asnumpy(F.gather_row(orig_eid_map[etype], shuffled_eid))

        # Every sampled edge must exist in the original graph with the same
        # endpoints.
        orig_src1, orig_dst1 = g.find_edges(orig_eid, etype=etype)
        assert np.all(F.asnumpy(orig_src1) == orig_src)
        assert np.all(F.asnumpy(orig_dst1) == orig_dst)
Example No. 2
0
def check_rpc_bipartite_etype_sampling_shuffle(tmpdir, num_server):
    """sample on bipartite via sample_etype_neighbors() which yields non-empty sample results"""
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)

    g = create_random_bipartite()
    num_parts = num_server
    num_hops = 1

    orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
                                   num_hops=num_hops, part_method='metis',
                                   reshuffle=True, return_mapping=True)

    ctx = mp.get_context('spawn')
    pserver_list = []
    for server_id in range(num_server):
        proc = ctx.Process(target=start_server,
                           args=(server_id, tmpdir, num_server > 1,
                                 'test_sampling'))
        proc.start()
        # Give each server a head start before launching the next one.
        time.sleep(1)
        pserver_list.append(proc)

    fanout = 3
    # Only seed 'game' nodes that actually have in-edges, so sampling is
    # guaranteed to return a non-empty result.
    deg = get_degrees(g, orig_nids['game'], 'game')
    nids = F.nonzero_1d(deg > 0)
    block, gpb = start_bipartite_etype_sample_client(
        0, tmpdir, num_server > 1, fanout,
        nodes={'game': nids, 'user': [0]})
    print("Done sampling")
    for proc in pserver_list:
        proc.join()

    # Rebuild shuffled-ID -> original-ID maps from the saved partitions.
    orig_nid_map = {ntype: F.zeros((g.number_of_nodes(ntype),), dtype=F.int64)
                    for ntype in g.ntypes}
    orig_eid_map = {etype: F.zeros((g.number_of_edges(etype),), dtype=F.int64)
                    for etype in g.etypes}
    for part_id in range(num_server):
        part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json',
                                                part_id)
        ntype_ids, per_type_nids = gpb.map_to_per_ntype(part.ndata[dgl.NID])
        for tid, ntype in enumerate(g.ntypes):
            mask = ntype_ids == tid
            F.scatter_row_inplace(orig_nid_map[ntype],
                                  F.boolean_mask(per_type_nids, mask),
                                  F.boolean_mask(part.ndata['orig_id'], mask))
        etype_ids, per_type_eids = gpb.map_to_per_etype(part.edata[dgl.EID])
        for tid, etype in enumerate(g.etypes):
            mask = etype_ids == tid
            F.scatter_row_inplace(orig_eid_map[etype],
                                  F.boolean_mask(per_type_eids, mask),
                                  F.boolean_mask(part.edata['orig_id'], mask))

    for src_type, etype, dst_type in block.canonical_etypes:
        src, dst = block.edges(etype=etype)
        # Block-local IDs -> global shuffled IDs.
        shuffled_src = F.gather_row(block.srcnodes[src_type].data[dgl.NID], src)
        shuffled_dst = F.gather_row(block.dstnodes[dst_type].data[dgl.NID], dst)
        shuffled_eid = block.edges[etype].data[dgl.EID]

        # Global shuffled IDs -> IDs in the original graph.
        orig_src = F.asnumpy(F.gather_row(orig_nid_map[src_type], shuffled_src))
        orig_dst = F.asnumpy(F.gather_row(orig_nid_map[dst_type], shuffled_dst))
        orig_eid = F.asnumpy(F.gather_row(orig_eid_map[etype], shuffled_eid))

        # Every sampled edge must exist in the original graph with the same
        # endpoints.
        orig_src1, orig_dst1 = g.find_edges(orig_eid, etype=etype)
        assert np.all(F.asnumpy(orig_src1) == orig_src)
        assert np.all(F.asnumpy(orig_dst1) == orig_dst)
Example No. 3
0
def check_rpc_hetero_sampling_shuffle(tmpdir, num_server):
    """Run distributed sampling on a heterograph with reshuffled IDs and
    verify that sampled edges map back to the original graph.

    Partitions the graph, spawns one server process per partition, samples
    through the client, then rebuilds the shuffled-ID -> original-ID tables
    from the saved partitions and checks node/edge IDs and node types for
    every sampled edge.
    """
    # Fix: use a context manager so the config file handle is closed even if
    # get_local_usable_addr() raises (the original leaked it on error).
    with open("rpc_ip_config.txt", "w") as ip_config:
        for _ in range(num_server):
            ip_config.write('{}\n'.format(get_local_usable_addr()))

    g = create_random_hetero()
    num_parts = num_server
    num_hops = 1

    partition_graph(g,
                    'test_sampling',
                    num_parts,
                    tmpdir,
                    num_hops=num_hops,
                    part_method='metis',
                    reshuffle=True)

    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        p = ctx.Process(target=start_server,
                        args=(i, tmpdir, num_server > 1, 'test_sampling'))
        p.start()
        # Stagger server start-up so each one can bind its port.
        time.sleep(1)
        pserver_list.append(p)

    time.sleep(3)
    block, gpb = start_hetero_sample_client(0, tmpdir, num_server > 1)
    print("Done sampling")
    for p in pserver_list:
        p.join()

    # Rebuild the homogeneous shuffled-ID -> original-ID maps from the
    # on-disk partitions.
    orig_nid_map = F.zeros((g.number_of_nodes(), ), dtype=F.int64)
    orig_eid_map = F.zeros((g.number_of_edges(), ), dtype=F.int64)
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json',
                                                i)
        F.scatter_row_inplace(orig_nid_map, part.ndata[dgl.NID],
                              part.ndata['orig_id'])
        F.scatter_row_inplace(orig_eid_map, part.edata[dgl.EID],
                              part.edata['orig_id'])

    src, dst = block.edges()
    # These are global Ids after shuffling.
    shuffled_src = F.gather_row(block.srcdata[dgl.NID], src)
    shuffled_dst = F.gather_row(block.dstdata[dgl.NID], dst)
    shuffled_eid = block.edata[dgl.EID]
    # Get node/edge types.
    etype, _ = gpb.map_to_per_etype(shuffled_eid)
    src_type, _ = gpb.map_to_per_ntype(shuffled_src)
    dst_type, _ = gpb.map_to_per_ntype(shuffled_dst)
    etype = F.asnumpy(etype)
    src_type = F.asnumpy(src_type)
    dst_type = F.asnumpy(dst_type)
    # These are global Ids in the original graph.
    orig_src = F.asnumpy(F.gather_row(orig_nid_map, shuffled_src))
    orig_dst = F.asnumpy(F.gather_row(orig_nid_map, shuffled_dst))
    orig_eid = F.asnumpy(F.gather_row(orig_eid_map, shuffled_eid))

    etype_map = {g.get_etype_id(etype): etype for etype in g.etypes}
    etype_to_eptype = {
        g.get_etype_id(etype): (src_ntype, dst_ntype)
        for src_ntype, etype, dst_ntype in g.canonical_etypes
    }
    for e in np.unique(etype):
        # All edges of one type must connect a single (src, dst) type pair.
        src_t = src_type[etype == e]
        dst_t = dst_type[etype == e]
        assert np.all(src_t == src_t[0])
        assert np.all(dst_t == dst_t[0])

        # Check the node Ids and edge Ids.
        orig_src1, orig_dst1 = g.find_edges(orig_eid[etype == e],
                                            etype=etype_map[e])
        assert np.all(F.asnumpy(orig_src1) == orig_src[etype == e])
        assert np.all(F.asnumpy(orig_dst1) == orig_dst[etype == e])

        # Check the node types.
        src_ntype, dst_ntype = etype_to_eptype[e]
        assert np.all(src_t == g.get_ntype_id(src_ntype))
        assert np.all(dst_t == g.get_ntype_id(dst_ntype))