Ejemplo n.º 1
0
def check_standalone_sampling(tmpdir, reshuffle):
    g = CitationGraphDataset("cora")[0]
    num_parts = 1
    num_hops = 1
    partition_graph(g,
                    'test_sampling',
                    num_parts,
                    tmpdir,
                    num_hops=num_hops,
                    part_method='metis',
                    reshuffle=reshuffle)

    os.environ['DGL_DIST_MODE'] = 'standalone'
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_sampling",
                           part_config=tmpdir / 'test_sampling.json')
    sampled_graph = sample_neighbors(dist_graph, [0, 10, 99, 66, 1024, 2008],
                                     3)

    src, dst = sampled_graph.edges()
    assert sampled_graph.number_of_nodes() == g.number_of_nodes()
    assert np.all(F.asnumpy(g.has_edges_between(src, dst)))
    eids = g.edge_ids(src, dst)
    assert np.array_equal(F.asnumpy(sampled_graph.edata[dgl.EID]),
                          F.asnumpy(eids))
    dgl.distributed.exit_client()
Ejemplo n.º 2
0
def start_sample_client_shuffle(rank,
                                tmpdir,
                                disable_shared_mem,
                                g,
                                num_servers,
                                group_id=0):
    os.environ['DGL_GROUP_ID'] = str(group_id)
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json',
                                               rank)
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_sampling", gpb=gpb)
    sampled_graph = sample_neighbors(dist_graph, [0, 10, 99, 66, 1024, 2008],
                                     3)

    orig_nid = F.zeros((g.number_of_nodes(), ), dtype=F.int64, ctx=F.cpu())
    orig_eid = F.zeros((g.number_of_edges(), ), dtype=F.int64, ctx=F.cpu())
    for i in range(num_servers):
        part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json',
                                                i)
        orig_nid[part.ndata[dgl.NID]] = part.ndata['orig_id']
        orig_eid[part.edata[dgl.EID]] = part.edata['orig_id']

    src, dst = sampled_graph.edges()
    src = orig_nid[src]
    dst = orig_nid[dst]
    assert sampled_graph.number_of_nodes() == g.number_of_nodes()
    assert np.all(F.asnumpy(g.has_edges_between(src, dst)))
    eids = g.edge_ids(src, dst)
    eids1 = orig_eid[sampled_graph.edata[dgl.EID]]
    assert np.array_equal(F.asnumpy(eids1), F.asnumpy(eids))
Ejemplo n.º 3
0
def check_standalone_sampling(tmpdir):
    g = CitationGraphDataset("cora")[0]
    g.readonly()
    num_parts = 1
    num_hops = 1
    partition_graph(g,
                    'test_sampling',
                    num_parts,
                    tmpdir,
                    num_hops=num_hops,
                    part_method='metis',
                    reshuffle=False)

    dist_graph = DistGraph(None,
                           "test_sampling",
                           conf_file=tmpdir / 'test_sampling.json')
    sampled_graph = sample_neighbors(dist_graph, [0, 10, 99, 66, 1024, 2008],
                                     3)

    src, dst = sampled_graph.edges()
    assert sampled_graph.number_of_nodes() == g.number_of_nodes()
    assert np.all(F.asnumpy(g.has_edges_between(src, dst)))
    eids = g.edge_ids(src, dst)
    assert np.array_equal(F.asnumpy(sampled_graph.edata[dgl.EID]),
                          F.asnumpy(eids))
Ejemplo n.º 4
0
def start_sample_client(rank, tmpdir, disable_shared_mem):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    dist_graph = DistGraph("rpc_ip_config.txt", "test_sampling", gpb=gpb)
    sampled_graph = sample_neighbors(dist_graph, [0, 10, 99, 66, 1024, 2008], 3)
    dgl.distributed.exit_client()
    return sampled_graph
Ejemplo n.º 5
0
def start_bipartite_sample_client(rank, tmpdir, disable_shared_mem, nodes):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json',
                                               rank)
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_sampling", gpb=gpb)
    assert 'feat' in dist_graph.nodes['user'].data
    assert 'feat' in dist_graph.nodes['game'].data
    if gpb is None:
        gpb = dist_graph.get_partition_book()
    sampled_graph = sample_neighbors(dist_graph, nodes, 3)
    block = dgl.to_block(sampled_graph, nodes)
    if sampled_graph.num_edges() > 0:
        block.edata[dgl.EID] = sampled_graph.edata[dgl.EID]
    dgl.distributed.exit_client()
    return block, gpb
Ejemplo n.º 6
0
def start_hetero_sample_client(rank, tmpdir, disable_shared_mem, nodes):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_sampling", gpb=gpb)
    assert 'feat' in dist_graph.nodes['n1'].data
    assert 'feat' not in dist_graph.nodes['n2'].data
    assert 'feat' not in dist_graph.nodes['n3'].data
    if gpb is None:
        gpb = dist_graph.get_partition_book()
    try:
        sampled_graph = sample_neighbors(dist_graph, nodes, 3)
        block = dgl.to_block(sampled_graph, nodes)
        block.edata[dgl.EID] = sampled_graph.edata[dgl.EID]
    except Exception as e:
        print(e)
        block = None
    dgl.distributed.exit_client()
    return block, gpb