コード例 #1
0
ファイル: test_dist_graph_store.py プロジェクト: youhe12/dgl
def run_client(graph_name, part_id, num_nodes, num_edges):
    gpb = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name),
                              part_id, None)
    g = DistGraph("kv_ip_config.txt", graph_name, gpb=gpb)

    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test reading node data
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats1 = g.ndata['features'][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))

    # Test reading edge data
    eids = F.arange(0, int(g.number_of_edges() / 2))
    feats1 = g.edata['features'][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))

    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.init_ndata('test1', new_shape, F.int32)
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test init edge data
    new_shape = (g.number_of_edges(), 2)
    g.init_edata('test1', new_shape, F.int32)
    feats = g.edata['test1'][eids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1, )

    selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book(), g.rank())
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes in the graph.
    local_nids = np.arange(g.number_of_nodes())
    for n in nodes:
        assert n in local_nids

    # clean up
    dgl.distributed.shutdown_servers()
    dgl.distributed.finalize_client()
    print('end')
コード例 #2
0
def test_random_choice():
    # test 1
    a = F.arange(0, 100)
    x = dgl.random.choice(a, 10, replace=True, prob=None)
    assert len(x) == 10
    for i in range(len(x)):
        assert F.asnumpy(x[i]) >= 0 and F.asnumpy(x[i]) < 100
    # test 2, replace=False, small num
    a = F.arange(0, 100)
    x = dgl.random.choice(a, 10, replace=False, prob=None)
    assert len(x) == 10
    for i in range(len(x)):
        assert F.asnumpy(x[i]) >= 0 and F.asnumpy(x[i]) < 100
    # test 3, replace=False, large num
    a = F.arange(0, 100)
    x = dgl.random.choice(a, 100, replace=False, prob=None)
    assert len(x) == 100
    assert np.array_equal(np.sort(F.asnumpy(x)), F.asnumpy(a))
    # test 4, first arg is integer
    x = dgl.random.choice(100, 100, replace=False, prob=None)
    assert len(x) == 100
    assert np.array_equal(np.sort(F.asnumpy(x)), F.asnumpy(a))
    # test 5, with prob
    prob = np.ones((100, ))
    prob[37:40] = 0.
    prob -= prob.min()
    prob /= prob.sum()
    prob = F.tensor(prob)
    x = dgl.random.choice(100, 97, replace=False, prob=prob)
    assert len(x) == 97
    for i in range(len(x)):
        assert F.asnumpy(x[i]) < 37 or F.asnumpy(x[i]) >= 40
コード例 #3
0
ファイル: test_subgraph.py プロジェクト: yuk12/dgl
def test_edge_subgraph():
    # Test when the graph has no node data and edge data.
    g = generate_graph(add_data=False)
    eid = [0, 2, 3, 6, 7, 9]
    sg = g.edge_subgraph(eid)
    sg.ndata['h'] = F.arange(0, sg.number_of_nodes())
    sg.edata['h'] = F.arange(0, sg.number_of_edges())
コード例 #4
0
def test_segment_reduce(reducer):
    ctx = F.ctx()
    value = F.tensor(np.random.rand(10, 5))
    v1 = F.attach_grad(F.clone(value))
    v2 = F.attach_grad(F.clone(value))
    seglen = F.tensor([2, 3, 0, 4, 1, 0, 0])
    u = F.copy_to(F.arange(0, F.shape(value)[0], F.int32), ctx)
    v = F.repeat(F.copy_to(F.arange(0, len(seglen), F.int32), ctx),
                 seglen,
                 dim=0)

    num_nodes = {'_U': len(u), '_V': len(seglen)}
    g = dgl.convert.heterograph({('_U', '_E', '_V'): (u, v)},
                                num_nodes_dict=num_nodes)
    with F.record_grad():
        rst1 = gspmm(g, 'copy_lhs', reducer, v1, None)
        if reducer in ['max', 'min']:
            rst1 = F.replace_inf_with_zero(rst1)
        F.backward(F.reduce_sum(rst1))
        grad1 = F.grad(v1)

    with F.record_grad():
        rst2 = segment_reduce(seglen, v2, reducer=reducer)
        F.backward(F.reduce_sum(rst2))
        assert F.allclose(rst1, rst2)
        print('forward passed')

        grad2 = F.grad(v2)
        assert F.allclose(grad1, grad2)
        print('backward passed')
コード例 #5
0
ファイル: test_dist_graph_store.py プロジェクト: yueyub/dgl
def test_server_client():
    g = create_random_graph(10000)

    # Partition the graph
    num_parts = 1
    graph_name = 'test'
    g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
    partition_graph(g, graph_name, num_parts, '/tmp')

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    barrier = mp.Barrier(2)
    serv_ps = []
    for serv_id in range(1):
        p = Process(target=run_server, args=(graph_name, serv_id, 1, barrier))
        serv_ps.append(p)
        p.start()

    cli_ps = []
    for cli_id in range(1):
        print('start client', cli_id)
        p = Process(target=run_client,
                    args=(graph_name, barrier, g.number_of_nodes(),
                          g.number_of_edges()))
        p.start()
        cli_ps.append(p)

    for p in cli_ps:
        p.join()
    print('clients have terminated')
コード例 #6
0
def test_graph_partition_book():
    g = create_random_graph(10000)
    g.ndata['labels'] = F.arange(0, g.number_of_nodes())
    g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10))
    num_parts = 4
    num_hops = 2

    partition_graph(g, 'gpb_test', num_parts, '/tmp/gpb', num_hops=num_hops, part_method='metis')

    for i in range(num_parts):
        part_g, node_feats, edge_feats, meta = load_partition('/tmp/gpb/gpb_test.json', i)
        num_nodes, num_edges, node_map, edge_map, num_partitions = meta
        gpb = GraphPartitionBook(part_id=i,
                                 num_parts=num_partitions,
                                 node_map=node_map,
                                 edge_map=edge_map,
                                 part_graph=part_g)
        assert gpb.num_partitions() == num_parts
        gpb_meta = gpb.metadata()
        assert len(gpb_meta) == num_parts
        assert np.all(F.asnumpy(gpb.nid2partid(F.arange(0, len(node_map)))) == node_map)
        assert np.all(F.asnumpy(gpb.eid2partid(F.arange(0, len(edge_map)))) == edge_map)
        assert len(gpb.partid2nids(i)) == gpb_meta[i]['num_nodes']
        assert len(gpb.partid2eids(i)) == gpb_meta[i]['num_edges']
        local_nid = gpb.nid2localnid(part_g.ndata[dgl.NID], i)
        assert np.all(F.asnumpy(local_nid) == F.asnumpy(F.arange(0, len(local_nid))))
        local_eid = gpb.eid2localeid(part_g.edata[dgl.EID], i)
        assert np.all(F.asnumpy(local_eid) == F.asnumpy(F.arange(0, len(local_eid))))
コード例 #7
0
def check_server_client(shared_mem):
    prepare_dist()
    g = create_random_graph(10000)

    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_2'
    g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    serv_ps = []
    ctx = mp.get_context('spawn')
    for serv_id in range(1):
        p = ctx.Process(target=run_server, args=(graph_name, serv_id, 1, shared_mem))
        serv_ps.append(p)
        p.start()

    cli_ps = []
    for cli_id in range(1):
        print('start client', cli_id)
        p = ctx.Process(target=run_client, args=(graph_name, cli_id, g.number_of_nodes(),
                                                 g.number_of_edges()))
        p.start()
        cli_ps.append(p)

    for p in cli_ps:
        p.join()

    for p in serv_ps:
        p.join()

    print('clients have terminated')
コード例 #8
0
ファイル: test_subgraph.py プロジェクト: yuk12/dgl
def _test_map_to_subgraph():
    g = dgl.DGLGraph()
    g.add_nodes(10)
    g.add_edges(F.arange(0, 9), F.arange(1, 10))
    h = g.subgraph([0, 1, 2, 5, 8])
    v = h.map_to_subgraph_nid([0, 8, 2])
    assert np.array_equal(F.asnumpy(v), np.array([0, 4, 2]))
コード例 #9
0
ファイル: test_sampler.py プロジェクト: yucoian/dgl
def _test_layer_sampler(prefetch=False):
    g = generate_rand_graph(100)
    nid = g.nodes()
    src, dst, eid = g.all_edges(form='all', order='eid')
    n_batches = 5
    batch_size = 50
    seed_batches = [
        np.sort(np.random.choice(F.asnumpy(nid), batch_size, replace=False))
        for i in range(n_batches)
    ]
    seed_nodes = np.hstack(seed_batches)
    layer_sizes = [50] * 3
    LayerSampler = getattr(dgl.contrib.sampling, 'LayerSampler')
    sampler = LayerSampler(g,
                           batch_size,
                           layer_sizes,
                           'in',
                           seed_nodes=seed_nodes,
                           num_workers=4,
                           prefetch=prefetch)
    for sub_g in sampler:
        assert all(
            sub_g.layer_size(i) < size for i, size in enumerate(layer_sizes))
        sub_nid = F.arange(0, sub_g.number_of_nodes())
        assert all(
            np.all(np.isin(F.asnumpy(sub_g.layer_nid(i)), F.asnumpy(sub_nid)))
            for i in range(sub_g.num_layers))
        assert np.all(
            np.isin(F.asnumpy(sub_g.map_to_parent_nid(sub_nid)),
                    F.asnumpy(nid)))
        sub_eid = F.arange(0, sub_g.number_of_edges())
        assert np.all(
            np.isin(F.asnumpy(sub_g.map_to_parent_eid(sub_eid)),
                    F.asnumpy(eid)))
        assert any(
            np.all(
                np.sort(F.asnumpy(sub_g.layer_parent_nid(-1))) == seed_batch)
            for seed_batch in seed_batches)

        sub_src, sub_dst = sub_g.all_edges(order='eid')
        for i in range(sub_g.num_blocks):
            block_eid = sub_g.block_eid(i)
            block_src = sub_g.map_to_parent_nid(sub_src[block_eid])
            block_dst = sub_g.map_to_parent_nid(sub_dst[block_eid])

            block_parent_eid = sub_g.block_parent_eid(i)
            block_parent_src = src[block_parent_eid]
            block_parent_dst = dst[block_parent_eid]

            assert np.all(F.asnumpy(block_src == block_parent_src))

        n_layers = sub_g.num_layers
        sub_n = sub_g.number_of_nodes()
        assert sum(F.shape(sub_g.layer_nid(i))[0]
                   for i in range(n_layers)) == sub_n
        n_blocks = sub_g.num_blocks
        sub_m = sub_g.number_of_edges()
        assert sum(F.shape(sub_g.block_eid(i))[0]
                   for i in range(n_blocks)) == sub_m
コード例 #10
0
def start_dist_dataloader(rank, tmpdir, num_server, drop_last):
    import dgl
    import torch as th
    dgl.distributed.initialize("mp_ip_config.txt")
    gpb = None
    disable_shared_mem = num_server > 0
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    num_nodes_to_sample = 202
    batch_size = 32
    train_nid = th.arange(num_nodes_to_sample)
    dist_graph = DistGraph("test_mp", gpb=gpb, part_config=tmpdir / 'test_sampling.json')

    orig_nid = F.arange(0, dist_graph.number_of_nodes())
    orig_eid = F.arange(0, dist_graph.number_of_edges())
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json', i)
        if 'orig_id' in part.ndata:
            orig_nid[part.ndata[dgl.NID]] = part.ndata['orig_id']
        if 'orig_id' in part.edata:
            orig_eid[part.edata[dgl.EID]] = part.edata['orig_id']

    # Create sampler
    sampler = NeighborSampler(dist_graph, [5, 10],
                              dgl.distributed.sample_neighbors)

    # We need to test creating DistDataLoader multiple times.
    for i in range(2):
        # Create DataLoader for constructing blocks
        dataloader = DistDataLoader(
            dataset=train_nid.numpy(),
            batch_size=batch_size,
            collate_fn=sampler.sample_blocks,
            shuffle=False,
            drop_last=drop_last)

        groundtruth_g = CitationGraphDataset("cora")[0]
        max_nid = []

        for epoch in range(2):
            for idx, blocks in zip(range(0, num_nodes_to_sample, batch_size), dataloader):
                block = blocks[-1]
                o_src, o_dst =  block.edges()
                src_nodes_id = block.srcdata[dgl.NID][o_src]
                dst_nodes_id = block.dstdata[dgl.NID][o_dst]
                max_nid.append(np.max(F.asnumpy(dst_nodes_id)))

                src_nodes_id = orig_nid[src_nodes_id]
                dst_nodes_id = orig_nid[dst_nodes_id]
                has_edges = groundtruth_g.has_edges_between(src_nodes_id, dst_nodes_id)
                assert np.all(F.asnumpy(has_edges))
                # assert np.all(np.unique(np.sort(F.asnumpy(dst_nodes_id))) == np.arange(idx, batch_size))
            if drop_last:
                assert np.max(max_nid) == num_nodes_to_sample - 1 - num_nodes_to_sample % batch_size
            else:
                assert np.max(max_nid) == num_nodes_to_sample - 1
    del dataloader
    dgl.distributed.exit_client() # this is needed since there's two test here in one process
コード例 #11
0
ファイル: test_dist_graph_store.py プロジェクト: yueyub/dgl
def run_client(graph_name, barrier, num_nodes, num_edges):
    barrier.wait()
    g = DistGraph(server_namebook, graph_name)

    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test reading node data
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats1 = g.ndata['features'][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))

    # Test reading edge data
    eids = F.arange(0, int(g.number_of_edges() / 2))
    feats1 = g.edata['features'][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))

    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.init_ndata('test1', new_shape, F.int32)
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test init edge data
    new_shape = (g.number_of_edges(), 2)
    g.init_edata('test1', new_shape, F.int32)
    feats = g.edata['test1'][eids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1, )

    selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book(), g.rank())
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes in the graph.
    local_nids = np.arange(g.number_of_nodes())
    for n in nodes:
        assert n in local_nids

    g.shut_down()
    print('end')
コード例 #12
0
def check_partition(reshuffle):
    g = create_random_graph(10000)
    g.ndata['labels'] = F.arange(0, g.number_of_nodes())
    g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10))
    num_parts = 4
    num_hops = 2

    partition_graph(g, 'test', num_parts, '/tmp/partition', num_hops=num_hops,
                    part_method='metis', reshuffle=reshuffle)
    part_sizes = []
    for i in range(num_parts):
        part_g, node_feats, edge_feats, gpb = load_partition('/tmp/partition/test.json', i)

        # Check the metadata
        assert gpb._num_nodes() == g.number_of_nodes()
        assert gpb._num_edges() == g.number_of_edges()

        assert gpb.num_partitions() == num_parts
        gpb_meta = gpb.metadata()
        assert len(gpb_meta) == num_parts
        assert len(gpb.partid2nids(i)) == gpb_meta[i]['num_nodes']
        assert len(gpb.partid2eids(i)) == gpb_meta[i]['num_edges']
        part_sizes.append((gpb_meta[i]['num_nodes'], gpb_meta[i]['num_edges']))

        local_nid = gpb.nid2localnid(F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node']), i)
        assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid)))
        local_eid = gpb.eid2localeid(F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge']), i)
        assert np.all(F.asnumpy(local_eid) == np.arange(0, len(local_eid)))

        # Check the node map.
        local_nodes = F.asnumpy(F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node']))
        local_nodes1 = F.asnumpy(gpb.partid2nids(i))
        assert np.all(np.sort(local_nodes) == np.sort(local_nodes1))

        # Check the edge map.
        local_edges = F.asnumpy(F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge']))
        local_edges1 = F.asnumpy(gpb.partid2eids(i))
        assert np.all(np.sort(local_edges) == np.sort(local_edges1))

        for name in ['labels', 'feats']:
            assert name in node_feats
            assert node_feats[name].shape[0] == len(local_nodes)
            assert len(local_nodes) == len(node_feats[name])
            assert np.all(F.asnumpy(g.ndata[name])[local_nodes] == F.asnumpy(node_feats[name]))
        assert len(edge_feats) == 0

    if reshuffle:
        node_map = []
        edge_map = []
        for i, (num_nodes, num_edges) in enumerate(part_sizes):
            node_map.append(np.ones(num_nodes) * i)
            edge_map.append(np.ones(num_edges) * i)
        node_map = np.concatenate(node_map)
        edge_map = np.concatenate(edge_map)
        assert np.all(F.asnumpy(gpb.nid2partid(F.arange(0, len(node_map)))) == node_map)
        assert np.all(F.asnumpy(gpb.eid2partid(F.arange(0, len(edge_map)))) == edge_map)
コード例 #13
0
def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
    prepare_dist()
    g = create_random_graph(10000)

    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_2'
    g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph', num_trainers_per_machine=num_clients)

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    serv_ps = []
    ctx = mp.get_context('spawn')
    for serv_id in range(num_servers):
        p = ctx.Process(target=run_server, args=(graph_name, serv_id, num_servers,
                                                 num_clients, shared_mem))
        serv_ps.append(p)
        p.start()

    cli_ps = []
    manager = mp.Manager()
    return_dict = manager.dict()
    node_mask = np.zeros((g.number_of_nodes(),), np.int32)
    edge_mask = np.zeros((g.number_of_edges(),), np.int32)
    nodes = np.random.choice(g.number_of_nodes(), g.number_of_nodes() // 10, replace=False)
    edges = np.random.choice(g.number_of_edges(), g.number_of_edges() // 10, replace=False)
    node_mask[nodes] = 1
    edge_mask[edges] = 1
    nodes = np.sort(nodes)
    edges = np.sort(edges)
    for cli_id in range(num_clients):
        print('start client', cli_id)
        p = ctx.Process(target=run_client_hierarchy, args=(graph_name, 0, num_servers,
                                                           node_mask, edge_mask, return_dict))
        p.start()
        cli_ps.append(p)

    for p in cli_ps:
        p.join()
    for p in serv_ps:
        p.join()

    nodes1 = []
    edges1 = []
    for n, e in return_dict.values():
        nodes1.append(n)
        edges1.append(e)
    nodes1, _ = F.sort_1d(F.cat(nodes1, 0))
    edges1, _ = F.sort_1d(F.cat(edges1, 0))
    assert np.all(F.asnumpy(nodes1) == nodes)
    assert np.all(F.asnumpy(edges1) == edges)

    print('clients have terminated')
コード例 #14
0
ファイル: test_dist_graph_store.py プロジェクト: zwwlp/dgl
def test_standalone():
    os.environ['DGL_DIST_MODE'] = 'standalone'
    g = create_random_graph(10000)
    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_3'
    g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')
    dist_g = DistGraph("kv_ip_config.txt", graph_name,
                       part_config='/tmp/dist_graph/{}.json'.format(graph_name))
    check_dist_graph(dist_g, 1, g.number_of_nodes(), g.number_of_edges())
コード例 #15
0
def check_dist_emb_server_client(shared_mem,
                                 num_servers,
                                 num_clients,
                                 num_groups=1):
    prepare_dist(num_servers)
    g = create_random_graph(10000)

    # Partition the graph
    num_parts = 1
    graph_name = f'check_dist_emb_{shared_mem}_{num_servers}_{num_clients}_{num_groups}'
    g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    serv_ps = []
    ctx = mp.get_context('spawn')
    keep_alive = num_groups > 1
    for serv_id in range(num_servers):
        p = ctx.Process(target=run_server,
                        args=(graph_name, serv_id, num_servers, num_clients,
                              shared_mem, keep_alive))
        serv_ps.append(p)
        p.start()

    cli_ps = []
    for cli_id in range(num_clients):
        for group_id in range(num_groups):
            print('start client[{}] for group[{}]'.format(cli_id, group_id))
            p = ctx.Process(target=run_emb_client,
                            args=(graph_name, 0, num_servers, num_clients,
                                  g.number_of_nodes(), g.number_of_edges(),
                                  group_id))
            p.start()
            time.sleep(1)  # avoid race condition when instantiating DistGraph
            cli_ps.append(p)

    for p in cli_ps:
        p.join()
        assert p.exitcode == 0

    if keep_alive:
        for p in serv_ps:
            assert p.is_alive()
        # force shutdown server
        dgl.distributed.shutdown_servers("kv_ip_config.txt", num_servers)
    for p in serv_ps:
        p.join()

    print('clients have terminated')
コード例 #16
0
ファイル: test_dist_graph_store.py プロジェクト: may5hap/dgl
def test_standalone():
    os.environ['DGL_DIST_MODE'] = 'standalone'

    g = create_random_graph(10000)
    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_3'
    g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')

    dgl.distributed.initialize("kv_ip_config.txt")
    dist_g = DistGraph(graph_name, part_config='/tmp/dist_graph/{}.json'.format(graph_name))
    check_dist_graph(dist_g, 1, g.number_of_nodes(), g.number_of_edges())
    dgl.distributed.exit_client() # this is needed since there's two test here in one process
コード例 #17
0
def create_random_hetero():
    num_nodes = {'n1': 10000, 'n2': 10010, 'n3': 10020}
    etypes = [('n1', 'r1', 'n2'),
              ('n1', 'r2', 'n3'),
              ('n2', 'r3', 'n3')]
    edges = {}
    for etype in etypes:
        src_ntype, _, dst_ntype = etype
        arr = spsp.random(num_nodes[src_ntype], num_nodes[dst_ntype], density=0.001, format='coo',
                          random_state=100)
        edges[etype] = (arr.row, arr.col)
    g = dgl.heterograph(edges, num_nodes)
    g.nodes['n1'].data['feat'] = F.unsqueeze(F.arange(0, g.number_of_nodes('n1')), 1)
    g.edges['r1'].data['feat'] = F.unsqueeze(F.arange(0, g.number_of_edges('r1')), 1)
    return g
コード例 #18
0
def run_client(graph_name, barrier, num_nodes, num_edges):
    barrier.wait()
    g = DistGraph(server_namebook, graph_name)

    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test reading node data
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats1 = g.ndata['features'][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))

    # Test reading edge data
    eids = F.arange(0, int(g.number_of_edges() / 2))
    feats1 = g.edata['features'][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))

    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.init_ndata('test1', new_shape, F.int32)
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test init edge data
    new_shape = (g.number_of_edges(), 2)
    g.init_edata('test1', new_shape, F.int32)
    feats = g.edata['test1'][eids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1, )

    g.shut_down()
    print('end')
コード例 #19
0
def test_edge_removal(idtype):
    g = dgl.DGLGraph()
    g = g.astype(idtype).to(F.ctx())
    g.add_nodes(5)
    for i in range(5):
        for j in range(5):
            g.add_edge(i, j)
    g.edata['id'] = F.arange(0, 25)

    # remove edges
    g.remove_edges(range(13, 20))
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 18
    assert F.array_equal(g.edata['id'],
                         F.tensor(list(range(13)) + list(range(20, 25))))
    assert dgl.NID not in g.ndata
    assert dgl.EID not in g.edata

    # add edges
    g.add_edge(3, 3)
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 19
    assert F.array_equal(g.edata['id'],
                         F.tensor(list(range(13)) + list(range(20, 25)) + [0]))

    # remove edges
    g.remove_edges(range(2, 10), store_ids=True)
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 11
    assert F.array_equal(g.edata['id'],
                         F.tensor([0, 1, 10, 11, 12, 20, 21, 22, 23, 24, 0]))
    assert dgl.NID in g.ndata
    assert dgl.EID in g.edata
コード例 #20
0
def test_basic():
    num_layers = 2
    g = generate_rand_graph(100, connect_more=True)
    nf = create_full_nodeflow(g, num_layers)
    assert nf.number_of_nodes() == g.number_of_nodes() * (num_layers + 1)
    assert nf.number_of_edges() == g.number_of_edges() * num_layers
    assert nf.num_layers == num_layers + 1
    assert nf.layer_size(0) == g.number_of_nodes()
    assert nf.layer_size(1) == g.number_of_nodes()
    check_basic(g, nf)

    parent_nids = F.copy_to(F.arange(0, g.number_of_nodes()), F.cpu())
    nids = nf.map_from_parent_nid(0, parent_nids, remap_local=True)
    assert_array_equal(F.asnumpy(nids), F.asnumpy(parent_nids))

    # should also work for negative layer ids
    for l in range(-1, -num_layers, -1):
        nids1 = nf.map_from_parent_nid(l, parent_nids, remap_local=True)
        nids2 = nf.map_from_parent_nid(l + num_layers,
                                       parent_nids,
                                       remap_local=True)
        assert_array_equal(F.asnumpy(nids1), F.asnumpy(nids2))

    g = generate_rand_graph(100)
    nf = create_mini_batch(g, num_layers)
    assert nf.num_layers == num_layers + 1
    check_basic(g, nf)

    g = generate_rand_graph(100, add_self_loop=True)
    nf = create_mini_batch(g, num_layers, add_self_loop=True)
    assert nf.num_layers == num_layers + 1
    check_basic(g, nf)
コード例 #21
0
def test_partition():
    g = create_random_graph(10000)
    g.ndata['labels'] = F.arange(0, g.number_of_nodes())
    g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10))
    num_parts = 4
    num_hops = 2

    partition_graph(g, 'test', num_parts, '/tmp', num_hops=num_hops, part_method='metis')
    for i in range(num_parts):
        part_g, node_feats, edge_feats, meta = load_partition('/tmp/test.json', i)
        num_nodes, num_edges, node_map, edge_map = meta

        # Check the metadata
        assert num_nodes == g.number_of_nodes()
        assert num_edges == g.number_of_edges()

        # Check the node map.
        local_nodes = np.nonzero(node_map == i)[0]
        part_ids = node_map[F.asnumpy(part_g.ndata[dgl.NID])]
        local_nodes1 = F.asnumpy(part_g.ndata[dgl.NID])[part_ids == i]
        assert np.all(local_nodes == local_nodes1)

        # Check the edge map.
        assert np.all(edge_map >= 0)
        local_edges = np.nonzero(edge_map == i)[0]
        part_ids = edge_map[F.asnumpy(part_g.edata[dgl.EID])]
        local_edges1 = F.asnumpy(part_g.edata[dgl.EID])[part_ids == i]
        assert np.all(local_edges == np.sort(local_edges1))

        for name in ['labels', 'feats']:
            assert name in node_feats
            assert node_feats[name].shape[0] == len(local_nodes)
            assert len(local_nodes) == len(node_feats[name])
            assert np.all(F.asnumpy(g.ndata[name])[local_nodes] == F.asnumpy(node_feats[name]))
        assert len(edge_feats) == 0
コード例 #22
0
ファイル: test_geometry.py プロジェクト: zhujl1991/dgl
def test_edge_coarsening(idtype, g, weight, relabel):
    num_nodes = g.num_nodes()
    g = dgl.to_bidirected(g)
    g = g.astype(idtype).to(F.ctx())
    edge_weight = None
    if weight:
        edge_weight = F.abs(F.randn((g.num_edges(),))).to(F.ctx())
    node_labels = neighbor_matching(g, edge_weight, relabel_idx=relabel)
    unique_ids, counts = th.unique(node_labels, return_counts=True)
    num_result_ids = unique_ids.size(0)

    # shape correct
    assert node_labels.shape == (g.num_nodes(),)

    # all nodes marked
    assert F.reduce_sum(node_labels < 0).item() == 0

    # number of unique node ids correct.
    assert num_result_ids >= num_nodes // 2 and num_result_ids <= num_nodes

    # each unique id has <= 2 nodes
    assert F.reduce_sum(counts > 2).item() == 0

    # if two nodes have the same id, they must be neighbors
    idxs = F.arange(0, num_nodes, idtype)
    for l in unique_ids:
        l = l.item()
        idx = idxs[(node_labels == l)]
        if idx.size(0) == 2:
            u, v = idx[0].item(), idx[1].item()
            assert g.has_edges_between(u, v)
コード例 #23
0
def test_edge_removal():
    g = dgl.DGLGraph()
    g.add_nodes(5)
    for i in range(5):
        for j in range(5):
            g.add_edge(i, j)
    g.edata['id'] = F.arange(0, 25)

    # remove edges
    g.remove_edges(range(13, 20))
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 18
    assert F.array_equal(g.edata['id'],
                         F.tensor(list(range(13)) + list(range(20, 25))))

    # add edges
    g.add_edge(3, 3)
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 19
    assert F.array_equal(g.edata['id'],
                         F.tensor(list(range(13)) + list(range(20, 25)) + [0]))

    # remove edges
    g.remove_edges(range(2, 10))
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 11
    assert F.array_equal(g.edata['id'],
                         F.tensor([0, 1, 10, 11, 12, 20, 21, 22, 23, 24, 0]))
コード例 #24
0
def check_dist_graph_empty(g, num_clients, num_nodes, num_edges):
    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.ndata['test1'] = dgl.distributed.DistTensor(new_shape, F.int32)
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)

    # create a tensor and destroy a tensor and create it again.
    test3 = dgl.distributed.DistTensor(new_shape,
                                       F.float32,
                                       'test3',
                                       init_func=rand_init)
    del test3
    test3 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32,
                                       'test3')
    del test3

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert g.node_attr_schemes()['test1'].dtype == F.int32

    print('end')
コード例 #25
0
def check_dist_emb(g, num_clients, num_nodes, num_edges):
    from dgl.distributed.optim import SparseAdagrad
    from dgl.distributed import DistEmbedding
    # Test sparse emb
    try:
        emb = DistEmbedding(g.number_of_nodes(), 1, 'emb1', emb_init)
        nids = F.arange(0, int(g.number_of_nodes()))
        lr = 0.001
        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats = emb(nids)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids), 1)))
            loss = F.sum(feats + 1, 0)
        loss.backward()
        optimizer.step()
        feats = emb(nids)
        if num_clients == 1:
            assert_almost_equal(F.asnumpy(feats),
                                np.ones((len(nids), 1)) * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))

        policy = dgl.distributed.PartitionPolicy('node',
                                                 g.get_partition_book())
        grad_sum = dgl.distributed.DistTensor((g.number_of_nodes(), 1),
                                              F.float32, 'emb1_sum', policy)
        if num_clients == 1:
            assert np.all(
                F.asnumpy(grad_sum[nids]) == np.ones((len(nids), 1)) *
                num_clients)
        assert np.all(F.asnumpy(grad_sum[rest]) == np.zeros((len(rest), 1)))

        emb = DistEmbedding(g.number_of_nodes(), 1, 'emb2', emb_init)
        with F.no_grad():
            feats1 = emb(nids)
        assert np.all(F.asnumpy(feats1) == 0)

        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats1 = emb(nids)
            feats2 = emb(nids)
            feats = F.cat([feats1, feats2], 0)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids) * 2, 1)))
            loss = F.sum(feats + 1, 0)
        loss.backward()
        optimizer.step()
        with F.no_grad():
            feats = emb(nids)
        if num_clients == 1:
            assert_almost_equal(F.asnumpy(feats),
                                np.ones((len(nids), 1)) * 1 * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
    except NotImplementedError as e:
        pass
    except Exception as e:
        print(e)
        sys.exit(-1)
コード例 #26
0
def check_hetero_partition(hg, part_method):
    hg.nodes['n1'].data['labels'] = F.arange(0, hg.number_of_nodes('n1'))
    hg.nodes['n1'].data['feats'] = F.tensor(
        np.random.randn(hg.number_of_nodes('n1'), 10), F.float32)
    hg.edges['r1'].data['feats'] = F.tensor(
        np.random.randn(hg.number_of_edges('r1'), 10), F.float32)
    num_parts = 4
    num_hops = 1

    orig_nids, orig_eids = partition_graph(hg,
                                           'test',
                                           num_parts,
                                           '/tmp/partition',
                                           num_hops=num_hops,
                                           part_method=part_method,
                                           reshuffle=True,
                                           return_mapping=True)
    assert len(orig_nids) == len(hg.ntypes)
    assert len(orig_eids) == len(hg.etypes)
    for ntype in hg.ntypes:
        assert len(orig_nids[ntype]) == hg.number_of_nodes(ntype)
    for etype in hg.etypes:
        assert len(orig_eids[etype]) == hg.number_of_edges(etype)
    parts = []
    for i in range(num_parts):
        part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition(
            '/tmp/partition/test.json', i)
        # Verify the mapping between the reshuffled IDs and the original IDs.
        # These are partition-local IDs.
        part_src_ids, part_dst_ids = part_g.edges()
        # These are reshuffled global homogeneous IDs.
        part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids)
        part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids)
        part_eids = part_g.edata[dgl.EID]
        # These are reshuffled per-type IDs.
        src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids)
        dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids)
        etype_ids, part_eids = gpb.map_to_per_etype(part_eids)
        # These are original per-type IDs.
        for etype_id, etype in enumerate(hg.etypes):
            part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id)
            src_ntype_ids1 = F.boolean_mask(src_ntype_ids,
                                            etype_ids == etype_id)
            part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id)
            dst_ntype_ids1 = F.boolean_mask(dst_ntype_ids,
                                            etype_ids == etype_id)
            part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id)
            assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0]))
            assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0]))
            src_ntype = hg.ntypes[F.as_scalar(src_ntype_ids1[0])]
            dst_ntype = hg.ntypes[F.as_scalar(dst_ntype_ids1[0])]
            orig_src_ids1 = F.gather_row(orig_nids[src_ntype], part_src_ids1)
            orig_dst_ids1 = F.gather_row(orig_nids[dst_ntype], part_dst_ids1)
            orig_eids1 = F.gather_row(orig_eids[etype], part_eids1)
            orig_eids2 = hg.edge_ids(orig_src_ids1, orig_dst_ids1, etype=etype)
            assert len(orig_eids1) == len(orig_eids2)
            assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))
        parts.append(part_g)
        verify_graph_feats(hg, part_g, node_feats)
    verify_hetero_graph(hg, parts)
コード例 #27
0
def test_node_removal(idtype):
    g = dgl.DGLGraph()
    g = g.astype(idtype).to(F.ctx())
    g.add_nodes(10)
    g.add_edge(0, 0)
    assert g.number_of_nodes() == 10
    g.ndata['id'] = F.arange(0, 10)

    # remove nodes
    g.remove_nodes(range(4, 7))
    assert g.number_of_nodes() == 7
    assert F.array_equal(g.ndata['id'], F.tensor([0, 1, 2, 3, 7, 8, 9]))
    assert dgl.NID not in g.ndata
    assert dgl.EID not in g.edata

    # add nodes
    g.add_nodes(3)
    assert g.number_of_nodes() == 10
    assert F.array_equal(g.ndata['id'],
                         F.tensor([0, 1, 2, 3, 7, 8, 9, 0, 0, 0]))

    # remove nodes
    g.remove_nodes(range(1, 4), store_ids=True)
    assert g.number_of_nodes() == 7
    assert F.array_equal(g.ndata['id'], F.tensor([0, 7, 8, 9, 0, 0, 0]))
    assert dgl.NID in g.ndata
    assert dgl.EID in g.edata
コード例 #28
0
ファイル: test_heterograph.py プロジェクト: hacors/Drug
def test_view():
    # test data view
    g = create_test_heterograph()

    f1 = F.randn((3, 6))
    g.nodes['user'].data['h'] = f1       # ok
    f2 = g.nodes['user'].data['h']
    assert F.array_equal(f1, f2)
    assert F.array_equal(F.tensor(g.nodes('user')), F.arange(0, 3))

    f3 = F.randn((2, 4))
    g.edges['user', 'follows', 'user'].data['h'] = f3
    f4 = g.edges['user', 'follows', 'user'].data['h']
    f5 = g.edges['follows'].data['h']
    assert F.array_equal(f3, f4)
    assert F.array_equal(f3, f5)
    assert F.array_equal(F.tensor(g.edges(etype='follows', form='eid')), F.arange(0, 2))
コード例 #29
0
ファイル: test_basics.py プロジェクト: zzq463650885/dgl
def test_group_apply_edges2():
    m = ssp.random(10, 10, 0.2)
    g = DGLGraph(m, readonly=True)
    g.ndata['deg'] = g.in_degrees()
    g.ndata['id'] = F.arange(0, g.number_of_nodes())
    g.edata['id'] = F.arange(0, g.number_of_edges())

    def apply(edges):
        w = edges.data['id']
        n_nodes, deg = w.shape

        dst = edges.dst['id'][:, 0]
        eid1 = F.asnumpy(g.in_edges(dst, 'eid')).reshape(n_nodes, deg).sort(1)
        eid2 = F.asnumpy(edges.data['id']).sort(1)
        assert np.array_equal(eid1, eid2)
        return {'id2': w}

    g.group_apply_edges('dst', apply, inplace=True)
コード例 #30
0
ファイル: test_dataloader.py プロジェクト: jermainewang/dgl
def test_edge_prediction_sampler(idtype):
    g = create_test_graph(idtype)
    sampler = NeighborSampler([10, 10])
    sampler = as_edge_prediction_sampler(
        sampler, negative_sampler=negative_sampler.Uniform(1))

    seeds = F.copy_to(F.arange(0, 2, dtype=idtype), ctx=F.ctx())
    # just a smoke test to make sure we don't fail internal assertions
    result = sampler.sample(g, {'follows': seeds})