Example #1
def test_topk(g, idtype, descending):
    g = g.astype(idtype).to(F.ctx())
    g.ndata['x'] = F.randn((g.number_of_nodes(), 3))

    # Test.1: to test the case where k > number of nodes.
    dgl.topk_nodes(g, 'x', 100, sortby=-1)

    # Test.2: test correctness
    min_nnodes = F.asnumpy(g.batch_num_nodes()).min()
    if min_nnodes <= 1:
        return
    k = min_nnodes - 1
    val, indices = dgl.topk_nodes(g, 'x', k, descending=descending, sortby=-1)
    print(k)
    print(g.ndata['x'])
    print('val', val)
    print('indices', indices)
    subg = dgl.unbatch(g)
    subval, subidx = [], []
    for sg in subg:
        subx = F.asnumpy(sg.ndata['x'])
        ai = np.argsort(subx[:, -1:].flatten())
        if descending:
            ai = np.ascontiguousarray(ai[::-1])
        subx = np.expand_dims(subx[ai[:k]], 0)
        subval.append(F.tensor(subx))
        subidx.append(F.tensor(np.expand_dims(ai[:k], 0)))
    print(F.cat(subval, dim=0))
    assert F.allclose(val, F.cat(subval, dim=0))
    assert F.allclose(indices, F.cat(subidx, dim=0))

    # Test.3: sortby=None
    dgl.topk_nodes(g, 'x', k, sortby=None)

    g.edata['x'] = F.randn((g.number_of_edges(), 3))

    # Test.4: topk edges where k > number of edges.
    dgl.topk_edges(g, 'x', 100, sortby=-1)

    # Test.5: topk edges test correctness
    min_nedges = F.asnumpy(g.batch_num_edges()).min()
    if min_nedges <= 1:
        return
    k = min_nedges - 1
    val, indices = dgl.topk_edges(g, 'x', k, descending=descending, sortby=-1)
    print(k)
    print(g.edata['x'])
    print('val', val)
    print('indices', indices)
    subg = dgl.unbatch(g)
    subval, subidx = [], []
    for sg in subg:
        subx = F.asnumpy(sg.edata['x'])
        ai = np.argsort(subx[:, -1:].flatten())
        if descending:
            ai = np.ascontiguousarray(ai[::-1])
        subx = np.expand_dims(subx[ai[:k]], 0)
        subval.append(F.tensor(subx))
        subidx.append(F.tensor(np.expand_dims(ai[:k], 0)))
    print(F.cat(subval, dim=0))
    assert F.allclose(val, F.cat(subval, dim=0))
    assert F.allclose(indices, F.cat(subidx, dim=0))
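A minimal standalone sketch of the API exercised above, assuming dgl.topk_nodes is available in the installed DGL release (the test itself calls it) and a PyTorch backend; the tiny graph is illustrative only.

import dgl
import torch

g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))  # 4 nodes
g.ndata['x'] = torch.randn(4, 3)

# Top-2 nodes of the (single-graph) batch, ranked by the last column of 'x';
# returns the selected feature rows and the indices of the chosen nodes.
val, idx = dgl.topk_nodes(g, 'x', 2, sortby=-1)
print(val.shape, idx.shape)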
Example #2
def test_split_even():
    prepare_dist()
    g = create_random_graph(10000)
    num_parts = 4
    num_hops = 2
    partition_graph(g,
                    'dist_graph_test',
                    num_parts,
                    '/tmp/dist_graph',
                    num_hops=num_hops,
                    part_method='metis')

    node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30
    selected_nodes = np.nonzero(node_mask)[0]
    selected_edges = np.nonzero(edge_mask)[0]
    all_nodes1 = []
    all_nodes2 = []
    all_edges1 = []
    all_edges2 = []
    for i in range(num_parts):
        dgl.distributed.set_num_client(num_parts)
        part_g, node_feats, edge_feats, gpb, _ = load_partition(
            '/tmp/dist_graph/dist_graph_test.json', i)
        local_nids = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids)
        nodes = node_split(node_mask, gpb, i, force_even=True)
        all_nodes1.append(nodes)
        subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(local_nids))
        print('part {} get {} nodes and {} are in the partition'.format(
            i, len(nodes), len(subset)))

        dgl.distributed.set_num_client(num_parts * 2)
        nodes1 = node_split(node_mask, gpb, i * 2, force_even=True)
        nodes2 = node_split(node_mask, gpb, i * 2 + 1, force_even=True)
        nodes3 = F.cat([nodes1, nodes2], 0)
        all_nodes2.append(nodes3)
        subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(nodes3))
        print('intersection has', len(subset))

        dgl.distributed.set_num_client(num_parts)
        local_eids = F.nonzero_1d(part_g.edata['inner_edge'])
        local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids)
        edges = edge_split(edge_mask, gpb, i, force_even=True)
        all_edges1.append(edges)
        subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(local_eids))
        print('part {} get {} edges and {} are in the partition'.format(
            i, len(edges), len(subset)))

        dgl.distributed.set_num_client(num_parts * 2)
        edges1 = edge_split(edge_mask, gpb, i * 2, force_even=True)
        edges2 = edge_split(edge_mask, gpb, i * 2 + 1, force_even=True)
        edges3 = F.cat([edges1, edges2], 0)
        all_edges2.append(edges3)
        subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(edges3))
        print('intersection has', len(subset))
    all_nodes1 = F.cat(all_nodes1, 0)
    all_edges1 = F.cat(all_edges1, 0)
    all_nodes2 = F.cat(all_nodes2, 0)
    all_edges2 = F.cat(all_edges2, 0)
    all_nodes = np.nonzero(node_mask)[0]
    all_edges = np.nonzero(edge_mask)[0]
    assert np.all(all_nodes == F.asnumpy(all_nodes1))
    assert np.all(all_edges == F.asnumpy(all_edges1))
    assert np.all(all_nodes == F.asnumpy(all_nodes2))
    assert np.all(all_edges == F.asnumpy(all_edges2))
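A rough NumPy illustration (not DGL's actual splitting code) of the invariant this test checks: with force_even=True every client receives a near-equal slice of the masked IDs, and concatenating all slices recovers exactly the selected IDs.

import numpy as np

mask = np.random.randint(0, 100, size=1000) > 30
selected = np.nonzero(mask)[0]
num_clients = 4

# np.array_split yields contiguous, nearly equal chunks -- one per client.
shares = np.array_split(selected, num_clients)
assert all(abs(len(s) - len(selected) // num_clients) <= 1 for s in shares)
assert np.array_equal(np.concatenate(shares), selected)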
Example #3
def check_dist_graph(g, num_nodes, num_edges):
    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test reading node data
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats1 = g.ndata['features'][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))

    # Test reading edge data
    eids = F.arange(0, int(g.number_of_edges() / 2))
    feats1 = g.edata['features'][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))

    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.init_ndata('test1', new_shape, F.int32)
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test init edge data
    new_shape = (g.number_of_edges(), 2)
    g.init_edata('test1', new_shape, F.int32)
    feats = g.edata['test1'][eids]
    assert np.all(F.asnumpy(feats) == 0)

    # Test sparse emb
    try:
        new_shape = (g.number_of_nodes(), 1)
        emb = SparseNodeEmbedding(g, 'emb1', new_shape, emb_init)
        lr = 0.001
        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats = emb(nids)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids), 1)))
            loss = F.sum(feats + 1, 0)
        loss.backward()
        optimizer.step()
        feats = emb(nids)
        assert_almost_equal(F.asnumpy(feats), np.ones((len(nids), 1)) * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))

        policy = dgl.distributed.PartitionPolicy('node',
                                                 g.get_partition_book())
        grad_sum = dgl.distributed.DistTensor(g, 'node:emb1_sum', policy)
        assert np.all(F.asnumpy(grad_sum[nids]) == np.ones((len(nids), 1)))
        assert np.all(F.asnumpy(grad_sum[rest]) == np.zeros((len(rest), 1)))

        emb = SparseNodeEmbedding(g, 'emb2', new_shape, emb_init)
        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats1 = emb(nids)
            feats2 = emb(nids)
            feats = F.cat([feats1, feats2], 0)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids) * 2, 1)))
            loss = F.sum(feats + 1, 0)
        loss.backward()
        optimizer.step()
        feats = emb(nids)
        assert_almost_equal(F.asnumpy(feats),
                            np.ones((len(nids), 1)) * math.sqrt(2) * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
    except NotImplementedError as e:
        pass

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1, )

    selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book())
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes in the graph.
    local_nids = np.arange(g.number_of_nodes())
    for n in nodes:
        assert n in local_nids

    print('end')
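For reference, a tiny NumPy sketch of why the first optimizer step above lands at exactly -lr: with loss = sum(feats + 1) each used embedding row has gradient 1, so Adagrad's first update is lr * g / sqrt(g^2) = lr (any epsilon term in DGL's SparseAdagrad is ignored here).

import numpy as np

lr = 0.001
w = np.zeros(1)      # one embedding row, zero-initialised as the test assumes
g = np.ones(1)       # gradient of sum(w + 1) with respect to w
accum = np.zeros(1)  # Adagrad accumulator

accum += g ** 2
w -= lr * g / np.sqrt(accum)
assert np.allclose(w, -lr)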
Example #4
def test_convert():
    hg = create_test_heterograph()
    hs = []
    for ntype in hg.ntypes:
        h = F.randn((hg.number_of_nodes(ntype), 5))
        hg.nodes[ntype].data['h'] = h
        hs.append(h)
    hg.nodes['user'].data['x'] = F.randn((3, 3))
    ws = []
    for etype in hg.canonical_etypes:
        w = F.randn((hg.number_of_edges(etype), 5))
        hg.edges[etype].data['w'] = w
        ws.append(w)
    hg.edges['plays'].data['x'] = F.randn((4, 3))

    g = dgl.to_homo(hg)
    assert F.array_equal(F.cat(hs, dim=0), g.ndata['h'])
    assert 'x' not in g.ndata
    assert F.array_equal(F.cat(ws, dim=0), g.edata['w'])
    assert 'x' not in g.edata

    src, dst = g.all_edges(order='eid')
    src = F.asnumpy(src)
    dst = F.asnumpy(dst)
    etype_id, eid = F.asnumpy(g.edata[dgl.ETYPE]), F.asnumpy(g.edata[dgl.EID])
    ntype_id, nid = F.asnumpy(g.ndata[dgl.NTYPE]), F.asnumpy(g.ndata[dgl.NID])
    for i in range(g.number_of_edges()):
        srctype = hg.ntypes[ntype_id[src[i]]]
        dsttype = hg.ntypes[ntype_id[dst[i]]]
        etype = hg.etypes[etype_id[i]]
        src_i, dst_i = hg.find_edges([eid[i]], (srctype, etype, dsttype))
        # np.asscalar was removed from recent NumPy; .item() is the equivalent.
        assert F.asnumpy(src_i).item() == nid[src[i]]
        assert F.asnumpy(dst_i).item() == nid[dst[i]]

    mg = nx.MultiDiGraph([('user', 'user', 'follows'),
                          ('user', 'game', 'plays'),
                          ('user', 'game', 'wishes'),
                          ('developer', 'game', 'develops')])

    for _mg in [None, mg]:
        hg2 = dgl.to_hetero(g, ['user', 'game', 'developer'],
                            ['follows', 'plays', 'wishes', 'develops'],
                            ntype_field=dgl.NTYPE,
                            etype_field=dgl.ETYPE,
                            metagraph=_mg)
        assert set(hg.ntypes) == set(hg2.ntypes)
        assert set(hg.canonical_etypes) == set(hg2.canonical_etypes)
        for ntype in hg.ntypes:
            assert hg.number_of_nodes(ntype) == hg2.number_of_nodes(ntype)
            assert F.array_equal(hg.nodes[ntype].data['h'],
                                 hg2.nodes[ntype].data['h'])
        for canonical_etype in hg.canonical_etypes:
            src, dst = hg.all_edges(etype=canonical_etype, order='eid')
            src2, dst2 = hg2.all_edges(etype=canonical_etype, order='eid')
            assert F.array_equal(src, src2)
            assert F.array_equal(dst, dst2)
            assert F.array_equal(hg.edges[canonical_etype].data['w'],
                                 hg2.edges[canonical_etype].data['w'])

    # hetero_from_homo test case 2
    g = dgl.graph([(0, 2), (1, 2), (2, 3), (0, 3)])
    g.ndata[dgl.NTYPE] = F.tensor([0, 0, 1, 2])
    g.edata[dgl.ETYPE] = F.tensor([0, 0, 1, 2])
    hg = dgl.to_hetero(g, ['l0', 'l1', 'l2'], ['e0', 'e1', 'e2'])
    assert set(hg.canonical_etypes) == set([('l0', 'e0', 'l1'),
                                            ('l1', 'e1', 'l2'),
                                            ('l0', 'e2', 'l2')])
    assert hg.number_of_nodes('l0') == 2
    assert hg.number_of_nodes('l1') == 1
    assert hg.number_of_nodes('l2') == 1
    assert hg.number_of_edges('e0') == 2
    assert hg.number_of_edges('e1') == 1
    assert hg.number_of_edges('e2') == 1

    # hetero_from_homo test case 3
    mg = nx.MultiDiGraph([('user', 'movie', 'watches'),
                          ('user', 'TV', 'watches')])
    g = dgl.graph([(0, 1), (0, 2)])
    g.ndata[dgl.NTYPE] = F.tensor([0, 1, 2])
    g.edata[dgl.ETYPE] = F.tensor([0, 0])
    for _mg in [None, mg]:
        hg = dgl.to_hetero(g, ['user', 'TV', 'movie'], ['watches'],
                           metagraph=_mg)
        assert set(hg.canonical_etypes) == set([('user', 'watches', 'movie'),
                                                ('user', 'watches', 'TV')])
        assert hg.number_of_nodes('user') == 1
        assert hg.number_of_nodes('TV') == 1
        assert hg.number_of_nodes('movie') == 1
        assert hg.number_of_edges(('user', 'watches', 'TV')) == 1
        assert hg.number_of_edges(('user', 'watches', 'movie')) == 1
        assert len(hg.etypes) == 2

    # hetero_to_homo test case 2
    hg = dgl.bipartite([(0, 0), (1, 1)], card=(2, 3))
    g = dgl.to_homo(hg)
    assert g.number_of_nodes() == 5
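A minimal round-trip sketch of the conversion tested above, assuming a DGL release (>= 0.5) where these helpers are spelled dgl.to_homogeneous / dgl.to_heterogeneous and a PyTorch backend; the graph is illustrative only.

import dgl
import torch

hg = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ('user', 'plays', 'game'): (torch.tensor([0, 2]), torch.tensor([0, 1])),
})
g = dgl.to_homogeneous(hg)                     # type info is kept as integer
print(g.ndata[dgl.NTYPE], g.edata[dgl.ETYPE])  # fields dgl.NTYPE / dgl.ETYPE
hg2 = dgl.to_heterogeneous(g, hg.ntypes, hg.etypes)
assert set(hg2.canonical_etypes) == set(hg.canonical_etypes)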
Example #5
def _message_2(edges):
    return {'h': F.cat((edges.src['h'], edges.data['w']), dim=1)}
def test_edge_softmax(g, norm_by, idtype):
    print("params", norm_by, idtype)

    g = create_test_heterograph(idtype)

    x1 = F.randn((g.num_edges('plays'), feat_size))
    x2 = F.randn((g.num_edges('follows'), feat_size))
    x3 = F.randn((g.num_edges('develops'), feat_size))
    x4 = F.randn((g.num_edges('wishes'), feat_size))

    F.attach_grad(F.clone(x1))
    F.attach_grad(F.clone(x2))
    F.attach_grad(F.clone(x3))
    F.attach_grad(F.clone(x4))

    g['plays'].edata['eid'] = x1
    g['follows'].edata['eid'] = x2
    g['develops'].edata['eid'] = x3
    g['wishes'].edata['eid'] = x4

    #################################################################
    #  edge_softmax() on homogeneous graph
    #################################################################

    with F.record_grad():
        hm_g = dgl.to_homogeneous(g)
        hm_x = F.cat((x3, x2, x1, x4), 0)
        hm_e = F.attach_grad(F.clone(hm_x))
        score_hm = edge_softmax(hm_g, hm_e, norm_by=norm_by)
        hm_g.edata['score'] = score_hm
        ht_g = dgl.to_heterogeneous(hm_g, g.ntypes, g.etypes)
        r1 = ht_g.edata['score'][('user', 'plays', 'game')]
        r2 = ht_g.edata['score'][('user', 'follows', 'user')]
        r3 = ht_g.edata['score'][('developer', 'develops', 'game')]
        r4 = ht_g.edata['score'][('user', 'wishes', 'game')]
        F.backward(F.reduce_sum(r1) + F.reduce_sum(r2))
        grad_edata_hm = F.grad(hm_e)

    #################################################################
    #  edge_softmax() on heterogeneous graph
    #################################################################

    e1 = F.attach_grad(F.clone(x1))
    e2 = F.attach_grad(F.clone(x2))
    e3 = F.attach_grad(F.clone(x3))
    e4 = F.attach_grad(F.clone(x4))
    e = {('user', 'follows', 'user'): e2,
         ('user', 'plays', 'game'): e1,
         ('user', 'wishes', 'game'): e4,
         ('developer', 'develops', 'game'): e3}
    with F.record_grad():
        score = edge_softmax(g, e, norm_by=norm_by)
        r5 = score[('user', 'plays', 'game')]
        r6 = score[('user', 'follows', 'user')]
        r7 = score[('developer', 'develops', 'game')]
        r8 = score[('user', 'wishes', 'game')]
        F.backward(F.reduce_sum(r5) + F.reduce_sum(r6))
        grad_edata_ht = F.cat((F.grad(e3), F.grad(e2), F.grad(e1), F.grad(e4)), 0)
        # correctness check
        assert F.allclose(r1, r5)
        assert F.allclose(r2, r6)
        assert F.allclose(r3, r7)
        assert F.allclose(r4, r8)
        assert F.allclose(grad_edata_hm, grad_edata_ht)
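A small standalone sketch of the operation under test, assuming dgl.nn.functional.edge_softmax (the functional form imported as edge_softmax above) and a PyTorch backend: edge scores are normalised with a softmax over the incoming edges of each destination node (norm_by='dst').

import dgl
import torch
from dgl.nn.functional import edge_softmax

# Three edges, all pointing at node 2, so the softmax covers all of them.
g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([2, 2, 2])))
logits = torch.tensor([[1.0], [2.0], [3.0]])
a = edge_softmax(g, logits)
assert torch.allclose(a, torch.softmax(logits, dim=0))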
Example #7
def check_dist_graph(g, num_clients, num_nodes, num_edges):
    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Test reading node data
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats1 = g.ndata['features'][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))

    # Test reading edge data
    eids = F.arange(0, int(g.number_of_edges() / 2))
    feats1 = g.edata['features'][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))

    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.ndata['test1'] = dgl.distributed.DistTensor(new_shape, F.int32)
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)

    # reference to a tensor that already exists
    test2 = dgl.distributed.DistTensor(new_shape,
                                       F.float32,
                                       'test2',
                                       init_func=rand_init)
    test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test2')
    assert np.all(F.asnumpy(test2[nids]) == F.asnumpy(test3[nids]))

    # create a tensor, destroy it, and create it again.
    test3 = dgl.distributed.DistTensor(new_shape,
                                       F.float32,
                                       'test3',
                                       init_func=rand_init)
    del test3
    test3 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32,
                                       'test3')
    del test3

    # add tests for anonymous distributed tensor.
    test3 = dgl.distributed.DistTensor(new_shape,
                                       F.float32,
                                       init_func=rand_init)
    data = test3[0:10]
    test4 = dgl.distributed.DistTensor(new_shape,
                                       F.float32,
                                       init_func=rand_init)
    del test3
    test5 = dgl.distributed.DistTensor(new_shape,
                                       F.float32,
                                       init_func=rand_init)
    assert np.sum(F.asnumpy(test5[0:10] != data)) > 0

    # test a persistent tensor
    test4 = dgl.distributed.DistTensor(new_shape,
                                       F.float32,
                                       'test4',
                                       init_func=rand_init,
                                       persistent=True)
    del test4
    try:
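        # Re-creating 'test4' with a different shape is expected to fail,
        # since the persistent tensor survives the `del` above.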
        test4 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32,
                                           'test4')
        raise Exception('')
    except:
        pass

    # Test sparse emb
    try:
        emb = DistEmbedding(g.number_of_nodes(), 1, 'emb1', emb_init)
        lr = 0.001
        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats = emb(nids)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids), 1)))
            loss = F.sum(feats + 1, 0)
        loss.backward()
        optimizer.step()
        feats = emb(nids)
        if num_clients == 1:
            assert_almost_equal(F.asnumpy(feats),
                                np.ones((len(nids), 1)) * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))

        policy = dgl.distributed.PartitionPolicy('node',
                                                 g.get_partition_book())
        grad_sum = dgl.distributed.DistTensor((g.number_of_nodes(), ),
                                              F.float32, 'emb1_sum', policy)
        if num_clients == 1:
            assert np.all(
                F.asnumpy(grad_sum[nids]) == np.ones((len(nids), 1)) *
                num_clients)
        assert np.all(F.asnumpy(grad_sum[rest]) == np.zeros((len(rest), 1)))

        emb = DistEmbedding(g.number_of_nodes(), 1, 'emb2', emb_init)
        with F.no_grad():
            feats1 = emb(nids)
        assert np.all(F.asnumpy(feats1) == 0)

        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats1 = emb(nids)
            feats2 = emb(nids)
            feats = F.cat([feats1, feats2], 0)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids) * 2, 1)))
            loss = F.sum(feats + 1, 0)
        loss.backward()
        optimizer.step()
        with F.no_grad():
            feats = emb(nids)
        if num_clients == 1:
            assert_almost_equal(F.asnumpy(feats),
                                np.ones((len(nids), 1)) * math.sqrt(2) * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
    except NotImplementedError as e:
        pass

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1, )

    selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book())
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes in the graph.
    local_nids = np.arange(g.number_of_nodes())
    for n in nodes:
        assert n in local_nids

    print('end')
def test_empty_relation(idtype):
    """Test the features of batched DGLHeteroGraphs"""
    g1 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): ([0, 1], [1, 2]),
            ('user', 'plays', 'game'): ([], [])
        },
        idtype=idtype,
        device=F.ctx())
    g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])

    g2 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): ([0, 1], [1, 2]),
            ('user', 'plays', 'game'): ([0, 1], [0, 0])
        },
        idtype=idtype,
        device=F.ctx())
    g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g2.nodes['game'].data['h1'] = F.tensor([[0.]])
    g2.nodes['game'].data['h2'] = F.tensor([[1.]])
    g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])

    bg = dgl.batch([g1, g2])

    # Test number of nodes
    for ntype in bg.ntypes:
        assert F.asnumpy(bg.batch_num_nodes(ntype)).tolist() == [
            g1.number_of_nodes(ntype),
            g2.number_of_nodes(ntype)
        ]

    # Test number of edges
    for etype in bg.canonical_etypes:
        assert F.asnumpy(bg.batch_num_edges(etype)).tolist() == [
            g1.number_of_edges(etype),
            g2.number_of_edges(etype)
        ]

    # Test features
    assert F.allclose(
        bg.nodes['user'].data['h1'],
        F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.nodes['user'].data['h2'],
        F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']],
              dim=0))
    assert F.allclose(bg.nodes['game'].data['h1'], g2.nodes['game'].data['h1'])
    assert F.allclose(bg.nodes['game'].data['h2'], g2.nodes['game'].data['h2'])
    assert F.allclose(
        bg.edges['follows'].data['h1'],
        F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']],
              dim=0))
    assert F.allclose(bg.edges['plays'].data['h1'],
                      g2.edges['plays'].data['h1'])

    # Test unbatching graphs
    g3, g4 = dgl.unbatch(bg)
    check_equivalence_between_heterographs(g1,
                                           g3,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })
    check_equivalence_between_heterographs(g2,
                                           g4,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })

    # Test graphs without edges
    g1 = dgl.heterograph({('u', 'r', 'v'): ([], [])}, {'u': 0, 'v': 4})
    g2 = dgl.heterograph({('u', 'r', 'v'): ([], [])}, {'u': 1, 'v': 5})
    dgl.batch([g1, g2])
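A compact sketch of the batching behaviour exercised above, assuming dgl.batch / dgl.unbatch for heterographs (DGL >= 0.5) and a PyTorch backend: a relation that is empty in one input simply contributes zero edges to the batched graph.

import dgl
import torch

empty = torch.tensor([], dtype=torch.int64)
g1 = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ('user', 'plays', 'game'): (empty, empty),
})
g2 = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([0, 0])),
})
bg = dgl.batch([g1, g2])
print(bg.batch_num_edges(('user', 'plays', 'game')))  # tensor([0, 2])
g3, g4 = dgl.unbatch(bg)
assert g3.num_edges('plays') == 0 and g4.num_edges('plays') == 2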
Example #9
def test_split():
    #prepare_dist()
    g = create_random_graph(10000)
    num_parts = 4
    num_hops = 2
    partition_graph(g,
                    'dist_graph_test',
                    num_parts,
                    '/tmp/dist_graph',
                    num_hops=num_hops,
                    part_method='metis')

    node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30
    selected_nodes = np.nonzero(node_mask)[0]
    selected_edges = np.nonzero(edge_mask)[0]

    # The code now collects the roles of all client processes and uses that
    # information to determine how to split the workloads. Here we simulate
    # the multi-client use case.
    def set_roles(num_clients):
        dgl.distributed.role.CUR_ROLE = 'default'
        dgl.distributed.role.GLOBAL_RANK = {i: i for i in range(num_clients)}
        dgl.distributed.role.PER_ROLE_RANK['default'] = {
            i: i
            for i in range(num_clients)
        }

    for i in range(num_parts):
        set_roles(num_parts)
        part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition(
            '/tmp/dist_graph/dist_graph_test.json', i)
        local_nids = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids)
        nodes1 = np.intersect1d(selected_nodes, F.asnumpy(local_nids))
        nodes2 = node_split(node_mask, gpb, rank=i, force_even=False)
        assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes2)))
        local_nids = F.asnumpy(local_nids)
        for n in nodes1:
            assert n in local_nids

        set_roles(num_parts * 2)
        nodes3 = node_split(node_mask, gpb, rank=i * 2, force_even=False)
        nodes4 = node_split(node_mask, gpb, rank=i * 2 + 1, force_even=False)
        nodes5 = F.cat([nodes3, nodes4], 0)
        assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes5)))

        set_roles(num_parts)
        local_eids = F.nonzero_1d(part_g.edata['inner_edge'])
        local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids)
        edges1 = np.intersect1d(selected_edges, F.asnumpy(local_eids))
        edges2 = edge_split(edge_mask, gpb, rank=i, force_even=False)
        assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges2)))
        local_eids = F.asnumpy(local_eids)
        for e in edges1:
            assert e in local_eids

        set_roles(num_parts * 2)
        edges3 = edge_split(edge_mask, gpb, rank=i * 2, force_even=False)
        edges4 = edge_split(edge_mask, gpb, rank=i * 2 + 1, force_even=False)
        edges5 = F.cat([edges3, edges4], 0)
        assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges5)))
Example #10
def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
    prepare_dist()
    g = create_random_graph(10000)

    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_2'
    g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
    partition_graph(g,
                    graph_name,
                    num_parts,
                    '/tmp/dist_graph',
                    num_trainers_per_machine=num_clients)

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    serv_ps = []
    ctx = mp.get_context('spawn')
    for serv_id in range(num_servers):
        p = ctx.Process(target=run_server,
                        args=(graph_name, serv_id, num_servers, num_clients,
                              shared_mem))
        serv_ps.append(p)
        p.start()

    cli_ps = []
    manager = mp.Manager()
    return_dict = manager.dict()
    node_mask = np.zeros((g.number_of_nodes(), ), np.int32)
    edge_mask = np.zeros((g.number_of_edges(), ), np.int32)
    nodes = np.random.choice(g.number_of_nodes(),
                             g.number_of_nodes() // 10,
                             replace=False)
    edges = np.random.choice(g.number_of_edges(),
                             g.number_of_edges() // 10,
                             replace=False)
    node_mask[nodes] = 1
    edge_mask[edges] = 1
    nodes = np.sort(nodes)
    edges = np.sort(edges)
    for cli_id in range(num_clients):
        print('start client', cli_id)
        p = ctx.Process(target=run_client_hierarchy,
                        args=(graph_name, 0, num_servers, node_mask, edge_mask,
                              return_dict))
        p.start()
        cli_ps.append(p)

    for p in cli_ps:
        p.join()
    for p in serv_ps:
        p.join()

    nodes1 = []
    edges1 = []
    for n, e in return_dict.values():
        nodes1.append(n)
        edges1.append(e)
    nodes1, _ = F.sort_1d(F.cat(nodes1, 0))
    edges1, _ = F.sort_1d(F.cat(edges1, 0))
    assert np.all(F.asnumpy(nodes1) == nodes)
    assert np.all(F.asnumpy(edges1) == edges)

    print('clients have terminated')
def test_features(idtype):
    """Test the features of batched DGLHeteroGraphs"""
    g1 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): ([0, 1], [1, 2]),
            ('user', 'plays', 'game'): ([0, 1], [0, 0])
        },
        idtype=idtype,
        device=F.ctx())
    g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g1.nodes['game'].data['h1'] = F.tensor([[0.]])
    g1.nodes['game'].data['h2'] = F.tensor([[1.]])
    g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    g1.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])

    g2 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): ([0, 1], [1, 2]),
            ('user', 'plays', 'game'): ([0, 1], [0, 0])
        },
        idtype=idtype,
        device=F.ctx())
    g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g2.nodes['game'].data['h1'] = F.tensor([[0.]])
    g2.nodes['game'].data['h2'] = F.tensor([[1.]])
    g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])

    # test default setting
    bg = dgl.batch([g1, g2])
    assert F.allclose(
        bg.nodes['user'].data['h1'],
        F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.nodes['user'].data['h2'],
        F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']],
              dim=0))
    assert F.allclose(
        bg.nodes['game'].data['h1'],
        F.cat([g1.nodes['game'].data['h1'], g2.nodes['game'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.nodes['game'].data['h2'],
        F.cat([g1.nodes['game'].data['h2'], g2.nodes['game'].data['h2']],
              dim=0))
    assert F.allclose(
        bg.edges['follows'].data['h1'],
        F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.edges['follows'].data['h2'],
        F.cat([g1.edges['follows'].data['h2'], g2.edges['follows'].data['h2']],
              dim=0))
    assert F.allclose(
        bg.edges['plays'].data['h1'],
        F.cat([g1.edges['plays'].data['h1'], g2.edges['plays'].data['h1']],
              dim=0))

    # test specifying ndata/edata
    bg = dgl.batch([g1, g2], ndata=['h2'], edata=['h1'])
    assert F.allclose(
        bg.nodes['user'].data['h2'],
        F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']],
              dim=0))
    assert F.allclose(
        bg.nodes['game'].data['h2'],
        F.cat([g1.nodes['game'].data['h2'], g2.nodes['game'].data['h2']],
              dim=0))
    assert F.allclose(
        bg.edges['follows'].data['h1'],
        F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.edges['plays'].data['h1'],
        F.cat([g1.edges['plays'].data['h1'], g2.edges['plays'].data['h1']],
              dim=0))
    assert 'h1' not in bg.nodes['user'].data
    assert 'h1' not in bg.nodes['game'].data
    assert 'h2' not in bg.edges['follows'].data

    # Test unbatching graphs
    g3, g4 = dgl.unbatch(bg)
    check_equivalence_between_heterographs(g1,
                                           g3,
                                           node_attrs={
                                               'user': ['h2'],
                                               'game': ['h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })
    check_equivalence_between_heterographs(g2,
                                           g4,
                                           node_attrs={
                                               'user': ['h2'],
                                               'game': ['h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })

    # test legacy
    bg = dgl.batch([g1, g2], edge_attrs=['h1'])
    assert 'h2' not in bg.edges['follows'].data.keys()
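A short sketch of restricting which features are carried into a batch, assuming the dgl.batch(graphs, ndata=..., edata=...) signature used in the test above; the graphs themselves are illustrative.

import dgl
import torch

g1 = dgl.graph((torch.tensor([0]), torch.tensor([1])))
g1.ndata['h1'] = torch.zeros(2, 1)
g1.ndata['h2'] = torch.ones(2, 1)
g2 = dgl.graph((torch.tensor([0]), torch.tensor([1])))
g2.ndata['h1'] = torch.zeros(2, 1)
g2.ndata['h2'] = torch.ones(2, 1)

# Only 'h2' is batched; 'h1' is left out of the result.
bg = dgl.batch([g1, g2], ndata=['h2'])
assert 'h2' in bg.ndata and 'h1' not in bg.ndata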
Example #12
def test_batching_with_zero_nodes_edges(index_dtype):
    """Test the features of batched DGLHeteroGraphs"""
    g1 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): [(0, 1), (1, 2)],
            ('user', 'plays', 'game'): []
        },
        index_dtype=index_dtype)
    g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])

    g2 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): [(0, 1), (1, 2)],
            ('user', 'plays', 'game'): [(0, 0), (1, 0)]
        },
        index_dtype=index_dtype)
    g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g2.nodes['game'].data['h1'] = F.tensor([[0.]])
    g2.nodes['game'].data['h2'] = F.tensor([[1.]])
    g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])

    bg = dgl.batch_hetero([g1, g2])

    assert F.allclose(
        bg.nodes['user'].data['h1'],
        F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.nodes['user'].data['h2'],
        F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']],
              dim=0))
    assert F.allclose(bg.nodes['game'].data['h1'], g2.nodes['game'].data['h1'])
    assert F.allclose(bg.nodes['game'].data['h2'], g2.nodes['game'].data['h2'])
    assert F.allclose(
        bg.edges['follows'].data['h1'],
        F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']],
              dim=0))
    assert F.allclose(bg.edges['plays'].data['h1'],
                      g2.edges['plays'].data['h1'])

    # Test unbatching graphs
    g3, g4 = dgl.unbatch_hetero(bg)
    check_equivalence_between_heterographs(g1,
                                           g3,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })
    check_equivalence_between_heterographs(g2,
                                           g4,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })

    # Test graphs without edges
    g1 = dgl.bipartite([], 'u', 'r', 'v', num_nodes=(0, 4))
    g2 = dgl.bipartite([], 'u', 'r', 'v', num_nodes=(1, 5))
    g2.nodes['u'].data['x'] = F.tensor([1])
    dgl.batch_hetero([g1, g2])
Example #13
def test_batched_features(index_dtype):
    """Test the features of batched DGLHeteroGraphs"""
    g1 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): [(0, 1), (1, 2)],
            ('user', 'plays', 'game'): [(0, 0), (1, 0)]
        },
        index_dtype=index_dtype)
    g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g1.nodes['game'].data['h1'] = F.tensor([[0.]])
    g1.nodes['game'].data['h2'] = F.tensor([[1.]])
    g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    g1.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])

    g2 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): [(0, 1), (1, 2)],
            ('user', 'plays', 'game'): [(0, 0), (1, 0)]
        },
        index_dtype=index_dtype)
    g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g2.nodes['game'].data['h1'] = F.tensor([[0.]])
    g2.nodes['game'].data['h2'] = F.tensor([[1.]])
    g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])

    bg = dgl.batch_hetero([g1, g2],
                          node_attrs=ALL,
                          edge_attrs={
                              ('user', 'follows', 'user'): 'h1',
                              ('user', 'plays', 'game'): None
                          })

    assert F.allclose(
        bg.nodes['user'].data['h1'],
        F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.nodes['user'].data['h2'],
        F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']],
              dim=0))
    assert F.allclose(
        bg.nodes['game'].data['h1'],
        F.cat([g1.nodes['game'].data['h1'], g2.nodes['game'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.nodes['game'].data['h2'],
        F.cat([g1.nodes['game'].data['h2'], g2.nodes['game'].data['h2']],
              dim=0))
    assert F.allclose(
        bg.edges['follows'].data['h1'],
        F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']],
              dim=0))
    assert 'h2' not in bg.edges['follows'].data.keys()
    assert 'h1' not in bg.edges['plays'].data.keys()

    # Test unbatching graphs
    g3, g4 = dgl.unbatch_hetero(bg)
    check_equivalence_between_heterographs(g1,
                                           g3,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })
    check_equivalence_between_heterographs(g2,
                                           g4,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })
Example #14
def test_nx_conversion():
    # check conversion between networkx and DGLGraph

    def _check_nx_feature(nxg, nf, ef):
        # check node and edge feature of nxg
        # this is used to check to_networkx
        num_nodes = len(nxg)
        num_edges = nxg.size()
        if num_nodes > 0:
            node_feat = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for k in nxg.nodes[nid]:
                    node_feat[k].append(F.unsqueeze(attr[k], 0))
            for k in node_feat:
                feat = F.cat(node_feat[k], 0)
                assert F.allclose(feat, nf[k])
        else:
            assert len(nf) == 0
        if num_edges > 0:
            edge_feat = ddict(lambda: [0] * num_edges)
            for u, v, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1  # extra id
                eid = attr['id']
                for k in ef:
                    edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
            for k in edge_feat:
                feat = F.cat(edge_feat[k], 0)
                assert F.allclose(feat, ef[k])
        else:
            assert len(ef) == 0

    n1 = F.randn((5, 3))
    n2 = F.randn((5, 10))
    n3 = F.randn((5, 4))
    e1 = F.randn((4, 5))
    e2 = F.randn((4, 7))
    g = dgl.graph([(0, 2), (1, 4), (3, 0), (4, 3)])
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx
    nxg = dgl.to_networkx(g, node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph, nx graph has id in edge feature
    # use id feature to test non-tensor copy
    g = dgl.graph(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    # test with existing dglgraph (so existing features should be cleared)
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert F.allclose(g.edata['e1'], e1)
    assert F.array_equal(g.edata['id'], F.copy_to(F.arange(0, 4), F.cpu()))

    # test conversion after modifying DGLGraph
    # TODO(minjie): enable after mutation is supported
    #g.pop_e_repr('id') # pop id so we don't need to provide id when adding edges
    #new_n = F.randn((2, 3))
    #new_e = F.randn((3, 5))
    #g.add_nodes(2, data={'n1': new_n})
    ## add three edges, one is a multi-edge
    #g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    #n1 = F.cat((n1, new_n), 0)
    #e1 = F.cat((e1, new_e), 0)
    ## convert to networkx again
    #nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    #assert len(nxg) == 7
    #assert nxg.size() == 7
    #_check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now test convert from networkx without id in edge feature
    # first pop id in edge feature
    for _, _, attr in nxg.edges(data=True):
        attr.pop('id')
    # test with a new graph
    g = dgl.graph(nxg, node_attrs=['n1'], edge_attrs=['e1'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    edge_feat = []
    for _, _, attr in nxg.edges(data=True):
        edge_feat.append(F.unsqueeze(attr['e1'], 0))
    edge_feat = F.cat(edge_feat, 0)
    assert F.allclose(g.edata['e1'], edge_feat)

    # Test converting from a networkx graph whose nodes are
    # not labeled with consecutive integers.
    nxg = nx.cycle_graph(5)
    nxg.remove_nodes_from([0, 4])
    for u in nxg.nodes():
        nxg.nodes[u]['h'] = F.tensor([u])
    for u, v, d in nxg.edges(data=True):
        d['h'] = F.tensor([u, v])

    g = dgl.DGLGraph()
    g.from_networkx(nxg, node_attrs=['h'], edge_attrs=['h'])
    assert g.number_of_nodes() == 3
    assert g.number_of_edges() == 4
    assert g.has_edge_between(0, 1)
    assert g.has_edge_between(1, 2)
    assert F.allclose(g.ndata['h'], F.tensor([[1.], [2.], [3.]]))
    assert F.allclose(g.edata['h'],
                      F.tensor([[1., 2.], [1., 2.], [2., 3.], [2., 3.]]))
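A minimal sketch of the DGL <-> NetworkX round trip used above, assuming the functional API of DGL >= 0.5 (dgl.to_networkx / dgl.from_networkx) and a PyTorch backend.

import dgl
import torch

g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
g.ndata['n1'] = torch.randn(3, 2)
g.edata['e1'] = torch.randn(2, 4)

nxg = dgl.to_networkx(g, node_attrs=['n1'], edge_attrs=['e1'])
g2 = dgl.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1'])
assert g2.num_nodes() == 3 and g2.num_edges() == 2
assert torch.allclose(g2.ndata['n1'], g.ndata['n1'])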
Example #15
def _check_neighbor_sampling_dataloader(g, nids, dl, mode, collator):
    seeds = defaultdict(list)

    for item in dl:
        if mode == 'node':
            input_nodes, output_nodes, blocks = item
        elif mode == 'edge':
            input_nodes, pair_graph, blocks = item
            output_nodes = pair_graph.ndata[dgl.NID]
        elif mode == 'link':
            input_nodes, pair_graph, neg_graph, blocks = item
            output_nodes = pair_graph.ndata[dgl.NID]
            for ntype in pair_graph.ntypes:
                assert F.array_equal(pair_graph.nodes[ntype].data[dgl.NID],
                                     neg_graph.nodes[ntype].data[dgl.NID])

        if len(g.ntypes) > 1:
            for ntype in g.ntypes:
                assert F.array_equal(input_nodes[ntype],
                                     blocks[0].srcnodes[ntype].data[dgl.NID])
                assert F.array_equal(output_nodes[ntype],
                                     blocks[-1].dstnodes[ntype].data[dgl.NID])
        else:
            assert F.array_equal(input_nodes, blocks[0].srcdata[dgl.NID])
            assert F.array_equal(output_nodes, blocks[-1].dstdata[dgl.NID])

        prev_dst = {ntype: None for ntype in g.ntypes}
        for block in blocks:
            for canonical_etype in block.canonical_etypes:
                utype, etype, vtype = canonical_etype
                uu, vv = block.all_edges(order='eid', etype=canonical_etype)
                src = block.srcnodes[utype].data[dgl.NID]
                dst = block.dstnodes[vtype].data[dgl.NID]
                assert F.array_equal(block.srcnodes[utype].data['feat'],
                                     g.nodes[utype].data['feat'][src])
                assert F.array_equal(block.dstnodes[vtype].data['feat'],
                                     g.nodes[vtype].data['feat'][dst])
                if prev_dst[utype] is not None:
                    assert F.array_equal(src, prev_dst[utype])
                u = src[uu]
                v = dst[vv]
                assert F.asnumpy(
                    g.has_edges_between(u, v, etype=canonical_etype)).all()
                eid = block.edges[canonical_etype].data[dgl.EID]
                assert F.array_equal(
                    block.edges[canonical_etype].data['feat'],
                    g.edges[canonical_etype].data['feat'][eid])
                ufound, vfound = g.find_edges(eid, etype=canonical_etype)
                assert F.array_equal(ufound, u)
                assert F.array_equal(vfound, v)
            for ntype in block.dsttypes:
                src = block.srcnodes[ntype].data[dgl.NID]
                dst = block.dstnodes[ntype].data[dgl.NID]
                assert F.array_equal(src[:block.number_of_dst_nodes(ntype)],
                                     dst)
                prev_dst[ntype] = dst

        if mode == 'node':
            for ntype in blocks[-1].dsttypes:
                seeds[ntype].append(blocks[-1].dstnodes[ntype].data[dgl.NID])
        elif mode == 'edge' or mode == 'link':
            for etype in pair_graph.canonical_etypes:
                seeds[etype].append(pair_graph.edges[etype].data[dgl.EID])

    # Check if all nodes/edges are iterated
    seeds = {k: F.cat(v, 0) for k, v in seeds.items()}
    for k, v in seeds.items():
        if k in nids:
            seed_set = set(F.asnumpy(nids[k]))
        elif isinstance(k, tuple) and k[1] in nids:
            seed_set = set(F.asnumpy(nids[k[1]]))
        else:
            continue

        v_set = set(F.asnumpy(v))
        assert v_set == seed_set
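A hedged, minimal sketch of the kind of loader this helper is given, assuming the DGL 0.5/0.6-era dataloading API (MultiLayerFullNeighborSampler and NodeDataLoader; later releases expose the same idea through dgl.dataloading.DataLoader) and a PyTorch backend.

import dgl
import torch

g = dgl.graph((torch.arange(99), torch.arange(1, 100)))  # a 100-node chain
g.ndata['feat'] = torch.randn(100, 4)

sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)
dataloader = dgl.dataloading.NodeDataLoader(
    g, torch.arange(100), sampler, batch_size=10, shuffle=True, drop_last=False)

for input_nodes, output_nodes, blocks in dataloader:
    # The destination nodes of the last block are exactly this batch's seeds,
    # one of the invariants _check_neighbor_sampling_dataloader asserts.
    assert torch.equal(output_nodes, blocks[-1].dstdata[dgl.NID])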
Example #16
def test_split_even():
    #prepare_dist(1)
    g = create_random_graph(10000)
    num_parts = 4
    num_hops = 2
    partition_graph(g,
                    'dist_graph_test',
                    num_parts,
                    '/tmp/dist_graph',
                    num_hops=num_hops,
                    part_method='metis')

    node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30
    selected_nodes = np.nonzero(node_mask)[0]
    selected_edges = np.nonzero(edge_mask)[0]
    all_nodes1 = []
    all_nodes2 = []
    all_edges1 = []
    all_edges2 = []

    # The code now collects the roles of all client processes and uses that
    # information to determine how to split the workloads. Here we simulate
    # the multi-client use case.
    def set_roles(num_clients):
        dgl.distributed.role.CUR_ROLE = 'default'
        dgl.distributed.role.GLOBAL_RANK = {i: i for i in range(num_clients)}
        dgl.distributed.role.PER_ROLE_RANK['default'] = {
            i: i
            for i in range(num_clients)
        }

    for i in range(num_parts):
        set_roles(num_parts)
        part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition(
            '/tmp/dist_graph/dist_graph_test.json', i)
        local_nids = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids)
        nodes = node_split(node_mask, gpb, rank=i, force_even=True)
        all_nodes1.append(nodes)
        subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(local_nids))
        print('part {} get {} nodes and {} are in the partition'.format(
            i, len(nodes), len(subset)))

        set_roles(num_parts * 2)
        nodes1 = node_split(node_mask, gpb, rank=i * 2, force_even=True)
        nodes2 = node_split(node_mask, gpb, rank=i * 2 + 1, force_even=True)
        nodes3, _ = F.sort_1d(F.cat([nodes1, nodes2], 0))
        all_nodes2.append(nodes3)
        subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(nodes3))
        print('intersection has', len(subset))

        set_roles(num_parts)
        local_eids = F.nonzero_1d(part_g.edata['inner_edge'])
        local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids)
        edges = edge_split(edge_mask, gpb, rank=i, force_even=True)
        all_edges1.append(edges)
        subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(local_eids))
        print('part {} get {} edges and {} are in the partition'.format(
            i, len(edges), len(subset)))

        set_roles(num_parts * 2)
        edges1 = edge_split(edge_mask, gpb, rank=i * 2, force_even=True)
        edges2 = edge_split(edge_mask, gpb, rank=i * 2 + 1, force_even=True)
        edges3, _ = F.sort_1d(F.cat([edges1, edges2], 0))
        all_edges2.append(edges3)
        subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(edges3))
        print('intersection has', len(subset))
    all_nodes1 = F.cat(all_nodes1, 0)
    all_edges1 = F.cat(all_edges1, 0)
    all_nodes2 = F.cat(all_nodes2, 0)
    all_edges2 = F.cat(all_edges2, 0)
    all_nodes = np.nonzero(node_mask)[0]
    all_edges = np.nonzero(edge_mask)[0]
    assert np.all(all_nodes == F.asnumpy(all_nodes1))
    assert np.all(all_edges == F.asnumpy(all_edges1))
    assert np.all(all_nodes == F.asnumpy(all_nodes2))
    assert np.all(all_edges == F.asnumpy(all_edges2))
Example #17
def test_nx_conversion():
    # check conversion between networkx and DGLGraph

    def _check_nx_feature(nxg, nf, ef):
        # check node and edge feature of nxg
        # this is used to check to_networkx
        num_nodes = len(nxg)
        num_edges = nxg.size()
        if num_nodes > 0:
            node_feat = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for k in nxg.nodes[nid]:
                    node_feat[k].append(F.unsqueeze(attr[k], 0))
            for k in node_feat:
                feat = F.cat(node_feat[k], 0)
                assert F.allclose(feat, nf[k])
        else:
            assert len(nf) == 0
        if num_edges > 0:
            edge_feat = ddict(lambda: [0] * num_edges)
            for u, v, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1 # extra id
                eid = attr['id']
                for k in ef:
                    edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
            for k in edge_feat:
                feat = F.cat(edge_feat[k], 0)
                assert F.allclose(feat, ef[k])
        else:
            assert len(ef) == 0

    n1 = F.randn((5, 3))
    n2 = F.randn((5, 10))
    n3 = F.randn((5, 4))
    e1 = F.randn((4, 5))
    e2 = F.randn((4, 7))
    g = DGLGraph(multigraph=True)
    g.add_nodes(5)
    g.add_edges([0,1,3,4], [2,4,0,3])
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx
    nxg = g.to_networkx(node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph, nx graph has id in edge feature
    # use id feature to test non-tensor copy
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    # test with existing dglgraph (so existing features should be cleared)
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert F.allclose(g.edata['e1'], e1)
    assert F.array_equal(g.get_e_repr()['id'], F.arange(0, 4))

    # test conversion after modifying DGLGraph
    g.pop_e_repr('id') # pop id so we don't need to provide id when adding edges
    new_n = F.randn((2, 3))
    new_e = F.randn((3, 5))
    g.add_nodes(2, data={'n1': new_n})
    # add three edges, one is a multi-edge
    g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    n1 = F.cat((n1, new_n), 0)
    e1 = F.cat((e1, new_e), 0)
    # convert to networkx again
    nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    assert len(nxg) == 7
    assert nxg.size() == 7
    _check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now test convert from networkx without id in edge feature
    # first pop id in edge feature
    for _, _, attr in nxg.edges(data=True):
        attr.pop('id')
    # test with a new graph
    g = DGLGraph(multigraph=True)
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1'])
    # check graph size
    assert g.number_of_nodes() == 7
    assert g.number_of_edges() == 7
    # check number of features
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    edge_feat = []
    for _, _, attr in nxg.edges(data=True):
        edge_feat.append(F.unsqueeze(attr['e1'], 0))
    edge_feat = F.cat(edge_feat, 0)
    assert F.allclose(g.edata['e1'], edge_feat)
Example #18
def test_to_bidirected():
    # homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 3, 1]), F.tensor([1, 2, 0, 2])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.], [1.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]])
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True)
    u, v = g.edges()
    ub, vb = bg.edges()
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert F.array_equal(g.ndata['h'], bg.ndata['h'])
    assert F.array_equal(F.cat([g.edata['h'], g.edata['h']], dim=0),
                         bg.edata['h'])
    bg.ndata['hh'] = F.tensor([[0.], [1.], [2.], [1.]])
    assert ('hh' in g.ndata) is False
    bg.edata['hh'] = F.tensor([[0.], [1.], [2.], [1.], [0.], [1.], [2.], [1.]])
    assert ('hh' in g.edata) is False

    # do not share ndata and edata
    bg = dgl.to_bidirected(g, copy_ndata=False, copy_edata=False)
    ub, vb = bg.edges()
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert ('h' in bg.ndata) is False
    assert ('h' in bg.edata) is False

    # zero edge graph
    g = dgl.graph([])
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True)

    # heterogeneous graph
    g = dgl.heterograph({
        ('user', 'wins', 'user'): (F.tensor([0, 2, 0, 2,
                                             2]), F.tensor([1, 1, 2, 1, 0])),
        ('user', 'plays', 'game'): (F.tensor([1, 2, 1]), F.tensor([2, 1, 1])),
        ('user', 'follows', 'user'): (F.tensor([1, 2, 1]), F.tensor([0, 0, 0]))
    })
    g.nodes['game'].data['hv'] = F.ones((3, 1))
    g.nodes['user'].data['hv'] = F.ones((3, 1))
    g.edges['wins'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    bg = dgl.to_bidirected(g,
                           copy_ndata=True,
                           copy_edata=True,
                           ignore_bipartite=True)
    assert F.array_equal(g.nodes['game'].data['hv'],
                         bg.nodes['game'].data['hv'])
    assert F.array_equal(g.nodes['user'].data['hv'],
                         bg.nodes['user'].data['hv'])
    u, v = g.all_edges(order='eid', etype=('user', 'wins', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'wins', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert F.array_equal(
        F.cat([g.edges['wins'].data['h'], g.edges['wins'].data['h']], dim=0),
        bg.edges['wins'].data['h'])
    u, v = g.all_edges(order='eid', etype=('user', 'follows', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'follows', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    u, v = g.all_edges(order='eid', etype=('user', 'plays', 'game'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(u, ub)
    assert F.array_equal(v, vb)
    assert len(bg.edges['plays'].data) == 0
    assert len(bg.edges['follows'].data) == 0

    # do not share ndata and edata
    bg = dgl.to_bidirected(g,
                           copy_ndata=False,
                           copy_edata=False,
                           ignore_bipartite=True)
    assert len(bg.edges['wins'].data) == 0
    assert len(bg.edges['plays'].data) == 0
    assert len(bg.edges['follows'].data) == 0
    assert len(bg.nodes['game'].data) == 0
    assert len(bg.nodes['user'].data) == 0
    u, v = g.all_edges(order='eid', etype=('user', 'wins', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'wins', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    u, v = g.all_edges(order='eid', etype=('user', 'follows', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'follows', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    u, v = g.all_edges(order='eid', etype=('user', 'plays', 'game'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(u, ub)
    assert F.array_equal(v, vb)
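Finally, a minimal sketch of dgl.to_bidirected on a homogeneous graph, assuming DGL >= 0.5 and a PyTorch backend: every edge gains a reverse counterpart and, with copy_ndata=True, node features are shared with the result; edge data is left out of this sketch.

import dgl
import torch

g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
g.ndata['h'] = torch.tensor([[0.], [1.], [2.]])
bg = dgl.to_bidirected(g, copy_ndata=True)
assert bg.num_edges() == 2 * g.num_edges()
assert torch.equal(bg.ndata['h'], g.ndata['h'])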