Example no. 1
0
def start_client(num_clients, num_servers):
    """Client-side worker for the distributed KVStore round-trip test.

    Connects to the KV servers listed in ``kv_ip_config.txt``, then walks
    through ``init_data`` / metadata queries / push-pull / custom push
    handlers / ``delete_data``, asserting the observed state at each step.

    NOTE(review): depends on module-level test fixtures (``gpb``,
    ``data_0``..``data_2``, ``node_policy``, ``edge_policy``,
    ``init_zero_func``, ``udf_push``, ``add_push``) defined elsewhere in
    this file -- confirm they are in scope before reusing this function.

    Parameters
    ----------
    num_clients : int
        Expected total number of connected clients.
    num_servers : int
        Number of KV servers per machine to connect to.
    """
    os.environ['DGL_DIST_MODE'] = 'distributed'
    # Note: connect to server first !
    dgl.distributed.initialize(ip_config='kv_ip_config.txt')
    # Init kvclient
    kvclient = dgl.distributed.KVClient(ip_config='kv_ip_config.txt',
                                        num_servers=num_servers)
    # Map the partition book's shared tensors into this client.
    kvclient.map_shared_data(partition_book=gpb)
    assert dgl.distributed.get_num_client() == num_clients
    # data_1 is partitioned by edge policy, data_2 by node policy;
    # both start zero-filled via init_zero_func.
    kvclient.init_data(name='data_1',
                       shape=F.shape(data_1),
                       dtype=F.dtype(data_1),
                       part_policy=edge_policy,
                       init_func=init_zero_func)
    kvclient.init_data(name='data_2',
                       shape=F.shape(data_2),
                       dtype=F.dtype(data_2),
                       part_policy=node_policy,
                       init_func=init_zero_func)

    # Test data_name_list
    name_list = kvclient.data_name_list()
    print(name_list)
    assert 'data_0' in name_list
    assert 'data_0_1' in name_list
    assert 'data_0_2' in name_list
    assert 'data_0_3' in name_list
    assert 'data_1' in name_list
    assert 'data_2' in name_list
    # Test get_data_meta: each entry is (dtype, shape, partition policy).
    meta = kvclient.get_data_meta('data_0')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_0)
    assert shape == F.shape(data_0)
    assert policy.policy_str == 'node:_N'

    meta = kvclient.get_data_meta('data_0_1')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_0_1)
    assert shape == F.shape(data_0_1)
    assert policy.policy_str == 'node:_N'

    meta = kvclient.get_data_meta('data_0_2')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_0_2)
    assert shape == F.shape(data_0_2)
    assert policy.policy_str == 'node:_N'

    meta = kvclient.get_data_meta('data_0_3')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_0_3)
    assert shape == F.shape(data_0_3)
    assert policy.policy_str == 'node:_N'

    meta = kvclient.get_data_meta('data_1')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_1)
    assert shape == F.shape(data_1)
    assert policy.policy_str == 'edge:_E'

    meta = kvclient.get_data_meta('data_2')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_2)
    assert shape == F.shape(data_2)
    assert policy.policy_str == 'node:_N'

    # Test push and pull
    id_tensor = F.tensor([0, 2, 4], F.int64)
    data_tensor = F.tensor([[6., 6.], [6., 6.], [6., 6.]], F.float32)
    kvclient.push(name='data_0', id_tensor=id_tensor, data_tensor=data_tensor)
    kvclient.push(name='data_1', id_tensor=id_tensor, data_tensor=data_tensor)
    kvclient.push(name='data_2', id_tensor=id_tensor, data_tensor=data_tensor)
    # Default push handler overwrites, so pull must return exactly what
    # was pushed.
    res = kvclient.pull(name='data_0', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    res = kvclient.pull(name='data_1', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    res = kvclient.pull(name='data_2', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    # Register new push handler
    kvclient.register_push_handler('data_0', udf_push)
    kvclient.register_push_handler('data_1', udf_push)
    kvclient.register_push_handler('data_2', udf_push)
    # Test push and pull
    kvclient.push(name='data_0', id_tensor=id_tensor, data_tensor=data_tensor)
    kvclient.push(name='data_1', id_tensor=id_tensor, data_tensor=data_tensor)
    kvclient.push(name='data_2', id_tensor=id_tensor, data_tensor=data_tensor)
    # Make sure every client's pushes have landed before reading back.
    kvclient.barrier()
    # The expected values mirror udf_push: presumably it multiplies the
    # stored data by the pushed data elementwise -- TODO confirm against
    # the udf_push definition.
    data_tensor = data_tensor * data_tensor
    res = kvclient.pull(name='data_0', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    res = kvclient.pull(name='data_1', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    res = kvclient.pull(name='data_2', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))

    # Test delete data
    kvclient.delete_data('data_0')
    kvclient.delete_data('data_1')
    kvclient.delete_data('data_2')

    # Register new push handler
    kvclient.init_data(name='data_3',
                       shape=F.shape(data_2),
                       dtype=F.dtype(data_2),
                       part_policy=node_policy,
                       init_func=init_zero_func)
    kvclient.register_push_handler('data_3', add_push)
    data_tensor = F.tensor([[6., 6.], [6., 6.], [6., 6.]], F.float32)
    kvclient.barrier()
    # Stagger the clients by their id so the add-pushes do not interleave.
    time.sleep(kvclient.client_id + 1)
    print("add...")
    kvclient.push(name='data_3', id_tensor=id_tensor, data_tensor=data_tensor)
    kvclient.barrier()
    res = kvclient.pull(name='data_3', id_tensor=id_tensor)
    # add_push accumulates: every client pushed the same tensor once, so
    # the stored values scale by num_clients.
    data_tensor = data_tensor * num_clients
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
Example no. 2
0
 def foo(g):
     """Write a node feature on a local copy and check auto zero-padding."""
     # Work on a local view so the write does not leak to the caller's graph.
     local_g = g.local_var()
     local_g.nodes[0].data['h'] = F.ones((1, 1))
     # Node 0 got the pushed value; node 1's row is zero-initialized.
     expected = F.tensor([[1.], [0.]])
     assert F.allclose(local_g.ndata['h'], expected)
Example no. 3
0
 def foo(g):
     """Write edge features inside local_scope and check the padding rules."""
     # All mutations below are confined to the scope and dropped on exit.
     with g.local_scope():
         g.edges[0, 1].data['h'] = F.ones((1, 1))
         # 'h' already existed on every edge, so all rows remain set.
         all_ones = F.ones((2, 1))
         assert F.allclose(g.edata['h'], all_ones)
         g.edges[0, 1].data['w'] = F.ones((1, 1))
         # 'w' is new: the untouched edge gets a zero-initialized row.
         expected_w = F.tensor([[1.], [0.]])
         assert F.allclose(g.edata['w'], expected_w)
Example no. 4
0
def test_to_bidirected():
    """Exercise ``dgl.to_bidirected`` on homogeneous, empty and
    heterogeneous graphs, checking edge layout and ndata/edata sharing.
    """
    # homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 3, 1]), F.tensor([1, 2, 0, 2])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.], [1.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]])
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True)
    u, v = g.edges()
    ub, vb = bg.edges()
    # The bidirected graph lists the original edges first, then reversed.
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert F.array_equal(g.ndata['h'], bg.ndata['h'])
    # Edge data is duplicated so each reversed edge carries its source's
    # feature.
    assert F.array_equal(F.cat([g.edata['h'], g.edata['h']], dim=0),
                         bg.edata['h'])
    # New features set on bg must not appear on the original graph.
    bg.ndata['hh'] = F.tensor([[0.], [1.], [2.], [1.]])
    assert ('hh' in g.ndata) is False
    bg.edata['hh'] = F.tensor([[0.], [1.], [2.], [1.], [0.], [1.], [2.], [1.]])
    assert ('hh' in g.edata) is False

    # donot share ndata and edata
    bg = dgl.to_bidirected(g, copy_ndata=False, copy_edata=False)
    ub, vb = bg.edges()
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert ('h' in bg.ndata) is False
    assert ('h' in bg.edata) is False

    # zero edge graph
    g = dgl.graph([])
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True)

    # heterogeneous graph
    g = dgl.heterograph({
        ('user', 'wins', 'user'): (F.tensor([0, 2, 0, 2,
                                             2]), F.tensor([1, 1, 2, 1, 0])),
        ('user', 'plays', 'game'): (F.tensor([1, 2, 1]), F.tensor([2, 1, 1])),
        ('user', 'follows', 'user'): (F.tensor([1, 2, 1]), F.tensor([0, 0, 0]))
    })
    g.nodes['game'].data['hv'] = F.ones((3, 1))
    g.nodes['user'].data['hv'] = F.ones((3, 1))
    g.edges['wins'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    # ignore_bipartite=True leaves the user->game relation untouched.
    bg = dgl.to_bidirected(g,
                           copy_ndata=True,
                           copy_edata=True,
                           ignore_bipartite=True)
    assert F.array_equal(g.nodes['game'].data['hv'],
                         bg.nodes['game'].data['hv'])
    assert F.array_equal(g.nodes['user'].data['hv'],
                         bg.nodes['user'].data['hv'])
    u, v = g.all_edges(order='eid', etype=('user', 'wins', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'wins', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert F.array_equal(
        F.cat([g.edges['wins'].data['h'], g.edges['wins'].data['h']], dim=0),
        bg.edges['wins'].data['h'])
    u, v = g.all_edges(order='eid', etype=('user', 'follows', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'follows', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    # The bipartite 'plays' relation must come through unchanged.
    u, v = g.all_edges(order='eid', etype=('user', 'plays', 'game'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(u, ub)
    assert F.array_equal(v, vb)
    assert len(bg.edges['plays'].data) == 0
    assert len(bg.edges['follows'].data) == 0

    # donot share ndata and edata
    bg = dgl.to_bidirected(g,
                           copy_ndata=False,
                           copy_edata=False,
                           ignore_bipartite=True)
    assert len(bg.edges['wins'].data) == 0
    assert len(bg.edges['plays'].data) == 0
    assert len(bg.edges['follows'].data) == 0
    assert len(bg.nodes['game'].data) == 0
    assert len(bg.nodes['user'].data) == 0
    u, v = g.all_edges(order='eid', etype=('user', 'wins', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'wins', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    u, v = g.all_edges(order='eid', etype=('user', 'follows', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'follows', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    u, v = g.all_edges(order='eid', etype=('user', 'plays', 'game'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(u, ub)
    assert F.array_equal(v, vb)
Example no. 5
0
def test_to_simple(index_dtype):
    """Exercise ``dgl.to_simple`` (duplicate-edge coalescing) on
    homogeneous and heterogeneous graphs, checking the writeback mapping,
    edge counts/weights, and ndata/edata sharing semantics.
    """
    # homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 2, 1]), F.tensor([1, 2, 0, 2])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]])
    sg, wb = dgl.to_simple(g, writeback_mapping=True)
    u, v = g.all_edges(form='uv', order='eid')
    u = F.asnumpy(u).tolist()
    v = F.asnumpy(v).tolist()
    uv = list(zip(u, v))
    # wb maps each original edge id to its edge id in the simple graph.
    eid_map = F.asnumpy(wb)

    su, sv = sg.all_edges(form='uv', order='eid')
    su = F.asnumpy(su).tolist()
    sv = F.asnumpy(sv).tolist()
    suv = list(zip(su, sv))
    sc = F.asnumpy(sg.edata['count'])
    # Same edge set, and 'count' records each edge's multiplicity.
    assert set(uv) == set(suv)
    for i, e in enumerate(suv):
        assert sc[i] == sum(e == _e for _e in uv)
    for i, e in enumerate(uv):
        assert eid_map[i] == suv.index(e)
    # shared ndata
    assert F.array_equal(sg.ndata['h'], g.ndata['h'])
    assert 'h' not in sg.edata
    # new ndata to sg
    sg.ndata['hh'] = F.tensor([[0.], [1.], [2.]])
    assert 'hh' not in g.ndata

    sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False)
    assert 'h' not in sg.ndata
    assert 'h' not in sg.edata

    # heterogeneous graph
    g = dgl.heterograph(
        {
            ('user', 'follow', 'user'):
            ([0, 1, 2, 1, 1, 1], [1, 3, 2, 3, 4, 4]),
            ('user', 'plays', 'game'):
            ([3, 2, 1, 1, 3, 2, 2], [5, 3, 4, 4, 5, 3, 3])
        },
        index_dtype=index_dtype)
    g.nodes['user'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    g.nodes['user'].data['hh'] = F.tensor([0, 1, 2, 3, 4])
    g.edges['follow'].data['h'] = F.tensor([0, 1, 2, 3, 4, 5])
    sg, wb = dgl.to_simple(g,
                           return_counts='weights',
                           writeback_mapping=True,
                           copy_edata=True)
    # Set after to_simple, so 'h' must NOT appear on sg's game nodes.
    g.nodes['game'].data['h'] = F.tensor([0, 1, 2, 3, 4, 5])

    for etype in g.canonical_etypes:
        u, v = g.all_edges(form='uv', order='eid', etype=etype)
        u = F.asnumpy(u).tolist()
        v = F.asnumpy(v).tolist()
        uv = list(zip(u, v))
        eid_map = F.asnumpy(wb[etype])

        su, sv = sg.all_edges(form='uv', order='eid', etype=etype)
        su = F.asnumpy(su).tolist()
        sv = F.asnumpy(sv).tolist()
        suv = list(zip(su, sv))
        # return_counts='weights' stores the multiplicity under 'weights'.
        sw = F.asnumpy(sg.edges[etype].data['weights'])

        assert set(uv) == set(suv)
        for i, e in enumerate(suv):
            assert sw[i] == sum(e == _e for _e in uv)
        for i, e in enumerate(uv):
            assert eid_map[i] == suv.index(e)
    # shared ndata
    assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'])
    assert F.array_equal(sg.nodes['user'].data['hh'],
                         g.nodes['user'].data['hh'])
    assert 'h' not in sg.nodes['game'].data
    # new ndata to sg
    sg.nodes['user'].data['hhh'] = F.tensor([0, 1, 2, 3, 4])
    assert 'hhh' not in g.nodes['user'].data
    # share edata
    feat_idx = F.asnumpy(wb[('user', 'follow', 'user')])
    _, indices = np.unique(feat_idx, return_index=True)
    assert np.array_equal(F.asnumpy(sg.edges['follow'].data['h']),
                          F.asnumpy(g.edges['follow'].data['h'])[indices])

    sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)
    assert 'h' not in sg.nodes['user'].data
    assert 'hh' not in sg.nodes['user'].data
Example no. 6
0
def test_topk(g, idtype, descending):
    """Exercise ``dgl.topk_nodes`` / ``dgl.topk_edges`` on a batched
    graph, comparing against a per-subgraph numpy argsort reference.
    """
    g = g.astype(idtype).to(F.ctx())
    g.ndata['x'] = F.randn((g.number_of_nodes(), 3))

    # Test.1: to test the case where k > number of nodes.
    dgl.topk_nodes(g, 'x', 100, sortby=-1)

    # Test.2: test correctness
    min_nnodes = F.asnumpy(g.batch_num_nodes()).min()
    if min_nnodes <= 1:
        # k = min_nnodes - 1 would be 0; nothing meaningful to compare.
        return
    k = min_nnodes - 1
    val, indices = dgl.topk_nodes(g, 'x', k, descending=descending, sortby=-1)
    print(k)
    print(g.ndata['x'])
    print('val', val)
    print('indices', indices)
    # Build the reference answer independently for each graph in the batch.
    subg = dgl.unbatch(g)
    subval, subidx = [], []
    for sg in subg:
        subx = F.asnumpy(sg.ndata['x'])
        # Sort by the last column, matching sortby=-1 above.
        ai = np.argsort(subx[:, -1:].flatten())
        if descending:
            ai = np.ascontiguousarray(ai[::-1])
        subx = np.expand_dims(subx[ai[:k]], 0)
        subval.append(F.tensor(subx))
        subidx.append(F.tensor(np.expand_dims(ai[:k], 0)))
    print(F.cat(subval, dim=0))
    assert F.allclose(val, F.cat(subval, dim=0))
    assert F.allclose(indices, F.cat(subidx, dim=0))

    # Test.3: sortby=None
    dgl.topk_nodes(g, 'x', k, sortby=None)

    g.edata['x'] = F.randn((g.number_of_edges(), 3))

    # Test.4: topk edges where k > number of edges.
    dgl.topk_edges(g, 'x', 100, sortby=-1)

    # Test.5: topk edges test correctness
    min_nedges = F.asnumpy(g.batch_num_edges()).min()
    if min_nedges <= 1:
        return
    k = min_nedges - 1
    val, indices = dgl.topk_edges(g, 'x', k, descending=descending, sortby=-1)
    print(k)
    print(g.edata['x'])
    print('val', val)
    print('indices', indices)
    # Same reference construction as Test.2, but over edge features.
    subg = dgl.unbatch(g)
    subval, subidx = [], []
    for sg in subg:
        subx = F.asnumpy(sg.edata['x'])
        ai = np.argsort(subx[:, -1:].flatten())
        if descending:
            ai = np.ascontiguousarray(ai[::-1])
        subx = np.expand_dims(subx[ai[:k]], 0)
        subval.append(F.tensor(subx))
        subidx.append(F.tensor(np.expand_dims(ai[:k], 0)))
    print(F.cat(subval, dim=0))
    assert F.allclose(val, F.cat(subval, dim=0))
    assert F.allclose(indices, F.cat(subidx, dim=0))
Example no. 7
0
def test_to_block(index_dtype):
    """Exercise ``dgl.to_block`` on a heterograph: induced node/edge ID
    mappings, ``include_dst_in_src`` handling, and explicit dst-node lists.
    """
    def check(g, bg, ntype, etype, dst_nodes, include_dst_in_src=True):
        # Verify one etype of the block against the original graph.
        if dst_nodes is not None:
            assert F.array_equal(bg.dstnodes[ntype].data[dgl.NID], dst_nodes)
        n_dst_nodes = bg.number_of_nodes('DST/' + ntype)
        if include_dst_in_src:
            # Dst nodes must appear as a prefix of the src nodes.
            assert F.array_equal(
                bg.srcnodes[ntype].data[dgl.NID][:n_dst_nodes],
                bg.dstnodes[ntype].data[dgl.NID])

        g = g[etype]
        bg = bg[etype]
        induced_src = bg.srcdata[dgl.NID]
        induced_dst = bg.dstdata[dgl.NID]
        induced_eid = bg.edata[dgl.EID]
        bg_src, bg_dst = bg.all_edges(order='eid')
        src_ans, dst_ans = g.all_edges(order='eid')

        # Mapping block-local endpoints through the induced node IDs must
        # reproduce the original endpoints of the induced edges.
        induced_src_bg = F.gather_row(induced_src, bg_src)
        induced_dst_bg = F.gather_row(induced_dst, bg_dst)
        induced_src_ans = F.gather_row(src_ans, induced_eid)
        induced_dst_ans = F.gather_row(dst_ans, induced_eid)

        assert F.array_equal(induced_src_bg, induced_src_ans)
        assert F.array_equal(induced_dst_bg, induced_dst_ans)

    def checkall(g, bg, dst_nodes, include_dst_in_src=True):
        # Run check() for every etype, passing the per-ntype dst list if any.
        for etype in g.etypes:
            ntype = g.to_canonical_etype(etype)[2]
            if dst_nodes is not None and ntype in dst_nodes:
                check(g, bg, ntype, etype, dst_nodes[ntype],
                      include_dst_in_src)
            else:
                check(g, bg, ntype, etype, None, include_dst_in_src)

    g = dgl.heterograph(
        {
            ('A', 'AA', 'A'): [(0, 1), (2, 3), (1, 2), (3, 4)],
            ('A', 'AB', 'B'): [(0, 1), (1, 3), (3, 5), (1, 6)],
            ('B', 'BA', 'A'): [(2, 3), (3, 2)]
        },
        index_dtype=index_dtype)
    g_a = g['AA']

    bg = dgl.to_block(g_a)
    check(g_a, bg, 'A', 'AA', None)
    assert bg.number_of_src_nodes() == 5
    assert bg.number_of_dst_nodes() == 4

    bg = dgl.to_block(g_a, include_dst_in_src=False)
    check(g_a, bg, 'A', 'AA', None, False)
    assert bg.number_of_src_nodes() == 4
    assert bg.number_of_dst_nodes() == 4

    # Explicit dst node list (note: deliberately not in ascending order).
    dst_nodes = F.tensor([4, 3, 2, 1], dtype=getattr(F, index_dtype))
    bg = dgl.to_block(g_a, dst_nodes)
    check(g_a, bg, 'A', 'AA', dst_nodes)

    g_ab = g['AB']

    bg = dgl.to_block(g_ab)
    assert bg._idtype_str == index_dtype
    assert bg.number_of_nodes('SRC/B') == 4
    assert F.array_equal(bg.srcnodes['B'].data[dgl.NID],
                         bg.dstnodes['B'].data[dgl.NID])
    assert bg.number_of_nodes('DST/A') == 0
    checkall(g_ab, bg, None)

    dst_nodes = {'B': F.tensor([5, 6, 3, 1], dtype=getattr(F, index_dtype))}
    bg = dgl.to_block(g, dst_nodes)
    assert bg.number_of_nodes('SRC/B') == 4
    assert F.array_equal(bg.srcnodes['B'].data[dgl.NID],
                         bg.dstnodes['B'].data[dgl.NID])
    assert bg.number_of_nodes('DST/A') == 0
    checkall(g, bg, dst_nodes)

    dst_nodes = {
        'A': F.tensor([4, 3, 2, 1], dtype=getattr(F, index_dtype)),
        'B': F.tensor([3, 5, 6, 1], dtype=getattr(F, index_dtype))
    }
    bg = dgl.to_block(g, dst_nodes=dst_nodes)
    checkall(g, bg, dst_nodes)
Example no. 8
0
def test_random_walk():
    """Exercise ``dgl.sampling.random_walk`` on homogeneous and
    heterogeneous graphs: plain walks, restart probabilities, edge
    probabilities ('p'), metapaths, and returned edge IDs.

    NOTE(review): relies on a ``check_random_walk`` helper defined
    elsewhere in this file to validate trace correctness.
    """
    g1 = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 1, 2], [1, 2, 0])
        })
    g2 = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])
        })
    g3 = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 1, 2], [1, 2, 0]),
        ('user', 'view', 'item'): ([0, 1, 2], [0, 1, 2]),
        ('item', 'viewed-by', 'user'): ([0, 1, 2], [0, 1, 2])})
    g4 = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]),
        ('user', 'view', 'item'): ([0, 0, 1, 2, 3, 3], [0, 1, 1, 2, 2, 1]),
        ('item', 'viewed-by', 'user'): ([0, 1, 1, 2, 2, 1], [0, 0, 1, 2, 3, 3])})

    # 'p' carries a zero for one edge so that edge must never be walked;
    # 'p2' is 2-D on purpose to trigger a validation error below.
    g2.edata['p'] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)
    g2.edata['p2'] = F.tensor([[3], [0], [3], [3], [3]], dtype=F.float32)
    g4.edges['follow'].data['p'] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)
    g4.edges['viewed-by'].data['p'] = F.tensor([1, 1, 1, 1, 1, 1], dtype=F.float32)

    traces, eids, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4, return_eids=True)
    check_random_walk(g1, ['follow'] * 4, traces, ntypes, trace_eids=eids)
    traces, eids, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4, restart_prob=0., return_eids=True)
    check_random_walk(g1, ['follow'] * 4, traces, ntypes, trace_eids=eids)
    traces, ntypes = dgl.sampling.random_walk(
        g1, [0, 1, 2, 0, 1, 2], length=4, restart_prob=F.zeros((4,), F.float32, F.cpu()))
    check_random_walk(g1, ['follow'] * 4, traces, ntypes)
    # restart_prob of 1 at the last step forces termination: the final
    # column of every trace must be the padding value -1.
    traces, ntypes = dgl.sampling.random_walk(
        g1, [0, 1, 2, 0, 1, 2], length=5,
        restart_prob=F.tensor([0, 0, 0, 0, 1], dtype=F.float32))
    check_random_walk(
        g1, ['follow'] * 4, F.slice_axis(traces, 1, 0, 5), F.slice_axis(ntypes, 0, 0, 5))
    assert (F.asnumpy(traces)[:, 5] == -1).all()

    traces, eids, ntypes = dgl.sampling.random_walk(
        g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, return_eids=True)
    check_random_walk(g2, ['follow'] * 4, traces, ntypes, trace_eids=eids)

    traces, eids, ntypes = dgl.sampling.random_walk(
        g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, prob='p', return_eids=True)
    check_random_walk(g2, ['follow'] * 4, traces, ntypes, 'p', trace_eids=eids)

    # A 2-D probability feature is invalid and must raise DGLError.
    try:
        traces, ntypes = dgl.sampling.random_walk(
            g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, prob='p2')
        fail = False
    except dgl.DGLError:
        fail = True
    assert fail

    metapath = ['follow', 'view', 'viewed-by'] * 2
    traces, eids, ntypes = dgl.sampling.random_walk(
        g3, [0, 1, 2, 0, 1, 2], metapath=metapath, return_eids=True)
    check_random_walk(g3, metapath, traces, ntypes, trace_eids=eids)

    metapath = ['follow', 'view', 'viewed-by'] * 2
    traces, eids, ntypes = dgl.sampling.random_walk(
        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, return_eids=True)
    check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids)

    traces, eids, ntypes = dgl.sampling.random_walk(
        g4, [0, 1, 2, 0, 1, 2], metapath=metapath, return_eids=True)
    check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids)

    metapath = ['follow', 'view', 'viewed-by'] * 2
    traces, eids, ntypes = dgl.sampling.random_walk(
        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p', return_eids=True)
    check_random_walk(g4, metapath, traces, ntypes, 'p', trace_eids=eids)
    traces, eids, ntypes = dgl.sampling.random_walk(
        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p', restart_prob=0., return_eids=True)
    check_random_walk(g4, metapath, traces, ntypes, 'p', trace_eids=eids)
    traces, eids, ntypes = dgl.sampling.random_walk(
        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p',
        restart_prob=F.zeros((6,), F.float32, F.cpu()), return_eids=True)
    check_random_walk(g4, metapath, traces, ntypes, 'p', trace_eids=eids)
    # Extend the metapath by one hop with restart_prob 1 there: the extra
    # column must again be all -1.
    traces, eids, ntypes = dgl.sampling.random_walk(
        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath + ['follow'], prob='p',
        restart_prob=F.tensor([0, 0, 0, 0, 0, 0, 1], F.float32), return_eids=True)
    check_random_walk(g4, metapath, traces[:, :7], ntypes[:7], 'p', trace_eids=eids)
    assert (F.asnumpy(traces[:, 7]) == -1).all()
Example no. 9
0
def test_multi_recv():
    """Exercise send/recv message bookkeeping: the internal message index
    must flag edges with pending messages (1) and clear them after recv.

    NOTE(review): relies on module-level helpers (``generate_graph``,
    ``message_func``, ``reduce_func``, ``apply_node_func``) defined
    elsewhere in this file.
    """
    # basic recv test
    g = generate_graph()
    h = g.ndata['h']
    g.register_message_func(message_func)
    g.register_reduce_func(reduce_func)
    g.register_apply_node_func(apply_node_func)
    # One flag per edge: 1 means a message is pending on that edge.
    expected = F.copy_to(F.zeros((g.number_of_edges(), ), dtype=F.int64),
                         F.cpu())
    # two separate round of send and recv
    u = [4, 5, 6]
    v = [9]
    g.send((u, v))
    eid = g.edge_ids(u, v)
    expected = F.asnumpy(expected)
    eid = F.asnumpy(eid)
    expected[eid] = 1
    assert np.array_equal(g._get_msg_index().tonumpy(), expected)
    g.recv(v)
    # recv consumes the pending messages, so the flags reset to 0.
    expected[eid] = 0
    assert np.array_equal(g._get_msg_index().tonumpy(), expected)

    u = [0]
    v = [1, 2, 3]
    g.send((u, v))
    eid = g.edge_ids(u, v)
    eid = F.asnumpy(eid)
    expected[eid] = 1
    assert np.array_equal(g._get_msg_index().tonumpy(), expected)
    g.recv(v)
    expected[eid] = 0
    assert np.array_equal(g._get_msg_index().tonumpy(), expected)

    h1 = g.ndata['h']

    # one send, two recv
    g.ndata['h'] = h
    u = F.tensor([0, 0, 0, 4, 5, 6])
    v = F.tensor([1, 2, 3, 9, 9, 9])
    g.send((u, v))
    eid = g.edge_ids(u, v)
    eid = F.asnumpy(eid)
    expected[eid] = 1
    assert np.array_equal(g._get_msg_index().tonumpy(), expected)
    # First partial recv: only node 9's incoming messages are consumed.
    u = [4, 5, 6]
    v = [9]
    g.recv(v)
    eid = g.edge_ids(u, v)
    eid = F.asnumpy(eid)
    expected[eid] = 0
    assert np.array_equal(g._get_msg_index().tonumpy(), expected)
    # Second partial recv drains the rest.
    u = [0]
    v = [1, 2, 3]
    g.recv(v)
    eid = g.edge_ids(u, v)
    eid = F.asnumpy(eid)
    expected[eid] = 0
    assert np.array_equal(g._get_msg_index().tonumpy(), expected)

    h2 = g.ndata['h']
    # Two staged recvs must produce the same node features as the two
    # independent send/recv rounds above.
    assert F.allclose(h1, h2)
Example no. 10
0
def check_partition(g,
                    part_method,
                    reshuffle,
                    num_parts=4,
                    num_trainers_per_machine=1,
                    load_feats=True):
    """Partition ``g`` with ``partition_graph`` and verify every partition:
    metadata, local/global ID mappings, node/edge feature round-trips, and
    (when ``reshuffle``) reconstruction of the original data from the
    shuffled partitions.

    Parameters
    ----------
    g : DGLGraph
        Homogeneous input graph (features are attached in-place here).
    part_method : str
        Partitioning algorithm name forwarded to ``partition_graph``.
    reshuffle : bool
        Whether node/edge IDs are reshuffled so each partition is contiguous.
    num_parts : int
        Number of partitions to produce.
    num_trainers_per_machine : int
        When > 1, per-trainer assignment features are verified too.
    load_feats : bool
        If False, features are loaded separately via ``load_partition_feats``.
    """
    g.ndata['labels'] = F.arange(0, g.number_of_nodes())
    g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10),
                                F.float32)
    g.edata['feats'] = F.tensor(np.random.randn(g.number_of_edges(), 10),
                                F.float32)
    # Precompute reference aggregations on the full graph; the same
    # update_all on each partition must match these (checked below).
    g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h'))
    g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh'))
    num_hops = 2

    orig_nids, orig_eids = partition_graph(
        g,
        'test',
        num_parts,
        '/tmp/partition',
        num_hops=num_hops,
        part_method=part_method,
        reshuffle=reshuffle,
        return_mapping=True,
        num_trainers_per_machine=num_trainers_per_machine)
    part_sizes = []
    shuffled_labels = []
    shuffled_edata = []
    for i in range(num_parts):
        part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition(
            '/tmp/partition/test.json', i, load_feats=load_feats)
        if not load_feats:
            # Features must be absent from load_partition and retrievable
            # through the dedicated loader instead.
            assert not node_feats
            assert not edge_feats
            node_feats, edge_feats = load_partition_feats(
                '/tmp/partition/test.json', i)
        if num_trainers_per_machine > 1:
            # Trainer IDs must map back to this partition's ID.
            for ntype in g.ntypes:
                name = ntype + '/trainer_id'
                assert name in node_feats
                part_ids = F.floor_div(node_feats[name],
                                       num_trainers_per_machine)
                assert np.all(F.asnumpy(part_ids) == i)

            for etype in g.etypes:
                name = etype + '/trainer_id'
                assert name in edge_feats
                part_ids = F.floor_div(edge_feats[name],
                                       num_trainers_per_machine)
                assert np.all(F.asnumpy(part_ids) == i)

        # Check the metadata
        assert gpb._num_nodes() == g.number_of_nodes()
        assert gpb._num_edges() == g.number_of_edges()

        assert gpb.num_partitions() == num_parts
        gpb_meta = gpb.metadata()
        assert len(gpb_meta) == num_parts
        assert len(gpb.partid2nids(i)) == gpb_meta[i]['num_nodes']
        assert len(gpb.partid2eids(i)) == gpb_meta[i]['num_edges']
        part_sizes.append((gpb_meta[i]['num_nodes'], gpb_meta[i]['num_edges']))

        # Local IDs of the partition's inner nodes/edges must be 0..n-1.
        nid = F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node'])
        local_nid = gpb.nid2localnid(nid, i)
        assert F.dtype(local_nid) in (F.int64, F.int32)
        assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid)))
        eid = F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge'])
        local_eid = gpb.eid2localeid(eid, i)
        assert F.dtype(local_eid) in (F.int64, F.int32)
        assert np.all(F.asnumpy(local_eid) == np.arange(0, len(local_eid)))

        # Check the node map.
        local_nodes = F.boolean_mask(part_g.ndata[dgl.NID],
                                     part_g.ndata['inner_node'])
        llocal_nodes = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nodes1 = gpb.partid2nids(i)
        assert F.dtype(local_nodes1) in (F.int32, F.int64)
        assert np.all(
            np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(
                local_nodes1)))
        assert np.all(F.asnumpy(llocal_nodes) == np.arange(len(llocal_nodes)))

        # Check the edge map.
        local_edges = F.boolean_mask(part_g.edata[dgl.EID],
                                     part_g.edata['inner_edge'])
        llocal_edges = F.nonzero_1d(part_g.edata['inner_edge'])
        local_edges1 = gpb.partid2eids(i)
        assert F.dtype(local_edges1) in (F.int32, F.int64)
        assert np.all(
            np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(
                local_edges1)))
        assert np.all(F.asnumpy(llocal_edges) == np.arange(len(llocal_edges)))

        # Verify the mapping between the reshuffled IDs and the original IDs.
        part_src_ids, part_dst_ids = part_g.edges()
        part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids)
        part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids)
        part_eids = part_g.edata[dgl.EID]
        orig_src_ids = F.gather_row(orig_nids, part_src_ids)
        orig_dst_ids = F.gather_row(orig_nids, part_dst_ids)
        orig_eids1 = F.gather_row(orig_eids, part_eids)
        orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids)
        assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0]
        assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))

        if reshuffle:
            part_g.ndata['feats'] = F.gather_row(g.ndata['feats'],
                                                 part_g.ndata['orig_id'])
            part_g.edata['feats'] = F.gather_row(g.edata['feats'],
                                                 part_g.edata['orig_id'])
            # when we read node data from the original global graph, we should use orig_id.
            local_nodes = F.boolean_mask(part_g.ndata['orig_id'],
                                         part_g.ndata['inner_node'])
            local_edges = F.boolean_mask(part_g.edata['orig_id'],
                                         part_g.edata['inner_edge'])
        else:
            part_g.ndata['feats'] = F.gather_row(g.ndata['feats'],
                                                 part_g.ndata[dgl.NID])
            part_g.edata['feats'] = F.gather_row(g.edata['feats'],
                                                 part_g.edata[dgl.NID])

        # Aggregation on the partition must agree with the full-graph
        # reference for the partition's inner nodes.
        part_g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h'))
        part_g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh'))
        assert F.allclose(F.gather_row(g.ndata['h'], local_nodes),
                          F.gather_row(part_g.ndata['h'], llocal_nodes))
        assert F.allclose(F.gather_row(g.ndata['eh'], local_nodes),
                          F.gather_row(part_g.ndata['eh'], llocal_nodes))

        for name in ['labels', 'feats']:
            assert '_N/' + name in node_feats
            assert node_feats['_N/' + name].shape[0] == len(local_nodes)
            true_feats = F.gather_row(g.ndata[name], local_nodes)
            ndata = F.gather_row(node_feats['_N/' + name], local_nid)
            assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata))
        for name in ['feats']:
            assert '_E/' + name in edge_feats
            assert edge_feats['_E/' + name].shape[0] == len(local_edges)
            true_feats = F.gather_row(g.edata[name], local_edges)
            edata = F.gather_row(edge_feats['_E/' + name], local_eid)
            assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata))

        # This only works if node/edge IDs are shuffled.
        if reshuffle:
            shuffled_labels.append(node_feats['_N/labels'])
            shuffled_edata.append(edge_feats['_E/feats'])

    # Verify that we can reconstruct node/edge data for original IDs.
    if reshuffle:
        shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
        shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0))
        orig_labels = np.zeros(shuffled_labels.shape,
                               dtype=shuffled_labels.dtype)
        orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype)
        orig_labels[F.asnumpy(orig_nids)] = shuffled_labels
        orig_edata[F.asnumpy(orig_eids)] = shuffled_edata
        assert np.all(orig_labels == F.asnumpy(g.ndata['labels']))
        assert np.all(orig_edata == F.asnumpy(g.edata['feats']))

    if reshuffle:
        # With reshuffling, partition i owns a contiguous ID range, so the
        # global ID -> partition maps must be step functions over parts.
        node_map = []
        edge_map = []
        for i, (num_nodes, num_edges) in enumerate(part_sizes):
            node_map.append(np.ones(num_nodes) * i)
            edge_map.append(np.ones(num_edges) * i)
        node_map = np.concatenate(node_map)
        edge_map = np.concatenate(edge_map)
        nid2pid = gpb.nid2partid(F.arange(0, len(node_map)))
        assert F.dtype(nid2pid) in (F.int32, F.int64)
        assert np.all(F.asnumpy(nid2pid) == node_map)
        eid2pid = gpb.eid2partid(F.arange(0, len(edge_map)))
        assert F.dtype(eid2pid) in (F.int32, F.int64)
        assert np.all(F.asnumpy(eid2pid) == edge_map)
Esempio n. 11
0
    def _test_one(g):
        """Run structural queries on the fixed 10-node / 20-edge graph built
        from edge_pair_input() and assert every expected result."""

        def _edge_set(triple):
            # Collapse (src, dst, eid) tensors into a comparable tuple set.
            s, d, e = triple
            return set(zip(F.asnumpy(s), F.asnumpy(d), F.asnumpy(e)))

        # Basic size invariants.
        assert g.number_of_nodes() == 10
        assert g.number_of_edges() == 20
        assert len(g) == 10
        assert not g.is_multigraph

        # Node existence, both through the API and the `in` operator.
        for nid in range(10):
            assert g.has_node(nid)
            assert nid in g
        assert not g.has_node(11)
        assert 11 not in g
        assert F.allclose(g.has_nodes([0, 2, 10, 11]), F.tensor([1, 1, 0, 0]))

        # Every reference edge exists; a non-edge does not.
        ref_src, ref_dst = edge_pair_input()
        for u, v in zip(ref_src, ref_dst):
            assert g.has_edge_between(u, v)
        assert not g.has_edge_between(0, 0)
        assert F.allclose(g.has_edges_between([0, 0, 3], [0, 9, 8]),
                          F.tensor([0, 1, 1]))
        assert set(F.asnumpy(g.predecessors(9))) == {0, 5, 7, 4}
        assert set(F.asnumpy(g.successors(2))) == {7, 3}

        # Edge-ID lookups, scalar and batched.
        assert g.edge_id(4, 4) == 5
        assert F.allclose(g.edge_ids([4, 0], [4, 9]), F.tensor([5, 0]))

        found_src, found_dst = g.find_edges([3, 6, 5])
        assert F.allclose(found_src, F.tensor([5, 7, 4]))
        assert F.allclose(found_dst, F.tensor([9, 9, 4]))

        # Incoming edges, single node and batched (node 0 has no in-edges).
        assert _edge_set(g.in_edges(9, form='all')) == {
            (0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7)}
        assert _edge_set(g.in_edges([9, 0, 8], form='all')) == {
            (0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7),
            (3, 8, 9), (7, 8, 12)}

        # Outgoing edges, single node and batched (node 8 has no out-edges).
        assert _edge_set(g.out_edges(0, form='all')) == {
            (0, 9, 0), (0, 6, 1), (0, 4, 4)}
        assert _edge_set(g.out_edges([0, 4, 8], form='all')) == {
            (0, 9, 0), (0, 6, 1), (0, 4, 4), (4, 3, 2),
            (4, 4, 5), (4, 9, 7), (4, 1, 8)}

        # Full enumeration in eid order: matches the reference list and the
        # edge IDs come back as 0..19 in order.
        all_src, all_dst, all_eid = g.edges('all', 'eid')
        ref_src, ref_dst = edge_pair_input()
        expected = set(zip(ref_src, ref_dst, range(20)))
        assert set(zip(F.asnumpy(all_src), F.asnumpy(all_dst),
                       F.asnumpy(all_eid))) == expected
        assert list(F.asnumpy(all_eid)) == list(range(20))

        # Full enumeration in srcdst order: same edge set, sources sorted.
        all_src, all_dst, all_eid = g.edges('all', 'srcdst')
        ref_src, ref_dst = edge_pair_input()
        expected = set(zip(ref_src, ref_dst, range(20)))
        assert set(zip(F.asnumpy(all_src), F.asnumpy(all_dst),
                       F.asnumpy(all_eid))) == expected
        assert list(F.asnumpy(all_src)) == sorted(F.asnumpy(all_src))

        # Degrees.
        assert g.in_degree(0) == 0
        assert g.in_degree(9) == 4
        assert F.allclose(g.in_degrees([0, 9]), F.tensor([0, 4]))
        assert g.out_degree(8) == 0
        assert g.out_degree(9) == 1
        assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))

        # Adjacency matrix agrees with the SciPy reference (and transpose).
        assert np.array_equal(
            F.sparse_to_numpy(g.adjacency_matrix(transpose=False)),
            scipy_coo_input().toarray().T)
        assert np.array_equal(
            F.sparse_to_numpy(g.adjacency_matrix(transpose=True)),
            scipy_coo_input().toarray())
Esempio n. 12
0
def check_hetero_partition(hg,
                           part_method,
                           num_parts=4,
                           num_trainers_per_machine=1,
                           load_feats=True):
    """Partition a heterogeneous graph and verify every loaded partition.

    Checks that (1) the original-ID mappings returned by partition_graph
    cover every node/edge type, (2) reshuffled global homogeneous IDs map
    back to the correct per-type original IDs, (3) per-partition features
    match the source graph, and (4) node/edge labels concatenated across
    partitions reconstruct the source ordering via orig_nids/orig_eids.

    Parameters
    ----------
    hg : heterogeneous graph with ntype 'n1' and etype 'r1' (data for those
        types is attached here and verified after partitioning).
    part_method : str, partitioning algorithm forwarded to partition_graph.
    num_parts : int, number of partitions to produce.
    num_trainers_per_machine : int; when > 1 the per-type 'trainer_id'
        feature is checked to map each node/edge to its own partition.
    load_feats : bool; when False, features must come back empty from
        load_partition and are fetched via load_partition_feats instead.
    """
    # Attach labels/features on one node type and one edge type so we can
    # verify they survive partitioning and reshuffling.
    hg.nodes['n1'].data['labels'] = F.arange(0, hg.number_of_nodes('n1'))
    hg.nodes['n1'].data['feats'] = F.tensor(
        np.random.randn(hg.number_of_nodes('n1'), 10), F.float32)
    hg.edges['r1'].data['feats'] = F.tensor(
        np.random.randn(hg.number_of_edges('r1'), 10), F.float32)
    hg.edges['r1'].data['labels'] = F.arange(0, hg.number_of_edges('r1'))
    num_hops = 1

    orig_nids, orig_eids = partition_graph(
        hg,
        'test',
        num_parts,
        '/tmp/partition',
        num_hops=num_hops,
        part_method=part_method,
        reshuffle=True,
        return_mapping=True,
        num_trainers_per_machine=num_trainers_per_machine)
    # return_mapping=True yields per-type dicts of original IDs; they must
    # cover every type completely.
    assert len(orig_nids) == len(hg.ntypes)
    assert len(orig_eids) == len(hg.etypes)
    for ntype in hg.ntypes:
        assert len(orig_nids[ntype]) == hg.number_of_nodes(ntype)
    for etype in hg.etypes:
        assert len(orig_eids[etype]) == hg.number_of_edges(etype)
    parts = []
    shuffled_labels = []
    shuffled_elabels = []
    for i in range(num_parts):
        part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition(
            '/tmp/partition/test.json', i, load_feats=load_feats)
        if not load_feats:
            # Features were not loaded with the partition; fetch them via
            # the dedicated API and make sure the eager dicts were empty.
            assert not node_feats
            assert not edge_feats
            node_feats, edge_feats = load_partition_feats(
                '/tmp/partition/test.json', i)
        if num_trainers_per_machine > 1:
            # Each trainer_id, integer-divided by trainers-per-machine,
            # must equal the partition index it belongs to.
            for ntype in hg.ntypes:
                name = ntype + '/trainer_id'
                assert name in node_feats
                part_ids = F.floor_div(node_feats[name],
                                       num_trainers_per_machine)
                assert np.all(F.asnumpy(part_ids) == i)

            for etype in hg.etypes:
                name = etype + '/trainer_id'
                assert name in edge_feats
                part_ids = F.floor_div(edge_feats[name],
                                       num_trainers_per_machine)
                assert np.all(F.asnumpy(part_ids) == i)
        # Verify the mapping between the reshuffled IDs and the original IDs.
        # These are partition-local IDs.
        part_src_ids, part_dst_ids = part_g.edges()
        # These are reshuffled global homogeneous IDs.
        part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids)
        part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids)
        part_eids = part_g.edata[dgl.EID]
        # These are reshuffled per-type IDs.
        src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids)
        dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids)
        etype_ids, part_eids = gpb.map_to_per_etype(part_eids)
        # These are original per-type IDs.
        for etype_id, etype in enumerate(hg.etypes):
            part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id)
            src_ntype_ids1 = F.boolean_mask(src_ntype_ids,
                                            etype_ids == etype_id)
            part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id)
            dst_ntype_ids1 = F.boolean_mask(dst_ntype_ids,
                                            etype_ids == etype_id)
            part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id)
            # All edges of one etype must share a single src/dst node type.
            assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0]))
            assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0]))
            src_ntype = hg.ntypes[F.as_scalar(src_ntype_ids1[0])]
            dst_ntype = hg.ntypes[F.as_scalar(dst_ntype_ids1[0])]
            orig_src_ids1 = F.gather_row(orig_nids[src_ntype], part_src_ids1)
            orig_dst_ids1 = F.gather_row(orig_nids[dst_ntype], part_dst_ids1)
            orig_eids1 = F.gather_row(orig_eids[etype], part_eids1)
            # Looking up the original endpoints in the source graph must
            # recover exactly the original edge IDs.
            orig_eids2 = hg.edge_ids(orig_src_ids1, orig_dst_ids1, etype=etype)
            assert len(orig_eids1) == len(orig_eids2)
            assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))
        parts.append(part_g)
        verify_graph_feats(hg, gpb, part_g, node_feats, edge_feats)

        # Collect reshuffled labels so the original order can be
        # reconstructed and verified below.
        shuffled_labels.append(node_feats['n1/labels'])
        shuffled_elabels.append(edge_feats['r1/labels'])
    verify_hetero_graph(hg, parts)

    # Scatter the shuffled labels back to their original positions; the
    # result must equal the data attached to the source graph above.
    shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
    shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0))
    orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype)
    orig_elabels = np.zeros(shuffled_elabels.shape,
                            dtype=shuffled_elabels.dtype)
    orig_labels[F.asnumpy(orig_nids['n1'])] = shuffled_labels
    orig_elabels[F.asnumpy(orig_eids['r1'])] = shuffled_elabels
    assert np.all(orig_labels == F.asnumpy(hg.nodes['n1'].data['labels']))
    assert np.all(orig_elabels == F.asnumpy(hg.edges['r1'].data['labels']))
Esempio n. 13
0
def check_weighted_negative_sampler(mode, exclude_positive, neg_size):
    """Exercise EdgeSampler with edge/node weights and verify its behavior.

    Covers correctness (false-negative flags, tail-set equality, positive
    exclusion), the replacement/reset sampling contracts, and finally the
    sampling *rates* implied by highly skewed weights.

    Parameters
    ----------
    mode : str
        Negative-sampling mode forwarded as ``negative_mode``; the rate
        checks branch on whether 'head' appears in it.
    exclude_positive : bool
        Whether true edges must be excluded from negative samples.
    neg_size : int
        Number of negative samples per positive edge.
    """
    g = generate_rand_graph(100)
    num_edges = g.number_of_edges()
    num_nodes = g.number_of_nodes()
    # Uniform weights to start with; skewed weights are used in the rate
    # checks at the bottom.
    edge_weight = F.copy_to(
        F.tensor(np.full((num_edges, ), 1, dtype=np.float32)), F.cpu())
    node_weight = F.copy_to(
        F.tensor(np.full((num_nodes, ), 1, dtype=np.float32)), F.cpu())
    etype = np.random.randint(0, 10, size=num_edges, dtype=np.int64)
    g.edata['etype'] = F.copy_to(F.tensor(etype), F.cpu())

    # Map (dst, eid) -> src for every true edge; used to verify that
    # excluded positives never reappear as negatives.
    pos_gsrc, pos_gdst, pos_geid = g.all_edges(form='all', order='eid')
    pos_map = {}
    for i in range(len(pos_geid)):
        pos_d = int(F.asnumpy(pos_gdst[i]))
        pos_e = int(F.asnumpy(pos_geid[i]))
        pos_map[(pos_d, pos_e)] = int(F.asnumpy(pos_gsrc[i]))
    EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler')

    # Correctness check
    # Test the homogeneous graph.
    batch_size = 50
    # Test the graph with edge weight provided (no relations yet).
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            reset=False,
                                            edge_weight=edge_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        # Parent edge IDs of the positive subgraph must match the edge IDs
        # recovered from the parent endpoints.
        pos_lsrc, pos_ldst, pos_leid = pos_edges.all_edges(form='all',
                                                           order='eid')
        assert_array_equal(
            F.asnumpy(F.gather_row(pos_edges.parent_eid, pos_leid)),
            F.asnumpy(
                g.edge_ids(F.gather_row(pos_edges.parent_nid, pos_lsrc),
                           F.gather_row(pos_edges.parent_nid, pos_ldst))))
        neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all',
                                                           order='eid')

        neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc)
        neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst)
        neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid)
        for i in range(len(neg_eid)):
            neg_d = int(F.asnumpy(neg_dst[i]))
            neg_e = int(F.asnumpy(neg_eid[i]))
            # Every negative keeps the (dst, eid) of some true edge; with
            # exclusion on, its src must differ from the true src.
            assert (neg_d, neg_e) in pos_map
            if exclude_positive:
                assert int(F.asnumpy(neg_src[i])) != pos_map[(neg_d, neg_e)]

        check_head_tail(neg_edges)
        # Positive and negative batches must share the same tail node set.
        pos_tails = F.gather_row(pos_edges.parent_nid, pos_edges.tail_nid)
        neg_tails = F.gather_row(neg_edges.parent_nid, neg_edges.tail_nid)
        pos_tails = np.sort(F.asnumpy(pos_tails))
        neg_tails = np.sort(F.asnumpy(neg_tails))
        np.testing.assert_equal(pos_tails, neg_tails)

        # 'false_neg' flags negatives that are actually existing edges.
        exist = neg_edges.edata['false_neg']
        if exclude_positive:
            assert np.sum(F.asnumpy(exist) == 0) == len(exist)
        else:
            assert F.array_equal(g.has_edges_between(neg_src, neg_dst), exist)
        total_samples += batch_size
    assert total_samples <= num_edges

    # Test the knowledge graph with edge weight provided.
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            reset=False,
                                            edge_weight=edge_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            relations=g.edata['etype'],
                                            return_false_neg=True):
        neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all',
                                                           order='eid')
        neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc)
        neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst)
        neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid)
        exists = neg_edges.edata['false_neg']
        neg_edges.edata['etype'] = F.gather_row(g.edata['etype'], neg_eid)
        for i in range(len(neg_eid)):
            u, v = F.asnumpy(neg_src[i]), F.asnumpy(neg_dst[i])
            if g.has_edge_between(u, v):
                # With relations, a negative only counts as false-negative
                # when the existing edge has the same relation type.
                eid = g.edge_id(u, v)
                etype = g.edata['etype'][eid]
                exist = neg_edges.edata['etype'][i] == etype
                assert F.asnumpy(exists[i]) == F.asnumpy(exist)
        total_samples += batch_size
    assert total_samples <= num_edges

    # Test the knowledge graph with edge/node weight provided.
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            reset=False,
                                            edge_weight=edge_weight,
                                            node_weight=node_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            relations=g.edata['etype'],
                                            return_false_neg=True):
        neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all',
                                                           order='eid')
        neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc)
        neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst)
        neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid)
        exists = neg_edges.edata['false_neg']
        neg_edges.edata['etype'] = F.gather_row(g.edata['etype'], neg_eid)
        for i in range(len(neg_eid)):
            u, v = F.asnumpy(neg_src[i]), F.asnumpy(neg_dst[i])
            if g.has_edge_between(u, v):
                eid = g.edge_id(u, v)
                etype = g.edata['etype'][eid]
                exist = neg_edges.edata['etype'][i] == etype
                assert F.asnumpy(exists[i]) == F.asnumpy(exist)
        total_samples += batch_size
    assert total_samples <= num_edges

    # check replacement = True with pos edges non-uniform sample
    # with reset = False: every edge is visited exactly once overall.
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            replacement=True,
                                            reset=False,
                                            edge_weight=edge_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) == batch_size
        total_samples += len(pos_leid)
    assert total_samples == num_edges

    # check replacement = True with pos edges non-uniform sample
    # with reset = True: the sampler keeps producing batches indefinitely.
    total_samples = 0
    max_samples = 4 * num_edges
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            replacement=True,
                                            reset=True,
                                            edge_weight=edge_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) == batch_size
        total_samples += len(pos_leid)
        if total_samples >= max_samples:
            break
    assert total_samples == max_samples

    # check replacement = False with pos/neg edges non-uniform sample
    # reset = False
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            replacement=False,
                                            reset=False,
                                            edge_weight=edge_weight,
                                            node_weight=node_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            relations=g.edata['etype'],
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) == batch_size
        total_samples += len(pos_leid)
    assert total_samples == num_edges

    # check replacement = False with pos/neg edges non-uniform sample
    # reset = True
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            replacement=False,
                                            reset=True,
                                            edge_weight=edge_weight,
                                            node_weight=node_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            relations=g.edata['etype'],
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) == batch_size
        total_samples += len(pos_leid)
        if total_samples >= max_samples:
            break
    assert total_samples == max_samples

    # Check Rate: use a bigger graph and skewed weights, then verify the
    # empirical sampling frequencies.
    dgl.random.seed(0)
    g = generate_rand_graph(1000)
    num_edges = g.number_of_edges()
    num_nodes = g.number_of_nodes()
    edge_weight = F.copy_to(
        F.tensor(np.full((num_edges, ), 1, dtype=np.float32)), F.cpu())
    # Edge 0 carries as much weight as all edges combined, i.e. ~half of
    # the new total -> expected sample rate ~0.5.
    edge_weight[0] = F.sum(edge_weight, dim=0)
    node_weight = F.copy_to(
        F.tensor(np.full((num_nodes, ), 1, dtype=np.float32)), F.cpu())
    # The last node gets 1/200 of the total weight (5x a normal node).
    node_weight[-1] = F.sum(node_weight, dim=0) / 200
    etype = np.random.randint(0, 20, size=num_edges, dtype=np.int64)
    g.edata['etype'] = F.copy_to(F.tensor(etype), F.cpu())

    # Test w/o node weight.
    max_samples = num_edges // 5
    total_samples = 0
    # Test the knowledge graph with edge weight provided.
    edge_sampled = np.full((num_edges, ), 0, dtype=np.int32)
    node_sampled = np.full((num_nodes, ), 0, dtype=np.int32)
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            replacement=True,
                                            edge_weight=edge_weight,
                                            shuffle=True,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=False,
                                            relations=g.edata['etype'],
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        neg_lsrc, neg_ldst, _ = neg_edges.all_edges(form='all', order='eid')
        # Count the corrupted endpoint: heads in 'head' mode, else tails.
        if 'head' in mode:
            neg_src = neg_edges.parent_nid[neg_lsrc]
            np.add.at(node_sampled, F.asnumpy(neg_src), 1)
        else:
            neg_dst = neg_edges.parent_nid[neg_ldst]
            np.add.at(node_sampled, F.asnumpy(neg_dst), 1)
        np.add.at(edge_sampled, F.asnumpy(pos_edges.parent_eid[pos_leid]), 1)
        total_samples += batch_size
        if total_samples > max_samples:
            break

    # Check rate here
    edge_rate_0 = edge_sampled[0] / edge_sampled.sum()
    edge_tail_half_cnt = edge_sampled[edge_sampled.shape[0] // 2:-1].sum()
    edge_rate_tail_half = edge_tail_half_cnt / edge_sampled.sum()
    assert np.allclose(edge_rate_0, 0.5, atol=0.05)
    assert np.allclose(edge_rate_tail_half, 0.25, atol=0.05)

    # Without node weights, negatives are drawn uniformly over nodes.
    node_rate_0 = node_sampled[0] / node_sampled.sum()
    node_tail_half_cnt = node_sampled[node_sampled.shape[0] // 2:-1].sum()
    node_rate_tail_half = node_tail_half_cnt / node_sampled.sum()
    assert node_rate_0 < 0.02
    assert np.allclose(node_rate_tail_half, 0.5, atol=0.02)

    # Test the knowledge graph with edge/node weight provided.
    edge_sampled = np.full((num_edges, ), 0, dtype=np.int32)
    node_sampled = np.full((num_nodes, ), 0, dtype=np.int32)
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            replacement=True,
                                            edge_weight=edge_weight,
                                            node_weight=node_weight,
                                            shuffle=True,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=False,
                                            relations=g.edata['etype'],
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        neg_lsrc, neg_ldst, _ = neg_edges.all_edges(form='all', order='eid')
        if 'head' in mode:
            neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc)
            np.add.at(node_sampled, F.asnumpy(neg_src), 1)
        else:
            neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst)
            np.add.at(node_sampled, F.asnumpy(neg_dst), 1)
        np.add.at(edge_sampled, F.asnumpy(pos_edges.parent_eid[pos_leid]), 1)
        total_samples += batch_size
        if total_samples > max_samples:
            break

    # Check rate here
    edge_rate_0 = edge_sampled[0] / edge_sampled.sum()
    edge_tail_half_cnt = edge_sampled[edge_sampled.shape[0] // 2:-1].sum()
    edge_rate_tail_half = edge_tail_half_cnt / edge_sampled.sum()
    assert np.allclose(edge_rate_0, 0.5, atol=0.05)
    assert np.allclose(edge_rate_tail_half, 0.25, atol=0.05)

    # With node weights, the last node should be sampled ~5x as often as
    # an average node (its weight is num_nodes/200 = 5 for 1000 nodes).
    node_rate = node_sampled[-1] / node_sampled.sum()
    node_rate_a = np.average(node_sampled[:50]) / node_sampled.sum()
    node_rate_b = np.average(node_sampled[50:100]) / node_sampled.sum()
    # As neg sampling does not contain duplicate nodes,
    # this test takes some acceptable variation on the sample rate.
    assert np.allclose(node_rate, node_rate_a * 5, atol=0.002)
    assert np.allclose(node_rate_a, node_rate_b, atol=0.0002)
Esempio n. 14
0
def check_negative_sampler(mode, exclude_positive, neg_size):
    """Exercise EdgeSampler's uniform (unweighted) negative sampling.

    Verifies false-negative flags, tail-set equality between positive and
    negative batches, positive exclusion, the replacement/reset sampling
    contracts, and relation-aware false-negative detection.

    Parameters
    ----------
    mode : str
        Negative-sampling mode forwarded as ``negative_mode``.
    exclude_positive : bool
        Whether true edges must be excluded from negative samples.
    neg_size : int
        Number of negative samples per positive edge.
    """
    g = generate_rand_graph(100)
    num_edges = g.number_of_edges()
    etype = np.random.randint(0, 10, size=g.number_of_edges(), dtype=np.int64)
    g.edata['etype'] = F.copy_to(F.tensor(etype), F.cpu())

    # Map (dst, eid) -> src for every true edge; used to verify that
    # excluded positives never reappear as negatives.
    pos_gsrc, pos_gdst, pos_geid = g.all_edges(form='all', order='eid')
    pos_map = {}
    for i in range(len(pos_geid)):
        pos_d = int(F.asnumpy(pos_gdst[i]))
        pos_e = int(F.asnumpy(pos_geid[i]))
        pos_map[(pos_d, pos_e)] = int(F.asnumpy(pos_gsrc[i]))

    EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler')
    # Test the homogeneous graph.

    batch_size = 50
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            negative_mode=mode,
                                            reset=False,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        # Parent edge IDs of the positive subgraph must match the edge IDs
        # recovered from the parent endpoints.
        pos_lsrc, pos_ldst, pos_leid = pos_edges.all_edges(form='all',
                                                           order='eid')
        assert_array_equal(
            F.asnumpy(F.gather_row(pos_edges.parent_eid, pos_leid)),
            F.asnumpy(
                g.edge_ids(F.gather_row(pos_edges.parent_nid, pos_lsrc),
                           F.gather_row(pos_edges.parent_nid, pos_ldst))))

        neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all',
                                                           order='eid')

        neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc)
        neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst)
        neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid)
        for i in range(len(neg_eid)):
            neg_d = int(F.asnumpy(neg_dst)[i])
            neg_e = int(F.asnumpy(neg_eid)[i])
            # Every negative keeps the (dst, eid) of some true edge; with
            # exclusion on, its src must differ from the true src.
            assert (neg_d, neg_e) in pos_map
            if exclude_positive:
                assert int(F.asnumpy(neg_src[i])) != pos_map[(neg_d, neg_e)]

        check_head_tail(neg_edges)
        # Positive and negative batches must share the same tail node set.
        pos_tails = F.gather_row(pos_edges.parent_nid, pos_edges.tail_nid)
        neg_tails = F.gather_row(neg_edges.parent_nid, neg_edges.tail_nid)
        pos_tails = np.sort(F.asnumpy(pos_tails))
        neg_tails = np.sort(F.asnumpy(neg_tails))
        np.testing.assert_equal(pos_tails, neg_tails)

        # 'false_neg' flags negatives that are actually existing edges.
        exist = neg_edges.edata['false_neg']
        if exclude_positive:
            assert np.sum(F.asnumpy(exist) == 0) == len(exist)
        else:
            assert F.array_equal(g.has_edges_between(neg_src, neg_dst), exist)
        total_samples += batch_size
    assert total_samples <= num_edges

    # check replacement = True
    # with reset = False (default setting): each edge visited exactly once.
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            replacement=True,
                                            reset=False,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) == batch_size
        total_samples += len(pos_leid)
    assert total_samples == num_edges

    # check replacement = False
    # with reset = False (default setting)
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            replacement=False,
                                            reset=False,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) == batch_size
        total_samples += len(pos_leid)
    assert total_samples == num_edges

    # check replacement = True
    # with reset = True: the sampler keeps producing batches indefinitely.
    total_samples = 0
    max_samples = 2 * num_edges
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            replacement=True,
                                            reset=True,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) <= batch_size
        total_samples += len(pos_leid)
        if (total_samples >= max_samples):
            break
    assert total_samples >= max_samples

    # check replacement = False
    # with reset = True
    total_samples = 0
    max_samples = 2 * num_edges
    for pos_edges, neg_edges in EdgeSampler(g,
                                            batch_size,
                                            replacement=False,
                                            reset=True,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) <= batch_size
        total_samples += len(pos_leid)
        if (total_samples >= max_samples):
            break
    assert total_samples >= max_samples

    # Test the knowledge graph.
    total_samples = 0
    for _, neg_edges in EdgeSampler(g,
                                    batch_size,
                                    negative_mode=mode,
                                    reset=False,
                                    neg_sample_size=neg_size,
                                    exclude_positive=exclude_positive,
                                    relations=g.edata['etype'],
                                    return_false_neg=True):
        neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all',
                                                           order='eid')
        neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc)
        neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst)
        neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid)
        exists = neg_edges.edata['false_neg']
        neg_edges.edata['etype'] = F.gather_row(g.edata['etype'], neg_eid)
        for i in range(len(neg_eid)):
            u, v = F.asnumpy(neg_src[i]), F.asnumpy(neg_dst[i])
            if g.has_edge_between(u, v):
                # With relations, a negative only counts as false-negative
                # when the existing edge has the same relation type.
                eid = g.edge_id(u, v)
                etype = g.edata['etype'][eid]
                exist = neg_edges.edata['etype'][i] == etype
                assert F.asnumpy(exists[i]) == F.asnumpy(exist)
        total_samples += batch_size
    assert total_samples <= num_edges
Esempio n. 15
0
def test_empty_relation(idtype):
    """Test the features of batched DGLHeteroGraphs.

    g1 has an empty 'plays' relation (no edges and no 'game' node features)
    while g2 populates it; batching the two must still produce per-type
    node/edge counts and concatenated features, and unbatching must
    round-trip back to the originals.
    """
    g1 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): ([0, 1], [1, 2]),
            # 'plays' is intentionally left without edges in g1.
            ('user', 'plays', 'game'): ([], [])
        },
        idtype=idtype,
        device=F.ctx())
    g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])

    # g2 shares the schema but has a non-empty 'plays' relation and
    # 'game' node features.
    g2 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): ([0, 1], [1, 2]),
            ('user', 'plays', 'game'): ([0, 1], [0, 0])
        },
        idtype=idtype,
        device=F.ctx())
    g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g2.nodes['game'].data['h1'] = F.tensor([[0.]])
    g2.nodes['game'].data['h2'] = F.tensor([[1.]])
    g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])

    bg = dgl.batch([g1, g2])

    # Test number of nodes
    for ntype in bg.ntypes:
        assert F.asnumpy(bg.batch_num_nodes(ntype)).tolist() == [
            g1.number_of_nodes(ntype),
            g2.number_of_nodes(ntype)
        ]

    # Test number of edges
    for etype in bg.canonical_etypes:
        assert F.asnumpy(bg.batch_num_edges(etype)).tolist() == [
            g1.number_of_edges(etype),
            g2.number_of_edges(etype)
        ]

    # Test features: batched features are the per-graph features
    # concatenated in batch order; types absent from g1 come solely from g2.
    assert F.allclose(
        bg.nodes['user'].data['h1'],
        F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.nodes['user'].data['h2'],
        F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']],
              dim=0))
    assert F.allclose(bg.nodes['game'].data['h1'], g2.nodes['game'].data['h1'])
    assert F.allclose(bg.nodes['game'].data['h2'], g2.nodes['game'].data['h2'])
    assert F.allclose(
        bg.edges['follows'].data['h1'],
        F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']],
              dim=0))
    assert F.allclose(bg.edges['plays'].data['h1'],
                      g2.edges['plays'].data['h1'])

    # Test unbatching graphs
    g3, g4 = dgl.unbatch(bg)
    check_equivalence_between_heterographs(g1,
                                           g3,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })
    check_equivalence_between_heterographs(g2,
                                           g4,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })

    # Test graphs without edges
    g1 = dgl.heterograph({('u', 'r', 'v'): ([], [])}, {'u': 0, 'v': 4})
    g2 = dgl.heterograph({('u', 'r', 'v'): ([], [])}, {'u': 1, 'v': 5})
    # Batching graphs with zero edges should not raise.
    dgl.batch([g1, g2])
Esempio n. 16
0
 def assert_data(lhs, rhs):
     """Assert that every key/value pair of ``lhs`` is present in ``rhs``.

     Values are compared element-wise after converting the ``lhs`` value
     to a backend tensor.
     """
     for name in lhs:
         assert name in rhs
         assert F.array_equal(F.tensor(lhs[name]), rhs[name])
Esempio n. 17
0
def test_pickling_graph():
    """Round-trip DGLGraphs through pickle and verify structure, feature
    frames, registered UDFs, custom attributes, batched graphs, readonly
    graphs and multigraphs are all preserved.

    NOTE(review): mutable DGLGraph construction and
    register_message_func/register_reduce_func are legacy DGL APIs —
    confirm against the DGL version this suite targets.
    """
    # graph structures and frames are pickled
    g = dgl.DGLGraph()
    g.add_nodes(3)
    src = F.tensor([0, 0])
    dst = F.tensor([1, 2])
    g.add_edges(src, dst)

    x = F.randn((3, 7))
    y = F.randn((3, 5))
    a = F.randn((2, 6))
    b = F.randn((2, 4))

    g.ndata['x'] = x
    g.ndata['y'] = y
    g.edata['a'] = a
    g.edata['b'] = b

    # registered functions are pickled
    g.register_message_func(_global_message_func)
    reduce_func = fn.sum('x', 'x')
    g.register_reduce_func(reduce_func)

    # custom attributes should be pickled
    g.foo = 2

    new_g = _reconstruct_pickle(g)

    _assert_is_identical(g, new_g)
    assert new_g.foo == 2
    assert new_g._message_func == _global_message_func
    assert isinstance(new_g._reduce_func, type(reduce_func))
    assert new_g._reduce_func._name == 'sum'
    assert new_g._reduce_func.msg_field == 'x'
    assert new_g._reduce_func.out_field == 'x'

    # test batched graph with partial set case
    g2 = dgl.DGLGraph()
    g2.add_nodes(4)
    src2 = F.tensor([0, 1])
    dst2 = F.tensor([2, 3])
    g2.add_edges(src2, dst2)

    x2 = F.randn((4, 7))
    y2 = F.randn((3, 5))
    a2 = F.randn((2, 6))
    b2 = F.randn((2, 4))

    g2.ndata['x'] = x2
    # 'y' is set on only a subset of nodes (the "partial set" case).
    g2.nodes[[0, 1, 3]].data['y'] = y2
    g2.edata['a'] = a2
    g2.edata['b'] = b2

    bg = dgl.batch([g, g2])

    bg2 = _reconstruct_pickle(bg)

    _assert_is_identical(bg, bg2)
    new_g, new_g2 = dgl.unbatch(bg2)
    _assert_is_identical(g, new_g)
    _assert_is_identical(g2, new_g2)

    # readonly graph
    g = dgl.DGLGraph([(0, 1), (1, 2)], readonly=True)
    new_g = _reconstruct_pickle(g)
    _assert_is_identical(g, new_g)

    # multigraph (duplicate (0, 1) edge)
    g = dgl.DGLGraph([(0, 1), (0, 1), (1, 2)])
    new_g = _reconstruct_pickle(g)
    _assert_is_identical(g, new_g)

    # readonly multigraph
    g = dgl.DGLGraph([(0, 1), (0, 1), (1, 2)], readonly=True)
    new_g = _reconstruct_pickle(g)
    _assert_is_identical(g, new_g)
Esempio n. 18
0
def _test_construct_graphs_multiple():
    """Construct multiple homogeneous graphs from NodeData/EdgeData/GraphData
    and verify graph count, per-graph node/edge/graph-level features, and
    that a missing graph ID in the graph CSV raises.

    Fix: ``np.int`` was deprecated in NumPy 1.20 and removed in 1.24; the
    concrete ``np.int64`` is used instead.
    """
    from dgl.data.csv_dataset_base import NodeData, EdgeData, GraphData, DGLGraphConstructor
    num_nodes = 100
    num_edges = 1000
    num_graphs = 10
    num_dims = 3
    node_ids = np.array([], dtype=np.int64)
    src_ids = np.array([], dtype=np.int64)
    dst_ids = np.array([], dtype=np.int64)
    ngraph_ids = np.array([], dtype=np.int64)
    egraph_ids = np.array([], dtype=np.int64)
    u_indices = np.array([], dtype=np.int64)
    for i in range(num_graphs):
        # Draw unique node IDs from a range twice as large so IDs are
        # non-contiguous and exercise the constructor's ID remapping.
        l_node_ids = np.random.choice(
            np.arange(num_nodes*2), size=num_nodes, replace=False)
        node_ids = np.append(node_ids, l_node_ids)
        # Indices of the first occurrence of each sorted-unique ID; used
        # below to reorder expected node features to match construction.
        _, l_u_indices = np.unique(l_node_ids, return_index=True)
        u_indices = np.append(u_indices, l_u_indices)
        ngraph_ids = np.append(ngraph_ids, np.full(num_nodes, i))
        src_ids = np.append(src_ids, np.random.choice(
            l_node_ids, size=num_edges))
        dst_ids = np.append(dst_ids, np.random.choice(
            l_node_ids, size=num_edges))
        egraph_ids = np.append(egraph_ids, np.full(num_edges, i))
    ndata = {'feat': np.random.rand(num_nodes*num_graphs, num_dims),
             'label': np.random.randint(2, size=num_nodes*num_graphs)}
    node_data = NodeData(node_ids, ndata, graph_id=ngraph_ids)
    edata = {'feat': np.random.rand(
        num_edges*num_graphs, num_dims), 'label': np.random.randint(2, size=num_edges*num_graphs)}
    edge_data = EdgeData(src_ids, dst_ids, edata, graph_id=egraph_ids)
    gdata = {'feat': np.random.rand(num_graphs, num_dims),
             'label': np.random.randint(2, size=num_graphs)}
    graph_data = GraphData(np.arange(num_graphs), gdata)
    graphs, data_dict = DGLGraphConstructor.construct_graphs(
        node_data, edge_data, graph_data)
    assert len(graphs) == num_graphs
    assert len(data_dict) == len(gdata)
    for k, v in data_dict.items():
        assert F.array_equal(F.tensor(gdata[k]), v)
    for i, g in enumerate(graphs):
        assert g.is_homogeneous
        assert g.num_nodes() == num_nodes
        assert g.num_edges() == num_edges

        def assert_data(lhs, rhs, size, node=False):
            # Compare the i-th graph's slice of the expected data against
            # the constructed graph's data; node features must be reordered
            # via u_indices to follow the remapped node IDs.
            for key, value in lhs.items():
                assert key in rhs
                value = value[i*size:(i+1)*size]
                if node:
                    indices = u_indices[i*size:(i+1)*size]
                    value = value[indices]
                assert F.array_equal(F.tensor(value), rhs[key])
        assert_data(ndata, g.ndata, num_nodes, node=True)
        assert_data(edata, g.edata, num_edges)

    # Graph IDs found in node/edge CSV but not in graph CSV must raise.
    graph_data = GraphData(np.arange(num_graphs-2), {})
    expect_except = False
    try:
        _, _ = DGLGraphConstructor.construct_graphs(
            node_data, edge_data, graph_data)
    except Exception:
        expect_except = True
    assert expect_except
Esempio n. 19
0
def test_reverse():
    """Test graph reversal.

    Covers the mutable DGLGraph ``reverse()`` method, then
    ``dgl.reverse_heterograph`` on homogeneous and heterogeneous graphs,
    including the ``copy_ndata``/``copy_edata`` options and that features
    added to a reversed graph do not leak back into the original.
    """
    g = dgl.DGLGraph()
    g.add_nodes(5)
    # The graph need not be completely connected.
    g.add_edges([0, 1, 2], [1, 2, 1])
    g.ndata['h'] = F.tensor([[0.], [1.], [2.], [3.], [4.]])
    g.edata['h'] = F.tensor([[5.], [6.], [7.]])
    rg = g.reverse()

    assert g.is_multigraph == rg.is_multigraph

    assert g.number_of_nodes() == rg.number_of_nodes()
    assert g.number_of_edges() == rg.number_of_edges()
    assert F.allclose(
        F.astype(rg.has_edges_between([1, 2, 1], [0, 1, 2]), F.float32),
        F.ones((3, )))
    # Edge IDs are preserved under reversal.
    assert g.edge_id(0, 1) == rg.edge_id(1, 0)
    assert g.edge_id(1, 2) == rg.edge_id(2, 1)
    assert g.edge_id(2, 1) == rg.edge_id(1, 2)

    # test dgl.reverse_heterograph
    # test homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 2]), F.tensor([1, 2, 0])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.]])
    g_r = dgl.reverse_heterograph(g)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    u_g, v_g, eids_g = g.all_edges(form='all')
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all')
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    # By default node data is shared but edge data is not copied.
    assert F.array_equal(g.ndata['h'], g_r.ndata['h'])
    assert len(g_r.edata) == 0

    # without sharing ndata
    g_r = dgl.reverse_heterograph(g, copy_ndata=False)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    assert len(g_r.ndata) == 0
    assert len(g_r.edata) == 0

    # with shared ndata and edata
    g_r = dgl.reverse_heterograph(g, copy_ndata=True, copy_edata=True)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    assert F.array_equal(g.ndata['h'], g_r.ndata['h'])
    assert F.array_equal(g.edata['h'], g_r.edata['h'])

    # add new node feature to g_r; it must not appear in g
    g_r.ndata['hh'] = F.tensor([0, 1, 2])
    assert ('hh' in g.ndata) is False
    assert ('hh' in g_r.ndata) is True

    # add new edge feature to g_r; it must not appear in g
    g_r.edata['hh'] = F.tensor([0, 1, 2])
    assert ('hh' in g.edata) is False
    assert ('hh' in g_r.edata) is True

    # test heterogeneous graph
    g = dgl.heterograph({
        ('user', 'follows', 'user'): ([0, 1, 2, 4, 3, 1,
                                       3], [1, 2, 3, 2, 0, 0, 1]),
        ('user', 'plays', 'game'): ([0, 0, 2, 3, 3, 4,
                                     1], [1, 0, 1, 0, 1, 0, 0]),
        ('developer', 'develops', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])
    })
    g.nodes['user'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    g.nodes['user'].data['hh'] = F.tensor([1, 1, 1, 1, 1])
    g.nodes['game'].data['h'] = F.tensor([0, 1])
    g.edges['follows'].data['h'] = F.tensor([0, 1, 2, 4, 3, 1, 3])
    g.edges['follows'].data['hh'] = F.tensor([1, 2, 3, 2, 0, 0, 1])
    g_r = dgl.reverse_heterograph(g)

    # Reversal flips the src/dst node types of each canonical edge type.
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g_r.number_of_nodes(ntype)
    assert F.array_equal(g.nodes['user'].data['h'],
                         g_r.nodes['user'].data['h'])
    assert F.array_equal(g.nodes['user'].data['hh'],
                         g_r.nodes['user'].data['hh'])
    assert F.array_equal(g.nodes['game'].data['h'],
                         g_r.nodes['game'].data['h'])
    assert len(g_r.edges['follows'].data) == 0
    u_g, v_g, eids_g = g.all_edges(form='all',
                                   etype=('user', 'follows', 'user'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all',
                                        etype=('user', 'follows', 'user'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    u_g, v_g, eids_g = g.all_edges(form='all', etype=('user', 'plays', 'game'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all',
                                        etype=('game', 'plays', 'user'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    u_g, v_g, eids_g = g.all_edges(form='all',
                                   etype=('developer', 'develops', 'game'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all',
                                        etype=('game', 'develops',
                                               'developer'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)

    # without sharing ndata
    g_r = dgl.reverse_heterograph(g, copy_ndata=False)
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g_r.number_of_nodes(ntype)
    assert len(g_r.nodes['user'].data) == 0
    assert len(g_r.nodes['game'].data) == 0

    g_r = dgl.reverse_heterograph(g, copy_ndata=True, copy_edata=True)
    print(g_r)
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    assert F.array_equal(g.edges['follows'].data['h'],
                         g_r.edges['follows'].data['h'])
    assert F.array_equal(g.edges['follows'].data['hh'],
                         g_r.edges['follows'].data['hh'])

    # add new node feature to g_r; it must not appear in g
    g_r.nodes['user'].data['hhh'] = F.tensor([0, 1, 2, 3, 4])
    assert ('hhh' in g.nodes['user'].data) is False
    assert ('hhh' in g_r.nodes['user'].data) is True

    # add new edge feature to g_r; it must not appear in g
    g_r.edges['follows'].data['hhh'] = F.tensor([1, 2, 3, 2, 0, 0, 1])
    assert ('hhh' in g.edges['follows'].data) is False
    assert ('hhh' in g_r.edges['follows'].data) is True
Esempio n. 20
0
def _test_DGLCSVDataset_multiple():
    """Write a multi-graph CSV dataset to a temp dir (YAML metadata plus
    node/edge/graph CSVs for two node types and two edge types), then load
    it with DGLCSVDataset and verify graphs, labels and features.

    The second loop iteration deletes one node CSV and loads with
    ``force_reload=False`` to verify reloading from cached files.
    """
    with tempfile.TemporaryDirectory() as test_dir:
        # generate YAML/CSVs
        meta_yaml_path = os.path.join(test_dir, "meta.yaml")
        edges_csv_path_0 = os.path.join(test_dir, "test_edges_0.csv")
        edges_csv_path_1 = os.path.join(test_dir, "test_edges_1.csv")
        nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv")
        nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv")
        graph_csv_path = os.path.join(test_dir, "test_graph.csv")
        meta_yaml_data = {'version': '1.0.0', 'dataset_name': 'default_name',
                          'node_data': [{'file_name': os.path.basename(nodes_csv_path_0),
                                         'ntype': 'user',
                                         },
                                        {'file_name': os.path.basename(nodes_csv_path_1),
                                            'ntype': 'item',
                                         }],
                          'edge_data': [{'file_name': os.path.basename(edges_csv_path_0),
                                         'etype': ['user', 'follow', 'user'],
                                         },
                                        {'file_name': os.path.basename(edges_csv_path_1),
                                         'etype': ['user', 'like', 'item'],
                                         }],
                          'graph_data': {'file_name': os.path.basename(graph_csv_path)}
                          }
        with open(meta_yaml_path, 'w') as f:
            yaml.dump(meta_yaml_data, f, sort_keys=False)
        num_nodes = 100
        num_edges = 500
        num_graphs = 10
        num_dims = 3
        # Node CSVs: the same frame is written for both node types, so
        # every type shares identical features/labels per graph.
        feat_ndata = np.random.rand(num_nodes*num_graphs, num_dims)
        label_ndata = np.random.randint(2, size=num_nodes*num_graphs)
        df = pd.DataFrame({'node_id': np.hstack([np.arange(num_nodes) for _ in range(num_graphs)]),
                           'label': label_ndata,
                           'feat': [line.tolist() for line in feat_ndata],
                           'graph_id': np.hstack([np.full(num_nodes, i) for i in range(num_graphs)])
                           })
        df.to_csv(nodes_csv_path_0, index=False)
        df.to_csv(nodes_csv_path_1, index=False)
        # Edge CSVs: likewise shared between the two edge types.
        feat_edata = np.random.rand(num_edges*num_graphs, num_dims)
        label_edata = np.random.randint(2, size=num_edges*num_graphs)
        df = pd.DataFrame({'src_id': np.hstack([np.random.randint(num_nodes, size=num_edges) for _ in range(num_graphs)]),
                           'dst_id': np.hstack([np.random.randint(num_nodes, size=num_edges) for _ in range(num_graphs)]),
                           'label': label_edata,
                           'feat': [line.tolist() for line in feat_edata],
                           'graph_id': np.hstack([np.full(num_edges, i) for i in range(num_graphs)])
                           })
        df.to_csv(edges_csv_path_0, index=False)
        df.to_csv(edges_csv_path_1, index=False)
        # Graph-level CSV: one label/feature row per graph.
        feat_gdata = np.random.rand(num_graphs, num_dims)
        label_gdata = np.random.randint(2, size=num_graphs)
        df = pd.DataFrame({'label': label_gdata,
                           'feat': [line.tolist() for line in feat_gdata],
                           'graph_id': np.arange(num_graphs)
                           })
        df.to_csv(graph_csv_path, index=False)

        # load CSVDataset with default node/edge/graph_data_parser
        for force_reload in [True, False]:
            if not force_reload:
                # remove original node data file to verify reload from cached files
                os.remove(nodes_csv_path_0)
                assert not os.path.exists(nodes_csv_path_0)
            csv_dataset = data.DGLCSVDataset(
                test_dir, force_reload=force_reload)
            assert len(csv_dataset) == num_graphs
            assert csv_dataset.has_cache()
            assert len(csv_dataset.data) == 2
            assert 'feat' in csv_dataset.data
            assert 'label' in csv_dataset.data
            assert F.array_equal(F.tensor(feat_gdata),
                                 csv_dataset.data['feat'])
            for i, (g, label) in enumerate(csv_dataset):
                assert not g.is_homogeneous
                assert F.asnumpy(label) == label_gdata[i]
                for ntype in g.ntypes:
                    assert g.num_nodes(ntype) == num_nodes
                    assert F.array_equal(F.tensor(feat_ndata[i*num_nodes:(i+1)*num_nodes]),
                                         g.nodes[ntype].data['feat'])
                    assert np.array_equal(label_ndata[i*num_nodes:(i+1)*num_nodes],
                                          F.asnumpy(g.nodes[ntype].data['label']))
                for etype in g.etypes:
                    assert g.num_edges(etype) == num_edges
                    assert F.array_equal(F.tensor(feat_edata[i*num_edges:(i+1)*num_edges]),
                                         g.edges[etype].data['feat'])
                    assert np.array_equal(label_edata[i*num_edges:(i+1)*num_edges],
                                          F.asnumpy(g.edges[etype].data['label']))
Esempio n. 21
0
def test_remove_edges(index_dtype):
    """Test dgl.remove_edges on homogeneous graphs (across restricted
    sparse formats, with duplicate removal IDs) and on heterographs
    (per-etype removal dicts, including empty and partial dicts).
    """
    def check(g1, etype, g, edges_removed):
        # Every (src, dst, eid) triple remaining in g1 must exist in g,
        # and none of the removed edge IDs may survive in g1.
        src, dst, eid = g.edges(etype=etype, form='all')
        src1, dst1 = g1.edges(etype=etype, order='eid')
        if etype is not None:
            eid1 = g1.edges[etype].data[dgl.EID]
        else:
            eid1 = g1.edata[dgl.EID]
        src1 = F.asnumpy(src1)
        dst1 = F.asnumpy(dst1)
        eid1 = F.asnumpy(eid1)
        src = F.asnumpy(src)
        dst = F.asnumpy(dst)
        eid = F.asnumpy(eid)
        sde_set = set(zip(src, dst, eid))

        for s, d, e in zip(src1, dst1, eid1):
            assert (s, d, e) in sde_set
        assert not np.isin(edges_removed, eid1).any()
        assert g1.idtype == g.idtype

    # Homogeneous graphs under each restricted format; removal lists
    # include duplicates to exercise de-duplication.
    for fmt in ['coo', 'csr', 'csc']:
        for edges_to_remove in [[2], [2, 2], [3, 2], [1, 3, 1, 2]]:
            g = dgl.graph([(0, 1), (2, 3), (1, 2), (3, 4)],
                          restrict_format=fmt,
                          index_dtype=index_dtype)
            g1 = dgl.remove_edges(
                g, F.tensor(edges_to_remove, getattr(F, index_dtype)))
            check(g1, None, g, edges_to_remove)

            # Same graph built from a scipy CSR matrix.
            g = dgl.graph(spsp.csr_matrix(
                ([1, 1, 1, 1], ([0, 2, 1, 3], [1, 3, 2, 4])), shape=(5, 5)),
                          restrict_format=fmt,
                          index_dtype=index_dtype)
            g1 = dgl.remove_edges(
                g, F.tensor(edges_to_remove, getattr(F, index_dtype)))
            check(g1, None, g, edges_to_remove)

    g = dgl.heterograph(
        {
            ('A', 'AA', 'A'): [(0, 1), (2, 3), (1, 2), (3, 4)],
            ('A', 'AB', 'B'): [(0, 1), (1, 3), (3, 5), (1, 6)],
            ('B', 'BA', 'A'): [(2, 3), (3, 2)]
        },
        index_dtype=index_dtype)
    # Remove one edge from every relation.
    g2 = dgl.remove_edges(
        g, {
            'AA': F.tensor([2], getattr(F, index_dtype)),
            'AB': F.tensor([3], getattr(F, index_dtype)),
            'BA': F.tensor([1], getattr(F, index_dtype))
        })
    check(g2, 'AA', g, [2])
    check(g2, 'AB', g, [3])
    check(g2, 'BA', g, [1])

    # An empty tensor for a relation removes nothing from it.
    g3 = dgl.remove_edges(
        g, {
            'AA': F.tensor([], getattr(F, index_dtype)),
            'AB': F.tensor([3], getattr(F, index_dtype)),
            'BA': F.tensor([1], getattr(F, index_dtype))
        })
    check(g3, 'AA', g, [])
    check(g3, 'AB', g, [3])
    check(g3, 'BA', g, [1])

    # Omitted relations are untouched; 'AB' loses all its edges.
    g4 = dgl.remove_edges(
        g, {'AB': F.tensor([3, 1, 2, 0], getattr(F, index_dtype))})
    check(g4, 'AA', g, [])
    check(g4, 'AB', g, [3, 1, 2, 0])
    check(g4, 'BA', g, [])
def rand_init(shape, dtype):
    """Initializer: a float32 tensor of standard-normal values with ``shape``.

    NOTE(review): the ``dtype`` argument is accepted but ignored — the
    result is always ``F.float32``; confirm callers rely on that.
    """
    values = np.random.normal(size=shape)
    return F.tensor(values, F.float32)
Esempio n. 23
0
def test_compact(index_dtype):
    """Test dgl.compact_graphs on hetero- and homogeneous graphs.

    Verifies the induced node sets (stored under dgl.NID), idtype
    preservation, the ``always_preserve`` option (dict and tensor forms),
    and compacting several graphs jointly so they share node mappings.
    """
    g1 = dgl.heterograph(
        {
            ('user', 'follow', 'user'): [(1, 3), (3, 5)],
            ('user', 'plays', 'game'): [(2, 4), (3, 4), (2, 5)],
            ('game', 'wished-by', 'user'): [(6, 7), (5, 7)]
        }, {
            'user': 20,
            'game': 10
        },
        index_dtype=index_dtype)

    g2 = dgl.heterograph(
        {
            ('game', 'clicked-by', 'user'): [(3, 1)],
            ('user', 'likes', 'user'): [(1, 8), (8, 9)]
        }, {
            'user': 20,
            'game': 10
        },
        index_dtype=index_dtype)

    g3 = dgl.graph([(0, 1), (1, 2)],
                   num_nodes=10,
                   ntype='user',
                   index_dtype=index_dtype)
    g4 = dgl.graph([(1, 3), (3, 5)],
                   num_nodes=10,
                   ntype='user',
                   index_dtype=index_dtype)

    def _check(g, new_g, induced_nodes):
        # The compacted graph keeps the same schema, maps every node to a
        # valid original ID (presumably -1 would mark an unmapped node —
        # TODO confirm), and each edge maps back to an edge of g.
        assert g.ntypes == new_g.ntypes
        assert g.canonical_etypes == new_g.canonical_etypes

        for ntype in g.ntypes:
            assert -1 not in induced_nodes[ntype]

        for etype in g.canonical_etypes:
            g_src, g_dst = g.all_edges(order='eid', etype=etype)
            g_src = F.asnumpy(g_src)
            g_dst = F.asnumpy(g_dst)
            new_g_src, new_g_dst = new_g.all_edges(order='eid', etype=etype)
            new_g_src_mapped = induced_nodes[etype[0]][F.asnumpy(new_g_src)]
            new_g_dst_mapped = induced_nodes[etype[2]][F.asnumpy(new_g_dst)]
            assert (g_src == new_g_src_mapped).all()
            assert (g_dst == new_g_dst_mapped).all()

    # Test default
    new_g1 = dgl.compact_graphs(g1)
    induced_nodes = {
        ntype: new_g1.nodes[ntype].data[dgl.NID]
        for ntype in new_g1.ntypes
    }
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert new_g1._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7])
    assert set(induced_nodes['game']) == set([4, 5, 6])
    _check(g1, new_g1, induced_nodes)

    # Test with always_preserve given a dict
    new_g1 = dgl.compact_graphs(g1,
                                always_preserve={
                                    'game':
                                    F.tensor([4, 7],
                                             dtype=getattr(F, index_dtype))
                                })
    assert new_g1._idtype_str == index_dtype
    induced_nodes = {
        ntype: new_g1.nodes[ntype].data[dgl.NID]
        for ntype in new_g1.ntypes
    }
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7])
    # game 7 is kept even though it has no incident edges.
    assert set(induced_nodes['game']) == set([4, 5, 6, 7])
    _check(g1, new_g1, induced_nodes)

    # Test with always_preserve given a tensor
    new_g3 = dgl.compact_graphs(g3,
                                always_preserve=F.tensor([1, 7],
                                                         dtype=getattr(
                                                             F, index_dtype)))
    induced_nodes = {
        ntype: new_g3.nodes[ntype].data[dgl.NID]
        for ntype in new_g3.ntypes
    }
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}

    assert new_g3._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([0, 1, 2, 7])
    _check(g3, new_g3, induced_nodes)

    # Test multiple graphs: compacted jointly, so the induced node set is
    # the union over both graphs.
    new_g1, new_g2 = dgl.compact_graphs([g1, g2])
    induced_nodes = {
        ntype: new_g1.nodes[ntype].data[dgl.NID]
        for ntype in new_g1.ntypes
    }
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert new_g1._idtype_str == index_dtype
    assert new_g2._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7, 8, 9])
    assert set(induced_nodes['game']) == set([3, 4, 5, 6])
    _check(g1, new_g1, induced_nodes)
    _check(g2, new_g2, induced_nodes)

    # Test multiple graphs with always_preserve given a dict
    new_g1, new_g2 = dgl.compact_graphs([g1, g2],
                                        always_preserve={
                                            'game':
                                            F.tensor([4, 7],
                                                     dtype=getattr(
                                                         F, index_dtype))
                                        })
    induced_nodes = {
        ntype: new_g1.nodes[ntype].data[dgl.NID]
        for ntype in new_g1.ntypes
    }
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert new_g1._idtype_str == index_dtype
    assert new_g2._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7, 8, 9])
    assert set(induced_nodes['game']) == set([3, 4, 5, 6, 7])
    _check(g1, new_g1, induced_nodes)
    _check(g2, new_g2, induced_nodes)

    # Test multiple graphs with always_preserve given a tensor
    new_g3, new_g4 = dgl.compact_graphs([g3, g4],
                                        always_preserve=F.tensor(
                                            [1, 7],
                                            dtype=getattr(F, index_dtype)))
    induced_nodes = {
        ntype: new_g3.nodes[ntype].data[dgl.NID]
        for ntype in new_g3.ntypes
    }
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}

    assert new_g3._idtype_str == index_dtype
    assert new_g4._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([0, 1, 2, 3, 5, 7])
    _check(g3, new_g3, induced_nodes)
    _check(g4, new_g4, induced_nodes)
Esempio n. 24
0
def _gen_neighbor_sampling_test_graph(hypersparse, reverse):
    """Build a small heterograph fixture for neighbor-sampling tests.

    Returns (g, hg): the 'follow' relation graph and the full heterograph
    of four relations. With ``reverse=True`` every relation is built with
    edge directions (and src/dst types) flipped relative to the default
    branch. ``hypersparse`` sets node-ID cardinality to 1 << 50 to force
    hypersparse storage (the inline comment notes a CSR allocation at that
    size should crash).
    """
    if hypersparse:
        # should crash if allocated a CSR
        card = 1 << 50
        card2 = (1 << 50, 1 << 50)
    else:
        card = None
        card2 = None

    if reverse:
        g = dgl.graph([(0, 1), (0, 2), (0, 3), (1, 0), (1, 2), (1, 3), (2, 0)],
                      'user',
                      'follow',
                      card=card)
        # Per-edge sampling probabilities; zeros make some edges unsampleable.
        g.edata['prob'] = F.tensor([.5, .5, 0., .5, .5, 0., 1.],
                                   dtype=F.float32)
        g1 = dgl.bipartite([(0, 0), (1, 0), (2, 1), (2, 3)],
                           'game',
                           'play',
                           'user',
                           card=card2)
        g1.edata['prob'] = F.tensor([.8, .5, .5, .5], dtype=F.float32)
        g2 = dgl.bipartite([(0, 2), (1, 2), (2, 2), (0, 1), (3, 1), (0, 0)],
                           'user',
                           'liked-by',
                           'game',
                           card=card2)
        g2.edata['prob'] = F.tensor([.3, .5, .2, .5, .1, .1], dtype=F.float32)
        # 'flips' carries no 'prob' feature on purpose.
        g3 = dgl.bipartite([(0, 0), (0, 1), (0, 2), (0, 3)],
                           'coin',
                           'flips',
                           'user',
                           card=card2)

        hg = dgl.hetero_from_relations([g, g1, g2, g3])
    else:
        # Same relations as above with source/destination swapped.
        g = dgl.graph([(1, 0), (2, 0), (3, 0), (0, 1), (2, 1), (3, 1), (0, 2)],
                      'user',
                      'follow',
                      card=card)
        g.edata['prob'] = F.tensor([.5, .5, 0., .5, .5, 0., 1.],
                                   dtype=F.float32)
        g1 = dgl.bipartite([(0, 0), (0, 1), (1, 2), (3, 2)],
                           'user',
                           'play',
                           'game',
                           card=card2)
        g1.edata['prob'] = F.tensor([.8, .5, .5, .5], dtype=F.float32)
        g2 = dgl.bipartite([(2, 0), (2, 1), (2, 2), (1, 0), (1, 3), (0, 0)],
                           'game',
                           'liked-by',
                           'user',
                           card=card2)
        g2.edata['prob'] = F.tensor([.3, .5, .2, .5, .1, .1], dtype=F.float32)
        g3 = dgl.bipartite([(0, 0), (1, 0), (2, 0), (3, 0)],
                           'user',
                           'flips',
                           'coin',
                           card=card2)

        hg = dgl.hetero_from_relations([g, g1, g2, g3])
    return g, hg
Esempio n. 25
0
def atest_nx_conversion(index_dtype):
    """Round-trip a DGLGraph through networkx and verify that graph
    structure and node/edge features survive the conversion in both
    directions.

    NOTE(review): the ``atest_`` prefix keeps pytest from collecting this
    function; presumably disabled on purpose -- confirm before renaming.
    """
    # check conversion between networkx and DGLGraph

    def _check_nx_feature(nxg, nf, ef):
        # check node and edge feature of nxg
        # this is used to check to_networkx
        num_nodes = len(nxg)
        num_edges = nxg.size()
        if num_nodes > 0:
            # Gather each feature key across nodes in nxg's iteration order
            # and compare the concatenation against the expected tensor.
            node_feat = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for k in nxg.nodes[nid]:
                    node_feat[k].append(F.unsqueeze(attr[k], 0))
            for k in node_feat:
                feat = F.cat(node_feat[k], 0)
                assert F.allclose(feat, nf[k])
        else:
            assert len(nf) == 0
        if num_edges > 0:
            # Edge features are placed by the 'id' attribute so the check is
            # independent of networkx's edge iteration order.
            edge_feat = ddict(lambda: [0] * num_edges)
            for u, v, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1 # extra id
                eid = attr['id']
                for k in ef:
                    edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
            for k in edge_feat:
                feat = F.cat(edge_feat[k], 0)
                assert F.allclose(feat, ef[k])
        else:
            assert len(ef) == 0

    # Random features: three node keys (only two are exported below) and
    # two edge keys on a 5-node / 4-edge graph.
    n1 = F.randn((5, 3))
    n2 = F.randn((5, 10))
    n3 = F.randn((5, 4))
    e1 = F.randn((4, 5))
    e2 = F.randn((4, 7))
    g = dgl.graph([(0,2),(1,4),(3,0),(4,3)], index_dtype=index_dtype)
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx
    nxg = dgl.to_networkx(g, node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph, nx graph has id in edge feature
    # use id feature to test non-tensor copy
    g = dgl.graph(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'], index_dtype=index_dtype)
    assert g._idtype_str == index_dtype
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    # test with existing dglgraph (so existing features should be cleared)
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert F.allclose(g.edata['e1'], e1)
    assert F.array_equal(g.edata['id'], F.copy_to(F.arange(0, 4), F.cpu()))

    # test conversion after modifying DGLGraph
    # TODO(minjie): enable after mutation is supported
    #g.pop_e_repr('id') # pop id so we don't need to provide id when adding edges
    #new_n = F.randn((2, 3))
    #new_e = F.randn((3, 5))
    #g.add_nodes(2, data={'n1': new_n})
    ## add three edges, one is a multi-edge
    #g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    #n1 = F.cat((n1, new_n), 0)
    #e1 = F.cat((e1, new_e), 0)
    ## convert to networkx again
    #nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    #assert len(nxg) == 7
    #assert nxg.size() == 7
    #_check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now test convert from networkx without id in edge feature
    # first pop id in edge feature
    for _, _, attr in nxg.edges(data=True):
        attr.pop('id')
    # test with a new graph
    g = dgl.graph(nxg , node_attrs=['n1'], edge_attrs=['e1'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    edge_feat = []
    for _, _, attr in nxg.edges(data=True):
        edge_feat.append(F.unsqueeze(attr['e1'], 0))
    edge_feat = F.cat(edge_feat, 0)
    assert F.allclose(g.edata['e1'], edge_feat)

    # Test converting from a networkx graph whose nodes are
    # not labeled with consecutive-integers.
    nxg = nx.cycle_graph(5)
    nxg.remove_nodes_from([0, 4])
    for u in nxg.nodes():
        nxg.nodes[u]['h'] = F.tensor([u])
    for u, v, d in nxg.edges(data=True):
        d['h'] = F.tensor([u, v])

    g = dgl.DGLGraph()
    g.from_networkx(nxg, node_attrs=['h'], edge_attrs=['h'])
    # Nodes 1, 2, 3 are relabeled to 0, 1, 2; the undirected cycle edges
    # become two directed edges each.
    assert g.number_of_nodes() == 3
    assert g.number_of_edges() == 4
    assert g.has_edge_between(0, 1)
    assert g.has_edge_between(1, 2)
    assert F.allclose(g.ndata['h'], F.tensor([[1.], [2.], [3.]]))
    assert F.allclose(g.edata['h'], F.tensor([[1., 2.], [1., 2.],
                                              [2., 3.], [2., 3.]]))
Esempio n. 26
0
def test_random_walk():
    """Exercise ``dgl.sampling.random_walk`` on homogeneous and
    heterogeneous graphs: plain walks, probability-weighted walks,
    metapath-guided walks, and early termination via ``restart_prob``."""
    # A 3-node directed cycle.
    g1 = dgl.heterograph({
        ('user', 'follow', 'user'): [(0, 1), (1, 2), (2, 0)]
    })
    # A 4-node graph where every path leads back to node 0.
    g2 = dgl.heterograph({
        ('user', 'follow', 'user'): [(0, 1), (1, 2), (1, 3), (2, 0), (3, 0)]
    })
    # g1 extended with an 'item' node type for metapath walks.
    g3 = dgl.heterograph({
        ('user', 'follow', 'user'): [(0, 1), (1, 2), (2, 0)],
        ('user', 'view', 'item'): [(0, 0), (1, 1), (2, 2)],
        ('item', 'viewed-by', 'user'): [(0, 0), (1, 1), (2, 2)]
    })
    # g2 extended with an 'item' node type for metapath walks.
    g4 = dgl.heterograph({
        ('user', 'follow', 'user'): [(0, 1), (1, 2), (1, 3), (2, 0), (3, 0)],
        ('user', 'view', 'item'): [(0, 0), (0, 1), (1, 1), (2, 2), (3, 2),
                                   (3, 1)],
        ('item', 'viewed-by', 'user'): [(0, 0), (1, 0), (1, 1), (2, 2), (2, 3),
                                        (1, 3)]
    })

    # Transition weights; the zero entry makes the edge 1->2 unpickable.
    g2.edata['p'] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)
    g4.edges['follow'].data['p'] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)
    g4.edges['viewed-by'].data['p'] = F.tensor([1, 1, 1, 1, 1, 1],
                                               dtype=F.float32)

    seeds3 = [0, 1, 2, 0, 1, 2]
    seeds4 = [0, 1, 2, 3, 0, 1, 2, 3]
    follow4 = ['follow'] * 4

    # Plain walk of length 4 on the cycle.
    walks, types = dgl.sampling.random_walk(g1, seeds3, length=4)
    check_random_walk(g1, follow4, walks, types)
    # A scalar restart probability of zero must behave like no restart.
    walks, types = dgl.sampling.random_walk(
        g1, seeds3, length=4, restart_prob=0.)
    check_random_walk(g1, follow4, walks, types)
    # Same, but with a per-step tensor of zero restart probabilities.
    walks, types = dgl.sampling.random_walk(
        g1, seeds3, length=4,
        restart_prob=F.zeros((4, ), F.float32, F.cpu()))
    check_random_walk(g1, follow4, walks, types)
    # A restart probability of 1 at the last step forces the final trace
    # position to be padded with -1.
    walks, types = dgl.sampling.random_walk(
        g1, seeds3, length=5,
        restart_prob=F.tensor([0, 0, 0, 0, 1], dtype=F.float32))
    check_random_walk(g1, follow4, F.slice_axis(walks, 1, 0, 5),
                      F.slice_axis(types, 0, 0, 5))
    assert (F.asnumpy(walks)[:, 5] == -1).all()

    # Plain and weighted walks on the 4-node graph.
    walks, types = dgl.sampling.random_walk(g2, seeds4, length=4)
    check_random_walk(g2, follow4, walks, types)

    walks, types = dgl.sampling.random_walk(
        g2, seeds4, length=4, prob='p')
    check_random_walk(g2, follow4, walks, types, 'p')

    # Metapath-guided walks on the heterogeneous graphs.
    metapath = ['follow', 'view', 'viewed-by'] * 2
    walks, types = dgl.sampling.random_walk(
        g3, seeds3, metapath=metapath)
    check_random_walk(g3, metapath, walks, types)

    walks, types = dgl.sampling.random_walk(
        g4, seeds4, metapath=metapath)
    check_random_walk(g4, metapath, walks, types)

    # Weighted metapath walks, with and without restart.
    walks, types = dgl.sampling.random_walk(
        g4, seeds4, metapath=metapath, prob='p')
    check_random_walk(g4, metapath, walks, types, 'p')
    walks, types = dgl.sampling.random_walk(
        g4, seeds4, metapath=metapath, prob='p', restart_prob=0.)
    check_random_walk(g4, metapath, walks, types, 'p')
    walks, types = dgl.sampling.random_walk(
        g4, seeds4, metapath=metapath, prob='p',
        restart_prob=F.zeros((6, ), F.float32, F.cpu()))
    check_random_walk(g4, metapath, walks, types, 'p')
    # Forced restart on the appended extra hop pads the tail with -1.
    walks, types = dgl.sampling.random_walk(
        g4, seeds4,
        metapath=metapath + ['follow'],
        prob='p',
        restart_prob=F.tensor([0, 0, 0, 0, 0, 0, 1], F.float32))
    check_random_walk(g4, metapath, walks[:, :7], types[:7], 'p')
    assert (F.asnumpy(walks[:, 7]) == -1).all()
Esempio n. 27
0
 def foo(g):
     """Set node 0's feature inside a local scope and check that the
     graph-wide feature view reflects the write."""
     with g.local_scope():
         new_feat = F.ones((1, 1))
         g.nodes[0].data['h'] = new_feat
         expected = F.tensor([[1.], [0.]])
         assert F.allclose(g.ndata['h'], expected)
Esempio n. 28
0
def test_features(idtype):
    """Test the features of batched DGLHeteroGraphs."""
    def _make_graph():
        # Build one of the two identical two-relation input graphs.
        g = dgl.heterograph(
            {
                ('user', 'follows', 'user'): ([0, 1], [1, 2]),
                ('user', 'plays', 'game'): ([0, 1], [0, 0])
            },
            idtype=idtype,
            device=F.ctx())
        g.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
        g.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
        g.nodes['game'].data['h1'] = F.tensor([[0.]])
        g.nodes['game'].data['h2'] = F.tensor([[1.]])
        g.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
        g.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
        g.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])
        return g

    g1 = _make_graph()
    g2 = _make_graph()

    def _check_node_concat(bg, ntype, key):
        # A batched node feature equals the concatenation of the parts.
        assert F.allclose(
            bg.nodes[ntype].data[key],
            F.cat([g1.nodes[ntype].data[key], g2.nodes[ntype].data[key]],
                  dim=0))

    def _check_edge_concat(bg, etype, key):
        # A batched edge feature equals the concatenation of the parts.
        assert F.allclose(
            bg.edges[etype].data[key],
            F.cat([g1.edges[etype].data[key], g2.edges[etype].data[key]],
                  dim=0))

    # test default setting: every feature key is batched
    bg = dgl.batch([g1, g2])
    _check_node_concat(bg, 'user', 'h1')
    _check_node_concat(bg, 'user', 'h2')
    _check_node_concat(bg, 'game', 'h1')
    _check_node_concat(bg, 'game', 'h2')
    _check_edge_concat(bg, 'follows', 'h1')
    _check_edge_concat(bg, 'follows', 'h2')
    _check_edge_concat(bg, 'plays', 'h1')

    # test specifying ndata/edata: only the listed keys survive batching
    bg = dgl.batch([g1, g2], ndata=['h2'], edata=['h1'])
    _check_node_concat(bg, 'user', 'h2')
    _check_node_concat(bg, 'game', 'h2')
    _check_edge_concat(bg, 'follows', 'h1')
    _check_edge_concat(bg, 'plays', 'h1')
    assert 'h1' not in bg.nodes['user'].data
    assert 'h1' not in bg.nodes['game'].data
    assert 'h2' not in bg.edges['follows'].data

    # Test unbatching graphs: each component must match its original on
    # the surviving feature keys.
    g3, g4 = dgl.unbatch(bg)
    kept_node_attrs = {'user': ['h2'], 'game': ['h2']}
    kept_edge_attrs = {('user', 'follows', 'user'): ['h1']}
    check_equivalence_between_heterographs(g1,
                                           g3,
                                           node_attrs=kept_node_attrs,
                                           edge_attrs=kept_edge_attrs)
    check_equivalence_between_heterographs(g2,
                                           g4,
                                           node_attrs=kept_node_attrs,
                                           edge_attrs=kept_edge_attrs)

    # test legacy keyword `edge_attrs`
    bg = dgl.batch([g1, g2], edge_attrs=['h1'])
    assert 'h2' not in bg.edges['follows'].data.keys()
Esempio n. 29
0
def test_batch_setter_getter(index_dtype):
    """Check setting, popping and reading full and partial node/edge
    features on the graph built by ``generate_graph``."""
    def _first_col(x):
        # First column of a feature tensor as a plain Python list.
        return list(F.zerocopy_to_numpy(x)[:, 0])

    idt = F.data_type_dict[index_dtype]
    g = generate_graph(index_dtype)

    # set all nodes
    g.ndata['h'] = F.zeros((10, D))
    assert F.allclose(g.ndata['h'], F.zeros((10, D)))
    # pop nodes: the field disappears from the frame
    old_len = len(g.ndata)
    assert _first_col(g.ndata.pop('h')) == [0.] * 10
    assert len(g.ndata) == old_len - 1
    g.ndata['h'] = F.zeros((10, D))
    # set partial nodes
    g.nodes[F.tensor([1, 3, 5], idt)].data['h'] = F.ones((3, D))
    assert _first_col(g.ndata['h']) == [0., 1., 0., 1., 0., 1., 0., 0., 0., 0.]
    # get partial nodes
    assert _first_col(g.nodes[F.tensor([1, 2, 3], idt)].data['h']) == \
        [1., 0., 1.]

    # Edge list of generate_graph as (src, dst) -> eid:
    #   0->1 (0), 1->9 (1), 0->2 (2), 2->9 (3), 0->3 (4), 3->9 (5),
    #   0->4 (6), 4->9 (7), 0->5 (8), 5->9 (9), 0->6 (10), 6->9 (11),
    #   0->7 (12), 7->9 (13), 0->8 (14), 8->9 (15), 9->0 (16)

    def _mark_edges(src, dst, n):
        # Set the features of the selected edges to ones.
        u = F.tensor(src, dtype=idt)
        v = F.tensor(dst, dtype=idt)
        g.edges[u, v].data['l'] = F.ones((n, D))

    def _read_edges(src, dst):
        # Read back the first feature column of the selected edges.
        u = F.tensor(src, dtype=idt)
        v = F.tensor(dst, dtype=idt)
        return _first_col(g.edges[u, v].data['l'])

    # set all edges
    g.edata['l'] = F.zeros((17, D))
    assert _first_col(g.edata['l']) == [0.] * 17
    # pop edges
    old_len = len(g.edata)
    assert _first_col(g.edata.pop('l')) == [0.] * 17
    assert len(g.edata) == old_len - 1
    g.edata['l'] = F.zeros((17, D))

    truth = [0.] * 17
    # set partial edges (many-many)
    _mark_edges([0, 0, 2, 5, 9], [1, 3, 9, 9, 0], 5)
    truth[0] = truth[4] = truth[3] = truth[9] = truth[16] = 1.
    assert _first_col(g.edata['l']) == truth
    # set partial edges (many-one)
    _mark_edges([3, 4, 6], [9], 3)
    truth[5] = truth[7] = truth[11] = 1.
    assert _first_col(g.edata['l']) == truth
    # set partial edges (one-many)
    _mark_edges([0], [4, 5, 6], 3)
    truth[6] = truth[8] = truth[10] = 1.
    assert _first_col(g.edata['l']) == truth
    # get partial edges (many-many)
    assert _read_edges([0, 6, 0], [6, 9, 7]) == [1., 1., 0.]
    # get partial edges (many-one)
    assert _read_edges([5, 6, 7], [9]) == [1., 1., 0.]
    # get partial edges (one-many)
    assert _read_edges([0], [3, 4, 5]) == [1., 1., 1.]
Esempio n. 30
0
        ip_addr = sock.getsockname()[0]
    except ValueError:
        ip_addr = '127.0.0.1'
    finally:
        sock.close()
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.bind(("", 0))
    sock.listen(1)
    port = sock.getsockname()[1]
    sock.close()

    return ip_addr + ' ' + str(port)


# Create a one-part graph: all 6 nodes and 7 edges map to partition 0.
node_map = F.tensor([0, 0, 0, 0, 0, 0], F.int64)  # node id -> partition id
edge_map = F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)  # edge id -> partition id
global_nid = F.tensor([0, 1, 2, 3, 4, 5], F.int64)  # global node IDs
global_eid = F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64)  # global edge IDs

g = dgl.DGLGraph()
g.add_nodes(6)
# Edge IDs are assigned in insertion order (noted after each call).
g.add_edges(0, 1)  # 0
g.add_edges(0, 2)  # 1
g.add_edges(0, 3)  # 2
g.add_edges(2, 3)  # 3
g.add_edges(1, 1)  # 4
g.add_edges(0, 4)  # 5
g.add_edges(2, 5)  # 6

g.ndata[dgl.NID] = global_nid