Beispiel #1
0
def random_walk(g, seeds, num_traces, num_hops):
    """Generate fixed-length random walk traces for a batch of seed nodes.

    Parameters
    ----------
    g : DGLGraph
        The graph to walk on.
    seeds : Tensor
        Node IDs at which the random walk traces start.
    num_traces : int
        How many traces to generate for each seed.
    num_hops : int
        How many hops each trace takes.

    Returns
    -------
    traces : Tensor
        A 3-dimensional node ID tensor with shape

            (num_seeds, num_traces, num_hops + 1)

        where traces[i, j, 0] is always the starting node (i.e. seeds[i]).
        If ``seeds`` is empty, the user tensor of an empty index is returned
        instead.
    """
    if len(seeds) != 0:
        seed_array = utils.toindex(seeds).todgltensor()
        walk_result = _CAPI_DGLRandomWalk(
            g._graph._handle, seed_array, int(num_traces), int(num_hops))
        return F.zerocopy_from_dlpack(walk_result.to_dlpack())
    # No seeds: skip the C API call entirely.
    return utils.toindex([]).tousertensor()
Beispiel #2
0
def test_load_csr():
    """Verify that graph indices built from CSR arrays mirror the source matrix.

    A random 100x100 sparse matrix is loaded twice: once through the regular
    path and, on non-Windows platforms, once through the shared-memory path.
    Each resulting index must report the same node count, edge count and
    (src, dst) arrays as the COO form of the matrix.
    """
    n = 100
    csr = (sp.sparse.random(n, n, density=0.1, format='csr') != 0).astype(np.int64)

    def check_index(idx):
        # Compare the index against the SciPy matrix it was built from.
        assert idx.number_of_nodes() == n
        assert idx.number_of_edges() == csr.nnz
        src, dst, eid = idx.edges()
        src, dst, eid = src.tousertensor(), dst.tousertensor(), eid.tousertensor()
        coo = csr.tocoo()
        assert np.all(F.asnumpy(src) == coo.row)
        assert np.all(F.asnumpy(dst) == coo.col)

    # Load CSR normally.
    check_index(dgl.graph_index.from_csr(
        utils.toindex(csr.indptr), utils.toindex(csr.indices), False, 'out'))

    # Load CSR to shared memory.
    # Shared memory isn't supported in Windows.
    # Compare by value: `is not` against a str literal tests object identity,
    # which is implementation-dependent and triggers a SyntaxWarning on
    # Python 3.8+.
    if os.name != 'nt':
        check_index(dgl.graph_index.from_csr(
            utils.toindex(csr.indptr), utils.toindex(csr.indices),
            False, 'out', '/test_graph_struct'))
Beispiel #3
0
def test_slicing():
    """Exercise reads and writes through two overlapping slice-indexed FrameRefs.

    ``f1`` covers storage rows 1..4 and ``f2`` covers rows 3..7, so writes to
    the first two rows of ``f1`` must not be visible through ``f2`` while
    writes to its last two rows must be.
    """
    columns = ('a1', 'a2', 'a3')

    def payload(make):
        # Fresh {column: 2 x D tensor} dict for a two-row write.
        return {name: make([2, D]) for name in columns}

    data = Frame(create_test_data(grad=True))
    f1 = FrameRef(data, index=toindex(slice(1, 5)))
    f2 = FrameRef(data, index=toindex(slice(3, 8)))

    # test read
    for key, value in f1.items():
        assert F.allclose(F.narrow_row(data[key].data, 1, 5), value)
    f2_a1 = f2['a1']  # is a tensor

    # test write: rows outside the overlap leave f2 unchanged
    f1[Index(F.tensor([0, 1]))] = payload(F.zeros)
    assert F.allclose(f2['a1'], f2_a1)

    # rows inside the overlap (tensor index) show up as f2's first two rows
    f1[Index(F.tensor([2, 3]))] = payload(F.ones)
    F.narrow_row_set(f2_a1, 0, 2, 1)
    assert F.allclose(f2['a1'], f2_a1)

    # same rows again, this time addressed with a slice index
    f1[toindex(slice(2, 4))] = payload(F.zeros)
    F.narrow_row_set(f2_a1, 0, 2, 0)
    assert F.allclose(f2['a1'], f2_a1)
Beispiel #4
0
def test_sharing():
    """Check that two FrameRefs over one Frame share only overlapping rows.

    ``f1`` indexes rows 0..3 and ``f2`` rows 2..6 of the same storage.
    """
    columns = ('a1', 'a2', 'a3')

    def payload(make):
        # Fresh {column: 2 x D tensor} dict for a two-row write.
        return {name: make([2, D]) for name in columns}

    data = Frame(create_test_data())
    f1 = FrameRef(data, index=toindex([0, 1, 2, 3]))
    f2 = FrameRef(data, index=toindex([2, 3, 4, 5, 6]))

    # test read
    for key, value in f1.items():
        assert U.allclose(data[key].data[0:4], value)
    for key, value in f2.items():
        assert U.allclose(data[key].data[2:7], value)
    f2_a1 = f2['a1'].data

    # test write
    # an update to rows only f1 references should not be seen by f2.
    f1[Index(th.tensor([0, 1]))] = payload(th.zeros)
    assert U.allclose(f2['a1'], f2_a1)

    # an update to the shared rows should be seen by f2.
    f1[Index(th.tensor([2, 3]))] = payload(th.ones)
    f2_a1[0:2] = th.ones([2, D])
    assert U.allclose(f2['a1'], f2_a1)
Beispiel #5
0
def test_sharing():
    """Check that two FrameRefs over one Frame share only overlapping rows.

    ``f1`` indexes rows 0..3 and ``f2`` rows 2..6 of the same storage.
    """
    columns = ('a1', 'a2', 'a3')

    def payload(make):
        # Fresh {column: 2 x D tensor} dict for a two-row write.
        return {name: make([2, D]) for name in columns}

    data = Frame(create_test_data())
    f1 = FrameRef(data, index=toindex([0, 1, 2, 3]))
    f2 = FrameRef(data, index=toindex([2, 3, 4, 5, 6]))

    # test read
    for key, value in f1.items():
        assert F.allclose(F.narrow_row(data[key].data, 0, 4), value)
    for key, value in f2.items():
        assert F.allclose(F.narrow_row(data[key].data, 2, 7), value)
    f2_a1 = f2['a1']

    # test write
    # an update to rows only f1 references should not be seen by f2.
    f1[Index(F.tensor([0, 1]))] = payload(F.zeros)
    assert F.allclose(f2['a1'], f2_a1)

    # an update to the shared rows should be seen by f2.
    f1[Index(F.tensor([2, 3]))] = payload(F.ones)
    F.narrow_row_set(f2_a1, 0, 2, F.ones([2, D]))
    assert F.allclose(f2['a1'], f2_a1)
Beispiel #6
0
def test_slicing():
    """Exercise reads and writes through two overlapping slice-indexed FrameRefs.

    ``f1`` covers storage rows 1..4 and ``f2`` covers rows 3..7, so only the
    last two rows of ``f1`` overlap the first two rows of ``f2``.
    """
    columns = ('a1', 'a2', 'a3')

    def payload(make):
        # Fresh {column: 2 x D tensor} dict for a two-row write.
        return {name: make([2, D]) for name in columns}

    data = Frame(create_test_data(grad=True))
    f1 = FrameRef(data, index=toindex(slice(1, 5)))
    f2 = FrameRef(data, index=toindex(slice(3, 8)))

    # test read
    for key, value in f1.items():
        assert U.allclose(data[key].data[1:5], value)
    f2_a1 = f2['a1'].data

    # test write: rows outside the overlap leave f2 unchanged
    f1[Index(th.tensor([0, 1]))] = payload(th.zeros)
    assert U.allclose(f2['a1'], f2_a1)

    # rows inside the overlap, addressed with a tensor index
    f1[Index(th.tensor([2, 3]))] = payload(th.ones)
    f2_a1[toindex(slice(0, 2))] = 1
    assert U.allclose(f2['a1'], f2_a1)

    # same rows again, addressed with a slice index
    f1[toindex(slice(2, 4))] = payload(th.zeros)
    f2_a1[toindex(slice(0, 2))] = 0
    assert U.allclose(f2['a1'], f2_a1)
Beispiel #7
0
def test_inplace():
    """Check which FrameRef updates reallocate column storage and which do not.

    Storage identity is observed through the ``data_ptr()`` of each column.
    """
    f = FrameRef(Frame(create_test_data()))
    print(f.schemes)

    def addr(column):
        # Address of the tensor currently backing ``column``.
        return f[column].data.data_ptr()

    a1_before = addr('a1')
    a2_before = addr('a2')
    a3_before = addr('a3')

    # column updates are always out-of-place
    f['a1'] = th.ones((N, D))
    a1_after = addr('a1')
    assert a1_before != a1_after
    a1_before = a1_after
    # full row update that becomes column update
    f[toindex(slice(0, N))] = {'a1': th.ones((N, D))}
    assert addr('a1') != a1_before

    # row update (outplace) w/ slice
    f[toindex(slice(1, 4))] = {'a2': th.ones((3, D))}
    a2_after = addr('a2')
    assert a2_before != a2_after
    a2_before = a2_after
    # row update (outplace) w/ list
    f[toindex([1, 3, 5])] = {'a2': th.ones((3, D))}
    a2_after = addr('a2')
    assert a2_before != a2_after
    a2_before = a2_after

    # row update (inplace) w/ slice
    f.update_data(toindex(slice(1, 4)), {'a2': th.ones((3, D))}, True)
    assert a2_before == addr('a2')
    # row update (inplace) w/ list
    f.update_data(toindex([1, 3, 5]), {'a2': th.ones((3, D))}, True)
    assert a2_before == addr('a2')
Beispiel #8
0
def check_basics(g, ig):
    """Assert that two graph index objects answer basic queries identically.

    Parameters
    ----------
    g, ig
        Two graph index objects exposing the same query API (node/edge
        counts, ``edges``, ``predecessors``/``successors``, in/out edges and
        degrees, ``edge_id(s)`` and ``has_edge(s)_between``).

    Raises
    ------
    AssertionError
        If any of the compared queries disagree.
    """
    assert g.number_of_nodes() == ig.number_of_nodes()
    assert g.number_of_edges() == ig.number_of_edges()

    # The full edge list must match under both supported orderings.
    for order in ("srcdst", "eid"):
        edges = g.edges(order)
        iedges = ig.edges(order)
        for part in range(3):
            assert F.array_equal(edges[part].tousertensor(),
                                 iedges[part].tousertensor())

    for i in range(g.number_of_nodes()):
        assert g.has_node(i) == ig.has_node(i)

    for i in range(g.number_of_nodes()):
        assert F.array_equal(g.predecessors(i).tousertensor(), ig.predecessors(i).tousertensor())
        assert F.array_equal(g.successors(i).tousertensor(), ig.successors(i).tousertensor())

    # Batched queries on ten random (possibly repeated) node IDs.
    randv = np.random.randint(0, g.number_of_nodes(), 10)
    randv = utils.toindex(randv)
    in_src1, in_dst1, in_eids1 = sort_edges(g.in_edges(randv))
    in_src2, in_dst2, in_eids2 = sort_edges(ig.in_edges(randv))
    assert F.array_equal(in_src1, in_src2)
    assert F.array_equal(in_dst1, in_dst2)
    assert F.array_equal(in_eids1, in_eids2)

    out_src1, out_dst1, out_eids1 = sort_edges(g.out_edges(randv))
    out_src2, out_dst2, out_eids2 = sort_edges(ig.out_edges(randv))
    assert F.array_equal(out_dst1, out_dst2)
    assert F.array_equal(out_src1, out_src2)
    assert F.array_equal(out_eids1, out_eids2)

    assert F.array_equal(g.in_degrees(randv).tousertensor(), ig.in_degrees(randv).tousertensor())
    assert F.array_equal(g.out_degrees(randv).tousertensor(), ig.out_degrees(randv).tousertensor())

    # Scalar queries: convert the sampled IDs to NumPy once and reuse them.
    randv = randv.tousertensor()
    node_ids = F.asnumpy(randv)
    for v in node_ids:
        assert g.in_degree(v) == ig.in_degree(v)
        assert g.out_degree(v) == ig.out_degree(v)

    for u in node_ids:
        for v in node_ids:
            # Edge IDs are only compared when exactly one edge is reported.
            if len(g.edge_id(u, v)) == 1:
                assert g.edge_id(u, v).tonumpy() == ig.edge_id(u, v).tonumpy()
            assert g.has_edge_between(u, v) == ig.has_edge_between(u, v)
    randv = utils.toindex(randv)
    ids = g.edge_ids(randv, randv)[2].tonumpy()
    assert sum(ig.edge_ids(randv, randv)[2].tonumpy() == ids, 0) == len(ids)
    assert sum(g.has_edges_between(randv, randv).tonumpy() == ig.has_edges_between(randv, randv).tonumpy(), 0) == len(randv)
Beispiel #9
0
def test_edge_id():
    """Exercise edge ID lookups on a multigraph index.

    Covers single lookups, multi-edges, batched lookups with source and
    destination broadcasting, ``find_edges``, and the behaviour after
    ``clear()``.
    """
    gi = create_graph_index(multigraph=False)
    assert not gi.is_multigraph()

    gi = create_graph_index(multigraph=True)

    gi.add_nodes(4)
    gi.add_edge(0, 1)
    eid = gi.edge_id(0, 1).tonumpy()
    assert len(eid) == 1
    assert eid[0] == 0
    assert gi.is_multigraph()

    # multiedges
    gi.add_edge(0, 1)
    eid = gi.edge_id(0, 1).tonumpy()
    assert len(eid) == 2
    assert eid[0] == 0
    assert eid[1] == 1

    gi.add_edges(toindex([0, 1, 1, 2]), toindex([2, 2, 2, 3]))
    src, dst, eid = gi.edge_ids(toindex([0, 0, 2, 1]), toindex([2, 1, 3, 2]))
    eid_answer = [2, 0, 1, 5, 3, 4]
    assert len(eid) == 6
    assert all(e == ea for e, ea in zip(eid, eid_answer))

    # find edges
    src, dst, eid = gi.find_edges(toindex([1, 3, 5]))
    assert len(src) == len(dst) == len(eid) == 3
    assert src[0] == 0 and src[1] == 1 and src[2] == 2
    assert dst[0] == 1 and dst[1] == 2 and dst[2] == 3
    assert eid[0] == 1 and eid[1] == 3 and eid[2] == 5

    # source broadcasting
    src, dst, eid = gi.edge_ids(toindex([0]), toindex([1, 2]))
    eid_answer = [0, 1, 2]
    assert len(eid) == 3
    assert all(e == ea for e, ea in zip(eid, eid_answer))

    # destination broadcasting
    src, dst, eid = gi.edge_ids(toindex([1, 0]), toindex([2]))
    eid_answer = [3, 4, 2]
    assert len(eid) == 3
    assert all(e == ea for e, ea in zip(eid, eid_answer))

    gi.clear()
    # the following assumes that grabbing nonexistent edge will throw an error
    # Use try/except/else rather than a flag checked in `finally`: with the
    # flag, any exception other than DGLError left the flag unbound and the
    # `finally` assertion masked the real error with an UnboundLocalError.
    try:
        gi.edge_id(0, 1)
    except DGLError:
        pass
    else:
        raise AssertionError(
            'edge_id on a cleared graph index should raise DGLError')

    gi.add_nodes(4)
    gi.add_edge(0, 1)
    eid = gi.edge_id(0, 1).tonumpy()
    assert len(eid) == 1
    assert eid[0] == 0
Beispiel #10
0
def test_pickling_index():
    """Round-trip a list-based and a slice-based index through pickle."""
    # normal index
    list_index = toindex([1, 2, 3])
    list_index.tousertensor()
    list_index.todgltensor()  # construct a dgl tensor which is unpicklable
    _assert_is_identical_index(list_index, _reconstruct_pickle(list_index))

    # slice index
    slice_index = toindex(slice(5, 10))
    _assert_is_identical_index(slice_index, _reconstruct_pickle(slice_index))
Beispiel #11
0
def test_pickling_graph_index():
    """Round-trip a small graph index through pickle and compare its edges."""
    src_idx = toindex([0, 0])
    dst_idx = toindex([1, 2])

    original = create_graph_index()
    original.add_nodes(3)
    original.add_edges(src_idx, dst_idx)

    restored = _reconstruct_pickle(original)

    assert restored.number_of_nodes() == original.number_of_nodes()
    restored_src, restored_dst, _ = restored.edges()
    assert F.array_equal(src_idx.tousertensor(), restored_src.tousertensor())
    assert F.array_equal(dst_idx.tousertensor(), restored_dst.tousertensor())
Beispiel #12
0
def test_load_csr():
    """Verify that a graph index built from CSR arrays mirrors the source matrix."""
    n = 100
    random_matrix = sp.sparse.random(n, n, density=0.1, format='csr')
    csr = (random_matrix != 0).astype(np.int64)

    # Load CSR normally.
    indptr = utils.toindex(csr.indptr)
    indices = utils.toindex(csr.indices)
    idx = dgl.graph_index.from_csr(indptr, indices, 'out')
    assert idx.number_of_nodes() == n
    assert idx.number_of_edges() == csr.nnz

    # All three edge components are converted, but only src/dst are compared.
    src, dst, eid = (part.tousertensor() for part in idx.edges())
    coo = csr.tocoo()
    assert np.all(F.asnumpy(src) == coo.row)
    assert np.all(F.asnumpy(dst) == coo.col)
Beispiel #13
0
def create_mini_batch(g, num_hops, add_self_loop=False):
    """Build a NodeFlow by neighbor sampling from the fixed seed nodes 0..3.

    Parameters
    ----------
    g
        Graph whose ``_graph`` index provides ``neighbor_sampling``.
    num_hops : int
        Forwarded to ``neighbor_sampling`` as the number of hops.
    add_self_loop : bool, optional
        Forwarded unchanged to ``neighbor_sampling``.

    Returns
    -------
    dgl.node_flow.NodeFlow
        NodeFlow wrapping the single sampled subgraph.
    """
    seeds = utils.toindex(np.array([0, 1, 2, 3]))
    # One seed batch in, so exactly one sampled subgraph is expected back.
    sampled = g._graph.neighbor_sampling(
        [seeds], g.number_of_nodes(), num_hops, "in", None, add_self_loop)
    assert len(sampled) == 1
    return dgl.node_flow.NodeFlow(g, sampled[0])
Beispiel #14
0
def l0_sample(g, positive_max=128, negative_ratio=3):
    """Sample positive and negative edges of ``g`` into an edge subgraph.

    Edges whose ``edata['rel_class']`` value is > 0 are treated as positive
    and edges whose value is 0 as negative.

    Parameters
    ----------
    g : graph or None
        Graph carrying a ``'rel_class'`` edge feature.
    positive_max : int, optional
        Upper bound on the number of positive edges sampled.
    negative_ratio : int, optional
        At most ``negative_ratio`` negative edges are sampled per sampled
        positive edge.

    Returns
    -------
    graph or None
        Edge subgraph over the sampled edges, with parent features copied
        and an extra ``'sample_weights'`` edge feature (1 for every sampled
        edge). ``None`` if ``g`` is ``None`` or has no positive edge.
    """
    if g is None:
        return None
    n_eids = g.number_of_edges()
    # Convert the labels to NumPy once instead of once per class.
    rel_class = g.edata['rel_class'].asnumpy()
    pos_eids = np.where(rel_class > 0)[0]
    neg_eids = np.where(rel_class == 0)[0]
    if len(pos_eids) == 0:
        return None

    positive_num = min(len(pos_eids), positive_max)
    negative_num = min(len(neg_eids), positive_num * negative_ratio)
    pos_sample = np.random.choice(pos_eids, positive_num, replace=False)
    neg_sample = np.random.choice(neg_eids, negative_num, replace=False)

    # Mark the sampled edges, then recover their IDs in ascending order.
    weights = np.zeros(n_eids)
    weights[pos_sample] = 1
    weights[neg_sample] = 1
    eids = np.where(weights > 0)[0]

    sub_g = g.edge_subgraph(toindex(eids.tolist()))
    sub_g.copy_from_parent()
    sub_g.edata['sample_weights'] = mx.nd.array(
        weights[eids], ctx=g.edata['rel_class'].context)
    return sub_g
Beispiel #15
0
def test_block_edges():
    """Check NodeFlow.block_edges against in_edges of the next layer.

    Also checks that negative block IDs address the same blocks and that
    ``remap_local=True`` matches the explicit global-to-local ID mapping.
    """
    def same(lhs, rhs):
        # Element-wise equality of two backend tensors.
        assert_array_equal(F.asnumpy(lhs), F.asnumpy(rhs))

    num_layers = 3
    g = generate_rand_graph(100)
    nf = create_mini_batch(g, num_layers)
    assert nf.num_layers == num_layers + 1

    for block_id in range(nf.num_blocks):
        dest_nodes = utils.toindex(nf.layer_nid(block_id + 1))
        src1, dst1, eid1 = nf.in_edges(dest_nodes, 'all')

        src, dst, eid = nf.block_edges(block_id)
        same(src, src1)
        same(dst, dst1)
        same(eid, eid1)

        src, dst, eid = nf.block_edges(block_id, remap_local=True)
        # should also work for negative block ids
        neg_id = -nf.num_blocks + block_id
        src_by_neg, dst_by_neg, eid_by_neg = nf.block_edges(
            neg_id, remap_local=True)
        same(src, src_by_neg)
        same(dst, dst_by_neg)
        same(eid, eid_by_neg)

        src1 = nf._glb2lcl_nid(src1, block_id)
        dst1 = nf._glb2lcl_nid(dst1, block_id + 1)
        same(src, src1)
        same(dst, dst1)
Beispiel #16
0
def test_node_batch():
    """Check NodeBatch over all nodes and over a subset of a 20-node path graph."""
    g = dgl.DGLGraph(nx.path_graph(20))
    num_nodes = g.number_of_nodes()
    feat = F.randn((num_nodes, 10))
    g.ndata['x'] = feat

    # test all
    v = ALL
    batch = NodeBatch(g, v, g.get_n_repr(v))
    assert F.allclose(batch.data['x'], feat)
    assert batch.mailbox is None
    assert F.allclose(batch.nodes(), g.nodes())
    assert batch.batch_size() == g.number_of_nodes()
    assert len(batch) == g.number_of_nodes()

    # test partial
    picked = [0, 3, 5, 7, 9]
    v = utils.toindex(F.tensor(picked))
    batch = NodeBatch(g, v, g.get_n_repr(v))
    expected_feat = F.gather_row(feat, F.tensor(picked))
    assert F.allclose(batch.data['x'], expected_feat)
    assert batch.mailbox is None
    assert F.allclose(batch.nodes(), F.tensor(picked))
    assert batch.batch_size() == 5
    assert len(batch) == 5
Beispiel #17
0
def check_basics(g, ig):
    """Assert that two graph index objects answer basic queries consistently.

    Tensor results are compared through MXNet reductions: neighbour lists by
    their sums, edge and degree arrays by counting matching elements.
    """
    def total(index):
        # Sum of the IDs held by an index, as a NumPy value.
        return mx.nd.sum(index.tousertensor()).asnumpy()

    def num_equal(lhs, rhs):
        # Number of positions at which two indices hold equal values.
        return mx.nd.sum(lhs.tousertensor() == rhs.tousertensor()).asnumpy()

    assert g.number_of_nodes() == ig.number_of_nodes()
    assert g.number_of_edges() == ig.number_of_edges()

    # The edge lists are fetched but not compared here.
    edges = g.edges()
    iedges = ig.edges()

    for node in range(g.number_of_nodes()):
        assert g.has_node(node) == ig.has_node(node)

    for node in range(g.number_of_nodes()):
        assert total(g.predecessors(node)) == total(ig.predecessors(node))
        assert total(g.successors(node)) == total(ig.successors(node))

    randv = utils.toindex(np.random.randint(0, g.number_of_nodes(), 10))

    in_src1, in_dst1, in_eids1 = g.in_edges(randv)
    in_src2, in_dst2, in_eids2 = ig.in_edges(randv)
    nnz = in_src2.tousertensor().shape[0]
    assert num_equal(in_src1, in_src2) == nnz
    assert num_equal(in_dst1, in_dst2) == nnz
    assert num_equal(in_eids1, in_eids2) == nnz

    out_src1, out_dst1, out_eids1 = g.out_edges(randv)
    out_src2, out_dst2, out_eids2 = ig.out_edges(randv)
    nnz = out_dst2.tousertensor().shape[0]
    assert num_equal(out_dst1, out_dst2) == nnz
    assert num_equal(out_src1, out_src2) == nnz
    assert num_equal(out_eids1, out_eids2) == nnz

    num_v = len(randv)
    assert num_equal(g.in_degrees(randv), ig.in_degrees(randv)) == num_v
    assert num_equal(g.out_degrees(randv), ig.out_degrees(randv)) == num_v

    randv = randv.tousertensor()
    node_ids = randv.asnumpy()
    for v in node_ids:
        assert g.in_degree(v) == ig.in_degree(v)
        assert g.out_degree(v) == ig.out_degree(v)

    for u in node_ids:
        for v in node_ids:
            # Edge IDs are only compared when exactly one edge is reported.
            if len(g.edge_id(u, v)) == 1:
                assert g.edge_id(u, v).tonumpy() == ig.edge_id(u, v).tonumpy()
            assert g.has_edge_between(u, v) == ig.has_edge_between(u, v)

    randv = utils.toindex(randv)
    ids = g.edge_ids(randv, randv)[2].tonumpy()
    assert sum(ig.edge_ids(randv, randv)[2].tonumpy() == ids) == len(ids)
    has_edges = g.has_edges_between(randv, randv).tonumpy()
    assert sum(has_edges == ig.has_edges_between(randv, randv).tonumpy()) == len(randv)
Beispiel #18
0
def test_edge_batch():
    """Check EdgeBatch over all edges and over a subset of a 20-node path graph."""
    d = 10
    g = dgl.DGLGraph(nx.path_graph(20))
    nfeat = F.randn((g.number_of_nodes(), d))
    efeat = F.randn((g.number_of_edges(), d))
    g.ndata['x'] = nfeat
    g.edata['x'] = efeat

    def make_batch(u, v, eid):
        # Gather source, edge and destination features for the given edges.
        src_data = g.get_n_repr(u)
        edge_data = g.get_e_repr(eid)
        dst_data = g.get_n_repr(v)
        return EdgeBatch((u, v, eid), src_data, edge_data, dst_data)

    def check_feature_shapes(ebatch, rows):
        # Every 'x' feature view must be (rows, d).
        assert F.shape(ebatch.src['x'])[0] == rows and\
            F.shape(ebatch.src['x'])[1] == d
        assert F.shape(ebatch.dst['x'])[0] == rows and\
            F.shape(ebatch.dst['x'])[1] == d
        assert F.shape(ebatch.data['x'])[0] == rows and\
            F.shape(ebatch.data['x'])[1] == d

    # test all
    eid = utils.toindex(slice(0, g.number_of_edges()))
    u, v, _ = g._graph.edges('eid')
    ebatch = make_batch(u, v, eid)
    check_feature_shapes(ebatch, g.number_of_edges())
    assert F.allclose(ebatch.edges()[0], u.tousertensor())
    assert F.allclose(ebatch.edges()[1], v.tousertensor())
    assert F.allclose(ebatch.edges()[2], F.arange(0, g.number_of_edges()))
    assert ebatch.batch_size() == g.number_of_edges()
    assert len(ebatch) == g.number_of_edges()

    # test partial
    eid = utils.toindex(F.tensor([0, 3, 5, 7, 11, 13, 15, 27]))
    u, v, _ = g._graph.find_edges(eid)
    ebatch = make_batch(u, v, eid)
    check_feature_shapes(ebatch, 8)
    assert F.allclose(ebatch.edges()[0], u.tousertensor())
    assert F.allclose(ebatch.edges()[1], v.tousertensor())
    assert F.allclose(ebatch.edges()[2], eid.tousertensor())
    assert ebatch.batch_size() == 8
    assert len(ebatch) == 8
Beispiel #19
0
def test_pickling_index():
    """Round-trip a list-based index through pickle and compare its contents."""
    original = toindex([1, 2, 3])
    original.tousertensor()
    original.todgltensor()  # construct a dgl tensor which is unpicklable

    restored = _reconstruct_pickle(original)

    restored_tensor = restored.tousertensor()
    assert F.array_equal(restored_tensor, original.tousertensor())
Beispiel #20
0
def bipartite_single_sided_random_walk_with_restart(
        g,
        seeds,
        restart_prob,
        max_nodes_per_seed,
        max_visit_counts=0,
        max_frequent_visited_nodes=0):
    """Generate random-walk-with-restart traces on a bipartite graph in batch.

    The graph must be a bipartite graph.

    Each random walk step consists of two normal steps, so the "visited"
    nodes always stay on the same side of the graph. [1]

    Parameters
    ----------
    g : DGLGraph
        The graph.
    seeds : Tensor
        Node IDs at which the random walk traces start.
    restart_prob : float
        Probability of stopping a random walk after each step.
    max_nodes_per_seed : int
        Stop generating traces for a seed once the total number of visited
        nodes exceeds this number. [1]
    max_visit_counts : int, optional
    max_frequent_visited_nodes : int, optional
        Alternatively, stop generating traces for a seed once no less than
        ``max_frequent_visited_nodes`` nodes have each been visited no less
        than ``max_visit_counts`` times. [1]

    Returns
    -------
    traces : list[list[Tensor]]
        traces[i][j] is the j-th trace generated for the i-th seed.
        An empty list is returned when ``seeds`` is empty.

    Notes
    -----
    The current implementation does not verify that the graph is bipartite.

    The traces do **not** include the seed nodes themselves.

    Reference
    ---------
    [1] Eksombatchai et al., 2017 https://arxiv.org/abs/1711.07601
    """
    if len(seeds) != 0:
        seed_array = utils.toindex(seeds).todgltensor()
        raw_traces = _CAPI_DGLBipartiteSingleSidedRandomWalkWithRestart(
            g._graph._handle,
            seed_array,
            restart_prob,
            int(max_nodes_per_seed),
            int(max_visit_counts),
            int(max_frequent_visited_nodes))
        return _split_traces(raw_traces)
    # No seeds: skip the C API call entirely.
    return []
Beispiel #21
0
def test_row3():
    """Check that deleting rows through a FrameRef is lazy."""
    # test row delete
    data = Frame(create_test_data())
    f = FrameRef(data)
    assert f.is_contiguous()
    assert f.is_span_whole_column()
    assert f.num_rows == N

    removed = (2, 3)
    del f[toindex(th.tensor([2, 3]))]
    assert not f.is_contiguous()
    assert not f.is_span_whole_column()
    # delete is lazy: it is only reflected on the ref, while the
    # underlying storage should not be touched
    assert f.num_rows == N - 2
    assert data.num_rows == N

    # Rows that should remain visible through the ref.
    kept = toindex([row for row in range(N) if row not in removed])
    for key, value in f.items():
        assert U.allclose(value, data[key][kept])
Beispiel #22
0
def test_index():
    """Check that toindex accepts several input kinds and converts them back.

    Each input holds ten 10s; the index must reproduce them as a NumPy
    array, a user tensor and a DGL tensor.
    """
    ans = np.ones((10, ), dtype=np.int64) * 10

    def check(data):
        # Build an index from ``data`` and compare all three views to ``ans``.
        idx = toindex(data)
        as_numpy = idx.tonumpy()
        as_user = F.asnumpy(idx.tousertensor())
        as_dgl = idx.todgltensor().asnumpy()
        assert np.allclose(ans, as_numpy)
        assert np.allclose(ans, as_user)
        assert np.allclose(ans, as_dgl)

    # from np data
    check(np.ones((10, ), dtype=np.int64) * 10)

    # from list
    check([10] * 10)

    # from dl tensor
    check(F.ones((10, ), dtype=F.int64) * 10)

    # from dgl.NDArray
    check(dgl.ndarray.array(np.ones((10, ), dtype=np.int64) * 10))
Beispiel #23
0
def test_block_adj_matrix():
    """Compare each NodeFlow block adjacency matrix with one built from its edges.

    Negative block IDs must produce the same matrices.
    """
    def dense_adjacency(block_id):
        # Block adjacency matrix on CPU, converted to NumPy.
        mat, _ = nf.block_adjacency_matrix(block_id, F.cpu())
        return F.sparse_to_numpy(mat)

    num_layers = 3
    g = generate_rand_graph(100)
    nf = create_mini_batch(g, num_layers)
    assert nf.num_layers == num_layers + 1

    for block_id in range(nf.num_blocks):
        u, v, _ = nf.block_edges(block_id, remap_local=True)
        adj = dense_adjacency(block_id)

        # should also work for negative block ids
        adj_by_neg = dense_adjacency(-nf.num_blocks + block_id)

        # Expected matrix: a 1 at (dst, src) for every edge of the block.
        ones = np.ones((len(u)), dtype=np.float32)
        v = utils.toindex(v)
        u = utils.toindex(u)
        rows_cols = (v.tonumpy(), u.tonumpy())
        expected = sp.sparse.coo_matrix(
            (ones, rows_cols), shape=adj.shape).todense()
        assert_array_equal(adj, expected)
        assert_array_equal(adj_by_neg, expected)
Beispiel #24
0
def test_node_subgraph_with_halo():
    """Check edges and inner node/edge labels of a halo subgraph."""
    gi = create_large_graph_index(1000)
    nodes = np.random.choice(gi.number_of_nodes(), 100, replace=False)
    halo_subg, inner_node, inner_edge = gi.node_halo_subgraph(toindex(nodes), 2)

    # Check if edges in the subgraph are in the original graph.
    for s, d, e in zip(*halo_subg.graph.edges()):
        parent_edge = halo_subg.induced_edges[e]
        parent_src = halo_subg.induced_nodes[s]
        parent_dst = halo_subg.induced_nodes[d]
        assert parent_edge in gi.edge_id(parent_src, parent_dst)

    # Check if the inner node labels are correct.
    inner_node = inner_node.asnumpy()
    inner_positions = np.nonzero(inner_node)[0]
    inner_node_ids = halo_subg.induced_nodes.tonumpy()[inner_positions]
    assert np.all(np.sort(inner_node_ids) == np.sort(nodes))

    # Check if the inner edge labels are correct.
    inner_edge = inner_edge.asnumpy()
    inner_edge_ids = halo_subg.induced_edges.tonumpy()[inner_edge > 0]
    subg = gi.node_subgraph(toindex(nodes))
    plain_edge_ids = subg.induced_edges.tonumpy()
    assert np.all(np.sort(plain_edge_ids) == np.sort(inner_edge_ids))
Beispiel #25
0
def test_node_subgraph():
    """Check that two graph implementations produce matching node subgraphs.

    Covers a single ``node_subgraph`` call, including the parent-to-subgraph
    node ID mapping, and the batched ``node_subgraphs`` call.
    """
    num_vertices = 100
    g, ig = generate_rand_graph(num_vertices)

    # node_subgraph
    randv1 = np.random.randint(0, num_vertices, 20)
    randv = np.unique(randv1)
    subg = g.node_subgraph(utils.toindex(randv))
    subig = ig.node_subgraph(utils.toindex(randv))
    check_basics(subg, subig)
    check_graph_equal(subg, subig)
    assert F.sum(
        map_to_subgraph_nid(subg, utils.toindex(
            randv1[0:10])).tousertensor() == map_to_subgraph_nid(
                subig, utils.toindex(randv1[0:10])).tousertensor(), 0) == 10

    # node_subgraphs
    randvs = []
    subgs = []
    for i in range(4):
        randv = np.unique(np.random.randint(0, num_vertices, 20))
        randvs.append(utils.toindex(randv))
        subgs.append(g.node_subgraph(utils.toindex(randv)))
    subigs = ig.node_subgraphs(randvs)
    for i in range(4):
        # Check the i-th pair: the previous code re-checked the stale
        # `subg`/`subig` from the first half on every iteration.
        check_basics(subgs[i], subigs[i])
        check_graph_equal(subgs[i], subigs[i])
Beispiel #26
0
def test_block_incidence_matrix():
    """Compare NodeFlow block incidence matrices with ones built from block edges.

    Covers the "in" and "out" types; negative block IDs must produce the
    same matrices.
    """
    def dense_incidence(block_id, typestr):
        # Block incidence matrix on CPU, converted to NumPy.
        mat, _ = nf.block_incidence_matrix(block_id, typestr, F.cpu())
        return F.sparse_to_numpy(mat)

    num_layers = 3
    g = generate_rand_graph(100)
    nf = create_mini_batch(g, num_layers)
    assert nf.num_layers == num_layers + 1

    for block_id in range(nf.num_blocks):
        typestrs = ["in", "out"]  # todo need fix for "both"
        adjs = [dense_incidence(block_id, typestr) for typestr in typestrs]

        # should work for negative block ids
        neg_id = -nf.num_blocks + block_id
        adjs_by_neg = [dense_incidence(neg_id, typestr) for typestr in typestrs]

        u, v, e = nf.block_edges(block_id, remap_local=True)
        u = utils.toindex(u)
        v = utils.toindex(v)
        e = utils.toindex(e)

        # "in" puts a 1 at (dst, edge); "out" puts a 1 at (src, edge).
        ones = np.ones((len(u)), dtype=np.float32)
        expected_in = sp.sparse.coo_matrix(
            (ones, (v.tonumpy(), e.tonumpy())), shape=adjs[0].shape).todense()
        expected_out = sp.sparse.coo_matrix(
            (ones, (u.tonumpy(), e.tonumpy())), shape=adjs[1].shape).todense()
        expected = [expected_in, expected_out]

        for actual, actual_by_neg, wanted in zip(adjs, adjs_by_neg, expected):
            assert_array_equal(actual, wanted)
            assert_array_equal(actual_by_neg, wanted)
Beispiel #27
0
def test_block_adj_matrix():
    """Check block edges and block adjacency matrices of a NodeFlow.

    Block edges are compared with the locally remapped in-edges of the next
    layer, and the adjacency matrix with one rebuilt from those edges.
    """
    num_layers = 3
    g = generate_rand_graph(100)
    nf = create_mini_batch(g, num_layers)
    assert nf.num_layers == num_layers + 1

    for block_id in range(nf.num_blocks):
        src, dst, eid = nf.block_edges(block_id)
        dest_nodes = utils.toindex(nf.layer_nid(block_id + 1))
        u, v, _ = nf._graph.in_edges(dest_nodes)
        # Map global node IDs to IDs local to their layer.
        u = nf._glb2lcl_nid(u.tousertensor(), block_id)
        v = nf._glb2lcl_nid(v.tousertensor(), block_id + 1)
        assert F.array_equal(src, u)
        assert F.array_equal(dst, v)

        adj, _ = nf.block_adjacency_matrix(block_id, F.cpu())
        adj = F.sparse_to_numpy(adj)

        # Expected matrix: a 1 at (dst, src) for every edge of the block.
        ones = np.ones((len(u)), dtype=np.float32)
        v = utils.toindex(v)
        u = utils.toindex(u)
        rows_cols = (v.tonumpy(), u.tonumpy())
        expected = sp.sparse.coo_matrix(
            (ones, rows_cols), shape=adj.shape).todense()
        assert np.array_equal(adj, expected)
Beispiel #28
0
def test_add_rows():
    """Check that rows added to an empty frame are zero-filled until written."""
    frame = Frame()
    ref = FrameRef(frame)

    # First batch of four rows: write only row 0 of column 'x'.
    ref.add_rows(4)
    first_row = th.randn(1, 4)
    ref[Index(th.tensor([0]))] = {'x': first_row}
    expected_x = th.cat([first_row, th.zeros(3, 4)])
    assert U.allclose(ref['x'], expected_x)

    # Second batch: fill rows 4..7 of 'x' and introduce column 'y'.
    ref.add_rows(4)
    ref[toindex(slice(4, 8))] = {'x': th.ones(4, 4), 'y': th.ones(4, 5)}
    expected_x = th.cat([expected_x, th.ones(4, 4)])
    assert U.allclose(ref['x'], expected_x)
    expected_y = th.cat([th.zeros(4, 5), th.ones(4, 5)])
    assert U.allclose(ref['y'], expected_y)
Beispiel #29
0
def test_edge_subgraph():
    """Check that every edge of an edge subgraph maps to a parent edge."""
    gi = create_graph_index()
    gi.add_nodes(4)
    for src, dst in ((0, 1), (0, 1), (0, 2), (2, 3)):
        gi.add_edge(src, dst)

    sub2par_edgemap = [3, 2]
    sgi = gi.edge_subgraph(toindex(sub2par_edgemap))

    for s, d, e in zip(*sgi.edges()):
        parent_edge = sgi.induced_edges[e]
        parent_src = sgi.induced_nodes[s]
        parent_dst = sgi.induced_nodes[d]
        assert parent_edge in gi.edge_id(parent_src, parent_dst)
Beispiel #30
0
def test_node_subgraph():
    """Check that every edge of a node subgraph maps to a parent edge."""
    gi = create_graph_index(None, False)
    gi.add_nodes(4)
    for src, dst in ((0, 1), (0, 2), (0, 2), (0, 3)):
        gi.add_edge(src, dst)

    sub2par_nodemap = [2, 0, 3]
    sgi = gi.node_subgraph(toindex(sub2par_nodemap))

    for s, d, e in zip(*sgi.graph.edges()):
        parent_edge = sgi.induced_edges[e]
        parent_src = sgi.induced_nodes[s]
        parent_dst = sgi.induced_nodes[d]
        assert parent_edge in gi.edge_id(parent_src, parent_dst)