def random_walk(g, seeds, num_traces, num_hops):
    """Batch-generate random walk traces on the given graph, all of the same length.

    Parameters
    ----------
    g : DGLGraph
        The graph.
    seeds : Tensor
        The node ID tensor from which the random walk traces start.
    num_traces : int
        Number of traces to generate for each seed.
    num_hops : int
        Number of hops for each trace.

    Returns
    -------
    traces : Tensor
        A 3-dimensional node ID tensor with shape
        (num_seeds, num_traces, num_hops + 1).
        traces[i, j, 0] is always the starting node (i.e. seeds[i]).
    """
    if len(seeds) == 0:
        return utils.toindex([]).tousertensor()

    seeds = utils.toindex(seeds).todgltensor()
    traces = _CAPI_DGLRandomWalk(g._graph._handle, seeds,
                                 int(num_traces), int(num_hops))
    return F.zerocopy_from_dlpack(traces.to_dlpack())

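# Hedged usage sketch for random_walk, not part of the module: it assumes a
# torch-like default backend, and the 5-node cycle graph below is purely
# illustrative.
def _random_walk_example():
    import dgl
    g = dgl.DGLGraph()
    g.add_nodes(5)
    g.add_edges([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])  # a directed 5-cycle
    # Two traces of three hops from each of the two seeds.
    traces = random_walk(g, seeds=[0, 1], num_traces=2, num_hops=3)
    # Shape is (num_seeds, num_traces, num_hops + 1); column 0 holds the seed.
    assert tuple(traces.shape) == (2, 2, 4)
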
def test_load_csr():
    n = 100
    csr = (sp.sparse.random(n, n, density=0.1, format='csr') != 0).astype(np.int64)

    # Load CSR normally.
    idx = dgl.graph_index.from_csr(
        utils.toindex(csr.indptr), utils.toindex(csr.indices), False, 'out')
    assert idx.number_of_nodes() == n
    assert idx.number_of_edges() == csr.nnz
    src, dst, eid = idx.edges()
    src, dst, eid = src.tousertensor(), dst.tousertensor(), eid.tousertensor()
    coo = csr.tocoo()
    assert np.all(F.asnumpy(src) == coo.row)
    assert np.all(F.asnumpy(dst) == coo.col)

    # Load CSR into shared memory. Shared memory isn't supported on Windows.
    if os.name != 'nt':
        idx = dgl.graph_index.from_csr(
            utils.toindex(csr.indptr), utils.toindex(csr.indices),
            False, 'out', '/test_graph_struct')
        assert idx.number_of_nodes() == n
        assert idx.number_of_edges() == csr.nnz
        src, dst, eid = idx.edges()
        src, dst, eid = src.tousertensor(), dst.tousertensor(), eid.tousertensor()
        coo = csr.tocoo()
        assert np.all(F.asnumpy(src) == coo.row)
        assert np.all(F.asnumpy(dst) == coo.col)

def test_slicing():
    data = Frame(create_test_data(grad=True))
    f1 = FrameRef(data, index=toindex(slice(1, 5)))
    f2 = FrameRef(data, index=toindex(slice(3, 8)))
    # test read
    for k, v in f1.items():
        assert F.allclose(F.narrow_row(data[k].data, 1, 5), v)
    f2_a1 = f2['a1']  # is a tensor

    # test write
    f1[Index(F.tensor([0, 1]))] = {
        'a1': F.zeros([2, D]),
        'a2': F.zeros([2, D]),
        'a3': F.zeros([2, D]),
    }
    assert F.allclose(f2['a1'], f2_a1)

    f1[Index(F.tensor([2, 3]))] = {
        'a1': F.ones([2, D]),
        'a2': F.ones([2, D]),
        'a3': F.ones([2, D]),
    }
    F.narrow_row_set(f2_a1, 0, 2, 1)
    assert F.allclose(f2['a1'], f2_a1)

    f1[toindex(slice(2, 4))] = {
        'a1': F.zeros([2, D]),
        'a2': F.zeros([2, D]),
        'a3': F.zeros([2, D]),
    }
    F.narrow_row_set(f2_a1, 0, 2, 0)
    assert F.allclose(f2['a1'], f2_a1)

def test_sharing():
    data = Frame(create_test_data())
    f1 = FrameRef(data, index=toindex([0, 1, 2, 3]))
    f2 = FrameRef(data, index=toindex([2, 3, 4, 5, 6]))
    # test read
    for k, v in f1.items():
        assert U.allclose(data[k].data[0:4], v)
    for k, v in f2.items():
        assert U.allclose(data[k].data[2:7], v)
    f2_a1 = f2['a1'].data

    # test write
    # An update to rows owned by only one ref should not be seen by the other.
    f1[Index(th.tensor([0, 1]))] = {
        'a1': th.zeros([2, D]),
        'a2': th.zeros([2, D]),
        'a3': th.zeros([2, D]),
    }
    assert U.allclose(f2['a1'], f2_a1)
    # An update to the shared rows should be seen by the other.
    f1[Index(th.tensor([2, 3]))] = {
        'a1': th.ones([2, D]),
        'a2': th.ones([2, D]),
        'a3': th.ones([2, D]),
    }
    f2_a1[0:2] = th.ones([2, D])
    assert U.allclose(f2['a1'], f2_a1)

def test_sharing():
    data = Frame(create_test_data())
    f1 = FrameRef(data, index=toindex([0, 1, 2, 3]))
    f2 = FrameRef(data, index=toindex([2, 3, 4, 5, 6]))
    # test read
    for k, v in f1.items():
        assert F.allclose(F.narrow_row(data[k].data, 0, 4), v)
    for k, v in f2.items():
        assert F.allclose(F.narrow_row(data[k].data, 2, 7), v)
    f2_a1 = f2['a1']

    # test write
    # An update to rows owned by only one ref should not be seen by the other.
    f1[Index(F.tensor([0, 1]))] = {
        'a1': F.zeros([2, D]),
        'a2': F.zeros([2, D]),
        'a3': F.zeros([2, D]),
    }
    assert F.allclose(f2['a1'], f2_a1)
    # An update to the shared rows should be seen by the other.
    f1[Index(F.tensor([2, 3]))] = {
        'a1': F.ones([2, D]),
        'a2': F.ones([2, D]),
        'a3': F.ones([2, D]),
    }
    F.narrow_row_set(f2_a1, 0, 2, F.ones([2, D]))
    assert F.allclose(f2['a1'], f2_a1)

def test_slicing():
    data = Frame(create_test_data(grad=True))
    f1 = FrameRef(data, index=toindex(slice(1, 5)))
    f2 = FrameRef(data, index=toindex(slice(3, 8)))
    # test read
    for k, v in f1.items():
        assert U.allclose(data[k].data[1:5], v)
    f2_a1 = f2['a1'].data

    # test write
    f1[Index(th.tensor([0, 1]))] = {
        'a1': th.zeros([2, D]),
        'a2': th.zeros([2, D]),
        'a3': th.zeros([2, D]),
    }
    assert U.allclose(f2['a1'], f2_a1)

    f1[Index(th.tensor([2, 3]))] = {
        'a1': th.ones([2, D]),
        'a2': th.ones([2, D]),
        'a3': th.ones([2, D]),
    }
    f2_a1[0:2] = 1
    assert U.allclose(f2['a1'], f2_a1)

    f1[toindex(slice(2, 4))] = {
        'a1': th.zeros([2, D]),
        'a2': th.zeros([2, D]),
        'a3': th.zeros([2, D]),
    }
    f2_a1[0:2] = 0
    assert U.allclose(f2['a1'], f2_a1)

def test_inplace():
    f = FrameRef(Frame(create_test_data()))
    print(f.schemes)
    a1addr = f['a1'].data.data_ptr()
    a2addr = f['a2'].data.data_ptr()
    a3addr = f['a3'].data.data_ptr()

    # column updates are always out-of-place
    f['a1'] = th.ones((N, D))
    newa1addr = f['a1'].data.data_ptr()
    assert a1addr != newa1addr
    a1addr = newa1addr
    # a full row update becomes a column update
    f[toindex(slice(0, N))] = {'a1': th.ones((N, D))}
    assert f['a1'].data.data_ptr() != a1addr

    # row update (out-of-place) w/ slice
    f[toindex(slice(1, 4))] = {'a2': th.ones((3, D))}
    newa2addr = f['a2'].data.data_ptr()
    assert a2addr != newa2addr
    a2addr = newa2addr
    # row update (out-of-place) w/ list
    f[toindex([1, 3, 5])] = {'a2': th.ones((3, D))}
    newa2addr = f['a2'].data.data_ptr()
    assert a2addr != newa2addr
    a2addr = newa2addr

    # row update (in-place) w/ slice
    f.update_data(toindex(slice(1, 4)), {'a2': th.ones((3, D))}, True)
    newa2addr = f['a2'].data.data_ptr()
    assert a2addr == newa2addr
    # row update (in-place) w/ list
    f.update_data(toindex([1, 3, 5]), {'a2': th.ones((3, D))}, True)
    newa2addr = f['a2'].data.data_ptr()
    assert a2addr == newa2addr

def check_basics(g, ig):
    assert g.number_of_nodes() == ig.number_of_nodes()
    assert g.number_of_edges() == ig.number_of_edges()

    edges = g.edges("srcdst")
    iedges = ig.edges("srcdst")
    assert F.array_equal(edges[0].tousertensor(), iedges[0].tousertensor())
    assert F.array_equal(edges[1].tousertensor(), iedges[1].tousertensor())
    assert F.array_equal(edges[2].tousertensor(), iedges[2].tousertensor())

    edges = g.edges("eid")
    iedges = ig.edges("eid")
    assert F.array_equal(edges[0].tousertensor(), iedges[0].tousertensor())
    assert F.array_equal(edges[1].tousertensor(), iedges[1].tousertensor())
    assert F.array_equal(edges[2].tousertensor(), iedges[2].tousertensor())

    for i in range(g.number_of_nodes()):
        assert g.has_node(i) == ig.has_node(i)

    for i in range(g.number_of_nodes()):
        assert F.array_equal(g.predecessors(i).tousertensor(),
                             ig.predecessors(i).tousertensor())
        assert F.array_equal(g.successors(i).tousertensor(),
                             ig.successors(i).tousertensor())

    randv = np.random.randint(0, g.number_of_nodes(), 10)
    randv = utils.toindex(randv)
    in_src1, in_dst1, in_eids1 = sort_edges(g.in_edges(randv))
    in_src2, in_dst2, in_eids2 = sort_edges(ig.in_edges(randv))
    nnz = in_src2.shape[0]
    assert F.array_equal(in_src1, in_src2)
    assert F.array_equal(in_dst1, in_dst2)
    assert F.array_equal(in_eids1, in_eids2)

    out_src1, out_dst1, out_eids1 = sort_edges(g.out_edges(randv))
    out_src2, out_dst2, out_eids2 = sort_edges(ig.out_edges(randv))
    nnz = out_dst2.shape[0]
    assert F.array_equal(out_dst1, out_dst2)
    assert F.array_equal(out_src1, out_src2)
    assert F.array_equal(out_eids1, out_eids2)

    num_v = len(randv)
    assert F.array_equal(g.in_degrees(randv).tousertensor(),
                         ig.in_degrees(randv).tousertensor())
    assert F.array_equal(g.out_degrees(randv).tousertensor(),
                         ig.out_degrees(randv).tousertensor())

    randv = randv.tousertensor()
    for v in F.asnumpy(randv):
        assert g.in_degree(v) == ig.in_degree(v)
        assert g.out_degree(v) == ig.out_degree(v)

    for u in F.asnumpy(randv):
        for v in F.asnumpy(randv):
            if len(g.edge_id(u, v)) == 1:
                assert g.edge_id(u, v).tonumpy() == ig.edge_id(u, v).tonumpy()
            assert g.has_edge_between(u, v) == ig.has_edge_between(u, v)

    randv = utils.toindex(randv)
    ids = g.edge_ids(randv, randv)[2].tonumpy()
    assert sum(ig.edge_ids(randv, randv)[2].tonumpy() == ids, 0) == len(ids)
    assert sum(g.has_edges_between(randv, randv).tonumpy() ==
               ig.has_edges_between(randv, randv).tonumpy(), 0) == len(randv)

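# Hedged sketch of the sort_edges helper used above (the test suite's actual
# implementation may differ): order an (src, dst, eid) Index triple by edge
# id so the two graphs' edge lists can be compared element-wise regardless of
# traversal order.
def sort_edges(edges):
    src, dst, eid = [e.tousertensor() for e in edges]
    order = F.tensor(np.argsort(F.asnumpy(eid)))
    return (F.gather_row(src, order),
            F.gather_row(dst, order),
            F.gather_row(eid, order))
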
def test_edge_id():
    gi = create_graph_index(multigraph=False)
    assert not gi.is_multigraph()

    gi = create_graph_index(multigraph=True)
    gi.add_nodes(4)
    gi.add_edge(0, 1)
    eid = gi.edge_id(0, 1).tonumpy()
    assert len(eid) == 1
    assert eid[0] == 0
    assert gi.is_multigraph()

    # multiedges
    gi.add_edge(0, 1)
    eid = gi.edge_id(0, 1).tonumpy()
    assert len(eid) == 2
    assert eid[0] == 0
    assert eid[1] == 1

    gi.add_edges(toindex([0, 1, 1, 2]), toindex([2, 2, 2, 3]))
    src, dst, eid = gi.edge_ids(toindex([0, 0, 2, 1]), toindex([2, 1, 3, 2]))
    eid_answer = [2, 0, 1, 5, 3, 4]
    assert len(eid) == 6
    assert all(e == ea for e, ea in zip(eid, eid_answer))

    # find edges
    src, dst, eid = gi.find_edges(toindex([1, 3, 5]))
    assert len(src) == len(dst) == len(eid) == 3
    assert src[0] == 0 and src[1] == 1 and src[2] == 2
    assert dst[0] == 1 and dst[1] == 2 and dst[2] == 3
    assert eid[0] == 1 and eid[1] == 3 and eid[2] == 5

    # source broadcasting
    src, dst, eid = gi.edge_ids(toindex([0]), toindex([1, 2]))
    eid_answer = [0, 1, 2]
    assert len(eid) == 3
    assert all(e == ea for e, ea in zip(eid, eid_answer))

    # destination broadcasting
    src, dst, eid = gi.edge_ids(toindex([1, 0]), toindex([2]))
    eid_answer = [3, 4, 2]
    assert len(eid) == 3
    assert all(e == ea for e, ea in zip(eid, eid_answer))

    gi.clear()

    # the following assumes that grabbing a nonexistent edge will throw an error
    try:
        gi.edge_id(0, 1)
        fail = True
    except DGLError:
        fail = False
    finally:
        assert not fail

    gi.add_nodes(4)
    gi.add_edge(0, 1)
    eid = gi.edge_id(0, 1).tonumpy()
    assert len(eid) == 1
    assert eid[0] == 0

def test_pickling_index():
    # normal index
    i = toindex([1, 2, 3])
    i.tousertensor()
    i.todgltensor()  # construct a dgl tensor, which is unpicklable
    i2 = _reconstruct_pickle(i)
    _assert_is_identical_index(i, i2)

    # slice index
    i = toindex(slice(5, 10))
    i2 = _reconstruct_pickle(i)
    _assert_is_identical_index(i, i2)

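# Plausible definitions of the helpers used above (hedged sketches; the test
# suite's actual helpers may differ). _reconstruct_pickle round-trips an
# object through an in-memory pickle buffer; the index comparison assumes
# utils.Index exposes slice_data() for its slice form.
import io
import pickle

def _reconstruct_pickle(obj):
    f = io.BytesIO()
    pickle.dump(obj, f)
    f.seek(0)
    return pickle.load(f)

def _assert_is_identical_index(i1, i2):
    # Both the slice form (if any) and the materialized tensors must agree.
    assert i1.slice_data() == i2.slice_data()
    assert F.array_equal(i1.tousertensor(), i2.tousertensor())
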
def test_pickling_graph_index():
    gi = create_graph_index()
    gi.add_nodes(3)
    src_idx = toindex([0, 0])
    dst_idx = toindex([1, 2])
    gi.add_edges(src_idx, dst_idx)

    gi2 = _reconstruct_pickle(gi)

    assert gi2.number_of_nodes() == gi.number_of_nodes()
    src_idx2, dst_idx2, _ = gi2.edges()
    assert F.array_equal(src_idx.tousertensor(), src_idx2.tousertensor())
    assert F.array_equal(dst_idx.tousertensor(), dst_idx2.tousertensor())

def test_load_csr():
    n = 100
    csr = (sp.sparse.random(n, n, density=0.1, format='csr') != 0).astype(np.int64)

    # Load CSR normally.
    idx = dgl.graph_index.from_csr(
        utils.toindex(csr.indptr), utils.toindex(csr.indices), 'out')
    assert idx.number_of_nodes() == n
    assert idx.number_of_edges() == csr.nnz
    src, dst, eid = idx.edges()
    src, dst, eid = src.tousertensor(), dst.tousertensor(), eid.tousertensor()
    coo = csr.tocoo()
    assert np.all(F.asnumpy(src) == coo.row)
    assert np.all(F.asnumpy(dst) == coo.col)

def create_mini_batch(g, num_hops, add_self_loop=False):
    seed_ids = np.array([0, 1, 2, 3])
    seed_ids = utils.toindex(seed_ids)
    sgi = g._graph.neighbor_sampling([seed_ids], g.number_of_nodes(),
                                     num_hops, "in", None, add_self_loop)
    assert len(sgi) == 1
    return dgl.node_flow.NodeFlow(g, sgi[0])

def l0_sample(g, positive_max=128, negative_ratio=3):
    '''Sample positive and negative edges.'''
    if g is None:
        return None
    n_eids = g.number_of_edges()
    pos_eids = np.where(g.edata['rel_class'].asnumpy() > 0)[0]
    neg_eids = np.where(g.edata['rel_class'].asnumpy() == 0)[0]
    if len(pos_eids) == 0:
        return None
    positive_num = min(len(pos_eids), positive_max)
    negative_num = min(len(neg_eids), positive_num * negative_ratio)
    pos_sample = np.random.choice(pos_eids, positive_num, replace=False)
    neg_sample = np.random.choice(neg_eids, negative_num, replace=False)
    weights = np.zeros(n_eids)
    # np.add.at(weights, pos_sample, 1)
    weights[pos_sample] = 1
    weights[neg_sample] = 1
    # g.edata['sample_weights'] = mx.nd.array(weights, ctx=g.edata['rel_class'].context)
    # return g
    eids = np.where(weights > 0)[0]
    sub_g = g.edge_subgraph(toindex(eids.tolist()))
    sub_g.copy_from_parent()
    sub_g.edata['sample_weights'] = mx.nd.array(
        weights[eids], ctx=g.edata['rel_class'].context)
    return sub_g

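# Hedged usage sketch for l0_sample; the toy graph and the per-edge
# 'rel_class' labels below are illustrative (class 0 means "no relation"),
# and an MXNet-backed DGLGraph is assumed.
def _l0_sample_example():
    g = dgl.DGLGraph()
    g.add_nodes(4)
    g.add_edges([0, 0, 1, 2], [1, 2, 3, 3])
    g.edata['rel_class'] = mx.nd.array([0, 1, 0, 2])
    sub_g = l0_sample(g, positive_max=2, negative_ratio=1)
    # The subgraph keeps the positive edges plus sampled negatives and
    # carries per-edge 'sample_weights' for weighting the training loss.
    assert sub_g.edata['sample_weights'].shape[0] == sub_g.number_of_edges()
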
def test_block_edges():
    num_layers = 3
    g = generate_rand_graph(100)
    nf = create_mini_batch(g, num_layers)
    assert nf.num_layers == num_layers + 1
    for i in range(nf.num_blocks):
        dest_nodes = utils.toindex(nf.layer_nid(i + 1))
        src1, dst1, eid1 = nf.in_edges(dest_nodes, 'all')
        src, dst, eid = nf.block_edges(i)
        assert_array_equal(F.asnumpy(src), F.asnumpy(src1))
        assert_array_equal(F.asnumpy(dst), F.asnumpy(dst1))
        assert_array_equal(F.asnumpy(eid), F.asnumpy(eid1))

        src, dst, eid = nf.block_edges(i, remap_local=True)
        # should also work for negative block ids
        src_by_neg, dst_by_neg, eid_by_neg = nf.block_edges(
            -nf.num_blocks + i, remap_local=True)
        assert_array_equal(F.asnumpy(src), F.asnumpy(src_by_neg))
        assert_array_equal(F.asnumpy(dst), F.asnumpy(dst_by_neg))
        assert_array_equal(F.asnumpy(eid), F.asnumpy(eid_by_neg))

        src1 = nf._glb2lcl_nid(src1, i)
        dst1 = nf._glb2lcl_nid(dst1, i + 1)
        assert_array_equal(F.asnumpy(src), F.asnumpy(src1))
        assert_array_equal(F.asnumpy(dst), F.asnumpy(dst1))

def test_node_batch():
    g = dgl.DGLGraph(nx.path_graph(20))
    feat = F.randn((g.number_of_nodes(), 10))
    g.ndata['x'] = feat

    # test all
    v = ALL
    n_repr = g.get_n_repr(v)
    nbatch = NodeBatch(g, v, n_repr)
    assert F.allclose(nbatch.data['x'], feat)
    assert nbatch.mailbox is None
    assert F.allclose(nbatch.nodes(), g.nodes())
    assert nbatch.batch_size() == g.number_of_nodes()
    assert len(nbatch) == g.number_of_nodes()

    # test partial
    v = utils.toindex(F.tensor([0, 3, 5, 7, 9]))
    n_repr = g.get_n_repr(v)
    nbatch = NodeBatch(g, v, n_repr)
    assert F.allclose(nbatch.data['x'],
                      F.gather_row(feat, F.tensor([0, 3, 5, 7, 9])))
    assert nbatch.mailbox is None
    assert F.allclose(nbatch.nodes(), F.tensor([0, 3, 5, 7, 9]))
    assert nbatch.batch_size() == 5
    assert len(nbatch) == 5

def check_basics(g, ig):
    assert g.number_of_nodes() == ig.number_of_nodes()
    assert g.number_of_edges() == ig.number_of_edges()
    edges = g.edges()
    iedges = ig.edges()

    for i in range(g.number_of_nodes()):
        assert g.has_node(i) == ig.has_node(i)

    for i in range(g.number_of_nodes()):
        assert (mx.nd.sum(g.predecessors(i).tousertensor()).asnumpy() ==
                mx.nd.sum(ig.predecessors(i).tousertensor()).asnumpy())
        assert (mx.nd.sum(g.successors(i).tousertensor()).asnumpy() ==
                mx.nd.sum(ig.successors(i).tousertensor()).asnumpy())

    randv = np.random.randint(0, g.number_of_nodes(), 10)
    randv = utils.toindex(randv)
    in_src1, in_dst1, in_eids1 = g.in_edges(randv)
    in_src2, in_dst2, in_eids2 = ig.in_edges(randv)
    nnz = in_src2.tousertensor().shape[0]
    assert mx.nd.sum(in_src1.tousertensor() == in_src2.tousertensor()).asnumpy() == nnz
    assert mx.nd.sum(in_dst1.tousertensor() == in_dst2.tousertensor()).asnumpy() == nnz
    assert mx.nd.sum(in_eids1.tousertensor() == in_eids2.tousertensor()).asnumpy() == nnz

    out_src1, out_dst1, out_eids1 = g.out_edges(randv)
    out_src2, out_dst2, out_eids2 = ig.out_edges(randv)
    nnz = out_dst2.tousertensor().shape[0]
    assert mx.nd.sum(out_dst1.tousertensor() == out_dst2.tousertensor()).asnumpy() == nnz
    assert mx.nd.sum(out_src1.tousertensor() == out_src2.tousertensor()).asnumpy() == nnz
    assert mx.nd.sum(out_eids1.tousertensor() == out_eids2.tousertensor()).asnumpy() == nnz

    num_v = len(randv)
    assert mx.nd.sum(g.in_degrees(randv).tousertensor() ==
                     ig.in_degrees(randv).tousertensor()).asnumpy() == num_v
    assert mx.nd.sum(g.out_degrees(randv).tousertensor() ==
                     ig.out_degrees(randv).tousertensor()).asnumpy() == num_v

    randv = randv.tousertensor()
    for v in randv.asnumpy():
        assert g.in_degree(v) == ig.in_degree(v)
        assert g.out_degree(v) == ig.out_degree(v)

    for u in randv.asnumpy():
        for v in randv.asnumpy():
            if len(g.edge_id(u, v)) == 1:
                assert g.edge_id(u, v).tonumpy() == ig.edge_id(u, v).tonumpy()
            assert g.has_edge_between(u, v) == ig.has_edge_between(u, v)

    randv = utils.toindex(randv)
    ids = g.edge_ids(randv, randv)[2].tonumpy()
    assert sum(ig.edge_ids(randv, randv)[2].tonumpy() == ids) == len(ids)
    assert sum(g.has_edges_between(randv, randv).tonumpy() ==
               ig.has_edges_between(randv, randv).tonumpy()) == len(randv)

def test_edge_batch():
    d = 10
    g = dgl.DGLGraph(nx.path_graph(20))
    nfeat = F.randn((g.number_of_nodes(), d))
    efeat = F.randn((g.number_of_edges(), d))
    g.ndata['x'] = nfeat
    g.edata['x'] = efeat

    # test all
    eid = utils.toindex(slice(0, g.number_of_edges()))
    u, v, _ = g._graph.edges('eid')
    src_data = g.get_n_repr(u)
    edge_data = g.get_e_repr(eid)
    dst_data = g.get_n_repr(v)
    ebatch = EdgeBatch((u, v, eid), src_data, edge_data, dst_data)
    assert (F.shape(ebatch.src['x'])[0] == g.number_of_edges() and
            F.shape(ebatch.src['x'])[1] == d)
    assert (F.shape(ebatch.dst['x'])[0] == g.number_of_edges() and
            F.shape(ebatch.dst['x'])[1] == d)
    assert (F.shape(ebatch.data['x'])[0] == g.number_of_edges() and
            F.shape(ebatch.data['x'])[1] == d)
    assert F.allclose(ebatch.edges()[0], u.tousertensor())
    assert F.allclose(ebatch.edges()[1], v.tousertensor())
    assert F.allclose(ebatch.edges()[2], F.arange(0, g.number_of_edges()))
    assert ebatch.batch_size() == g.number_of_edges()
    assert len(ebatch) == g.number_of_edges()

    # test partial
    eid = utils.toindex(F.tensor([0, 3, 5, 7, 11, 13, 15, 27]))
    u, v, _ = g._graph.find_edges(eid)
    src_data = g.get_n_repr(u)
    edge_data = g.get_e_repr(eid)
    dst_data = g.get_n_repr(v)
    ebatch = EdgeBatch((u, v, eid), src_data, edge_data, dst_data)
    assert (F.shape(ebatch.src['x'])[0] == 8 and
            F.shape(ebatch.src['x'])[1] == d)
    assert (F.shape(ebatch.dst['x'])[0] == 8 and
            F.shape(ebatch.dst['x'])[1] == d)
    assert (F.shape(ebatch.data['x'])[0] == 8 and
            F.shape(ebatch.data['x'])[1] == d)
    assert F.allclose(ebatch.edges()[0], u.tousertensor())
    assert F.allclose(ebatch.edges()[1], v.tousertensor())
    assert F.allclose(ebatch.edges()[2], eid.tousertensor())
    assert ebatch.batch_size() == 8
    assert len(ebatch) == 8

def test_pickling_index():
    i = toindex([1, 2, 3])
    i.tousertensor()
    i.todgltensor()  # construct a dgl tensor, which is unpicklable
    i2 = _reconstruct_pickle(i)
    assert F.array_equal(i2.tousertensor(), i.tousertensor())

def bipartite_single_sided_random_walk_with_restart(
        g, seeds, restart_prob, max_nodes_per_seed,
        max_visit_counts=0, max_frequent_visited_nodes=0):
    """Batch-generate random walk traces on the given graph with restart probability.

    The graph must be a bipartite graph.

    A single random walk step involves two normal steps, so that the
    "visited" nodes always stay on the same side. [1]

    Parameters
    ----------
    g : DGLGraph
        The graph.
    seeds : Tensor
        The node ID tensor from which the random walk traces start.
    restart_prob : float
        Probability to stop a random walk after each step.
    max_nodes_per_seed : int
        Stop generating traces for a seed if the total number of nodes visited
        exceeds this number. [1]
    max_visit_counts : int, optional
    max_frequent_visited_nodes : int, optional
        Alternatively, stop generating traces for a seed if no less than
        ``max_frequent_visited_nodes`` are visited no less than
        ``max_visit_counts`` times. [1]

    Returns
    -------
    traces : list[list[Tensor]]
        traces[i][j] is the j-th trace generated for the i-th seed.

    Notes
    -----
    The current implementation does not check that the graph is actually a
    bipartite graph.

    The traces do **not** include the seed nodes themselves.

    References
    ----------
    [1] Eksombatchai et al., 2017 https://arxiv.org/abs/1711.07601
    """
    if len(seeds) == 0:
        return []

    seeds = utils.toindex(seeds).todgltensor()
    traces = _CAPI_DGLBipartiteSingleSidedRandomWalkWithRestart(
        g._graph._handle, seeds, restart_prob, int(max_nodes_per_seed),
        int(max_visit_counts), int(max_frequent_visited_nodes))
    return _split_traces(traces)

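# Hedged usage sketch for the restart variant above, not part of the module:
# the tiny user-item graph is illustrative (nodes 0-1 on one side, 2-4 on the
# other) and the stopping parameters are arbitrary.
def _random_walk_with_restart_example():
    import dgl
    g = dgl.DGLGraph()
    g.add_nodes(5)
    # Bidirectional user-item edges; each walk step takes two hops, so the
    # recorded nodes stay on the seed's side.
    g.add_edges([0, 2, 0, 3, 1, 3, 1, 4], [2, 0, 3, 0, 3, 1, 4, 1])
    traces = bipartite_single_sided_random_walk_with_restart(
        g, seeds=[0, 1], restart_prob=0.5, max_nodes_per_seed=10)
    # traces[i][j] is the j-th trace for seed i; seeds themselves are omitted.
    assert len(traces) == 2
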
def test_row3():
    # test row delete
    data = Frame(create_test_data())
    f = FrameRef(data)
    assert f.is_contiguous()
    assert f.is_span_whole_column()
    assert f.num_rows == N
    del f[toindex(th.tensor([2, 3]))]
    assert not f.is_contiguous()
    assert not f.is_span_whole_column()
    # The delete is lazy: it is only reflected on the ref, while the
    # underlying storage is not touched.
    assert f.num_rows == N - 2
    assert data.num_rows == N
    newidx = list(range(N))
    newidx.pop(2)
    newidx.pop(2)
    newidx = toindex(newidx)
    for k, v in f.items():
        assert U.allclose(v, data[k][newidx])

def test_index():
    ans = np.ones((10,), dtype=np.int64) * 10

    # from np data
    data = np.ones((10,), dtype=np.int64) * 10
    idx = toindex(data)
    y1 = idx.tonumpy()
    y2 = F.asnumpy(idx.tousertensor())
    y3 = idx.todgltensor().asnumpy()
    assert np.allclose(ans, y1)
    assert np.allclose(ans, y2)
    assert np.allclose(ans, y3)

    # from list
    data = [10] * 10
    idx = toindex(data)
    y1 = idx.tonumpy()
    y2 = F.asnumpy(idx.tousertensor())
    y3 = idx.todgltensor().asnumpy()
    assert np.allclose(ans, y1)
    assert np.allclose(ans, y2)
    assert np.allclose(ans, y3)

    # from dl tensor
    data = F.ones((10,), dtype=F.int64) * 10
    idx = toindex(data)
    y1 = idx.tonumpy()
    y2 = F.asnumpy(idx.tousertensor())
    y3 = idx.todgltensor().asnumpy()
    assert np.allclose(ans, y1)
    assert np.allclose(ans, y2)
    assert np.allclose(ans, y3)

    # from dgl.NDArray
    data = dgl.ndarray.array(np.ones((10,), dtype=np.int64) * 10)
    idx = toindex(data)
    y1 = idx.tonumpy()
    y2 = F.asnumpy(idx.tousertensor())
    y3 = idx.todgltensor().asnumpy()
    assert np.allclose(ans, y1)
    assert np.allclose(ans, y2)
    assert np.allclose(ans, y3)

def test_block_adj_matrix():
    num_layers = 3
    g = generate_rand_graph(100)
    nf = create_mini_batch(g, num_layers)
    assert nf.num_layers == num_layers + 1
    for i in range(nf.num_blocks):
        u, v, _ = nf.block_edges(i, remap_local=True)
        adj, _ = nf.block_adjacency_matrix(i, F.cpu())
        adj = F.sparse_to_numpy(adj)
        # should also work for negative block ids
        adj_by_neg, _ = nf.block_adjacency_matrix(-nf.num_blocks + i, F.cpu())
        adj_by_neg = F.sparse_to_numpy(adj_by_neg)

        data = np.ones((len(u)), dtype=np.float32)
        v = utils.toindex(v)
        u = utils.toindex(u)
        coo = sp.sparse.coo_matrix((data, (v.tonumpy(), u.tonumpy())),
                                   shape=adj.shape).todense()
        assert_array_equal(adj, coo)
        assert_array_equal(adj_by_neg, coo)

def test_node_subgraph_with_halo():
    gi = create_large_graph_index(1000)
    nodes = np.random.choice(gi.number_of_nodes(), 100, replace=False)
    halo_subg, inner_node, inner_edge = gi.node_halo_subgraph(toindex(nodes), 2)

    # Check if edges in the subgraph are in the original graph.
    for s, d, e in zip(*halo_subg.graph.edges()):
        assert halo_subg.induced_edges[e] in gi.edge_id(
            halo_subg.induced_nodes[s], halo_subg.induced_nodes[d])

    # Check if the inner node labels are correct.
    inner_node = inner_node.asnumpy()
    inner_node_ids = np.nonzero(inner_node)[0]
    inner_node_ids = halo_subg.induced_nodes.tonumpy()[inner_node_ids]
    assert np.all(np.sort(inner_node_ids) == np.sort(nodes))

    # Check if the inner edge labels are correct.
    inner_edge = inner_edge.asnumpy()
    inner_edge_ids = halo_subg.induced_edges.tonumpy()[inner_edge > 0]
    subg = gi.node_subgraph(toindex(nodes))
    assert np.all(np.sort(subg.induced_edges.tonumpy()) == np.sort(inner_edge_ids))

def test_node_subgraph():
    num_vertices = 100
    g, ig = generate_rand_graph(num_vertices)

    # node_subgraph
    randv1 = np.random.randint(0, num_vertices, 20)
    randv = np.unique(randv1)
    subg = g.node_subgraph(utils.toindex(randv))
    subig = ig.node_subgraph(utils.toindex(randv))
    check_basics(subg, subig)
    check_graph_equal(subg, subig)
    assert F.sum(map_to_subgraph_nid(subg, utils.toindex(randv1[0:10])).tousertensor()
                 == map_to_subgraph_nid(subig, utils.toindex(randv1[0:10])).tousertensor(),
                 0) == 10

    # node_subgraphs
    randvs = []
    subgs = []
    for i in range(4):
        randv = np.unique(np.random.randint(0, num_vertices, 20))
        randvs.append(utils.toindex(randv))
        subgs.append(g.node_subgraph(utils.toindex(randv)))
    subigs = ig.node_subgraphs(randvs)
    for i in range(4):
        check_basics(subgs[i], subigs[i])
        check_graph_equal(subgs[i], subigs[i])

def test_block_incidence_matrix():
    num_layers = 3
    g = generate_rand_graph(100)
    nf = create_mini_batch(g, num_layers)
    assert nf.num_layers == num_layers + 1
    for i in range(nf.num_blocks):
        typestrs = ["in", "out"]  # TODO: need a fix for "both"
        adjs = []
        for typestr in typestrs:
            adj, _ = nf.block_incidence_matrix(i, typestr, F.cpu())
            adj = F.sparse_to_numpy(adj)
            adjs.append(adj)
        # should work for negative block ids
        adjs_by_neg = []
        for typestr in typestrs:
            adj_by_neg, _ = nf.block_incidence_matrix(
                -nf.num_blocks + i, typestr, F.cpu())
            adj_by_neg = F.sparse_to_numpy(adj_by_neg)
            adjs_by_neg.append(adj_by_neg)

        u, v, e = nf.block_edges(i, remap_local=True)
        u = utils.toindex(u)
        v = utils.toindex(v)
        e = utils.toindex(e)

        expected = []
        data_in_and_out = np.ones((len(u)), dtype=np.float32)
        expected.append(
            sp.sparse.coo_matrix((data_in_and_out, (v.tonumpy(), e.tonumpy())),
                                 shape=adjs[0].shape).todense())
        expected.append(
            sp.sparse.coo_matrix((data_in_and_out, (u.tonumpy(), e.tonumpy())),
                                 shape=adjs[1].shape).todense())
        for j in range(len(typestrs)):
            assert_array_equal(adjs[j], expected[j])
            assert_array_equal(adjs_by_neg[j], expected[j])

def test_block_adj_matrix():
    num_layers = 3
    g = generate_rand_graph(100)
    nf = create_mini_batch(g, num_layers)
    assert nf.num_layers == num_layers + 1
    for i in range(nf.num_blocks):
        src, dst, eid = nf.block_edges(i)
        dest_nodes = utils.toindex(nf.layer_nid(i + 1))
        u, v, _ = nf._graph.in_edges(dest_nodes)
        u = nf._glb2lcl_nid(u.tousertensor(), i)
        v = nf._glb2lcl_nid(v.tousertensor(), i + 1)
        assert F.array_equal(src, u)
        assert F.array_equal(dst, v)

        adj, _ = nf.block_adjacency_matrix(i, F.cpu())
        adj = F.sparse_to_numpy(adj)
        data = np.ones((len(u)), dtype=np.float32)
        v = utils.toindex(v)
        u = utils.toindex(u)
        coo = sp.sparse.coo_matrix((data, (v.tonumpy(), u.tonumpy())),
                                   shape=adj.shape).todense()
        assert np.array_equal(adj, coo)

def test_add_rows():
    data = Frame()
    f1 = FrameRef(data)
    f1.add_rows(4)
    x = th.randn(1, 4)
    f1[Index(th.tensor([0]))] = {'x': x}
    ans = th.cat([x, th.zeros(3, 4)])
    assert U.allclose(f1['x'], ans)
    f1.add_rows(4)
    f1[toindex(slice(4, 8))] = {'x': th.ones(4, 4), 'y': th.ones(4, 5)}
    ans = th.cat([ans, th.ones(4, 4)])
    assert U.allclose(f1['x'], ans)
    ans = th.cat([th.zeros(4, 5), th.ones(4, 5)])
    assert U.allclose(f1['y'], ans)

def test_edge_subgraph():
    gi = create_graph_index()
    gi.add_nodes(4)
    gi.add_edge(0, 1)
    gi.add_edge(0, 1)
    gi.add_edge(0, 2)
    gi.add_edge(2, 3)

    sub2par_edgemap = [3, 2]
    sgi = gi.edge_subgraph(toindex(sub2par_edgemap))
    for s, d, e in zip(*sgi.edges()):
        assert sgi.induced_edges[e] in gi.edge_id(sgi.induced_nodes[s],
                                                  sgi.induced_nodes[d])

def test_node_subgraph():
    gi = create_graph_index(None, False)
    gi.add_nodes(4)
    gi.add_edge(0, 1)
    gi.add_edge(0, 2)
    gi.add_edge(0, 2)
    gi.add_edge(0, 3)

    sub2par_nodemap = [2, 0, 3]
    sgi = gi.node_subgraph(toindex(sub2par_nodemap))
    for s, d, e in zip(*sgi.graph.edges()):
        assert sgi.induced_edges[e] in gi.edge_id(sgi.induced_nodes[s],
                                                  sgi.induced_nodes[d])