def _topk_reference(feat, k, descending):
    """NumPy reference for top-k: rows of ``feat`` ranked by the last column.

    Returns ``(values, indices)`` as backend tensors with a leading batch
    dimension of 1 so per-graph results can be concatenated along dim 0.
    """
    order = np.argsort(feat[:, -1:].flatten())
    if descending:
        # np.argsort is ascending; reverse (contiguously) for descending.
        order = np.ascontiguousarray(order[::-1])
    top = np.expand_dims(feat[order[:k]], 0)
    return F.tensor(top), F.tensor(np.expand_dims(order[:k], 0))


def _check_topk(bg, k, descending, val, indices, use_edata):
    """Compare dgl.topk_* output against a per-graph NumPy reference.

    ``use_edata`` selects whether the 'x' feature is read from edata
    (topk_edges) or ndata (topk_nodes) of each unbatched component.
    """
    ref_vals, ref_idx = [], []
    for sg in dgl.unbatch(bg):
        frame = sg.edata if use_edata else sg.ndata
        v, idx = _topk_reference(F.asnumpy(frame['x']), k, descending)
        ref_vals.append(v)
        ref_idx.append(idx)
    assert F.allclose(val, F.cat(ref_vals, dim=0))
    assert F.allclose(indices, F.cat(ref_idx, dim=0))


def test_topk(g, idtype, descending):
    """Check dgl.topk_nodes/dgl.topk_edges on a batched graph.

    The node and edge paths previously duplicated the same reference loop;
    both now go through _check_topk.
    """
    g = g.astype(idtype).to(F.ctx())
    g.ndata['x'] = F.randn((g.number_of_nodes(), 3))
    # Test.1: k larger than the number of nodes must not crash.
    dgl.topk_nodes(g, 'x', 100, sortby=-1)
    # Test.2: correctness on nodes. Skip when some component is too small
    # to pick a meaningful k.
    min_nnodes = F.asnumpy(g.batch_num_nodes()).min()
    if min_nnodes <= 1:
        return
    k = min_nnodes - 1
    val, indices = dgl.topk_nodes(g, 'x', k, descending=descending, sortby=-1)
    _check_topk(g, k, descending, val, indices, use_edata=False)
    # Test.3: sortby=None must not crash.
    dgl.topk_nodes(g, 'x', k, sortby=None)
    g.edata['x'] = F.randn((g.number_of_edges(), 3))
    # Test.4: k larger than the number of edges must not crash.
    dgl.topk_edges(g, 'x', 100, sortby=-1)
    # Test.5: correctness on edges. Skip when some component has too few edges.
    min_nedges = F.asnumpy(g.batch_num_edges()).min()
    if min_nedges <= 1:
        return
    k = min_nedges - 1
    val, indices = dgl.topk_edges(g, 'x', k, descending=descending, sortby=-1)
    _check_topk(g, k, descending, val, indices, use_edata=True)
def test_split_even():
    """Check node_split/edge_split with force_even=True.

    Verifies that (a) the even splits over all partitions cover exactly the
    masked nodes/edges, and (b) doubling the number of clients splits each
    partition's share into two halves that together cover the same set.
    """
    prepare_dist()
    g = create_random_graph(10000)
    num_parts = 4
    num_hops = 2
    partition_graph(g, 'dist_graph_test', num_parts, '/tmp/dist_graph',
                    num_hops=num_hops, part_method='metis')
    # Randomly mark ~70% of the nodes/edges as workload.
    node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30
    all_nodes1 = []
    all_nodes2 = []
    all_edges1 = []
    all_edges2 = []
    for i in range(num_parts):
        dgl.distributed.set_num_client(num_parts)
        part_g, node_feats, edge_feats, gpb, _ = load_partition(
            '/tmp/dist_graph/dist_graph_test.json', i)
        local_nids = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids)
        nodes = node_split(node_mask, gpb, i, force_even=True)
        all_nodes1.append(nodes)
        subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(local_nids))
        print('part {} get {} nodes and {} are in the partition'.format(
            i, len(nodes), len(subset)))
        # With twice the clients, ranks 2i and 2i+1 together should cover the
        # same nodes this rank covered with num_parts clients.
        dgl.distributed.set_num_client(num_parts * 2)
        nodes1 = node_split(node_mask, gpb, i * 2, force_even=True)
        nodes2 = node_split(node_mask, gpb, i * 2 + 1, force_even=True)
        nodes3 = F.cat([nodes1, nodes2], 0)
        all_nodes2.append(nodes3)
        subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(nodes3))
        print('intersection has', len(subset))
        # Same checks for edges.
        dgl.distributed.set_num_client(num_parts)
        local_eids = F.nonzero_1d(part_g.edata['inner_edge'])
        local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids)
        edges = edge_split(edge_mask, gpb, i, force_even=True)
        all_edges1.append(edges)
        subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(local_eids))
        print('part {} get {} edges and {} are in the partition'.format(
            i, len(edges), len(subset)))
        dgl.distributed.set_num_client(num_parts * 2)
        edges1 = edge_split(edge_mask, gpb, i * 2, force_even=True)
        edges2 = edge_split(edge_mask, gpb, i * 2 + 1, force_even=True)
        edges3 = F.cat([edges1, edges2], 0)
        all_edges2.append(edges3)
        subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(edges3))
        print('intersection has', len(subset))
    # The union of all splits must equal the masked ids exactly (and in order).
    all_nodes1 = F.cat(all_nodes1, 0)
    all_edges1 = F.cat(all_edges1, 0)
    all_nodes2 = F.cat(all_nodes2, 0)
    all_edges2 = F.cat(all_edges2, 0)
    all_nodes = np.nonzero(node_mask)[0]
    all_edges = np.nonzero(edge_mask)[0]
    assert np.all(all_nodes == F.asnumpy(all_nodes1))
    assert np.all(all_edges == F.asnumpy(all_edges1))
    assert np.all(all_nodes == F.asnumpy(all_nodes2))
    assert np.all(all_edges == F.asnumpy(all_edges2))
def check_dist_graph(g, num_nodes, num_edges):
    """Exercise the DistGraph API on ``g`` (expected sizes given).

    Covers: size queries, reading/initializing node and edge data, sparse
    node embeddings with SparseAdagrad (skipped on backends that raise
    NotImplementedError), writing data, and metadata/node-split checks.
    """
    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges
    # Test reading node data
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats1 = g.ndata['features'][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))
    # Test reading edge data
    eids = F.arange(0, int(g.number_of_edges() / 2))
    feats1 = g.edata['features'][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))
    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.init_ndata('test1', new_shape, F.int32)
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)
    # Test init edge data
    new_shape = (g.number_of_edges(), 2)
    g.init_edata('test1', new_shape, F.int32)
    feats = g.edata['test1'][eids]
    assert np.all(F.asnumpy(feats) == 0)
    # Test sparse emb
    try:
        new_shape = (g.number_of_nodes(), 1)
        emb = SparseNodeEmbedding(g, 'emb1', new_shape, emb_init)
        lr = 0.001
        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats = emb(nids)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids), 1)))
            loss = F.sum(feats + 1, 0)
            loss.backward()
            optimizer.step()
        # After one Adagrad step with all-ones gradients the looked-up rows
        # move by exactly -lr.
        feats = emb(nids)
        assert_almost_equal(F.asnumpy(feats), np.ones((len(nids), 1)) * -lr)
        # Rows that were never looked up must stay at their initial zeros.
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
        policy = dgl.distributed.PartitionPolicy('node', g.get_partition_book())
        grad_sum = dgl.distributed.DistTensor(g, 'node:emb1_sum', policy)
        assert np.all(F.asnumpy(grad_sum[nids]) == np.ones((len(nids), 1)))
        assert np.all(F.asnumpy(grad_sum[rest]) == np.zeros((len(rest), 1)))
        # Looking the same rows up twice in one pass should accumulate
        # gradients, moving the rows by sqrt(2)*-lr under Adagrad.
        emb = SparseNodeEmbedding(g, 'emb2', new_shape, emb_init)
        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats1 = emb(nids)
            feats2 = emb(nids)
            feats = F.cat([feats1, feats2], 0)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids) * 2, 1)))
            loss = F.sum(feats + 1, 0)
            loss.backward()
            optimizer.step()
        feats = emb(nids)
        assert_almost_equal(F.asnumpy(feats),
                            np.ones((len(nids), 1)) * math.sqrt(2) * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
    except NotImplementedError:
        # Sparse embedding is not supported on every backend.
        pass
    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)
    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1, )
    selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book())
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes
    # in the graph.
    local_nids = np.arange(g.number_of_nodes())
    for n in nodes:
        assert n in local_nids
    print('end')
def test_convert():
    """Round-trip conversion between heterogeneous and homogeneous graphs.

    Checks dgl.to_homo / dgl.to_hetero: feature concatenation order, per-type
    node/edge id mapping, and reconstruction with and without an explicit
    metagraph. Uses ``.item()`` instead of the removed ``np.asscalar``.
    """
    hg = create_test_heterograph()
    hs = []
    for ntype in hg.ntypes:
        h = F.randn((hg.number_of_nodes(ntype), 5))
        hg.nodes[ntype].data['h'] = h
        hs.append(h)
    hg.nodes['user'].data['x'] = F.randn((3, 3))
    ws = []
    for etype in hg.canonical_etypes:
        w = F.randn((hg.number_of_edges(etype), 5))
        hg.edges[etype].data['w'] = w
        ws.append(w)
    hg.edges['plays'].data['x'] = F.randn((4, 3))
    g = dgl.to_homo(hg)
    # 'h'/'w' exist on every type and survive; 'x' exists on one type only
    # and must be dropped.
    assert F.array_equal(F.cat(hs, dim=0), g.ndata['h'])
    assert 'x' not in g.ndata
    assert F.array_equal(F.cat(ws, dim=0), g.edata['w'])
    assert 'x' not in g.edata
    src, dst = g.all_edges(order='eid')
    src = F.asnumpy(src)
    dst = F.asnumpy(dst)
    etype_id, eid = F.asnumpy(g.edata[dgl.ETYPE]), F.asnumpy(g.edata[dgl.EID])
    ntype_id, nid = F.asnumpy(g.ndata[dgl.NTYPE]), F.asnumpy(g.ndata[dgl.NID])
    # Every homogeneous edge must map back to the original typed edge.
    for i in range(g.number_of_edges()):
        srctype = hg.ntypes[ntype_id[src[i]]]
        dsttype = hg.ntypes[ntype_id[dst[i]]]
        etype = hg.etypes[etype_id[i]]
        src_i, dst_i = hg.find_edges([eid[i]], (srctype, etype, dsttype))
        # np.asscalar was removed in NumPy 1.23; .item() is the replacement.
        assert F.asnumpy(src_i).item() == nid[src[i]]
        assert F.asnumpy(dst_i).item() == nid[dst[i]]
    mg = nx.MultiDiGraph([('user', 'user', 'follows'),
                          ('user', 'game', 'plays'),
                          ('user', 'game', 'wishes'),
                          ('developer', 'game', 'develops')])
    # Reconstruction must work both with an inferred and an explicit metagraph.
    for _mg in [None, mg]:
        hg2 = dgl.to_hetero(g, ['user', 'game', 'developer'],
                            ['follows', 'plays', 'wishes', 'develops'],
                            ntype_field=dgl.NTYPE, etype_field=dgl.ETYPE,
                            metagraph=_mg)
        assert set(hg.ntypes) == set(hg2.ntypes)
        assert set(hg.canonical_etypes) == set(hg2.canonical_etypes)
        for ntype in hg.ntypes:
            assert hg.number_of_nodes(ntype) == hg2.number_of_nodes(ntype)
            assert F.array_equal(hg.nodes[ntype].data['h'],
                                 hg2.nodes[ntype].data['h'])
        for canonical_etype in hg.canonical_etypes:
            src, dst = hg.all_edges(etype=canonical_etype, order='eid')
            src2, dst2 = hg2.all_edges(etype=canonical_etype, order='eid')
            assert F.array_equal(src, src2)
            assert F.array_equal(dst, dst2)
            assert F.array_equal(hg.edges[canonical_etype].data['w'],
                                 hg2.edges[canonical_etype].data['w'])
    # hetero_from_homo test case 2
    g = dgl.graph([(0, 2), (1, 2), (2, 3), (0, 3)])
    g.ndata[dgl.NTYPE] = F.tensor([0, 0, 1, 2])
    g.edata[dgl.ETYPE] = F.tensor([0, 0, 1, 2])
    hg = dgl.to_hetero(g, ['l0', 'l1', 'l2'], ['e0', 'e1', 'e2'])
    assert set(hg.canonical_etypes) == set([
        ('l0', 'e0', 'l1'), ('l1', 'e1', 'l2'), ('l0', 'e2', 'l2')
    ])
    assert hg.number_of_nodes('l0') == 2
    assert hg.number_of_nodes('l1') == 1
    assert hg.number_of_nodes('l2') == 1
    assert hg.number_of_edges('e0') == 2
    assert hg.number_of_edges('e1') == 1
    assert hg.number_of_edges('e2') == 1
    # hetero_from_homo test case 3
    mg = nx.MultiDiGraph([('user', 'movie', 'watches'),
                          ('user', 'TV', 'watches')])
    g = dgl.graph([(0, 1), (0, 2)])
    g.ndata[dgl.NTYPE] = F.tensor([0, 1, 2])
    g.edata[dgl.ETYPE] = F.tensor([0, 0])
    for _mg in [None, mg]:
        hg = dgl.to_hetero(g, ['user', 'TV', 'movie'], ['watches'],
                           metagraph=_mg)
        assert set(hg.canonical_etypes) == set([
            ('user', 'watches', 'movie'), ('user', 'watches', 'TV')
        ])
        assert hg.number_of_nodes('user') == 1
        assert hg.number_of_nodes('TV') == 1
        assert hg.number_of_nodes('movie') == 1
        assert hg.number_of_edges(('user', 'watches', 'TV')) == 1
        assert hg.number_of_edges(('user', 'watches', 'movie')) == 1
        assert len(hg.etypes) == 2
    # hetero_to_homo test case 2
    hg = dgl.bipartite([(0, 0), (1, 1)], card=(2, 3))
    g = dgl.to_homo(hg)
    assert g.number_of_nodes() == 5
def _message_2(edges):
    """Message function: concatenate source node 'h' with edge 'w' on dim 1."""
    combined = F.cat((edges.src['h'], edges.data['w']), dim=1)
    return {'h': combined}
def test_edge_softmax(g, norm_by, idtype):
    """edge_softmax must agree between a heterograph and its homogeneous form.

    Runs edge_softmax on the homogeneous conversion and on the heterograph
    directly, then compares both the per-etype scores and the input gradients.
    Four no-op ``F.attach_grad(F.clone(xi))`` calls whose results were
    discarded have been removed.
    """
    print("params", norm_by, idtype)
    g = create_test_heterograph(idtype)
    x1 = F.randn((g.num_edges('plays'), feat_size))
    x2 = F.randn((g.num_edges('follows'), feat_size))
    x3 = F.randn((g.num_edges('develops'), feat_size))
    x4 = F.randn((g.num_edges('wishes'), feat_size))
    g['plays'].edata['eid'] = x1
    g['follows'].edata['eid'] = x2
    g['develops'].edata['eid'] = x3
    g['wishes'].edata['eid'] = x4

    #################################################################
    # edge_softmax() on homogeneous graph
    #################################################################
    with F.record_grad():
        hm_g = dgl.to_homogeneous(g)
        # Concatenation order follows the homogeneous edge layout:
        # develops, follows, plays, wishes.
        hm_x = F.cat((x3, x2, x1, x4), 0)
        hm_e = F.attach_grad(F.clone(hm_x))
        score_hm = edge_softmax(hm_g, hm_e, norm_by=norm_by)
        hm_g.edata['score'] = score_hm
        ht_g = dgl.to_heterogeneous(hm_g, g.ntypes, g.etypes)
        r1 = ht_g.edata['score'][('user', 'plays', 'game')]
        r2 = ht_g.edata['score'][('user', 'follows', 'user')]
        r3 = ht_g.edata['score'][('developer', 'develops', 'game')]
        r4 = ht_g.edata['score'][('user', 'wishes', 'game')]
        F.backward(F.reduce_sum(r1) + F.reduce_sum(r2))
        grad_edata_hm = F.grad(hm_e)

    #################################################################
    # edge_softmax() on heterogeneous graph
    #################################################################
    e1 = F.attach_grad(F.clone(x1))
    e2 = F.attach_grad(F.clone(x2))
    e3 = F.attach_grad(F.clone(x3))
    e4 = F.attach_grad(F.clone(x4))
    e = {('user', 'follows', 'user'): e2,
         ('user', 'plays', 'game'): e1,
         ('user', 'wishes', 'game'): e4,
         ('developer', 'develops', 'game'): e3}
    with F.record_grad():
        score = edge_softmax(g, e, norm_by=norm_by)
        r5 = score[('user', 'plays', 'game')]
        r6 = score[('user', 'follows', 'user')]
        r7 = score[('developer', 'develops', 'game')]
        r8 = score[('user', 'wishes', 'game')]
        F.backward(F.reduce_sum(r5) + F.reduce_sum(r6))
        grad_edata_ht = F.cat((F.grad(e3), F.grad(e2), F.grad(e1), F.grad(e4)), 0)

    # correctness check: scores and gradients must match across both paths.
    assert F.allclose(r1, r5)
    assert F.allclose(r2, r6)
    assert F.allclose(r3, r7)
    assert F.allclose(r4, r8)
    assert F.allclose(grad_edata_hm, grad_edata_ht)
def check_dist_graph(g, num_clients, num_nodes, num_edges):
    """Exercise the DistGraph API on ``g`` with ``num_clients`` clients.

    Covers: size queries, reading node/edge data, DistTensor creation
    (named, anonymous, persistent), DistEmbedding with SparseAdagrad
    (skipped on backends that raise NotImplementedError), writing data,
    and metadata/node-split checks.
    """
    # Test API
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges
    # Test reading node data
    nids = F.arange(0, int(g.number_of_nodes() / 2))
    feats1 = g.ndata['features'][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))
    # Test reading edge data
    eids = F.arange(0, int(g.number_of_edges() / 2))
    feats1 = g.edata['features'][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))
    # Test init node data
    new_shape = (g.number_of_nodes(), 2)
    g.ndata['test1'] = dgl.distributed.DistTensor(new_shape, F.int32)
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 0)
    # reference to a one that exists: same name must alias the same storage.
    test2 = dgl.distributed.DistTensor(new_shape, F.float32, 'test2',
                                       init_func=rand_init)
    test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test2')
    assert np.all(F.asnumpy(test2[nids]) == F.asnumpy(test3[nids]))
    # create a tensor and destroy a tensor and create it again.
    test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test3',
                                       init_func=rand_init)
    del test3
    test3 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32,
                                       'test3')
    del test3
    # add tests for anonymous distributed tensor.
    test3 = dgl.distributed.DistTensor(new_shape, F.float32,
                                       init_func=rand_init)
    data = test3[0:10]
    test4 = dgl.distributed.DistTensor(new_shape, F.float32,
                                       init_func=rand_init)
    del test3
    # A fresh anonymous tensor must not reuse the deleted tensor's contents.
    test5 = dgl.distributed.DistTensor(new_shape, F.float32,
                                       init_func=rand_init)
    assert np.sum(F.asnumpy(test5[0:10] != data)) > 0
    # test a persistent tensor
    test4 = dgl.distributed.DistTensor(new_shape, F.float32, 'test4',
                                       init_func=rand_init, persistent=True)
    del test4
    # Recreating a deleted persistent tensor (with a different shape) must
    # fail. The original code raised the sentinel inside the try block, where
    # a bare except swallowed it, so the check could never fire.
    try:
        test4 = dgl.distributed.DistTensor((g.number_of_nodes(), 3),
                                           F.float32, 'test4')
    except Exception:
        pass
    else:
        raise AssertionError(
            'recreating a deleted persistent tensor must fail')
    # Test sparse emb
    try:
        emb = DistEmbedding(g.number_of_nodes(), 1, 'emb1', emb_init)
        lr = 0.001
        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats = emb(nids)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids), 1)))
            loss = F.sum(feats + 1, 0)
            loss.backward()
            optimizer.step()
        feats = emb(nids)
        # Exact step size only holds with a single client updating.
        if num_clients == 1:
            assert_almost_equal(F.asnumpy(feats),
                                np.ones((len(nids), 1)) * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
        policy = dgl.distributed.PartitionPolicy('node',
                                                 g.get_partition_book())
        grad_sum = dgl.distributed.DistTensor((g.number_of_nodes(), ),
                                              F.float32, 'emb1_sum', policy)
        if num_clients == 1:
            assert np.all(
                F.asnumpy(grad_sum[nids]) ==
                np.ones((len(nids), 1)) * num_clients)
        assert np.all(F.asnumpy(grad_sum[rest]) == np.zeros((len(rest), 1)))
        # Looking the same rows up twice in one pass accumulates gradients,
        # moving the rows by sqrt(2)*-lr under Adagrad.
        emb = DistEmbedding(g.number_of_nodes(), 1, 'emb2', emb_init)
        with F.no_grad():
            feats1 = emb(nids)
        assert np.all(F.asnumpy(feats1) == 0)
        optimizer = SparseAdagrad([emb], lr=lr)
        with F.record_grad():
            feats1 = emb(nids)
            feats2 = emb(nids)
            feats = F.cat([feats1, feats2], 0)
            assert np.all(F.asnumpy(feats) == np.zeros((len(nids) * 2, 1)))
            loss = F.sum(feats + 1, 0)
            loss.backward()
            optimizer.step()
        with F.no_grad():
            feats = emb(nids)
        if num_clients == 1:
            assert_almost_equal(F.asnumpy(feats),
                                np.ones((len(nids), 1)) * math.sqrt(2) * -lr)
        rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
        feats1 = emb(rest)
        assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
    except NotImplementedError:
        # DistEmbedding is not supported on every backend.
        pass
    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata['test1'][nids] = new_feats
    feats = g.ndata['test1'][nids]
    assert np.all(F.asnumpy(feats) == 1)
    # Test metadata operations.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1, )
    selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book())
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes
    # in the graph.
    local_nids = np.arange(g.number_of_nodes())
    for n in nodes:
        assert n in local_nids
    print('end')
def test_empty_relation(idtype):
    """Test the features of batched DGLHeteroGraphs"""
    # g1: the 'plays' relation is empty, so the 'game' node type gets no
    # nodes and carries no features.
    g1 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): ([0, 1], [1, 2]),
            ('user', 'plays', 'game'): ([], [])
        },
        idtype=idtype, device=F.ctx())
    g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    # g2: same schema but the 'plays' relation is populated.
    g2 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): ([0, 1], [1, 2]),
            ('user', 'plays', 'game'): ([0, 1], [0, 0])
        },
        idtype=idtype, device=F.ctx())
    g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g2.nodes['game'].data['h1'] = F.tensor([[0.]])
    g2.nodes['game'].data['h2'] = F.tensor([[1.]])
    g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])
    bg = dgl.batch([g1, g2])
    # Test number of nodes: per-graph counts must be tracked per type, even
    # for types that are empty in one of the inputs.
    for ntype in bg.ntypes:
        assert F.asnumpy(bg.batch_num_nodes(ntype)).tolist() == [
            g1.number_of_nodes(ntype), g2.number_of_nodes(ntype)
        ]
    # Test number of edges
    for etype in bg.canonical_etypes:
        assert F.asnumpy(bg.batch_num_edges(etype)).tolist() == [
            g1.number_of_edges(etype), g2.number_of_edges(etype)
        ]
    # Test features: types present in both graphs are concatenated.
    assert F.allclose(
        bg.nodes['user'].data['h1'],
        F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.nodes['user'].data['h2'],
        F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']],
              dim=0))
    # 'game' features come solely from g2 because g1 has no 'game' nodes.
    assert F.allclose(bg.nodes['game'].data['h1'], g2.nodes['game'].data['h1'])
    assert F.allclose(bg.nodes['game'].data['h2'], g2.nodes['game'].data['h2'])
    assert F.allclose(
        bg.edges['follows'].data['h1'],
        F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']],
              dim=0))
    assert F.allclose(bg.edges['plays'].data['h1'],
                      g2.edges['plays'].data['h1'])
    # Test unbatching graphs
    g3, g4 = dgl.unbatch(bg)
    check_equivalence_between_heterographs(g1, g3,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })
    check_equivalence_between_heterographs(g2, g4,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })
    # Test graphs without edges: batching must not crash when both inputs
    # have zero edges (explicit node counts keep the node types alive).
    g1 = dgl.heterograph({('u', 'r', 'v'): ([], [])}, {'u': 0, 'v': 4})
    g2 = dgl.heterograph({('u', 'r', 'v'): ([], [])}, {'u': 1, 'v': 5})
    dgl.batch([g1, g2])
def test_split():
    """Check node_split/edge_split with force_even=False.

    With uneven splitting, each rank must receive exactly the masked ids
    that live in its own partition, and doubling the number of clients must
    split a partition's share into two halves covering the same set.
    """
    #prepare_dist()
    g = create_random_graph(10000)
    num_parts = 4
    num_hops = 2
    partition_graph(g, 'dist_graph_test', num_parts, '/tmp/dist_graph',
                    num_hops=num_hops, part_method='metis')
    # Randomly mark ~70% of the nodes/edges as workload.
    node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30
    selected_nodes = np.nonzero(node_mask)[0]
    selected_edges = np.nonzero(edge_mask)[0]

    # The code now collects the roles of all client processes and use the
    # information to determine how to split the workloads. Here is to
    # simulate the multi-client use case.
    def set_roles(num_clients):
        dgl.distributed.role.CUR_ROLE = 'default'
        dgl.distributed.role.GLOBAL_RANK = {i: i for i in range(num_clients)}
        dgl.distributed.role.PER_ROLE_RANK['default'] = {
            i: i for i in range(num_clients)
        }

    for i in range(num_parts):
        set_roles(num_parts)
        part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition(
            '/tmp/dist_graph/dist_graph_test.json', i)
        local_nids = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids)
        # With force_even=False the split must match the partition exactly.
        nodes1 = np.intersect1d(selected_nodes, F.asnumpy(local_nids))
        nodes2 = node_split(node_mask, gpb, rank=i, force_even=False)
        assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes2)))
        local_nids = F.asnumpy(local_nids)
        for n in nodes1:
            assert n in local_nids
        # Two clients per partition must jointly cover the same node set.
        set_roles(num_parts * 2)
        nodes3 = node_split(node_mask, gpb, rank=i * 2, force_even=False)
        nodes4 = node_split(node_mask, gpb, rank=i * 2 + 1, force_even=False)
        nodes5 = F.cat([nodes3, nodes4], 0)
        assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes5)))
        # Same checks for edges.
        set_roles(num_parts)
        local_eids = F.nonzero_1d(part_g.edata['inner_edge'])
        local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids)
        edges1 = np.intersect1d(selected_edges, F.asnumpy(local_eids))
        edges2 = edge_split(edge_mask, gpb, rank=i, force_even=False)
        assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges2)))
        local_eids = F.asnumpy(local_eids)
        for e in edges1:
            assert e in local_eids
        set_roles(num_parts * 2)
        edges3 = edge_split(edge_mask, gpb, rank=i * 2, force_even=False)
        edges4 = edge_split(edge_mask, gpb, rank=i * 2 + 1, force_even=False)
        edges5 = F.cat([edges3, edges4], 0)
        assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges5)))
def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
    """Run a one-partition server/client setup and verify split coverage.

    Spawns ``num_servers`` server processes and ``num_clients`` client
    processes; each client returns the nodes/edges it was assigned via
    ``return_dict``, and the union must equal the masked ids exactly.
    """
    prepare_dist()
    g = create_random_graph(10000)
    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_2'
    g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
    g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
    partition_graph(g, graph_name, num_parts, '/tmp/dist_graph',
                    num_trainers_per_machine=num_clients)

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    serv_ps = []
    # 'spawn' avoids sharing state of the parent process with the children.
    ctx = mp.get_context('spawn')
    for serv_id in range(num_servers):
        p = ctx.Process(target=run_server,
                        args=(graph_name, serv_id, num_servers, num_clients,
                              shared_mem))
        serv_ps.append(p)
        p.start()

    cli_ps = []
    manager = mp.Manager()
    return_dict = manager.dict()
    # Mark a random 10% of nodes/edges as the workload to be split.
    node_mask = np.zeros((g.number_of_nodes(), ), np.int32)
    edge_mask = np.zeros((g.number_of_edges(), ), np.int32)
    nodes = np.random.choice(g.number_of_nodes(), g.number_of_nodes() // 10,
                             replace=False)
    edges = np.random.choice(g.number_of_edges(), g.number_of_edges() // 10,
                             replace=False)
    node_mask[nodes] = 1
    edge_mask[edges] = 1
    nodes = np.sort(nodes)
    edges = np.sort(edges)
    for cli_id in range(num_clients):
        print('start client', cli_id)
        p = ctx.Process(target=run_client_hierarchy,
                        args=(graph_name, 0, num_servers, node_mask,
                              edge_mask, return_dict))
        p.start()
        cli_ps.append(p)
    for p in cli_ps:
        p.join()
    for p in serv_ps:
        p.join()

    # Collect each client's share; the sorted union must equal the masks.
    nodes1 = []
    edges1 = []
    for n, e in return_dict.values():
        nodes1.append(n)
        edges1.append(e)
    nodes1, _ = F.sort_1d(F.cat(nodes1, 0))
    edges1, _ = F.sort_1d(F.cat(edges1, 0))
    assert np.all(F.asnumpy(nodes1) == nodes)
    assert np.all(F.asnumpy(edges1) == edges)
    print('clients have terminated')
def test_features(idtype):
    """Test the features of batched DGLHeteroGraphs"""
    def _make_graph():
        # Both inputs are identical; build them via one helper.
        graph = dgl.heterograph(
            {
                ('user', 'follows', 'user'): ([0, 1], [1, 2]),
                ('user', 'plays', 'game'): ([0, 1], [0, 0])
            },
            idtype=idtype, device=F.ctx())
        graph.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
        graph.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
        graph.nodes['game'].data['h1'] = F.tensor([[0.]])
        graph.nodes['game'].data['h2'] = F.tensor([[1.]])
        graph.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
        graph.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
        graph.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])
        return graph

    g1 = _make_graph()
    g2 = _make_graph()

    # test default setting: every feature is batched by concatenation.
    bg = dgl.batch([g1, g2])
    for ntype, key in [('user', 'h1'), ('user', 'h2'),
                       ('game', 'h1'), ('game', 'h2')]:
        expected = F.cat(
            [g1.nodes[ntype].data[key], g2.nodes[ntype].data[key]], dim=0)
        assert F.allclose(bg.nodes[ntype].data[key], expected)
    for etype, key in [('follows', 'h1'), ('follows', 'h2'), ('plays', 'h1')]:
        expected = F.cat(
            [g1.edges[etype].data[key], g2.edges[etype].data[key]], dim=0)
        assert F.allclose(bg.edges[etype].data[key], expected)

    # test specifying ndata/edata: only the listed keys survive batching.
    bg = dgl.batch([g1, g2], ndata=['h2'], edata=['h1'])
    for ntype in ('user', 'game'):
        expected = F.cat(
            [g1.nodes[ntype].data['h2'], g2.nodes[ntype].data['h2']], dim=0)
        assert F.allclose(bg.nodes[ntype].data['h2'], expected)
    for etype in ('follows', 'plays'):
        expected = F.cat(
            [g1.edges[etype].data['h1'], g2.edges[etype].data['h1']], dim=0)
        assert F.allclose(bg.edges[etype].data['h1'], expected)
    assert 'h1' not in bg.nodes['user'].data
    assert 'h1' not in bg.nodes['game'].data
    assert 'h2' not in bg.edges['follows'].data

    # Test unbatching graphs: the selected features must round-trip.
    g3, g4 = dgl.unbatch(bg)
    attr_spec = dict(node_attrs={'user': ['h2'], 'game': ['h2']},
                     edge_attrs={('user', 'follows', 'user'): ['h1']})
    check_equivalence_between_heterographs(g1, g3, **attr_spec)
    check_equivalence_between_heterographs(g2, g4, **attr_spec)

    # test legacy keyword
    bg = dgl.batch([g1, g2], edge_attrs=['h1'])
    assert 'h2' not in bg.edges['follows'].data.keys()
def test_batching_with_zero_nodes_edges(index_dtype):
    """Test the features of batched DGLHeteroGraphs"""
    # g1: the 'plays' relation has no edges, so 'game' has no nodes/features.
    g1 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): [(0, 1), (1, 2)],
            ('user', 'plays', 'game'): []
        },
        index_dtype=index_dtype)
    g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    # g2: same schema but with a populated 'plays' relation.
    g2 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): [(0, 1), (1, 2)],
            ('user', 'plays', 'game'): [(0, 0), (1, 0)]
        },
        index_dtype=index_dtype)
    g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g2.nodes['game'].data['h1'] = F.tensor([[0.]])
    g2.nodes['game'].data['h2'] = F.tensor([[1.]])
    g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])
    bg = dgl.batch_hetero([g1, g2])
    # Types present in both graphs are concatenated.
    assert F.allclose(
        bg.nodes['user'].data['h1'],
        F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.nodes['user'].data['h2'],
        F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']],
              dim=0))
    # 'game' features come only from g2 since g1 has no 'game' nodes.
    assert F.allclose(bg.nodes['game'].data['h1'], g2.nodes['game'].data['h1'])
    assert F.allclose(bg.nodes['game'].data['h2'], g2.nodes['game'].data['h2'])
    assert F.allclose(
        bg.edges['follows'].data['h1'],
        F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']],
              dim=0))
    assert F.allclose(bg.edges['plays'].data['h1'],
                      g2.edges['plays'].data['h1'])
    # Test unbatching graphs
    g3, g4 = dgl.unbatch_hetero(bg)
    check_equivalence_between_heterographs(g1, g3,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })
    check_equivalence_between_heterographs(g2, g4,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })
    # Test graphs without edges: batching must not crash when one graph has
    # zero nodes of a type that the other populates.
    g1 = dgl.bipartite([], 'u', 'r', 'v', num_nodes=(0, 4))
    g2 = dgl.bipartite([], 'u', 'r', 'v', num_nodes=(1, 5))
    g2.nodes['u'].data['x'] = F.tensor([1])
    dgl.batch_hetero([g1, g2])
def test_batched_features(index_dtype):
    """Test the features of batched DGLHeteroGraphs"""
    g1 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): [(0, 1), (1, 2)],
            ('user', 'plays', 'game'): [(0, 0), (1, 0)]
        },
        index_dtype=index_dtype)
    g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g1.nodes['game'].data['h1'] = F.tensor([[0.]])
    g1.nodes['game'].data['h2'] = F.tensor([[1.]])
    g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    g1.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])
    g2 = dgl.heterograph(
        {
            ('user', 'follows', 'user'): [(0, 1), (1, 2)],
            ('user', 'plays', 'game'): [(0, 0), (1, 0)]
        },
        index_dtype=index_dtype)
    g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
    g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
    g2.nodes['game'].data['h1'] = F.tensor([[0.]])
    g2.nodes['game'].data['h2'] = F.tensor([[1.]])
    g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
    g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
    g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])
    # Batch all node features; select edge features per canonical etype
    # ('h1' for follows, nothing for plays).
    bg = dgl.batch_hetero([g1, g2],
                          node_attrs=ALL,
                          edge_attrs={
                              ('user', 'follows', 'user'): 'h1',
                              ('user', 'plays', 'game'): None
                          })
    assert F.allclose(
        bg.nodes['user'].data['h1'],
        F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.nodes['user'].data['h2'],
        F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']],
              dim=0))
    assert F.allclose(
        bg.nodes['game'].data['h1'],
        F.cat([g1.nodes['game'].data['h1'], g2.nodes['game'].data['h1']],
              dim=0))
    assert F.allclose(
        bg.nodes['game'].data['h2'],
        F.cat([g1.nodes['game'].data['h2'], g2.nodes['game'].data['h2']],
              dim=0))
    assert F.allclose(
        bg.edges['follows'].data['h1'],
        F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']],
              dim=0))
    # Unselected edge features must be absent from the batched graph.
    assert 'h2' not in bg.edges['follows'].data.keys()
    assert 'h1' not in bg.edges['plays'].data.keys()
    # Test unbatching graphs
    g3, g4 = dgl.unbatch_hetero(bg)
    check_equivalence_between_heterographs(g1, g3,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })
    check_equivalence_between_heterographs(g2, g4,
                                           node_attrs={
                                               'user': ['h1', 'h2'],
                                               'game': ['h1', 'h2']
                                           },
                                           edge_attrs={
                                               ('user', 'follows', 'user'):
                                               ['h1']
                                           })
def test_nx_conversion():
    """Round-trip conversion between networkx graphs and DGLGraph."""
    # check conversion between networkx and DGLGraph
    def _check_nx_feature(nxg, nf, ef):
        # check node and edge feature of nxg
        # this is used to check to_networkx
        num_nodes = len(nxg)
        num_edges = nxg.size()
        if num_nodes > 0:
            node_feat = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for k in nxg.nodes[nid]:
                    node_feat[k].append(F.unsqueeze(attr[k], 0))
            for k in node_feat:
                feat = F.cat(node_feat[k], 0)
                assert F.allclose(feat, nf[k])
        else:
            assert len(nf) == 0
        if num_edges > 0:
            # Edge rows are placed by the 'id' attribute so that feature
            # order matches DGL's edge ids, not nx iteration order.
            edge_feat = ddict(lambda: [0] * num_edges)
            for u, v, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1  # extra id
                eid = attr['id']
                for k in ef:
                    edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
            for k in edge_feat:
                feat = F.cat(edge_feat[k], 0)
                assert F.allclose(feat, ef[k])
        else:
            assert len(ef) == 0

    n1 = F.randn((5, 3))
    n2 = F.randn((5, 10))
    n3 = F.randn((5, 4))
    e1 = F.randn((4, 5))
    e2 = F.randn((4, 7))
    g = dgl.graph([(0, 2), (1, 4), (3, 0), (4, 3)])
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})
    # convert to networkx
    nxg = dgl.to_networkx(g, node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph, nx graph has id in edge feature
    # use id feature to test non-tensor copy
    g = dgl.graph(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    # test with existing dglgraph (so existing features should be cleared)
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert F.allclose(g.edata['e1'], e1)
    assert F.array_equal(g.edata['id'], F.copy_to(F.arange(0, 4), F.cpu()))

    # test conversion after modifying DGLGraph
    # TODO(minjie): enable after mutation is supported
    #g.pop_e_repr('id') # pop id so we don't need to provide id when adding edges
    #new_n = F.randn((2, 3))
    #new_e = F.randn((3, 5))
    #g.add_nodes(2, data={'n1': new_n})
    ## add three edges, one is a multi-edge
    #g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    #n1 = F.cat((n1, new_n), 0)
    #e1 = F.cat((e1, new_e), 0)
    ## convert to networkx again
    #nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    #assert len(nxg) == 7
    #assert nxg.size() == 7
    #_check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now test convert from networkx without id in edge feature
    # first pop id in edge feature
    for _, _, attr in nxg.edges(data=True):
        attr.pop('id')
    # test with a new graph
    g = dgl.graph(nxg, node_attrs=['n1'], edge_attrs=['e1'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    edge_feat = []
    for _, _, attr in nxg.edges(data=True):
        edge_feat.append(F.unsqueeze(attr['e1'], 0))
    edge_feat = F.cat(edge_feat, 0)
    assert F.allclose(g.edata['e1'], edge_feat)

    # Test converting from a networkx graph whose nodes are
    # not labeled with consecutive-integers.
    nxg = nx.cycle_graph(5)
    nxg.remove_nodes_from([0, 4])
    for u in nxg.nodes():
        nxg.nodes[u]['h'] = F.tensor([u])
    for u, v, d in nxg.edges(data=True):
        d['h'] = F.tensor([u, v])
    # Node ids should be relabeled to 0..2; undirected nx edges become two
    # directed DGL edges each.
    g = dgl.DGLGraph()
    g.from_networkx(nxg, node_attrs=['h'], edge_attrs=['h'])
    assert g.number_of_nodes() == 3
    assert g.number_of_edges() == 4
    assert g.has_edge_between(0, 1)
    assert g.has_edge_between(1, 2)
    assert F.allclose(g.ndata['h'], F.tensor([[1.], [2.], [3.]]))
    assert F.allclose(g.edata['h'],
                      F.tensor([[1., 2.], [1., 2.], [2., 3.], [2., 3.]]))
def _check_neighbor_sampling_dataloader(g, nids, dl, mode, collator):
    """Validate every minibatch produced by dataloader ``dl`` against the
    full graph ``g``.

    ``mode`` selects the expected item layout:
      * 'node' -- items are (input_nodes, output_nodes, blocks)
      * 'edge' -- items are (input_nodes, pair_graph, blocks)
      * 'link' -- items are (input_nodes, pair_graph, neg_graph, blocks)
    ``nids`` maps node types (or, for edge/link mode, the destination node
    type of an edge type) to the full set of seed IDs; at the end the union
    of all minibatch seeds must equal that set.
    ``collator`` is unused here; presumably kept so all checker helpers share
    one signature -- TODO confirm against callers.
    """
    seeds = defaultdict(list)

    for item in dl:
        # Unpack the minibatch according to the dataloader mode.
        if mode == 'node':
            input_nodes, output_nodes, blocks = item
        elif mode == 'edge':
            input_nodes, pair_graph, blocks = item
            output_nodes = pair_graph.ndata[dgl.NID]
        elif mode == 'link':
            input_nodes, pair_graph, neg_graph, blocks = item
            output_nodes = pair_graph.ndata[dgl.NID]
            # Positive and negative graphs must be defined over the same
            # node ID mapping.
            for ntype in pair_graph.ntypes:
                assert F.array_equal(pair_graph.nodes[ntype].data[dgl.NID],
                                     neg_graph.nodes[ntype].data[dgl.NID])

        # The reported input/output nodes must match the first block's source
        # frontier and the last block's destination frontier.
        if len(g.ntypes) > 1:
            for ntype in g.ntypes:
                assert F.array_equal(input_nodes[ntype],
                                     blocks[0].srcnodes[ntype].data[dgl.NID])
                assert F.array_equal(output_nodes[ntype],
                                     blocks[-1].dstnodes[ntype].data[dgl.NID])
        else:
            assert F.array_equal(input_nodes, blocks[0].srcdata[dgl.NID])
            assert F.array_equal(output_nodes, blocks[-1].dstdata[dgl.NID])

        # prev_dst chains consecutive blocks: each block's source nodes must
        # equal the previous block's destination nodes for the same ntype.
        prev_dst = {ntype: None for ntype in g.ntypes}
        for block in blocks:
            for canonical_etype in block.canonical_etypes:
                utype, etype, vtype = canonical_etype
                uu, vv = block.all_edges(order='eid', etype=canonical_etype)
                src = block.srcnodes[utype].data[dgl.NID]
                dst = block.dstnodes[vtype].data[dgl.NID]
                # Node features copied into the block must match the parent
                # graph's features for the mapped node IDs.
                assert F.array_equal(block.srcnodes[utype].data['feat'],
                                     g.nodes[utype].data['feat'][src])
                assert F.array_equal(block.dstnodes[vtype].data['feat'],
                                     g.nodes[vtype].data['feat'][dst])
                if prev_dst[utype] is not None:
                    assert F.array_equal(src, prev_dst[utype])
                # Map block-local endpoints back to parent IDs; every sampled
                # edge must exist in the parent graph.
                u = src[uu]
                v = dst[vv]
                assert F.asnumpy(
                    g.has_edges_between(u, v, etype=canonical_etype)).all()
                # Edge features and endpoints must be consistent with the
                # parent edge IDs recorded in the block.
                eid = block.edges[canonical_etype].data[dgl.EID]
                assert F.array_equal(
                    block.edges[canonical_etype].data['feat'],
                    g.edges[canonical_etype].data['feat'][eid])
                ufound, vfound = g.find_edges(eid, etype=canonical_etype)
                assert F.array_equal(ufound, u)
                assert F.array_equal(vfound, v)
            for ntype in block.dsttypes:
                src = block.srcnodes[ntype].data[dgl.NID]
                dst = block.dstnodes[ntype].data[dgl.NID]
                # Destination nodes are a prefix of the source nodes (MFG
                # invariant).
                assert F.array_equal(src[:block.number_of_dst_nodes(ntype)],
                                     dst)
                prev_dst[ntype] = dst

        # Record the seeds covered by this minibatch.
        if mode == 'node':
            for ntype in blocks[-1].dsttypes:
                seeds[ntype].append(blocks[-1].dstnodes[ntype].data[dgl.NID])
        elif mode == 'edge' or mode == 'link':
            for etype in pair_graph.canonical_etypes:
                seeds[etype].append(pair_graph.edges[etype].data[dgl.EID])

    # Check if all nodes/edges are iterated
    seeds = {k: F.cat(v, 0) for k, v in seeds.items()}
    for k, v in seeds.items():
        if k in nids:
            seed_set = set(F.asnumpy(nids[k]))
        elif isinstance(k, tuple) and k[1] in nids:
            # k is a canonical edge type; look up by its relation name.
            seed_set = set(F.asnumpy(nids[k[1]]))
        else:
            continue

        v_set = set(F.asnumpy(v))
        assert v_set == seed_set
def test_split_even():
    """Check that ``node_split``/``edge_split`` with ``force_even=True``
    partition the masked nodes/edges of a partitioned graph without loss or
    duplication.

    For each graph partition the workload is split once assuming
    ``num_parts`` clients and once assuming ``num_parts * 2`` clients; the
    concatenation over all partitions must reproduce the full masked ID set
    in both cases.
    """
    #prepare_dist(1)
    g = create_random_graph(10000)
    num_parts = 4
    num_hops = 2
    partition_graph(g, 'dist_graph_test', num_parts, '/tmp/dist_graph',
                    num_hops=num_hops, part_method='metis')

    # Randomly mark ~70% of nodes/edges as the workload to be split.
    node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30
    selected_nodes = np.nonzero(node_mask)[0]
    selected_edges = np.nonzero(edge_mask)[0]
    all_nodes1 = []
    all_nodes2 = []
    all_edges1 = []
    all_edges2 = []

    # The code now collects the roles of all client processes and uses the
    # information to determine how to split the workloads. Here we simulate
    # the multi-client use case.
    def set_roles(num_clients):
        dgl.distributed.role.CUR_ROLE = 'default'
        dgl.distributed.role.GLOBAL_RANK = {i: i for i in range(num_clients)}
        dgl.distributed.role.PER_ROLE_RANK['default'] = {
            i: i for i in range(num_clients)
        }

    for i in range(num_parts):
        set_roles(num_parts)
        part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition(
            '/tmp/dist_graph/dist_graph_test.json', i)
        local_nids = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids)
        nodes = node_split(node_mask, gpb, rank=i, force_even=True)
        all_nodes1.append(nodes)
        subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(local_nids))
        print('part {} get {} nodes and {} are in the partition'.format(
            i, len(nodes), len(subset)))

        # With twice as many clients, ranks 2i and 2i+1 together should cover
        # (roughly) the same IDs as rank i above.
        set_roles(num_parts * 2)
        nodes1 = node_split(node_mask, gpb, rank=i * 2, force_even=True)
        nodes2 = node_split(node_mask, gpb, rank=i * 2 + 1, force_even=True)
        nodes3, _ = F.sort_1d(F.cat([nodes1, nodes2], 0))
        all_nodes2.append(nodes3)
        subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(nodes3))
        print('intersection has', len(subset))

        # Same checks for edges.
        set_roles(num_parts)
        local_eids = F.nonzero_1d(part_g.edata['inner_edge'])
        local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids)
        edges = edge_split(edge_mask, gpb, rank=i, force_even=True)
        all_edges1.append(edges)
        subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(local_eids))
        print('part {} get {} edges and {} are in the partition'.format(
            i, len(edges), len(subset)))

        set_roles(num_parts * 2)
        edges1 = edge_split(edge_mask, gpb, rank=i * 2, force_even=True)
        edges2 = edge_split(edge_mask, gpb, rank=i * 2 + 1, force_even=True)
        edges3, _ = F.sort_1d(F.cat([edges1, edges2], 0))
        all_edges2.append(edges3)
        subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(edges3))
        print('intersection has', len(subset))

    # The concatenated per-rank splits must reproduce the masked IDs exactly
    # (the element-wise comparison assumes the splits come back in ascending
    # global-ID order across ranks -- TODO confirm against the split impl).
    all_nodes1 = F.cat(all_nodes1, 0)
    all_edges1 = F.cat(all_edges1, 0)
    all_nodes2 = F.cat(all_nodes2, 0)
    all_edges2 = F.cat(all_edges2, 0)
    # Reuse the masked-ID arrays computed above instead of recomputing
    # np.nonzero on the same masks (the previous version duplicated it).
    assert np.all(selected_nodes == F.asnumpy(all_nodes1))
    assert np.all(selected_edges == F.asnumpy(all_edges1))
    assert np.all(selected_nodes == F.asnumpy(all_nodes2))
    assert np.all(selected_edges == F.asnumpy(all_edges2))
def test_nx_conversion():
    """Round-trip a graph between DGL and networkx, checking feature
    preservation in both directions.

    This variant exercises the legacy mutable ``DGLGraph(multigraph=True)``
    API (``from_networkx`` / ``to_networkx`` / ``add_nodes`` /
    ``add_edges``)."""
    # check conversion between networkx and DGLGraph

    def _check_nx_feature(nxg, nf, ef):
        # check node and edge feature of nxg
        # this is used to check to_networkx
        num_nodes = len(nxg)
        num_edges = nxg.size()
        if num_nodes > 0:
            # Gather each node attribute across all nodes, then compare the
            # stacked tensor with the expected feature tensor in nf.
            node_feat = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for k in nxg.nodes[nid]:
                    node_feat[k].append(F.unsqueeze(attr[k], 0))
            for k in node_feat:
                feat = F.cat(node_feat[k], 0)
                assert F.allclose(feat, nf[k])
        else:
            assert len(nf) == 0
        if num_edges > 0:
            # Edge rows are placed at the position given by the exported 'id'
            # attribute, so the comparison does not depend on the iteration
            # order of nxg.edges().
            edge_feat = ddict(lambda: [0] * num_edges)
            for u, v, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1  # extra id
                eid = attr['id']
                for k in ef:
                    edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
            for k in edge_feat:
                feat = F.cat(edge_feat[k], 0)
                assert F.allclose(feat, ef[k])
        else:
            assert len(ef) == 0

    n1 = F.randn((5, 3))
    n2 = F.randn((5, 10))
    n3 = F.randn((5, 4))
    e1 = F.randn((4, 5))
    e2 = F.randn((4, 7))

    g = DGLGraph(multigraph=True)
    g.add_nodes(5)
    g.add_edges([0,1,3,4], [2,4,0,3])
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx; only the listed subset of features is exported
    nxg = g.to_networkx(node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph, nx graph has id in edge feature
    # use id feature to test non-tensor copy
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    # test with existing dglgraph (so existing features should be cleared)
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert F.allclose(g.edata['e1'], e1)
    assert F.array_equal(g.get_e_repr()['id'], F.arange(0, 4))

    # test conversion after modifying DGLGraph
    g.pop_e_repr('id') # pop id so we don't need to provide id when adding edges
    new_n = F.randn((2, 3))
    new_e = F.randn((3, 5))
    g.add_nodes(2, data={'n1': new_n})
    # add three edges, one is a multi-edge
    g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    # Keep the reference tensors in sync with the mutated graph.
    n1 = F.cat((n1, new_n), 0)
    e1 = F.cat((e1, new_e), 0)
    # convert to networkx again
    nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    assert len(nxg) == 7
    assert nxg.size() == 7
    _check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now test convert from networkx without id in edge feature
    # first pop id in edge feature
    for _, _, attr in nxg.edges(data=True):
        attr.pop('id')
    # test with a new graph
    g = DGLGraph(multigraph=True)
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1'])
    # check graph size
    assert g.number_of_nodes() == 7
    assert g.number_of_edges() == 7
    # check number of features
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    edge_feat = []
    for _, _, attr in nxg.edges(data=True):
        edge_feat.append(F.unsqueeze(attr['e1'], 0))
    edge_feat = F.cat(edge_feat, 0)
    assert F.allclose(g.edata['e1'], edge_feat)
def test_to_bidirected():
    """Exercise ``dgl.to_bidirected`` on homogeneous, empty, and
    heterogeneous graphs, with and without feature sharing."""

    def _check_mirrored(fwd_edges, bi_edges):
        # The bidirected graph enumerates every original edge first and then
        # the reversed copy of each, in the same order.
        src, dst = fwd_edges
        bi_src, bi_dst = bi_edges
        assert F.array_equal(F.cat([src, dst], dim=0), bi_src)
        assert F.array_equal(F.cat([dst, src], dim=0), bi_dst)

    # --- homogeneous graph, features shared ---
    g = dgl.graph((F.tensor([0, 1, 3, 1]), F.tensor([1, 2, 0, 2])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.], [1.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]])
    big = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True)
    _check_mirrored(g.edges(), big.edges())
    assert F.array_equal(g.ndata['h'], big.ndata['h'])
    assert F.array_equal(F.cat([g.edata['h'], g.edata['h']], dim=0),
                         big.edata['h'])
    # Writing new features on the bidirected graph must not leak back into
    # the original graph.
    big.ndata['hh'] = F.tensor([[0.], [1.], [2.], [1.]])
    assert ('hh' in g.ndata) is False
    big.edata['hh'] = F.tensor([[0.], [1.], [2.], [1.], [0.], [1.], [2.], [1.]])
    assert ('hh' in g.edata) is False

    # --- homogeneous graph, features NOT shared ---
    big = dgl.to_bidirected(g, copy_ndata=False, copy_edata=False)
    _check_mirrored(g.edges(), big.edges())
    assert ('h' in big.ndata) is False
    assert ('h' in big.edata) is False

    # --- graph with zero edges must not crash ---
    g = dgl.graph([])
    big = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True)

    # --- heterogeneous graph, features shared, bipartite relation ignored ---
    g = dgl.heterograph({
        ('user', 'wins', 'user'): (F.tensor([0, 2, 0, 2, 2]),
                                   F.tensor([1, 1, 2, 1, 0])),
        ('user', 'plays', 'game'): (F.tensor([1, 2, 1]),
                                    F.tensor([2, 1, 1])),
        ('user', 'follows', 'user'): (F.tensor([1, 2, 1]),
                                      F.tensor([0, 0, 0]))
    })
    g.nodes['game'].data['hv'] = F.ones((3, 1))
    g.nodes['user'].data['hv'] = F.ones((3, 1))
    g.edges['wins'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    big = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True,
                            ignore_bipartite=True)
    assert F.array_equal(g.nodes['game'].data['hv'],
                         big.nodes['game'].data['hv'])
    assert F.array_equal(g.nodes['user'].data['hv'],
                         big.nodes['user'].data['hv'])
    for rel in (('user', 'wins', 'user'), ('user', 'follows', 'user')):
        _check_mirrored(g.all_edges(order='eid', etype=rel),
                        big.all_edges(order='eid', etype=rel))
    assert F.array_equal(
        F.cat([g.edges['wins'].data['h'], g.edges['wins'].data['h']], dim=0),
        big.edges['wins'].data['h'])
    # The bipartite relation is ignored: edges pass through unchanged and
    # carry no features.
    src, dst = g.all_edges(order='eid', etype=('user', 'plays', 'game'))
    bi_src, bi_dst = big.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(src, bi_src)
    assert F.array_equal(dst, bi_dst)
    assert len(big.edges['plays'].data) == 0
    assert len(big.edges['follows'].data) == 0

    # --- heterogeneous graph, features NOT shared ---
    big = dgl.to_bidirected(g, copy_ndata=False, copy_edata=False,
                            ignore_bipartite=True)
    for rel in ('wins', 'plays', 'follows'):
        assert len(big.edges[rel].data) == 0
    for ntype in ('game', 'user'):
        assert len(big.nodes[ntype].data) == 0
    for rel in (('user', 'wins', 'user'), ('user', 'follows', 'user')):
        _check_mirrored(g.all_edges(order='eid', etype=rel),
                        big.all_edges(order='eid', etype=rel))
    src, dst = g.all_edges(order='eid', etype=('user', 'plays', 'game'))
    bi_src, bi_dst = big.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(src, bi_src)
    assert F.array_equal(dst, bi_dst)