def test_filter():
    """Exercise ``filter_nodes`` / ``filter_edges`` on a 4-node cycle.

    Rows 1 and 3 of the 'a' feature are set to one (all other rows stay
    zero) for both nodes and edges, so the predicate is expected to pick
    exactly ids 1 and 3, or only id 1 when restricted to ids [0, 1].
    """
    def has_positive(batch):
        # Selection mask handed to the filter calls.
        return F.max(batch.data['a'], 1) > 0

    def id_set(ids):
        return set(F.zerocopy_to_numpy(ids))

    marked = [1, 3]
    subset = [0, 1]

    graph = DGLGraph()
    graph.add_nodes(4)
    graph.add_edges([0, 1, 2, 3], [1, 2, 3, 0])

    node_feat = F.zeros((4, 5))
    edge_feat = F.zeros((4, 5))
    node_feat[marked] = 1
    edge_feat[marked] = 1
    graph.ndata['a'] = node_feat
    graph.edata['a'] = edge_feat

    # nodes: whole graph first, then restricted to the subset
    assert id_set(graph.filter_nodes(has_positive)) == {1, 3}
    assert id_set(graph.filter_nodes(has_positive, subset)) == {1}

    # edges: whole graph first, then restricted to the subset
    assert id_set(graph.filter_edges(has_positive)) == {1, 3}
    assert id_set(graph.filter_edges(has_positive, subset)) == {1}
def test_map_to_subgraph():
    """Check that parent node ids are translated to subgraph-local ids.

    The subgraph is induced on parent nodes [0, 1, 2, 5, 8]; parent ids
    [0, 8, 2] are expected to map to positions [0, 4, 2] in that list.
    """
    parent = DGLGraph()
    parent.add_nodes(10)
    # Chain 0 -> 1 -> ... -> 9.
    sources = F.arange(0, 9)
    targets = F.arange(1, 10)
    parent.add_edges(sources, targets)

    induced = parent.subgraph([0, 1, 2, 5, 8])
    local_ids = F.asnumpy(induced.map_to_subgraph_nid([0, 8, 2]))

    expected = np.array([0, 4, 2])
    assert np.array_equal(local_ids, expected)
def test_filter():
    """Exercise ``filter_nodes`` / ``filter_edges`` with torch features.

    Rows 1 and 3 of the 'a' feature are set to one for both nodes and
    edges of a 4-node cycle; the predicate should select exactly those ids
    (or only id 1 when the candidates are limited to [0, 1]).
    """
    def row_is_positive(batch):
        # ``max(1)`` returns (values, indices); only the values are needed.
        values = batch.data['a'].max(1)[0]
        return values > 0

    def as_set(index_tensor):
        return set(index_tensor.numpy())

    hot_rows = [1, 3]
    candidates = [0, 1]

    graph = DGLGraph()
    graph.add_nodes(4)
    graph.add_edges([0, 1, 2, 3], [1, 2, 3, 0])

    node_feat = th.zeros(4, 5)
    edge_feat = th.zeros(4, 5)
    node_feat[hot_rows] = 1
    edge_feat[hot_rows] = 1
    graph.ndata['a'] = node_feat
    graph.edata['a'] = edge_feat

    # node filters: full, then partial
    assert as_set(graph.filter_nodes(row_is_positive)) == {1, 3}
    assert as_set(graph.filter_nodes(row_is_positive, candidates)) == {1}

    # edge filters: full, then partial
    assert as_set(graph.filter_edges(row_is_positive)) == {1, 3}
    assert as_set(graph.filter_edges(row_is_positive, candidates)) == {1}
def test_dynamic_addition():
    """Check that feature frames keep up with nodes/edges added later.

    After every structural change, the 'h1' and 'h2' columns must have the
    same number of rows, equal to the current node (or edge) count.
    """
    def assert_rows(frame, count):
        assert frame['h1'].shape[0] == frame['h2'].shape[0] == count

    num_initial = 3
    feat_dim = 1
    graph = DGLGraph()

    # --- nodes ---
    graph.add_nodes(num_initial)
    node_feats = {
        'h1': th.randn(num_initial, feat_dim),
        'h2': th.randn(num_initial, feat_dim),
    }
    graph.ndata.update(node_feats)
    graph.add_nodes(3)
    assert_rows(graph.ndata, num_initial + 3)

    # --- edges ---
    graph.add_edge(0, 1)
    graph.add_edge(1, 0)
    edge_feats = {
        'h1': th.randn(2, feat_dim),
        'h2': th.randn(2, feat_dim),
    }
    graph.edata.update(edge_feats)
    assert_rows(graph.edata, 2)

    # two more edges, then overwrite the whole 'h1' column
    graph.add_edges([0, 2], [2, 0])
    graph.edata['h1'] = th.randn(4, feat_dim)
    assert_rows(graph.edata, 4)

    # one more edge, then write 'h1' for that single edge only
    graph.add_edge(1, 2)
    graph.edges[4].data['h1'] = th.randn(1, feat_dim)
    assert_rows(graph.edata, 5)
def test_dynamic_addition():
    """Interleave graph growth with send/recv and compare to a full round.

    Nodes and edges are added step by step while partial send/recv calls
    are issued; the resulting 'h' is then popped and compared against the
    'h' produced by one complete send/recv over the final graph.
    """
    def _init(shape, dtype, ctx, ids):
        return F.copy_to(F.astype(F.randn(shape), dtype), ctx)

    def _message(edges):
        # Same left-to-right summation order as a single chained expression.
        total = edges.src['h1'] + edges.dst['h2']
        total = total + edges.data['h1']
        total = total + edges.data['h2']
        return {'m': total}

    def _reduce(nodes):
        return {'h': F.sum(nodes.mailbox['m'], 1)}

    def _apply(nodes):
        return {'h': nodes.data['h']}

    num_nodes = 3
    feat_dim = 1
    graph = DGLGraph()

    # Random initializers are installed first ...
    graph.set_n_initializer(_init)
    graph.set_e_initializer(_init)

    graph.register_message_func(_message)
    graph.register_reduce_func(_reduce)
    graph.register_apply_node_func(_apply)

    # ... and then replaced by the zero initializer before any data is added.
    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.set_e_initializer(dgl.init.zero_initializer)

    # add nodes and edges
    graph.add_nodes(num_nodes)
    node_feats = {
        'h1': F.randn((num_nodes, feat_dim)),
        'h2': F.randn((num_nodes, feat_dim)),
    }
    graph.ndata.update(node_feats)
    graph.add_nodes(3)
    graph.add_edge(0, 1)
    graph.add_edge(1, 0)
    edge_feats = {
        'h1': F.randn((2, feat_dim)),
        'h2': F.randn((2, feat_dim)),
    }
    graph.edata.update(edge_feats)
    graph.send()

    # every existing edge should now be flagged in the message index
    all_ones = F.ones((graph.number_of_edges(), ), dtype=F.int64)
    expected = F.copy_to(all_ones, F.cpu())
    msg_index = graph._get_msg_index().tousertensor()
    assert F.array_equal(msg_index, expected)

    # add more edges
    graph.add_edges([0, 2], [2, 0], {'h1': F.randn((2, feat_dim))})
    graph.send(([0, 2], [2, 0]))
    graph.recv(0)

    graph.add_edge(1, 2)
    graph.edges[4].data['h1'] = F.randn((1, feat_dim))
    graph.send((1, 2))
    graph.recv([1, 2])

    incremental_h = graph.ndata.pop('h')

    # a complete round of send and recv
    graph.send()
    graph.recv()
    assert F.allclose(incremental_h, graph.ndata['h'])
def perpare_dgl(graph, local, device):
    """Build one DGLGraph per sample and merge them into a batched graph.

    Previously every graph was constructed and then dropped: nothing was
    appended to the list and the function returned ``None``. The graphs are
    now collected and batched.

    Parameters
    ----------
    graph : indexable
        ``graph[i]`` is passed to ``np.nonzero`` and the result is unpacked
        into source/destination index arrays -- presumably a 2-D adjacency
        matrix per sample (TODO confirm against the caller).
    local : sequence
        ``local[i]`` must support ``len()`` (used as the node count) and
        ``.to(device=...)``; it is stored as the node feature 'tk'.
    device
        Forwarded unchanged to ``local[i].to(device=device)``.

    Returns
    -------
    The result of ``dgl.batch`` over the per-sample graphs.
    """
    dgl_list = []
    for i in range(len(local)):
        tokens = local[i]
        dgl_graph = DGLGraph()
        dgl_graph.add_nodes(len(tokens))
        # Non-zero entries of the i-th matrix give the (src, dst) edge lists.
        st, ed = np.nonzero(graph[i])
        dgl_graph.add_edges(st, ed)
        dgl_graph.ndata['tk'] = tokens.to(device=device)
        dgl_list.append(dgl_graph)
    return dgl.batch(dgl_list)
def test_send_recv_after_conversion():
    """Run send/recv on graphs obtained by conversion and compare results.

    A reference graph is converted (a) through networkx and (b) through a
    scipy COO matrix into DGLGraphs that already held some nodes and edges.
    The same send/recv sequence is then run on all three graphs and the
    resulting 'h' features must match the reference.
    """
    def prefilled_graph(num_nodes, src, dst):
        # Start from a graph that already has some random nodes and edges.
        scratch = DGLGraph()
        scratch.add_nodes(num_nodes)
        scratch.add_edges(src, dst)
        scratch.set_n_initializer(dgl.init.zero_initializer)
        return scratch

    reference = generate_graph()

    # via networkx
    nxg = reference.to_networkx(node_attrs=['h'])
    from_nx = prefilled_graph(4, [1, 2], [2, 3])
    from_nx.from_networkx(nxg, node_attrs=['h'])

    # via scipy sparse matrix
    row, col = reference.all_edges()
    data = range(len(row))
    n = reference.number_of_nodes()
    coords = (F.zerocopy_to_numpy(row), F.zerocopy_to_numpy(col))
    adjacency = sp.coo_matrix((data, coords), shape=(n, n))
    from_sparse = prefilled_graph(5, [1, 2, 4], [2, 3, 0])
    from_sparse.from_scipy_sparse_matrix(adjacency)
    # The sparse matrix carries no node features, so copy them explicitly.
    from_sparse.ndata['h'] = reference.ndata['h']

    # Identical send/recv schedule: reference first, then nx, then sparse.
    for graph in (reference, from_nx, from_sparse):
        graph.send(message_func=message_func)
        for receivers in ([0, 1, 3, 5], [0, 2, 4, 8]):
            graph.recv(receivers,
                       reduce_func=reduce_func,
                       apply_node_func=apply_node_func)

    for converted in (from_nx, from_sparse):
        assert F.allclose(reference.ndata['h'], converted.ndata['h'])
def perpare_dgl(graph, local, device):
    """Turn per-sample matrices and features into one batched DGL graph.

    For every index ``i`` in ``range(len(local))`` a DGLGraph is created
    with ``len(local[i])`` nodes, edges taken from the non-zero entries of
    ``graph[i]``, and ``local[i]`` (moved to ``device``) stored as the node
    feature 'tk'. All graphs are combined with ``dgl.batch``.
    """
    def build_single(idx):
        tokens = local[idx]
        single = DGLGraph()
        single.add_nodes(len(tokens))
        sources, targets = np.nonzero(graph[idx])
        single.add_edges(sources, targets)
        single.ndata['tk'] = tokens.to(device=device)
        return single

    singles = [build_single(idx) for idx in range(len(local))]
    return dgl.batch(singles)
def _load(self):
    """Load the dataset from the dataset/NAME/NAME.txt file at ``self.file``.

    Layout as parsed here:

    * line 1: ``N``, the total number of graphs;
    * per graph: a header ``n_nodes glabel`` followed by ``n_nodes`` node
      lines;
    * per node line: ``nlabel n_edges nbr_1 ... nbr_n_edges [attr ...]``,
      where any tokens after the neighbour ids are float node attributes.

    Fills ``self.graphs`` and ``self.labels``, the relabel dictionaries, the
    running node/edge counters ``self.n`` / ``self.m``, and the class and
    feature-dimension statistics, then prints a summary.

    If no node line carried attributes, one-hot node features are generated
    from node in-degrees (``self.degree_as_nlabel``) or from node labels.

    Raises
    ------
    Exception
        If a node line has fewer tokens than its declared edge count needs.
    """
    print('loading data...')
    with open(self.file, 'r') as f:
        # line_1 == N, total number of graphs
        self.N = int(f.readline().strip())

        for i in range(self.N):
            if (i + 1) % 10 == 0 and self.verbosity is True:
                print('processing graph {}...'.format(i + 1))

            grow = f.readline().strip().split()
            # line_2 == [n_nodes, l] is equal to
            # [node number of a graph, class label of a graph]
            n_nodes, glabel = [int(w) for w in grow]

            # relabel graphs
            if glabel not in self.glabel_dict:
                mapped = len(self.glabel_dict)
                self.glabel_dict[glabel] = mapped

            self.labels.append(self.glabel_dict[glabel])

            g = DGLGraph()
            g.add_nodes(n_nodes)

            nlabels = []  # node labels
            nattrs = []  # node attributes, if the dataset has any
            m_edges = 0

            for j in range(n_nodes):
                tokens = f.readline().strip().split()

                # label + edge count + one token per edge
                tmp = int(tokens[1]) + 2  # tmp == 2 + #edges
                if tmp > len(tokens):
                    # fewer neighbour ids than the declared edge count
                    raise Exception('edge number is incorrect!')
                if tmp < len(tokens):
                    # Trailing tokens are attributes; slice them from the
                    # raw tokens BEFORE the integer part is converted.
                    nattrs.append([float(w) for w in tokens[tmp:]])
                nrow = [int(w) for w in tokens[:tmp]]

                # relabel nodes if it has labels
                # if it doesn't have node labels, then every nrow[0]==0
                if nrow[0] not in self.nlabel_dict:
                    mapped = len(self.nlabel_dict)
                    self.nlabel_dict[nrow[0]] = mapped

                # The raw label is stored; it is mapped through a label
                # dictionary later, when one-hot features are generated.
                nlabels.append(nrow[0])

                m_edges += nrow[1]
                g.add_edges(j, nrow[2:])

                # add self loop
                if self.self_loop:
                    m_edges += 1
                    g.add_edge(j, j)

                if (j + 1) % 10 == 0 and self.verbosity is True:
                    print(
                        'processing node {} of graph {}...'.format(
                            j + 1, i + 1))
                    print('this node has {} edgs.'.format(
                        nrow[1]))

            if nattrs:
                g.ndata['attr'] = np.stack(nattrs)
                self.nattrs_flag = True

            g.ndata['label'] = np.array(nlabels)
            if len(self.nlabel_dict) > 1:
                self.nlabels_flag = True

            assert len(g) == n_nodes

            # update statistics of graphs
            self.n += n_nodes
            self.m += m_edges

            self.graphs.append(g)

    # if no attr
    if not self.nattrs_flag:
        print('there are no node features in this dataset!')
        label2idx = {}
        # generate node attr by node degree
        if self.degree_as_nlabel:
            print('generate node features by node degree...')
            nlabel_set = set()
            for g in self.graphs:
                # actually this label shouldn't be updated
                # in case users want to keep it
                # but usually no features means no labels, fine.
                g.ndata['label'] = g.in_degrees()
                # extracting unique node labels
                nlabel_set |= set(g.ndata['label'].numpy())

            nlabel_set = list(nlabel_set)

            # in case the labels/degrees are not continuous number
            self.ndegree_dict = {
                label: idx for idx, label in enumerate(nlabel_set)
            }
            label2idx = self.ndegree_dict
        # generate node attr by node label
        else:
            print('generate node features by node label...')
            label2idx = self.nlabel_dict

        # one-hot encode every node's label through label2idx
        for g in self.graphs:
            g.ndata['attr'] = np.zeros((
                g.number_of_nodes(), len(label2idx)))
            g.ndata['attr'][range(g.number_of_nodes(
            )), [label2idx[nl.item()] for nl in g.ndata['label']]] = 1

    # after load, get the #classes and #dim
    self.gclasses = len(self.glabel_dict)
    self.nclasses = len(self.nlabel_dict)
    self.eclasses = len(self.elabel_dict)
    self.dim_nfeats = len(self.graphs[0].ndata['attr'][0])

    print('Done.')
    print(
        """
        -------- Data Statistics --------'
        #Graphs: %d
        #Graph Classes: %d
        #Nodes: %d
        #Node Classes: %d
        #Node Features Dim: %d
        #Edges: %d
        #Edge Classes: %d
        Avg. of #Nodes: %.2f
        Avg. of #Edges: %.2f
        Graph Relabeled: %s
        Node Relabeled: %s
        Degree Relabeled(If degree_as_nlabel=True): %s \n """ % (
            self.N, self.gclasses, self.n, self.nclasses,
            self.dim_nfeats, self.m, self.eclasses,
            self.n / self.N, self.m / self.N, self.glabel_dict,
            self.nlabel_dict, self.ndegree_dict))
def test_nx_conversion():
    """Round-trip node/edge features between DGLGraph and networkx.

    Covers: exporting selected features with ``to_networkx``, importing
    back into an existing graph (with the edge 'id' attribute present),
    growing the graph and exporting again, and finally importing into a
    fresh graph from a networkx graph whose edges carry no 'id'.
    """
    def _check_nx_feature(nxg, nf, ef):
        # Verify the node and edge features found on ``nxg`` against the
        # expected tensors; used to validate to_networkx output.
        node_count = len(nxg)
        edge_count = nxg.size()

        if node_count > 0:
            gathered = ddict(list)
            for nid, attrs in nxg.nodes(data=True):
                assert len(attrs) == len(nf)
                for name in nxg.nodes[nid]:
                    gathered[name].append(attrs[name].unsqueeze(0))
            for name, pieces in gathered.items():
                assert U.allclose(th.cat(pieces, dim=0), nf[name])
        else:
            assert len(nf) == 0

        if edge_count > 0:
            # One slot per edge so features can be placed by edge id.
            slots = ddict(lambda: [0] * edge_count)
            for _, _, attrs in nxg.edges(data=True):
                assert len(attrs) == len(ef) + 1  # extra id
                eid = attrs['id']
                for name in ef:
                    slots[name][eid] = attrs[name].unsqueeze(0)
            for name, pieces in slots.items():
                assert U.allclose(th.cat(pieces, dim=0), ef[name])
        else:
            assert len(ef) == 0

    n1 = th.randn(5, 3)
    n2 = th.randn(5, 10)
    n3 = th.randn(5, 4)
    e1 = th.randn(4, 5)
    e2 = th.randn(4, 7)

    graph = DGLGraph(multigraph=True)
    graph.add_nodes(5)
    graph.add_edges([0, 1, 3, 4], [2, 4, 0, 3])
    graph.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    graph.edata.update({'e1': e1, 'e2': e2})

    # DGLGraph -> networkx ('n2' is deliberately not exported)
    nxg = graph.to_networkx(node_attrs=['n1', 'n3'],
                            edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # networkx -> the same DGLGraph; the nx edges carry an 'id' attribute,
    # which also serves to test copying a non-tensor feature
    graph.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
    # graph size
    assert graph.number_of_nodes() == 5
    assert graph.number_of_edges() == 4
    # the graph already had features, so the old ones should be cleared
    assert len(graph.ndata) == 1
    assert len(graph.edata) == 2
    # feature values
    assert U.allclose(graph.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert U.allclose(graph.edata['e1'], e1)
    assert th.equal(graph.get_e_repr()['id'], th.arange(4))

    # test conversion after modifying DGLGraph
    # pop id so we don't need to provide id when adding edges
    graph.pop_e_repr('id')
    new_n = th.randn(2, 3)
    new_e = th.randn(3, 5)
    graph.add_nodes(2, data={'n1': new_n})
    # add three edges, one is a multi-edge
    graph.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    n1 = th.cat((n1, new_n), dim=0)
    e1 = th.cat((e1, new_e), dim=0)

    # DGLGraph -> networkx again
    nxg = graph.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    assert len(nxg) == 7
    assert nxg.size() == 7
    _check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now convert from networkx without id in the edge features:
    # first strip the id from every edge
    for _, _, attrs in nxg.edges(data=True):
        attrs.pop('id')
    # then import into a brand-new graph
    graph = DGLGraph(multigraph=True)
    graph.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1'])
    # graph size
    assert graph.number_of_nodes() == 7
    assert graph.number_of_edges() == 7
    # number of features
    assert len(graph.ndata) == 1
    assert len(graph.edata) == 1
    # feature values
    assert U.allclose(graph.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    ordered = [attrs['e1'].unsqueeze(0)
               for _, _, attrs in nxg.edges(data=True)]
    edge_feat = th.cat(ordered, dim=0)
    assert U.allclose(graph.edata['e1'], edge_feat)
def test_nx_conversion():
    """Round-trip node/edge features between DGLGraph and networkx.

    Exports selected features with ``to_networkx``, imports them back with
    ``from_networkx`` (including the non-tensor edge 'id'), then grows the
    graph and checks a second export.
    """
    def _check_nodes(nxg, expected):
        if len(nxg) > 0:
            collected = ddict(list)
            for nid, attrs in nxg.nodes(data=True):
                assert len(attrs) == len(expected)
                for key in nxg.nodes[nid]:
                    collected[key].append(attrs[key].unsqueeze(0))
            for key, rows in collected.items():
                assert U.allclose(th.cat(rows, dim=0), expected[key])
        else:
            assert len(expected) == 0

    def _check_edges(nxg, total, expected):
        if total > 0:
            # Pre-sized per-feature lists, filled by edge id.
            by_id = ddict(lambda: [0] * total)
            for _, _, attrs in nxg.edges(data=True):
                assert len(attrs) == len(expected) + 1  # extra id
                eid = attrs['id']
                for key in expected:
                    by_id[key][eid] = attrs[key].unsqueeze(0)
            for key, rows in by_id.items():
                assert U.allclose(th.cat(rows, dim=0), expected[key])
        else:
            assert len(expected) == 0

    def _check_nx_feature(nxg, nf, ef):
        num_edges = nxg.size()
        _check_nodes(nxg, nf)
        _check_edges(nxg, num_edges, ef)

    n1 = th.randn(5, 3)
    n2 = th.randn(5, 10)
    n3 = th.randn(5, 4)
    e1 = th.randn(4, 5)
    e2 = th.randn(4, 7)

    graph = DGLGraph(multigraph=True)
    graph.add_nodes(5)
    graph.add_edges([0, 1, 3, 4], [2, 4, 0, 3])
    graph.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    graph.edata.update({'e1': e1, 'e2': e2})

    # DGLGraph -> networkx
    nxg = graph.to_networkx(node_attrs=['n1', 'n3'],
                            edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # networkx -> DGLGraph; the id feature tests the non-tensor copy path
    graph.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
    assert graph.number_of_nodes() == 5
    assert graph.number_of_edges() == 4
    assert U.allclose(graph.get_n_repr()['n1'], n1)
    edge_repr_e1 = graph.get_e_repr()['e1']
    assert U.allclose(edge_repr_e1, e1)
    edge_repr_id = graph.get_e_repr()['id']
    assert th.equal(edge_repr_id, th.arange(4))
    graph.pop_e_repr('id')

    # modify the DGLGraph
    new_n = th.randn(2, 3)
    new_e = th.randn(3, 5)
    graph.add_nodes(2, data={'n1': new_n})
    # add three edges, one is a multi-edge
    graph.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    n1 = th.cat((n1, new_n), dim=0)
    e1 = th.cat((e1, new_e), dim=0)

    # DGLGraph -> networkx again
    nxg = graph.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    assert len(nxg) == 7
    assert nxg.size() == 7
    _check_nx_feature(nxg, {'n1': n1}, {'e1': e1})
def reverse(g, share_ndata=False, share_edata=False):
    """Build the reverse (converse / transpose) of a directed graph.

    The returned :class:`DGLGraph` has the same nodes as ``g`` and one edge
    ``v -> u`` for every edge ``u -> v`` of ``g``.

    Notes
    -----
    * :class:`~dgl.BatchedDGLGraph` inputs are not supported.
    * The two graphs are not kept in sync: changing the topology of one
      does not update the other. With shared node/edge attributes this can
      leave the features mismatched once either topology is modified
      independently.

    Parameters
    ----------
    g : dgl.DGLGraph
        The graph to reverse.
    share_ndata : bool, optional
        When True, the reversed graph uses the very same node frame as
        ``g``, so node attributes share memory. When False, the reversed
        graph is created without node attributes.
    share_edata : bool, optional
        When True, the reversed graph uses the very same edge frame as
        ``g``, so edge attributes share memory. When False, the reversed
        graph is created without edge attributes.

    Returns
    -------
    DGLGraph
        The reversed graph.

    Examples
    --------
    Create a graph to reverse.

    >>> import dgl
    >>> import torch as th
    >>> g = dgl.DGLGraph()
    >>> g.add_nodes(3)
    >>> g.add_edges([0, 1, 2], [1, 2, 0])
    >>> g.ndata['h'] = th.tensor([[0.], [1.], [2.]])
    >>> g.edata['h'] = th.tensor([[3.], [4.], [5.]])

    Reverse the graph and examine its structure.

    >>> rg = g.reverse(share_ndata=True, share_edata=True)
    >>> print(rg)
    DGLGraph with 3 nodes and 3 edges.
    Node data: {'h': Scheme(shape=(1,), dtype=torch.float32)}
    Edge data: {'h': Scheme(shape=(1,), dtype=torch.float32)}

    The edges are reversed now.

    >>> rg.has_edges_between([1, 2, 0], [0, 1, 2])
    tensor([1, 1, 1])

    Reversed edges have the same feature as the original ones.

    >>> g.edges[[0, 2], [1, 0]].data['h'] == rg.edges[[1, 0], [0, 2]].data['h']
    tensor([[1],
            [1]], dtype=torch.uint8)

    The node/edge features of the reversed graph share memory with the
    original graph, which is helpful for both forward computation and back
    propagation.

    >>> g.ndata['h'] = g.ndata['h'] + 1
    >>> rg.ndata['h']
    tensor([[1.],
            [2.],
            [3.]])
    """
    assert not isinstance(g, BatchedDGLGraph), \
        'reverse is not supported for a BatchedDGLGraph object'

    flipped = DGLGraph(multigraph=g.is_multigraph)
    flipped.add_nodes(g.number_of_nodes())

    # Swap the endpoints: old destinations become the new sources.
    endpoints = g.edges()
    old_src, old_dst = endpoints[0], endpoints[1]
    flipped.add_edges(old_dst, old_src)

    # Sharing means pointing at the same frame object, not copying it.
    frame_options = (
        (share_ndata, '_node_frame'),
        (share_edata, '_edge_frame'),
    )
    for shared, frame_attr in frame_options:
        if shared:
            setattr(flipped, frame_attr, getattr(g, frame_attr))

    return flipped