def test_multi_recv_0deg():
    """Exercise ``recv`` on a two-node graph in which node 0 has no in-edge.

    The graph has the single edge 0 -> 1.  Message, reduce and apply
    functions plus a node initializer are registered on the graph before
    any node is added.
    """
    g = DGLGraph()

    def _msg_fn(edges):
        # forward the source feature as the message
        return {'m': edges.src['h']}

    def _reduce_fn(nodes):
        # own feature plus the sum of the mailbox over axis 1
        return {'h': nodes.data['h'] + nodes.mailbox['m'].sum(1)}

    def _apply_fn(nodes):
        # double the node feature
        return {'h': nodes.data['h'] * 2}

    def _fill_two(shape, dtype, ctx, ids):
        # initializer producing a tensor filled with 2
        return 2 + F.zeros(shape, dtype=dtype, ctx=ctx)

    g.register_message_func(_msg_fn)
    g.register_reduce_func(_reduce_fn)
    g.register_apply_node_func(_apply_fn)
    g.set_n_initializer(_fill_two)
    g.add_nodes(2)
    g.add_edge(0, 1)

    # recv on both the 0-degree node and the node with an in-edge
    init_feat = F.randn((2, 5))
    g.ndata['h'] = init_feat
    g.send((0, 1))
    g.recv([0, 1])
    updated = g.ndata['h']

    # node 0 (0-degree): expected to be initializer value 2, then doubled
    expected_zero_deg = F.full((5, ), 4, F.float32)
    assert F.allclose(updated[0], expected_zero_deg)
    # node 1: (own feature + message from node 0), then doubled
    expected_dst = F.sum(init_feat, 0) * 2
    assert F.allclose(updated[1], expected_dst)

    # a second recv on the 0-degree node doubles it again
    g.recv([0])
    expected_zero_deg = F.full((5, ), 8, F.float32)
    assert F.allclose(g.nodes[0].data['h'], expected_zero_deg)

    # a second recv on node 1, which now has no pending message
    g.recv([1])
    expected_dst = F.sum(init_feat, 0) * 4
    assert F.allclose(g.nodes[1].data['h'], expected_dst)
def test_pull_0deg():
    """Exercise ``pull`` on a two-node graph in which node 0 has no in-edge.

    The graph has the single edge 0 -> 1; an initializer is registered for
    the ``'h'`` node field.
    """
    g = DGLGraph()
    g.add_nodes(2)
    g.add_edge(0, 1)

    def _msg_fn(edges):
        # forward the source feature as the message
        return {'m': edges.src['h']}

    def _reduce_fn(nodes):
        # own feature plus the sum of the mailbox over axis 1
        return {'h': nodes.data['h'] + nodes.mailbox['m'].sum(1)}

    def _apply_fn(nodes):
        # double the node feature
        return {'h': nodes.data['h'] * 2}

    def _fill_two(shape, dtype, ctx, ids):
        # initializer producing a tensor filled with 2
        return 2 + th.zeros(shape, dtype=dtype, device=ctx)

    g.register_message_func(_msg_fn)
    g.register_reduce_func(_reduce_fn)
    g.register_apply_node_func(_apply_fn)
    g.set_n_initializer(_fill_two, 'h')

    # test#1: pull on both the 0-degree node and the node with an in-edge
    init_feat = th.randn((2, 5))
    g.ndata['h'] = init_feat
    g.pull([0, 1])
    pulled = g.ndata.pop('h')
    # node 0 (0-degree): expected to be initializer value 2, then doubled
    expected_zero_deg = th.full((5, ), 4)
    assert U.allclose(pulled[0], expected_zero_deg)
    # node 1: (own feature + message from node 0), then doubled
    expected_dst = th.sum(init_feat, 0) * 2
    assert U.allclose(pulled[1], expected_dst)

    # test#2: pull on the 0-degree node only
    init_feat = th.randn((2, 5))
    g.ndata['h'] = init_feat
    g.pull(0)
    pulled = g.ndata.pop('h')
    # node 0: only the apply function runs on the existing feature
    assert U.allclose(pulled[0], 2 * init_feat[0])
    # node 1: expected to be left untouched
    assert U.allclose(pulled[1], init_feat[1])
def test_recv_0deg_newfld():
    """Exercise ``recv`` with a 0-degree node when the reducer writes a new field.

    The reducer reads ``'h'`` and writes ``'h1'``; the graph has the single
    edge 0 -> 1, so node 0 has no in-edge.
    """
    g = DGLGraph()
    g.add_nodes(2)
    g.add_edge(0, 1)

    def _msg_fn(edges):
        # forward the source feature as the message
        return {'m': edges.src['h']}

    def _reduce_fn(nodes):
        # result goes into the new field 'h1'
        return {'h1': nodes.data['h'] + mx.nd.sum(nodes.mailbox['m'], 1)}

    def _apply_fn(nodes):
        # double the new field
        return {'h1': nodes.data['h1'] * 2}

    def _fill_two(shape, dtype, ctx, ids):
        # initializer producing a tensor filled with 2
        return 2 + mx.nd.zeros(shape=shape, dtype=dtype, ctx=ctx)

    g.register_message_func(_msg_fn)
    g.register_reduce_func(_reduce_fn)
    g.register_apply_node_func(_apply_fn)

    # test#1: recv on both the 0-degree node and the node with an in-edge
    init_feat = mx.nd.random.normal(shape=(2, 5))
    g.set_n_initializer(_fill_two, 'h1')
    g.ndata['h'] = init_feat
    g.send((0, 1))
    g.recv([0, 1])
    received = g.ndata.pop('h1')
    # node 0 (0-degree): expected to be initializer value 2, then doubled
    assert np.allclose(received[0].asnumpy(), np.full((5,), 4))
    # node 1: (own feature + message from node 0), then doubled
    expected_dst = mx.nd.sum(init_feat, 0).asnumpy() * 2
    assert np.allclose(received[1].asnumpy(), expected_dst)

    # test#2: recv on the 0-degree node only
    init_feat = mx.nd.random.normal(shape=(2, 5))
    g.ndata['h'] = init_feat
    # 'h1' has to exist beforehand for this case (marked as necessary
    # by the original author)
    g.ndata['h1'] = mx.nd.full((2, 5), -1)
    g.send((0, 1))
    g.recv(0)
    received = g.ndata.pop('h1')
    # node 0: only the apply function runs, -1 * 2
    assert np.allclose(received[0].asnumpy(), np.full((5,), -2))
    # node 1: expected to keep the preset value
    assert np.allclose(received[1].asnumpy(), np.full((5,), -1))
def test_update_all_0deg():
    """Exercise ``update_all`` on graphs that contain 0-degree nodes.

    Case 1 is a 5-node star whose edges all point at node 0; case 2 is a
    5-node graph with no edges at all.
    """
    # test#1: nodes 1..4 each send to node 0
    g = DGLGraph()
    g.add_nodes(5)
    for src in (1, 2, 3, 4):
        g.add_edge(src, 0)

    def _msg_fn(edges):
        # forward the source feature as the message
        return {'m': edges.src['h']}

    def _reduce_fn(nodes):
        # own feature plus the sum of the mailbox over axis 1
        return {'h': nodes.data['h'] + mx.nd.sum(nodes.mailbox['m'], 1)}

    def _apply_fn(nodes):
        # double the node feature
        return {'h': nodes.data['h'] * 2}

    def _fill_two(shape, dtype, ctx, ids):
        # initializer producing a tensor filled with 2
        return 2 + mx.nd.zeros(shape, dtype=dtype, ctx=ctx)

    g.set_n_initializer(_fill_two, 'h')
    old_repr = mx.nd.random.normal(shape=(5, 5))
    g.ndata['h'] = old_repr
    g.update_all(_msg_fn, _reduce_fn, _apply_fn)
    new_repr = g.ndata['h']
    # Rows 1..4 belong to the 0-degree nodes: expected to hold the
    # initializer value with the apply function applied on top.
    expected_zero_deg = 2 * (2 + np.zeros((4, 5)))
    assert np.allclose(new_repr[1:].asnumpy(), expected_zero_deg)
    # Row 0 is the sum of all node features, doubled by the apply function.
    expected_hub = 2 * mx.nd.sum(old_repr, 0).asnumpy()
    assert np.allclose(new_repr[0].asnumpy(), expected_hub)

    # test#2: graph with no edge
    g = DGLGraph()
    g.add_nodes(5)
    g.set_n_initializer(_fill_two, 'h')
    g.ndata['h'] = old_repr
    g.update_all(_msg_fn, _reduce_fn, _apply_fn)
    new_repr = g.ndata['h']
    # only the apply function is expected to run
    assert np.allclose(new_repr.asnumpy(), 2 * old_repr.asnumpy())
def _load(self):
    """Load the dataset text file at ``self.file``.

    File layout, as read by this method:

    * line 1: ``N``, the number of graphs;
    * per graph, one header line ``n_nodes graph_label``;
    * per node, one line ``node_label n_edges nbr_1 ... nbr_k [attrs...]``
      where any tokens after the ``n_edges`` neighbour ids are parsed as
      float node attributes.

    Populates ``self.graphs``, ``self.labels``, the label dictionaries and
    the statistics counters (``self.n``, ``self.m``), then derives
    ``self.gclasses``, ``self.nclasses``, ``self.eclasses`` and
    ``self.dim_nfeats`` and prints a summary.  When no graph carried node
    attributes, one-hot ``'attr'`` features are generated from the node
    in-degree (``self.degree_as_nlabel``) or from the node label.

    Raises
    ------
    Exception
        If a node line holds fewer tokens than its declared edge count
        requires.
    """
    print('loading data...')
    with open(self.file, 'r') as f:
        # line_1 == N, total number of graphs
        self.N = int(f.readline().strip())

        for i in range(self.N):
            if (i + 1) % 10 == 0 and self.verbosity is True:
                print('processing graph {}...'.format(i + 1))

            grow = f.readline().strip().split()
            # line_2 == [n_nodes, l] is equal to
            # [node number of a graph, class label of a graph]
            n_nodes, glabel = [int(w) for w in grow]

            # relabel graphs
            if glabel not in self.glabel_dict:
                mapped = len(self.glabel_dict)
                self.glabel_dict[glabel] = mapped

            self.labels.append(self.glabel_dict[glabel])

            g = DGLGraph()
            g.add_nodes(n_nodes)

            nlabels = []  # node labels
            nattrs = []  # node attributes if it has
            m_edges = 0

            for j in range(n_nodes):
                nrow = f.readline().strip().split()

                # handle edges and attributes(if has)
                tmp = int(nrow[1]) + 2  # tmp == 2 + #edges
                if tmp == len(nrow):
                    # no node attributes
                    nrow = [int(w) for w in nrow]
                elif tmp < len(nrow):
                    # Tokens beyond the neighbour list are attributes.
                    # Slice them off BEFORE nrow is replaced by its
                    # integer prefix, otherwise the slice is empty.
                    nattr = [float(w) for w in nrow[tmp:]]
                    nrow = [int(w) for w in nrow[:tmp]]
                    nattrs.append(nattr)
                else:
                    # fewer tokens than the declared number of edges
                    raise Exception('edge number is incorrect!')

                # relabel nodes if it has labels
                # if it doesn't have node labels, then every nrow[0]==0
                if nrow[0] not in self.nlabel_dict:
                    mapped = len(self.nlabel_dict)
                    self.nlabel_dict[nrow[0]] = mapped

                # the raw label is stored; it is mapped through
                # nlabel_dict later when one-hot features are generated
                nlabels.append(nrow[0])

                m_edges += nrow[1]
                g.add_edges(j, nrow[2:])

                # add self loop
                if self.self_loop:
                    m_edges += 1
                    g.add_edge(j, j)

                if (j + 1) % 10 == 0 and self.verbosity is True:
                    print(
                        'processing node {} of graph {}...'.format(
                            j + 1, i + 1))
                    print('this node has {} edgs.'.format(
                        nrow[1]))

            if nattrs:
                g.ndata['attr'] = np.stack(nattrs)
                self.nattrs_flag = True

            g.ndata['label'] = np.array(nlabels)
            if len(self.nlabel_dict) > 1:
                self.nlabels_flag = True

            assert len(g) == n_nodes

            # update statistics of graphs
            self.n += n_nodes
            self.m += m_edges

            self.graphs.append(g)

    # if no attr
    if not self.nattrs_flag:
        print('there are no node features in this dataset!')
        label2idx = {}
        # generate node attr by node degree
        if self.degree_as_nlabel:
            print('generate node features by node degree...')
            nlabel_set = set()
            for g in self.graphs:
                # actually this label shouldn't be updated
                # in case users want to keep it
                # but usually no features means no labels, fine.
                g.ndata['label'] = g.in_degrees()
                # extracting unique node labels
                nlabel_set = nlabel_set.union(
                    set(g.ndata['label'].numpy()))

            nlabel_set = list(nlabel_set)

            # in case the labels/degrees are not continuous number
            self.ndegree_dict = {
                label: idx for idx, label in enumerate(nlabel_set)
            }
            label2idx = self.ndegree_dict
        # generate node attr by node label
        else:
            print('generate node features by node label...')
            label2idx = self.nlabel_dict

        # one-hot encode each node's label/degree index
        for g in self.graphs:
            g.ndata['attr'] = np.zeros((
                g.number_of_nodes(), len(label2idx)))
            g.ndata['attr'][range(g.number_of_nodes(
            )), [label2idx[nl.item()] for nl in g.ndata['label']]] = 1

    # after load, get the #classes and #dim
    self.gclasses = len(self.glabel_dict)
    self.nclasses = len(self.nlabel_dict)
    self.eclasses = len(self.elabel_dict)
    self.dim_nfeats = len(self.graphs[0].ndata['attr'][0])

    print('Done.')
    print(
        """
        -------- Data Statistics --------'
        #Graphs: %d
        #Graph Classes: %d
        #Nodes: %d
        #Node Classes: %d
        #Node Features Dim: %d
        #Edges: %d
        #Edge Classes: %d
        Avg. of #Nodes: %.2f
        Avg. of #Edges: %.2f
        Graph Relabeled: %s
        Node Relabeled: %s
        Degree Relabeled(If degree_as_nlabel=True): %s \n """ % (
            self.N, self.gclasses, self.n, self.nclasses,
            self.dim_nfeats, self.m, self.eclasses,
            self.n / self.N, self.m / self.N, self.glabel_dict,
            self.nlabel_dict, self.ndegree_dict))
def test_send_multigraph():
    """Exercise ``send``/``recv`` on a multigraph with parallel edges.

    Edges 0, 1 and 2 all run 0 -> 1; edge 3 runs 2 -> 1.  The reducer takes
    the element-wise maximum over the mailbox.
    """
    g = DGLGraph(multigraph=True)
    g.add_nodes(3)
    for _ in range(3):
        g.add_edge(0, 1)
    g.add_edge(2, 1)

    def _msg_plain(edges):
        return {'a': edges.data['a']}

    def _msg_tripled(edges):
        return {'a': edges.data['a'] * 3}

    def _reduce_max(nodes):
        return {'a': nodes.mailbox['a'].max(1)[0]}

    def _expected_max(*rows):
        # element-wise max over the given edge-feature rows
        return th.stack(rows, 0).max(0)[0]

    # send by eid
    edge_feat = th.randn(4, 5)
    g.ndata['a'] = th.zeros(3, 5)
    g.edata['a'] = edge_feat
    g.send([0, 2], message_func=_msg_plain)
    g.recv(1, _reduce_max)
    node_feat = g.ndata['a']
    assert U.allclose(node_feat[1], _expected_max(edge_feat[0], edge_feat[2]))

    g.ndata['a'] = th.zeros(3, 5)
    g.edata['a'] = edge_feat
    g.send([0, 2, 3], message_func=_msg_plain)
    g.recv(1, _reduce_max)
    node_feat = g.ndata['a']
    assert U.allclose(
        node_feat[1],
        _expected_max(edge_feat[0], edge_feat[2], edge_feat[3]))

    # send on multigraph: (src, dst) pairs address every parallel edge
    g.ndata['a'] = th.zeros(3, 5)
    g.edata['a'] = edge_feat
    g.send(([0, 2], [1, 1]), _msg_plain)
    g.recv(1, _reduce_max)
    node_feat = g.ndata['a']
    assert U.allclose(node_feat[1], edge_feat.max(0)[0])

    # consecutive send and send_on
    g.ndata['a'] = th.zeros(3, 5)
    g.edata['a'] = edge_feat
    g.send((2, 1), _msg_plain)
    g.send([0, 1], message_func=_msg_tripled)
    g.recv(1, _reduce_max)
    node_feat = g.ndata['a']
    assert U.allclose(
        node_feat[1],
        _expected_max(edge_feat[0] * 3, edge_feat[1] * 3, edge_feat[3]))

    # consecutive send_on
    g.ndata['a'] = th.zeros(3, 5)
    g.edata['a'] = edge_feat
    g.send(0, message_func=_msg_plain)
    g.send(1, message_func=_msg_tripled)
    g.recv(1, _reduce_max)
    node_feat = g.ndata['a']
    assert U.allclose(
        node_feat[1], _expected_max(edge_feat[0], edge_feat[1] * 3))

    # send_and_recv_on
    g.ndata['a'] = th.zeros(3, 5)
    g.edata['a'] = edge_feat
    g.send_and_recv([0, 2, 3], message_func=_msg_plain,
                    reduce_func=_reduce_max)
    node_feat = g.ndata['a']
    assert U.allclose(
        node_feat[1],
        _expected_max(edge_feat[0], edge_feat[2], edge_feat[3]))
    # nodes 0 and 2 receive nothing and keep their zeros
    assert U.allclose(node_feat[[0, 2]], th.zeros(2, 5))
def test_nx_conversion():
    """Round-trip a DGLGraph through networkx, with and without edge ids."""

    def _check_nx_feature(nxg, nf, ef):
        """Assert that the attributes stored on ``nxg`` equal ``nf``/``ef``.

        Used to validate the output of ``to_networkx``.  ``nf`` and ``ef``
        map feature names to the expected node / edge tensors.
        """
        n_nodes = len(nxg)
        n_edges = nxg.size()

        if n_nodes > 0:
            per_key = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for key in nxg.nodes[nid]:
                    per_key[key].append(attr[key].unsqueeze(0))
            for key, rows in per_key.items():
                assert U.allclose(th.cat(rows, dim=0), nf[key])
        else:
            assert len(nf) == 0

        if n_edges > 0:
            # one slot per edge id so rows end up in edge-id order
            per_key = ddict(lambda: [0] * n_edges)
            for _, _, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1  # extra id
                eid = attr['id']
                for key in ef:
                    per_key[key][eid] = attr[key].unsqueeze(0)
            for key, rows in per_key.items():
                assert U.allclose(th.cat(rows, dim=0), ef[key])
        else:
            assert len(ef) == 0

    n1 = th.randn(5, 3)
    n2 = th.randn(5, 10)
    n3 = th.randn(5, 4)
    e1 = th.randn(4, 5)
    e2 = th.randn(4, 7)

    g = DGLGraph(multigraph=True)
    g.add_nodes(5)
    g.add_edges([0, 1, 3, 4], [2, 4, 0, 3])
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx
    nxg = g.to_networkx(node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph, nx graph has id in edge feature
    # use id feature to test non-tensor copy
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    # test with existing dglgraph (so existing features should be cleared)
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    # check feature values
    assert U.allclose(g.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert U.allclose(g.edata['e1'], e1)
    assert th.equal(g.get_e_repr()['id'], th.arange(4))

    # test conversion after modifying DGLGraph
    # pop id so we don't need to provide id when adding edges
    g.pop_e_repr('id')
    extra_nodes = th.randn(2, 3)
    extra_edges = th.randn(3, 5)
    g.add_nodes(2, data={'n1': extra_nodes})
    # add three edges, one is a multi-edge
    g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': extra_edges})
    n1 = th.cat((n1, extra_nodes), dim=0)
    e1 = th.cat((e1, extra_edges), dim=0)
    # convert to networkx again
    nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    assert len(nxg) == 7
    assert nxg.size() == 7
    _check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now test convert from networkx without id in edge feature
    # first pop id in edge feature
    for _, _, attr in nxg.edges(data=True):
        attr.pop('id')
    # test with a new graph
    g = DGLGraph(multigraph=True)
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1'])
    # check graph size
    assert g.number_of_nodes() == 7
    assert g.number_of_edges() == 7
    # check number of features
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    # check feature values
    assert U.allclose(g.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    rows = []
    for _, _, attr in nxg.edges(data=True):
        rows.append(attr['e1'].unsqueeze(0))
    expected_e1 = th.cat(rows, dim=0)
    assert U.allclose(g.edata['e1'], expected_e1)
def test_nx_conversion():
    """Round-trip a DGLGraph through networkx and back, then modify it."""

    def _check_nx_feature(nxg, nf, ef):
        """Assert that the attributes stored on ``nxg`` equal ``nf``/``ef``.

        ``nf`` and ``ef`` map feature names to the expected node / edge
        tensors.
        """
        n_nodes = len(nxg)
        n_edges = nxg.size()

        if n_nodes > 0:
            per_key = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for key in nxg.nodes[nid]:
                    per_key[key].append(attr[key].unsqueeze(0))
            for key, rows in per_key.items():
                assert U.allclose(th.cat(rows, dim=0), nf[key])
        else:
            assert len(nf) == 0

        if n_edges > 0:
            # one slot per edge id so rows end up in edge-id order
            per_key = ddict(lambda: [0] * n_edges)
            for _, _, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1  # extra id
                eid = attr['id']
                for key in ef:
                    per_key[key][eid] = attr[key].unsqueeze(0)
            for key, rows in per_key.items():
                assert U.allclose(th.cat(rows, dim=0), ef[key])
        else:
            assert len(ef) == 0

    n1 = th.randn(5, 3)
    n2 = th.randn(5, 10)
    n3 = th.randn(5, 4)
    e1 = th.randn(4, 5)
    e2 = th.randn(4, 7)

    g = DGLGraph(multigraph=True)
    g.add_nodes(5)
    g.add_edges([0, 1, 3, 4], [2, 4, 0, 3])
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx
    nxg = g.to_networkx(node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph
    # use id feature to test non-tensor copy
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    assert U.allclose(g.get_n_repr()['n1'], n1)
    assert U.allclose(g.get_e_repr()['e1'], e1)
    assert th.equal(g.get_e_repr()['id'], th.arange(4))

    g.pop_e_repr('id')

    # test modifying DGLGraph
    extra_nodes = th.randn(2, 3)
    extra_edges = th.randn(3, 5)
    g.add_nodes(2, data={'n1': extra_nodes})
    # add three edges, one is a multi-edge
    g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': extra_edges})
    n1 = th.cat((n1, extra_nodes), dim=0)
    e1 = th.cat((e1, extra_edges), dim=0)
    # convert to networkx again
    nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    assert len(nxg) == 7
    assert nxg.size() == 7
    _check_nx_feature(nxg, {'n1': n1}, {'e1': e1})
def reverse(g, share_ndata=False, share_edata=False):
    """Build the reverse of a directed graph.

    The reverse (a.k.a. converse or transpose) of a directed graph has the
    same nodes, with every edge ``u -> v`` replaced by ``v -> u``.  A new
    :class:`DGLGraph` is constructed; the input graph is not modified.

    Notes
    -----
    * :class:`~dgl.BatchedDGLGraph` inputs are not supported.
    * The two graphs are not kept in sync: changing the topology of one
      does not update the other.  This matters most when node/edge
      attributes are shared, because independent topology changes on the
      two graphs can leave the shared features mismatched.

    Parameters
    ----------
    g : dgl.DGLGraph
        The graph to reverse.
    share_ndata: bool, optional
        If True, the reversed graph uses the same node frame as ``g``, so
        node attributes share memory.  Otherwise the reversed graph starts
        without node attributes.
    share_edata: bool, optional
        If True, the reversed graph uses the same edge frame as ``g``, so
        edge attributes share memory.  Otherwise the reversed graph has no
        edge attributes.

    Returns
    -------
    dgl.DGLGraph
        The reversed graph.

    Examples
    --------
    Create a graph to reverse.

    >>> import dgl
    >>> import torch as th
    >>> g = dgl.DGLGraph()
    >>> g.add_nodes(3)
    >>> g.add_edges([0, 1, 2], [1, 2, 0])
    >>> g.ndata['h'] = th.tensor([[0.], [1.], [2.]])
    >>> g.edata['h'] = th.tensor([[3.], [4.], [5.]])

    Reverse the graph and examine its structure.

    >>> rg = g.reverse(share_ndata=True, share_edata=True)
    >>> print(rg)
    DGLGraph with 3 nodes and 3 edges.
    Node data: {'h': Scheme(shape=(1,), dtype=torch.float32)}
    Edge data: {'h': Scheme(shape=(1,), dtype=torch.float32)}

    The edges are reversed now.

    >>> rg.has_edges_between([1, 2, 0], [0, 1, 2])
    tensor([1, 1, 1])

    Reversed edges have the same feature as the original ones.

    >>> g.edges[[0, 2], [1, 0]].data['h'] == rg.edges[[1, 0], [0, 2]].data['h']
    tensor([[1],
            [1]], dtype=torch.uint8)

    The node/edge features of the reversed graph share memory with the
    original graph, which is helpful for both forward computation and back
    propagation.

    >>> g.ndata['h'] = g.ndata['h'] + 1
    >>> rg.ndata['h']
    tensor([[1.],
            [2.],
            [3.]])
    """
    assert not isinstance(g, BatchedDGLGraph), \
        'reverse is not supported for a BatchedDGLGraph object'

    rev = DGLGraph(multigraph=g.is_multigraph)
    rev.add_nodes(g.number_of_nodes())

    # swap the endpoint lists: original destinations become sources
    endpoints = g.edges()
    rev.add_edges(endpoints[1], endpoints[0])

    # sharing is done by aliasing the frame objects, not by copying
    if share_ndata:
        rev._node_frame = g._node_frame
    if share_edata:
        rev._edge_frame = g._edge_frame
    return rev
def __init__(self, split):
    """Build the graphs and per-edge targets for one data split.

    Samples are taken from the module-level ``train_data_list`` using the
    module-level ``random_index_list``: the first 70% of that list feeds
    the ``'train'`` split and the remainder feeds the ``'val'`` split.
    Any other ``split`` value leaves the dataset empty.

    Parameters
    ----------
    split : str
        ``'train'`` or ``'val'``.

    Attributes set
    --------------
    data_list : list
        One ``DGLGraph`` per sample, with node field ``'h'`` and edge
        field ``'we'`` stored as CUDA tensors.
    gt_list : list
        One list per sample holding the ``'sc'`` value of each edge.
    val_data_list : list
        Created (empty) only for the ``'val'`` split.
    """
    super(DGLDataset, self).__init__()
    self.device = torch.device("cuda")
    self.split = split
    self.data_list = []
    self.gt_list = []

    def _build_graph(d_data):
        """Convert one raw sample dict into ``(graph, edge targets)``."""
        nodes = d_data['nodes']
        edges = d_data['edges']

        g = DGLGraph()
        g.add_nodes(len(nodes))

        # node record keys: 'idx' (node index), 't' (type id), 'x', 'y', 'z'
        for node_info in nodes:
            idx = int(node_info['idx'])
            tp = int(node_info['t'])
            x = float(node_info['x'])
            y = float(node_info['y'])
            z = float(node_info['z'])
            # a single row [type, x, y, z] written to node idx
            g.nodes[idx].data['h'] = torch.tensor([[tp, x, y, z]]).cuda()

        # edge record keys: 'index0', 'index1' (endpoints), 'et', 'sc'
        edge_types = []
        gt = []
        for edge_info in edges:
            idx0 = int(edge_info['index0'])
            idx1 = int(edge_info['index1'])
            et = int(edge_info['et'])
            sc = float(edge_info['sc'])
            g.add_edge(idx0, idx1)
            edge_types.append([et])
            gt.append(sc)

        g.edata['we'] = torch.tensor(edge_types).cuda()
        return g, gt

    # 70/30 split point over the index list
    n7 = int(len(random_index_list) * 0.7)

    if split == 'train':
        selected = random_index_list[:n7]
    elif split == 'val':
        self.val_data_list = []
        selected = random_index_list[n7:]
    else:
        selected = []

    for i in selected:
        # NOTE(review): i is already an entry of random_index_list and is
        # used again as an index into it; kept exactly as in the original
        # -- confirm that this double indirection is intended.
        g, gt = _build_graph(train_data_list[random_index_list[i]])
        self.data_list.append(g)
        self.gt_list.append(gt)

    if split == 'train':
        print('len data ', len(self.data_list))
        print('len gt ', len(self.gt_list))
    elif split == 'val':
        print('len v data ', len(self.data_list))
        print('len v gt ', len(self.gt_list))