def test_ogbg_gin(virtual_node):
    # Test for ogbg-mol datasets
    data_info = {
        'name': 'ogbg-molhiv',
        'out_size': 1
    }
    model = OGBGGIN(data_info, embed_size=10, num_layers=2,
                    virtual_node=virtual_node)
    num_nodes = 5
    num_edges = 15
    g1 = dgl.rand_graph(num_nodes, num_edges)
    g2 = dgl.rand_graph(num_nodes, num_edges)
    g = dgl.batch([g1, g2])
    num_nodes = g.num_nodes()
    num_edges = g.num_edges()
    nfeat = torch.zeros(num_nodes, 9).long()
    efeat = torch.zeros(num_edges, 3).long()
    model(g, nfeat, efeat)

    # Test for non-ogbg-mol datasets
    data_info = {
        'name': 'a_dataset',
        'out_size': 1,
        'node_feat_size': 15,
        'edge_feat_size': 5
    }
    model = OGBGGIN(data_info, embed_size=10, num_layers=2,
                    virtual_node=virtual_node)
    nfeat = torch.randn(num_nodes, data_info['node_feat_size'])
    efeat = torch.randn(num_edges, data_info['edge_feat_size'])
    model(g, nfeat, efeat)
def test_rand_graph():
    g = dgl.rand_graph(10000, 100000)
    assert g.number_of_nodes() == 10000
    assert g.number_of_edges() == 100000

    # test random seed
    dgl.random.seed(42)
    g1 = dgl.rand_graph(100, 30)
    dgl.random.seed(42)
    g2 = dgl.rand_graph(100, 30)
    u1, v1 = g1.edges()
    u2, v2 = g2.edges()
    assert F.array_equal(u1, u2)
    assert F.array_equal(v1, v2)
def test_formats():
    g = dgl.rand_graph(10, 20)
    # in_degrees works if coo or csc is available
    # out_degrees works if coo or csr is available
    try:
        g.in_degrees()
        g.out_degrees()
        g.formats('coo').in_degrees()
        g.formats('coo').out_degrees()
        g.formats('csc').in_degrees()
        g.formats('csr').out_degrees()
        fail = False
    except DGLError:
        fail = True
    finally:
        assert not fail

    # out_degrees does NOT work if only csc is available
    try:
        g.formats('csc').out_degrees()
        fail = True
    except DGLError:
        fail = False
    finally:
        assert not fail

    # in_degrees does NOT work if only csr is available
    try:
        g.formats('csr').in_degrees()
        fail = True
    except DGLError:
        fail = False
    finally:
        assert not fail
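# A quick way to inspect which sparse formats a graph currently materializes
# (a minimal sketch of the behavior the test above relies on; the printed
# values are illustrative, not guaranteed):
g = dgl.rand_graph(10, 20)
print(g.formats())        # e.g. {'created': ['coo'], 'not created': ['csr', 'csc']}
g_csr = g.formats('csr')  # returns a copy restricted to csr only
print(g_csr.formats())    # csr is now the only allowed format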
def test_basics():
    # FS and OPS are module aliases from the original test file's imports
    # (DGL's stream utilities and dgl.ops, respectively).
    g = rand_graph(10, 20, device=F.cpu())
    x = torch.ones(g.num_nodes(), 10)

    # launch on default stream fetched via torch.cuda
    s = torch.cuda.default_stream(device=F.ctx())
    with torch.cuda.stream(s):
        xx = x.to(device=F.ctx(), non_blocking=True)
    with FS.stream(s):
        gg = g.to(device=F.ctx())
    s.synchronize()
    OPS.copy_u_sum(gg, xx)

    # launch on new stream created via torch.cuda
    s = torch.cuda.Stream(device=F.ctx())
    with torch.cuda.stream(s):
        xx = x.to(device=F.ctx(), non_blocking=True)
    with FS.stream(s):
        gg = g.to(device=F.ctx())
    s.synchronize()
    OPS.copy_u_sum(gg, xx)

    # launch on default stream used in DGL
    xx = x.to(device=F.ctx())
    gg = g.to(device=F.ctx())
    OPS.copy_u_sum(gg, xx)
def test_pickling_subgraph():
    f1 = io.BytesIO()
    f2 = io.BytesIO()
    g = dgl.rand_graph(10000, 100000)
    g.ndata['x'] = F.randn((10000, 4))
    g.edata['x'] = F.randn((100000, 5))
    pickle.dump(g, f1)

    sg = g.subgraph([0, 1])
    sgx = sg.ndata['x']  # materialize
    pickle.dump(sg, f2)
    # TODO(BarclayII): How should I test that the size of the subgraph pickle file
    # should not be as large as the size of the original pickle file?
    # Heuristic used below: the full-graph pickle should be at least 50x larger.
    assert f1.tell() > f2.tell() * 50
    f2.seek(0)
    f2.truncate()

    sgx = sg.edata['x']  # materialize
    pickle.dump(sg, f2)
    assert f1.tell() > f2.tell() * 50
    f2.seek(0)
    f2.truncate()

    sg = g.edge_subgraph([0])
    sgx = sg.edata['x']  # materialize
    pickle.dump(sg, f2)
    assert f1.tell() > f2.tell() * 50
    f2.seek(0)
    f2.truncate()

    sgx = sg.ndata['x']  # materialize
    pickle.dump(sg, f2)
    assert f1.tell() > f2.tell() * 50

    f1.close()
    f2.close()
def test_pna():
    # Test for ogbg-mol datasets
    data_info = {
        'name': 'ogbg-molhiv',
        'delta': 1,
        'out_size': 1
    }
    model = PNA(data_info, embed_size=10, num_layers=2)
    num_nodes = 5
    num_edges = 15
    g = dgl.rand_graph(num_nodes, num_edges)
    nfeat = torch.zeros(num_nodes, 9).long()
    model(g, nfeat)

    # Test for non-ogbg-mol datasets
    data_info = {
        'name': 'a_dataset',
        'node_feat_size': 15,
        'delta': 1,
        'out_size': 1
    }
    model = PNA(data_info, embed_size=10, num_layers=2)
    nfeat = torch.randn(num_nodes, data_info['node_feat_size'])
    model(g, nfeat)
def create_graph(num_part, dist_graph_path, hetero):
    if not hetero:
        g = dgl.rand_graph(10000, 42000)
        g.ndata['feat'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
        g.edata['feat'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
        partition_graph(g, graph_name, num_part, dist_graph_path)
    else:
        from scipy import sparse as spsp
        num_nodes = {'n1': 10000, 'n2': 10010, 'n3': 10020}
        etypes = [('n1', 'r1', 'n2'),
                  ('n1', 'r2', 'n3'),
                  ('n2', 'r3', 'n3')]
        edges = {}
        for etype in etypes:
            src_ntype, _, dst_ntype = etype
            arr = spsp.random(num_nodes[src_ntype], num_nodes[dst_ntype],
                              density=0.001, format='coo', random_state=100)
            edges[etype] = (arr.row, arr.col)
        g = dgl.heterograph(edges, num_nodes)
        g.nodes['n1'].data['feat'] = F.unsqueeze(
            F.arange(0, g.number_of_nodes('n1')), 1)
        g.edges['r1'].data['feat'] = F.unsqueeze(
            F.arange(0, g.number_of_edges('r1')), 1)
        partition_graph(g, graph_name, num_part, dist_graph_path)
def get_single_event(self, event_idx):
    # ------- building the cluster graph ---------- #
    cluster_cell_ID = self.ev_tree['cluster_cell_ID'].array(
        entry_start=event_idx, entry_stop=event_idx + 1, library='np')[0]
    cluster_cell_E = self.ev_tree['cluster_cell_E'].array(
        entry_start=event_idx, entry_stop=event_idx + 1, library='np')[0]
    n_clusters = len(cluster_cell_ID)
    if n_clusters == 0:
        # print('Empty cluster event')
        return {
            'gr': [dgl.rand_graph(2, 1)],
            'truth_E': torch.tensor([-1.])
        }

    graph_list = []
    # ---- loop over clusters ---- #
    for ic in range(n_clusters):
        cell_E = np.array(cluster_cell_E[ic])
        cell_idx = np.array(cluster_cell_ID[ic])
        cluster_cell_pos = torch.tensor(
            [self.id_to_position[x] for x in cell_idx])
        cluster_cell_pos = torch.reshape(
            cluster_cell_pos,
            (1, cluster_cell_pos.shape[0], cluster_cell_pos.shape[1]))
        n_part = len(cluster_cell_pos[0])
        if n_part < 2:
            continue
        if n_part < self.n_neighbor:
            graph_frn = FixedRadiusNNGraph(radius=self.R, n_neighbor=n_part)
        else:
            graph_frn = FixedRadiusNNGraph(radius=self.R,
                                           n_neighbor=self.n_neighbor)
        fps = FarthestPointSampler(n_part)
        centroids = fps(cluster_cell_pos)
        gr_frn = graph_frn(cluster_cell_pos, centroids)
        gr_frn.ndata['x'] = cluster_cell_pos[0]
        gr_frn.ndata['en'] = torch.tensor(cell_E)
        graph_list.append(gr_frn)
    # -------- #
    cluster_energy_truth = self.ev_tree['cluster_ENG_CALIB_TOT'].array(
        entry_start=event_idx, entry_stop=event_idx + 1, library='np')[0]
    # ---------------------------------------------------------------- #
    return {
        'gr': graph_list,
        'truth_E': torch.tensor(cluster_energy_truth)
    }
def test_set_batch_info(idtype):
    ctx = F.ctx()
    g1 = dgl.rand_graph(30, 100).astype(idtype).to(F.ctx())
    g2 = dgl.rand_graph(40, 200).astype(idtype).to(F.ctx())
    bg = dgl.batch([g1, g2])
    batch_num_nodes = F.astype(bg.batch_num_nodes(), idtype)
    batch_num_edges = F.astype(bg.batch_num_edges(), idtype)

    # test homogeneous node subgraph
    sg_n = dgl.node_subgraph(bg, list(range(10, 20)) + list(range(50, 60)))
    induced_nodes = sg_n.ndata['_ID']
    induced_edges = sg_n.edata['_ID']
    new_batch_num_nodes = _get_subgraph_batch_info(
        bg.ntypes, [induced_nodes], batch_num_nodes)
    new_batch_num_edges = _get_subgraph_batch_info(
        bg.canonical_etypes, [induced_edges], batch_num_edges)
    sg_n.set_batch_num_nodes(new_batch_num_nodes)
    sg_n.set_batch_num_edges(new_batch_num_edges)
    subg_n1, subg_n2 = dgl.unbatch(sg_n)
    subg1 = dgl.node_subgraph(g1, list(range(10, 20)))
    subg2 = dgl.node_subgraph(g2, list(range(20, 30)))
    assert subg_n1.num_edges() == subg1.num_edges()
    assert subg_n2.num_edges() == subg2.num_edges()

    # test homogeneous edge subgraph
    sg_e = dgl.edge_subgraph(bg, list(range(40, 70)) + list(range(150, 200)),
                             preserve_nodes=True)
    induced_nodes = sg_e.ndata['_ID']
    induced_edges = sg_e.edata['_ID']
    new_batch_num_nodes = _get_subgraph_batch_info(
        bg.ntypes, [induced_nodes], batch_num_nodes)
    new_batch_num_edges = _get_subgraph_batch_info(
        bg.canonical_etypes, [induced_edges], batch_num_edges)
    sg_e.set_batch_num_nodes(new_batch_num_nodes)
    sg_e.set_batch_num_edges(new_batch_num_edges)
    subg_e1, subg_e2 = dgl.unbatch(sg_e)
    subg1 = dgl.edge_subgraph(g1, list(range(40, 70)), preserve_nodes=True)
    subg2 = dgl.edge_subgraph(g2, list(range(50, 100)), preserve_nodes=True)
    assert subg_e1.num_nodes() == subg1.num_nodes()
    assert subg_e2.num_nodes() == subg2.num_nodes()
def get_single_event(self, event_idx):
    # ------- building the cluster graph ---------- #
    cluster_cell_ID = self.ev_tree['cluster_cell_ID'].array(
        entry_start=event_idx, entry_stop=event_idx + 1, library='np')[0]
    cluster_cell_E = self.ev_tree['cluster_cell_E'].array(
        entry_start=event_idx, entry_stop=event_idx + 1, library='np')[0]
    n_clusters = len(cluster_cell_ID)
    if n_clusters == 0:
        # print('Empty cluster event')
        return {
            'gr': [dgl.rand_graph(2, 1)],
            'truth_E': torch.tensor([-1.])
        }

    graph_list = []
    # ---- loop over clusters ---- #
    for ic in range(n_clusters):
        cell_E = np.array(cluster_cell_E[ic])
        cell_idx = np.array(cluster_cell_ID[ic])
        cluster_cell_pos = np.array(
            [self.id_to_position[x] for x in cell_idx])
        n_part = len(cluster_cell_pos)
        if n_part < self.k:
            knn_g = dgl.knn_graph(torch.tensor(cluster_cell_pos), n_part)
        else:
            knn_g = dgl.knn_graph(torch.tensor(cluster_cell_pos), self.k)
        knn_g.ndata['x'] = torch.tensor(cluster_cell_pos)
        knn_g.ndata['en'] = torch.tensor(cell_E)
        graph_list.append(knn_g)
    # -------- #
    cluster_energy_truth = self.ev_tree['cluster_ENG_CALIB_TOT'].array(
        entry_start=event_idx, entry_stop=event_idx + 1, library='np')[0]
    # ---------------------------------------------------------------- #
    return {
        'gr': graph_list,
        'truth_E': torch.tensor(cluster_energy_truth)
    }
def random_graph(n, hidden_dim, f, sigma_noise=None):
    # Generate a random graph g
    g0 = dgl.rand_graph(n, int(n * np.log(n)))
    random_features = torch.rand(n, hidden_dim)
    g0.ndata["h"] = random_features

    # Make sure that it's symmetric
    adj = g0.adjacency_matrix(False).to_dense()
    adj = adj + torch.eye(n)
    adj = torch.maximum(adj, adj.transpose(0, 1))

    d = g0.out_degrees()
    h = f(random_features)
    y = adj @ h / (d[:, None] + 1)
    if sigma_noise is not None:
        # cast the numpy noise to a tensor so it can be added to y
        y = y + torch.from_numpy(
            np.random.normal(0, sigma_noise, y.shape)).float()

    g = dgl.DGLGraph()
    g.add_nodes(n, {"h": g0.ndata["h"]})
    # NOTE: O(n^2) Python-level loop; a vectorized sketch follows this function
    for i in range(adj.shape[0]):
        for j in range(adj.shape[1]):
            if adj[i, j] == 1:
                g.add_edge(i, j)

    if y.ndim == 2 and y.shape[1] > 1:
        num_classes = y.shape[1]
        y = torch.argmax(y, dim=1)  # class labels; ndata expects tensors
        g.ndata["y"] = y
    else:
        g.ndata["y"] = y.flatten()

    n_train = int(n * 0.2)
    mask = np.zeros((n, ))
    mask[:n_train] = 1
    np.random.shuffle(mask)
    train_mask = mask == 1
    test_mask = mask == 0
    return g, train_mask, test_mask
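# Vectorized sketch of the edge-insertion loop above (assumption: illustrative
# rewrite, not part of the original code). It adds one edge per entry of adj
# that equals 1, matching the loop's condition exactly:
src, dst = torch.nonzero(adj == 1, as_tuple=True)
g_fast = dgl.graph((src, dst), num_nodes=n)
g_fast.ndata["h"] = g0.ndata["h"]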
def graphs_and_features():
    import numpy as np
    import torch

    permutation_idx = np.random.permutation(5)
    permutation_matrix = np.zeros((5, 5), dtype=np.float32)
    permutation_matrix[np.arange(5), permutation_idx] = 1
    permutation_matrix = torch.tensor(permutation_matrix, dtype=torch.float32)

    import dgl
    g0 = dgl.rand_graph(5, 20)
    g1 = dgl.reorder_graph(g0, "custom",
                           permute_config={"nodes_perm": permutation_idx})

    import hpno
    g0 = hpno.heterograph(g0)
    g1 = hpno.heterograph(g1)

    h0 = torch.randn(5, 3)
    h1 = permutation_matrix @ h0
    return g0, g1, h0, h1, permutation_matrix
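# Typical consumption of this fixture (a sketch; SomeEquivariantModel is a
# placeholder, not part of the original code): a permutation-equivariant model
# should commute with the node relabeling encoded by permutation_matrix.
def test_permutation_equivariance(graphs_and_features):
    g0, g1, h0, h1, P = graphs_and_features
    model = SomeEquivariantModel()  # hypothetical model
    assert torch.allclose(P @ model(g0, h0), model(g1, h1), atol=1e-5)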
def test_multi_layer_gin(self):
    """Test MultiLayerGNN with GIN layers."""
    # 1. load dummy config
    config_fname = os.path.join(self.current_path, 'config',
                                'multi_layer_gin.yml')
    with open(config_fname, 'r') as file:
        config = yaml.safe_load(file)['model']

    # 2. dummy data
    graph = dgl.rand_graph(100, 10)
    features = torch.rand(100, 512)

    # 3. multi layer GNN
    model = MultiLayerGNN(input_dim=512, **config)
    out = model(graph, features, with_readout=False)

    # 4. tests
    self.assertIsInstance(out, torch.Tensor)
    self.assertEqual(out.shape[0], 100)
    self.assertEqual(out.shape[1], 96)  # 3 layers x 32 hidden dimensions
def test_partial_edge_softmax():
    g = dgl.rand_graph(30, 900)

    score = F.randn((300, 1))
    score.requires_grad_()
    grad = F.randn((300, 1))
    import numpy as np
    eids = np.random.choice(900, 300, replace=False).astype('int64')
    eids = F.zerocopy_from_numpy(eids)

    # compute partial edge softmax
    y_1 = nn.edge_softmax(g, score, eids)
    y_1.backward(grad)
    grad_1 = score.grad.clone()  # clone: score.grad is zeroed in place below
    score.grad.zero_()

    # compute edge softmax on edge subgraph
    subg = g.edge_subgraph(eids)
    y_2 = nn.edge_softmax(subg, score)
    y_2.backward(grad)
    grad_2 = score.grad.clone()
    score.grad.zero_()

    assert F.allclose(y_1, y_2)
    assert F.allclose(grad_1, grad_2)
def test_pickling_is_pinned(idtype):
    from copy import deepcopy
    g = dgl.rand_graph(10, 20, idtype=idtype, device=F.cpu())
    hg = dgl.heterograph({
        ('user', 'follows', 'user'): ([0, 1], [1, 2]),
        ('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
        ('user', 'wishes', 'game'): ([0, 2], [1, 0]),
        ('developer', 'develops', 'game'): ([0, 1], [0, 1])
    }, idtype=idtype, device=F.cpu())
    for graph in [g, hg]:
        assert not graph.is_pinned()
        graph.pin_memory_()
        assert graph.is_pinned()
        # a graph unpickled from a pinned graph should also be pinned
        pg = _reconstruct_pickle(graph)
        assert pg.is_pinned()
        pg.unpin_memory_()
        # a deepcopy of a pinned graph should be pinned as well
        dg = deepcopy(graph)
        assert dg.is_pinned()
        dg.unpin_memory_()
        graph.unpin_memory_()
def test_edge_softmax(idtype):
    # Basic
    g = dgl.graph(nx.path_graph(3))
    g = g.astype(idtype).to(F.ctx())
    edata = F.ones((g.number_of_edges(), 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test higher dimension case
    edata = F.ones((g.number_of_edges(), 3, 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test both forward and backward with PyTorch built-in softmax.
    # 900 = 30 * 30 edges on 30 nodes, so the graph covers every node pair
    # and edge softmax can be checked against a dense 30x30 softmax.
    g = dgl.rand_graph(30, 900)
    g = g.astype(idtype).to(F.ctx())
    score = F.randn((900, 1))
    score.requires_grad_()
    grad = F.randn((900, 1))
    y = F.softmax(score.view(30, 30), dim=0).view(-1, 1)
    y.backward(grad)
    grad_score = score.grad.clone()  # clone: score.grad is zeroed in place below
    score.grad.zero_()

    y_dgl = nn.edge_softmax(g, score)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    # check forward
    assert F.allclose(y_dgl, y)
    y_dgl.backward(grad)
    # check gradient
    assert F.allclose(score.grad, grad_score)
    print(score.grad[:10], grad_score[:10])
def test_gat_conv():
    ctx = F.ctx()

    # homogeneous graph
    g = dgl.rand_graph(100, 1000)
    gat = nn.GATConv(5, 2, 4)
    feat = F.randn((100, 5))
    gat = gat.to(ctx)
    h = gat(g, feat)
    assert h.shape == (100, 4, 2)

    # bipartite graph
    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    gat = nn.GATConv((5, 10), 2, 4)
    feat = (F.randn((100, 5)), F.randn((200, 10)))
    gat = gat.to(ctx)
    h = gat(g, feat)
    assert h.shape == (200, 4, 2)

    # block (message flow graph)
    g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    seed_nodes = th.unique(g.edges()[1])
    block = dgl.to_block(g, seed_nodes)
    gat = nn.GATConv(5, 2, 4)
    feat = F.randn((block.number_of_src_nodes(), 5))
    gat = gat.to(ctx)
    h = gat(block, feat)
    assert h.shape == (block.number_of_dst_nodes(), 4, 2)
def test_partial_edge_softmax(idtype):
    g = dgl.rand_graph(30, 900)
    g = g.astype(idtype).to(F.ctx())

    score = F.randn((300, 1))
    score.requires_grad_()
    grad = F.randn((300, 1))
    import numpy as np
    eids = np.random.choice(900, 300, replace=False)
    eids = F.tensor(eids, dtype=g.idtype)

    # compute partial edge softmax
    y_1 = nn.edge_softmax(g, score, eids)
    y_1.backward(grad)
    grad_1 = score.grad.clone()  # clone: score.grad is zeroed in place below
    score.grad.zero_()

    # compute edge softmax on edge subgraph
    subg = g.edge_subgraph(eids, preserve_nodes=True)
    y_2 = nn.edge_softmax(subg, score)
    y_2.backward(grad)
    grad_2 = score.grad.clone()
    score.grad.zero_()

    assert F.allclose(y_1, y_2)
    assert F.allclose(grad_1, grad_2)
udf_reduce = {
    'sum': lambda nodes: {'v': F.sum(nodes.mailbox['m'], 1)},
    'min': lambda nodes: {'v': F.min(nodes.mailbox['m'], 1)},
    'max': lambda nodes: {'v': F.max(nodes.mailbox['m'], 1)}
}

graphs = [
    # dgl.rand_graph(30, 0),
    dgl.rand_graph(100, 30),
    dgl.rand_graph(100, 3000),
    dgl.rand_bipartite(80, 160, 3000)
]

spmm_shapes = [
    ((1, 2, 1, 3, 1), (4, 1, 3, 1, 1)),
    ((5, 3, 1, 7), (1, 3, 7, 1)),
    ((1, 3, 1), (4, 1, 3)),
    ((3, 3), (1, 3)),
    ((1, ), (3, )),
    ((3, ), (1, )),
    ((1, ), (1, ))
]

sddmm_shapes = [
    ((1, 2, 1, 3, 1), (4, 1, 3, 1, 1)),
    ((5, 3, 1, 7), (1, 3, 7, 7)),
    ((1, 3, 3), (4, 1, 3)),
    ((3, 3), (1, 3)),
    ((3, ), (3, )),
    ((1, ), (1, ))
]


@pytest.mark.parametrize('g', graphs)
import torch
import dgl
import dgl.backend as F

g = dgl.rand_graph(10, 15).int().to(torch.device(0))
gidx = g._graph
u = torch.rand((10, 2, 8), device=torch.device(0))
v = torch.rand((10, 2, 8), device=torch.device(0))
e = dgl.ops.gsddmm(g, 'dot', u, v)
print(e)

e = torch.zeros((15, 2, 1), device=torch.device(0))
u = F.zerocopy_to_dgl_ndarray(u)
v = F.zerocopy_to_dgl_ndarray(v)
e = F.zerocopy_to_dgl_ndarray_for_write(e)
dgl.sparse._CAPI_FG_LoadModule("../build/featgraph/libfeatgraph_kernels.so")
dgl.sparse._CAPI_FG_SDDMMTreeReduction(gidx, u, v, e)
print(e)
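# Sanity sketch for the gsddmm 'dot' call above (an assumption-labeled check,
# not part of the original script): on each edge (s, d) the result should equal
# the dot product of u[s] and v[d] over the last axis. Fresh tensors u2/v2 are
# used because u/v/e were converted to DGL NDArrays above.
u2 = torch.rand((10, 2, 8), device=torch.device(0))
v2 = torch.rand((10, 2, 8), device=torch.device(0))
e2 = dgl.ops.gsddmm(g, 'dot', u2, v2)  # shape: (15, 2, 1)
src, dst = g.edges()
expected = (u2[src.long()] * v2[dst.long()]).sum(-1, keepdim=True)
assert torch.allclose(e2, expected)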
udf_reduce = {
    'sum': lambda nodes: {'v': F.sum(nodes.mailbox['m'], 1)},
    'min': lambda nodes: {'v': F.min(nodes.mailbox['m'], 1)},
    'max': lambda nodes: {'v': F.max(nodes.mailbox['m'], 1)}
}

graphs = [
    # dgl.rand_graph(30, 0),
    dgl.rand_graph(30, 100),
    dgl.rand_bipartite(30, 40, 300)
]

spmm_shapes = [
    ((1, 2, 1, 3, 1), (4, 1, 3, 1, 1)),
    ((3, 3), (1, 3)),
    ((1, ), (3, )),
    ((3, ), (1, )),
    ((1, ), (1, ))
]

sddmm_shapes = [
    ((1, 2, 1, 3, 1), (4, 1, 3, 1, 1)),
    ((5, 3, 1, 7), (1, 3, 7, 7)),
    ((1, 3, 3), (4, 1, 3)),
    ((3, ), (3, )),
    ((1, ), (1, ))
]

edge_softmax_shapes = [(1, ), (1, 3), (3, 4, 5)]


@pytest.mark.parametrize('g', graphs)
@pytest.mark.parametrize('shp', spmm_shapes)
def get_random_graph(N, num_edges_factor=18):
    graph = dgl.transform.remove_self_loop(
        dgl.rand_graph(N, N * num_edges_factor))
    return graph
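# Usage sketch (illustrative only): remove_self_loop drops any sampled (i, i)
# pairs, so the result keeps N nodes but may have fewer than
# N * num_edges_factor edges.
g = get_random_graph(100)
assert g.num_nodes() == 100
assert g.num_edges() <= 100 * 18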
            # (fragment: tail of a message function defined inside forward)
            feat_with_e = th.cat([edges.src['feat'], edges.data['feat']], 2)
            # apply a fc layer to project the node features concatenated with
            # the edge features E_p down to out_feat_dim
            feat_with_e = self.nfeat_with_e_fc(feat_with_e)
            return {'m': edges.data['a'] * feat_with_e}

        graph.update_all(message_func, fn.sum('m', 'ft'))
        rst = graph.dstdata['ft']
        rst = th.sigmoid(rst)
        return rst


# test
start_time = time()
num_nodes = 5
num_edges = 4
node_feat_dim = 3
edge_feat_dim = 2
out_node_feat_dim = 4
g = dgl.rand_graph(num_nodes, num_edges)
model = EGATLayer(node_feat_dim, out_node_feat_dim, edge_feat_dim)
rst = model(g, th.randn(num_nodes, node_feat_dim),
            th.randn(num_edges, edge_feat_dim))
print(rst)
end_time = time()
print("Time used: " + str(end_time - start_time))