def __init__(
        self,
        h_dims=128,
        n_classes=10,
        filters=[16, 32, 64, 128, 256],
        kernel_size=(3, 3),
        final_pool_size=(2, 2),
        glimpse_type='gaussian',
        glimpse_size=(15, 15),
        cnn='cnn',
        cnn_file='cnn.pt',
        ):
    """Build the glimpse tree and wire the message/update/readout modules.

    Parameters
    ----------
    h_dims, n_classes :
        Stored on the instance and forwarded to the CNN, update and
        readout modules.
    filters, kernel_size, final_pool_size, cnn :
        Forwarded unchanged to ``CNN``.
    glimpse_type, glimpse_size :
        Forwarded to ``UpdateModule``; ``glimpse_size`` is also the CNN
        ``input_size``.
    cnn_file :
        If not ``None``, a state dict is loaded from this path into the CNN.
    """
    nn.Module.__init__(self)

    # Balanced binary tree of height 2, plus its BFS orientation from node 0.
    tree = nx.balanced_tree(2, 2)
    oriented_tree = nx.bfs_tree(tree, 0)

    self.G = DGLGraph(tree)
    self.root = 0
    self.h_dims = h_dims
    self.n_classes = n_classes

    self.message_module = MessageModule()
    self.G.register_message_func(self.message_module)

    backbone = CNN(
        cnn=cnn,
        n_layers=6,
        h_dims=h_dims,
        n_classes=n_classes,
        final_pool_size=final_pool_size,
        filters=filters,
        kernel_size=kernel_size,
        input_size=glimpse_size,
    )
    if cnn_file is not None:
        backbone.load_state_dict(T.load(cnn_file))

    self.update_module = UpdateModule(
        glimpse_type=glimpse_type,
        glimpse_size=glimpse_size,
        cnn=backbone,
        max_recur=1,
        n_classes=n_classes,
        h_dims=h_dims,
    )
    self.G.register_update_func(self.update_module)

    self.readout_module = ReadoutModule(h_dims=h_dims, n_classes=n_classes)
    self.G.register_readout_func(self.readout_module)

    # dfs_walk receives the empty list and presumably appends the edge
    # sequence of a depth-first walk to it -- confirm against dfs_walk.
    self.walk_list = []
    dfs_walk(oriented_tree, self.root, self.walk_list)
def generate_graph(grad=False):
    """Return a 10-node, 17-edge test graph with random 'h'/'l' features.

    Node 0 points to nodes 1..8, each of which points to node 9, and a
    final edge goes from 9 back to 0.  ``grad`` is passed as
    ``requires_grad`` for both feature tensors.
    """
    graph = DGLGraph()
    graph.add_nodes(10)

    # 0 -> k -> 9 for k in 1..8; insertion order fixes the edge ids.
    for middle in range(1, 9):
        graph.add_edge(0, middle)
        graph.add_edge(middle, 9)
    # Back edge from the sink to the source (17th edge).
    graph.add_edge(9, 0)

    node_feat = Variable(th.randn(10, D), requires_grad=grad)
    edge_feat = Variable(th.randn(17, D), requires_grad=grad)
    graph.ndata['h'] = node_feat
    graph.edata['l'] = edge_feat
    return graph
def generate_graph(grad=False):
    """Return a 10-node, 17-edge test graph with random 'h'/'l' features.

    Node 0 points to nodes 1..8, each of which points to node 9, and a
    final edge goes from 9 back to 0.  When ``grad`` is true both feature
    tensors are passed through ``F.attach_grad`` before being stored.
    """
    graph = DGLGraph()
    graph.add_nodes(10)

    # 0 -> k -> 9 for k in 1..8; insertion order fixes the edge ids.
    for middle in range(1, 9):
        graph.add_edge(0, middle)
        graph.add_edge(middle, 9)
    # Back edge from the sink to the source (17th edge).
    graph.add_edge(9, 0)

    node_feat = F.randn((10, D))
    edge_feat = F.randn((17, D))
    if grad:
        node_feat = F.attach_grad(node_feat)
        edge_feat = F.attach_grad(edge_feat)

    graph.ndata['h'] = node_feat
    graph.edata['l'] = edge_feat
    return graph
def test_map_to_subgraph():
    """Parent node ids must map to their positions in the induced subgraph."""
    parent = DGLGraph()
    parent.add_nodes(10)
    # Chain 0 -> 1 -> ... -> 9.
    parent.add_edges(F.arange(0, 9), F.arange(1, 10))

    sub = parent.subgraph([0, 1, 2, 5, 8])
    mapped = sub.map_to_subgraph_nid([0, 8, 2])

    # Parent ids 0, 8, 2 sit at positions 0, 4, 2 of the induced node list.
    expected = np.array([0, 4, 2])
    assert np.array_equal(F.asnumpy(mapped), expected)
def perpare_dgl(graph, local, device):
    """Build one DGL graph per sample and return them as a batched graph.

    Previously each graph was built and then dropped: ``dgl_list`` was
    never appended to and the function returned ``None``.  It now collects
    the graphs and returns ``dgl.batch(dgl_list)``, matching the complete
    definition of the same name elsewhere in this file.

    Parameters
    ----------
    graph :
        Indexable collection; ``graph[i]`` is passed to ``np.nonzero`` to
        obtain the (source, destination) index arrays of sample ``i``.
        Presumably a dense adjacency matrix per sample -- confirm.
    local :
        Indexable collection; ``len(local[i])`` is the node count of sample
        ``i`` and ``local[i]`` is stored as the node feature ``'tk'``.
    device :
        Target device for the ``'tk'`` feature (``local[i].to(device=...)``).

    Returns
    -------
    The result of ``dgl.batch`` over all per-sample graphs.
    """
    # Local import keeps this block self-contained; the file already relies
    # on ``dgl.batch`` in the sibling definition of this function.
    import dgl

    dgl_list = []
    for i in range(len(local)):
        dgl_graph = DGLGraph()
        dgl_graph.add_nodes(len(local[i]))
        st, ed = np.nonzero(graph[i])
        dgl_graph.add_edges(st, ed)
        dgl_graph.ndata['tk'] = local[i].to(device=device)
        dgl_list.append(dgl_graph)
    return dgl.batch(dgl_list)
def knn_graphE(x, k, istrain=False):
    """Transform a point set into a directed k-nearest-neighbour graph.

    The predecessors of each point are its k nearest neighbours.  If a 3D
    tensor is given, each row is transformed into a separate graph and the
    graphs are unioned.

    Parameters
    ----------
    x : Tensor
        The input tensor.  If 2D, each row of ``x`` corresponds to a node.
        If 3D, a k-NN graph is constructed for each row and the graphs are
        unioned.
    k : int
        The number of neighbours.
    istrain : bool, optional
        When true, with probability 0.5 (``np.random.rand() > 0.5``) the
        neighbours are sampled instead of taken exactly: the
        ``round(1.5 * k)`` nearest candidates are computed, ``k - 1`` of the
        candidates at positions 1.. are picked at random, and the candidate
        at position 0 is always kept.

    Returns
    -------
    DGLGraph
        The graph.  The node IDs are in the same order as ``x``.
    """
    if F.ndim(x) == 2:
        x = F.unsqueeze(x, 0)
    n_samples, n_points, _ = F.shape(x)

    dist = pairwise_squared_distance(x)

    # Short-circuit keeps the RNG untouched when istrain is false.
    if istrain and np.random.rand() > 0.5:
        n_candidates = round(1.5 * k)
        k_indices = F.argtopk(dist, n_candidates, 2, descending=False)
        # k-1 random picks among candidate positions 1..n_candidates-1 ...
        picked = np.random.permutation(n_candidates - 1)[0:k - 1] + 1
        # ... plus position 0, which is always retained.
        picked = np.append(picked, 0)
        k_indices = k_indices[:, :, picked]
    else:
        k_indices = F.argtopk(dist, k, 2, descending=False)

    dst = F.copy_to(k_indices, F.cpu())
    point_ids = F.reshape(F.arange(0, n_points), (1, -1, 1))
    src = F.zeros_like(dst) + point_ids

    # Shift node ids so that every sample occupies its own id range.
    per_sample_offset = F.reshape(
        F.arange(0, n_samples) * n_points, (-1, 1, 1))
    dst += per_sample_offset
    src += per_sample_offset

    dst = F.reshape(dst, (-1, ))
    src = F.reshape(src, (-1, ))

    values = F.asnumpy(F.zeros_like(dst) + 1)
    coords = (F.asnumpy(dst), F.asnumpy(src))
    adj = sparse.csr_matrix((values, coords))

    return DGLGraph(adj, readonly=True)
def perpare_dgl(graph, local, device):
    """Build one DGL graph per sample and return them as a batched graph.

    For sample ``i`` the graph has ``len(local[i])`` nodes, edges at the
    non-zero positions of ``graph[i]``, and ``local[i]`` (moved to
    ``device``) as the node feature ``'tk'``.
    """
    per_sample_graphs = []
    for idx in range(len(local)):
        sample_graph = DGLGraph()
        sample_graph.add_nodes(len(local[idx]))

        # Non-zero entries of graph[idx] give (source, destination) pairs.
        sources, targets = np.nonzero(graph[idx])
        sample_graph.add_edges(sources, targets)

        sample_graph.ndata['tk'] = local[idx].to(device=device)
        per_sample_graphs.append(sample_graph)

    return dgl.batch(per_sample_graphs)
def __init__(self,
             h_dims=128,
             n_classes=10,
             filters=[16, 32, 64, 128, 256],
             kernel_size=(3, 3),
             final_pool_size=(2, 2),
             glimpse_type='gaussian',
             glimpse_size=(15, 15),
             cnn='cnn'):
    """Build the 3-node chain graph and register its modules.

    Parameters
    ----------
    h_dims, n_classes :
        Stored on the instance and forwarded to the update, message and
        readout modules.
    filters, kernel_size, final_pool_size, glimpse_type, glimpse_size :
        Forwarded unchanged to ``UpdateModule``.
    cnn :
        Forwarded to ``UpdateModule``.  It used to be ignored because the
        literal ``'cnn'`` was hard-coded in the call; the default is
        unchanged, so existing callers see the same behaviour.
    """
    nn.Module.__init__(self)

    # balanced_tree(1, 2) is the path 0 - 1 - 2; the walk follows it from
    # the root outwards.
    t = nx.balanced_tree(1, 2)
    self.G = DGLGraph(t)
    self.root = 0
    self.walk_list = [(0, 1), (1, 2)]
    self.h_dims = h_dims
    self.n_classes = n_classes

    self.update_module = UpdateModule(
        h_dims=h_dims,
        n_classes=n_classes,
        filters=filters,
        kernel_size=kernel_size,
        final_pool_size=final_pool_size,
        glimpse_type=glimpse_type,
        glimpse_size=glimpse_size,
        cnn=cnn,
    )
    # The message module is sized from the glimpse's attention parameters,
    # so it must be built after the update module.
    self.message_module = MessageModule(
        h_dims=h_dims,
        g_dims=self.update_module.glimpse.att_params)
    self.readout_module = ReadoutModule(
        h_dims=h_dims,
        n_classes=n_classes,
    )

    self.G.register_message_func(self.message_module)
    self.G.register_update_func(self.update_module)
    self.G.register_readout_func(self.readout_module)
def _build_tree(self, qt):
    """Convert ``qt.logic_tree`` into a DGLGraph with node attributes x/y.

    Every tree node becomes a graph node whose ``x`` is
    ``self.featuretotensor(node.val)``; edges point from child to parent.
    The root is node 0 and carries the label in ``y``; all other nodes get
    ``y=0``.

    The label comes from ``qt.gettime(self.time_selection)``.  For
    ``self.task == "classification"`` it is 0 when the solving time is
    ``True`` (bool) or greater than 60, else 1; otherwise it is the solving
    time itself.  Any falsy label (``None``, ``0``, ``False``) is stored as
    ``0.0``.

    Parameters
    ----------
    qt :
        Object exposing ``logic_tree`` (nodes with ``left``/``mid``/``right``
        children and ``val``) and ``gettime(selection)``.

    Returns
    -------
    DGLGraph
        Graph built via ``from_networkx`` with node attributes ``x`` and ``y``.
    """
    root = qt.logic_tree
    g = nx.DiGraph()

    def _rec_build(nid, node):
        for child in (node.left, node.mid, node.right):
            if not child:
                continue
            cid = g.number_of_nodes()
            try:
                word = self.featuretotensor(child.val)
            except Exception:
                # Unknown value: fall back to a 150-long vector with only
                # the first entry set.  ``Exception`` (rather than a bare
                # except) lets KeyboardInterrupt/SystemExit propagate.
                word = [0] * 150
                word[0] = 1
            g.add_node(cid, x=word, y=0)
            g.add_edge(cid, nid)
            _rec_build(cid, child)

    solving_time = qt.gettime(self.time_selection)
    if self.task == "classification":
        if isinstance(solving_time, bool):
            result = 0 if solving_time else 1
        else:
            result = 0 if solving_time > 60 else 1
    else:
        result = solving_time

    # Covers None as well, so no separate None check is needed afterwards.
    if not result:
        result = 0.0

    # The root must exist before recursion so that child ids start at 1.
    g.add_node(0, x=self.featuretotensor(root.val), y=result)
    _rec_build(0, root)

    ret = DGLGraph()
    ret.from_networkx(g, node_attrs=['x', 'y'])
    return ret
def main(args):
    """Load the dataset named by ``args.dataset`` and train a GeniePath model.

    Reads ``idrop``, ``adrop``, ``dataset``, ``num_layers``, ``num_hidden``,
    ``num_heads``, ``residual`` and ``epochs`` from ``args``.
    """
    # Dropout settings.
    input_dropout = args.idrop
    attention_dropout = args.adrop

    # Only the adjacency, features and training labels are used below; the
    # remaining five values returned by load_data are ignored.
    (adj, features, y_train,
     _y_val, _y_test, _train_mask, _val_mask, _test_mask) = load_data(
         args.dataset)
    features = preprocess_features(features)

    g = DGLGraph(adj)

    in_dim = features.shape[1]
    out_dim = y_train.shape[1]
    model = GeniePath(args.num_layers,
                      in_dim,
                      args.num_hidden,
                      out_dim,
                      args.num_heads,
                      F.elu,
                      input_dropout,
                      attention_dropout,
                      args.residual)

    model.train(g, features, y_train, epochs=args.epochs)
def _load(self):
    """Load graph, labels, scaled features and graph ids for ``self.mode``.

    Files read from the data directory, with ``<mode>`` being ``train``,
    ``valid`` or ``test``:

    * ``<mode>_graph.json`` -- the graph in node-link JSON format;
    * ``<mode>_feats.npy`` -- node feature array of shape ``[n, v]``
      (n nodes, v feature dimensions), standardised here with
      ``preprocessing.scale``;
    * ``<mode>_labels.npy`` -- node label array of shape ``[n, h]``
      (h label dimensions), e.g. ``[[0, 0, 1, ... 0], [0, 1, 1, 0 ... 1]]``;
    * ``<mode>_graph_id.npy`` -- one-dimensional array of length n giving
      the graph each node belongs to, e.g. ``[1, 1, 2, 1 ... 20]``.

    Sets ``self.labels``, ``self.features``, ``self.graph`` and
    ``self.graph_id``.  Any other mode loads nothing, as before.
    """
    data_dir = "/mnt/md1/a503tongxueheng/3DCNN_data_process/data/output/graph_1000/"

    print('Loading G...')
    if self.mode not in ('train', 'valid', 'test'):
        return

    # All four files share the "<data_dir><mode>_" prefix.
    prefix = data_dir + self.mode
    with open(prefix + '_graph.json') as jsonfile:
        g_data = json.load(jsonfile)
    self.labels = np.load(prefix + '_labels.npy')
    self.features = preprocessing.scale(np.load(prefix + '_feats.npy'))
    self.graph = DGLGraph(nx.DiGraph(json_graph.node_link_graph(g_data)))
    self.graph_id = np.load(prefix + '_graph_id.npy')
def line_graph(g, backtracking=True, shared=False):
    """Return the line graph of ``g``.

    Parameters
    ----------
    g : dgl.DGLGraph
        The input graph.
    backtracking : bool, optional
        Whether the returned line graph is backtracking.
    shared : bool, optional
        Whether the returned line graph shares representations with ``g``:
        if true, the edge frame of ``g`` is used as its node frame.

    Returns
    -------
    DGLGraph
        The line graph of ``g``.
    """
    line_graph_index = g._graph.line_graph(backtracking)
    if shared:
        frame = g._edge_frame
    else:
        frame = None
    return DGLGraph(line_graph_index, frame)
def check_pull_0deg(readonly):
    """Check ``pull`` on a 2-node graph (edge 0 -> 1) with a 0-in-degree node.

    ``readonly`` selects construction from a CSR matrix (read-only graph)
    versus incremental node/edge insertion.
    """
    if readonly:
        src_ids = [0]
        dst_ids = [1]
        weights = np.ones(shape=(len(src_ids)))
        adj = spsp.csr_matrix((weights, (src_ids, dst_ids)), shape=(2, 2))
        g = DGLGraph(adj, readonly=True)
    else:
        g = DGLGraph()
        g.add_nodes(2)
        g.add_edge(0, 1)

    def _message(edges):
        return {'m': edges.src['h']}

    def _reduce(nodes):
        return {'h': nodes.mailbox['m'].sum(1)}

    def _apply(nodes):
        return {'h': nodes.data['h'] * 2}

    def _init2(shape, dtype, ctx, ids):
        # Initializer that fills with the constant 2.
        return 2 + mx.nd.zeros(shape, dtype=dtype, ctx=ctx)

    g.set_n_initializer(_init2, 'h')
    old_repr = mx.nd.random.normal(shape=(2, 5))

    # test#1: pull only the 0-deg node
    g.ndata['h'] = old_repr
    g.pull(0, _message, _reduce, _apply)
    after = g.ndata['h']
    # 0-deg node: equivalent to apply_nodes
    assert np.allclose(after[0].asnumpy(), old_repr[0].asnumpy() * 2)
    # other node: untouched
    assert np.allclose(after[1].asnumpy(), old_repr[1].asnumpy())

    # test#2: pull only the node that has an in-edge
    g.ndata['h'] = old_repr
    g.pull(1, _message, _reduce, _apply)
    after = g.ndata['h']
    # 0-deg node: untouched
    assert np.allclose(after[0].asnumpy(), old_repr[0].asnumpy())
    # other node: received node 0's feature and got applied
    assert np.allclose(after[1].asnumpy(), old_repr[0].asnumpy() * 2)

    # test#3: pull both nodes
    g.ndata['h'] = old_repr
    g.pull([0, 1], _message, _reduce, _apply)
    after = g.ndata['h']
    # 0-deg node: initialised (2) and applied (* 2)
    t = mx.nd.zeros(shape=(2, 5)) + 4
    assert np.allclose(after[0].asnumpy(), t.asnumpy())
    # other node: received node 0's feature and got applied
    assert np.allclose(after[1].asnumpy(), old_repr[0].asnumpy() * 2)
def generate_graph(grad=False, readonly=False):
    """Return a 10-node, 17-edge test graph with random 'h'/'w' features.

    Node 0 points to nodes 1..8, each of which points to node 9, and an
    edge goes from 9 back to 0.  With ``readonly`` the graph is built from
    a CSR matrix, otherwise by adding nodes and edges one by one.  When
    ``grad`` is true, gradients are attached to both feature arrays.
    Zero initializers are installed for node and edge features.
    """
    if readonly:
        rows = []
        cols = []
        for mid in range(1, 9):
            # 0 -> mid, then mid -> 9
            rows.extend([0, mid])
            cols.extend([mid, 9])
        # back flow 9 -> 0
        rows.append(9)
        cols.append(0)
        weights = np.ones(shape=(len(rows)))
        adj = spsp.csr_matrix((weights, (rows, cols)), shape=(10, 10))
        g = DGLGraph(adj, readonly=True)
    else:
        g = DGLGraph()
        g.add_nodes(10)
        for mid in range(1, 9):
            g.add_edge(0, mid)
            g.add_edge(mid, 9)
        # back flow 9 -> 0
        g.add_edge(9, 0)

    # Feature setup is the same for both construction paths.
    ncol = mx.nd.random.normal(shape=(10, D))
    ecol = mx.nd.random.normal(shape=(17, D))
    if grad:
        ncol.attach_grad()
        ecol.attach_grad()
    g.ndata['h'] = ncol
    g.edata['w'] = ecol
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)
    return g
def test_update_all_0deg():
    """``update_all`` must initialise 0-in-degree nodes and still apply the UDF."""

    def _message(edges):
        return {'m': edges.src['h']}

    def _reduce(nodes):
        return {'h': nodes.data['h'] + mx.nd.sum(nodes.mailbox['m'], 1)}

    def _apply(nodes):
        return {'h': nodes.data['h'] * 2}

    def _init2(shape, dtype, ctx, ids):
        # Initializer that fills with the constant 2.
        return 2 + mx.nd.zeros(shape, dtype=dtype, ctx=ctx)

    # test#1: star graph, nodes 1..4 all point to node 0
    star = DGLGraph()
    star.add_nodes(5)
    for leaf in (1, 2, 3, 4):
        star.add_edge(leaf, 0)
    star.set_n_initializer(_init2, 'h')

    old_repr = mx.nd.random.normal(shape=(5, 5))
    star.ndata['h'] = old_repr
    star.update_all(_message, _reduce, _apply)
    new_repr = star.ndata['h']
    # Row 0 is the sum of all node features (then doubled); the 0-deg
    # nodes are set by the initializer and then processed by the apply UDF.
    assert np.allclose(new_repr[1:].asnumpy(), 2 * (2 + np.zeros((4, 5))))
    assert np.allclose(new_repr[0].asnumpy(),
                       2 * mx.nd.sum(old_repr, 0).asnumpy())

    # test#2: graph with no edge at all
    isolated = DGLGraph()
    isolated.add_nodes(5)
    isolated.set_n_initializer(_init2, 'h')
    isolated.ndata['h'] = old_repr
    isolated.update_all(_message, _reduce, _apply)
    new_repr = isolated.ndata['h']
    # Should fall back to apply only.
    assert np.allclose(new_repr.asnumpy(), 2 * old_repr.asnumpy())
def check_reduce_0deg(readonly):
    """Check reduce on a star graph (1..4 -> 0) that has 0-in-degree nodes.

    ``readonly`` selects construction from a CSR matrix versus incremental
    node/edge insertion.
    """
    if readonly:
        # Edges i -> 0 for i in 1..4.
        rows = list(range(1, 5))
        cols = [0] * 4
        weights = np.ones(shape=(len(rows)))
        adj = spsp.csr_matrix((weights, (rows, cols)), shape=(5, 5))
        g = DGLGraph(adj, readonly=True)
    else:
        g = DGLGraph()
        g.add_nodes(5)
        for leaf in (1, 2, 3, 4):
            g.add_edge(leaf, 0)

    def _message(edges):
        return {'m': edges.src['h']}

    def _reduce(nodes):
        return {'h': nodes.data['h'] + nodes.mailbox['m'].sum(1)}

    def _init2(shape, dtype, ctx, ids):
        # Initializer that fills with the constant 2.
        return 2 + mx.nd.zeros(shape, dtype=dtype, ctx=ctx)

    g.set_n_initializer(_init2, 'h')
    old_repr = mx.nd.random.normal(shape=(5, 5))
    g.set_n_repr({'h': old_repr})
    g.update_all(_message, _reduce)
    new_repr = g.ndata['h']

    # 0-deg nodes take the initializer value; node 0 is the sum of all rows.
    assert np.allclose(new_repr[1:].asnumpy(), 2 + np.zeros((4, 5)))
    assert np.allclose(new_repr[0].asnumpy(), old_repr.sum(0).asnumpy())
def test_recv_0deg_newfld():
    """``recv`` with 0-deg nodes when the reducer writes a new field 'h1'."""
    g = DGLGraph()
    g.add_nodes(2)
    g.add_edge(0, 1)

    def _message(edges):
        return {'m': edges.src['h']}

    def _reduce(nodes):
        return {'h1': nodes.data['h'] + mx.nd.sum(nodes.mailbox['m'], 1)}

    def _apply(nodes):
        return {'h1': nodes.data['h1'] * 2}

    def _init2(shape, dtype, ctx, ids):
        # Initializer that fills with the constant 2.
        return 2 + mx.nd.zeros(shape=shape, dtype=dtype, ctx=ctx)

    g.register_message_func(_message)
    g.register_reduce_func(_reduce)
    g.register_apply_node_func(_apply)

    # test#1: recv both the 0-deg and the non-0-deg node
    old = mx.nd.random.normal(shape=(2, 5))
    g.set_n_initializer(_init2, 'h1')
    g.ndata['h'] = old
    g.send((0, 1))
    g.recv([0, 1])
    new = g.ndata.pop('h1')
    # 0-deg node: initialised with _init2 (2) and then applied (* 2)
    assert np.allclose(new[0].asnumpy(), np.full((5, ), 4))
    # non-0-deg node
    assert np.allclose(new[1].asnumpy(), mx.nd.sum(old, 0).asnumpy() * 2)

    # test#2: recv only the 0-deg node
    old = mx.nd.random.normal(shape=(2, 5))
    g.ndata['h'] = old
    # 'h1' has to exist beforehand for this case.
    g.ndata['h1'] = mx.nd.full((2, 5), -1)
    g.send((0, 1))
    g.recv(0)
    new = g.ndata.pop('h1')
    # 0-deg node: falls back to apply only
    assert np.allclose(new[0].asnumpy(), np.full((5, ), -2))
    # non-0-deg node: unchanged
    assert np.allclose(new[1].asnumpy(), np.full((5, ), -1))
def generate_graph(grad=False):
    """Return a 10-node, 16-edge test graph with random 'h'/'w' features.

    Node 0 points to nodes 1..8 and each of those points to node 9.
    ``grad`` is passed as ``requires_grad`` for both feature tensors.
    Zero initializers are installed for node and edge features.
    """
    graph = DGLGraph()
    graph.add_nodes(10)

    # 0 -> k -> 9 for k in 1..8 gives 16 edges.
    for middle in range(1, 9):
        graph.add_edge(0, middle)
        graph.add_edge(middle, 9)

    node_feat = Variable(th.randn(10, D), requires_grad=grad)
    edge_feat = Variable(th.randn(16, D), requires_grad=grad)

    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.set_e_initializer(dgl.init.zero_initializer)
    graph.ndata['h'] = node_feat
    graph.edata['w'] = edge_feat
    return graph
def test_filter():
    """``filter_nodes`` / ``filter_edges`` return the ids matching a predicate."""
    g = DGLGraph()
    g.add_nodes(4)
    # 4-cycle: 0 -> 1 -> 2 -> 3 -> 0
    g.add_edges([0, 1, 2, 3], [1, 2, 3, 0])

    # Mark nodes 1, 3 and edges 1, 3 with ones; everything else stays zero.
    node_marks = F.zeros((4, 5))
    edge_marks = F.zeros((4, 5))
    node_marks[[1, 3]] = 1
    edge_marks[[1, 3]] = 1
    g.ndata['a'] = node_marks
    g.edata['a'] = edge_marks

    def predicate(r):
        return F.max(r.data['a'], 1) > 0

    # full node filter
    selected = g.filter_nodes(predicate)
    assert set(F.zerocopy_to_numpy(selected)) == {1, 3}

    # node filter restricted to nodes 0 and 1
    selected = g.filter_nodes(predicate, [0, 1])
    assert set(F.zerocopy_to_numpy(selected)) == {1}

    # full edge filter
    selected = g.filter_edges(predicate)
    assert set(F.zerocopy_to_numpy(selected)) == {1, 3}

    # edge filter restricted to edges 0 and 1
    selected = g.filter_edges(predicate, [0, 1])
    assert set(F.zerocopy_to_numpy(selected)) == {1}
def test_send_recv_after_conversion():
    """send/recv must agree on a graph and on its networkx / scipy round-trips.

    Both target graphs are pre-populated with unrelated nodes and edges
    before the conversion is loaded into them.
    """
    g = generate_graph()

    # --- copy via networkx -------------------------------------------------
    nxg = g.to_networkx(node_attrs=['h'])
    g1 = DGLGraph()
    # some unrelated nodes and edges
    g1.add_nodes(4)
    g1.add_edges([1, 2], [2, 3])
    g1.set_n_initializer(dgl.init.zero_initializer)
    g1.from_networkx(nxg, node_attrs=['h'])

    # --- copy via scipy sparse matrix --------------------------------------
    row, col = g.all_edges()
    data = range(len(row))
    n = g.number_of_nodes()
    coords = (F.zerocopy_to_numpy(row), F.zerocopy_to_numpy(col))
    a = sp.coo_matrix((data, coords), shape=(n, n))
    g2 = DGLGraph()
    # some unrelated nodes and edges
    g2.add_nodes(5)
    g2.add_edges([1, 2, 4], [2, 3, 0])
    g2.set_n_initializer(dgl.init.zero_initializer)
    g2.from_scipy_sparse_matrix(a)
    g2.ndata['h'] = g.ndata['h']

    # Same send / two-step recv sequence on the original, then the
    # networkx copy, then the sparse-matrix copy.
    for graph in (g, g1, g2):
        graph.send(message_func=message_func)
        graph.recv([0, 1, 3, 5],
                   reduce_func=reduce_func,
                   apply_node_func=apply_node_func)
        graph.recv([0, 2, 4, 8],
                   reduce_func=reduce_func,
                   apply_node_func=apply_node_func)

    assert F.allclose(g.ndata['h'], g1.ndata['h'])
    assert F.allclose(g.ndata['h'], g2.ndata['h'])
class AcidDataset(object):
    """Multi-graph node-labelling dataset stored as one large graph.

    The on-disk layout follows the GraphSAGE example data
    (https://github.com/williamleif/GraphSAGE/tree/master/example_data):
    one node-link JSON graph plus ``.npy`` arrays of node features, node
    labels and per-node graph ids for each split.

    NOTE(review): this docstring used to describe the 24-graph PPI toy
    dataset, but the graph-id ranges below run up to 4983, so the class
    reads a different, larger collection -- confirm and describe the
    actual data.
    """

    # Directory holding the "<mode>_graph.json" / "<mode>_*.npy" files.
    _DATA_DIR = "/mnt/md1/a503tongxueheng/3DCNN_data_process/data/output/graph/"

    # Half-open [start, stop) graph-id range of every split.
    # NOTE(review): ids 3986 and 4484 belong to no split -- confirm this
    # gap is intended and not an off-by-one.
    _GRAPH_ID_RANGES = {
        'train': (0, 3986),
        'valid': (3987, 4484),
        'test': (4485, 4983),
    }

    def __init__(self, mode):
        """Initialize the dataset.

        Parameters
        ----------
        mode : str
            One of ``'train'``, ``'valid'``, ``'test'``.
        """
        assert mode in ['train', 'valid', 'test']
        self.mode = mode
        self._load()
        self._preprocess()

    def _load(self):
        """Load graph, labels, features and graph ids for ``self.mode``.

        Files read, with ``<mode>`` being ``train``, ``valid`` or ``test``:

        * ``<mode>_graph.json`` -- the graph in node-link JSON format;
        * ``<mode>_feats.npy`` -- node feature array of shape ``[n, v]``
          (n nodes, v feature dimensions);
        * ``<mode>_labels.npy`` -- node label array of shape ``[n, h]``
          (h label dimensions);
        * ``<mode>_graph_id.npy`` -- one-dimensional array of length n
          giving the graph each node belongs to.

        Sets ``self.labels``, ``self.features``, ``self.graph`` and
        ``self.graph_id``.  Any other mode loads nothing.
        """
        if self.mode not in self._GRAPH_ID_RANGES:
            return

        # All four files share the "<data_dir><mode>_" prefix.
        prefix = self._DATA_DIR + self.mode
        with open(prefix + '_graph.json') as jsonfile:
            g_data = json.load(jsonfile)
        self.labels = np.load(prefix + '_labels.npy')
        self.features = np.load(prefix + '_feats.npy')
        self.graph = DGLGraph(
            nx.DiGraph(json_graph.node_link_graph(g_data)))
        self.graph_id = np.load(prefix + '_graph_id.npy')

    def _preprocess(self):
        """Split the loaded graph into one subgraph per graph id.

        For mode ``m`` this sets ``self.<m>_mask_list`` (node-index arrays),
        ``self.<m>_graphs`` (subgraphs of ``self.graph``) and
        ``self.<m>_labels`` (label rows), one entry per graph id in the
        split's range.
        """
        if self.mode not in self._GRAPH_ID_RANGES:
            return

        first_id, stop_id = self._GRAPH_ID_RANGES[self.mode]
        mask_list = []
        graphs = []
        labels = []
        for graph_id in range(first_id, stop_id):
            # Indices of the nodes that belong to this graph.
            node_ids = np.where(self.graph_id == graph_id)[0]
            mask_list.append(node_ids)
            graphs.append(self.graph.subgraph(node_ids))
            labels.append(self.labels[node_ids])

        # Attribute names stay train_*/valid_*/test_* as before.
        setattr(self, self.mode + '_mask_list', mask_list)
        setattr(self, self.mode + '_graphs', graphs)
        setattr(self, self.mode + '_labels', labels)

    def __len__(self):
        """Return number of samples in this dataset."""
        return len(getattr(self, self.mode + '_mask_list'))

    def __getitem__(self, item):
        """Get the ``item``-th sample.

        Parameters
        ----------
        item : int
            The sample index.

        Returns
        -------
        (dgl.DGLGraph, ndarray)
            The graph, and its label.  The node features of the sample are
            stored on the graph as ``g.ndata['feat']``.
        """
        g = getattr(self, self.mode + '_graphs')[item]
        node_ids = getattr(self, self.mode + '_mask_list')[item]
        g.ndata['feat'] = self.features[node_ids]
        label = getattr(self, self.mode + '_labels')[item]
        return g, label
def test_dynamic_addition():
    """Incremental send/recv while nodes and edges are added must match a full round."""
    N = 3
    D = 1

    g = DGLGraph()

    def _init(shape, dtype, ctx, ids):
        return F.copy_to(F.astype(F.randn(shape), dtype), ctx)

    g.set_n_initializer(_init)
    g.set_e_initializer(_init)

    def _message(edges):
        total = (edges.src['h1'] + edges.dst['h2']
                 + edges.data['h1'] + edges.data['h2'])
        return {'m': total}

    def _reduce(nodes):
        return {'h': F.sum(nodes.mailbox['m'], 1)}

    def _apply(nodes):
        return {'h': nodes.data['h']}

    g.register_message_func(_message)
    g.register_reduce_func(_reduce)
    g.register_apply_node_func(_apply)
    # The random initializers set above are replaced by zero initializers.
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    # add nodes and edges
    g.add_nodes(N)
    g.ndata.update({'h1': F.randn((N, D)), 'h2': F.randn((N, D))})
    g.add_nodes(3)
    g.add_edge(0, 1)
    g.add_edge(1, 0)
    g.edata.update({'h1': F.randn((2, D)), 'h2': F.randn((2, D))})
    g.send()
    # After send() every existing edge must be flagged in the message index.
    n_edges = g.number_of_edges()
    expected = F.copy_to(F.ones((n_edges, ), dtype=F.int64), F.cpu())
    assert F.array_equal(g._get_msg_index().tousertensor(), expected)

    # add more edges
    g.add_edges([0, 2], [2, 0], {'h1': F.randn((2, D))})
    g.send(([0, 2], [2, 0]))
    g.recv(0)

    g.add_edge(1, 2)
    g.edges[4].data['h1'] = F.randn((1, D))
    g.send((1, 2))
    g.recv([1, 2])

    incremental = g.ndata.pop('h')

    # a complete round of send and recv
    g.send()
    g.recv()
    assert F.allclose(incremental, g.ndata['h'])
def generate_graph(grad=False):
    """Return a 10-node, 16-edge test graph with random 'h'/'w' features.

    Node 0 points to nodes 1..8 and each of those points to node 9.  When
    ``grad`` is true both feature tensors are passed through
    ``F.attach_grad``.  Zero initializers are installed for node and edge
    features.
    """
    graph = DGLGraph()
    graph.add_nodes(10)

    # 0 -> k -> 9 for k in 1..8 gives 16 edges.
    for middle in range(1, 9):
        graph.add_edge(0, middle)
        graph.add_edge(middle, 9)

    node_feat = F.randn((10, D))
    edge_feat = F.randn((16, D))
    if grad:
        node_feat = F.attach_grad(node_feat)
        edge_feat = F.attach_grad(edge_feat)

    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.set_e_initializer(dgl.init.zero_initializer)
    graph.ndata['h'] = node_feat
    graph.edata['w'] = edge_feat
    return graph
def test_send_twice_different_field():
    """Two sends on one edge carrying different fields must both reach the reducer."""
    g = DGLGraph()
    g.set_n_initializer(dgl.init.zero_initializer)
    g.add_nodes(2)
    g.add_edge(0, 1)

    def _message_a(edges):
        return {'a': edges.src['a']}

    def _message_b(edges):
        return {'b': edges.src['b']}

    def _reduce(nodes):
        summed_a = F.sum(nodes.mailbox['a'], 1)
        summed_b = F.sum(nodes.mailbox['b'], 1)
        return {'a': summed_a, 'b': summed_b}

    old_a = F.randn((2, 5))
    old_b = F.randn((2, 5))
    g.set_n_repr({'a': old_a, 'b': old_b})

    # Same edge, two messages with disjoint fields.
    g.send((0, 1), _message_a)
    g.send((0, 1), _message_b)
    g.recv([1], _reduce)

    result = g.get_n_repr()
    assert F.allclose(result['a'][1], old_a[0])
    assert F.allclose(result['b'][1], old_b[0])
class DFSGlimpseSingleObjectClassifier(nn.Module):
    """Single-object classifier that propagates glimpses along a walk of a binary tree."""

    def __init__(
            self,
            h_dims=128,
            n_classes=10,
            filters=[16, 32, 64, 128, 256],
            kernel_size=(3, 3),
            final_pool_size=(2, 2),
            glimpse_type='gaussian',
            glimpse_size=(15, 15),
            cnn='cnn',
            cnn_file='cnn.pt',
            ):
        """Build the glimpse tree and wire the message/update/readout modules.

        ``filters``, ``kernel_size``, ``final_pool_size`` and ``cnn`` are
        forwarded to ``CNN``; ``glimpse_type`` and ``glimpse_size`` to
        ``UpdateModule`` (``glimpse_size`` is also the CNN ``input_size``).
        If ``cnn_file`` is not ``None`` a state dict is loaded from it into
        the CNN.
        """
        nn.Module.__init__(self)

        # Balanced binary tree of height 2, plus its BFS orientation from 0.
        tree = nx.balanced_tree(2, 2)
        oriented_tree = nx.bfs_tree(tree, 0)

        self.G = DGLGraph(tree)
        self.root = 0
        self.h_dims = h_dims
        self.n_classes = n_classes

        self.message_module = MessageModule()
        self.G.register_message_func(self.message_module)

        backbone = CNN(
            cnn=cnn,
            n_layers=6,
            h_dims=h_dims,
            n_classes=n_classes,
            final_pool_size=final_pool_size,
            filters=filters,
            kernel_size=kernel_size,
            input_size=glimpse_size,
        )
        if cnn_file is not None:
            backbone.load_state_dict(T.load(cnn_file))

        self.update_module = UpdateModule(
            glimpse_type=glimpse_type,
            glimpse_size=glimpse_size,
            cnn=backbone,
            max_recur=1,
            n_classes=n_classes,
            h_dims=h_dims,
        )
        self.G.register_update_func(self.update_module)

        self.readout_module = ReadoutModule(h_dims=h_dims, n_classes=n_classes)
        self.G.register_readout_func(self.readout_module)

        # dfs_walk receives the empty list and presumably appends the edge
        # sequence of a depth-first walk to it -- confirm against dfs_walk.
        self.walk_list = []
        dfs_walk(oriented_tree, self.root, self.walk_list)

    def forward(self, x, pretrain=False):
        """Run the model on the image batch ``x``.

        Every node state is reset, the root is updated once, and then
        either the root alone is read out (``pretrain=True``) or the walk
        list is propagated and all nodes are read out.
        """
        batch_size = x.shape[0]
        self.update_module.set_image(x)

        def zeros(n_cols):
            # Zero tensor created via x.new, one row per batch element.
            return x.new(batch_size, n_cols).zero_()

        init_states = {
            'h': zeros(self.h_dims),
            'b': zeros(self.update_module.glimpse.att_params),
            'b_next': zeros(self.update_module.glimpse.att_params),
            'a': zeros(1),
            'y': zeros(self.n_classes),
            'g': None,
            'b_fix': None,
            'db': None,
        }
        for n in self.G.nodes():
            self.G.node[n].update(init_states)

        # TODO: this root update is needed for the single-object case,
        # TODO: but is not useful (or wrong) for multi-object.
        self.G.recvfrom(self.root, [])

        if pretrain:
            return self.G.readout([self.root], pretrain=True)

        self.G.propagate(self.walk_list)
        return self.G.readout('all', pretrain=False)
def test_multi_recv_0deg():
    """Repeated ``recv`` on a 0-deg node and on a node with no pending message."""
    g = DGLGraph()

    def _message(edges):
        return {'m': edges.src['h']}

    def _reduce(nodes):
        return {'h': nodes.data['h'] + F.sum(nodes.mailbox['m'], 1)}

    def _apply(nodes):
        return {'h': nodes.data['h'] * 2}

    def _init2(shape, dtype, ctx, ids):
        # Initializer that fills with the constant 2.
        return 2 + F.zeros(shape, dtype=dtype, ctx=ctx)

    g.register_message_func(_message)
    g.register_reduce_func(_reduce)
    g.register_apply_node_func(_apply)
    g.set_n_initializer(_init2)
    g.add_nodes(2)
    g.add_edge(0, 1)

    # recv both the 0-deg and the non-0-deg node
    old = F.randn((2, 5))
    g.ndata['h'] = old
    g.send((0, 1))
    g.recv([0, 1])
    new = g.ndata['h']
    # 0-deg node: initialised with _init2 (2) and then applied (* 2)
    assert F.allclose(new[0], F.full((5, ), 4, F.float32))
    # non-0-deg node
    assert F.allclose(new[1], F.sum(old, 0) * 2)

    # recv again on the zero-degree node: only apply runs (4 -> 8)
    g.recv([0])
    assert F.allclose(g.nodes[0].data['h'], F.full((5, ), 8, F.float32))

    # recv again on the node with no incoming message: only apply runs
    g.recv([1])
    assert F.allclose(g.nodes[1].data['h'], F.sum(old, 0) * 4)
class TopDownNet(nn.Module):
    """Glimpse network that propagates along the 3-node chain 0 -> 1 -> 2."""

    def __init__(self,
                 h_dims=128,
                 n_classes=10,
                 filters=[16, 32, 64, 128, 256],
                 kernel_size=(3, 3),
                 final_pool_size=(2, 2),
                 glimpse_type='gaussian',
                 glimpse_size=(15, 15),
                 cnn='cnn'):
        """Build the chain graph and register its modules.

        Parameters
        ----------
        h_dims, n_classes :
            Stored on the instance and forwarded to the update, message and
            readout modules.
        filters, kernel_size, final_pool_size, glimpse_type, glimpse_size :
            Forwarded unchanged to ``UpdateModule``.
        cnn :
            Forwarded to ``UpdateModule``.  It used to be ignored because
            the literal ``'cnn'`` was hard-coded in the call; the default is
            unchanged, so existing callers see the same behaviour.
        """
        nn.Module.__init__(self)

        # balanced_tree(1, 2) is the path 0 - 1 - 2; the walk follows it
        # from the root outwards.
        t = nx.balanced_tree(1, 2)
        self.G = DGLGraph(t)
        self.root = 0
        self.walk_list = [(0, 1), (1, 2)]
        self.h_dims = h_dims
        self.n_classes = n_classes

        self.update_module = UpdateModule(
            h_dims=h_dims,
            n_classes=n_classes,
            filters=filters,
            kernel_size=kernel_size,
            final_pool_size=final_pool_size,
            glimpse_type=glimpse_type,
            glimpse_size=glimpse_size,
            cnn=cnn,
        )
        # The message module is sized from the glimpse's attention
        # parameters, so it must be built after the update module.
        self.message_module = MessageModule(
            h_dims=h_dims,
            g_dims=self.update_module.glimpse.att_params)
        self.readout_module = ReadoutModule(
            h_dims=h_dims,
            n_classes=n_classes,
        )

        self.G.register_message_func(self.message_module)
        self.G.register_update_func(self.update_module)
        self.G.register_readout_func(self.readout_module)

    def forward(self, x):
        """Run the model on the image batch ``x`` and return the graph readout.

        Every node state is reset to zeros (``'g'`` to ``None``), the root
        is updated once, and the walk list is then propagated.
        """
        batch_size = x.shape[0]
        g_dims = self.update_module.glimpse.att_params

        self.update_module.set_image(x)

        def zero_tensor_x(r, c):
            # Zero tensor of shape (r, c) created via x.new.
            return x.new(r, c).zero_()

        init_states = {
            's': zero_tensor_x(batch_size, self.h_dims),
            'a': (
                zero_tensor_x(batch_size, self.h_dims),
                zero_tensor_x(batch_size, g_dims),
            ),
            'g': None,
            'c': zero_tensor_x(batch_size, 1),
        }
        for n in self.G.nodes():
            self.G.node[n].update(init_states)

        # Update the root node before walking the chain.
        self.G.recvfrom(self.root, [])
        self.G.propagate(self.walk_list)
        return self.G.readout()
def test_dynamic_addition():
    """Feature tensors must grow with the graph as nodes and edges are added."""
    N = 3
    D = 1

    g = DGLGraph()

    def _rows_match(frame, expected_rows):
        # Both fields must have exactly `expected_rows` rows.
        return frame['h1'].shape[0] == frame['h2'].shape[0] == expected_rows

    # Test node addition
    g.add_nodes(N)
    g.ndata.update({'h1': th.randn(N, D), 'h2': th.randn(N, D)})
    g.add_nodes(3)
    assert _rows_match(g.ndata, N + 3)

    # Test edge addition
    g.add_edge(0, 1)
    g.add_edge(1, 0)
    g.edata.update({'h1': th.randn(2, D), 'h2': th.randn(2, D)})
    assert _rows_match(g.edata, 2)

    g.add_edges([0, 2], [2, 0])
    g.edata['h1'] = th.randn(4, D)
    assert _rows_match(g.edata, 4)

    g.add_edge(1, 2)
    g.edges[4].data['h1'] = th.randn(1, D)
    assert _rows_match(g.edata, 5)
def test_send_twice_different_msg():
    """A second send on the same edge replaces the first message."""
    g = DGLGraph()
    g.set_n_initializer(dgl.init.zero_initializer)
    g.add_nodes(3)
    g.add_edge(0, 1)
    g.add_edge(2, 1)

    def _message_a(edges):
        return {'a': edges.src['a']}

    def _message_b(edges):
        return {'a': edges.src['a'] * 3}

    def _reduce(nodes):
        return {'a': F.max(nodes.mailbox['a'], 1)}

    old_repr = F.randn((3, 5))

    # Case 1: both messages go over edge 0 -> 1; only the later one counts.
    g.ndata['a'] = old_repr
    g.send((0, 1), _message_a)
    g.send((0, 1), _message_b)
    g.recv(1, _reduce)
    new_repr = g.ndata['a']
    assert F.allclose(new_repr[1], old_repr[0] * 3)

    # Case 2: messages on two different edges are reduced together.
    g.ndata['a'] = old_repr
    g.send((0, 1), _message_a)
    g.send((2, 1), _message_b)
    g.recv(1, _reduce)
    new_repr = g.ndata['a']
    stacked = F.stack([old_repr[0], old_repr[2] * 3], 0)
    assert F.allclose(new_repr[1], F.max(stacked, 0))
def _load(self):
    """Load the dataset from the text file ``self.file``.

    File layout, as parsed here:

    * first line: ``N``, the number of graphs;
    * per graph: one line ``n_nodes graph_label``, followed by ``n_nodes``
      node lines of the form
      ``node_label n_edges neighbor_1 ... neighbor_n_edges [attr ...]``.

    Graph labels are relabelled to consecutive integers through
    ``self.glabel_dict``.  Every graph gets ``ndata['label']`` and, when
    the node lines carry attributes, ``ndata['attr']``.  If no graph has
    attributes, one-hot ``ndata['attr']`` is generated from the in-degree
    (``self.degree_as_nlabel``) or from the node label.  Finally the
    dataset statistics are stored on ``self`` and printed.

    Raises
    ------
    Exception
        If a node line holds fewer tokens than its declared edge count
        requires.
    """
    print('loading data...')
    with open(self.file, 'r') as f:
        # line_1 == N, total number of graphs
        self.N = int(f.readline().strip())

        for i in range(self.N):
            if (i + 1) % 10 == 0 and self.verbosity is True:
                print('processing graph {}...'.format(i + 1))

            grow = f.readline().strip().split()
            # line_2 == [n_nodes, l] is equal to
            # [node number of a graph, class label of a graph]
            n_nodes, glabel = [int(w) for w in grow]

            # relabel graphs
            if glabel not in self.glabel_dict:
                mapped = len(self.glabel_dict)
                self.glabel_dict[glabel] = mapped

            self.labels.append(self.glabel_dict[glabel])

            g = DGLGraph()
            g.add_nodes(n_nodes)

            nlabels = []  # node labels
            nattrs = []  # node attributes, if the file has any
            m_edges = 0

            for j in range(n_nodes):
                nrow = f.readline().strip().split()

                # handle edges and attributes (if present)
                tmp = int(nrow[1]) + 2  # tmp == 2 + #edges
                if tmp == len(nrow):
                    # no node attributes
                    nrow = [int(w) for w in nrow]
                elif tmp < len(nrow):
                    # Tokens after the neighbour list are attributes.
                    # They must be sliced off BEFORE nrow is truncated;
                    # the previous code tested ``tmp > len(nrow)`` and
                    # truncated first, so attributes were never parsed.
                    nattr = [float(w) for w in nrow[tmp:]]
                    nrow = [int(w) for w in nrow[:tmp]]
                    nattrs.append(nattr)
                else:
                    raise Exception('edge number is incorrect!')

                # relabel nodes if it has labels
                # if it doesn't have node labels, then every nrow[0]==0
                if nrow[0] not in self.nlabel_dict:
                    mapped = len(self.nlabel_dict)
                    self.nlabel_dict[nrow[0]] = mapped

                # The raw label is stored; the relabelled id is only
                # used later through label2idx.
                nlabels.append(nrow[0])

                m_edges += nrow[1]
                g.add_edges(j, nrow[2:])

                # add self loop
                if self.self_loop:
                    m_edges += 1
                    g.add_edge(j, j)

                if (j + 1) % 10 == 0 and self.verbosity is True:
                    print(
                        'processing node {} of graph {}...'.format(
                            j + 1, i + 1))
                    print('this node has {} edgs.'.format(
                        nrow[1]))

            if nattrs != []:
                nattrs = np.stack(nattrs)
                g.ndata['attr'] = nattrs
                self.nattrs_flag = True

            g.ndata['label'] = np.array(nlabels)
            if len(self.nlabel_dict) > 1:
                self.nlabels_flag = True

            assert len(g) == n_nodes

            # update statistics of graphs
            self.n += n_nodes
            self.m += m_edges

            self.graphs.append(g)

    # if no attr
    if not self.nattrs_flag:
        print('there are no node features in this dataset!')
        label2idx = {}
        # generate node attr by node degree
        if self.degree_as_nlabel:
            print('generate node features by node degree...')
            nlabel_set = set([])
            for g in self.graphs:
                # actually this label shouldn't be updated
                # in case users want to keep it
                # but usually no features means no labels, fine.
                g.ndata['label'] = g.in_degrees()
                # extracting unique node labels
                nlabel_set = nlabel_set.union(
                    set(g.ndata['label'].numpy()))

            nlabel_set = list(nlabel_set)

            # in case the labels/degrees are not continuous number
            self.ndegree_dict = {
                nlabel_set[i]: i
                for i in range(len(nlabel_set))
            }
            label2idx = self.ndegree_dict
        # generate node attr by node label
        else:
            print('generate node features by node label...')
            label2idx = self.nlabel_dict

        # One-hot encode every node's label (or degree) as its attribute.
        for g in self.graphs:
            g.ndata['attr'] = np.zeros((
                g.number_of_nodes(), len(label2idx)))
            g.ndata['attr'][range(g.number_of_nodes(
            )), [label2idx[nl.item()] for nl in g.ndata['label']]] = 1

    # after load, get the #classes and #dim
    self.gclasses = len(self.glabel_dict)
    self.nclasses = len(self.nlabel_dict)
    self.eclasses = len(self.elabel_dict)
    self.dim_nfeats = len(self.graphs[0].ndata['attr'][0])

    print('Done.')
    print(
        """ -------- Data Statistics --------' #Graphs: %d #Graph Classes: %d #Nodes: %d #Node Classes: %d #Node Features Dim: %d #Edges: %d #Edge Classes: %d Avg. of #Nodes: %.2f Avg. of #Edges: %.2f Graph Relabeled: %s Node Relabeled: %s Degree Relabeled(If degree_as_nlabel=True): %s \n """ % (
            self.N, self.gclasses, self.n, self.nclasses,
            self.dim_nfeats, self.m, self.eclasses,
            self.n / self.N, self.m / self.N, self.glabel_dict,
            self.nlabel_dict, self.ndegree_dict))