def get_train_data(self):
    g1 = dgl.from_networkx(self.g1)
    g2 = dgl.from_networkx(self.g2)
    feat1 = torch.from_numpy(self.node_feat1).float()
    feat2 = torch.from_numpy(self.node_feat2).float()
    # For the unsupervised alignment setting, first use a randomly
    # initialized GCN to find alignment seeds.
    h1 = self.model.GCNLayer(g1, feat1)
    h2 = self.model.GCNLayer(g2, feat2)
    h1 = h1.cpu().detach().numpy()
    h2 = h2.cpu().detach().numpy()
    h1 = preprocessing.normalize(h1, norm='l2')
    h2 = preprocessing.normalize(h2, norm='l2')
    rough_similarity = cosine_similarity(h1, h2) * cosine_similarity(
        self.node_feat1, self.node_feat2)
    # Pick the top-K largest entries per row.
    candidates = np.argpartition(-rough_similarity,
                                 kth=self.config.top_candidates,
                                 axis=1)
    candidates = candidates[:, :self.config.top_candidates]
    # train_indices
    train_indices = defaultdict(list)
    for i in range(candidates.shape[0]):
        train_indices[i] = candidates[i, :].tolist()
    return train_indices, g1, g2, feat1, feat2
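A minimal standalone sketch (hypothetical values, not from the original source) of the top-K selection above: np.argpartition only guarantees that the first kth entries per row are the K largest, in no particular order, so sort the slice if ranked candidates are needed.

import numpy as np

sim = np.array([[0.1, 0.9, 0.4, 0.7]])              # hypothetical similarity row
k = 2
top_k = np.argpartition(-sim, kth=k, axis=1)[:, :k]  # e.g. [[1, 3]], unsorted
ranked = top_k[0][np.argsort(-sim[0, top_k[0]])]     # [1, 3], sorted by similarity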
def test_sequential():
    ctx = F.ctx()

    # test single graph
    class ExampleLayer(gluon.nn.Block):
        def __init__(self, **kwargs):
            super().__init__(**kwargs)

        def forward(self, graph, n_feat, e_feat):
            graph = graph.local_var()
            graph.ndata['h'] = n_feat
            graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
            n_feat += graph.ndata['h']
            graph.apply_edges(fn.u_add_v('h', 'h', 'e'))
            e_feat += graph.edata['e']
            return n_feat, e_feat

    g = dgl.graph(([], [])).to(F.ctx())
    g.add_nodes(3)
    g.add_edges([0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 0, 0, 1, 1, 1, 2, 2, 2])
    net = nn.Sequential()
    net.add(ExampleLayer())
    net.add(ExampleLayer())
    net.add(ExampleLayer())
    net.initialize(ctx=ctx)
    n_feat = F.randn((3, 4))
    e_feat = F.randn((9, 4))
    n_feat, e_feat = net(g, n_feat, e_feat)
    assert n_feat.shape == (3, 4)
    assert e_feat.shape == (9, 4)

    # test multiple graphs
    class ExampleLayer(gluon.nn.Block):
        def __init__(self, **kwargs):
            super().__init__(**kwargs)

        def forward(self, graph, n_feat):
            graph = graph.local_var()
            graph.ndata['h'] = n_feat
            graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
            n_feat += graph.ndata['h']
            return n_feat.reshape(graph.number_of_nodes() // 2, 2, -1).sum(1)

    g1 = dgl.from_networkx(nx.erdos_renyi_graph(32, 0.05)).to(F.ctx())
    g2 = dgl.from_networkx(nx.erdos_renyi_graph(16, 0.2)).to(F.ctx())
    g3 = dgl.from_networkx(nx.erdos_renyi_graph(8, 0.8)).to(F.ctx())
    net = nn.Sequential()
    net.add(ExampleLayer())
    net.add(ExampleLayer())
    net.add(ExampleLayer())
    net.initialize(ctx=ctx)
    n_feat = F.randn((32, 4))
    n_feat = net([g1, g2, g3], n_feat)
    assert n_feat.shape == (4, 4)
def batch_graphs(data_root, data_list, windowing=False):
    data_files = [
        line.rstrip() for line in open(os.path.join(data_root, data_list))
    ]

    all_graphs = []
    all_labels = []
    all_features = []

    for file in data_files:
        # Convert the gpickle file to a dgl graph for batching
        # dgl_g = convert_gpickle_to_dgl_graph(file)
        nxg = nx.read_gpickle(os.path.join(data_root, file))
        if windowing:
            nxg_list = sliding_window.perform_windowing(nxg)
            for nxg in nxg_list:
                # Get the annotated labels
                labels = get_labels(nxg)
                # Get the features from the file
                features = chris_get_features(nxg)
                dgl_g = dgl.from_networkx(nxg)
                # Append the information for batching
                all_graphs.append(dgl_g)
                all_labels.append(labels)
                all_features.append(features)
        else:
            # Get the annotated labels
            labels = get_labels(nxg)
            # Get the features from the file
            features = chris_get_features(nxg)
            dgl_g = dgl.from_networkx(nxg)
            # Append the information for batching
            all_graphs.append(dgl_g)
            all_labels.append(labels)
            all_features.append(features)

    # Batch the graphs
    batched_graph = dgl.batch(all_graphs)
    # all_labels is a list of tensors, so concatenate into one tensor
    conc_labels = torch.LongTensor(batched_graph.number_of_nodes(), 1)
    torch.cat(all_labels, out=conc_labels)
    # all_features is a list of tensors, so concatenate into one tensor
    conc_features = torch.Tensor(batched_graph.number_of_nodes(), 1)
    torch.cat(all_features, out=conc_features)

    return batched_graph, conc_labels, conc_features
def test_simple_pool():
    g = dgl.from_networkx(nx.path_graph(15)).to(F.ctx())

    sum_pool = nn.SumPooling()
    avg_pool = nn.AvgPooling()
    max_pool = nn.MaxPooling()
    sort_pool = nn.SortPooling(10)  # k = 10
    print(sum_pool, avg_pool, max_pool, sort_pool)

    # test#1: basic
    h0 = F.randn((g.number_of_nodes(), 5))
    h1 = sum_pool(g, h0)
    check_close(F.squeeze(h1, 0), F.sum(h0, 0))
    h1 = avg_pool(g, h0)
    check_close(F.squeeze(h1, 0), F.mean(h0, 0))
    h1 = max_pool(g, h0)
    check_close(F.squeeze(h1, 0), F.max(h0, 0))
    h1 = sort_pool(g, h0)
    assert h1.shape[0] == 1 and h1.shape[1] == 10 * 5 and h1.ndim == 2

    # test#2: batched graph
    g_ = dgl.from_networkx(nx.path_graph(5)).to(F.ctx())
    bg = dgl.batch([g, g_, g, g_, g])
    h0 = F.randn((bg.number_of_nodes(), 5))
    h1 = sum_pool(bg, h0)
    truth = mx.nd.stack(F.sum(h0[:15], 0),
                        F.sum(h0[15:20], 0),
                        F.sum(h0[20:35], 0),
                        F.sum(h0[35:40], 0),
                        F.sum(h0[40:55], 0), axis=0)
    check_close(h1, truth)
    h1 = avg_pool(bg, h0)
    truth = mx.nd.stack(F.mean(h0[:15], 0),
                        F.mean(h0[15:20], 0),
                        F.mean(h0[20:35], 0),
                        F.mean(h0[35:40], 0),
                        F.mean(h0[40:55], 0), axis=0)
    check_close(h1, truth)
    h1 = max_pool(bg, h0)
    truth = mx.nd.stack(F.max(h0[:15], 0),
                        F.max(h0[15:20], 0),
                        F.max(h0[20:35], 0),
                        F.max(h0[35:40], 0),
                        F.max(h0[40:55], 0), axis=0)
    check_close(h1, truth)
    h1 = sort_pool(bg, h0)
    assert h1.shape[0] == 5 and h1.shape[1] == 10 * 5 and h1.ndim == 2
def preprocessing(data, emb_file, seed, trans):
    num_graphs = len(data[0])
    nx_graphs = [data[0][i].g for i in range(num_graphs)]
    dgl_graphs = [dgl.from_networkx(graph) for graph in nx_graphs]
    batch_graphs = dgl.batch(dgl_graphs)
    num_nodes = len(batch_graphs.nodes())
    graph_size = [len(g.nodes()) for g in nx_graphs]
    emb = np.loadtxt(emb_file)
    if trans:
        emb = np.dot(emb, DCT(num_nodes).T)
    G = batch_graphs.to_networkx()
    Sub = {}
    for i in range(num_graphs):
        # Nodes of graph i occupy a contiguous id range in the batched graph.
        node_start = sum(graph_size[:i])
        node_end = sum(graph_size[:i + 1])
        nbunch = [node for node in range(node_start, node_end)]
        subgraph = nx.subgraph(G, nbunch)
        Sub[data[0][i]] = np.dot(emb, encode(G, subgraph))
    idx_list = separate_data(data[0], seed=seed)
    return Sub, idx_list, data[0]
def edge_list_to_graph(args):
    global label
    global acc_cnts
    global graphs
    global labels
    global cnts
    global num_class

    i = args[0]
    f = str(args[1])
    G = args[2]
    L = args[3]
    print(f'Preprocessing file: {f}')
    with open(f, "rb") as file:
        edges = nx.read_edgelist(file, create_using=nx.Graph, nodetype=int)

    # Label the malware class based on the cumulative counts.
    class_ = 0
    found = False
    for idx, cond in enumerate(acc_cnts):
        if i < cond:
            class_ = idx + 1
            found = True
            break
    if not found:
        class_ = len(acc_cnts) + 1

    hetero_graph = dgl.from_networkx(edges)
    L.append(class_)
    G.append(hetero_graph)
def group_labels_features(data_root, data_list, windowing=False):
    # data_path = 'data/'
    data_files = [
        line.rstrip() for line in open(os.path.join(data_root, data_list))
    ]

    # Initialize empty list
    dataset = []
    print("loading {} files".format(len(data_files)))
    for idx, file in enumerate(data_files):
        graph = []
        nxg = nx.read_gpickle(os.path.join(data_root, file))
        # Get the annotated labels
        labels = get_labels(nxg)
        # Get the features from the file
        features = chris_get_features(nxg)
        dgl_g = dgl.from_networkx(nxg)
        # Append the information for batching
        graph.append(dgl_g)
        graph.append(labels)
        graph.append(features)
        dataset.append(graph)
    return dataset
def convert_nx_to_dgl(G: Graph) -> DGLHeteroGraph:
    """
    Convert a NetworkX graph into a DGL graph.
    """
    return from_networkx(nx_graph=G,
                         node_attrs=["nfeat"],
                         edge_attrs=["efeat", "label"])
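A minimal usage sketch (toy graph, assumed feature shapes, not from the original source): from_networkx expects the listed attribute keys to exist on every node/edge and to be tensor-convertible, so a compatible input would look like this.

import networkx as nx
import torch
from dgl import from_networkx

G = nx.DiGraph()
G.add_node(0, nfeat=torch.zeros(4))
G.add_node(1, nfeat=torch.ones(4))
G.add_edge(0, 1, efeat=torch.tensor([1.0]), label=torch.tensor(1))

g = from_networkx(G, node_attrs=["nfeat"], edge_attrs=["efeat", "label"])
assert g.ndata["nfeat"].shape == (2, 4)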
def test_tagconv(out_dim):
    g = dgl.from_networkx(nx.path_graph(3)).to(F.ctx())
    ctx = F.ctx()
    adj = g.adjacency_matrix(transpose=True, ctx=ctx)
    norm = mx.nd.power(g.in_degrees().astype('float32'), -0.5)

    conv = nn.TAGConv(5, out_dim, bias=True)
    conv.initialize(ctx=ctx)
    print(conv)

    # test#1: basic
    h0 = F.ones((3, 5))
    h1 = conv(g, h0)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    shp = norm.shape + (1,) * (h0.ndim - 1)
    norm = norm.reshape(shp).as_in_context(h0.context)
    assert F.allclose(h1, _S2AXWb(adj, norm, h0, conv.lin.data(ctx),
                                  conv.h_bias.data(ctx)))

    conv = nn.TAGConv(5, out_dim)
    conv.initialize(ctx=ctx)

    # test#2: basic
    h0 = F.ones((3, 5))
    h1 = conv(g, h0)
    assert h1.shape[-1] == out_dim
def graph_update(self, number):
    """
    :param Y: [batch_size, frames, num_nodes, num_nodes] the processed
        node admittance matrix
    :param infos: [batch_size, in_channels, frames, num_nodes] the features
        of each node
    :param weights: [features, num_nodes, num_nodes] learnable weights for
        message passing, using nn.Embedding()
    :return: graph
    """
    # TODO: the frame index should be derived from frame_0.
    batches = number // self.frames
    frames = number % self.frames
    Y_number = 0
    if frames >= 1:
        Y_number = 1
    if frames > 11:
        Y_number = 2
    Y_need = self.Y[batches, Y_number, :, :].cpu().numpy()
    nx_graph = nx.from_numpy_matrix(Y_need)
    graph = dgl.from_networkx(nx_graph).to(torch.device('cuda:0'))
    # add features to all the nodes
    graph.ndata['feats'] = (self.infos[batches, :, frames, :]).T
    # graph.edata['weights'] = (self.Y[batches, Y_number, :, :] *
    #                           self.weights).permute((1, 2, 0))\
    #                          .reshape((self.num_nodes * self.num_nodes, self.c_in))
    self.graph_list[number] = graph
    return graph
def __getitem__(self, idx):
    g_path = os.path.join(self.path, self.all_graphs[idx])
    try:
        graph = read_graph(g_path)
    except Exception as e:
        print(e)
        print("ERROR could not read graph file:\n", g_path)

    # graph = nx.to_undirected(graph)
    # graph = nx.Graph(graph)

    one_hot = {}
    for edge, label in (nx.get_edge_attributes(graph, 'LW')).items():
        if '.' in label:
            graph.remove_edge(edge[0], edge[1])
            continue
        try:
            one_hot[edge] = torch.tensor(self.edge_map[label.upper()])
        except KeyError as e:
            # print('ERROR: unrecognized edge label:')
            # print(e)
            graph.remove_edge(edge[0], edge[1])

    interface = get_labels(graph, interaction=self.interaction,
                           mode=self.use_mode)

    nx.set_node_attributes(graph, name='interface', values=interface)
    nx.set_edge_attributes(graph, name='one_hot', values=one_hot)

    g_dgl = dgl.from_networkx(nx_graph=graph,
                              edge_attrs=['one_hot'],
                              node_attrs=['interface'])
    return g_dgl, [idx]
def parsed_tree_to_dgl_tree(parsed_tree, vocab):
    PAD_WORD = -1
    g = nx.DiGraph()

    def _rec_build(u):
        if len(u.child) == 1:
            # skip unary nodes
            return _rec_build(u.child[0])
        elif len(u.child) > 1:
            # internal node: binary, unlabeled word slot
            assert len(u.child) == 2
            nid = g.number_of_nodes()
            g.add_node(nid, x=PAD_WORD, y=0)
            left = _rec_build(u.child[0])
            right = _rec_build(u.child[1])
            g.add_edge(left, nid)
            g.add_edge(right, nid)
            return nid
        else:
            # leaf node: look up the word id
            cid = g.number_of_nodes()
            word = vocab.get(u.value, PAD_WORD)
            g.add_node(cid, x=word, y=0)
            return cid

    # add root
    root = _rec_build(parsed_tree)
    g.add_node(root, x=PAD_WORD)
    return dgl.from_networkx(g, node_attrs=['x', 'y'])
def networkx_to_torch(self, networkx_graph):
    import dgl
    # graph = dgl.DGLGraph()
    graph = dgl.from_networkx(networkx_graph)
    graph = dgl.remove_self_loop(graph)
    graph = dgl.add_self_loop(graph)
    graph = graph.to(self.device)
    return graph
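A small sketch (toy graph, not from the original source) of why the snippet removes self-loops before adding them back: it guarantees exactly one self-loop per node instead of duplicating any that already exist.

import dgl

g = dgl.graph(([0, 0], [0, 1]))                # node 0 already has a self-loop
g = dgl.add_self_loop(dgl.remove_self_loop(g))
assert g.number_of_edges() == 3                # (0, 1) plus one self-loop per node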
def make_full_graph(g):
    """
    Converting the given graph to fully connected
    """
    full_g = dgl.from_networkx(nx.complete_graph(g.number_of_nodes()))
    full_g.ndata['feat'] = g.ndata['feat']
    full_g.edata['feat'] = torch.zeros(full_g.number_of_edges())
    return full_g
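A usage sketch (toy graph with an assumed feature size, not from the original source): make_full_graph keeps node features but zeroes out edge features, since the complete graph's edge set no longer corresponds to the original edges.

import dgl
import torch

g = dgl.graph(([0, 1], [1, 2]))
g.ndata['feat'] = torch.randn(3, 8)
full_g = make_full_graph(g)
assert full_g.number_of_edges() == 6   # complete graph on 3 nodes, both directions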
def load_dgl():
    global g
    g_x = load_graph()
    g = dgl.from_networkx(g_x,
                          node_attrs=['vector', 'node_order'],
                          edge_attrs=None)
    # g = dgl.DGLGraph()
    # g.from_networkx(g_x, node_attrs=['tipo', 'vector', 'node_order'], edge_attrs=None)
    print("Meta-feature graph from datasets loaded")
def convert_to_dgl_graph(graph):
    # directed graph
    g = nx.DiGraph()
    for edge, weight in graph.items():
        nodes = edge.split(",")
        src, dst = int(nodes[0]), int(nodes[1])
        g.add_edge(src, dst, weight=float(weight))
    return dgl.from_networkx(g, edge_attrs=['weight'])
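A usage sketch with a hypothetical input dict: keys are "src,dst" strings and values are edge weights, which end up in edata['weight'].

graph = {"0,1": 0.5, "1,2": 2.0}   # hypothetical weighted edge list
g = convert_to_dgl_graph(graph)
print(g.edata['weight'])           # tensor holding the two weights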
def test_graph_conv(idtype, out_dim):
    g = dgl.from_networkx(nx.path_graph(3))
    g = g.astype(idtype).to(F.ctx())
    ctx = F.ctx()
    adj = g.adjacency_matrix(transpose=True, ctx=ctx)

    conv = nn.GraphConv(5, out_dim, norm='none', bias=True)
    conv.initialize(ctx=ctx)
    # test#1: basic
    h0 = F.ones((3, 5))
    h1 = conv(g, h0)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    check_close(h1, _AXWb(adj, h0, conv.weight, conv.bias))

    # test#2: more-dim
    h0 = F.ones((3, 5, 5))
    h1 = conv(g, h0)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    check_close(h1, _AXWb(adj, h0, conv.weight, conv.bias))

    conv = nn.GraphConv(5, out_dim)
    conv.initialize(ctx=ctx)
    # test#3: basic
    h0 = F.ones((3, 5))
    h1 = conv(g, h0)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    # test#4: basic
    h0 = F.ones((3, 5, 5))
    h1 = conv(g, h0)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0

    conv = nn.GraphConv(5, out_dim)
    conv.initialize(ctx=ctx)

    with autograd.train_mode():
        # test#3: basic
        h0 = F.ones((3, 5))
        h1 = conv(g, h0)
        assert len(g.ndata) == 0
        assert len(g.edata) == 0
        # test#4: basic
        h0 = F.ones((3, 5, 5))
        h1 = conv(g, h0)
        assert len(g.ndata) == 0
        assert len(g.edata) == 0

    # test not override features
    g.ndata["h"] = 2 * F.ones((3, 1))
    h1 = conv(g, h0)
    assert len(g.ndata) == 1
    assert len(g.edata) == 0
    assert "h" in g.ndata
    check_close(g.ndata['h'], 2 * F.ones((3, 1)))
def rGIN(g):
    g = dgl.from_networkx(g)
    f = np.random.standard_normal(size=(g.number_of_nodes(), 1))
    x = torch.tensor(f, dtype=torch.float)
    g.ndata['x'] = x
    lin = torch.nn.Linear(1, 1)
    conv = GINConv(lin, 'sum')
    res = conv(g, x)
    sumpool = SumPooling()
    return sumpool(g, res)[0].detach().numpy()
def load_graph():
    # pandas reads the csv
    edges_data = pd.read_csv('data/knowledge_aquisition_reference.csv')
    # networkx reads the pandas edge list
    g_nx: nx.DiGraph = nx.from_pandas_edgelist(edges_data,
                                               'paper_id',
                                               'reference_id',
                                               create_using=nx.DiGraph())
    # dgl reads networkx
    # ATTENTION!!!: nodes in the dgl graph are ordered by paper_id
    return dgl.from_networkx(g_nx)
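A sketch (toy ids, not from the original source) of the caveat in the comment above: DGL relabels nodes to consecutive integers following their order in the NetworkX graph, so keep an explicit id mapping if features are joined by paper_id later.

import dgl
import networkx as nx

g_nx = nx.DiGraph([(10, 20), (20, 30)])             # hypothetical paper ids
id_map = {pid: i for i, pid in enumerate(g_nx.nodes())}
g = dgl.from_networkx(g_nx)
assert g.number_of_nodes() == len(id_map)           # 3 relabeled nodes: 0, 1, 2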
def make_full_graph(g):
    """
    Converting the given graph to fully connected

    This function just makes full connections and
    removes the available edge features.
    """
    full_g = dgl.from_networkx(nx.complete_graph(g.number_of_nodes()))
    full_g.ndata['feat'] = g.ndata['feat']
    full_g.edata['feat'] = torch.zeros(full_g.number_of_edges()).long()
    return full_g
def test_appnp_conv():
    g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
    ctx = F.ctx()
    appnp_conv = nn.APPNPConv(3, 0.1, 0)
    appnp_conv.initialize(ctx=ctx)
    print(appnp_conv)

    # test#1: basic
    h0 = F.randn((20, 10))
    h1 = appnp_conv(g, h0)
    assert h1.shape == (20, 10)
def test_cheb_conv(out_dim):
    g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
    ctx = F.ctx()
    cheb = nn.ChebConv(10, out_dim, 3)  # k = 3
    cheb.initialize(ctx=ctx)
    print(cheb)

    # test#1: basic
    h0 = F.randn((20, 10))
    h1 = cheb(g, h0)
    assert h1.shape == (20, out_dim)
def main(args):
    # load dataset
    if args.dataset == 'syn1':
        g, labels, name = gen_syn1()
    elif args.dataset == 'syn2':
        g, labels, name = gen_syn2()
    elif args.dataset == 'syn3':
        g, labels, name = gen_syn3()
    elif args.dataset == 'syn4':
        g, labels, name = gen_syn4()
    elif args.dataset == 'syn5':
        g, labels, name = gen_syn5()
    else:
        raise NotImplementedError

    # Transform to dgl graph.
    graph = dgl.from_networkx(g)
    labels = th.tensor(labels, dtype=th.long)
    graph.ndata['label'] = labels
    graph.ndata['feat'] = th.randn(graph.number_of_nodes(), args.feat_dim)
    hid_dim = th.tensor(args.hidden_dim, dtype=th.long)
    label_dict = {'hid_dim': hid_dim}

    # save graph for later use
    save_graphs(filename='./' + args.dataset + '.bin', g_list=[graph],
                labels=label_dict)

    num_classes = max(graph.ndata['label']).item() + 1
    n_feats = graph.ndata['feat']

    # create model
    dummy_model = dummy_gnn_model(args.feat_dim, args.hidden_dim, num_classes)
    loss_fn = nn.CrossEntropyLoss()
    optim = th.optim.Adam(dummy_model.parameters(), lr=args.lr,
                          weight_decay=args.wd)

    # train and output
    for epoch in range(args.epochs):
        dummy_model.train()
        logits = dummy_model(graph, n_feats)
        loss = loss_fn(logits, labels)
        acc = th.sum(logits.argmax(dim=1) == labels).item() / len(labels)

        optim.zero_grad()
        loss.backward()
        optim.step()

        print('In Epoch: {:03d}; Acc: {:.4f}; Loss: {:.6f}'.format(
            epoch, acc, loss.item()))

    # save model
    model_stat_dict = dummy_model.state_dict()
    model_path = os.path.join('./', 'dummy_model_{}.pth'.format(args.dataset))
    th.save(model_stat_dict, model_path)
def test_sg_conv():
    g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
    ctx = F.ctx()
    sgc = nn.SGConv(5, 2, 2)
    sgc.initialize(ctx=ctx)
    print(sgc)

    # test #1: basic
    h0 = F.randn((g.number_of_nodes(), 5))
    h1 = sgc(g, h0)
    assert h1.shape == (g.number_of_nodes(), 2)
def load_batch_graph(dataset):
    data = load_data(dataset, True)
    num_graphs = len(data[0])
    nx_graphs = [data[0][i].g for i in range(num_graphs)]
    dgl_graphs = [dgl.from_networkx(graph) for graph in nx_graphs]
    batch_graphs = dgl.batch(dgl_graphs)
    node_features = [data[0][i].node_features for i in range(num_graphs)]
    batch_features = torch.cat(node_features, 0)
    graph_size = [len(g.nodes()) for g in nx_graphs]
    return batch_graphs, batch_features, graph_size
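A sketch (toy graphs, not from the original source) of the invariant this relies on: dgl.batch concatenates the graphs' nodes in order, so graph_size is enough to slice each graph's rows out of batch_features.

import dgl

g1 = dgl.graph(([0], [1]))           # 2 nodes
g2 = dgl.graph(([0, 1], [1, 2]))     # 3 nodes
bg = dgl.batch([g1, g2])
assert bg.number_of_nodes() == 5
assert bg.batch_num_nodes().tolist() == [2, 3]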
def build_graph(smiles):
    """
    Constructs a NetworkX graph out of a SMILES representation of a molecule
    from the train/test data.

    :param smiles: a string object of SMILES format
    :return: nx.Graph: A graph describing a molecule. Nodes will have an
        'element', 'aromatic' and a 'charge', and if `explicit_hydrogen` is
        False a 'hcount'. Depending on the input, they will also have
        'isotope' and 'class' information. Edges will have an 'order'.
    """
    '''
    We can access node data and edge data while the graph is in networkx
    format. dgl.from_networkx(g) converts networkx to a dgl graph, but the
    node data and edge data don't seem to be transferred.
    Goal: save the node feats and edge feats of networkx as tensors and set
    them as the dgl graph's ndata and edata.
    Question: Do we save ndata as ('C', 'C', 'C', 'O', 'C') or do we create
    one-hot vectors like in the hw?
    '''
    # read the smiles graphs in using pysmiles & build the network
    g = pysmiles.read_smiles(smiles)

    # get the features from the graph and convert to tensor
    elems = g.nodes(data='element')
    h_count = g.nodes(data='hcount')
    aros = g.nodes(data='aromatic')
    raw_node_feats = []
    for elem, data, aro in zip(elems, h_count, aros):
        node = list(elem)
        node.append(data[1])
        node.append(aro[1] * 1)
        raw_node_feats.append(node)
    na = np.array(list(raw_node_feats))
    byte_node_feats = tf.convert_to_tensor(na[:, 1])

    # turn the byte string node feats into one_hot node feats
    node_feats = pt_lookup(byte_node_feats).numpy()
    node_feats[:, -2] = na[:, 2]
    node_feats[:, -1] = na[:, 3]
    node_feats = tf.convert_to_tensor(node_feats)

    # get edge data and extract bonds, double them, then convert to tensor
    edata = g.edges(data='order')
    bonds = list(edata)
    na = np.array(bonds)
    tup = zip(na[:, 2], na[:, 2])
    bond_data = tf.convert_to_tensor(list(itertools.chain(*tup)))
    bond_data = tf.cast(bond_data, tf.float32)

    # build dgl graph
    dgl_graph = dgl.from_networkx(g)
    dgl_graph.ndata['node_feats'] = node_feats
    dgl_graph.edata['edge_feats'] = bond_data
    return dgl_graph
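One possible answer to the question in the docstring above, as a sketch with an assumed element vocabulary (this is not the original pt_lookup): map element symbols to one-hot rows with tf.one_hot.

import tensorflow as tf

ELEMENTS = ['C', 'O', 'N', 'S']                 # assumed vocabulary
def one_hot_elements(symbols):
    idx = [ELEMENTS.index(s) for s in symbols]
    return tf.one_hot(idx, depth=len(ELEMENTS))

print(one_hot_elements(['C', 'O']))             # [[1,0,0,0], [0,1,0,0]]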
def prepare_minibatch(targets, node_mptype_mpinstances, type_mask,
                      node_orders, nlayer, sampling, device):
    layer_ntype_mptype_g = [defaultdict(dict) for _ in range(nlayer)]
    layer_ntype_mptype_mpinstances = [defaultdict(dict) for _ in range(nlayer)]
    layer_ntype_mptype_iftargets = [defaultdict(dict) for _ in range(nlayer)]

    for layer_index in range(nlayer):
        ## group target nodes by type
        ntype_targets = defaultdict(set)
        for target in targets:
            ntype_targets[type_mask[target]].add(target)

        ## sample metapath instances for each ntype
        targets = set()
        for ntype, curr_targets in ntype_targets.items():
            mptype_mpinstances = sample_mpinstances_perntype(
                curr_targets, node_mptype_mpinstances, sampling)

            for mptype, mpinstances in mptype_mpinstances.items():
                ng = nx.MultiDiGraph()
                ng.add_nodes_from(curr_targets)
                ng.add_edges_from(
                    np.vstack([mpinstances[:, 0], mpinstances[:, -1]]).T)
                g = dgl.from_networkx(ng).to(device)

                iftargets = {src: False for src in mpinstances[:, 0]}
                iftargets.update({dst: True for dst in curr_targets})

                layer_ntype_mptype_g[-layer_index - 1][ntype][mptype] = g
                layer_ntype_mptype_mpinstances[-layer_index - 1][ntype][mptype] = mpinstances
                layer_ntype_mptype_iftargets[-layer_index - 1][ntype][mptype] = np.array(
                    sorted(iftargets.items(), key=lambda x: x[0]))

                targets.update(np.unique(mpinstances))

    batch_ntype_orders = defaultdict(dict)
    for target in targets:
        batch_ntype_orders[type_mask[target]][target] = node_orders[target]
    for ntype in batch_ntype_orders:
        batch_ntype_orders[ntype] = {
            target: order
            for target, order in sorted(batch_ntype_orders[ntype].items(),
                                        key=lambda x: x[1])
        }

    return (layer_ntype_mptype_g, layer_ntype_mptype_mpinstances,
            layer_ntype_mptype_iftargets, batch_ntype_orders)
def load_data(path, backend='dgl', format='tuple'):
    if backend == 'dgl':
        try:
            print("Trying to load dgl graph directly")
            glist, __ = load_graphs(osp.join(path, 'g.bin'))
            g = glist[0]
            print("Success")
        except DGLError as e:
            print("File not found", e)
            print("Loading nx graph")
            nx_graph = nx.read_adjlist(osp.join(path, 'adjlist.txt'),
                                       nodetype=int)
            print("Type:", type(nx_graph))
            g = dgl.from_networkx(nx_graph)
        N = g.number_of_nodes()
        X = np.load(osp.join(path, 'X.npy'))
        y = np.load(osp.join(path, 'y.npy'))
        t = np.load(osp.join(path, 't.npy'))
        assert X.shape[0] == N
        assert y.size == N
        assert t.size == N
        return g, X, y, t
    elif backend == 'geometric':
        # DONE test this!
        nx_graph = nx.read_adjlist(osp.join(path, 'adjlist.txt'), nodetype=int)
        X = np.load(osp.join(path, 'X.npy'))
        y = np.load(osp.join(path, 'y.npy'))
        t = np.load(osp.join(path, 't.npy'))
        print("Type:", type(nx_graph))
        attr_dict = {
            i: {
                'X': X[i],
                'y': y[i],
                't': t[i]
            }
            for i in range(X.shape[0])
        }
        print("attr_dict loaded!")
        nx.set_node_attributes(nx_graph, attr_dict)
        print("attributes set!")
        del attr_dict
        gc.collect()
        g = tg.utils.from_networkx(nx_graph)
        del nx_graph
        if format == 'tuple':
            return g.edge_index, g.X, g.y, g.t
        else:
            g.x = g.X
            return g
    else:
        raise ValueError("Unknown backend: " + backend)
def test_gg_conv():
    g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
    ctx = F.ctx()
    gg_conv = nn.GatedGraphConv(10, 20, 3, 4)  # n_step = 3, n_etypes = 4
    gg_conv.initialize(ctx=ctx)
    print(gg_conv)

    # test#1: basic
    h0 = F.randn((20, 10))
    etypes = nd.random.randint(0, 4, g.number_of_edges()).as_in_context(ctx)
    h1 = gg_conv(g, h0, etypes)
    assert h1.shape == (20, 20)
def generate_data(args):
    data = load_data(args)
    labels = torch.LongTensor(data.labels)
    features = torch.FloatTensor(data.features)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = data.graph
    # add self loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g).to('cuda:0')
    g.add_edges(g.nodes(), g.nodes())
    # move the dense adjacency matrix back to the CPU before converting to numpy
    netg = nx.from_numpy_matrix(g.adjacency_matrix().to_dense().cpu().numpy(),
                                create_using=nx.DiGraph)
    print(netg)
    g = dgl.from_networkx(netg, edge_attrs=['weight']).to("cuda:0")
    n_edges = g.number_of_edges()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    print("train_mask-shape", train_mask)
    return (g, num_feats, n_classes, heads, cuda, features, labels,
            train_mask, val_mask, test_mask)