def load_graphs(feat_data, labels, adj_lists):
    """Assemble a DGLGraph that carries node features and node labels.

    Parameters
    ----------
    feat_data
        Per-node features; ``feat_data.shape[0]`` is used as the node count.
    labels
        Per-node labels, stored under ``ndata['labels']``.
    adj_lists
        Indexable pair: item 0 holds the edge sources, item 1 the edge
        destinations.

    Returns
    -------
    DGLGraph
        Graph with ``ndata['feat']`` and ``ndata['labels']`` populated.
    """
    node_count = feat_data.shape[0]

    graph = DGLGraph()
    graph.add_nodes(node_count)
    graph.ndata['feat'] = feat_data

    sources, destinations = adj_lists[0], adj_lists[1]
    graph.add_edges(sources, destinations)

    graph.ndata['labels'] = labels
    return graph
def rdf2dgl(rdf_graph, metadata, relation2id, bidirectional=True):
    """Convert an RDF graph into a DGLGraph with typed, normalised edges.

    Every triple ``(s, p, o)`` read from ``RDFReader(rdf_graph)`` becomes an
    edge ``int(s) -> int(o)`` of type ``relation2id[p]``. When
    ``bidirectional`` is true a reverse edge ``int(o) -> int(s)`` is added as
    well, typed ``relation2id[p] + len(relation2id)``.

    Edges are sorted by ``(dst, src, type)``. Each edge receives a
    normalisation weight of ``1 / k`` where ``k`` is the number of edges that
    share its ``(dst, type)`` pair.

    Parameters
    ----------
    rdf_graph
        Object handed to ``RDFReader``.
    metadata
        Per-node metadata; its length must equal the number of nodes. Stored
        unchanged in ``g.gdata['metadata']``.
    relation2id : dict
        Maps a predicate to its relation id. The ids must be exactly
        ``0 .. len(relation2id) - 1``.
    bidirectional : bool
        Whether to add the typed reverse edge for every triple.

    Returns
    -------
    DGLGraph
        Graph with ``ndata['id']`` (``(N, 1)`` long), ``edata['type']`` and
        ``edata['norm']`` (``(E, 1)`` float32), plus a ``gdata`` attribute.

    Raises
    ------
    AssertionError
        If the relation ids are not contiguous, the node ids are not
        ``0 .. N-1``, or ``metadata`` does not match the node count.
    """
    assert set(relation2id.values()) == set(range(len(relation2id)))

    # All reader access stays inside the context manager so that nothing is
    # read from it after it has been closed.
    with RDFReader(rdf_graph) as reader:
        subjects = reader.subjectSet()
        objects = reader.objectSet()
        nodes = sorted(subjects.union(objects))
        # to make sure the metadata-node alignment is correct
        assert [int(node) for node in nodes] == list(range(len(nodes)))

        num_node = len(nodes)
        assert num_node == len(metadata)

        if num_node == 0:
            g = DGLGraph()
            g.gdata = {'metadata': metadata}
            return g

        assert num_node < np.iinfo(np.int32).max

        reverse_offset = len(relation2id)
        edge_list = []
        for s, p, o in reader.triples():
            src, dst = int(s), int(o)
            assert src < num_node and dst < num_node
            rel = relation2id[p]
            edge_list.append((src, dst, rel))
            if bidirectional:
                edge_list.append((dst, src, rel + reverse_offset))

    # sort indices by destination
    edge_list.sort(key=lambda edge: (edge[1], edge[0], edge[2]))
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy; use an explicit 64-bit integer dtype instead.
    edge_array = np.array(edge_list, dtype=np.int64)
    edge_src, edge_dst, edge_type = edge_array.transpose()

    # normalize by dst degree (counted per (dst, type) pair)
    _, inverse_index, count = np.unique((edge_dst, edge_type),
                                        axis=1,
                                        return_inverse=True,
                                        return_counts=True)
    degrees = count[inverse_index]
    edge_norm = np.ones(len(edge_dst), dtype=np.float32) / degrees.astype(
        np.float32)

    node_ids = torch.arange(0, num_node, dtype=torch.long).view(-1, 1)
    edge_type = torch.from_numpy(edge_type)
    edge_norm = torch.from_numpy(edge_norm).unsqueeze(1)

    g = DGLGraph()
    g.add_nodes(num_node)
    g.add_edges(edge_src, edge_dst)
    g.ndata.update({'id': node_ids})
    g.edata.update({'type': edge_type, 'norm': edge_norm})
    g.gdata = {'metadata': metadata}  # we add this field in DGLGraph
    return g
def construct_complete_graph_from_mol(mol, add_self_loop=False):
    """Build the topology of a complete graph over the atoms of a molecule.

    The **i** th atom in the molecule, i.e. ``mol.GetAtomWithIdx(i)``,
    corresponds to the **i** th node in the returned DGLGraph.

    Edges are added source-major: for every source ``i`` in increasing order,
    one edge ``(i, j)`` is added for every destination ``j`` in increasing
    order, i.e. (0, 0), (0, 1), (0, 2), ... (1, 0), (1, 1), (1, 2), ...
    Without self loops the pairs with ``i == j`` are left out.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        RDKit molecule holder
    add_self_loop : bool
        Whether to add self loops in DGLGraphs. Default to False.

    Returns
    -------
    g : DGLGraph
        Complete graph topology of the molecule, without any features
    """
    num_atoms = mol.GetNumAtoms()

    # Enumerate the (src, dst) pairs once, then split them into the two
    # parallel lists expected by ``add_edges``.
    pairs = [(src, dst)
             for src in range(num_atoms)
             for dst in range(num_atoms)
             if add_self_loop or src != dst]
    src_list = [src for src, _ in pairs]
    dst_list = [dst for _, dst in pairs]

    g = DGLGraph()
    g.add_nodes(num_atoms)
    g.add_edges(src_list, dst_list)
    return g
def synthetic_data(num_node=3000, num_feature=10, num_class=2, num_important=4):
    """Generate a synthetic node-classification dataset on a random graph.

    A Barabasi-Albert graph (2 edges per new node) is created, self loops are
    removed and then one self loop per node is added to the DGL graph.
    Features are drawn from a multivariate normal whose covariance is
    ``inv(L + I)``, with ``L = I - D^-1/2 A D^-1/2`` built from the dense
    adjacency, and are made non-negative with ``abs``. Labels are obtained by
    thresholding ``sigmoid(P @ X @ W)`` at 0.5, where ``P`` is the
    L1-row-normalised adjacency.

    Parameters
    ----------
    num_node : int
        Number of nodes.
    num_feature : int
        Number of feature columns.
    num_class : int
        Stored as ``data.num_labels``. The generated labels are always 0/1.
    num_important : int
        Number of leading features that get a large weight (alternating
        ``+10, -10, ...``); the remaining weights are ``0.1 * N(0, 1)``.

    Returns
    -------
    EasyDict
        With fields ``graph``, ``num_labels``, ``g``, ``adj``, ``features``,
        ``Prob``, ``labels`` and ``size``.
    """
    gnp = nx.barabasi_albert_graph(num_node, 2)
    gnp.remove_edges_from(nx.selfloop_edges(gnp))
    g = DGLGraph(gnp)
    g.add_edges(g.nodes(), g.nodes())

    data = EasyDict()
    data.graph = gnp
    data.num_labels = num_class
    data.g = g
    data.adj = g.adjacency_matrix(transpose=None).to_dense()

    means = np.zeros(num_node)
    # D^-1/2 as a dense diagonal matrix.
    degree = np.zeros((num_node, num_node))
    for i in range(num_node):
        degree[i, i] = data.adj[i].sum()**-0.5
    lap_matrix = np.identity(num_node) - np.matmul(
        np.matmul(degree, data.adj.numpy()), degree)
    cov = np.linalg.inv(lap_matrix + np.identity(num_node))

    data.features = th.from_numpy(
        multivariate_normal(means, cov, num_feature).transpose())
    data.features = data.features.float().abs()
    g.ndata['x'] = data.features

    W = th.randn(num_feature) * 0.1
    # Alternating +10 / -10 for the important features. For the default
    # num_important=4 this is exactly [10, -10, 10, -10]; other values used to
    # fail with a shape mismatch against the hard-coded 4-element tensor.
    W[:num_important] = th.Tensor(
        [10 if k % 2 == 0 else -10 for k in range(num_important)])

    data.Prob = normalize(th.FloatTensor(data.adj), p=1, dim=1)
    # Reuse the normalised adjacency instead of normalising a second time.
    logits = th.sigmoid(th.matmul(th.matmul(data.Prob, data.features), W))
    data.labels = (logits > 0.5).long()
    data.size = num_node
    return data
def load_cls_data(args):
    """Load a node-classification dataset and its self-looped DGL graph.

    Parameters
    ----------
    args
        Passed through to ``load_data``.

    Returns
    -------
    tuple
        ``(g, features, labels, n_classes, train_mask, val_mask, test_mask)``.
        The masks are ``torch.BoolTensor`` when that type exists and
        ``torch.ByteTensor`` otherwise.
    """
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    n_classes = data.num_labels

    # Older torch builds have no BoolTensor; fall back to ByteTensor there.
    if hasattr(torch, 'BoolTensor'):
        mask_type = torch.BoolTensor
    else:
        mask_type = torch.ByteTensor
    train_mask = mask_type(data.train_mask)
    val_mask = mask_type(data.val_mask)
    test_mask = mask_type(data.test_mask)

    # Replace any existing self loops by exactly one self loop per node.
    nx_graph = data.graph
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    looped = DGLGraph(nx_graph)
    looped.add_edges(looped.nodes(), looped.nodes())

    # Rebuild the graph from its edge list through ``dgl.graph``.
    row = looped.edges()[0]
    col = looped.edges()[1]
    g = dgl.graph((row, col))

    return g, features, labels, n_classes, train_mask, val_mask, test_mask
def mols2graphs(mols):
    """Convert molecules into DGL graphs carrying atom features.

    Parameters
    ----------
    mols
        A list of molecules.

    Returns
    -------
    list
        One DGLGraph per molecule. Each bond contributes two directed edges
        (begin -> end, then end -> begin) and ``ndata['h']`` holds the stacked
        ``atom_features`` of the atoms.
    """
    graphs = []
    for mol in tqdm(mols):
        n_atoms = mol.GetNumAtoms()
        graph = DGLGraph()

        atom_feats = []
        for position, atom in enumerate(mol.GetAtoms()):
            # Node i must be atom i for the features to line up.
            assert position == atom.GetIdx()
            atom_feats.append(atom_features(atom))
        graph.add_nodes(n_atoms)

        sources, targets = [], []
        for bond in mol.GetBonds():
            begin_atom = bond.GetBeginAtom()
            end_atom = bond.GetEndAtom()
            u = begin_atom.GetIdx()
            v = end_atom.GetIdx()
            sources += [u, v]
            targets += [v, u]
        graph.add_edges(sources, targets)

        graph.ndata['h'] = torch.Tensor([feat.tolist() for feat in atom_feats])
        graphs.append(graph)
    return graphs
def _csr_from_npz(loader, prefix):
    """Rebuild the CSR matrix stored under ``<prefix>_data/_indices/_indptr/_shape``."""
    return sp.csr_matrix(
        (loader[prefix + '_data'],
         loader[prefix + '_indices'],
         loader[prefix + '_indptr']),
        shape=loader[prefix + '_shape'])


def load_npz(file_name):
    """Load an attributed graph from an ``.npz`` archive into a DGLGraph.

    The adjacency matrix is read from the ``adj_*`` CSR arrays and every
    stored entry is added in both directions. Node attributes come from the
    ``attr_*`` CSR arrays or from a dense ``attr_matrix``; labels come from
    the ``labels_*`` CSR arrays or from a dense ``labels`` array.

    Parameters
    ----------
    file_name
        Path (or file object) accepted by ``np.load``.

    Returns
    -------
    DGLGraph
        Graph with ``ndata['feat']`` and ``ndata['label']`` set when the
        archive provides attributes / labels. A missing entry is left unset
        instead of assigning ``None`` as node data.
    """
    with np.load(file_name) as archive:
        # Materialise every array while the archive is still open.
        loader = dict(archive)

    num_nodes = loader['adj_shape'][0]
    adj_matrix = _csr_from_npz(loader, 'adj').tocoo()

    if 'attr_data' in loader:
        # Attributes are stored as a sparse CSR matrix
        attr_matrix = _csr_from_npz(loader, 'attr').todense()
    elif 'attr_matrix' in loader:
        # Attributes are stored as a (dense) np.ndarray
        attr_matrix = loader['attr_matrix']
    else:
        attr_matrix = None

    if 'labels_data' in loader:
        # Labels are stored as a CSR matrix
        labels = _csr_from_npz(loader, 'labels').todense()
    elif 'labels' in loader:
        # Labels are stored as a numpy array
        labels = loader['labels']
    else:
        labels = None

    g = DGLGraph()
    g.add_nodes(num_nodes)
    # Add every stored entry in both directions.
    g.add_edges(adj_matrix.row, adj_matrix.col)
    g.add_edges(adj_matrix.col, adj_matrix.row)
    if attr_matrix is not None:
        g.ndata['feat'] = attr_matrix
    if labels is not None:
        g.ndata['label'] = labels
    return g
def mol2dgl_single(smiles):
    """Turn one SMILES string into a graph plus atom and bond feature tensors.

    Parameters
    ----------
    smiles
        Passed to ``get_mol`` to obtain the molecule.

    Returns
    -------
    tuple
        ``(graph, atom_x, bond_x)``. ``graph`` has one node per atom and two
        directed edges per bond (begin -> end, then end -> begin). ``atom_x``
        stacks ``atom_features`` per atom. ``bond_x`` stacks ``bond_features``
        once per directed edge, or is ``torch.zeros(0)`` when there are no
        bonds.
    """
    mol = get_mol(smiles)
    n_atoms = mol.GetNumAtoms()

    graph = DGLGraph()
    atom_x = []
    for position, atom in enumerate(mol.GetAtoms()):
        # Node i must be atom i for the features to line up.
        assert position == atom.GetIdx()
        atom_x.append(atom_features(atom))
    graph.add_nodes(n_atoms)

    bond_src, bond_dst, bond_x = [], [], []
    for bond in mol.GetBonds():
        u = bond.GetBeginAtom().GetIdx()
        v = bond.GetEndAtom().GetIdx()
        feats = bond_features(bond)
        # Forward direction followed by the reverse direction; both share the
        # same bond features.
        bond_src.extend((u, v))
        bond_dst.extend((v, u))
        bond_x.extend((feats, feats))
    graph.add_edges(bond_src, bond_dst)

    atom_tensor = torch.stack(atom_x)
    if len(bond_x) > 0:
        bond_tensor = torch.stack(bond_x)
    else:
        bond_tensor = torch.zeros(0)
    return graph, atom_tensor, bond_tensor
def prepare_data(data, cuda=True):
    """Convert a dataset object into tensors and a self-looped DGL graph.

    Each converted tensor is printed as soon as it has been created.

    Parameters
    ----------
    data
        Object exposing ``features``, ``labels``, ``train_mask``,
        ``test_mask``, ``val_mask`` and ``graph``.
    cuda : bool
        When true, ``features``, ``labels`` and the degree norm are moved to
        the GPU.

    Returns
    -------
    tuple
        ``(features, g, labels, mask, val_mask, test_mask, n_edges)``, where
        ``n_edges`` is the edge count of ``data.graph`` before the self loops
        are added.
    """
    features = torch.FloatTensor(data.features)
    print('features: ', features)
    labels = torch.LongTensor(data.labels)
    print('labels: ', labels)
    train_mask = torch.ByteTensor(data.train_mask)
    print('mask: ', train_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    print('test_mask: ', test_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    print('val_mask: ', val_mask)
    n_edges = data.graph.number_of_edges()
    print('n_edges: ', n_edges)

    # create DGL graph and add one self loop per node
    graph = DGLGraph(data.graph)
    graph.add_edges(graph.nodes(), graph.nodes())

    # deg^-1/2 per node; isolated nodes (infinite value) get 0 instead
    in_degrees = graph.in_degrees().float()
    norm = torch.pow(in_degrees, -0.5)
    norm[torch.isinf(norm)] = 0

    if cuda:
        features = features.cuda()
        labels = labels.cuda()
        norm = norm.cuda()
    graph.ndata['norm'] = norm.unsqueeze(1)

    return features, graph, labels, train_mask, val_mask, test_mask, n_edges
def load_graphs(data, self_loop):
    """Build a sequence of cumulative graph snapshots and split it three ways.

    ``data`` is indexed as a 2-D array: columns 0 and 1 are used as edge end
    points, column 2 as the edge feature and column 3 as the value that is
    bucketed into 14-day steps (presumably a timestamp in seconds -- TODO
    confirm).

    Parameters
    ----------
    data
        2-D array with at least four columns, see above.
    self_loop
        When equal to ``True`` one self loop per node is appended to every
        snapshot and flagged in ``edata['self_edge']``.

    Returns
    -------
    tuple
        ``(train_graphs, valid_graphs, test_graphs)``: snapshots ``[:95]``,
        ``[95:109]`` and ``[109:]``.
    """
    graphs = []
    # NOTE(review): the node count is the span of the ids, yet the ids are
    # used unshifted as node indices below -- this presumably assumes ids
    # start at 0; confirm.
    num_nodes = data[:, 0:2].max() - data[:, 0:2].min() + 1
    # Width of one time bucket: 14 days, expressed in seconds.
    delta = datetime.timedelta(days=14).total_seconds()
    time_index = np.around(
        (data[:, 3] - data[:, 3].min()) / delta).astype(np.int64)
    prevind = 0
    # NOTE(review): range() stops before time_index.max(), so rows that fall
    # in the last bucket never enter any snapshot -- confirm this is intended.
    for i in range(time_index.max()):
        g = DGLGraph()
        g.add_nodes(num_nodes)
        # Cumulative snapshot: every row whose bucket is <= i.
        row_mask = time_index <= i
        edges = data[row_mask][:, 0:2]
        rate = data[row_mask][:, 2]
        # Marks the rows at positions >= the previous snapshot's row count
        # (assumes rows are ordered by time -- TODO confirm).
        diffmask = np.arange(len(edges)) >= prevind
        g.add_edges(edges[:, 0], edges[:, 1])
        g.edata['feat'] = torch.FloatTensor(rate.reshape(-1, 1))
        g.edata['diff'] = diffmask
        # NOTE(review): `l` is not defined anywhere in this function; unless a
        # module-level `l` exists this raises NameError. Confirm the intended
        # node feature.
        g.ndata['feat'] = l
        if self_loop == True:
            g.add_edges(g.nodes(), g.nodes())
            # The self loops were appended last, so they are the final
            # number_of_nodes() edges.
            selfedgemask = np.zeros(g.number_of_edges(), dtype=bool)
            selfedgemask[-g.number_of_nodes():] = True
            g.edata['self_edge'] = selfedgemask
        graphs.append(g)
        prevind = len(edges)
    # Fixed split points over the snapshot sequence.
    train_graphs = graphs[:95]
    valid_graphs = graphs[95:109]
    test_graphs = graphs[109:]
    return train_graphs, valid_graphs, test_graphs
def test_local_var():
    """Check that changes made through ``g.local_var()`` stay local.

    Each nested ``foo`` takes a local view of the graph, changes features on
    it, and the assertions that follow verify that the outer graph is
    unchanged.
    """
    g = DGLGraph(nx.path_graph(5))
    g = g.to(F.ctx())
    g.ndata['h'] = F.zeros((g.number_of_nodes(), 3))
    g.edata['w'] = F.zeros((g.number_of_edges(), 4))
    # test override
    def foo(g):
        g = g.local_var()
        g.ndata['h'] = F.ones((g.number_of_nodes(), 3))
        g.edata['w'] = F.ones((g.number_of_edges(), 4))
    foo(g)
    # The outer graph must still hold the original zeros.
    assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
    assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
    # test out-place update
    def foo(g):
        g = g.local_var()
        g.nodes[[2, 3]].data['h'] = F.ones((2, 3))
        g.edges[[2, 3]].data['w'] = F.ones((2, 4))
    foo(g)
    assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
    assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
    # test out-place update 2
    def foo(g):
        g = g.local_var()
        g.apply_nodes(lambda nodes: {'h' : nodes.data['h'] + 10}, [2, 3])
        g.apply_edges(lambda edges: {'w' : edges.data['w'] + 10}, [2, 3])
    foo(g)
    assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
    assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
    # test auto-pop
    def foo(g):
        g = g.local_var()
        g.ndata['hh'] = F.ones((g.number_of_nodes(), 3))
        g.edata['ww'] = F.ones((g.number_of_edges(), 4))
    foo(g)
    # Fields created on the local view must not appear on the outer graph.
    assert 'hh' not in g.ndata
    assert 'ww' not in g.edata

    # test initializer1
    g = DGLGraph()
    g = g.to(F.ctx())
    g.add_nodes(2)
    g.add_edges([0, 1], [1, 1])
    g.set_n_initializer(dgl.init.zero_initializer)
    def foo(g):
        g = g.local_var()
        # Only node 0 is written; node 1 is expected to be filled with zeros.
        g.nodes[0].data['h'] = F.ones((1, 1))
        assert F.allclose(g.ndata['h'], F.tensor([[1.], [0.]]))
    foo(g)
    # test initializer2
    def foo_e_initializer(shape, dtype, ctx, id_range):
        return F.ones(shape)
    g.set_e_initializer(foo_e_initializer, field='h')
    def foo(g):
        g = g.local_var()
        # Field 'h' uses the custom initializer, so the unwritten edge is 1.
        g.edges[0, 1].data['h'] = F.ones((1, 1))
        assert F.allclose(g.edata['h'], F.ones((2, 1)))
        # Field 'w' has no custom initializer; the unwritten edge is expected
        # to be 0.
        g.edges[0, 1].data['w'] = F.ones((1, 1))
        assert F.allclose(g.edata['w'], F.tensor([[1.], [0.]]))
    foo(g)
def main(args):
    """Run ``EglGATConvTest`` once on a small two-hub graph on the GPU.

    The graph has ``args.num_nodes`` nodes; every node gets one edge to node 0
    and one edge to node 1. Source features are random with a single input
    feature per node.

    Parameters
    ----------
    args
        Namespace providing ``num_nodes``, ``num_heads``, ``num_hidden``,
        ``negative_slope``, ``attn_drop`` and ``gpu``.
    """
    num_nodes = args.num_nodes
    num_heads = args.num_heads
    num_hidden = args.num_hidden
    negative_slope = args.negative_slope
    # Read but not used below: the layer is built with attn_drop=0.
    dropout_ratio = args.attn_drop
    th.cuda.set_device(args.gpu)
    in_feats = 1

    g = DGLGraph()
    g.add_nodes(num_nodes)
    for hub in (0, 1):
        g.add_edges(list(range(num_nodes)), hub)

    feat_src = th.rand((num_nodes, in_feats))
    feat_src.requires_grad = True
    feat_src = feat_src.cuda()

    conv_test = EglGATConvTest(in_feats,
                               num_hidden,
                               num_heads=num_heads,
                               feat_drop=0.,
                               attn_drop=0.,
                               negative_slope=negative_slope,
                               residual=False,
                               activation=None)
    conv_test.cuda()
    rst, dgl_rst = conv_test.forward(g, feat_src)
def to_dgl_graph(self):
    """Convert to DGL graph data instance

    Returns
    -------
    dgl.DGLGraph
        Graph data for DGL. ``ndata['x']`` holds the node features;
        ``ndata['pos']`` and ``edata['edge_attr']`` are set only when
        ``node_pos_features`` / ``edge_features`` are not None.

    Raises
    ------
    ValueError
        If ``torch`` or ``dgl`` cannot be imported. The original
        ``ModuleNotFoundError`` is attached as ``__cause__``.

    Notes
    -----
    This method requires DGL to be installed.
    """
    try:
        import torch
        from dgl import DGLGraph
    except ModuleNotFoundError as err:
        # Chain the cause so the traceback shows which module was missing.
        raise ValueError("This function requires DGL to be installed.") from err

    g = DGLGraph()
    g.add_nodes(self.num_nodes)
    g.add_edges(
        torch.from_numpy(self.edge_index[0]).long(),
        torch.from_numpy(self.edge_index[1]).long())
    g.ndata['x'] = torch.from_numpy(self.node_features).float()

    if self.node_pos_features is not None:
        g.ndata['pos'] = torch.from_numpy(self.node_pos_features).float()
    if self.edge_features is not None:
        g.edata['edge_attr'] = torch.from_numpy(self.edge_features).float()
    return g
def mol2dgl_single(cand_batch):
    """Build candidate graphs and the tree-to-candidate edge alignment.

    Parameters
    ----------
    cand_batch
        Iterable of ``(mol, mol_tree, ctr_node_id)`` triples. ``ctr_node_id``
        is unpacked but not used in this function.

    Returns
    -------
    tuple
        ``(cand_graphs, atom_x, bond_x, tree_mess_source_edges,
        tree_mess_target_edges, tree_mess_target_nodes)``. Atom and bond
        features are stacked over the whole batch; ``bond_x`` is
        ``torch.zeros(0)`` when no bond was seen. The three ``tree_mess_*``
        entries are ``LongTensor``s; the target entries use atom indices
        shifted by the number of atoms of all previous molecules.
    """
    cand_graphs = []
    tree_mess_source_edges = []  # map these edges from trees to...
    tree_mess_target_edges = []  # these edges on candidate graphs
    tree_mess_target_nodes = []
    # Running atom offset of the current molecule within the batch.
    n_nodes = 0
    atom_x = []
    bond_x = []

    for mol, mol_tree, ctr_node_id in cand_batch:
        n_atoms = mol.GetNumAtoms()

        g = DGLGraph()

        for i, atom in enumerate(mol.GetAtoms()):
            # Node i must be atom i for the features to line up.
            assert i == atom.GetIdx()
            atom_x.append(atom_features(atom))
        g.add_nodes(n_atoms)

        bond_src = []
        bond_dst = []
        for i, bond in enumerate(mol.GetBonds()):
            a1 = bond.GetBeginAtom()
            a2 = bond.GetEndAtom()
            begin_idx = a1.GetIdx()
            end_idx = a2.GetIdx()
            features = bond_features(bond)

            # Each bond yields two directed edges sharing the same features.
            bond_src.append(begin_idx)
            bond_dst.append(end_idx)
            bond_x.append(features)
            bond_src.append(end_idx)
            bond_dst.append(begin_idx)
            bond_x.append(features)

            # Atom map numbers are used as 1-based keys into nodes_dict; a
            # map number of 0 is treated as "no tree node" (-1).
            x_nid, y_nid = a1.GetAtomMapNum(), a2.GetAtomMapNum()
            # Tree node ID in the batch
            x_bid = mol_tree.nodes_dict[x_nid - 1]['idx'] if x_nid > 0 else -1
            y_bid = mol_tree.nodes_dict[y_nid - 1]['idx'] if y_nid > 0 else -1
            # Only bonds joining two different tree nodes are aligned.
            if x_bid >= 0 and y_bid >= 0 and x_bid != y_bid:
                if mol_tree.has_edge_between(x_bid, y_bid):
                    tree_mess_target_edges.append(
                        (begin_idx + n_nodes, end_idx + n_nodes))
                    tree_mess_source_edges.append((x_bid, y_bid))
                    tree_mess_target_nodes.append(end_idx + n_nodes)
                if mol_tree.has_edge_between(y_bid, x_bid):
                    tree_mess_target_edges.append(
                        (end_idx + n_nodes, begin_idx + n_nodes))
                    tree_mess_source_edges.append((y_bid, x_bid))
                    tree_mess_target_nodes.append(begin_idx + n_nodes)

        # Advance the offset only after all bonds of this molecule are done.
        n_nodes += n_atoms
        g.add_edges(bond_src, bond_dst)
        cand_graphs.append(g)

    return cand_graphs, torch.stack(atom_x), \
        torch.stack(bond_x) if len(bond_x) > 0 else torch.zeros(0), \
        torch.LongTensor(tree_mess_source_edges), \
        torch.LongTensor(tree_mess_target_edges), \
        torch.LongTensor(tree_mess_target_nodes)
def smile2graph(smile, add_self_loop=False, atom_featurizer=CanonicalAtomFeaturizer(), bond_featurizer=None):
    """Convert SMILES into a DGLGraph.

    The atoms are first renumbered with the ranking returned by
    ``rdmolfiles.CanonicalRankAtoms``. After that, the **i** th atom of the
    renumbered molecule corresponds to the **i** th node of the graph, and the
    **i** th bond, i.e. ``mol.GetBondWithIdx(i)``, corresponds to the
    **(2i)**-th and **(2i+1)**-th edges: the first goes from **u** to **v**
    and the second from **v** to **u**, where **u** is
    ``bond.GetBeginAtomIdx()`` and **v** is ``bond.GetEndAtomIdx()``.

    If self loops are added, they are the last **n** edges, one per node.

    Parameters
    ----------
    smile : str
        String of SMILES
    add_self_loop : bool
        Whether to add self loops in DGLGraphs.
    atom_featurizer : callable, rdkit.Chem.rdchem.Mol -> dict
        Featurization for atoms in a molecule, which can be used to update
        ndata for a DGLGraph. Default to CanonicalAtomFeaturizer(). Pass None
        to skip atom featurization.
    bond_featurizer : callable, rdkit.Chem.rdchem.Mol -> dict
        Featurization for bonds in a molecule, which can be used to update
        edata for a DGLGraph. Skipped when None.

    Returns
    -------
    DGLGraph
    """
    mol = Chem.MolFromSmiles(smile)
    canonical_rank = rdmolfiles.CanonicalRankAtoms(mol)
    mol = rdmolops.RenumberAtoms(mol, canonical_rank)

    graph = DGLGraph()
    graph.add_nodes(mol.GetNumAtoms())

    sources = []
    targets = []
    for bond_idx in range(mol.GetNumBonds()):
        bond = mol.GetBondWithIdx(bond_idx)
        begin = bond.GetBeginAtomIdx()
        end = bond.GetEndAtomIdx()
        # u -> v first, then v -> u
        sources += [begin, end]
        targets += [end, begin]
    graph.add_edges(sources, targets)

    if add_self_loop:
        all_nodes = graph.nodes()
        graph.add_edges(all_nodes, all_nodes)

    # Featurization
    if atom_featurizer is not None:
        graph.ndata.update(atom_featurizer(mol))
    if bond_featurizer is not None:
        graph.edata.update(bond_featurizer(mol))

    return graph
def mol2dgl_single(mols):
    """Convert molecules into DGL graphs with atom features in ``ndata['h']``.

    Parameters
    ----------
    mols
        Iterable of molecules.

    Returns
    -------
    list
        One DGLGraph per molecule; every bond contributes two directed edges
        (begin -> end, then end -> begin).
    """
    cand_graphs = []
    for mol in mols:
        n_atoms = mol.GetNumAtoms()
        graph = DGLGraph()

        node_feats = []
        for position, atom in enumerate(mol.GetAtoms()):
            # Node i must be atom i for the features to line up.
            assert position == atom.GetIdx()
            node_feats.append(atom_features(atom))
        graph.add_nodes(n_atoms)

        sources, targets = [], []
        for bond in mol.GetBonds():
            u = bond.GetBeginAtom().GetIdx()
            v = bond.GetEndAtom().GetIdx()
            # NOTE(review): the bond features are computed but never returned
            # by this function; the call is kept so the same calls are made.
            bond_features(bond)
            sources.extend((u, v))
            targets.extend((v, u))
        graph.add_edges(sources, targets)

        graph.ndata['h'] = torch.Tensor(node_feats)
        cand_graphs.append(graph)
    return cand_graphs
def to_dgl_graph(xy: CCoords, polar: PCoords, edges: Edges) -> DGLGraph:
    """Build a DGLGraph from node coordinates and an edge list.

    Parameters
    ----------
    xy
        Per-node values; ``len(xy)`` is the node count and the values are
        stored as float32 in ``ndata['xy']``.
    polar
        Per-edge values, stored as float32 in ``edata['polar']``.
    edges
        Iterable of edge tuples, transposed with ``zip`` into the positional
        arguments of ``add_edges``. May be empty, in which case no edges are
        added.

    Returns
    -------
    DGLGraph
    """
    g = DGLGraph()
    g.add_nodes(len(xy))

    # zip(*edges) yields nothing for an empty edge list, which used to turn
    # into an argument-less add_edges() call and a TypeError.
    endpoints = tuple(zip(*edges))
    if endpoints:
        g.add_edges(*endpoints)

    g.ndata['xy'] = np.array(xy, dtype=np.float32)
    g.edata['polar'] = np.array(polar, dtype=np.float32)
    return g
def tensorize(self, sample: Nodes) -> TypedDGLGraphFieldOutput:
    """Build a typed DGL graph from tree nodes and the token sequence.

    Edge types are emitted only if their name is in ``self.vocabulary``:

    * ``"child"``: from a node's parent to the node.
    * ``"parent"``: from a node to its parent.
    * ``"next_token"``: from a token to the following token.
    * ``"previous_token"``: from a token to the preceding token.

    Parameters
    ----------
    sample
        4-tuple ``(nodes, node_index, token_indexes, _)``. ``node_index`` maps
        ``id(node)`` to the node's index in the graph; ``token_indexes`` lists
        the graph indices of the tokens in order.

    Returns
    -------
    TypedDGLGraphFieldOutput
        The graph, plus one edge-type index per edge in ``etypes``.
    """
    nodes, node_index, token_indexes, _ = sample
    sources, targets, etypes = [], [], []

    for node in nodes:
        if node.parent is None:
            continue
        if "child" in self.vocabulary:
            sources.append(node_index[id(node.parent)])
            targets.append(node_index[id(node)])
            etypes.append(self.vocabulary.get_index("child"))
        if "parent" in self.vocabulary:
            # Reverse of the "child" edge. Both types used to be emitted
            # parent -> node, unlike next_token / previous_token below.
            sources.append(node_index[id(node)])
            targets.append(node_index[id(node.parent)])
            etypes.append(self.vocabulary.get_index("parent"))

    # Consecutive token pairs.
    for previous_token_index, next_token_index in zip(
            islice(token_indexes, 0, None), islice(token_indexes, 1, None)):
        if "next_token" in self.vocabulary:
            sources.append(previous_token_index)
            targets.append(next_token_index)
            etypes.append(self.vocabulary.get_index("next_token"))
        if "previous_token" in self.vocabulary:
            sources.append(next_token_index)
            targets.append(previous_token_index)
            etypes.append(self.vocabulary.get_index("previous_token"))

    graph = DGLGraph()
    graph.add_nodes(len(nodes))
    graph.add_edges(sources, targets)
    return TypedDGLGraphFieldOutput(graph=graph,
                                    etypes=tensor(etypes, dtype=torch_long))
def test_dynamic_addition():
    """Check that feature columns keep matching lengths as nodes/edges are added.

    Nodes and edges are added step by step; after each step the assertions
    compare the row counts of the ``h1`` and ``h2`` columns.
    """
    N = 3
    D = 1

    g = DGLGraph()
    g = g.to(F.ctx())

    # Test node addition
    g.add_nodes(N)
    g.ndata.update({'h1': F.randn((N, D)),
                    'h2': F.randn((N, D))})
    # Adding nodes after features exist must extend both columns.
    g.add_nodes(3)
    assert g.ndata['h1'].shape[0] == g.ndata['h2'].shape[0] == N + 3

    # Test edge addition
    g.add_edge(0, 1)
    g.add_edge(1, 0)
    g.edata.update({'h1': F.randn((2, D)),
                    'h2': F.randn((2, D))})
    assert g.edata['h1'].shape[0] == g.edata['h2'].shape[0] == 2

    g.add_edges([0, 2], [2, 0])
    # Only 'h1' is overwritten; 'h2' must have been extended to 4 rows too.
    g.edata['h1'] = F.randn((4, D))
    assert g.edata['h1'].shape[0] == g.edata['h2'].shape[0] == 4

    g.add_edge(1, 2)
    # Write a single row of 'h1' for the newly added edge (id 4).
    g.edges[4].data['h1'] = F.randn((1, D))
    assert g.edata['h1'].shape[0] == g.edata['h2'].shape[0] == 5

    # test add edge with part of the features
    g.add_edge(2, 1, {'h1': F.randn((1, D))})
    assert len(g.edata['h1']) == len(g.edata['h2'])
def build_graph_on_ctx(self, ctx):
    """Create a graph for ``ctx`` and register both on this object.

    The graph is built from ``self.num_nodes``, ``self.src`` and ``self.dst``
    with a zero node initializer, and ``self.dist`` is copied to ``ctx`` as
    ``edata['dist']``. The graph is appended to ``self.graph_on_ctx`` and
    ``ctx`` to ``self.ctx``.
    """
    graph = DGLGraph()
    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.add_nodes(self.num_nodes)
    graph.add_edges(self.src, self.dst)

    dist_on_ctx = self.dist.as_in_context(ctx)
    graph.edata['dist'] = dist_on_ctx

    self.graph_on_ctx.append(graph)
    self.ctx.append(ctx)
def build_graph_on_ctx(self, ctx):
    """Create a feature-less graph and register it together with ``ctx``.

    The graph is built from ``self.num_nodes``, ``self.src`` and ``self.dst``
    with a zero node initializer. It is appended to ``self.graph_on_ctx`` and
    ``ctx`` is appended to ``self.ctx``.
    """
    graph = DGLGraph()
    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.add_nodes(self.num_nodes)
    graph.add_edges(self.src, self.dst)

    self.graph_on_ctx.append(graph)
    self.ctx.append(ctx)
def build_graph_on_ctx(self, ctx):
    """Create a graph with ``edata['dist']`` and register it with ``ctx``.

    The graph is built from ``self.num_nodes``, ``self.src`` and ``self.dst``
    with a zero node initializer. It is appended to ``self.graph_on_ctx`` and
    ``ctx`` is appended to ``self.ctx``.
    """
    g = DGLGraph()
    g.set_n_initializer(dgl.init.zero_initializer)
    g.add_nodes(self.num_nodes)
    g.add_edges(self.src, self.dst)
    # NOTE(review): the device is hard-coded to 'GPU:0' and `ctx` is only
    # recorded, not used for placement -- confirm this is intended.
    with tf.device('GPU:0'):
        g.edata['dist'] = self.dist
    self.graph_on_ctx.append(g)
    self.ctx.append(ctx)
def forward(self, g, features):
    """Apply the two GCN layers on a self-looped copy of ``g``.

    Parameters
    ----------
    g
        Graph input accepted by the ``DGLGraph`` constructor.
    features
        Node features fed to ``self.gcn1``.

    Returns
    -------
    The output of ``self.gcn2``.
    """
    graph = DGLGraph(g)
    # One self loop per node.
    graph.add_edges(graph.nodes(), graph.nodes())

    hidden = self.gcn1(graph, features)
    return self.gcn2(graph, hidden)


# net = Net()
# print(net)
def load_data(data):
    """Convert a dataset object into a self-looped DGL graph and tensors.

    Parameters
    ----------
    data
        Object exposing ``features``, ``labels`` and a networkx ``graph``.
        Existing self loops are removed from ``data.graph`` in place.

    Returns
    -------
    tuple
        ``(g, features, labels)`` with float features, long labels and a
        graph holding exactly one added self loop per node.
    """
    features = th.FloatTensor(data.features)
    labels = th.LongTensor(data.labels)

    # add self loop
    nx_graph = data.graph
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    dgl_graph = DGLGraph(nx_graph)
    dgl_graph.add_edges(dgl_graph.nodes(), dgl_graph.nodes())

    return dgl_graph, features, labels
def graph_construction(graph: DGLGraph, edge_pairs):
    """Create a graph with the nodes of ``graph`` and the given edges.

    Parameters
    ----------
    graph : DGLGraph
        Supplies the node count and the node features to carry over.
    edge_pairs
        2-D indexable; column 0 holds the sources and column 1 the
        destinations of the new edges.

    Returns
    -------
    DGLGraph
        New graph with every ``ndata`` field of ``graph`` assigned to it.
    """
    source_view = graph.local_var()

    rebuilt = DGLGraph()
    rebuilt.add_nodes(source_view.number_of_nodes())
    rebuilt.add_edges(edge_pairs[:, 0], edge_pairs[:, 1])

    for name, feature in source_view.ndata.items():
        rebuilt.ndata[name] = feature
    return rebuilt
def load_cora_data():
    """Load Cora and return its self-looped graph, features, labels and mask.

    Existing self loops are removed from the networkx graph, then exactly one
    self loop per node is added to the DGL graph.

    Returns
    -------
    tuple
        ``(g, features, labels, mask)`` with float features, long labels and
        the training mask as a byte tensor.
    """
    data = citegraph.load_cora()
    features = torch.Tensor(data.features).float()
    labels = torch.Tensor(data.labels).long()
    mask = torch.Tensor(data.train_mask).byte()

    g = data.graph
    # The previous call `g.selfloog_edges()` was a typo and raised
    # AttributeError. Collect the self loops into a list first so the graph is
    # not mutated while its edges are being iterated.
    self_loops = [(u, v) for u, v in g.edges() if u == v]
    g.remove_edges_from(self_loops)

    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    return g, features, labels, mask
def remove_self_loop_edges(graph: DGLGraph):
    """Return a copy of ``graph`` without its self-loop edges.

    Parameters
    ----------
    graph : DGLGraph
        Source graph; it is not modified.

    Returns
    -------
    DGLGraph
        New graph with the same number of nodes, every edge whose source
        differs from its destination, and all ``ndata`` fields of ``graph``.
        Edge features are not carried over.
    """
    all_src, all_dst = graph.all_edges()
    # An edge is a self loop exactly when src - dst is zero.
    keep = (all_src - all_dst) != 0
    kept_src = all_src[keep]
    kept_dst = all_dst[keep]

    stripped = DGLGraph()
    stripped.add_nodes(graph.number_of_nodes())
    stripped.add_edges(kept_src, kept_dst)

    for name, feature in graph.ndata.items():
        stripped.ndata[name] = feature
    return stripped
def mol2dgl_enc(smiles):
    """Turn one SMILES string into a graph plus atom and bond feature tensors.

    Parameters
    ----------
    smiles
        Passed to ``get_mol`` to obtain the molecule.

    Returns
    -------
    tuple
        ``(graph, atom_x, bond_x)``. ``graph`` has one node per atom and two
        directed edges per bond (begin -> end, then end -> begin). ``atom_x``
        stacks the atom features. ``bond_x`` stacks the bond features once per
        directed edge, or is ``torch.zeros(0)`` when there are no bonds.
    """

    def atom_features(atom):
        # One-hot blocks for symbol, degree, formal charge and chiral tag,
        # followed by the aromaticity flag.
        encoding = (
            onek_encoding_unk(atom.GetSymbol(), ELEM_LIST)
            + onek_encoding_unk(atom.GetDegree(), [0, 1, 2, 3, 4, 5])
            + onek_encoding_unk(atom.GetFormalCharge(), [-1, -2, 1, 2, 0])
            + onek_encoding_unk(int(atom.GetChiralTag()), [0, 1, 2, 3])
            + [atom.GetIsAromatic()]
        )
        return torch.Tensor(encoding)

    def bond_features(bond):
        # Four bond-type flags and the in-ring flag, followed by the one-hot
        # stereo block.
        bond_type = bond.GetBondType()
        stereo = int(bond.GetStereo())
        type_flags = [
            bond_type == kind
            for kind in (Chem.rdchem.BondType.SINGLE,
                         Chem.rdchem.BondType.DOUBLE,
                         Chem.rdchem.BondType.TRIPLE,
                         Chem.rdchem.BondType.AROMATIC)
        ]
        type_flags.append(bond.IsInRing())
        stereo_flags = onek_encoding_unk(stereo, [0, 1, 2, 3, 4, 5])
        return torch.Tensor(type_flags + stereo_flags)

    mol = get_mol(smiles)
    n_atoms = mol.GetNumAtoms()

    graph = DGLGraph()
    atom_x = []
    for position, atom in enumerate(mol.GetAtoms()):
        # Node i must be atom i for the features to line up.
        assert position == atom.GetIdx()
        atom_x.append(atom_features(atom))
    graph.add_nodes(n_atoms)

    bond_src, bond_dst, bond_x = [], [], []
    for bond in mol.GetBonds():
        u = bond.GetBeginAtom().GetIdx()
        v = bond.GetEndAtom().GetIdx()
        feats = bond_features(bond)
        # Forward direction followed by the reverse direction; both share the
        # same bond features.
        bond_src.extend((u, v))
        bond_dst.extend((v, u))
        bond_x.extend((feats, feats))
    graph.add_edges(bond_src, bond_dst)

    atom_tensor = torch.stack(atom_x)
    if len(bond_x) > 0:
        bond_tensor = torch.stack(bond_x)
    else:
        bond_tensor = torch.zeros(0)
    return graph, atom_tensor, bond_tensor
def load_cora_data():
    """Load Cora and return its self-looped graph, features, labels and mask.

    Returns
    -------
    tuple
        ``(g, features, labels, mask)`` with float features, long labels, the
        training mask as a bool tensor, and a graph holding exactly one added
        self loop per node.
    """
    dataset = citegrh.load_cora()
    features = torch.FloatTensor(dataset.features)
    labels = torch.LongTensor(dataset.labels)
    mask = torch.BoolTensor(dataset.train_mask)

    # add self loop
    nx_graph = dataset.graph
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    dgl_graph = DGLGraph(nx_graph)
    dgl_graph.add_edges(dgl_graph.nodes(), dgl_graph.nodes())

    return dgl_graph, features, labels, mask
def forward(self, inputs):
    """Run the graph layers over a batch of environments.

    Parameters
    ----------
    inputs
        Mapping holding the tensors ``'class_objects'``, ``'states_objects'``,
        ``'edge_tuples'``, ``'edge_classes'``, ``'mask_object'`` and
        ``'mask_edge'``. Each is unbound along its first dimension, giving one
        entry per environment.

    Returns
    -------
    Tensor
        Per-environment node states ``'h'`` after all layers, each padded with
        zeros along the node dimension up to ``self.num_nodes`` and
        concatenated along dimension 0.
    """
    keys = [
        'class_objects', 'states_objects', 'edge_tuples', 'edge_classes',
        'mask_object', 'mask_edge'
    ]
    [
        all_class_names, node_states, all_edge_ids, all_edge_types,
        mask_nodes, mask_edges
    ] = [torch.unbind(inputs[key]) for key in keys]
    num_envs = len(all_class_names)
    hs = []
    graphs = []
    for env_id in range(num_envs):
        g = DGLGraph()
        # The mask sums give the number of valid nodes / edges; the valid
        # entries are taken from the front of each tensor.
        num_nodes = int(mask_nodes[env_id].sum().item())
        num_edges = int(mask_edges[env_id].sum().item())
        ids = all_class_names[env_id][:num_nodes]
        node_states_curr = node_states[env_id][:num_nodes]
        g.add_nodes(num_nodes)
        if num_edges > 0:
            edge_types = all_edge_types[env_id][:num_edges].long()
            # Column 0 holds the sources, column 1 the destinations; every
            # edge gets a norm of 1.
            g.add_edges(
                all_edge_ids[env_id][:num_edges, 0].long(),
                all_edge_ids[env_id][:num_edges, 1].long(), {
                    'rel_type': edge_types.long(),
                    'norm': torch.ones(
                        (num_edges, 1)).to(edge_types.device)
                })
        # NOTE(review): feats_in is only assigned when self.features is None;
        # with any other value the assignment below would fail. Confirm the
        # intended behavior for that case.
        if self.features is None:
            feats_in = self.feat_in(ids.long(), node_states_curr)
        g.ndata['h'] = feats_in
        graphs.append(g)
    batch_graph = dgl.batch(graphs)
    # The layers' return values are discarded; 'h' is read back from the
    # graph afterwards, so they are presumably updating it in place.
    for layer in self.layers:
        layer(batch_graph)
    graphs = dgl.unbatch(batch_graph)
    hs_list = []
    for graph in graphs:
        curr_graph = graph.ndata.pop('h').unsqueeze(0)
        curr_nodes = curr_graph.shape[1]
        # Zero-pad the node dimension up to self.num_nodes.
        curr_graph = F.pad(curr_graph,
                           (0, 0, 0, self.num_nodes - curr_nodes),
                           'constant', 0.)
        hs_list.append(curr_graph)
    hs = torch.cat(hs_list, dim=0)
    return hs