def _collate_fn(self, batch):
    """Collate a batch of SMILES and append the graph <-> RNN index maps.

    The parent collate output is extended with three extra entries:

    * ``graph_to_rnn`` -- int32 array of shape
      ``(len(batch), self.k, max_iter)``. Entry ``[i, k, t]`` holds the flat
      RNN row assigned to step ``t`` of copy ``k`` of molecule ``i``;
      positions past a molecule's step count stay zero.
    * ``rnn_to_graph`` -- array of shape ``(3, total_steps)`` holding the
      inverse mapping: rows are molecule index, ``k`` index and step index.
    * ``NX_cum`` -- cumulative sum of element 6 of the parent output
      (presumably the per-molecule atom counts -- confirm against the
      parent ``_collate_fn``).

    Args:
        batch: sequence of SMILES strings.

    Returns:
        The parent's result list with the three arrays above appended.
    """
    collated = super(MolRNNLoader, self)._collate_fn(batch)

    # One step per bond plus one extra step, for every molecule in the batch.
    steps_per_mol = [
        Chem.MolFromSmiles(smiles).GetNumBonds() + 1 for smiles in batch
    ]

    max_iter = data_struct.get_mol_spec().max_iter
    graph_to_rnn = np.zeros((len(batch), self.k, max_iter), dtype=np.int32)

    inverse_chunks = []
    offset = 0  # running count of RNN rows handed out so far
    for mol_idx, num_steps in enumerate(steps_per_mol):
        step_ids = np.arange(num_steps)
        mol_ids = np.full((num_steps,), mol_idx, dtype=np.int32)
        for k_idx in range(self.k):
            # Each (molecule, k) pair gets its own contiguous run of rows.
            graph_to_rnn[mol_idx, k_idx, :num_steps] = step_ids + offset
            k_ids = np.full_like(mol_ids, k_idx)
            inverse_chunks.append(
                np.stack([mol_ids, k_ids, step_ids], axis=0)
            )
            offset += num_steps

    rnn_to_graph = np.concatenate(inverse_chunks, axis=1)
    NX_cum = np.cumsum(collated[6])

    return collated + [graph_to_rnn, rnn_to_graph, NX_cum]
def get_graph_from_smiles(smiles):
    """Parse a SMILES string into a networkx graph plus atom/bond features.

    Args:
        smiles: SMILES string handed to ``Chem.MolFromSmiles``.

    Returns:
        A 5-tuple ``(graph, atom_types, atom_ranks, bonds, bond_types)``:

        * ``graph`` -- undirected ``nx.Graph`` with one node per atom
          (``0 .. n_atoms - 1``) and one edge per bond.
        * ``atom_types`` -- per-atom type as reported by the mol spec.
        * ``atom_ranks`` -- per-atom value from ``Chem.CanonicalRankAtoms``.
        * ``bonds`` -- list of ``[begin_idx, end_idx]`` pairs.
        * ``bond_types`` -- per-bond type as reported by the mol spec.
    """
    mol = Chem.MolFromSmiles(smiles)

    # Atoms: pair every atom with its canonical rank, then split the columns.
    ranked_atoms = list(zip(mol.GetAtoms(), Chem.CanonicalRankAtoms(mol)))
    atom_types = [
        data_struct.get_mol_spec().get_atom_type(atom)
        for atom, _ in ranked_atoms
    ]
    atom_ranks = [rank for _, rank in ranked_atoms]

    # Bonds: endpoints and type are collected in parallel lists.
    bonds = []
    bond_types = []
    for bond in mol.GetBonds():
        endpoints = [bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()]
        bond_type = data_struct.get_mol_spec().get_bond_type(bond)
        bonds.append(endpoints)
        bond_types.append(bond_type)

    # Topology-only view of the molecule.
    graph = nx.Graph()
    graph.add_nodes_from(range(len(atom_types)))
    graph.add_edges_from(bonds)

    return graph, atom_types, atom_ranks, bonds, bond_types
def get_graph_from_smiles_list(smiles_list):
    """Convert SMILES strings into ``[X_0, A_0]`` array pairs.

    Args:
        smiles_list: iterable of SMILES strings.

    Returns:
        A list with one ``[X_0, A_0]`` entry per input string, where

        * ``X_0`` is an int32 array of shape ``(n_atoms,)`` with the atom
          types reported by the mol spec, and
        * ``A_0`` is an int32 array of shape ``(n_bonds, 3)`` whose rows are
          ``[begin_atom_idx, end_atom_idx, bond_type]``.

        A molecule without bonds yields an ``A_0`` of shape ``(0, 3)``.

    Note:
        The result of ``Chem.MolFromSmiles`` is used unchecked; if it is
        ``None`` (RDKit's documented result for an unparsable SMILES) the
        attribute access below raises ``AttributeError``.
    """
    graph_list = []
    for smiles in smiles_list:
        mol = Chem.MolFromSmiles(smiles)
        mol_spec = data_struct.get_mol_spec()

        atom_types = [mol_spec.get_atom_type(atom) for atom in mol.GetAtoms()]
        bond_rows = [
            [
                bond.GetBeginAtomIdx(),
                bond.GetEndAtomIdx(),
                mol_spec.get_bond_type(bond),
            ]
            for bond in mol.GetBonds()
        ]

        X_0 = np.array(atom_types, dtype=np.int32)
        # reshape keeps the array 2-D even when there are no bonds; an empty
        # list would otherwise produce shape (0,) instead of (0, 3).
        A_0 = np.array(bond_rows, dtype=np.int32).reshape(-1, 3)

        graph_list.append([X_0, A_0])
    return graph_list
def get_mol_from_graph(X, A, sanitize=True):
    """Rebuild an RDKit molecule from atom-type and bond arrays.

    Args:
        X: array-like with a ``tolist()`` method giving one atom-type index
            per atom.
        A: array-like with a ``tolist()`` method giving one
            ``(atom_id1, atom_id2, bond_type)`` row per bond.
        sanitize: when true (default), convert the editable molecule with
            ``GetMol()`` and run ``Chem.SanitizeMol`` on it.

    Returns:
        The sanitized molecule when ``sanitize`` is true, the editable
        ``Chem.RWMol`` when it is false, or ``None`` if building or
        sanitizing the molecule raised an exception.

    Note:
        Only ``Exception`` subclasses are turned into ``None``;
        ``KeyboardInterrupt`` and ``SystemExit`` propagate to the caller.
    """
    try:
        atom_types, bonds = X.tolist(), A.tolist()
        mol = Chem.RWMol(Chem.Mol())
        mol_spec = data_struct.get_mol_spec()
        for atom_type in atom_types:
            mol.AddAtom(mol_spec.index_to_atom(atom_type))
        for atom_id1, atom_id2, bond_type in bonds:
            mol_spec.index_to_bond(mol, atom_id1, atom_id2, bond_type)
    except Exception:
        # Best effort: any construction failure means "no molecule".
        return None

    if not sanitize:
        return mol

    try:
        mol = mol.GetMol()
        Chem.SanitizeMol(mol)
    except Exception:
        # Sanitization failed: report it as "no molecule" rather than raising.
        return None
    return mol
def merge_single_0(X_0, A_0, NX_0, NA_0):
    """Re-index a concatenated batch of graphs and split its bonds by type.

    Args:
        X_0: atom array, passed through to ``get_d`` and returned unchanged.
        A_0: 2-D bond array whose first two columns are atom indices and
            whose third column is the bond type. NOTE: the index columns are
            shifted IN PLACE, so the caller's array is modified.
        NX_0: 1-D integer array, one entry per graph (presumably the atom
            count of each graph -- confirm against the caller).
        NA_0: per-graph repeat counts for the index shift (presumably the
            bond count of each graph -- confirm against the caller).

    Returns:
        ``(X_0, A_list, NX_0, NX_rep_0)`` where ``A_list`` contains one
        ``(n, 2)`` index array per bond type followed by the two arrays
        returned by ``get_d``, and ``NX_rep_0`` repeats each graph's index
        ``NX_0[i]`` times.
    """
    # Exclusive prefix sum of NX_0: the index offset of each graph.
    graph_offsets = np.cumsum(NX_0) - NX_0
    # One offset per bond row, broadcast over both index columns.
    row_offsets = np.repeat(graph_offsets, NA_0)[:, np.newaxis]
    A_0[:, :2] += row_offsets

    # Computed on the shifted indices, before A_0 is split.
    D_0_2, D_0_3 = get_d(A_0, X_0)

    # One (n, 2) index array per bond type, then the two get_d outputs.
    num_bond_types = data_struct.get_mol_spec().num_bond_types
    bond_type_col = A_0[:, 2]
    A_by_type = [
        A_0[bond_type_col == bond_type, :2]
        for bond_type in range(num_bond_types)
    ]
    A_by_type += [D_0_2, D_0_3]

    # Graph index repeated once per entry counted in NX_0.
    NX_rep_0 = np.repeat(np.arange(NX_0.shape[0]), NX_0)
    return X_0, A_by_type, NX_0, NX_rep_0