def get_multiring_atoms_bonds(self, rdk_mol: Mol, smiles):
    """Count atoms and bonds that belong to more than one ring. (Not used.)

    The counts are first taken from RDKit's ring info: an atom (bond) is
    counted when it appears in more than one of the rings reported by
    ``GetRingInfo()``. The same molecule is then parsed from `smiles` with
    Open Babel (pybel); if the two toolkits disagree on the number of rings,
    a warning is printed and the counts are replaced by Open Babel based
    values.

    Params:
    -------
    rdk_mol: Mol
        RDKit molecule whose ring info is inspected.

    smiles: str
        SMILES string handed to ``pybel.readstring('smi', ...)``; presumably
        describes the same molecule as `rdk_mol` (not checked here).

    Returns:
    --------
    (n_atoms_multiring, n_bonds_multiring): tuple of int
        Number of atoms and number of bonds shared by more than one ring.
        In the Open Babel fallback the bond count is only an estimate
        (matched atoms minus one) and is never negative.
    """
    atom_ring_times = [0] * rdk_mol.GetNumAtoms()
    bond_ring_times = [0] * rdk_mol.GetNumBonds()

    # TODO GetRingInfo gives SymmetricSSSR, not TRUE SSSR
    ri = rdk_mol.GetRingInfo()
    for id_atoms in ri.AtomRings():
        for ida in id_atoms:
            atom_ring_times[ida] += 1
    for id_bonds in ri.BondRings():
        for idb in id_bonds:
            bond_ring_times[idb] += 1

    n_atoms_multiring = sum(1 for times in atom_ring_times if times > 1)
    n_bonds_multiring = sum(1 for times in bond_ring_times if times > 1)

    py_mol = pybel.readstring('smi', smiles)
    if ri.NumRings() != len(py_mol.sssr):
        print(
            'WARNING: SymmetricSSSR not equal to TRUE SSSR in rdkit. Use Openbabel instead:',
            smiles)
        n_atoms_multiring = len(pybel.Smarts('[R2]').findall(py_mol))
        # Estimate: one bond fewer than the matched atoms. Clamp at zero so
        # that a molecule without any `[R2]` match does not report -1 bonds.
        n_bonds_multiring = max(n_atoms_multiring - 1, 0)

    return n_atoms_multiring, n_bonds_multiring
def find_rings(mol: Mol) -> List[List[int]]:
    """Return the rings of `mol`, expressed with atom-map numbers.

    Params:
    -------
    mol: Mol
        Molecule whose ring info is queried.

    Returns:
    --------
    rings_mapped: list of list of int
        One inner list per ring reported by ``GetRingInfo().AtomRings()``.
        Each entry is the atom-map number (``GetAtomMapNum()``) of the atom
        at that ring position, in the same order as the ring's atom indices.
    """
    atom_rings = mol.GetRingInfo().AtomRings()

    # Lookup from the position of an atom in `mol.GetAtoms()` to its map number.
    map_num_by_idx = {
        idx: atom.GetAtomMapNum()
        for idx, atom in enumerate(mol.GetAtoms())
    }

    return [[map_num_by_idx[idx] for idx in atom_ring]
            for atom_ring in atom_rings]
def construct_mol_features(mol: rdchem.Mol,
                           out_size: Optional[int] = -1) -> np.ndarray:
    """Returns the atom features of all the atoms in the molecule.

    Note that `mol` is modified in place: Gasteiger charges and
    stereochemistry labels are computed and stored as atom properties.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest.

    out_size: int, optional, default=-1
        The size of the returned array. If this option is negative (or
        None), it does not take any effect. Otherwise, it must be larger
        than or equal to the number of atoms in the input molecule. If so,
        the end of the array is padded with zeros.

    Returns:
    --------
    mol_feats: np.ndarray, shape=(n,m)
        Where `n` is the total number of atoms within the molecule (or
        `out_size` when padding is requested), and `m` is the number of
        feats.

    Raises:
    -------
    ValueError
        If `out_size` is non-negative but smaller than the number of atoms.
    """
    # Calculate charges and chirality of atoms within molecule
    rdPartialCharges.ComputeGasteigerCharges(
        mol)  # stored under _GasteigerCharge
    rdmolops.AssignStereochemistry(
        mol)  # stored under _CIPCode, see doc for more info

    # SMARTS patterns for the pharmacophore-like atom flags
    HYDROGEN_DONOR = rdmolfiles.MolFromSmarts(
        "[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0])" + ",n&H1&+0]")
    HYDROGEN_ACCEPTOR = rdmolfiles.MolFromSmarts(
        "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])])" +
        ",$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),n&H0&+0," +
        "$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]")
    ACIDIC = rdmolfiles.MolFromSmarts("[$([C,S](=[O,S,P])-[O;H1,-1])]")
    BASIC = rdmolfiles.MolFromSmarts(
        "[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))])" +
        ",$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))])," +
        "$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]")

    # Retrieve atom index locations of matches. Sets give O(1) membership
    # tests in the per-atom loop below and avoid the quadratic tuple
    # concatenation of `sum(matches, ())`.
    hydrogen_donor_match = {
        idx for match in mol.GetSubstructMatches(HYDROGEN_DONOR)
        for idx in match
    }
    hydrogen_acceptor_match = {
        idx for match in mol.GetSubstructMatches(HYDROGEN_ACCEPTOR)
        for idx in match
    }
    acidic_match = {
        idx for match in mol.GetSubstructMatches(ACIDIC) for idx in match
    }
    basic_match = {
        idx for match in mol.GetSubstructMatches(BASIC) for idx in match
    }

    # Get ring info
    ring = mol.GetRingInfo()

    # Loop-invariant category list for the hybridization one-hot encoding
    hybridization_types = list(rdchem.HybridizationType.names.values())

    mol_feats = []
    n_atoms = mol.GetNumAtoms()
    for atom_idx in range(n_atoms):
        atom = mol.GetAtomWithIdx(atom_idx)

        atom_feats = []
        atom_feats += one_hot(atom.GetSymbol(), [
            'C', 'O', 'N', 'S', 'Cl', 'F', 'Br', 'P', 'I', 'Si', 'B', 'Na',
            'Sn', 'Se', 'other'
        ])
        atom_feats += one_hot(atom.GetDegree(), [1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetHybridization(), hybridization_types)
        atom_feats += one_hot(atom.GetImplicitValence(),
                              [0, 1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetFormalCharge(),
                              [-3, -2, -1, 0, 1, 2, 3])

        # A NaN partial charge is replaced by 0.
        g_charge = float(atom.GetProp("_GasteigerCharge"))
        atom_feats += [g_charge] if not np.isnan(g_charge) else [0.]

        atom_feats += [atom.GetIsAromatic()]
        atom_feats += [
            ring.IsAtomInRingOfSize(atom_idx, size) for size in range(3, 9)
        ]
        atom_feats += one_hot(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])

        # Chirality. Atoms without a `_CIPCode` property (or with a code
        # that `one_hot` cannot encode) fall back to [False, False]; this is
        # deliberately best-effort, but no longer swallows
        # KeyboardInterrupt/SystemExit like the former bare `except`.
        try:
            cip_feats = one_hot(atom.GetProp('_CIPCode'), ["R", "S"])
        except Exception:
            cip_feats = [False, False]
        atom_feats += cip_feats + [atom.HasProp("_ChiralityPossible")]

        # Hydrogen bonding
        atom_feats += [atom_idx in hydrogen_donor_match]
        atom_feats += [atom_idx in hydrogen_acceptor_match]
        # Is Acidic/Basic
        atom_feats += [atom_idx in acidic_match]
        atom_feats += [atom_idx in basic_match]

        mol_feats.append(atom_feats)

    # NOTE: builtin `float` replaces the `np.float` alias, which was removed
    # from NumPy; both denote float64.
    if out_size is None or out_size < 0:
        return np.array(mol_feats, dtype=float)
    elif out_size >= n_atoms:
        # 'empty' padding for `mol_feats`. Generate(s) feature matrix of same
        # size for all mols
        # NOTE: len(mol_feats[0]) is the number of feats
        padded_mol_feats = np.zeros((out_size, len(mol_feats[0])),
                                    dtype=float)
        padded_mol_feats[:n_atoms] = np.array(mol_feats, dtype=float)
        return padded_mol_feats
    else:
        raise ValueError(
            '`out_size` (N={}) must be negative or larger than or '
            'equal to the number of atoms in the input molecules (N={}).'.
            format(out_size, n_atoms))