def OEMolToMorganFingerprint(mol):
    """Build a chirality-aware Morgan bit-vector fingerprint for an OpenEye molecule.

    The molecule is serialised with ``OECreateIsoSmiString`` and the resulting
    SMILES is re-parsed with RDKit, which then computes the fingerprint
    (radius 4, ``useChirality=True``).

    Returns the fingerprint, or ``False`` when RDKit does not yield a usable
    molecule from the SMILES string.
    """
    iso_smiles = OECreateIsoSmiString(mol)
    rd_mol = Chem.MolFromSmiles(iso_smiles)

    # Guard clause: nothing to fingerprint if parsing failed.
    if not rd_mol:
        return False

    Chem.SanitizeMol(rd_mol)
    AllChem.AssignStereochemistry(rd_mol)
    return AllChem.GetMorganFingerprintAsBitVect(rd_mol, 4, useChirality=True)
def update_stereochemistry(self, conformer=-1):
    """
    Refresh the stereochemistry tags stored on :attr:`Molecule.mol`.

    Each atom's property cache is updated first; chiral tags are then
    assigned from the structure of the chosen conformer, followed by a
    stereochemistry assignment with cleaning, forcing and flagging of
    possible stereocentres all enabled.

    Parameters
    ----------
    conformer : :class:`int`, optional
        The id of the conformer to use.

    Returns
    -------
    None : :class:`NoneType`

    """
    mol = self.mol

    for atom in mol.GetAtoms():
        atom.UpdatePropertyCache()

    rdkit.AssignAtomChiralTagsFromStructure(mol, conformer)
    rdkit.AssignStereochemistry(
        mol,
        cleanIt=True,
        force=True,
        flagPossibleStereoCenters=True,
    )
def canonicalize_smiles(smiles, sanitize=True, iso=False, SLN=False):
    """Return the canonical SMILES for a compound.

    Thin wrapper around RDKit's SMILES parsing and writing functions.

    :arguments:
        smiles   -- (string) a compound in SMILES format (or SLN, see below)
        sanitize -- (bool) additionally remove hydrogens, sanitize the
                    molecule and reassign stereochemistry before writing
        iso      -- (bool) include isomeric data in the output SMILES
        SLN      -- (bool) the input is in SLN format; it is first converted
                    to SMILES with ``cirpy.resolve``

    :return:
        canonicalized SMILES
    """
    # SLN input is translated to SMILES before RDKit sees it.
    source = cirpy.resolve(smiles, "smiles") if SLN else smiles

    # First round-trip: parse, write the canonical form, and parse that again.
    first_pass = Chem.MolToSmiles(
        Chem.MolFromSmiles(source), canonical=True, isomericSmiles=iso
    )
    mol = Chem.MolFromSmiles(first_pass)

    if sanitize:
        mol.UpdatePropertyCache(strict=False)
        mol = Chem.RemoveHs(
            mol, implicitOnly=False, updateExplicitCount=True, sanitize=True
        )
        Chem.SanitizeMol(
            mol, Chem.rdmolops.SanitizeFlags.SANITIZE_ALL, catchErrors=False
        )
        AllChem.AssignStereochemistry(
            mol, cleanIt=True, force=True, flagPossibleStereoCenters=True
        )

    # Both the sanitized and the unsanitized path end with the same write.
    return Chem.MolToSmiles(mol, canonical=True, isomericSmiles=iso)
def smilesToRDKitMol(smiles):
    """Parse a SMILES string into a sanitized RDKit molecule.

    The parsed molecule is sanitized and has its stereochemistry assigned
    before being returned. No check is made on the parse result, so whatever
    ``Chem.MolFromSmiles`` returns is handed straight to the follow-up calls.
    """
    parsed = Chem.MolFromSmiles(smiles)

    Chem.SanitizeMol(parsed)
    AllChem.AssignStereochemistry(parsed)

    return parsed
def standardize(compound: AllChem.Mol, add_hs=True, remove_stereo=True, thorough=False) -> AllChem.Mol: """ Standardizes an RDKit molecule by running various cleanup and sanitization operations. Parameters ---------- compound : rdkit.Chem.rdchem.Mol A chemical compound. add_hs : bool If True, adds hydrogens to the compound. remove_stereo : bool If True, removes stereochemistry info from the compound. thorough : bool If True, removes charge, isotopes, and small fragments from the compound. Returns ------- rdkit.Chem.rdchem.Mol The standardized compound. """ # basic cleanup Chem.Cleanup(compound) Chem.SanitizeMol(compound, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL, catchErrors=False) AllChem.AssignStereochemistry(compound, cleanIt=True, force=True, flagPossibleStereoCenters=True) # remove isotopes, neutralize charge if thorough: for atom in compound.GetAtoms(): atom.SetIsotope(0) compound = _neutralize_charge(compound) Chem.SanitizeMol(compound, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL, catchErrors=False) # remove stereochemistry if remove_stereo: Chem.RemoveStereochemistry(compound) # commute inchi compound = _commute_inchi(compound) # keep biggest fragment if thorough: compound = _strip_small_fragments(compound) Chem.SanitizeMol(compound, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL, catchErrors=False) # neutralize charge compound = _neutralize_charge(compound) Chem.SanitizeMol(compound, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL, catchErrors=False) # add protons if add_hs: return Chem.AddHs(compound, explicitOnly=False, addCoords=True) return compound
def update_stereochemistry(mol, conformer=-1):
    """
    Update the stereochemistry tags of `mol`.

    Each atom's property cache is refreshed, chiral tags are assigned from
    the structure of the selected conformer, and stereochemistry is then
    reassigned with the three boolean options (clean, force, flag possible
    stereocentres) all enabled.

    Parameters
    ----------
    mol : RDKit molecule
        The molecule to update. The calls below operate on this object
        directly; nothing is returned.
    conformer : int, optional
        Id of the conformer whose structure is used to assign the atom
        chiral tags. The default of ``-1`` is the value RDKit uses when no
        id is given, so existing one-argument calls behave as before.

    Returns
    -------
    None
    """
    for atom in mol.GetAtoms():
        atom.UpdatePropertyCache()

    # Pass the conformer id explicitly instead of relying on the implicit
    # library default, so callers can pick a specific conformer.
    rdkit.AssignAtomChiralTagsFromStructure(mol, conformer)
    rdkit.AssignStereochemistry(mol, True, True, True)
def calculate_metrics(mol):
    """Compute a dictionary of chemical descriptors for one molecule.

    Parameters
    ----------
    mol
        The molecule to describe (an RDKit molecule, judging by the calls
        made on it). ``Chem.AssignStereochemistry`` is invoked on it before
        stereocentres are counted.

    Returns
    -------
    dict
        Maps descriptor names to values. Besides the fixed keys built below,
        one ``'Atoms with symbol <X>'`` entry is added per element symbol
        present in the molecule. Notes on individual entries:

        * ``'% rotatable bonds'`` and ``'% heteroatoms'`` are plain ratios
          (count divided by total bonds / total atoms), not multiplied by
          100. Each is reported as 0 when its denominator is zero.
        * ``'NP-likeness score'`` and ``'Synthetic accessibility score'`` are
          ``None`` when the corresponding scorer raises ``ValueError``.
    """
    # calculate chemical descriptors
    ## fraction of sp3 carbons
    pct_sp3 = Lipinski.FractionCSP3(mol)

    ## H bond donors/acceptors
    h_acceptor = Lipinski.NumHAcceptors(mol)
    h_donor = Lipinski.NumHDonors(mol)

    ## rotatable bonds as a share of all bonds (guard against bond-less input)
    n_bonds = mol.GetNumBonds()
    if n_bonds > 0:
        rot_bonds = Lipinski.NumRotatableBonds(mol) / n_bonds
    else:
        rot_bonds = 0

    ## number of rings, aromatic and aliphatic
    n_rings = Lipinski.RingCount(mol)
    n_rings_ali = Lipinski.NumAliphaticRings(mol)
    n_rings_aro = Lipinski.NumAromaticRings(mol)

    ## number of stereocentres (stereo must be assigned before counting)
    Chem.AssignStereochemistry(mol)
    n_stereo = CalcNumAtomStereoCenters(mol)

    ## polarity
    tpsa = Chem.CalcTPSA(mol)

    ## hydrophobicity
    logP = Descriptors.MolLogP(mol)

    ## molecular weight
    mw = Descriptors.MolWt(mol)

    ## in Lipinski space?
    Ro5 = in_Ro5(mol)

    ## element symbols, collected once and reused for both the atom total
    ## and the per-element counts
    symbols = [atom.GetSymbol() for atom in mol.GetAtoms()]
    n_atoms = len(symbols)

    ## share of heteroatoms; guarded like the rotatable-bond ratio so an
    ## atom-less molecule does not raise ZeroDivisionError
    if n_atoms > 0:
        pct_hetero = Lipinski.NumHeteroatoms(mol) / n_atoms
    else:
        pct_hetero = 0

    ## number of each atom
    atom_counts = Counter(symbols)

    ## Murcko scaffolds
    murcko = Chem.MolToSmiles(MurckoScaffold.GetScaffoldForMol(mol))

    ## NP-likeness
    try:
        np_score = calculateNPScore(mol, np_mod)
    except ValueError:
        np_score = None

    ## synthetic accessibility
    try:
        sa_score = calculateSAScore(mol, sa_mod)
    except ValueError:
        sa_score = None

    ## topological complexity
    bertz_idx = BertzCT(mol)

    # create dict
    metrics = {
        '% sp3 carbons': pct_sp3,
        'H bond acceptors': h_acceptor,
        'H bond donors': h_donor,
        '% rotatable bonds': rot_bonds,
        'Rings': n_rings,
        'Rings, aliphatic': n_rings_ali,
        'Rings, aromatic': n_rings_aro,
        'Stereocentres': n_stereo,
        'Topological polar surface area': tpsa,
        'LogP': logP,
        'Molecular weight': mw,
        'Lipinski rule of 5': Ro5,
        '% heteroatoms': pct_hetero,
        'Murcko scaffold': murcko,
        'NP-likeness score': np_score,
        'Synthetic accessibility score': sa_score,
        'Bertz topological complexity': bertz_idx,
    }

    # append atom counts
    for symbol, count in atom_counts.items():
        metrics['Atoms with symbol ' + symbol] = count

    return metrics