def main(name, argv):
    """Fragment one molecule with RECAP and write the accepted fragments.

    ``argv[0]`` is a file whose first line starts with a SMILES string;
    ``argv[1]`` is the output file.  Every RECAP child of the molecule, plus
    the molecule itself, is considered.  Attachment points (``[*]``) are
    capped with hydrogen.  A candidate with more than 7 and at most 25 heavy
    atoms is kept when it has at most 5 rotatable bonds; its Murcko scaffold
    is kept as well when the scaffold has at most 5 rotatable bonds and more
    than 7 heavy atoms.  Unique, non-empty SMILES are written one per line as
    ``<smiles>\\t<running index>``.

    Parameters
    ----------
    name : str
        Program name, only forwarded to ``print_usage``.
    argv : list of str
        ``[input_path, output_path]``.

    Raises
    ------
    ValueError
        If the input SMILES cannot be parsed.
    """
    if len(argv) != 2:
        print_usage(name)
        return

    with open(argv[0], 'r') as f:
        smile = f.readline().split()[0]
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        raise ValueError('cannot parse SMILES %r from %s' % (smile, argv[0]))

    hierarch = Recap.RecapDecompose(mol)
    children = []
    # list(...) is required on Python 3, where dict.keys() returns a view
    # that cannot be concatenated with a list.
    for child in list(hierarch.GetAllChildren().keys()) + [smile]:
        new_smiles = child.replace('[*]', '[H]')
        new = Chem.MolFromSmiles(new_smiles)
        if new is None:
            continue
        new_size = new.GetNumHeavyAtoms()
        if not 7 < new_size <= 25:
            continue

        if rdMolDescriptors.CalcNumRotatableBonds(new) <= 5:
            children.append(Chem.MolToSmiles(new, isomericSmiles=True))

        core_smile = MurckoScaffold.MurckoScaffoldSmilesFromSmiles(
            new_smiles, includeChirality=True)
        core = Chem.MolFromSmiles(core_smile)
        if core is None:
            # Scaffold SMILES could not be parsed back; nothing to add.
            continue
        if (rdMolDescriptors.CalcNumRotatableBonds(core) <= 5
                and core.GetNumHeavyAtoms() > 7):
            children.append(core_smile)

    with open(argv[1], 'w') as f:
        i = 1
        # set() removes duplicates; the resulting line order is arbitrary.
        for m in set(children):
            if len(m) > 0:
                f.write(m + '\t' + str(i) + '\n')
                i += 1
def test1(self):
    """testing first 200 mols from NCI

    Every readable molecule in ``self.inFileName`` stores reference
    descriptor values as SD properties; each computed descriptor must equal
    its stored reference.
    """
    # figure out which rotor version we are using
    probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
    rot_prop = NonStrict if Lipinski.NumRotatableBonds(probe) == 2 else Strict

    # Each entry: (descriptor function, SD property holding the reference
    # value, failure message template).
    checks = (
        (Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS',
         'bad num h donors for mol %d (%s): %d != %d'),
        (Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS',
         'bad num h acceptors for mol %d (%s): %d != %d'),
        (Lipinski.NumHDonors, 'NUM_HDONORS',
         'bad num h donors for mol %d (%s): %d != %d'),
        (Lipinski.NumHAcceptors, 'NUM_HACCEPTORS',
         'bad num h acceptors for mol %d (%s): %d != %d'),
        (Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS',
         'bad num heteroatoms for mol %d (%s): %d != %d'),
        (Lipinski.NumRotatableBonds, rot_prop,
         'bad num rotors for mol %d (%s): %d != %d'),
        # test the underlying numrotatable bonds
        (lambda mol: rdMolDescriptors.CalcNumRotatableBonds(
            mol, rdMolDescriptors.NumRotatableBondsOptions.NonStrict),
         NonStrict,
         'bad num rotors for mol %d (%s): %d != %d'),
        (lambda mol: rdMolDescriptors.CalcNumRotatableBonds(
            mol, rdMolDescriptors.NumRotatableBondsOptions.Strict),
         Strict,
         'bad num rotors for mol %d (%s): %d != %d'),
    )

    # idx counts every supplier entry, including unreadable ones.
    for idx, mol in enumerate(Chem.SDMolSupplier(self.inFileName), 1):
        if not mol:
            continue
        for descriptor, prop_name, template in checks:
            calc = descriptor(mol)
            orig = int(mol.GetProp(prop_name))
            assert calc == orig, template % (
                idx, mol.GetProp('SMILES'), calc, orig)
def calc(smi, name):
    """Compute a row of molecular descriptors for one SMILES string.

    Parameters
    ----------
    smi : str
        SMILES of the molecule.
    name : str
        Identifier returned as the first element and used in error messages.

    Returns
    -------
    tuple or None
        ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw, csp3,
        fmf, qed, hac, nrings_fused, n_unique_hba_hbd_atoms, max_ring_size,
        n_chiral_centers, fcsp3_bm)`` with floats rounded to 2 or 3 decimals.
        ``None`` is returned, and a message written to stderr, when the
        SMILES cannot be parsed or any descriptor calculation fails.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    # Exception (not a bare except) so Ctrl-C and SystemExit still propagate.
    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # The scaffold feeds both fmf and fcsp3_bm; build it once.
        scaffold = GetScaffoldForMol(m)
        # fmf: heavy atoms in the scaffold over heavy atoms in the molecule;
        # defined as 0 for a molecule without heavy atoms.
        fmf = scaffold.GetNumHeavyAtoms() / hac if hac else 0
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        max_ring_size = len(max(m.GetRingInfo().AtomRings(), key=len, default=()))
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(scaffold)
    except Exception:
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None

    return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2),
            round(logp, 2), round(mr, 2), round(mw, 2), round(csp3, 3),
            round(fmf, 3), round(qed, 3), hac, nrings_fused,
            n_unique_hba_hbd_atoms, max_ring_size, n_chiral_centers,
            round(fcsp3_bm, 3))
def computeFeatures(mol):
    """Build a flat feature list for ``mol``.

    The list starts with the ring count, the element counts (N, O, C, B, P,
    S, F, I), the double-bond count, the Labute ASA and the exact molecular
    weight, followed by every ``SlogP_VSA_`` value and then every value of
    the mapping returned by ``recurseMolHCount`` in its iteration order.

    NOTE(review): the rotatable-bond count was computed here but never added
    to the output; the dead computation has been removed.  Confirm whether it
    was meant to be a feature before adding it, since that would change the
    vector length.

    Parameters
    ----------
    mol : rdkit.Chem.Mol
        Input molecule.

    Returns
    -------
    list
        Feature values in the order described above.
    """
    output = [
        rdMolDescriptors.CalcNumRings(mol),
        countNitrogens(mol),
        countOxygens(mol),
        countCarbons(mol),
        countBorons(mol),
        countPhos(mol),
        countSulfurs(mol),
        countFluorine(mol),
        countIodine(mol),
        countDoubleBonds(mol),
        rdMolDescriptors.CalcLabuteASA(mol),
        rdMolDescriptors.CalcExactMolWt(mol),
    ]
    output.extend(rdMolDescriptors.SlogP_VSA_(mol))
    dist_hs = recurseMolHCount(mol)
    output.extend(dist_hs[key] for key in dist_hs)
    return output
def calc(smi, name):
    """Compute a row of molecular descriptors for one SMILES string.

    Parameters
    ----------
    smi : str
        SMILES of the molecule.
    name : str
        Identifier returned as the first element and used in error messages.

    Returns
    -------
    tuple or None
        ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw, csp3,
        fmf, qed, hac, nrings_fused)`` with floats rounded to 2 or 3
        decimals.  ``None`` is returned, and a message written to stderr,
        when the SMILES cannot be parsed or any descriptor calculation fails.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    # Exception (not a bare except) so Ctrl-C and SystemExit still propagate.
    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # fmf: heavy atoms in the scaffold over heavy atoms in the molecule;
        # defined as 0 for a molecule without heavy atoms.
        if hac == 0:
            fmf = 0
        else:
            fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
    except Exception:
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None

    return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2),
            round(logp, 2), round(mr, 2), round(mw, 2), round(csp3, 3),
            round(fmf, 3), round(qed, 3), hac, nrings_fused)
def generate_conformations_sdf(smiles, name, max_conf=20):
    """Generate conformations and save each one in its own SDF file.

    The molecule is built from ``smiles`` with explicit hydrogens.  The
    number of conformers requested is ``min(1 + 3 * n_rotatable_bonds,
    max_conf)``.  Conformer ``i`` is written to ``<name>-<i>.sdf``.

    Parameters
    ----------
    smiles : str
        SMILES of the molecule.
    name : str
        Prefix of the output file names.
    max_conf : int
        Upper bound on the number of conformers requested.

    Returns
    -------
    list of str
        The ``<name>-<i>`` stems of the files written (without ``.sdf``).
    """
    m = Chem.AddHs(Chem.MolFromSmiles(smiles))
    rot_bond = rdMolDescriptors.CalcNumRotatableBonds(m)
    confs = min(1 + 3 * rot_bond, max_conf)
    AllChem.EmbedMultipleConfs(m,
                               numConfs=confs,
                               useExpTorsionAnglePrefs=True,
                               useBasicKnowledge=True)

    conf_list = []
    for i, conf in enumerate(m.GetConformers()):
        # Copy of the molecule that carries only this conformer.
        tm = Chem.Mol(m, False, conf.GetId())
        confname = name + "-" + str(i)
        writer = Chem.SDWriter(confname + ".sdf")
        try:
            writer.write(tm)
        finally:
            # Close explicitly so the record is flushed and the file handle
            # released even if write() fails.
            writer.close()
        conf_list.append(confname)
    return conf_list
def identify_rotatable_bonds(mol):
    """Return the bonds of ``mol`` that are treated as rotatable.

    A bond qualifies when it is a single, non-ring bond whose two atoms are
    neither terminal (degree 1) nor part of a triple bond.  This is a crude
    heuristic that should *not* be used for production; it mishandles simple
    cases such as benzoic acids and amides.

    Parameters
    ----------
    mol: ROMol
        Input molecule

    Returns
    -------
    set of 2-tuples
        Atom-index pairs ``(i, j)`` with ``i <= j``, one per matched bond.
    """
    rotor_query = Chem.MolFromSmarts("[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]")
    hits = mol.GetSubstructMatches(rotor_query, uniquify=1)

    # sanity check: the pattern must find at least as many bonds as RDKit's
    # own rotatable-bond count
    assert len(hits) >= rdMolDescriptors.CalcNumRotatableBonds(mol)

    # Store each pair with the smaller atom index first.
    return {(a, b) if a <= b else (b, a) for a, b in hits}
def generate_conformations(fragments, max_confs=20):
    """Embed conformers in place on every molecule in ``fragments``.

    Each fragment gets ``min(3 + 3 * n_rotatable_bonds, max_confs)``
    conformers requested.  The same ``fragments`` object is returned.
    """
    for frag in fragments:
        n_rotors = rdMolDescriptors.CalcNumRotatableBonds(frag)
        n_requested = min(3 + 3 * n_rotors, max_confs)
        AllChem.EmbedMultipleConfs(frag, numConfs=n_requested)
    return fragments
def properties(mol):
    """
    Compute the eight properties needed for the QED descriptor.

    Hydrogens are removed from ``mol`` first.  Raises ``ValueError`` when
    ``mol`` is ``None``.  Returns a ``QEDproperties`` record with the fields
    MW, ALOGP, HBA, HBD, PSA, ROTB, AROM and ALERTS.
    """
    if mol is None:
        raise ValueError('You need to provide a mol argument.')
    mol = Chem.RemoveHs(mol)

    mol_weight = rdmd._CalcMolWt(mol)
    alogp = Crippen.MolLogP(mol)

    # Total number of matches over all acceptor patterns.
    n_acceptors = 0
    for pattern in Acceptors:
        if mol.HasSubstructMatch(pattern):
            n_acceptors += len(mol.GetSubstructMatches(pattern))

    n_donors = rdmd.CalcNumHBD(mol)
    polar_area = MolSurf.TPSA(mol)
    n_rotors = rdmd.CalcNumRotatableBonds(mol, rdmd.NumRotatableBondsOptions.Strict)

    # Rings that remain after the aliphatic rings are deleted from a copy.
    without_aliphatic = Chem.DeleteSubstructs(Chem.Mol(mol), AliphaticRings)
    arom = Chem.GetSSSR(without_aliphatic)

    n_alerts = 0
    for alert in StructuralAlerts:
        if mol.HasSubstructMatch(alert):
            n_alerts += 1

    # The replacement
    # AROM=Lipinski.NumAromaticRings(mol),
    # is not identical. The expression above tends to count more rings
    # N1C2=CC=CC=C2SC3=C1C=CC4=C3C=CC=C4
    # OC1=C(O)C=C2C(=C1)OC3=CC(=O)C(=CC3=C2C4=CC=CC=C4)O
    # CC(C)C1=CC2=C(C)C=CC2=C(C)C=C1 uses 2, should be 0 ?
    return QEDproperties(
        MW=mol_weight,
        ALOGP=alogp,
        HBA=n_acceptors,
        HBD=n_donors,
        PSA=polar_area,
        ROTB=n_rotors,
        AROM=arom,
        ALERTS=n_alerts,
    )
def testNumRotatableBonds(self):
    # Checks that the different ways of selecting the rotatable-bond
    # definition agree: no argument vs. the Default option, the boolean
    # False vs. NonStrict, and the boolean True vs. Strict.
    smiles_cases = [
        "C1CC1CC",
        "CCNC(=O)NCC",
        'Cc1cccc(C)c1c1c(C)cccc1C',
        'CCc1cccc(C)c1c1c(C)cccc1CC',
        'Cc1cccc(C)c1c1c(C)nccc1C',
        'Cc1cccc(C)c1c1c(C)cccc1',
        'CCO',
    ]
    for s in smiles_cases:
        m = Chem.MolFromSmiles(s)

        v1 = rdMD.CalcNumRotatableBonds(m)
        v2 = rdMD.CalcNumRotatableBonds(m, False)
        v3 = rdMD.CalcNumRotatableBonds(m, True)

        v4 = rdMD.CalcNumRotatableBonds(
            m, rdMD.NumRotatableBondsOptions.Default)
        v5 = rdMD.CalcNumRotatableBonds(
            m, rdMD.NumRotatableBondsOptions.NonStrict)
        v6 = rdMD.CalcNumRotatableBonds(
            m, rdMD.NumRotatableBondsOptions.Strict)
        # StrictLinkages is only exercised; its value is not compared.
        rdMD.CalcNumRotatableBonds(
            m, rdMD.NumRotatableBondsOptions.StrictLinkages)

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(v1, v4)
        self.assertEqual(v2, v5)
        self.assertEqual(v3, v6)
def _calculateDescriptors(mol):
    """Return a one-row DataFrame of RDKit descriptors for ``mol``.

    Columns, in order: the scalar descriptors listed below, then
    ``slogp_VSA1..12``, ``smr_VSA1..10``, ``peoe_VSA1..14`` and
    ``MQN1..42``.  The single row has index ``0``.

    All values are collected in a dict and the frame is built once.
    Inserting the columns one at a time, as before, fragments the frame
    internally and makes pandas emit a PerformanceWarning at this column
    count.
    """
    # One call yields both Crippen values (logP, molar refractivity).
    slogp, smr = rdMolDescriptors.CalcCrippenDescriptors(mol)

    values = {
        "SlogP": slogp,
        "SMR": smr,
        "LabuteASA": rdMolDescriptors.CalcLabuteASA(mol),
        "TPSA": Descriptors.TPSA(mol),
        "AMW": Descriptors.MolWt(mol),
        "ExactMW": rdMolDescriptors.CalcExactMolWt(mol),
        "NumLipinskiHBA": rdMolDescriptors.CalcNumLipinskiHBA(mol),
        "NumLipinskiHBD": rdMolDescriptors.CalcNumLipinskiHBD(mol),
        "NumRotatableBonds": rdMolDescriptors.CalcNumRotatableBonds(mol),
        "NumHBD": rdMolDescriptors.CalcNumHBD(mol),
        "NumHBA": rdMolDescriptors.CalcNumHBA(mol),
        "NumAmideBonds": rdMolDescriptors.CalcNumAmideBonds(mol),
        "NumHeteroAtoms": rdMolDescriptors.CalcNumHeteroatoms(mol),
        "NumHeavyAtoms": Chem.rdchem.Mol.GetNumHeavyAtoms(mol),
        "NumAtoms": Chem.rdchem.Mol.GetNumAtoms(mol),
        "NumRings": rdMolDescriptors.CalcNumRings(mol),
        "NumAromaticRings": rdMolDescriptors.CalcNumAromaticRings(mol),
        "NumSaturatedRings": rdMolDescriptors.CalcNumSaturatedRings(mol),
        "NumAliphaticRings": rdMolDescriptors.CalcNumAliphaticRings(mol),
        "NumAromaticHeterocycles":
            rdMolDescriptors.CalcNumAromaticHeterocycles(mol),
        "NumSaturatedHeterocycles":
            rdMolDescriptors.CalcNumSaturatedHeterocycles(mol),
        "NumAliphaticHeterocycles":
            rdMolDescriptors.CalcNumAliphaticHeterocycles(mol),
        "NumAromaticCarbocycles":
            rdMolDescriptors.CalcNumAromaticCarbocycles(mol),
        "NumSaturatedCarbocycles":
            rdMolDescriptors.CalcNumSaturatedCarbocycles(mol),
        "NumAliphaticCarbocycles":
            rdMolDescriptors.CalcNumAliphaticCarbocycles(mol),
        "FractionCSP3": rdMolDescriptors.CalcFractionCSP3(mol),
        "Chi0v": rdMolDescriptors.CalcChi0v(mol),
        "Chi1v": rdMolDescriptors.CalcChi1v(mol),
        "Chi2v": rdMolDescriptors.CalcChi2v(mol),
        "Chi3v": rdMolDescriptors.CalcChi3v(mol),
        "Chi4v": rdMolDescriptors.CalcChi4v(mol),
        "Chi1n": rdMolDescriptors.CalcChi1n(mol),
        "Chi2n": rdMolDescriptors.CalcChi2n(mol),
        "Chi3n": rdMolDescriptors.CalcChi3n(mol),
        "Chi4n": rdMolDescriptors.CalcChi4n(mol),
        "HallKierAlpha": rdMolDescriptors.CalcHallKierAlpha(mol),
        "kappa1": rdMolDescriptors.CalcKappa1(mol),
        "kappa2": rdMolDescriptors.CalcKappa2(mol),
        "kappa3": rdMolDescriptors.CalcKappa3(mol),
    }

    # Vector-valued descriptors: (column prefix, number of columns, function).
    # zip() pairs names with values and stops at the shorter of the two.
    vector_families = (
        ("slogp_VSA", 12, rdMolDescriptors.SlogP_VSA_),
        ("smr_VSA", 10, rdMolDescriptors.SMR_VSA_),
        ("peoe_VSA", 14, rdMolDescriptors.PEOE_VSA_),
        ("MQN", 42, rdMolDescriptors.MQNs_),
    )
    for prefix, n_columns, compute in vector_families:
        names = [prefix + str(i) for i in range(1, n_columns + 1)]
        values.update(zip(names, compute(mol)))

    return pd.DataFrame(values, index=[0])
def calculate_properties(self, mol):
    """Calculate basic properties of ``mol``.

    returns : list of int or float, in this order: atom count, first Crippen
    descriptor, TPSA, rotatable-bond count, fraction of sp3 carbons."""
    return [
        mol.GetNumAtoms(),
        desc.CalcCrippenDescriptors(mol)[0],
        desc.CalcTPSA(mol),
        desc.CalcNumRotatableBonds(mol),
        desc.CalcFractionCSP3(mol),
    ]
def build_library(in_smile,
                  frags,
                  lib,
                  rules=os.environ["COVALIB"] + "/Code/Covalentizer/numbered_reaction.re",
                  linker_lib=False,
                  linker_smiles=''):
    """Fragment a molecule and run the reaction step on the fragments.

    The first token of the first line of ``in_smile`` is read as a SMILES
    string.  Every RECAP child of the molecule, plus the molecule itself, is
    considered, with attachment points (``[*]``) capped with hydrogen.  A
    candidate with more than 7 and at most 25 heavy atoms is kept when it has
    at most 5 rotatable bonds; its Murcko scaffold is kept as well when the
    scaffold has at most 5 rotatable bonds and more than 7 heavy atoms.
    Unique, non-empty SMILES are written to ``frags`` as
    ``<smiles>\\t<running index>``.  Finally ``multi_react`` (or
    ``multi_linkers`` when ``linker_lib`` is true) is called with
    ``[rules, frags, lib]``.

    Parameters
    ----------
    in_smile : str
        Path of the input SMILES file.
    frags : str
        Path of the fragment file to write.
    lib : str
        Path handed to the reaction step as its third argument.
    rules : str
        Path of the reaction rules file.  NOTE: the default is evaluated when
        this function is defined, so ``COVALIB`` must be set at import time.
    linker_lib : bool
        Select ``multi_linkers`` instead of ``multi_react``.
    linker_smiles : str
        Extra argument passed to ``multi_linkers``.

    Returns
    -------
    None
        Returns early, without writing anything, when the input SMILES cannot
        be parsed.
    """
    with open(in_smile, 'r') as f:
        smile = f.readline().split()[0]
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        return

    hierarch = Recap.RecapDecompose(mol)
    children = []
    # list(...) is required on Python 3, where dict.keys() returns a view
    # that cannot be concatenated with a list.
    for child in list(hierarch.GetAllChildren().keys()) + [smile]:
        new_smiles = child.replace('[*]', '[H]')
        new = Chem.MolFromSmiles(new_smiles)
        if new is None:
            continue
        new_size = new.GetNumHeavyAtoms()
        # Both acceptance tests below need 7 < new_size <= 25.
        if not 7 < new_size <= 25:
            continue

        if rdMolDescriptors.CalcNumRotatableBonds(new) <= 5:
            children.append(Chem.MolToSmiles(new, isomericSmiles=True))

        core_smile = MurckoScaffold.MurckoScaffoldSmilesFromSmiles(
            new_smiles, includeChirality=True)
        core = Chem.MolFromSmiles(core_smile)
        if core is None:
            # Scaffold SMILES could not be parsed back; nothing to add.
            continue
        if (rdMolDescriptors.CalcNumRotatableBonds(core) <= 5
                and core.GetNumHeavyAtoms() > 7):
            children.append(core_smile)

    with open(frags, 'w') as f:
        i = 1
        # set() removes duplicates; the resulting line order is arbitrary.
        for m in set(children):
            if len(m) > 0:
                f.write(m + '\t' + str(i) + '\n')
                i += 1

    if not linker_lib:
        multi_react([rules, frags, lib])
    else:
        multi_linkers([rules, frags, lib], linker_smiles)
def run(self, mol, logP, logP_sol, atom_pair_sol):
    """Predict a value from two input estimates plus an ESOL estimate.

    An ESOL solubility estimate is computed from ``logP``, the exact
    molecular weight, the rotatable-bond count and the aromatic proportion
    of ``mol``.  ``[logP_sol, atom_pair_sol, esol]`` is scaled with
    ``self.scaler`` and fed to ``self.model``; the first prediction is
    returned.
    """
    mol_weight = Descriptors.ExactMolWt(mol)
    n_rotors = rdMolDescriptors.CalcNumRotatableBonds(mol)

    # Aromatic proportion: matches of the aromatic-atom pattern over the
    # number of heavy atoms.
    aromatic_query = Chem.MolFromSmarts('[a]')
    n_aromatic = len(mol.GetSubstructMatches(aromatic_query))
    aromatic_fraction = n_aromatic / mol.GetNumHeavyAtoms()

    # Formula for estimating solubility (ESOL)
    esol = (0.16 - 0.63 * logP - 0.0062 * mol_weight + 0.066 * n_rotors
            - 0.74 * aromatic_fraction)

    features = np.asarray([logP_sol, atom_pair_sol, esol]).reshape(1, -1)
    prediction = self.model.predict(self.scaler.transform(features))
    return prediction[0]
def n_rotatable_bonds(mol):
    """ Count the rotatable bonds of a molecule.

    Delegates to ``rdMolDescriptors.CalcNumRotatableBonds`` with its default
    options.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    count = rdMolDescriptors.CalcNumRotatableBonds(mol)
    return count
def calc_mol_prop(pdbfile):
    """Compute basic properties of the molecule stored in a PDB file.

    Parameters
    ----------
    pdbfile : str
        Path of the PDB file.

    Returns
    -------
    tuple
        ``(True, exact_mol_weight, n_rotatable_bonds, n_heavy_atoms)`` on
        success.  If the file cannot be read or a descriptor cannot be
        computed, ``"Error"`` is printed and ``(False, 0, 0, 0)`` is returned.
    """
    # Exception (not a bare except) so Ctrl-C and SystemExit still propagate.
    try:
        m = Chem.MolFromPDBFile(pdbfile)
        mw = Descriptors.ExactMolWt(m)
        numrbond = rdMolDescriptors.CalcNumRotatableBonds(m)
        numheavy = rdchem.Mol.GetNumHeavyAtoms(m)
    except Exception:
        print("Error")
        return False, 0, 0, 0
    return True, mw, numrbond, numheavy
def feature_fp(smiles):
    """Return a numeric feature vector for a SMILES string.

    The vector is the MQN descriptor list followed by sixteen scalar
    descriptors, in the order they appear below, as a NumPy array.

    NOTE(review): the rotatable-bond count appears twice (first and third of
    the appended values).  It is kept so the vector length and layout stay
    unchanged; confirm whether the duplicate is intentional.
    """
    mol = Chem.MolFromSmiles(smiles)
    fp = rdMolDescriptors.MQNs_(mol)

    extra_descriptors = (
        rdMolDescriptors.CalcNumRotatableBonds,
        rdMolDescriptors.CalcExactMolWt,
        rdMolDescriptors.CalcNumRotatableBonds,
        rdMolDescriptors.CalcFractionCSP3,
        rdMolDescriptors.CalcNumAliphaticCarbocycles,
        rdMolDescriptors.CalcNumAliphaticHeterocycles,
        rdMolDescriptors.CalcNumAliphaticRings,
        rdMolDescriptors.CalcNumAromaticCarbocycles,
        rdMolDescriptors.CalcNumAromaticHeterocycles,
        rdMolDescriptors.CalcNumAromaticRings,
        rdMolDescriptors.CalcNumBridgeheadAtoms,
        rdMolDescriptors.CalcNumRings,
        rdMolDescriptors.CalcNumAmideBonds,
        rdMolDescriptors.CalcNumHeterocycles,
        rdMolDescriptors.CalcNumSpiroAtoms,
        rdMolDescriptors.CalcTPSA,
    )
    for descriptor in extra_descriptors:
        fp.append(descriptor(mol))
    return np.array(fp)
def main(in_file, output):
    """Tabulate molecular properties and save them as CSV and Excel files.

    Molecules are read with ``rdkit_open([in_file])``.  For each one a row of
    properties is computed and stored under the first token of its ``_Name``
    property; a later molecule with the same name replaces the earlier row.
    The table is written to ``<output>.csv`` and ``<output>.xlsx``.

    Parameters
    ----------
    in_file : str
        Input molecule file, passed to ``rdkit_open``.
    output : str
        Output path prefix (without extension).
    """
    Cmpds = {}
    InMols = rdkit_open([in_file])
    print('\n # Number of input molecule: {0}'.format(len(InMols)))

    for mol in InMols:
        name = mol.GetProp('_Name').split()[0]
        # One call yields both Crippen values (logP, molar refractivity).
        logp, mol_ref = rd.CalcCrippenDescriptors(mol)
        Cmpds[name] = {
            'Name': name,
            'Formula': rd.CalcMolFormula(mol),
            'SMILES': Chem.MolToSmiles(mol, isomericSmiles=True,
                                       allHsExplicit=False),
            'MW': rd._CalcMolWt(mol),                  # Molecular Weight
            'logP': logp,                              # Partition coefficient
            'HDon': rd.CalcNumLipinskiHBD(mol),        # Lipinski Hbond donor
            'HAcc': rd.CalcNumLipinskiHBA(mol),        # Lipinski Hbond acceptor
            'TPSA': rd.CalcTPSA(mol),                  # Topological polar surface area
            'Rotat': rd.CalcNumRotatableBonds(mol, strict=True),  # Rotatable bond
            'MolRef': mol_ref,                         # Molar refractivity
            'AliRing': rd.CalcNumAliphaticRings(mol),  # Aliphatic ring number
            'AroRing': rd.CalcNumAromaticRings(mol),   # Aromatic ring number
        }

    df = pd.DataFrame.from_dict(Cmpds, orient='index')
    df.index.name = 'Name'

    # Columns of data to print out, in output order
    Columns = [
        'Formula', 'MW', 'logP', 'HDon', 'HAcc', 'TPSA', 'Rotat', 'MolRef',
        'AliRing', 'AroRing',
        'SMILES',
    ]
    reorder = df[Columns]

    # Output to CSV
    reorder.to_csv(output + '.csv', sep=',', na_rep='NA', encoding='utf-8',
                   float_format='%.5f', header=True)

    # Output to Excel
    reorder.to_excel(output + '.xlsx', header=True, na_rep='NA')
def get_conformations(smiles, max_conf=20, confs=None):
    """Build a molecule with hydrogens from ``smiles`` and embed conformers.

    When ``confs`` is ``None`` the number of conformers requested is
    ``min(1 + 3 * n_rotatable_bonds, max_conf)``; otherwise ``confs`` is used
    as given.  Returns the molecule carrying the embedded conformers.
    """
    mol_h = Chem.AddHs(Chem.MolFromSmiles(smiles))
    n_rotors = rdMolDescriptors.CalcNumRotatableBonds(mol_h)
    n_requested = min(1 + 3 * n_rotors, max_conf) if confs is None else confs

    AllChem.EmbedMultipleConfs(mol_h,
                               numConfs=n_requested,
                               useExpTorsionAnglePrefs=True,
                               useBasicKnowledge=True)
    return mol_h
def _init_smiles(self, smiles, use_etdg_confs=False):
    """
    Initialise a Molecule object from a SMILES string using RDKit

    Sets ``mol_obj``, ``charge``, ``n_rot_bonds``, ``n_h_donors`` and
    ``n_h_acceptors``, embeds ``self.n_confs`` conformers, then sets
    ``volume``, ``bonds`` and ``conformers`` and takes the atoms of the first
    conformer.  If RDKit fails to build the molecule or to compute its
    volume, the error is logged and the method returns early, leaving the
    later attributes unset.

    :param smiles: (str) SMILES string
    :param use_etdg_confs: (bool) override the default conformer generation and use the ETDG algorithm
    :return: None
    """
    logger.info('Initialising a Molecule from a SMILES string')

    # Exception (not a bare except) so Ctrl-C and SystemExit still propagate.
    try:
        self.mol_obj = Chem.MolFromSmiles(smiles)
        self.mol_obj = Chem.AddHs(self.mol_obj)
        self.charge = Chem.GetFormalCharge(self.mol_obj)
        self.n_rot_bonds = rdMolDescriptors.CalcNumRotatableBonds(
            self.mol_obj)
        self.n_h_donors = rdMolDescriptors.CalcNumHBD(self.mol_obj)
        self.n_h_acceptors = rdMolDescriptors.CalcNumHBA(self.mol_obj)
    except Exception:
        logger.error('RDKit failed to generate mol objects')
        return

    logger.info('Running conformation generation with RDKit... running')
    # ETKDGv2 is used only when the flag is exactly False.
    method = AllChem.ETKDGv2(
    ) if use_etdg_confs is False else AllChem.ETDG()
    method.pruneRmsThresh = 0.3
    method.numThreads = Config.n_cores
    conf_ids = list(
        AllChem.EmbedMultipleConfs(self.mol_obj,
                                   numConfs=self.n_confs,
                                   params=method))
    logger.info('                                          ... done')

    try:
        self.volume = AllChem.ComputeMolVolume(self.mol_obj)
    except ValueError:
        logger.error('RDKit failed to compute the molecular volume')
        return

    self.bonds = [(b.GetBeginAtomIdx(), b.GetEndAtomIdx())
                  for b in self.mol_obj.GetBonds()]
    self.conformers = extract_conformers_from_rdkit_mol_object(
        mol_obj=self.mol_obj, conf_ids=conf_ids)

    # Default to the first generated conformer in the absence of any other information
    self.set_atoms(atoms=self.conformers[0].atoms)
    return None
def genereate_conformers(molsmi, max_conf=20, min_conf=10, max_steps=1000):
    """Build a molecule from SMILES and embed several conformers on it.

    A single conformer is embedded and UFF-optimised first as a sanity check;
    if any two atoms end up closer than 0.001 the molecule is rejected.  The
    number of conformers then requested is ``1 + 3 * n_rotatable_bonds``,
    clamped to at most ``max_conf`` and then raised to at least ``min_conf``.

    Parameters
    ----------
    molsmi : str
        SMILES of the molecule.
    max_conf : int
        Upper bound on the conformer count (applied before ``min_conf``).
    min_conf : int
        Lower bound on the conformer count.
    max_steps : int
        Maximum number of UFF optimisation iterations.

    Returns
    -------
    rdkit.Chem.Mol or None
        The molecule with hydrogens and embedded conformers, or ``None`` when
        the SMILES cannot be parsed, the first embedding fails, or the
        distance check fails.
    """
    molobj = Chem.MolFromSmiles(molsmi)
    if molobj is None:
        return None

    molobj = Chem.AddHs(molobj)

    status_embed = AllChem.EmbedMolecule(molobj)
    if status_embed != 0:
        return None

    # The optimisation status is deliberately ignored: an unconverged UFF
    # result is kept.
    AllChem.UFFOptimizeMolecule(molobj, maxIters=max_steps)

    # Check bond lengths; the diagonal (self-distances of 0) is masked out.
    dist = Chem.rdmolops.Get3DDistanceMatrix(molobj)
    np.fill_diagonal(dist, 10.0)
    min_dist = np.min(dist)

    # For some atom_types in UFF, it will fail
    if min_dist < 0.001:
        # Was `smilesstr`, an undefined name that raised NameError here.
        print("fail", molsmi)
        return None

    rot_bond = rdMolDescriptors.CalcNumRotatableBonds(molobj)
    confs = min(1 + 3 * rot_bond, max_conf)
    confs = max(confs, min_conf)

    AllChem.EmbedMultipleConfs(molobj,
                               numConfs=confs,
                               useExpTorsionAnglePrefs=True,
                               useBasicKnowledge=True)
    return molobj
def calc(smi, name):
    """Compute a row of molecular descriptors for one SMILES string.

    Parameters
    ----------
    smi : str
        SMILES of the molecule.
    name : str
        Identifier returned as the first element and used in the error
        message.

    Returns
    -------
    tuple or None
        ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw, csp3,
        fmf)`` with floats rounded to 2 or 3 decimals, or ``None`` (with a
        message on stderr) when the SMILES cannot be parsed.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    hba = rdMolDescriptors.CalcNumHBA(m)
    hbd = rdMolDescriptors.CalcNumHBD(m)
    nrings = rdMolDescriptors.CalcNumRings(m)
    rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
    psa = rdMolDescriptors.CalcTPSA(m)
    logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
    mw = rdMolDescriptors._CalcMolWt(m)
    csp3 = rdMolDescriptors.CalcFractionCSP3(m)

    # fmf: heavy atoms in the scaffold over heavy atoms in the molecule.
    # A molecule without heavy atoms gets 0 instead of raising
    # ZeroDivisionError.
    hac = m.GetNumHeavyAtoms()
    if hac == 0:
        fmf = 0
    else:
        fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac

    return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2),
            round(logp, 2), round(mr, 2), round(mw, 2), round(csp3, 3),
            round(fmf, 3))
def _embed_conformers(self, mol):
    """Embed conformers on ``mol`` in place and return it.

    A truthy ``self.num_confs`` is used as the conformer count.  Otherwise
    the count depends on the number of rotatable bonds: 50 for up to 7,
    200 for up to 12, and 300 beyond that.
    """
    requested = self.num_confs
    if not requested:
        n_rot = rdMolDescriptors.CalcNumRotatableBonds(mol)
        if n_rot > 12:
            requested = 300
        elif n_rot > 7:
            requested = 200
        else:
            requested = 50

    AllChem.EmbedMultipleConfs(mol,
                               numConfs=requested,
                               params=self.embed_parameters)
    return mol
def generate_lowestE_conformer(my_smiles):
    """Generate conformers for a SMILES and pick the lowest-energy one.

    The number of conformers requested is ``1 + 3 * n_rotatable_bonds``,
    capped at ``MAP_paths.MAX_CONF``.  Returns the molecule produced by
    ``generate_conformations`` and the conformer id with the smallest energy.
    """
    mol_h = Chem.AddHs(Chem.MolFromSmiles(my_smiles))

    # Decide how many conformers are to be computed.
    n_rotors = rdMolDescriptors.CalcNumRotatableBonds(mol_h)
    n_confs = min(1 + 3 * n_rotors, MAP_paths.MAX_CONF)

    # Generate the conformers together with a {conformer id: energy} mapping.
    my_mol, confID_energies = generate_conformations(mol_h, n_confs)

    # The key with the smallest mapped energy is the winner.
    lowest_energy_id = min(confID_energies, key=confID_energies.get)
    return my_mol, lowest_energy_id
def get_conformations(smiles, max_conf=20):
    """Return one molecule per embedded conformer of ``smiles``.

    The molecule is built with explicit hydrogens and
    ``min(1 + 3 * n_rotatable_bonds, max_conf)`` conformers are requested.
    Each list element is a copy of the molecule made for one conformer id.
    """
    mol_h = Chem.AddHs(Chem.MolFromSmiles(smiles))
    n_rotors = rdMolDescriptors.CalcNumRotatableBonds(mol_h)
    n_requested = min(1 + 3 * n_rotors, max_conf)

    AllChem.EmbedMultipleConfs(mol_h,
                               numConfs=n_requested,
                               useExpTorsionAnglePrefs=True,
                               useBasicKnowledge=True)

    return [
        Chem.Mol(mol_h, False, conformer.GetId())
        for conformer in mol_h.GetConformers()
    ]
def get_torsions(ligand):
    '''
    Returns a list of the number of torsions (rotatable bonds) for all the
    ligands in an input file. File must be in sdf or gzipped sdf
    (``.sdf.gz``) format for now; for any other extension a message is
    printed and the program exits. Molecules that cannot be read are skipped.
    '''
    # splitext only splits off the last suffix, so 'x.sdf.gz' gives
    # ('x.sdf', '.gz'); the gzip case has to be recognised from both parts.
    root, ext = os.path.splitext(ligand)
    if ext == '.sdf':
        opener = open
    elif ext == '.gz' and root.endswith('.sdf'):
        opener = gzip.open
    else:
        print('Ligands not provided in sdf format\n')
        sys.exit()

    torsions = []
    # Binary mode: the supplier reads from the raw byte stream.
    f = opener(ligand, 'rb')
    try:
        suppl = Chem.ForwardSDMolSupplier(f)
        for mol in suppl:
            if mol is None:
                continue
            torsions.append(rdMD.CalcNumRotatableBonds(mol))
    finally:
        f.close()
    return torsions
def main(name, argv):
    """Print the building blocks that pass weight and rotatable-bond limits.

    ``argv`` is ``[input_path, max_mol_weight, max_rotatable_bonds]``.  Each
    input line holds a SMILES string followed by an identifier.  A molecule
    is printed as ``<canonical smiles>\\t<identifier>`` when its molecular
    weight and rotatable-bond count are both within the limits.  Lines with
    fewer than two fields and unparsable SMILES are skipped.

    Parameters
    ----------
    name : str
        Program name, only forwarded to ``print_usage``.
    argv : list of str
        Command-line arguments as described above.
    """
    if len(argv) != 3:
        print_usage(name)
        return

    MW = float(argv[1])
    RB = int(argv[2])

    # read molport building blocks
    with open(argv[0], 'r') as f:
        for line in f:
            line_s = line.split()
            if len(line_s) < 2:
                # Blank or incomplete line: no SMILES/identifier pair.
                continue
            mol = Chem.MolFromSmiles(line_s[0])
            if mol is None:
                continue
            if (Descriptors.MolWt(mol) <= MW
                    and rdMolDescriptors.CalcNumRotatableBonds(mol) <= RB):
                # Single-argument print() behaves the same on Python 2 and 3.
                print(Chem.MolToSmiles(mol) + "\t" + line_s[1])
def filter_druglikeness_5_rules(self, smiles):
    """Remove SMILES that fail the drug-likeness limits, in place.

    A molecule is rejected when MW > 600, ALOGP > 6 or < 0, HBA > 11,
    HBD > 7, PSA > 180 or the strict rotatable-bond count > 11.  SMILES that
    cannot be parsed are rejected as well.  The number of rejected entries
    is printed.

    The previous version called ``smiles.remove`` while iterating over
    ``smiles``, which skips the entry after each removal; every entry is now
    evaluated exactly once.

    Parameters
    ----------
    smiles : list of str
        SMILES strings; the list is modified in place.

    Returns
    -------
    list of str
        The same list object, holding only the accepted SMILES.
    """
    kept = []
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            continue
        mol = Chem.RemoveHs(mol)
        MW = rdmd._CalcMolWt(mol)
        ALOGP = Crippen.MolLogP(mol)
        HBA = rdmd.CalcNumHBA(mol)
        HBD = rdmd.CalcNumHBD(mol)
        PSA = MolSurf.TPSA(mol)
        ROTB = rdmd.CalcNumRotatableBonds(
            mol, rdmd.NumRotatableBondsOptions.Strict)
        if (MW > 600 or ALOGP > 6 or ALOGP < 0 or HBA > 11 or HBD > 7
                or PSA > 180 or ROTB > 11):
            continue
        kept.append(smi)

    count = len(smiles) - len(kept)
    # Slice assignment keeps the caller's list object, as remove() did.
    smiles[:] = kept
    print("unavaliable rule_5_drug:%i" % count)
    return smiles
def generate_conformers(molobj, max_conf=100, min_conf=10):
    """Embed and MMFF-optimise conformers of ``molobj``; return the energies.

    A single conformer is embedded and UFF-optimised first.  Then
    ``1 + 3 * n_rotatable_bonds`` conformers, clamped to at most ``max_conf``
    and then raised to at least ``min_conf``, are embedded and optimised with
    MMFF.  The second column of the optimisation results is returned as a
    NumPy array.
    """
    AllChem.EmbedMolecule(molobj)
    AllChem.UFFOptimizeMolecule(molobj)

    n_rotors = rdMolDescriptors.CalcNumRotatableBonds(molobj)
    n_requested = max(min(1 + 3 * n_rotors, max_conf), min_conf)

    AllChem.EmbedMultipleConfs(molobj,
                               numConfs=n_requested,
                               useExpTorsionAnglePrefs=True,
                               useBasicKnowledge=True)

    # One (status, energy) row per conformer; only the energies are returned.
    results = np.array(AllChem.MMFFOptimizeMoleculeConfs(molobj))
    return results[:, 1]
def calculate_properties(self, smiles=None, mol=None, props=()):
    """Calculate the requested basic properties and store them in ``self.data``.

    Parameters
    ----------
    smiles : str, optional
        SMILES used to build the molecule when ``mol`` is not given.
    mol : rdkit.Chem.Mol, optional
        Molecule to use directly.
    props : collection of str
        Names of the properties to compute.  Recognised names: ``py_formula``,
        ``py_em``, ``py_n_Cl_Br``, ``py_na``, ``py_mw``, ``py_fsp3``,
        ``py_rb``, ``py_tpsa``, ``py_clogp``, ``py_nar``, ``py_nhba``,
        ``py_nhbd``.  Other names are ignored.  The default is an immutable
        empty tuple rather than a shared mutable list.

    returns : error (bool) -- ``True`` when nothing was computed (no
    properties requested, or the SMILES could not be parsed), ``False``
    otherwise.
    """
    if not props:
        return True
    if mol is None:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return True

    def count_cl_br():
        # Number of chlorine plus bromine atoms.
        return sum(1 for atom in mol.GetAtoms()
                   if atom.GetSymbol() in ('Cl', 'Br'))

    # (property name, zero-argument calculator), in storage order.
    calculators = (
        ('py_formula', lambda: desc.CalcMolFormula(mol)),
        ('py_em', lambda: round(desc.CalcExactMolWt(mol), 5)),
        ('py_n_Cl_Br', count_cl_br),
        ('py_na', lambda: mol.GetNumAtoms()),
        ('py_mw', lambda: desc._CalcMolWt(mol)),
        ('py_fsp3', lambda: desc.CalcFractionCSP3(mol)),
        ('py_rb', lambda: desc.CalcNumRotatableBonds(mol)),
        ('py_tpsa', lambda: desc.CalcTPSA(mol)),
        ('py_clogp', lambda: desc.CalcCrippenDescriptors(mol)[0]),
        ('py_nar', lambda: desc.CalcNumAromaticRings(mol)),
        ('py_nhba', lambda: desc.CalcNumHBA(mol)),
        ('py_nhbd', lambda: desc.CalcNumHBD(mol)),
    )
    for key, compute in calculators:
        if key in props:
            self.data[key] = compute()
    return False