def test8MultiThreadMultiConf(self):
    """Check that multi-threaded embedding yields the same UFF energies.

    The same molecule is embedded twice with identical settings and seed,
    once with the default threading and once with ``numThreads=4``; the
    per-conformer UFF energies must agree to within 1e-6.
    """
    smi = "CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC"

    # Reference run with the default number of threads.
    mol = Chem.AddHs(Chem.MolFromSmiles(smi))
    cids = rdDistGeom.EmbedMultipleConfs(mol, 200, maxAttempts=30,
                                         randomSeed=100)
    energies = [
        ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0,
                                                     cid).CalcEnergy()
        for cid in cids
    ]

    # Second run: fresh molecule, same seed, four threads.
    mol = Chem.AddHs(Chem.MolFromSmiles(smi))
    cids = rdDistGeom.EmbedMultipleConfs(mol, 200, maxAttempts=30,
                                         randomSeed=100, numThreads=4)
    nenergies = [
        ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0,
                                                     cid).CalcEnergy()
        for cid in cids
    ]

    self.assertTrue(lstEq(energies, nenergies, tol=1e-6))
def test8MultiThreadMultiConf(self):
    """Check that multi-threaded embedding reproduces the default run.

    The molecule is embedded twice with the same seed (default threading
    vs. ``numThreads=4``).  The conformer ids, the per-conformer UFF
    energies and the per-conformer mean squared coordinate deviation must
    agree within the tolerances selected below.
    """
    # Looser tolerances are used when the build string reports MINGW.
    if rdBase.rdkitBuild.split('|')[2] != "MINGW":
        ENERGY_TOLERANCE = 1.0e-6
        MSD_TOLERANCE = 1.0e-6
    else:
        ENERGY_TOLERANCE = 1.0
        MSD_TOLERANCE = 1.0e-5

    smi = "CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC"

    mol = Chem.AddHs(Chem.MolFromSmiles(smi))
    cids = rdDistGeom.EmbedMultipleConfs(mol, 200, maxAttempts=30,
                                         randomSeed=100)
    energies = []
    for cid in cids:
        ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, cid)
        energies.append(ff.CalcEnergy())

    mol2 = Chem.AddHs(Chem.MolFromSmiles(smi))
    cids2 = rdDistGeom.EmbedMultipleConfs(mol2, 200, maxAttempts=30,
                                          randomSeed=100, numThreads=4)
    self.assertTrue(lstEq(cids, cids2))

    nenergies = []
    for cid in cids2:
        ff = ChemicalForceFields.UFFGetMoleculeForceField(mol2, 10.0, cid)
        nenergies.append(ff.CalcEnergy())
    self.assertTrue(lstEq(energies, nenergies, tol=ENERGY_TOLERANCE))

    # Compare coordinates conformer by conformer.  The conformer id must be
    # passed explicitly: GetConformer() without an id was called on every
    # pass of this loop before, so the same pair was compared each time.
    # NOTE(review): MSD_TOLERANCE was only ever exercised on one conformer
    # pair; confirm the MINGW value still holds for all of them.
    num_atoms = mol.GetNumAtoms()
    for cid in cids:
        conf = mol.GetConformer(cid)
        conf2 = mol2.GetConformer(cid)
        msd = 0.0
        for i in range(num_atoms):
            msd += (conf.GetAtomPosition(i) -
                    conf2.GetAtomPosition(i)).LengthSq()
        msd /= num_atoms
        self.assertTrue(msd < MSD_TOLERANCE)
def confgen(input, output, prunermsthresh, numconf, add_ref):
    """Generate conformers for a mol file and write them, energy-sorted, to an SD file.

    Args:
        input: path of the mol file to read.
        output: path of the SD file to write.
        prunermsthresh: value assigned to ``pruneRmsThresh`` of the ETKDGv2
            parameters.
        numconf: number of conformers requested from the embedder.
        add_ref: when true, the input structure is written first with
            ``CID`` set to ``'-1'`` and an empty ``Energy`` property.

    The conformers are optimized with MMFF94s, aligned, and written in order
    of increasing MMFF energy; each record carries ``CID`` and ``Energy``
    properties.
    """
    mol = Chem.AddHs(Chem.MolFromMolFile(input), addCoords=True)
    refmol = Chem.AddHs(Chem.Mol(mol))

    param = rdDistGeom.ETKDGv2()
    param.pruneRmsThresh = prunermsthresh
    cids = rdDistGeom.EmbedMultipleConfs(mol, numconf, param)

    mp = AllChem.MMFFGetMoleculeProperties(mol, mmffVariant='MMFF94s')
    AllChem.MMFFOptimizeMoleculeConfs(mol, numThreads=0,
                                      mmffVariant='MMFF94s')

    w = Chem.SDWriter(output)
    # try/finally so the writer is closed (and the file flushed) even when
    # an energy evaluation or a write raises part-way through.
    try:
        if add_ref:
            refmol.SetProp('CID', '-1')
            refmol.SetProp('Energy', '')
            w.write(refmol)

        res = []
        for cid in cids:
            ff = AllChem.MMFFGetMoleculeForceField(mol, mp, confId=cid)
            res.append((cid, ff.CalcEnergy()))
        sorted_res = sorted(res, key=lambda x: x[1])

        rdMolAlign.AlignMolConformers(mol)
        for cid, e in sorted_res:
            mol.SetProp('CID', str(cid))
            mol.SetProp('Energy', str(e))
            w.write(mol, confId=cid)
    finally:
        w.close()
def test5Issue285(self):
    """Every pair of embedded conformers must produce a different mol block."""
    m = Chem.MolFromSmiles('CNC=O')
    cs = rdDistGeom.EmbedMultipleConfs(m, 10)
    n_confs = len(cs)
    for i, ci in enumerate(cs):
        j = i + 1
        while j < n_confs:
            cj = cs[j]
            block_i = Chem.MolToMolBlock(m, confId=ci)
            block_j = Chem.MolToMolBlock(m, confId=cj)
            self.assertTrue(block_i != block_j)
            j += 1
def test4AlignConfs(self):
    """Align conformers on a subset of atoms and check those atoms coincide.

    Ten conformers are embedded and UFF-minimized, then aligned on the
    atoms listed in ``aids``.  For each of those atoms, the position in
    every conformer must match the position in the first conformer within
    the 0.5 tolerance passed to ``lstFeq``.
    """
    mol = Chem.MolFromSmiles('C1CC1CNc(n2)nc(C)cc2Nc(cc34)ccc3[nH]nc4')
    cids = rdDistGeom.EmbedMultipleConfs(mol, 10, 30, 100)
    for cid in cids:
        ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, confId=cid)
        ff.Initialize()
        more = 1
        while more:
            more = ff.Minimize()
        # FIX: this should not be necessary but somehow more comes out to be
        # 0 even with the structure still being crappy
        ff.Minimize()

    aids = [12, 13, 14, 15, 16, 17, 18]
    rdMolAlign.AlignMolConformers(mol, aids)

    # now test that the atom locations of these atoms are consistent
    confs = mol.GetConformers()
    for aid in aids:
        ref_pos = None
        for conf in confs:
            pos = list(conf.GetAtomPosition(aid))
            if ref_pos is None:
                # the first conformer is the reference for this atom
                ref_pos = pos
                continue
            # assertTrue replaces the deprecated failUnless alias
            self.assertTrue(lstFeq(ref_pos, pos, .5))
def embed_conf(mol, initial_confs, args, log, coord_Map, alg_Map, mol_template):
    """Embed conformers, retrying with random coordinates if embedding fails.

    When ``coord_Map``, ``alg_Map`` and ``mol_template`` are all ``None``
    the molecule is embedded freely; otherwise ``coord_Map`` is passed to
    the embedder as ``coordMap``.  If the first attempt yields no conformer
    (or a single one although more than one was requested), a message is
    logged and the embedding is repeated with random starting coordinates.

    Returns the conformer ids produced by the last embedding call.
    """
    # Keyword arguments common to every embedding call.
    embed_kwargs = {
        'ignoreSmoothingFailures': True,
        'randomSeed': args.seed,
        'numThreads': 0,
    }
    no_template = (coord_Map is None and alg_Map is None
                   and mol_template is None)
    if not no_template:
        # case of embed for templates
        embed_kwargs['coordMap'] = coord_Map

    cids = rdDistGeom.EmbedMultipleConfs(mol, initial_confs, **embed_kwargs)

    n_found = len(cids)
    if n_found == 0 or (n_found == 1 and initial_confs != 1):
        log.write("o Normal RDKit embeding process failed, trying to generate conformers with random coordinates (with "+str(initial_confs)+" possibilities)")
        cids = rdDistGeom.EmbedMultipleConfs(mol, initial_confs,
                                             useRandomCoords=True,
                                             boxSizeMult=10.0,
                                             numZeroFail=1000,
                                             **embed_kwargs)

    if args.verbose:
        log.write("o "+ str(len(cids))+" conformers initially generated")
    return cids
def _multiConfFromSmiles(smiles, nConfs=10, maxIters=500):
    """Build a molecule from SMILES, embed conformers and MMFF-optimize them.

    Args:
        smiles: SMILES string of the molecule.
        nConfs: number of conformers requested from the embedder.
        maxIters: ``maxIters`` passed to each MMFF optimization.

    Returns:
        The hydrogen-added RDKit mol carrying the optimized conformers.
    """
    idea = Chem.MolFromSmiles(smiles)
    idea = Chem.AddHs(idea)
    confs = rdDistGeom.EmbedMultipleConfs(idea, nConfs)
    for conf in confs:
        # The optimizer's return value is not used by this helper.
        ChemicalForceFields.MMFFOptimizeMolecule(idea, confId=conf,
                                                 maxIters=maxIters)
    return idea
def testGitHub2820(self):
    """MMFF optimization of a molecule without MMFF properties reports (-1, -1.0)."""
    mol = Chem.MolFromSmiles("[Na]C")
    self.assertIsNotNone(mol)

    # No MMFF properties are available for this molecule.
    props = ChemicalForceFields.MMFFGetMoleculeProperties(mol)
    self.assertIsNone(props)

    rdDistGeom.EmbedMultipleConfs(mol, 2)
    results = ChemicalForceFields.MMFFOptimizeMoleculeConfs(mol)
    self.assertEqual(len(results), 2)

    first = results[0]
    second = results[1]
    self.assertEqual(first, second)
    self.assertEqual(first, (-1, -1.0))
def test6RmsPruning(self):
    """Check conformer counts after RMS pruning for two parameter sets.

    Each molecule is embedded with ``pruneRmsThresh=1.5``; the number of
    surviving conformers must be within one of the expected count.  The
    check is done once with keyword arguments and once with an ETKDG
    parameter object that disables symmetry for pruning.
    """
    smiles = [
        'CC(C)CC(NC(C1[N+]CCC1)=O)C([O-])=O',
        'CC(NC(CO)C(O)c1ccc([N+]([O-])=O)cc1)=O',
        'CC([N+])C(NC(C)C(N1C(C=O)CCC1)=O)=O',
        'CC(NC1C(O)C=C(C([O-])=O)OC1C(O)C(O)CO)=O',
        'CCCC=C(NC(C1CC1(C)C)=O)C([O-])=O',
        'OCC(O)C(O)C(Cn1c2c(cc(C)c(C)c2)nc-2c(=O)[nH]c(=O)nc12)O',
    ]

    # Keyword-argument call.  (A first ``expected`` list that was
    # immediately overwritten has been removed.)
    nconfs = []
    expected = [3, 3, 5, 4, 4, 4]
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        cids = rdDistGeom.EmbedMultipleConfs(mol, 50, maxAttempts=30,
                                             randomSeed=100,
                                             pruneRmsThresh=1.5)
        nconfs.append(len(cids))
    d = [abs(x - y) for x, y in zip(expected, nconfs)]
    self.assertTrue(max(d) <= 1)

    # previous settings
    params = rdDistGeom.ETKDG()
    params.randomSeed = 100
    params.maxIterations = 30
    params.pruneRmsThresh = 1.5
    params.useSymmetryForPruning = False
    nconfs = []
    expected = [4, 5, 5, 4, 5, 4]
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        cids = rdDistGeom.EmbedMultipleConfs(mol, 50, params)
        nconfs.append(len(cids))
    d = [abs(x - y) for x, y in zip(expected, nconfs)]
    self.assertTrue(max(d) <= 1)
def test3MultiConf(self):
    """UFF energies of ten seeded conformers must match the stored values."""
    mol = Chem.MolFromSmiles("CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC")
    cids = rdDistGeom.EmbedMultipleConfs(mol, 10, maxAttempts=30,
                                         randomSeed=100)
    # Reference energies, compared with a tolerance of 1e-2.
    energies = [
        112.98, 103.57, 110.78, 100.40, 95.37,
        101.64, 114.72, 112.65, 124.53, 107.50,
    ]
    nenergies = [
        ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0,
                                                     cid).CalcEnergy()
        for cid in cids
    ]
    self.assertTrue(lstEq(energies, nenergies, tol=1e-2))
def test3MultiConf(self):
    """Seeded embedding without torsion prefs / basic knowledge gives known UFF energies."""
    mol = Chem.MolFromSmiles("CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC")
    embed_options = dict(maxAttempts=30, randomSeed=100,
                         useExpTorsionAnglePrefs=False,
                         useBasicKnowledge=False)
    cids = rdDistGeom.EmbedMultipleConfs(mol, 10, **embed_options)

    energies = [
        116.330, 106.246, 109.816, 104.890, 93.060,
        140.803, 139.253, 95.820, 123.591, 108.655,
    ]

    def uff_energy(cid):
        # UFF energy of a single conformer of ``mol``.
        field = ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, cid)
        return field.CalcEnergy()

    nenergies = [uff_energy(cid) for cid in cids]
    self.assertTrue(lstEq(energies, nenergies, tol=1e-2))
def test3MultiConf(self):
    """Compare UFF energies of ten seeded conformers with reference values.

    Embedding is done with experimental torsion preferences and basic
    knowledge switched off; energies must match within 1e-2.
    """
    smi = "CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC"
    mol = Chem.MolFromSmiles(smi)
    cids = rdDistGeom.EmbedMultipleConfs(
        mol,
        10,
        maxAttempts=30,
        randomSeed=100,
        useExpTorsionAnglePrefs=False,
        useBasicKnowledge=False,
    )
    energies = [
        115.460, 105.891, 109.868, 104.415, 92.944,
        140.917, 139.468, 95.081, 123.528, 107.885,
    ]
    nenergies = []
    for cid in cids:
        nenergies.append(
            ChemicalForceFields.UFFGetMoleculeForceField(
                mol, 10.0, cid).CalcEnergy())
    self.assertTrue(lstEq(energies, nenergies, tol=1e-2))
def test3MultiConf(self):
    """UFF energies of ten seeded conformers must match the stored values.

    The comparison uses ``lstEq`` with a tolerance of 1e-2.
    """
    mol = Chem.MolFromSmiles("CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC")
    cids = rdDistGeom.EmbedMultipleConfs(mol, 10, maxAttempts=30,
                                         randomSeed=100)
    energies = [
        90.05, 77.35, 91.45, 81.82, 81.60, 75.65, 86.50, 80.35, 80.55, 73.73
    ]
    nenergies = []
    for cid in cids:
        ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, cid)
        nenergies.append(ff.CalcEnergy())
    # assertTrue replaces the deprecated failUnless alias.
    self.assertTrue(lstEq(energies, nenergies, tol=1e-2))
def test6RmsPruning(self):
    """Conformer counts after RMS pruning must be within one of the expected counts."""
    smiles = [
        'CC(C)CC(NC(C1[N+]CCC1)=O)C([O-])=O',
        'CC(NC(CO)C(O)c1ccc([N+]([O-])=O)cc1)=O',
        'CC([N+])C(NC(C)C(N1C(C=O)CCC1)=O)=O',
        'CC(NC1C(O)C=C(C([O-])=O)OC1C(O)C(O)CO)=O',
        'CCCC=C(NC(C1CC1(C)C)=O)C([O-])=O',
        'OCC(O)C(O)C(Cn1c2c(cc(C)c(C)c2)nc-2c(=O)[nH]c(=O)nc12)O',
    ]
    expected = [5, 6, 6, 6, 6, 3]

    def count_confs(smi):
        # Number of conformers kept for one SMILES with pruneRmsThresh=1.5.
        mol = Chem.MolFromSmiles(smi)
        kept = rdDistGeom.EmbedMultipleConfs(mol, 50, maxAttempts=30,
                                             randomSeed=100,
                                             pruneRmsThresh=1.5)
        return len(kept)

    nconfs = [count_confs(smi) for smi in smiles]
    deviations = [abs(want - got) for want, got in zip(expected, nconfs)]
    self.assertTrue(max(deviations) <= 1)
def testOptimizeMoleculeConfs(self):
    """OptimizeMoleculeConfs must converge and lower the MMFF energy of every conformer."""
    m = Chem.AddHs(Chem.MolFromSmiles("CCCO"))
    self.assertIsNotNone(m)
    cids = rdDistGeom.EmbedMultipleConfs(m, numConfs=10)
    self.assertEqual(len(cids), 10)

    mp = ChemicalForceFields.MMFFGetMoleculeProperties(m)
    ff = ChemicalForceFields.MMFFGetMoleculeForceField(m, mp)

    # MMFF energy of each conformer before optimization.
    before = []
    for cid in cids:
        conf_ff = ChemicalForceFields.MMFFGetMoleculeForceField(m, mp,
                                                                confId=cid)
        before.append(conf_ff.CalcEnergy())

    # Each result is unpacked as (status, energy after optimization).
    results = ChemicalForceFields.OptimizeMoleculeConfs(m, ff, maxIters=200)
    res, after = zip(*results)

    self.assertEqual(len(res), 10)
    self.assertEqual(len(before), len(after))
    self.assertTrue(all(status == 0 for status in res))
    self.assertTrue(all(a < b for a, b in zip(after, before)))
def test4AlignConfs(self):
    """Align conformers on selected atoms, then exercise the RMSlist argument.

    After UFF minimization and alignment on ``aids``, each of those atoms
    must sit within the 0.5 ``lstFeq`` tolerance of its position in the
    first conformer.  The test then checks that ``RMSlist`` receives one
    value per non-reference conformer and that a non-list ``RMSlist``
    raises ``AttributeError``.
    """
    mol = Chem.MolFromSmiles('C1CC1CNc(n2)nc(C)cc2Nc(cc34)ccc3[nH]nc4')
    cids = rdDistGeom.EmbedMultipleConfs(mol, 10, 30, 100)
    for cid in cids:
        ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, confId=cid)
        ff.Initialize()
        # Keep minimizing until Minimize() returns a falsy value.
        while ff.Minimize():
            pass
        # FIX: this should not be necessary but somehow more comes out to be
        # 0 even with the structure still being crappy
        ff.Minimize()

    aids = [12, 13, 14, 15, 16, 17, 18]
    rdMolAlign.AlignMolConformers(mol, aids)

    # now test that the atom locations of these atoms are consistent
    confs = mol.GetConformers()
    for aid in aids:
        reference = None
        for conf in confs:
            current = list(conf.GetAtomPosition(aid))
            if reference is None:
                reference = current
            else:
                self.assertTrue(lstFeq(reference, current, .5))

    # now test that we can get a list of RMS values
    rmsvals = []
    rdMolAlign.AlignMolConformers(mol, aids, RMSlist=rmsvals)
    self.assertTrue((len(rmsvals) == mol.GetNumConformers() - 1))

    # make sure something sensible happens if we provide a stupid
    # argument:
    rmsvals = 4
    self.assertRaises(AttributeError, rdMolAlign.AlignMolConformers, mol,
                      atomIds=aids, RMSlist=rmsvals)
def summ_search(mol,
                name,
                args,
                log,
                dup_data,
                dup_data_idx,
                coord_Map=None,
                alg_Map=None,
                mol_template=None):
    '''embeds core conformers, then optimizes and filters based on RMSD. Finally the rotatable torsions are systematically rotated

    Steps, as implemented below:
      1. sanitize the molecule, add hydrogens and pick the number of
         initial conformers (``args.sample`` or ``auto_sampling``);
      2. embed conformers (optionally with ETKDG and/or a coordinate map),
         retrying with random coordinates when embedding fails;
      3. minimize each conformer with MMFF or UFF (constrained to the
         template when one is given) and record its energy;
      4. drop duplicates, first by energy alone and then by energy and RMS;
      5. hand the surviving conformers to ``genConformer_r`` and, when
         rotatable torsions exist, filter the written file once more.

    Bookkeeping counts are stored in ``dup_data`` at row ``dup_data_idx``.

    Returns -1 when the molecule has more torsions than
    ``args.max_torsions`` and 1 otherwise.
    '''
    sdwriter = Chem.SDWriter(name + '_' + 'rdkit' + args.output)

    Chem.SanitizeMol(mol)
    mol = Chem.AddHs(mol)
    mol.SetProp("_Name", name)

    # detects and applies auto-detection of initial number of conformers
    if args.sample == 'auto':
        initial_confs = int(auto_sampling(args.auto_sample, mol, log))
    else:
        initial_confs = int(args.sample)

    #
    dup_data.at[dup_data_idx, 'Molecule'] = name
    dup_data.at[dup_data_idx, 'RDKIT-Initial-samples'] = initial_confs

    if args.nodihedrals == False:
        rotmatches = getDihedralMatches(mol, args.heavyonly, log)
    else:
        rotmatches = []

    if len(rotmatches) > args.max_torsions:
        log.write("x Too many torsions (%d). Skipping %s" %
                  (len(rotmatches), (name + args.output)))
        status = -1
    else:
        if coord_Map is None and alg_Map is None and mol_template is None:
            if args.etkdg:
                ps = Chem.ETKDG()
                ps.randomSeed = args.seed
                ps.ignoreSmoothingFailures = True
                ps.numThreads = 0
                cids = rdDistGeom.EmbedMultipleConfs(mol,
                                                     initial_confs,
                                                     params=ps)
            else:
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol,
                    initial_confs,
                    ignoreSmoothingFailures=True,
                    randomSeed=args.seed,
                    numThreads=0)
            # fall back to random starting coordinates if embedding failed
            if len(cids) == 0 or len(cids) == 1 and initial_confs != 1:
                log.write(
                    "o conformers initially sampled with random coordinates")
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol,
                    initial_confs,
                    randomSeed=args.seed,
                    useRandomCoords=True,
                    boxSizeMult=10.0,
                    ignoreSmoothingFailures=True,
                    numZeroFail=1000,
                    numThreads=0)
            if args.verbose:
                log.write("o " + str(len(cids)) +
                          " conformers initially sampled")
        # case of embed for templates
        else:
            if args.etkdg:
                ps = Chem.ETKDG()
                ps.randomSeed = args.seed
                ps.coordMap = coord_Map
                ps.ignoreSmoothingFailures = True
                ps.numThreads = 0
                cids = rdDistGeom.EmbedMultipleConfs(mol,
                                                     initial_confs,
                                                     params=ps)
            else:
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol,
                    initial_confs,
                    randomSeed=args.seed,
                    ignoreSmoothingFailures=True,
                    coordMap=coord_Map,
                    numThreads=0)
            if len(cids) == 0 or len(cids) == 1 and initial_confs != 1:
                log.write(
                    "o conformers initially sampled with random coordinates")
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol,
                    initial_confs,
                    randomSeed=args.seed,
                    useRandomCoords=True,
                    boxSizeMult=10.0,
                    numZeroFail=1000,
                    ignoreSmoothingFailures=True,
                    coordMap=coord_Map,
                    numThreads=0)
            if args.verbose:
                log.write("o " + str(len(cids)) +
                          " conformers initially sampled")

        #energy minimize all to get more realistic results
        #identify the atoms and decide Force Field
        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() > 36:  #upto Kr for MMFF, if not use UFF
                args.ff = "UFF"
                #log.write("UFF is used because there are atoms that MMFF doesn't recognise")
        if args.verbose:
            log.write("o Optimizing " + str(len(cids)) +
                      " initial conformers with" + args.ff)
        if args.verbose:
            if args.nodihedrals == False:
                log.write("o Found " + str(len(rotmatches)) +
                          " rotatable torsions")
                # for [a,b,c,d] in rotmatches:
                # 	log.write(' '+mol.GetAtomWithIdx(a).GetSymbol()+str(a+1)+ mol.GetAtomWithIdx(b).GetSymbol()+str(b+1)+ mol.GetAtomWithIdx(c).GetSymbol()+str(c+1)+mol.GetAtomWithIdx(d).GetSymbol()+str(d+1))
            else:
                log.write("o Systematic torsion rotation is set to OFF")

        cenergy, outmols = [], []
        bar = IncrementalBar('o Minimizing', max=len(cids))
        for i, conf in enumerate(cids):
            if coord_Map is None and alg_Map is None and mol_template is None:
                if args.ff == "MMFF":
                    GetFF = Chem.MMFFGetMoleculeForceField(
                        mol, Chem.MMFFGetMoleculeProperties(mol), confId=conf)
                elif args.ff == "UFF":
                    GetFF = Chem.UFFGetMoleculeForceField(mol, confId=conf)
                else:
                    log.write(' Force field {} not supported!'.format(
                        args.ff))
                    sys.exit()

                GetFF.Initialize()
                converged = GetFF.Minimize(maxIts=args.opt_steps_RDKit)
                energy = GetFF.CalcEnergy()
                cenergy.append(GetFF.CalcEnergy())

                #if args.verbose:
                #    log.write("-   conformer", (i+1), "optimized: ", args.ff, "energy", GetFF.CalcEnergy())
            #id template realign before doing calculations
            else:
                num_atom_match = mol.GetSubstructMatch(mol_template)
                # Force field parameters
                if args.ff == "MMFF":
                    GetFF = lambda mol, confId=conf: Chem.MMFFGetMoleculeForceField(
                        mol, Chem.MMFFGetMoleculeProperties(mol), confId=conf)
                elif args.ff == "UFF":
                    GetFF = lambda mol, confId=conf: Chem.UFFGetMoleculeForceField(
                        mol, confId=conf)
                else:
                    # 'options' is not defined in this function; the value
                    # being reported is args.ff, as in the branch above.
                    log.write(' Force field {} not supported!'.format(
                        args.ff))
                    sys.exit()
                getForceField = GetFF

                # clean up the conformation
                ff_temp = getForceField(mol, confId=conf)
                # constrain every pair of matched atoms to its distance in
                # the coordinate map
                for k, idxI in enumerate(num_atom_match):
                    for l in range(k + 1, len(num_atom_match)):
                        idxJ = num_atom_match[l]
                        d = coord_Map[idxI].Distance(coord_Map[idxJ])
                        ff_temp.AddDistanceConstraint(idxI, idxJ, d, d, 10000)
                ff_temp.Initialize()
                #reassignned n from 4 to 10 for better embed and minimzation
                n = 10
                more = ff_temp.Minimize()
                while more and n:
                    more = ff_temp.Minimize()
                    n -= 1
                energy = ff_temp.CalcEnergy()
                # rotate the embedded conformation onto the core_mol:
                rms = rdMolAlign.AlignMol(mol,
                                          mol_template,
                                          prbCid=conf,
                                          atomMap=alg_Map,
                                          reflect=True,
                                          maxIters=100)
                # elif len(num_atom_match) == 5:
                #     ff_temp = GetFF(mol, confId=conf)
                #     conf_temp = mol_template.GetConformer()
                #     for k in range(mol_template.GetNumAtoms()):
                #         p = conf_temp.GetAtomPosition(k)
                #         q = mol.GetConformer(conf).GetAtomPosition(k)
                #         pIdx = ff_temp.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1
                #         ff_temp.AddDistanceConstraint(pIdx, num_atom_match[k], 0, 0, 10000)
                #     ff_temp.Initialize()
                #     n = 10
                #     more = ff_temp.Minimize(energyTol=1e-6, forceTol=1e-5)
                #     while more and n:
                #         more = ff_temp.Minimize(energyTol=1e-6, forceTol=1e-5)
                #         n -= 1
                #     # realign
                #     energy = ff_temp.CalcEnergy()
                #     rms = rdMolAlign.AlignMol(mol, mol_template,prbCid=conf, atomMap=alg_Map,reflect=True,maxIters=50)
                cenergy.append(energy)

            # outmols is gonna be a list containing "initial_confs" mol objects with "initial_confs"
            # conformers. We do this to SetProp (Name and Energy) to the different conformers
            # and log.write in the SDF file. At the end, since all the mol objects has the same
            # conformers, but the energies are different, we can log.write conformers to SDF files
            # with the energies of the parent mol objects. We measured the computing time and
            # it's the same as using only 1 parent mol object with 10 conformers, but we couldn'temp
            # SetProp correctly
            pmol = PropertyMol.PropertyMol(mol)
            outmols.append(pmol)
            bar.next()
        bar.finish()

        for i, cid in enumerate(cids):
            outmols[cid].SetProp('_Name', name + ' conformer ' + str(i + 1))
            # stored as text; it is parsed back with float() further below
            outmols[cid].SetProp('Energy', str(cenergy[cid]))

        cids = list(range(len(outmols)))
        sortedcids = sorted(cids, key=lambda cid: cenergy[cid])

        log.write("\n\no Filters after intial embedding of " +
                  str(initial_confs) + " conformers")
        # The duplicate counters start at -1 because the first conformer is
        # compared with itself in the loops below and counted once.
        selectedcids, selectedcids_initial, eng_dup, eng_rms_dup = [], [], -1, -1
        bar = IncrementalBar('o Filtering based on energy (pre-filter)',
                             max=len(sortedcids))
        for i, conf in enumerate(sortedcids):
            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if i == 0:
                selectedcids_initial.append(conf)
            # check rmsd
            for seenconf in selectedcids_initial:
                E_diff = abs(cenergy[conf] - cenergy[seenconf])  # in kcal/mol
                if E_diff < args.initial_energy_threshold:
                    eng_dup += 1
                    excluded_conf = True
                    break
            if excluded_conf == False:
                if conf not in selectedcids_initial:
                    selectedcids_initial.append(conf)
            bar.next()
        bar.finish()

        if args.verbose == True:
            log.write("o " + str(eng_dup) +
                      " Duplicates removed pre-energy filter (E < " +
                      str(args.initial_energy_threshold) + " kcal/mol )")

        #reduce to unique set
        if args.verbose:
            log.write("o Removing duplicate conformers ( RMSD < " +
                      str(args.rms_threshold) + " and E difference < " +
                      str(args.energy_threshold) + " kcal/mol)")

        bar = IncrementalBar('o Filtering based on energy and rms',
                             max=len(selectedcids_initial))
        #check rmsd
        for i, conf in enumerate(selectedcids_initial):
            #set torsions to same value
            for m in rotmatches:
                rdMolTransforms.SetDihedralDeg(
                    outmols[conf].GetConformer(conf), *m, 180.0)
            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if i == 0:
                selectedcids.append(conf)
            # check rmsd
            for seenconf in selectedcids:
                E_diff = abs(cenergy[conf] - cenergy[seenconf])  # in kcal/mol
                if E_diff < args.energy_threshold:
                    rms = get_conf_RMS(outmols[conf], outmols[conf], seenconf,
                                       conf, args.heavyonly,
                                       args.max_matches_RMSD, log)
                    if rms < args.rms_threshold:
                        excluded_conf = True
                        eng_rms_dup += 1
                        break
            if excluded_conf == False:
                if conf not in selectedcids:
                    selectedcids.append(conf)
            bar.next()
        bar.finish()

        # unique_mols, unique_energies = [],[]
        # for id in selectedcids:
        #     unique_mols.append(outmols[id])
        #     unique_energies.append(cenergy[id])

        # log.write(unique_mols[0:2].GetConformers()[0].GetPositions())

        if args.verbose == True:
            log.write("o " + str(eng_rms_dup) +
                      " Duplicates removed (RMSD < " +
                      str(args.rms_threshold) + " / E < " +
                      str(args.energy_threshold) +
                      " kcal/mol) after rotation")
        if args.verbose:
            log.write("o " + str(len(selectedcids)) +
                      " unique (ignoring torsions) starting conformers remain")

        dup_data.at[dup_data_idx, 'RDKit-energy-duplicates'] = eng_dup
        dup_data.at[dup_data_idx,
                    'RDKit-RMS-and-energy-duplicates'] = eng_rms_dup
        dup_data.at[dup_data_idx,
                    'RDKIT-Unique-conformers'] = len(selectedcids)

        # now exhaustively drive torsions of selected conformers
        n_confs = int(
            len(selectedcids) * (360 / args.degree)**len(rotmatches))
        if args.verbose and len(rotmatches) != 0:
            log.write("\n\no Systematic generation of " + str(n_confs) +
                      " confomers")
            bar = IncrementalBar(
                'o Generating conformations based on dihedral rotation',
                max=len(selectedcids))
        else:
            bar = IncrementalBar('o Generating conformations',
                                 max=len(selectedcids))

        total = 0
        for conf in selectedcids:
            #log.write(outmols[conf])
            total += genConformer_r(outmols[conf], conf, 0, rotmatches,
                                    args.degree, sdwriter, args,
                                    outmols[conf].GetProp('_Name'), log)
            bar.next()
        bar.finish()
        if args.verbose and len(rotmatches) != 0:
            log.write("o %d total conformations generated" % total)
        status = 1
    sdwriter.close()

    #getting the energy from and mols after rotations
    # NOTE(review): this section also runs after the "too many torsions"
    # branch, where 'total' was never assigned -- confirm intended nesting.
    if len(rotmatches) != 0:
        rdmols = Chem.SDMolSupplier(name + '_' + 'rdkit' + args.output,
                                    removeHs=False)
        if rdmols is None:
            log.write("Could not open " + name + args.output)
            sys.exit(-1)

        bar = IncrementalBar(
            'o Filtering based on energy and rms after rotation of dihedrals',
            max=len(rdmols))
        sdwriter = Chem.SDWriter(name + '_' + 'rdkit' + '_' + 'rotated' +
                                 args.output)

        rd_count = 0
        rd_selectedcids, rd_dup_energy, rd_dup_rms_eng = [], -1, 0
        for i in range(len(rdmols)):
            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if rd_count == 0:
                rd_selectedcids.append(i)
                if args.metal_complex == True:
                    # NOTE(review): presumably each rdmols[i] lookup yields a
                    # fresh molecule, in which case this atom edit would not
                    # reach the write below -- verify against the supplier.
                    for atom in rdmols[i].GetAtoms():
                        if atom.GetSymbol() == 'I' and (
                                len(atom.GetBonds()) == 6
                                or len(atom.GetBonds()) == 5
                                or len(atom.GetBonds()) == 4
                                or len(atom.GetBonds()) == 3
                                or len(atom.GetBonds()) == 2):
                            for el in elementspt:
                                if el.symbol == args.metal:
                                    atomic_number = el.number
                                    atom.SetAtomicNum(atomic_number)
                sdwriter.write(rdmols[i])
            # Only the first ID gets included
            rd_count = 1
            # check rmsd
            for j in rd_selectedcids:
                if abs(
                        float(rdmols[i].GetProp('Energy')) -
                        float(rdmols[j].GetProp('Energy'))
                ) < args.initial_energy_threshold:  # comparison in kcal/mol
                    excluded_conf = True
                    rd_dup_energy += 1
                    break
                if abs(
                        float(rdmols[i].GetProp('Energy')) -
                        float(rdmols[j].GetProp('Energy'))
                ) < args.energy_threshold:  # in kcal/mol
                    rms = get_conf_RMS(rdmols[i], rdmols[j], -1, -1,
                                       args.heavyonly, args.max_matches_RMSD,
                                       log)
                    if rms < args.rms_threshold:
                        excluded_conf = True
                        rd_dup_rms_eng += 1
                        break
            if excluded_conf == False:
                if args.metal_complex == True:
                    for atom in rdmols[i].GetAtoms():
                        if atom.GetSymbol() == 'I' and (
                                len(atom.GetBonds()) == 6
                                or len(atom.GetBonds()) == 5
                                or len(atom.GetBonds()) == 4
                                or len(atom.GetBonds()) == 3
                                or len(atom.GetBonds()) == 2):
                            for el in elementspt:
                                if el.symbol == args.metal:
                                    atomic_number = el.number
                                    atom.SetAtomicNum(atomic_number)
                sdwriter.write(rdmols[i])
                if i not in rd_selectedcids:
                    rd_selectedcids.append(i)
            bar.next()
        bar.finish()
        sdwriter.close()

        if args.verbose == True:
            log.write("o " + str(rd_dup_energy) +
                      " Duplicates removed initial energy ( E < " +
                      str(args.initial_energy_threshold) + " kcal/mol )")
        if args.verbose == True:
            log.write("o " + str(rd_dup_rms_eng) +
                      " Duplicates removed (RMSD < " +
                      str(args.rms_threshold) + " / E < " +
                      str(args.energy_threshold) +
                      " kcal/mol) after rotation")
        if args.verbose == True:
            log.write("o " + str(len(rd_selectedcids)) +
                      " unique (after torsions) conformers remain")

        #filtering process after rotations
        dup_data.at[dup_data_idx, 'RDKIT-Rotated-conformers'] = total
        dup_data.at[dup_data_idx,
                    'RDKIT-Rotated-Unique-conformers'] = len(rd_selectedcids)

    return status
def test6Chirality(self):
    """Check chiral volumes of embedded conformers with and without chiral tags.

    With a chiral specification the chiral volume must stay close to the
    target value; without one, both signs of the volume are expected to
    appear across the embedded conformers.
    """

    def chiral_vol(conf, a, b, c, d):
        # Chiral volume spanned by the positions of atoms a, b, c and d.
        return computeChiralVol(conf.GetAtomPosition(a),
                                conf.GetAtomPosition(b),
                                conf.GetAtomPosition(c),
                                conf.GetAtomPosition(d))

    # turn on chirality and we should get chiral volume that is pretty
    # consistent and positive
    tgtVol = 13.0
    smiles = "Cl[C@](C)(F)Br"
    mol = Chem.MolFromSmiles(smiles)
    cids = rdDistGeom.EmbedMultipleConfs(mol, 30, maxAttempts=30,
                                         randomSeed=100)
    self.assertTrue(len(cids) == 30)
    for cid in cids:
        vol = chiral_vol(mol.GetConformer(cid), 0, 2, 3, 4)
        self.assertTrue(abs(vol - tgtVol) < 1)

    # turn off chirality and now we should see both chiral forms
    smiles = "ClC(C)(F)Br"
    mol = Chem.MolFromSmiles(smiles)
    cids = rdDistGeom.EmbedMultipleConfs(mol, 30, maxAttempts=30,
                                         randomSeed=120)
    self.assertTrue(len(cids) == 30)
    nPos = 0
    nNeg = 0
    for cid in cids:
        vol = chiral_vol(mol.GetConformer(cid), 0, 2, 3, 4)
        self.assertTrue(abs(vol - tgtVol) < 1 or abs(vol + tgtVol) < 1)
        if vol < 0:
            nNeg += 1
        else:
            nPos += 1
    self.assertTrue(nPos > 0)
    self.assertTrue(nNeg > 0)

    # single embeddings of a chiral centre carrying an implicit hydrogen
    tgtVol = 5.0
    smiles = "Cl[C@H](F)Br"
    for i in range(10):
        mol = Chem.MolFromSmiles(smiles)
        ci = rdDistGeom.EmbedMolecule(mol, 30, (i + 1) * 10)
        vol = chiral_vol(mol.GetConformer(ci), 0, 1, 2, 3)
        self.assertTrue(abs(vol - tgtVol) < 1, "%s %s" % (vol, tgtVol))

    # same centre without the chiral tag: either sign is accepted
    tgtVol = 3.5
    expected = [
        -3.62, -3.67, -3.72, 3.91, 3.95, 3.98, 3.90, 3.94, 3.98, 3.91
    ]
    nPos = 0
    nNeg = 0
    smiles = "ClC(F)Br"
    for i in range(30):
        mol = Chem.MolFromSmiles(smiles)
        ci = rdDistGeom.EmbedMolecule(mol, 30, (i + 1) * 10)
        vol = chiral_vol(mol.GetConformer(ci), 0, 1, 2, 3)
        self.assertTrue(abs(vol - tgtVol) < 1 or abs(vol + tgtVol) < 1)
        if vol < 0:
            nNeg += 1
        else:
            nPos += 1
    self.assertTrue(nPos > 0)
    self.assertTrue(nNeg > 0)

    # explicit hydrogens added before embedding
    smiles = "Cl[C@H](F)Br"
    m = Chem.MolFromSmiles(smiles)
    mol = Chem.AddHs(m)
    cids = rdDistGeom.EmbedMultipleConfs(mol, 10, maxAttempts=30,
                                         randomSeed=100)
    self.assertTrue(len(cids) == 10)
    tgtVol = 10.5
    for cid in cids:
        vol = chiral_vol(mol.GetConformer(cid), 0, 2, 3, 4)
        self.assertTrue(abs(vol - tgtVol) < 2.)

    # let's try a little more complicated system
    expectedV1 = -2.0
    expectedV2 = -2.9
    smi = "C1=CC=C(C=C1)[C@H](OC1=C[NH]N=C1)C(=O)[NH]C[C@H](Cl)C1=CC=NC=C1"
    for i in range(5):
        mol = Chem.MolFromSmiles(smi)
        ci = rdDistGeom.EmbedMolecule(mol, randomSeed=(i + 1) * 15)
        self.assertTrue(ci >= 0)
        ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, ci)
        ff.Minimize()

        conf = mol.GetConformer(ci)
        vol1 = chiral_vol(conf, 6, 3, 7, 13)
        self.assertTrue(
            abs(vol1 - expectedV1) < 1 or abs(vol1 + expectedV1) < 1)
        # nPos / nNeg continue from the counts accumulated above.
        if vol1 < 0:
            nNeg += 1
        else:
            nPos += 1

        vol2 = chiral_vol(conf, 17, 16, 18, 19)
        self.assertTrue(
            abs(vol2 - expectedV2) < 1 or abs(vol2 + expectedV2) < 1)

    # remove the chiral specification and we should see other chiral
    # forms of the compound
    expectedV1 = 2.0  #[-2.30, -2.31, -2.30, 2.30, -1.77]
    expectedV2 = 2.8  #[2.90, 2.89, 2.69, -2.90, -2.93]

    self.assertTrue(nPos > 0)
    self.assertTrue(nNeg > 0)
    smi = "C1=CC=C(C=C1)C(OC1=C[NH]N=C1)C(=O)[NH]CC(Cl)C1=CC=NC=C1"
    for i in range(5):
        mol = Chem.MolFromSmiles(smi)
        ci = rdDistGeom.EmbedMolecule(mol, 30, (i + 1) * 10)
        ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, ci)
        ff.Minimize()

        conf = mol.GetConformer(ci)
        vol1 = chiral_vol(conf, 6, 3, 7, 13)
        vol2 = chiral_vol(conf, 17, 16, 18, 19)
        self.assertTrue(abs(abs(vol1) - expectedV1) < 1.0)
        self.assertTrue(abs(abs(vol2) - expectedV2) < 1.0)
# Conformer generation, MMFF optimization and RMS matrix for one molecule
# taken from the second column of 'molecules_with_logS.csv'.
#suppl = Chem.SDMolSupplier('./platinum_dataset_2017_01.sdf', removeHs=False)
df = pd.read_csv('molecules_with_logS.csv')
smiles = df.iloc[:, 1]

# Parse every SMILES, then work with the entry at index 1201.
mols = [Chem.MolFromSmiles(smile) for smile in smiles]
mol = mols[1201]

# 1. Conformer generation
pm = rdDistGeom.ETKDGv2()
m_h = Chem.AddHs(mol)
cids = rdDistGeom.EmbedMultipleConfs(m_h, number_of_conformation, pm)
print(m_h.GetNumConformers())

# 2. MMFF optimization and energy calculation
prop = AllChem.MMFFGetMoleculeProperties(m_h)
energy = []
for cid in cids:
    mmff = AllChem.MMFFGetMoleculeForceField(m_h, prop, confId=cid)
    mmff.Minimize()
    energy.append(mmff.CalcEnergy())
energy = np.array(energy)

# 3. RMS matrix between conformers, computed on the hydrogen-stripped copy
m = Chem.RemoveHs(m_h)
rms_mat = AllChem.GetConformerRMSMatrix(m)