def mol_from_smiles(smiles, name, standardise=False):
    """Build an RDKit `PropertyMol` from a SMILES string.

    Parameters
    ----------
    smiles : str
        SMILES string
    name : str
        Name of molecule
    standardise : bool
        Clean Mol through standardisation

    Returns
    -------
    RDKit PropertyMol or None
        The molecule, with its ``_Name`` and ``_SMILES`` properties set, or
        ``None`` (after an error is logged) when RDKit cannot build a
        molecule from the SMILES string.
    """
    parsed = rdkit.Chem.MolFromSmiles(smiles)
    if parsed is not None:
        if standardise:
            parsed = mol_to_standardised_mol(parsed, name)
        prop_mol = PropertyMol(parsed)
        prop_mol.SetProp("_Name", name)
        prop_mol.SetProp("_SMILES", smiles)
        return prop_mol

    # Parsing failed: report both the SMILES and the molecule name.
    failure_context = (smiles, name)
    logging.error(
        "Mol creation failed from SMILES: {!r}".format(failure_context))
    return None
def mol_from_mol2(mol2_file, name=None, standardise=False):
    """Read a mol2 file into an RDKit `PropertyMol`.

    Parameters
    ----------
    mol2_file : str
        path to a mol2 file
    name : str, optional
        Name of molecule. If not provided, uses file basename as name
    standardise : bool
        Clean mol through standardisation

    Returns
    -------
    RDKit PropertyMol or None
        The molecule with its ``_Name`` property set, or ``None`` (after an
        error is logged) when RDKit cannot build a molecule from the file.
    """
    if name is None:
        name = os.path.splitext(os.path.basename(mol2_file))[0]
    mol = rdkit.Chem.MolFromMol2File(mol2_file)
    if mol is None:
        # Mirror mol_from_smiles: report the failure and return None rather
        # than passing None on to standardisation / PropertyMol.
        logging.error("Mol creation failed from mol2 file: {!r}".format(
            (mol2_file, name)))
        return None
    if standardise:
        mol = mol_to_standardised_mol(mol, name)
    mol = PropertyMol(mol)
    mol.SetProp("_Name", name)
    return mol
def mult_min(name, args, program,log,dup_data,dup_data_idx):
    """Optimise every molecule read for `name` with `program`, then filter and write.

    Parameters
    ----------
    name : str
        Base name of the input; the text before ``'_rdkit'`` is used to label
        the conformers.
    args : object
        Options object; ``args.verbose`` and ``args.output`` are read here and
        the whole object is forwarded to the helpers.
    program : str
        ``'xtb'`` or ``'ani'`` selects which ``dup_data`` column is filled;
        the value is also forwarded to ``optimize`` and ``write_confs``.
    log : object
        Logger exposing ``write``.
    dup_data : pandas-like table
        Statistics table, written through ``.at[dup_data_idx, column]``.
    dup_data_idx : hashable
        Row label of ``dup_data`` for this molecule.

    Returns
    -------
    None
        Results are passed to ``write_confs``.
    """
    # read SDF files from RDKit optimization
    inmols = rdkit_sdf_read(name, args, log)

    cenergy, outmols = [], []
    if args.verbose:
        log.write("\n\no Multiple minimization of "+ name+args.output+ " with "+ program)
    # The bar is created unconditionally: bar.next() runs for every molecule.
    bar = IncrementalBar('o Minimizing', max = len(inmols))

    for mol in inmols:
        bar.next()
        if mol is not None:
            # optimize this structure and record the energy
            mol, energy = optimize(mol, args, program, log, dup_data, dup_data_idx)
            outmols.append(PropertyMol.PropertyMol(mol))
            cenergy.append(energy)
    # Close the progress bar, as min_and_E_calc and the other loops do.
    bar.finish()

    # rank the optimised conformers by increasing energy
    cids = list(range(len(outmols)))
    sorted_all_cids = sorted(cids, key=lambda cid: cenergy[cid])

    name_mol = name.split('_rdkit')[0]
    for i, cid in enumerate(sorted_all_cids):
        outmols[cid].SetProp('_Name', name_mol +' conformer ' + str(i+1))
        # NOTE(review): cenergy[cid] is passed as returned by optimize();
        # confirm SetProp accepts that type (RDKit documents a string value).
        outmols[cid].SetProp('Energy', cenergy[cid])

    log.write("\n\no Applying filters to intial conformers")
    # filter based on energy window ewin_rdkit
    sortedcids = ewin_filter(sorted_all_cids,cenergy,args,dup_data,dup_data_idx,log,'xtb_ani')
    # pre-filter based on energy only
    selectedcids_initial = pre_E_filter(sortedcids,cenergy,args,dup_data,dup_data_idx,log,'xtb_ani')
    # filter based on energy and RMSD
    selectedcids = RMSD_and_E_filter(outmols,selectedcids_initial,cenergy,args,dup_data,dup_data_idx,log,'xtb_ani')

    if program == 'xtb':
        dup_data.at[dup_data_idx, 'xTB-Initial-samples'] = len(inmols)
    if program == 'ani':
        dup_data.at[dup_data_idx, 'ANI1ccx-Initial-samples'] = len(inmols)

    # write the filtered, ordered conformers to external file
    write_confs(outmols, cenergy,selectedcids, name, args, program,log)
def min_and_E_calc(mol,cids,args,log,coord_Map,alg_Map,mol_template):
    """Minimise each conformer in `cids` and collect its energy.

    When `coord_Map`, `alg_Map` and `mol_template` are all ``None`` the
    conformer is handled by ``minimize_rdkit_energy``; otherwise it goes
    through ``realign_mol``, which also returns the molecule to keep using.

    Returns a pair ``(outmols, cenergy)`` with one entry per conformer id:
    a ``PropertyMol`` snapshot of the molecule and the value of
    ``CalcEnergy()`` on the returned force field.
    """
    # The three template arguments are never reassigned, so test them once.
    no_template = (
        coord_Map is None and alg_Map is None and mol_template is None)

    energies = []
    snapshots = []
    progress = IncrementalBar('o Minimizing', max=len(cids))
    for conf_id in cids:
        if no_template:
            force_field = minimize_rdkit_energy(mol, conf_id, args, log)
        else:
            # id template realign before doing calculations
            mol, force_field = realign_mol(
                mol, conf_id, coord_Map, alg_Map, mol_template, args, log)
        energies.append(force_field.CalcEnergy())

        # One PropertyMol per conformer so that Name/Energy properties can
        # later be set independently for every conformer.
        snapshots.append(PropertyMol.PropertyMol(mol))
        progress.next()
    progress.finish()
    return snapshots, energies
def filter_conformers(self, mol):
    """Filter conformers which do not meet an RMSD threshold.

    Conformers are visited in order of increasing energy. The lowest-energy
    conformer is always accepted; each later one is rejected when
    ``self.first_conformers`` conformers are already accepted, when its
    energy exceeds the lowest energy by more than ``self.max_energy_diff``
    (``-1.`` disables this check), or when its best RMSD to any accepted
    conformer is below ``self.rmsd_cutoff``.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule.

    Returns
    -------
    new : RDKit PropertyMol
        Copy of the hydrogen-stripped molecule containing only the accepted
        conformers, in order of increasing energy.
    accepted : ndarray of int
        Indices (into the input conformer list) of the accepted conformers.
    energies : ndarray
        Energies of the accepted conformers, in the same order.
    rmsds : ndarray
        Square matrix of RMSDs between accepted conformers.
    """
    logging.debug("Pruning conformers for %s" % mol.GetProp('_Name'))
    energies = self.get_conformer_energies(mol)
    energy_below_threshold = np.ones_like(energies, dtype=np.bool_)

    sort = np.argsort(energies)  # sort by increasing energy
    confs = np.array(mol.GetConformers())

    # remove hydrogens to speed up substruct match
    mol = Chem.RemoveHs(mol)
    accepted = []
    rejected = []
    # Builtin float/int replace the np.float/np.int aliases, which were
    # removed from NumPy.
    rmsds = np.zeros((confs.shape[0], confs.shape[0]), dtype=float)
    for fit_ind in sort:
        accepted_num = len(accepted)

        # always accept lowest-energy conformer
        if accepted_num == 0:
            accepted.append(fit_ind)

            # pre-compute if Es are in acceptable range of min E
            if self.max_energy_diff != -1.:
                energy_below_threshold = (
                    energies <= energies[fit_ind] + self.max_energy_diff)

            # Nothing to compare against yet; without this the conformer
            # would be RMSD-checked against itself.
            continue

        # reject conformers after first_conformers is reached
        if accepted_num >= self.first_conformers:
            rejected.append(fit_ind)
            continue

        # check if energy is too high
        if not energy_below_threshold[fit_ind]:
            rejected.append(fit_ind)
            continue

        # get RMSD to selected conformers
        these_rmsds = np.zeros((accepted_num, ), dtype=float)
        # reverse so all confs aligned to lowest energy
        for j, accepted_ind in self.reverse_enumerate(accepted):
            this_rmsd = AllChem.GetBestRMS(mol, mol,
                                           confs[accepted_ind].GetId(),
                                           confs[fit_ind].GetId())
            # reject conformers within the RMSD threshold
            if this_rmsd < self.rmsd_cutoff:
                rejected.append(fit_ind)
                break
            else:
                # NOTE(review): slot -j-1 depends on what reverse_enumerate
                # yields for j; confirm it lines up with `accepted` below.
                these_rmsds[-j - 1] = this_rmsd
        else:
            # no accepted conformer was too close: keep this one
            rmsds[fit_ind, accepted] = these_rmsds
            rmsds[accepted, fit_ind] = these_rmsds
            accepted.append(fit_ind)

    # slice and order rmsds and energies to match accepted list
    rmsds = rmsds[np.ix_(accepted, accepted)]
    energies = energies[accepted]

    # create a new molecule with all conformers, sorted by energy
    new = PropertyMol.PropertyMol(mol)
    new.RemoveAllConformers()
    conf_ids = [conf.GetId() for conf in mol.GetConformers()]
    for accepted_ind in accepted:
        conf = mol.GetConformer(conf_ids[accepted_ind])
        new.AddConformer(conf, assignId=True)

    logging.debug("Conformers filtered for %s" % mol.GetProp('_Name'))
    return new, np.asarray(accepted, dtype=int), energies, rmsds
def mult_min(mol, name,args):
    '''Minimise each molecule of an SDF file, drop duplicates and label by energy.

    Every molecule read from ``name+output`` is minimised with the force
    field named by ``args.ff`` ("MMFF" or "UFF"). A molecule is kept when the
    minimisation reports convergence (return value 0), its energy lies within
    ``args.ewin`` of the lowest energy seen so far, and no previously kept
    molecule is close in both energy and ``getPMIDIFF``. When ``args.ANI1ccx``
    or ``args.xtb`` is set, each kept molecule is additionally processed with
    that method and its energy and coordinates replace the force-field ones.

    Returns ``(outmols, c_energy)``: the kept molecules as ``PropertyMol``
    objects and their energies, in the order they were accepted.

    NOTE(review): `output`, `constraints`, `device`, `model`, `GFN2` and
    `Hartree` are not defined in this function -- presumably module-level
    names; confirm. The `mol` argument is overwritten by the loop variable
    below before it is ever read.
    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original file.
    '''
    opt = True # switch to off for single point only
    opt_precision = 0.005 # toggle for optimization convergence

    #adjust opt convergence criteria (args.convergence defaults to 1.0)
    opt_precision = opt_precision * args.convergence

    inmols = Chem.SDMolSupplier(name+output, removeHs=False)
    if inmols is None:
        print("Could not open ", name+output)
        sys.exit(-1)

    c_converged, c_energy, outmols = [], [], []
    ani_energy,xtb_energy = 0,0
    # per-kept-molecule energies and coordinates from the ANI/xTB step
    if args.ANI1ccx == True or args.xtb == True: SQM_energy, SQM_cartesians = [], []

    # lowest force-field energy seen so far (running minimum, not final)
    globmin = None

    for i,mol in enumerate(inmols):
        conf = 1
        if mol is not None:
            if args.ff == "MMFF": GetFF = Chem.MMFFGetMoleculeForceField(mol, Chem.MMFFGetMoleculeProperties(mol))
            elif args.ff == "UFF": GetFF = Chem.UFFGetMoleculeForceField(mol)
            else: print((' Force field {} not supported!'.format(args.ff))); sys.exit()

            GetFF.Initialize()
            converged = GetFF.Minimize(maxIts=1000)
            energy = GetFF.CalcEnergy()
            # append to list
            #if args.verbose: print(" conformer", (i+1), energy)

            if globmin == None: globmin = energy
            if energy < globmin: globmin = energy

            if converged == 0 and (energy - globmin) < args.ewin:
                #if args.verbose: print(' minimization converged!')
                # unique counts matches against kept molecules; 0 means new
                unique, dup_id = 0, None
                #print("Conformer", (i+1), "optimized with", args.ff, "Energy:", energy)
                for j,seenmol in enumerate(outmols):
                    if abs(energy - c_energy[j]) < args.energy_threshold:
                        #print((i+1), energy, (j+1), c_energy[j], getPMIDIFF(mol,seenmol))
                        if getPMIDIFF(mol, seenmol) < args.rms_threshold * 25:
                            #print("o Conformer", (i+1), "matches conformer", (j+1))
                            unique += 1
                            dup_id = (j+1)

                if unique == 0:
                    if args.verbose == True: print("- Conformer", (i+1), "is unique")

                    if args.ANI1ccx == True or args.xtb == True:
                        # element string and coordinates of the first conformer
                        cartesians = mol.GetConformers()[0].GetPositions()
                        elements = ''
                        for atom in mol.GetAtoms(): elements += atom.GetSymbol()

                        coordinates = torch.tensor([cartesians.tolist()], requires_grad=True, device=device)

                        if args.ANI1ccx == True:
                            species = model.species_to_tensor(elements).to(device).unsqueeze(0)
                            _, ani_energy = model((species, coordinates))
                            if args.verbose: print("ANI Initial E:",ani_energy.item(),'eH') #Hartree

                            if opt == True:
                                ase_molecule = ase.Atoms(elements, positions=coordinates.tolist()[0], calculator=model.ase())
                                ### make a function for constraints and optimization
                                if constraints != None:
                                    fb = ase.constraints.FixBondLength(0, 1)
                                    ase_molecule.set_distance(0,1,2.0)
                                    ase_molecule.set_constraint(fb)

                                optimizer = ase.optimize.BFGS(ase_molecule)
                                optimizer.run(fmax=float(opt_precision))
                                species_coords = ase_molecule.get_positions().tolist()
                                coordinates = torch.tensor([species_coords], requires_grad=True, device=device)

                            ###############################################################################
                            # Now let's compute energy:
                            _, ani_energy = model((species, coordinates))
                            aniE = ani_energy.item() #Hartree
                            if args.verbose: print("ANI Final E:", aniE,'eH', ase_molecule.get_potential_energy(),'eV') #Hartree, eV
                            ###############################################################################

                        ### INCLUDE THE OPTIONS TO STORE MOLECULAR Descriptors
                        ### CHECK THIS WEBPAGE: https://github.com/grimme-lab/xtb/tree/master/python
                        elif args.xtb == True:
                            ase_molecule = ase.Atoms(elements, positions=coordinates.tolist()[0], calculator=GFN2()) #define ase molecule using GFN2 Calculator
                            if opt == True:
                                if args.verbose: print("Initial XTB energy", ase_molecule.get_potential_energy()/Hartree,'Eh',ase_molecule.get_potential_energy(),'eV') #Hartree, eV
                                optimizer = ase.optimize.BFGS(ase_molecule)
                                optimizer.run(fmax=float(opt_precision))
                                species_coords = ase_molecule.get_positions().tolist()
                                coordinates = torch.tensor([species_coords], requires_grad=True, device=device)
                            ###############################################################################
                            # Now let's compute energy:
                            xtb_energy = ase_molecule.get_potential_energy()
                            if args.verbose: print("Final XTB E:",xtb_energy/Hartree,'Eh',xtb_energy,'eV') #Hartree, eV
                            ###############################################################################

                        if args.ANI1ccx == True or args.xtb == True:#save Eh and coordinates to write to SDF
                            if args.xtb == True:SQM_energy.append(xtb_energy/Hartree)
                            else:SQM_energy.append(ani_energy.item())
                            cartesians = np.array(coordinates.tolist()[0])
                            SQM_cartesians.append(cartesians)

                    pmol = PropertyMol.PropertyMol(mol)
                    outmols.append(pmol); c_converged.append(converged); c_energy.append(energy)
                    conf += 1

                else: print("x Conformer", (i+1), "is a duplicate of", dup_id)
            else:
                print("x Minimization of conformer", (i+1), " not converged / energy too high!", converged, (energy - globmin), args.ewin)
                #pass
        else:
            pass #print("No molecules to optimize")

    # if SQM energy exists, overwrite RDKIT energies and geometries
    cids = list(range(len(outmols)))
    # NOTE(review): this ranking is computed from the force-field energies,
    # because c_energy is only overwritten with SQM values further down.
    sortedcids = sorted(cids, key = lambda cid: c_energy[cid])

    if args.ANI1ccx == True or args.xtb == True:
        for conf in cids:
            c_energy[conf] = SQM_energy[conf]
            c = outmols[conf].GetConformer()
            for j in range(outmols[conf].GetNumAtoms()):
                #print(cartesians[i])
                [x,y,z] = SQM_cartesians[conf][j]
                c.SetAtomPosition(j,Point3D(x,y,z))

            # report (but do not remove) earlier molecules that now look identical
            for j in range(0,conf):
                if abs(c_energy[conf] - c_energy[j]) < args.energy_threshold / 2625.5 and getPMIDIFF(outmols[conf], outmols[j]) < args.rms_threshold:
                    print("It appears ",conf, "is the same as", j)

    for i, cid in enumerate(sortedcids):
        outmols[cid].SetProp('_Name', name + ' conformer ' + str(i+1))
        # NOTE(review): the energy is passed without string conversion;
        # confirm SetProp accepts that (RDKit documents a string value).
        outmols[cid].SetProp('Energy', c_energy[cid])

    return outmols, c_energy
def mult_min(name, args, program, log, dup_data, dup_data_idx):
    '''Optimise the molecules of an SDF file, drop duplicates, rank by energy.

    Each molecule in ``name + args.output`` is passed to ``optimize``. A
    result is kept when it converged (flag 0), lies within ``args.ewin`` of
    the lowest energy seen so far, and is not a duplicate of an already kept
    molecule (energy gap below ``args.initial_energy_threshold``, or energy
    gap below ``args.energy_threshold`` together with an RMS below
    ``args.rms_threshold``). Counters are stored in ``dup_data`` and the kept
    molecules are handed to ``write_confs``.

    Returns ``(outmols, c_energy)`` in the order molecules were accepted.
    '''
    inmols = Chem.SDMolSupplier(name + args.output, removeHs=False)
    if inmols is None:
        log.write("Could not open " + name + args.output)
        sys.exit(-1)

    globmin = None  # running minimum of the optimised energies
    n_high = 0
    n_dup_energy = 0
    n_dup_rms_eng = 0
    c_energy = []
    outmols = []

    if args.verbose:
        log.write("\n\no Multiple minimization of " + name + args.output +
                  " with " + program)
    bar = IncrementalBar('o Minimizing', max=len(inmols))

    for mol in inmols:
        bar.next()
        if mol is None:
            continue

        # optimize this structure and record the energy
        mol, converged, energy = optimize(mol, args, program, log, dup_data,
                                          dup_data_idx)

        if globmin is None or energy < globmin:
            globmin = energy

        # comparison in kcal/mol
        if not (converged == 0 and (energy - globmin) < args.ewin):
            n_high += 1
            continue

        # compare against all previous conformers located
        for seen_energy, seenmol in zip(c_energy, outmols):
            energy_gap = abs(energy - seen_energy)  # in kcal/mol
            if energy_gap < args.initial_energy_threshold:
                n_dup_energy += 1
                break
            if energy_gap < args.energy_threshold:
                rms = get_conf_RMS(mol, seenmol, 0, 0, args.heavyonly,
                                   args.max_matches_RMSD, log)
                if rms < args.rms_threshold:
                    n_dup_rms_eng += 1
                    break
        else:
            # no earlier molecule matched: this one is unique
            outmols.append(PropertyMol.PropertyMol(mol))
            c_energy.append(energy)

    bar.finish()

    if args.verbose == True:
        log.write("o " + str(n_dup_energy) +
                  " Duplicates removed initial energy ( E < " +
                  str(args.initial_energy_threshold) + " kcal/mol )")
        log.write("o " + str(n_dup_rms_eng) + " Duplicates removed (RMSD < " +
                  str(args.rms_threshold) + " / E < " +
                  str(args.energy_threshold) + " kcal/mol)")
        log.write("o " + str(n_high) +
                  " Conformers rejected based on energy ( E > " +
                  str(args.ewin) + " kcal/mol)")

    # label the kept molecules by increasing energy
    sortedcids = sorted(range(len(outmols)), key=c_energy.__getitem__)
    for rank, cid in enumerate(sortedcids, start=1):
        kept = outmols[cid]
        kept.SetProp('_Name', name + ' conformer ' + str(rank))
        kept.SetProp('Energy', c_energy[cid])

    n_unique = len(sortedcids)
    if program == 'xtb':
        dup_data.at[dup_data_idx, 'xTB-Initial-samples'] = len(inmols)
        dup_data.at[dup_data_idx, 'xTB-initial_energy_threshold'] = n_dup_energy
        dup_data.at[dup_data_idx, 'xTB-RMS-and-energy-duplicates'] = n_dup_rms_eng
        dup_data.at[dup_data_idx, 'xTB-Unique-conformers'] = n_unique
    if program == 'ani':
        dup_data.at[dup_data_idx, 'ANI1ccx-Initial-samples'] = len(inmols)
        dup_data.at[dup_data_idx, 'ANI1ccx-initial_energy_threshold'] = n_dup_energy
        dup_data.at[dup_data_idx, 'ANI1ccx-RMS-and-energy-duplicates'] = n_dup_rms_eng
        dup_data.at[dup_data_idx, 'ANI1ccx-Unique-conformers'] = n_unique

    # write the filtered, ordered conformers to external file
    write_confs(outmols, c_energy, name, args, program, log)

    return outmols, c_energy
def _restore_metal_atoms(rd_mol, args):
    '''Give iodine atoms with 2-6 bonds in `rd_mol` the atomic number of `args.metal` (in place).'''
    for atom in rd_mol.GetAtoms():
        if atom.GetSymbol() == 'I' and 2 <= len(atom.GetBonds()) <= 6:
            for el in elementspt:
                if el.symbol == args.metal:
                    atom.SetAtomicNum(el.number)


def summ_search(mol, name, args, log, dup_data, dup_data_idx, coord_Map=None, alg_Map=None, mol_template=None):
    '''Embed conformers, minimise and filter them, then rotate torsions.

    Steps:
      1. embed ``initial_confs`` conformers (optionally constrained by
         `coord_Map` when a template is supplied);
      2. minimise each with the force field named by ``args.ff`` (switched
         to "UFF" when any atom has atomic number above 36);
      3. drop duplicates, first by energy, then by energy and RMS;
      4. hand each surviving conformer to ``genConformer_r``, which writes
         to ``name + '_rdkit' + args.output``;
      5. if torsions were found, re-read that file, filter again and write
         ``name + '_rdkit_rotated' + args.output``.

    Statistics are stored in ``dup_data`` at row ``dup_data_idx``.

    Returns
    -------
    int
        ``1`` on success, ``-1`` when the number of rotatable torsions
        exceeds ``args.max_torsions`` (nothing is generated in that case).

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source -- confirm against the original file.
    '''
    sdwriter = Chem.SDWriter(name + '_' + 'rdkit' + args.output)

    Chem.SanitizeMol(mol)
    mol = Chem.AddHs(mol)
    mol.SetProp("_Name", name)

    # detects and applies auto-detection of initial number of conformers
    if args.sample == 'auto':
        initial_confs = int(auto_sampling(args.auto_sample, mol, log))
    else:
        initial_confs = int(args.sample)

    # NOTE(review): the collapsed source leaves it ambiguous whether the
    # 'Molecule' assignment was commented out; kept active -- confirm.
    dup_data.at[dup_data_idx, 'Molecule'] = name
    dup_data.at[dup_data_idx, 'RDKIT-Initial-samples'] = initial_confs

    if args.nodihedrals == False:
        rotmatches = getDihedralMatches(mol, args.heavyonly, log)
    else:
        rotmatches = []

    if len(rotmatches) > args.max_torsions:
        log.write("x Too many torsions (%d). Skipping %s" %
                  (len(rotmatches), (name + args.output)))
        # Skip everything else: the post-rotation stage relies on values
        # (e.g. `total`) that only exist when conformers were generated.
        sdwriter.close()
        return -1

    # `is None` avoids element-wise comparison if an array is ever passed
    no_template = (coord_Map is None and alg_Map is None
                   and mol_template is None)

    if no_template:
        if args.etkdg:
            ps = Chem.ETKDG()
            ps.randomSeed = args.seed
            ps.ignoreSmoothingFailures = True
            ps.numThreads = 0
            cids = rdDistGeom.EmbedMultipleConfs(mol, initial_confs, params=ps)
        else:
            cids = rdDistGeom.EmbedMultipleConfs(
                mol, initial_confs, ignoreSmoothingFailures=True,
                randomSeed=args.seed, numThreads=0)
        # fall back to random starting coordinates when embedding gave
        # nothing, or a single conformer although more were requested
        if len(cids) == 0 or (len(cids) == 1 and initial_confs != 1):
            log.write(
                "o conformers initially sampled with random coordinates")
            cids = rdDistGeom.EmbedMultipleConfs(
                mol, initial_confs, randomSeed=args.seed,
                useRandomCoords=True, boxSizeMult=10.0,
                ignoreSmoothingFailures=True, numZeroFail=1000, numThreads=0)
        if args.verbose:
            log.write("o " + str(len(cids)) + " conformers initially sampled")
    # case of embed for templates
    else:
        if args.etkdg:
            ps = Chem.ETKDG()
            ps.randomSeed = args.seed
            ps.coordMap = coord_Map
            ps.ignoreSmoothingFailures = True
            ps.numThreads = 0
            cids = rdDistGeom.EmbedMultipleConfs(mol, initial_confs, params=ps)
        else:
            cids = rdDistGeom.EmbedMultipleConfs(
                mol, initial_confs, randomSeed=args.seed,
                ignoreSmoothingFailures=True, coordMap=coord_Map,
                numThreads=0)
        if len(cids) == 0 or (len(cids) == 1 and initial_confs != 1):
            log.write(
                "o conformers initially sampled with random coordinates")
            cids = rdDistGeom.EmbedMultipleConfs(
                mol, initial_confs, randomSeed=args.seed,
                useRandomCoords=True, boxSizeMult=10.0, numZeroFail=1000,
                ignoreSmoothingFailures=True, coordMap=coord_Map,
                numThreads=0)
        if args.verbose:
            log.write("o " + str(len(cids)) + " conformers initially sampled")

    #energy minimize all to get more realistic results
    #identify the atoms and decide Force Field
    for atom in mol.GetAtoms():
        if atom.GetAtomicNum() > 36:  #upto Kr for MMFF, if not use UFF
            args.ff = "UFF"

    if args.verbose:
        log.write("o Optimizing " + str(len(cids)) +
                  " initial conformers with" + args.ff)
    if args.verbose:
        if args.nodihedrals == False:
            log.write("o Found " + str(len(rotmatches)) +
                      " rotatable torsions")
        else:
            log.write("o Systematic torsion rotation is set to OFF")

    cenergy, outmols = [], []
    bar = IncrementalBar('o Minimizing', max=len(cids))
    for conf in cids:
        if no_template:
            if args.ff == "MMFF":
                GetFF = Chem.MMFFGetMoleculeForceField(
                    mol, Chem.MMFFGetMoleculeProperties(mol), confId=conf)
            elif args.ff == "UFF":
                GetFF = Chem.UFFGetMoleculeForceField(mol, confId=conf)
            else:
                log.write(' Force field {} not supported!'.format(args.ff))
                sys.exit()

            GetFF.Initialize()
            GetFF.Minimize(maxIts=args.opt_steps_RDKit)
            energy = GetFF.CalcEnergy()
        #id template realign before doing calculations
        else:
            num_atom_match = mol.GetSubstructMatch(mol_template)
            # Force field parameters
            if args.ff == "MMFF":
                ff_temp = Chem.MMFFGetMoleculeForceField(
                    mol, Chem.MMFFGetMoleculeProperties(mol), confId=conf)
            elif args.ff == "UFF":
                ff_temp = Chem.UFFGetMoleculeForceField(mol, confId=conf)
            else:
                # `args.ff` here: the original referenced an undefined
                # `options` object, which raised NameError instead of
                # reporting the unsupported force field.
                log.write(' Force field {} not supported!'.format(args.ff))
                sys.exit()

            # clean up the conformation: pin every pair of matched atoms to
            # the distance they have in coord_Map
            for k, idxI in enumerate(num_atom_match):
                for l in range(k + 1, len(num_atom_match)):
                    idxJ = num_atom_match[l]
                    d = coord_Map[idxI].Distance(coord_Map[idxJ])
                    ff_temp.AddDistanceConstraint(idxI, idxJ, d, d, 10000)
            ff_temp.Initialize()
            #reassignned n from 4 to 10 for better embed and minimzation
            n = 10
            more = ff_temp.Minimize()
            while more and n:
                more = ff_temp.Minimize()
                n -= 1
            energy = ff_temp.CalcEnergy()
            # rotate the embedded conformation onto the core_mol:
            rdMolAlign.AlignMol(mol, mol_template, prbCid=conf,
                                atomMap=alg_Map, reflect=True, maxIters=100)
        cenergy.append(energy)

        # outmols holds one PropertyMol copy of the molecule per conformer,
        # so that Name and Energy properties can be set independently for
        # each conformer before writing to the SDF file.
        pmol = PropertyMol.PropertyMol(mol)
        outmols.append(pmol)
        bar.next()
    bar.finish()

    for i, cid in enumerate(cids):
        outmols[cid].SetProp('_Name', name + ' conformer ' + str(i + 1))
        # NOTE(review): the energy is passed without string conversion;
        # confirm SetProp accepts that (RDKit documents a string value).
        outmols[cid].SetProp('Energy', cenergy[cid])

    cids = list(range(len(outmols)))
    sortedcids = sorted(cids, key=lambda cid: cenergy[cid])

    log.write("\n\no Filters after intial embedding of " +
              str(initial_confs) + " conformers")
    # The duplicate counters start at -1 because the first conformer is
    # compared with itself and counted once.
    selectedcids, selectedcids_initial, eng_dup, eng_rms_dup = [], [], -1, -1

    bar = IncrementalBar('o Filtering based on energy (pre-filter)',
                         max=len(sortedcids))
    for i, conf in enumerate(sortedcids):
        # This keeps track of whether or not your conformer is unique
        excluded_conf = False
        # include the first conformer in the list to start the filtering process
        if i == 0:
            selectedcids_initial.append(conf)
        # check energy against every conformer kept so far
        for seenconf in selectedcids_initial:
            E_diff = abs(cenergy[conf] - cenergy[seenconf])  # in kcal/mol
            if E_diff < args.initial_energy_threshold:
                eng_dup += 1
                excluded_conf = True
                break
        if excluded_conf == False:
            if conf not in selectedcids_initial:
                selectedcids_initial.append(conf)
        bar.next()
    bar.finish()

    if args.verbose == True:
        log.write("o " + str(eng_dup) +
                  " Duplicates removed pre-energy filter (E < " +
                  str(args.initial_energy_threshold) + " kcal/mol )")

    #reduce to unique set
    if args.verbose:
        log.write("o Removing duplicate conformers ( RMSD < " +
                  str(args.rms_threshold) + " and E difference < " +
                  str(args.energy_threshold) + " kcal/mol)")

    bar = IncrementalBar('o Filtering based on energy and rms',
                         max=len(selectedcids_initial))
    #check rmsd
    for i, conf in enumerate(selectedcids_initial):
        #set torsions to same value
        for m in rotmatches:
            rdMolTransforms.SetDihedralDeg(
                outmols[conf].GetConformer(conf), *m, 180.0)

        # This keeps track of whether or not your conformer is unique
        excluded_conf = False
        # include the first conformer in the list to start the filtering process
        if i == 0:
            selectedcids.append(conf)
        # check rmsd
        for seenconf in selectedcids:
            E_diff = abs(cenergy[conf] - cenergy[seenconf])  # in kcal/mol
            if E_diff < args.energy_threshold:
                rms = get_conf_RMS(outmols[conf], outmols[conf], seenconf,
                                   conf, args.heavyonly,
                                   args.max_matches_RMSD, log)
                if rms < args.rms_threshold:
                    excluded_conf = True
                    eng_rms_dup += 1
                    break
        if excluded_conf == False:
            if conf not in selectedcids:
                selectedcids.append(conf)
        bar.next()
    bar.finish()

    if args.verbose == True:
        log.write("o " + str(eng_rms_dup) + " Duplicates removed (RMSD < " +
                  str(args.rms_threshold) + " / E < " +
                  str(args.energy_threshold) + " kcal/mol) after rotation")
    if args.verbose:
        log.write("o " + str(len(selectedcids)) +
                  " unique (ignoring torsions) starting conformers remain")

    dup_data.at[dup_data_idx, 'RDKit-energy-duplicates'] = eng_dup
    dup_data.at[dup_data_idx, 'RDKit-RMS-and-energy-duplicates'] = eng_rms_dup
    dup_data.at[dup_data_idx, 'RDKIT-Unique-conformers'] = len(selectedcids)

    # now exhaustively drive torsions of selected conformers
    n_confs = int(len(selectedcids) * (360 / args.degree)**len(rotmatches))
    if args.verbose and len(rotmatches) != 0:
        log.write("\n\no Systematic generation of " + str(n_confs) +
                  " confomers")
        bar = IncrementalBar(
            'o Generating conformations based on dihedral rotation',
            max=len(selectedcids))
    else:
        bar = IncrementalBar('o Generating conformations',
                             max=len(selectedcids))

    total = 0
    for conf in selectedcids:
        total += genConformer_r(outmols[conf], conf, 0, rotmatches,
                                args.degree, sdwriter, args,
                                outmols[conf].GetProp('_Name'), log)
        bar.next()
    bar.finish()
    if args.verbose and len(rotmatches) != 0:
        log.write("o %d total conformations generated" % total)
    status = 1

    sdwriter.close()

    #getting the energy from and mols after rotations
    if len(rotmatches) != 0:
        rdmols = Chem.SDMolSupplier(name + '_' + 'rdkit' + args.output,
                                    removeHs=False)
        if rdmols is None:
            log.write("Could not open " + name + args.output)
            sys.exit(-1)

        bar = IncrementalBar(
            'o Filtering based on energy and rms after rotation of dihedrals',
            max=len(rdmols))
        sdwriter = Chem.SDWriter(name + '_' + 'rdkit' + '_' + 'rotated' +
                                 args.output)

        rd_count = 0
        # rd_dup_energy starts at -1: the first molecule matches itself once
        rd_selectedcids, rd_dup_energy, rd_dup_rms_eng = [], -1, 0
        for i in range(len(rdmols)):
            # Fetch the molecule once so that the metal substitution is
            # applied to the very object that gets written; indexing the
            # supplier again may hand back a freshly parsed molecule.
            rd_mol = rdmols[i]
            energy_i = float(rd_mol.GetProp('Energy'))

            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if rd_count == 0:
                rd_selectedcids.append(i)
                if args.metal_complex == True:
                    _restore_metal_atoms(rd_mol, args)
                sdwriter.write(rd_mol)
            # Only the first ID gets included
            rd_count = 1

            # check energy, then energy plus rmsd, against kept molecules
            for j in rd_selectedcids:
                rd_mol_j = rdmols[j]
                E_diff = abs(energy_i - float(rd_mol_j.GetProp('Energy')))
                if E_diff < args.initial_energy_threshold:  # in kcal/mol
                    excluded_conf = True
                    rd_dup_energy += 1
                    break
                if E_diff < args.energy_threshold:  # in kcal/mol
                    rms = get_conf_RMS(rd_mol, rd_mol_j, -1, -1,
                                       args.heavyonly, args.max_matches_RMSD,
                                       log)
                    if rms < args.rms_threshold:
                        excluded_conf = True
                        rd_dup_rms_eng += 1
                        break
            if excluded_conf == False:
                if args.metal_complex == True:
                    _restore_metal_atoms(rd_mol, args)
                sdwriter.write(rd_mol)
                if i not in rd_selectedcids:
                    rd_selectedcids.append(i)
            bar.next()
        bar.finish()
        sdwriter.close()

        if args.verbose == True:
            log.write("o " + str(rd_dup_energy) +
                      " Duplicates removed initial energy ( E < " +
                      str(args.initial_energy_threshold) + " kcal/mol )")
        if args.verbose == True:
            log.write("o " + str(rd_dup_rms_eng) +
                      " Duplicates removed (RMSD < " +
                      str(args.rms_threshold) + " / E < " +
                      str(args.energy_threshold) +
                      " kcal/mol) after rotation")
        if args.verbose == True:
            log.write("o " + str(len(rd_selectedcids)) +
                      " unique (after torsions) conformers remain")

        #filtering process after rotations
        dup_data.at[dup_data_idx, 'RDKIT-Rotated-conformers'] = total
        dup_data.at[dup_data_idx, 'RDKIT-Rotated-Unique-conformers'] = len(rd_selectedcids)

    return status