def conf_generator(self, inp):
    """Embed, align, optionally minimize and prune conformers for a molecule.

    Args:
        inp: Two-item sequence ``(mol, name)``. ``mol`` is the RDKit
            molecule to embed; hydrogens are expected to have been added
            beforehand. ``name`` is passed through unchanged.

    Returns:
        list: ``[mol, name]`` where ``mol`` is the value returned by
        ``self.prune_conformers``.
    """
    molecule, label = inp

    # ETKDGv2 embedding parameters: prune by RMSD using the instance-level
    # threshold and leave the random seed at -1.
    params = AllChem.ETKDGv2()
    params.pruneRmsThresh = self.rmsd
    params.randomSeed = -1

    # Rotatable-bond dependent, adaptive number of conformers.
    n_confs = self.adaptive_conf(molecule)

    # Generate the 3D conformers; the returned ids refer to conformers that
    # are stored on ``molecule`` itself.
    conf_ids = AllChem.EmbedMultipleConfs(molecule, n_confs, params)

    # Align all conformers to the first one. The RMS values are collected
    # but not used any further in this method.
    rms_values = []
    AllChem.AlignMolConformers(molecule, RMSlist=rms_values)

    # Optional UFF minimization (roughly 2x slower than skipping it); the
    # minimized geometries can then be used to cluster conformers.
    if self.run_uff:
        for conf_id in conf_ids:
            AllChem.UFFOptimizeMolecule(molecule, confId=conf_id)

    pruned = self.prune_conformers(molecule, conf_ids, n_confs)
    return [pruned, label]
def generateconformations(m, n, maxAttempts=1000, pruneRmsThresh=0.1,
                          useExpTorsionAnglePrefs=True,
                          useBasicKnowledge=True, enforceChirality=True):
    """Embed up to ``n`` conformers, minimize them with MMFF and align them.

    Args:
        m: RDKit molecule; explicit hydrogens are added to a copy of it.
        n: Number of conformers requested from the embedder.
        maxAttempts, pruneRmsThresh, useExpTorsionAnglePrefs,
        useBasicKnowledge, enforceChirality: Forwarded unchanged to
            ``AllChem.EmbedMultipleConfs``.

    Returns:
        tuple: ``(mol_with_hydrogens, conformer_ids)`` where
        ``conformer_ids`` is a plain Python list.
    """
    mol_h = Chem.AddHs(m)

    embed_options = dict(
        numConfs=n,
        maxAttempts=maxAttempts,
        pruneRmsThresh=pruneRmsThresh,
        useExpTorsionAnglePrefs=useExpTorsionAnglePrefs,
        useBasicKnowledge=useBasicKnowledge,
        enforceChirality=enforceChirality,
        numThreads=0,
    )
    conf_ids = AllChem.EmbedMultipleConfs(mol_h, **embed_options)

    # Force-field minimization of every embedded conformer (MMFF).
    for conf_id in conf_ids:
        AllChem.MMFFOptimizeMolecule(mol_h, confId=conf_id)

    AllChem.AlignMolConformers(mol_h)

    # EmbedMultipleConfs returns a Boost-wrapped type which cannot be
    # pickled. Convert it to a Python list, which can.
    return mol_h, list(conf_ids)
def generate_conformers(molecule: Chem.Mol, n_conformers: int, random_seed: int = -1, alignment: bool = False) -> Chem.Mol:
    """Embed conformers for a molecule after adding explicit hydrogens.

    Parameters
    ----------
    molecule : rdkit.Chem.Mol
        Molecule for which conformers will be generated. It is not modified;
        hydrogens are added to the molecule that is returned.

    n_conformers : int
        Number of conformers requested from the embedder.

    random_seed : int, optional
        Random seed forwarded to the embedder (Default: -1).

    alignment : bool, optional
        If true the generated conformers are aligned (Default: False).

    Returns
    -------
    rdkit.Chem.Mol
        Hydrogen-added molecule carrying the generated conformers.

    Raises
    ------
    OpenPharmacophoreTypeError
        If ``n_conformers`` is not an ``int``.
    OpenPharmacophoreValueError
        If ``n_conformers`` is negative.
    """
    if not isinstance(n_conformers, int):
        raise OpenPharmacophoreTypeError("n_conformers must be an integer")

    # NOTE(review): zero passes this check although the message says
    # "greater than 0" -- confirm which of the two is intended.
    if n_conformers < 0:
        raise OpenPharmacophoreValueError("n_conformers must be greater than 0")

    # Hydrogens are added so that the embedded geometries are realistic.
    with_hydrogens = Chem.AddHs(molecule)
    AllChem.EmbedMultipleConfs(
        with_hydrogens, numConfs=n_conformers, randomSeed=random_seed)

    if alignment:
        AllChem.AlignMolConformers(with_hydrogens)

    return with_hydrogens
def align_atoms(self, mol, atoms):
    """Align the conformers of ``mol`` using only the given atoms.

    Args:
        mol: RDKit molecule whose conformers are aligned in place.
        atoms: Atom indices passed to RDKit as ``atomIds``.

    Returns:
        list: RMS values filled in by ``AllChem.AlignMolConformers``.
    """
    rms_values = []
    AllChem.AlignMolConformers(
        mol,
        maxIters=self.max_iters,
        atomIds=atoms,
        RMSlist=rms_values,
    )
    return rms_values
def generate_conformers(
    molecule: Chem.rdchem.Mol,
    max_num_conformers: int,
    *,
    random_seed: int = -1,
    prune_rms_thresh: float = -1.0,
    max_iter: int = -1,
    fallback_to_random: bool = False,
) -> Chem.rdchem.Mol:
    """Generates aligned conformers for a copy of the given molecule.

    Args:
      molecule: molecular representation of the compound. The input object is
        deep-copied first and is not modified.
      max_num_conformers: maximum number of conformers to generate. If pruning
        is done, the returned number of conformers is not guaranteed to match
        max_num_conformers.
      random_seed: random seed to use for conformer generation.
      prune_rms_thresh: RMSD threshold which allows to prune conformers that
        are too similar.
      max_iter: maximum number of iterations for the force-field optimisation.
        If set to <= 0 (the default), no energy optimisation is performed.
      fallback_to_random: if conformers cannot be obtained, use random
        coordinates to initialise.

    Returns:
      A new molecule carrying the generated conformers, aligned to each other.
      Hydrogens are added for embedding/optimisation and removed again before
      returning. When ``max_iter > 0`` the conformers have been passed through
      MMFF optimisation, or through UFF when the MMFF helper returns ``None``.
    """
    working = Chem.AddHs(copy.deepcopy(molecule))
    working = _embed_conformers(
        working,
        max_num_conformers,
        random_seed,
        prune_rms_thresh,
        fallback_to_random,
        use_random=False)

    # Without optimisation the embedded molecule is used as-is; otherwise try
    # MMFF first and fall back to UFF if MMFF yields nothing.
    result = working
    if max_iter > 0:
        result = _minimize_by_mmff(working, max_iter)
        if result is None:
            result = _minimize_by_uff(working, max_iter)

    # Align all conformations to each other, using the first conformation as
    # the reference.
    AllChem.AlignMolConformers(result)

    # Hydrogens are removed to keep the number of atoms consistent with the
    # graph nodes.
    return Chem.RemoveHs(result)
def _split_sdf_records(lines, max_records):
    """Map 'sec_<k>' to the atom/bond lines of the k-th '$$$$'-terminated record."""
    # 1-based line numbers of the record terminators.
    record_ends = [num for num, line in enumerate(lines, 1) if "$$$$" in line]

    sections = {}
    first_line = 0  # 0-based index of the first line of the current record
    for k, end in enumerate(record_ends[:max_records], 1):
        # Skip the three header lines plus the counts line at the top of the
        # record, and stop before the "M END" line that precedes "$$$$".
        sections['sec_{0}'.format(k)] = lines[first_line + 4:end - 2]
        first_line = end
    return sections


def build_confs(smiles, conf_num=10, element_dict=element_dict):
    """Embed conformers for a SMILES string and return their SD-file blocks.

    The molecule is parsed, hydrogens are added, up to ``conf_num``
    conformers are embedded, aligned and UFF-minimized, and all of them are
    written to ``confs_sdf.sdf`` in the current working directory. The file
    is then read back and split into one section per conformer.

    Args:
        smiles: SMILES string of the molecule.
        conf_num: Number of conformers requested from the embedder.
        element_dict: Accepted for interface compatibility; not used here.

    Returns:
        tuple: ``(sdf_dict, num_atoms, conf_num)``. ``sdf_dict`` maps
        ``'sec_1'``, ``'sec_2'``, ... to lists of whitespace-normalized atom
        and bond lines. ``num_atoms`` counts atoms including hydrogens. The
        third item is the number of sections actually produced; it equals
        the requested ``conf_num`` unless the embedder returned fewer
        conformers.

    Raises:
        ValueError: If ``smiles`` cannot be parsed.
    """
    sdf_path = 'confs_sdf.sdf'

    m = Chem.MolFromSmiles(smiles)
    if m is None:
        raise ValueError('Could not parse SMILES: {0!r}'.format(smiles))
    m = Chem.AddHs(m, addCoords=True)
    num_atoms = m.GetNumAtoms()

    AllChem.EmbedMultipleConfs(m, conf_num)
    AllChem.AlignMolConformers(m)

    # Use the real conformer ids rather than assuming they are 0..n-1.
    conf_ids = [conf.GetId() for conf in m.GetConformers()]
    for cid in conf_ids:
        AllChem.UFFOptimizeMolecule(m, confId=cid)

    # Close the writer (not just flush it) so the handle is released before
    # the file is read back.
    writer = Chem.SDWriter(sdf_path)
    try:
        for cid in conf_ids:
            writer.write(m, confId=cid)
    finally:
        writer.close()

    # Read the file once and collapse runs of whitespace in every line.
    with open(sdf_path) as handle:
        normalized = [' '.join(line.split())
                      for line in handle.read().splitlines()]

    # Embedding may yield fewer conformers than requested; only build the
    # sections that exist instead of indexing past the last record.
    sdf_dict = _split_sdf_records(normalized, conf_num)
    return sdf_dict, num_atoms, len(sdf_dict)
def torsional_search(smiles, iterations=100000, RMSthresh=1):
    """Run an RDKit conformer search and return coordinates and energies.

    Args:
        smiles: SMILES string representing the molecule.
        iterations: Number of conformers requested from the embedder.
        RMSthresh: RMS pruning threshold for the embedding (how different
            new conformers need to be).

    Returns:
        tuple: ``(xyzs, energies)``. ``xyzs`` is a list (conformers) of
        lists (atoms) of ``[x, y, z]`` floats; ``energies`` holds the second
        element of every entry returned by the MMFF94s optimisation.
    """
    # Hydrogens are not read from the SMILES string; RDKit adds them.
    rdmol = Chem.AddHs(Chem.MolFromSmiles(smiles))

    # Distance-geometry conformer search.
    # NOTE(review): the original comment refers to the method of Riniker &
    # Landrum, "Better Informed Distance Geometry: Using What We Know To
    # Improve Conformation Generation" (2015) -- confirm that the installed
    # RDKit applies it by default.
    AllChem.EmbedMultipleConfs(
        rdmol,
        clearConfs=True,
        numConfs=iterations,
        pruneRmsThresh=RMSthresh,
    )

    # Alignment is not strictly necessary but makes later visualisation
    # more convenient.
    AllChem.AlignMolConformers(rdmol)

    # Each entry is a (status, energy) pair; the status is ignored for now.
    optimisation = AllChem.MMFFOptimizeMoleculeConfs(
        rdmol, mmffVariant='MMFF94s')
    energies = [entry[1] for entry in optimisation]

    atom_count = len(rdmol.GetAtoms())
    xyzs = []
    for conformer in rdmol.GetConformers():
        coords = []
        for atom_index in range(atom_count):
            position = conformer.GetAtomPosition(atom_index)
            coords.append([float(position[axis]) for axis in range(3)])
        xyzs.append(coords)

    return xyzs, energies
def GenerateMolConformersWithoutMinimization(Mol, MolNum=None):
    """Generate conformers for a molecule without performing minimization.

    Arguments:
        Mol: RDKit molecule to embed.
        MolNum: Molecule number, used for embedding and for naming the
            molecule in messages.

    Returns:
        list: [Mol, Status, ConfIDs, ConfEnergies]. On embedding failure it
        is [Mol, False, None, None]. Otherwise Status is True, ConfIDs is a
        list of conformer IDs and ConfEnergies is either None (energy output
        disabled) or a list of formatted energy strings.
    """
    ConfIDs = EmbedMolecule(Mol, MolNum)

    if len(ConfIDs) == 0:
        if not OptionsInfo["QuietMode"]:
            MiscUtil.PrintWarning(
                "Conformation generation couldn't be performed for molecule %s: Embedding failed...\n"
                % RDKitUtil.GetMolName(Mol, MolNum))
        return [Mol, False, None, None]

    if OptionsInfo["AlignConformers"]:
        AllChem.AlignMolConformers(Mol)

    if not OptionsInfo["QuietMode"]:
        MiscUtil.PrintInfo(
            "\nNumber of conformations generated for %s: %d"
            % (RDKitUtil.GetMolName(Mol, MolNum), len(ConfIDs)))

    # Convert ConfIDs into a plain list...
    ConfIDsList = list(ConfIDs)

    if not OptionsInfo["EnergyOut"]:
        return [Mol, True, ConfIDsList, None]

    # Setup conformation energies; a failed lookup is reported but does not
    # abort the remaining conformers.
    ConfEnergies = []
    for ConfID in ConfIDsList:
        EnergyStatus, Energy = GetConformerEnergy(Mol, ConfID)
        if EnergyStatus:
            ConfEnergies.append("%.2f" % Energy)
            continue

        ConfEnergies.append("NotAvailable")
        if not OptionsInfo["QuietMode"]:
            MiscUtil.PrintWarning(
                "Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n"
                % (ConfID, RDKitUtil.GetMolName(Mol, MolNum)))

    return [Mol, True, ConfIDsList, ConfEnergies]
def GenerateMolConformers(Mol, MolCount, Writer):
    """Generate conformers for a molecule and write them out.

    Arguments:
        Mol: RDKit molecule to embed.
        MolCount: Molecule count, used to derive the molecule name.
        Writer: Object whose write(Mol, confId=...) method is called once
            per conformer.

    Returns:
        bool: False when embedding produced no conformers; True otherwise.
    """
    MolName = RDKitUtil.GetMolName(Mol, MolCount)
    ConfIDs = EmbedMolecule(Mol)
    ConfCount = len(ConfIDs)

    if ConfCount == 0:
        MiscUtil.PrintWarning(
            "Conformation generation couldn't be performed for molecule %s: Embedding failed...\n"
            % MolName)
        return False

    if OptionsInfo["AlignConformers"]:
        AllChem.AlignMolConformers(Mol)

    # Name and write out each conformer in turn...
    for ConfID in ConfIDs:
        SetConfMolName(Mol, MolName, ConfID)
        Writer.write(Mol, confId=ConfID)

    MiscUtil.PrintInfo(
        "\nNumber of conformations written for %s: %d" % (MolName, ConfCount))

    return True
def align_to_me(self, other_conf):
    """Align another conformer onto this one.

    The other conformer is temporarily appended to ``self.mol``, all
    conformers are aligned on the heavy-atom indices, the aligned copy is
    handed back to ``other_conf`` and the temporary conformer is removed.

    :param other_conf: The other conformer to align.
    :type other_conf: MyConformer
    :return: The same MyConformer object, now holding the aligned
       coordinates.
    :rtype: MyConformer
    """

    # Temporarily attach the other object's conformer to this molecule.
    incoming = other_conf.conformer()
    self.mol.AddConformer(incoming, assignId=True)

    # Align every conformer of the molecule, using heavy atoms only.
    AllChem.AlignMolConformers(self.mol, atomIds=self.ids_hvy_atms)

    # The conformer that was just added is the last one; pass it back to the
    # other object before it is removed from this molecule.
    appended = self.mol.GetConformers()[-1]
    other_conf.conformer(appended)
    self.mol.RemoveConformer(appended.GetId())

    return other_conf
#smiles = 'C1CCCCC1N(C)C(=O)C(=CI)C(O)C=C' #smiles = 'C1(C(O)(C)(C))=CC=CC=C1CCC(SCC2CC2CC(=O)O)C3=CC=CC(=C3)C=CC4=CC=C(C5=N4)C=CC(Cl)=C5' #Montelukast smiles = 'C1=CC=CC=C1C(=O)NC(C2=CC=CC=C2)C(O)C(=O)OC(C(C)-C3(C(OC(=O)C))C(=O)C4(C)C(O)CC(OC5)C5(OC(=O)C)C4C7(OC(=O)C6=CC=CC=C6))CC7(O)(C3(C)(C))' # Paclitaxel #smiles = 'C1CCCC1CCSCC' #smiles = 'OC(=O)O' m = Chem.MolFromSmiles(smiles) m = Chem.AddHs(m, addCoords=True) conf_num = 10 num_atoms = m.GetNumAtoms() AllChem.EmbedMultipleConfs(m, conf_num) rmslist = [] AllChem.AlignMolConformers(m, RMSlist=rmslist) #print(rmslist) rms values from first conformer to the others. AllChem.GetConformerRMS(m2, 1, 9, prealigned=True) for others for i in range(m.GetNumConformers()): AllChem.UFFOptimizeMolecule(m, confId=i) #v.ShowMol(m,confId=i,name='conf-%d'%i,showOnly=False) w = Chem.SDWriter('confs_sdf.sdf') for i in range(m.GetNumConformers()): w.write(m, confId=i) w.flush() f = open('confs_sdf.sdf', 'r') lines = f.readlines() f.close()
def GenerateMolConformersWithMinimization(Mol, MolNum):
    """Generate and minimize conformers for a molecule.

    The molecule is embedded, every conformer is minimized with the
    forcefield selected in OptionsInfo (UFF or MMFF), and the conformers are
    sorted by energy. The lowest energy conformer is always selected; the
    others are kept only if they lie within the energy window and, when an
    energy RMSD cutoff > 0 is set, are at least that far (in RMSD) from the
    lowest energy conformer.

    Arguments:
        Mol: RDKit molecule to embed and minimize.
        MolNum: Molecule number, used for embedding and for naming the
            molecule in messages.

    Returns:
        list: [Mol, Status, SelectedConfIDs, SelectedConfEnergies]. Any
        failure (embedding, minimization error, or energy retrieval) gives
        [Mol, False, None, None]. On success Status is True,
        SelectedConfIDs is ordered by increasing energy and
        SelectedConfEnergies is None unless energy output is enabled.
    """
    if OptionsInfo["AddHydrogens"]:
        Mol = Chem.AddHs(Mol)

    ConfIDs = EmbedMolecule(Mol, MolNum)
    if not len(ConfIDs):
        if not OptionsInfo["QuietMode"]:
            MolName = RDKitUtil.GetMolName(Mol, MolNum)
            MiscUtil.PrintWarning(
                "Conformation generation couldn't be performed for molecule %s: Embedding failed...\n"
                % MolName)
        return [Mol, False, None, None]

    CalcEnergyMap = {}
    for ConfID in ConfIDs:
        try:
            if OptionsInfo["UseUFF"]:
                Status = AllChem.UFFOptimizeMolecule(
                    Mol, confId=ConfID, maxIters=OptionsInfo["MaxIters"])
            elif OptionsInfo["UseMMFF"]:
                Status = AllChem.MMFFOptimizeMolecule(
                    Mol,
                    confId=ConfID,
                    maxIters=OptionsInfo["MaxIters"],
                    mmffVariant=OptionsInfo["MMFFVariant"])
            else:
                MiscUtil.PrintError(
                    "Minimization couldn't be performed: Specified forcefield, %s, is not supported"
                    % OptionsInfo["ForceField"])
        except (RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
            if not OptionsInfo["QuietMode"]:
                MolName = RDKitUtil.GetMolName(Mol, MolNum)
                MiscUtil.PrintWarning(
                    "Minimization couldn't be performed for molecule %s:\n%s\n"
                    % (MolName, ErrMsg))
            return [Mol, False, None, None]

        EnergyStatus, Energy = GetConformerEnergy(Mol, ConfID)
        if not EnergyStatus:
            if not OptionsInfo["QuietMode"]:
                MolName = RDKitUtil.GetMolName(Mol, MolNum)
                MiscUtil.PrintWarning(
                    "Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n"
                    % (ConfID, MolName))
            return [Mol, False, None, None]

        # A non-zero status is only reported; the conformer is still kept.
        if Status != 0:
            if not OptionsInfo["QuietMode"]:
                MolName = RDKitUtil.GetMolName(Mol, MolNum)
                MiscUtil.PrintWarning(
                    "Minimization failed to converge for conformation number %d of molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n"
                    % (ConfID, MolName, OptionsInfo["MaxIters"]))

        CalcEnergyMap[ConfID] = Energy

    if OptionsInfo["RemoveHydrogens"]:
        Mol = Chem.RemoveHs(Mol)

    # Align molecules after minimization...
    if OptionsInfo["AlignConformers"]:
        AllChem.AlignMolConformers(Mol)

    SortedConfIDs = sorted(ConfIDs, key=lambda ConfID: CalcEnergyMap[ConfID])

    MinEnergyConfID = SortedConfIDs[0]
    MinConfEnergy = CalcEnergyMap[MinEnergyConfID]

    EnergyWindow = OptionsInfo["EnergyWindow"]
    EnergyRMSDCutoff = OptionsInfo["EnergyRMSDCutoff"]
    ApplyEnergyRMSDCutoff = EnergyRMSDCutoff > 0

    # Calculate RMSD values relative to the lowest energy conformer; the
    # conformers count as prealigned only when alignment was requested...
    PreAligned = bool(OptionsInfo["AlignConformers"])
    CalcRMSDMap = {}
    if ApplyEnergyRMSDCutoff:
        for ConfID in SortedConfIDs:
            CalcRMSDMap[ConfID] = AllChem.GetConformerRMS(
                Mol, MinEnergyConfID, ConfID, prealigned=PreAligned)

    # Track conformers within the specified energy window from the lowest
    # energy conformation along with applying RMSD cutoff as needed. The
    # lowest energy conformer is always selected...
    SelectedConfIDs = [MinEnergyConfID]
    IgnoredByEnergyConfCount = 0
    IgnoredByRMSDConfCount = 0

    for ConfID in SortedConfIDs[1:]:
        ConfEnergyDiff = abs(CalcEnergyMap[ConfID] - MinConfEnergy)
        if ConfEnergyDiff > EnergyWindow:
            IgnoredByEnergyConfCount += 1
            continue

        if ApplyEnergyRMSDCutoff and CalcRMSDMap[ConfID] < EnergyRMSDCutoff:
            IgnoredByRMSDConfCount += 1
            continue

        SelectedConfIDs.append(ConfID)

    if not OptionsInfo["QuietMode"]:
        MolName = RDKitUtil.GetMolName(Mol, MolNum)
        # Report the number of conformers actually selected. The previous
        # counter skipped the lowest energy conformer, so the reported total
        # was one less than the number of conformers returned.
        MiscUtil.PrintInfo(
            "\nTotal Number of conformations generated for %s: %d"
            % (MolName, len(SelectedConfIDs)))
        MiscUtil.PrintInfo(
            "Number of conformations ignored due to energy window cutoff: %d"
            % (IgnoredByEnergyConfCount))
        if ApplyEnergyRMSDCutoff:
            MiscUtil.PrintInfo(
                "Number of conformations ignored due to energy RMSD cutoff:  %d"
                % (IgnoredByRMSDConfCount))

    SelectedConfEnergies = None
    if OptionsInfo["EnergyOut"]:
        SelectedConfEnergies = [
            "%.2f" % CalcEnergyMap[ConfID] for ConfID in SelectedConfIDs
        ]

    return [Mol, True, SelectedConfIDs, SelectedConfEnergies]
# Copy the flat list `rms_mat` into the 2D array `rms`, row by row; `idx` is
# the running offset into `rms_mat`.
# NOTE(review): `rms_mat` looks like a flattened lower-triangle RMS matrix
# (presumably from GetConformerRMSMatrix -- confirm). Row i of such a matrix
# holds i values, but the slice below copies i+1 -- verify this is intended.
idx = 0
for i in range(1, number_of_conformation):
    rms[i][:i+1] = rms_mat[idx:i+idx+1]
    idx += i

## 4. Store the heavy-atom coordinates in a numpy array
def genConfCoord(cid):
    """Return the coordinates of conformer `cid` of `m` as a flat array.

    The result has the layout [x0, y0, z0, x1, y1, z1, ...], following the
    atom order of `m`.
    """
    conf = m.GetConformer(cid)
    coord = []
    # Iterates over every atom present in `m`.
    # NOTE(review): the section heading says "heavy atoms" -- presumably
    # hydrogens were removed from `m` earlier; confirm.
    for atom in m.GetAtoms():
        atom_idx = atom.GetIdx()
        x,y,z = conf.GetAtomPosition(atom_idx)
        coord.extend([x,y,z])
    return np.array(coord)

# Align the conformers before collecting coordinates, then build one row of
# flattened coordinates per conformer id in `cids`.
AllChem.AlignMolConformers(m)
coord_array = np.zeros((len(cids), 3*m.GetNumAtoms()))
for i, cid in enumerate(cids):
    coord_array[i] = genConfCoord(cid)

### Standardize for clustering
scaler = RobustScaler()
scaler.fit(coord_array)
scaled_coord = scaler.transform(coord_array)

# Collect the row indices (from row i onwards) whose RMS value in column i
# is below 0.05 and whose energy differs from entry i by less than 0.5.
del_index = set()
for i in range(number_of_conformation):
    d = pd.DataFrame({'rms': rms[:,i], 'energy': energy})
    # Energies relative to entry i.
    d.energy = d.energy - d.energy[i]
    del_index = del_index | set(d[i:].query('rms < 0.05 and -0.5 < energy and energy < 0.5').index)
def align_global(self, mol):
    """Align the conformers of ``mol`` without restricting the atom set.

    Args:
        mol: RDKit molecule whose conformers are aligned in place.

    Returns:
        list: RMS values filled in by ``AllChem.AlignMolConformers``.
    """
    rms_values = []
    AllChem.AlignMolConformers(
        mol,
        maxIters=self.max_iters,
        RMSlist=rms_values,
    )
    return rms_values
def generate_mining_minima_structures(
    self, rmsd_threshold: float = 0.1, include_entropy_correction: bool = False
) -> (list, unit.Quantity, list):
    """
    Minimizes all conformations of the initial and the final state and
    filters them based on a RMSD threshold.

    For each of the two states every conformation is minimized with the ANI
    energy function, a thermochemistry correction is added to its energy, the
    minimized coordinates are written back into a copy of the state's RDKit
    molecule, the conformers are aligned and then pruned with
    ``prune_conformers``.

    NOTE(review): five values are returned (see below), whereas the return
    annotation lists only three.

    Parameters
    ----------
    rmsd_threshold : float
        Threshold for RMSD filtering.
    include_entropy_correction : bool
        whether to add the per-state entropy (degeneracy) correction to
        every conformer energy or not

    Returns
    -------
    confs_traj : list
        list of md.Trajectory objects with filtered conformations
        (one per state, initial state first)
    e : difference of the weighted energies, final state minus initial state
        (described as unitless, in kT, by the original documentation)
    minimum_energies : list
        list of energies for the different minimum conformations
        (one entry per state)
    all_energies : list
        per state, a copy of all energies before filtering
    all_conformations : list
        per state, a copy of the RDKit molecule before filtering
    """
    from .ani import ANI1ccx
    from .analysis import prune_conformers, calculate_weighted_energy

    bw_energies = []
    confs_traj = []
    minimum_energies = []
    all_energies = []
    all_conformations = []

    # Iterate over the two states in lockstep: first the initial state, then
    # the final state. The RDKit molecules are deep-copied so the instance
    # attributes are not modified by the coordinate updates below.
    for (
        ase_mol,
        rdkit_mol,
        ligand_atoms,
        get_ligand_coords,
        top,
        entropy_correction,
        get_nr_of_confs,
    ) in zip(
        [self.initial_state_ase_mol, self.final_state_ase_mol],
        [
            copy.deepcopy(self.initial_state_mol),
            copy.deepcopy(self.final_state_mol),
        ],
        [self.initial_state_ligand_atoms, self.final_state_ligand_atoms],
        [
            self.get_initial_state_ligand_coords,
            self.get_final_state_ligand_coords
        ],
        [
            self.initial_state_ligand_topology,
            self.final_state_ligand_topology
        ],
        [
            self.initial_state_entropy_correction,
            self.final_state_entropy_correction,
        ],
        [
            self.get_nr_of_initial_state_ligand_coords,
            self.get_nr_of_final_state_ligand_coords,
        ],
    ):

        print("Mining Minima starting ...")
        # A fresh model is created for each state.
        model = ANI1ccx()
        model = model.to(device)

        energy_function = ANI_force_and_energy(model=model,
                                               atoms=ligand_atoms,
                                               mol=ase_mol)

        energies: list = []
        for conf_id in range(get_nr_of_confs()):
            # minimize
            print(f"Conf: {conf_id}")
            minimized_coords, _ = energy_function.minimize(
                get_ligand_coords(conf_id))
            # minimized_coords have dimensions [1][N_atoms][3]
            energy = energy_function.calculate_energy(minimized_coords)
            try:
                thermochemistry_correction = energy_function.get_thermo_correction(
                    minimized_coords)
            except ValueError:
                logger.critical(
                    "Imaginary frequencies present - found transition state."
                )
                # NOTE(review): the conformer is skipped here, so no energy is
                # recorded for it and its coordinates are not updated, but it
                # stays in rdkit_mol. `energies` can therefore be shorter than
                # the number of conformers passed to prune_conformers --
                # confirm that prune_conformers tolerates this.
                continue

            if include_entropy_correction:
                energies.append(energy.energy[0] + thermochemistry_correction +
                                entropy_correction)
            else:
                energies.append(energy.energy[0] + thermochemistry_correction)

            # update the coordinates in the rdkit mol
            for atom in rdkit_mol.GetAtoms():
                conf = rdkit_mol.GetConformer(conf_id)
                new_coords = Geometry.rdGeometry.Point3D()
                # Coordinates are converted to angstrom before being stored.
                new_coords.x = minimized_coords[0][
                    atom.GetIdx()][0].value_in_unit(unit.angstrom)
                new_coords.y = minimized_coords[0][
                    atom.GetIdx()][1].value_in_unit(unit.angstrom)
                new_coords.z = minimized_coords[0][
                    atom.GetIdx()][2].value_in_unit(unit.angstrom)
                conf.SetAtomPosition(atom.GetIdx(), new_coords)

        # align the molecules
        AllChem.AlignMolConformers(rdkit_mol)
        # Keep unfiltered copies of the energies and the molecule.
        all_energies.append(copy.deepcopy(energies))
        all_conformations.append(copy.deepcopy(rdkit_mol))

        min_and_filtered_rdkit_mol, filtered_energies = prune_conformers(
            rdkit_mol, copy.deepcopy(energies), rmsd_threshold=rmsd_threshold)

        # generate mdtraj object
        traj = []
        for conf_idx in range(
                min_and_filtered_rdkit_mol.GetNumConformers()):
            tmp_coord_list = []
            for a in min_and_filtered_rdkit_mol.GetAtoms():
                pos = min_and_filtered_rdkit_mol.GetConformer(
                    conf_idx).GetAtomPosition(a.GetIdx())
                tmp_coord_list.append([pos.x, pos.y, pos.z])
            # Positions are tagged as angstrom and converted to nanometer
            # before they are handed to md.Trajectory.
            tmp_coord_list = np.array(tmp_coord_list) * unit.angstrom
            traj.append(tmp_coord_list.value_in_unit(unit.nanometer))

        confs_traj.append(md.Trajectory(traj, top))
        minimum_energies.append(filtered_energies)
        bw_energies.append(calculate_weighted_energy(filtered_energies))
        print("Mining Minima finished ...")

    # Index 0 is the initial state, index 1 the final state.
    e = bw_energies[1] - bw_energies[0]
    return confs_traj, e, minimum_energies, all_energies, all_conformations
def confsearchsmiles(name, smiles, cmp, eout, optd):
    """Run a conformer search for one SMILES string and write the results.

    Conformers are embedded, optimized with a forcefield and cleaned; the
    survivors are optimized with ``cmp``, aligned, and ranked by the energies
    that ``cmp`` reports. The conformers are written to
    ``optd + name + '.sdf'`` in order of increasing energy and one summary
    line is appended to ``eout``.

    Args:
        name: Label of the molecule; used in messages, in the output file
            name and in the summary line.
        smiles: SMILES string of the molecule.
        cmp: Object providing ``optimize_rdkit_molecule`` and
            ``energy_rdkit_conformers``.
        eout: Writable text stream that receives the summary line.
        optd: Prefix (directory) prepended to the SD file name.

    Returns:
        None. The function returns early, after printing a message, when the
        SMILES cannot be parsed or no conformer survives the cleaning step.
    """
    # Create RDKit MOL
    m = Chem.MolFromSmiles(smiles)
    if m is None:
        # Invalid SMILES previously crashed on the attribute access below.
        print('Skipping ' + name + ': could not parse SMILES.')
        return
    print('Working on:', name, 'Heavyatoms(', m.GetNumHeavyAtoms(), ')')

    # Conf generation routine
    m = EmbedConformers(m)
    m = OptimizeConformersFF(m)

    # Keep more conformers for molecules with more rotatable bonds.
    nrot = Chem.rdMolDescriptors.CalcNumRotatableBonds(m)
    m = CleanConformers(m,
                        max_keep=max(20, nrot * 4),
                        max_energy=25.0,
                        rmsd_thresh=0.5)

    # Get new cids
    cids = [x.GetId() for x in m.GetConformers()]
    print(len(cids))
    if len(cids) < 1:
        print('Skipping ' + name + ': not enough confs to continue.')
        return

    # ANI OPT: one optimizer return value per conformer, in `cids` order.
    ochk = np.zeros(len(cids), dtype=np.int64)
    for i, cid in enumerate(cids):
        print('   -Optimizing confid:', cid)
        ochk[i] = cmp.optimize_rdkit_molecule(m, cid=cid, fmax=0.001)

    # Align conformers
    AllChem.AlignMolConformers(m, maxIters=200)

    # Get energies and std dev., converted with hdt.hatokcal.
    E, V = cmp.energy_rdkit_conformers(m, cids)
    E = hdt.hatokcal * E
    V = hdt.hatokcal * V
    print(E - E.min())

    # Sort by energy (low to high)
    idx = np.argsort(E)
    X, S = pya.__convert_rdkitconfs_to_nparr__(m)
    Xs = X[idx]
    Es = E[idx]
    Vs = V[idx]
    # The optimizer flags must follow the same ordering as the energies;
    # previously they were written unsorted and so did not line up with
    # Es/Vs in the summary line.
    ochk_sorted = ochk[idx]

    # Overwrite the conformer coordinates so that the conformers, visited in
    # `cids` order, go from lowest to highest energy.
    natm = m.GetNumAtoms()
    for cid, x in zip(cids, Xs):
        conf = m.GetConformer(cid)
        for i in range(natm):
            conf.SetAtomPosition(
                i, [float(x[i][0]),
                    float(x[i][1]),
                    float(x[i][2])])

    # Write out conformations; always release the file handle.
    sdf = AllChem.SDWriter(optd + name + '.sdf')
    try:
        for cid in cids:
            sdf.write(m, confId=cid)
    finally:
        sdf.close()

    # Write out energy and sigma data
    eout.write(name + ' ' + str(Es) + ' ' + str(Vs) + ' ' +
               str(ochk_sorted) + '\n')
    eout.flush()
    return
def compare_confomer_generator_and_trajectory_minimum_structures(
    results_path: str, name: str, base: str, tautomer_idx: int, thinning: int = 100
):
    """Compare conformer-generator minima with minima found from a trajectory.

    Frames of a vacuum trajectory are thinned, minimized with the ANI energy
    function and collected as conformers of an RDKit molecule. These are
    compared, via RMSD over a maximum common substructure, with the pruned
    conformers stored in the pickled results. A heatmap of the RMSD matrix is
    shown and summary statistics are printed.

    Parameters
    ----------
    results_path : str
        Directory containing ``ani_mm_results.pickle`` and
        ``exp_results.pickle``.
    name : str
        Key of the system in both result dictionaries; also used to build the
        trajectory and PDB paths.
    base : str
        Base directory of the trajectory and PDB files.
    tautomer_idx : int
        Which tautomer to analyse; must be 1 or 2.
    thinning : int
        Only every ``thinning``-th trajectory frame is minimized.

    Returns
    -------
    compare_mol
        RDKit molecule (hydrogens removed by ``_remove_hydrogens``) holding
        the minimized trajectory conformations, aligned to each other.
    minimum_traj
        md.Trajectory built from the minimized conformations.
    reference_mol
        RDKit molecule (hydrogens removed by ``_remove_hydrogens``) holding
        the pruned reference conformers, aligned to each other.
    reference[1]
        Second element of the ``prune_conformers`` result for the reference.
    """
    assert tautomer_idx == 1 or tautomer_idx == 2

    # NOTE(review): pickle.load executes arbitrary code from the file; these
    # files must come from a trusted source. The file objects opened here are
    # never closed explicitly.
    ani_results = pickle.load(open(f"{results_path}/ani_mm_results.pickle", "rb"))
    exp_results = pickle.load(open(f"{results_path}/exp_results.pickle", "rb"))

    # generate the tautomer object
    t1_smiles = exp_results[name]["t1-smiles"]
    t2_smiles = exp_results[name]["t2-smiles"]
    t_type, tautomers, flipped = generate_tautomer_class_stereobond_aware(
        name, t1_smiles, t2_smiles, nr_of_conformations=1, enforceChirality=True
    )
    tautomer = tautomers[0]
    print(f"Flipped: {flipped}")
    tautomer.perform_tautomer_transformation()

    # This pruning of the t1 conformers is only used for the printout below.
    tautomer_mol = prune_conformers(
        ani_results[name]["t1-confs"],
        ani_results[name]["t1-energies"],
        rmsd_threshold=0.1,
    )
    print(len(tautomer_mol[1]))

    traj_path = (
        f"{base}/{name}/{name}_lambda_{tautomer_idx-1}.0000_kappa_0.0000_in_vacuum.dcd"
    )
    pdb_path = f"{base}/{name}/{name}_0.pdb"

    # load trajectory, remove dummy atom
    traj = md.load(traj_path, top=pdb_path)
    atom_idx = [a.index for a in traj.topology.atoms]
    # The atom index that is dropped depends on which tautomer is analysed.
    if (tautomer_idx - 1) == 1:
        atom_idx.remove(int(tautomer.hybrid_hydrogen_idx_at_lambda_0))
    else:
        atom_idx.remove(int(tautomer.hybrid_hydrogen_idx_at_lambda_1))

    traj = traj.atom_slice(atom_indices=atom_idx)

    # save pdb without dummy atom
    tautomer_pdb = f"{base}/{name}/{name}_without_dummy_{tautomer_idx}.pdb"
    traj[0].save_pdb(tautomer_pdb)

    # generate rdkit mol object with the same atom indices as the trajectory but without the dummy atom
    mol = Chem.MolFromPDBFile(tautomer_pdb, removeHs=False)
    # remove conf of pdb
    mol.RemoveAllConformers()

    # generate energy function, use atom symbols of rdkit mol
    from .ani import ANI_force_and_energy, ANI1ccx

    model = ANI1ccx()
    energy_function = ANI_force_and_energy(
        model=model, atoms=[a.GetSymbol() for a in mol.GetAtoms()], mol=None
    )

    # take every `thinning`-th conformation (default: every 100th) and minimize it using ANI1
    minimized_traj = []  # store min conformations in here
    for idx, conf in enumerate(traj[::thinning]):
        print(f"{idx}/{len(traj[::thinning])}")
        c = (conf.xyz[0]) * unit.nanometer
        min_conf = energy_function.minimize(c)[
            0
        ]  # only real atoms, therefore lambda not needed
        minimized_traj.append(min_conf)
        new_conf = _generate_conformer(min_conf)
        # add the conformation to the rdkit mol object
        mol.AddConformer(new_conf, assignId=True)

    # generate mdtraj object with minimized confs
    minimum_traj = md.Trajectory(
        np.array([v.value_in_unit(unit.nanometer) for v in minimized_traj]),
        traj.topology,
    )

    # generate reference_mol
    reference = prune_conformers(
        ani_results[name][f"t{tautomer_idx}-confs"],
        ani_results[name][f"t{tautomer_idx}-energies"],
        rmsd_threshold=0.1,
    )

    # remove most hydrogens
    reference_mol = _remove_hydrogens(copy.deepcopy(reference[0]))
    compare_mol = _remove_hydrogens(copy.deepcopy(mol))

    # find atom indices that are compared for RMSD
    sub_m = rdFMCS.FindMCS(
        [reference_mol, compare_mol],
        bondCompare=Chem.rdFMCS.BondCompare.CompareOrder.CompareAny,
        maximizeBonds=False,
    )
    mcsp = Chem.MolFromSmarts(sub_m.smartsString, False)

    # the order of the substructure lists are the same for both
    # substructure matches => substructure_idx_m1[i] = substructure_idx_m2[i]
    substructure_idx_reference = reference_mol.GetSubstructMatches(mcsp, uniquify=False)
    substructure_idx_compare = compare_mol.GetSubstructMatches(mcsp, uniquify=False)

    # generate rmsd matrix: rows are reference conformers, columns are the
    # minimized trajectory conformers
    rmsd = np.zeros(
        (reference_mol.GetNumConformers(), mol.GetNumConformers()), dtype=float
    )

    # save clusters
    got_hit = np.zeros(reference_mol.GetNumConformers(), dtype=int)

    # atom mapping
    from itertools import combinations

    # NOTE(review): the pairs are drawn from the concatenation of both match
    # lists, so they also include reference/reference and compare/compare
    # pairs -- confirm this is intended.
    for nr_of_mappings, (e1, e2) in enumerate(
        combinations(substructure_idx_reference + substructure_idx_compare, 2)
    ):

        atom_mapping = [(a1, a2) for a1, a2 in zip(e1, e2)]
        # get rmsd matrix with a given set of atom mapping
        # update rmsd matrix whenever lower RMSD appears
        for i in range(len(reference_mol.GetConformers())):
            for j in range(len(compare_mol.GetConformers())):

                proposed_rmsd = AllChem.AlignMol(
                    reference_mol, compare_mol, i, j, atomMap=atom_mapping
                )
                # test if this is optimal atom mapping
                if nr_of_mappings == 0:
                    rmsd[i, j] = proposed_rmsd
                else:
                    rmsd[i, j] = min(rmsd[i, j], proposed_rmsd)

    # Count, per reference conformer, how many trajectory conformers lie
    # within an RMSD of 0.1.
    for i in range(len(reference_mol.GetConformers())):
        for j in range(len(compare_mol.GetConformers())):
            if rmsd[i, j] <= 0.1:
                got_hit[i] += 1

    sns.heatmap(rmsd)
    plt.show()

    print(f"Nr of clusters: {len(got_hit)}")
    print(
        f"Nr of conformations part of one cluster: {sum(got_hit)}/{mol.GetNumConformers()}"
    )
    print(f"Clusters present: {got_hit}")

    AllChem.AlignMolConformers(reference_mol)
    AllChem.AlignMolConformers(compare_mol)

    return compare_mol, minimum_traj, reference_mol, reference[1]
def parallel_get_ring_confs(mol, max_variants_per_compound, thoroughness, second_embed):
    """Gets alternate ring conformations. Meant to run with the parallelizer
       class.

    Conformers are generated for the whole molecule, each non-aromatic ring is
    extracted as its own sub-molecule, and the per-ring RMSDs (relative to the
    first conformer) are clustered. The first conformer encountered in each
    cluster is kept.

    :param mol: The molecule to process (with non-aromatic ring(s)).
    :type mol: MyMol.MyMol
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. With max_variants_per_compound = 5 and
       thoroughness = 2, ten conformers are generated and up to five are
       advanced. Values > 1 cost more but improve the chances of finding good
       molecules.
    :type thoroughness: int
    :param second_embed: Whether to try to generate 3D coordinates using an
       older algorithm if the better (default) algorithm fails. This can add
       run time, but sometimes converts certain molecules that would
       otherwise fail.
    :type second_embed: bool
    :return: A list of MyMol.MyMol objects, with alternate ring conformations,
       or None if no conformers could be generated.
    :rtype: list
    """

    # Container index lookup, kept from the original (not used below).
    contnr_idx = mol.contnr_idx

    # For every non-aromatic ring, collect the sorted, unique indices of the
    # bonds whose two atoms both belong to that ring.
    rings_by_bond_indexes = []
    for ring_atoms in mol.get_idxs_of_nonaro_rng_atms():
        ring_bonds = set()
        for atm_idx in ring_atoms:
            for bond in mol.rdkit_mol.GetAtomWithIdx(atm_idx).GetBonds():
                begin_idx = bond.GetBeginAtomIdx()
                end_idx = bond.GetEndAtomIdx()
                neighbor_idx = end_idx if begin_idx == atm_idx else begin_idx
                if neighbor_idx in ring_atoms:
                    ring_bonds.add(bond.GetIdx())
        rings_by_bond_indexes.append(sorted(ring_bonds))

    # Generate a bunch of conformations, ordered from best energy to worst.
    # Note that this is cached. Minimizing too.
    mol.add_conformers(thoroughness * max_variants_per_compound, 0.1, True)

    # Sometimes there are no conformers (e.g. for an impossible structure).
    # In that case there is nothing to return.
    if len(mol.conformers) == 0:
        return None

    mol.load_conformers_into_rdkit_mol()

    # Extract just the rings.
    ring_mols = [
        Chem.PathToSubmol(mol.rdkit_mol, bond_idxs)
        for bond_idxs in rings_by_bond_indexes
    ]

    # Align each ring separately and record the RMSDs relative to its first
    # conformation.
    list_of_rmslists = []
    for ring_mol in ring_mols:
        ring_rms = []
        AllChem.AlignMolConformers(ring_mol, RMSlist=ring_rms)
        list_of_rmslists.append(ring_rms)

    # One point per conformer: (rmsd_ring1, rmsd_ring2, ...). The first
    # conformer is the reference, so a row of zeros is prepended for it.
    pts = numpy.array(list_of_rmslists).T
    pts = numpy.vstack((numpy.zeros((1, pts.shape[1])), pts))

    # Never ask for more clusters than there are points.
    num_clusters = (
        len(pts) if len(pts) < max_variants_per_compound
        else max_variants_per_compound
    )

    # When kmeans2 runs on insufficient clusters, it can sometimes emit a
    # warning about empty clusters. The user does not need to see it.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        groups = kmeans2(pts, num_clusters, minit='points')[1]

    # Keep the first conformer seen for each cluster id. Other versions of the
    # compound (enantiomers, tautomers, etc.) may contribute similar
    # conformations; they are all pooled later on.
    best_ones = {}
    for conf_idx, grp in enumerate(groups):
        if grp not in best_ones:
            best_ones[grp] = mol.conformers[conf_idx]

    # Wrap every selected conformer in its own copy of the molecule.
    results = []
    for conf in best_ones.values():
        new_mol = copy.deepcopy(mol)
        new_conf = MyConformer(new_mol, conf.conformer(), second_embed)
        new_mol.conformers = [new_conf]
        energy = new_conf.energy

        new_mol.genealogy = mol.genealogy[:]
        new_mol.genealogy.append(
            new_mol.smiles(True) + " (nonaromatic ring conformer: " +
            str(energy) + " kcal/mol)")

        results.append(new_mol)

    return results
def rdkit_rms(mol):
    """Align the conformers of ``mol`` and return their RMS matrix.

    The conformers are aligned in place first, so the matrix is computed
    with ``prealigned=True``.

    Args:
        mol: RDKit molecule carrying the conformers to compare.

    Returns:
        The value returned by ``AllChem.GetConformerRMSMatrix``.
    """
    AllChem.AlignMolConformers(mol)
    return AllChem.GetConformerRMSMatrix(mol, prealigned=True)
def molconformergenerator(m, fpf, wdir, Nconf, idx):
    """Generate conformers for ``m`` and write one input file per conformer.

    Conformers are produced by ``generateconformations`` and aligned with
    ``AllChem.AlignMolConformers``. Two kinds of files are written:

    * ``wdir + 'dipeptide-' + str(idx) + '.xyz'`` -- a single file holding
      the coordinates of every conformer, one block per conformer.
    * ``wdir + 'dipeptide-' + str(idx + i) + '.ipt'`` -- one parameter and
      coordinate file for conformer ``i``.

    Parameters
    ----------
    m
        Molecule to generate conformers for. ``None`` aborts with exit
        status 1.
    fpf
        Text appended to the ``#Conformer RMS`` comment line of each
        ``.ipt`` file.
    wdir
        Output directory prefix. It is concatenated directly with the file
        name, so it must already end with a path separator.
    Nconf
        Number of conformers requested from ``generateconformations``.
    idx
        Integer offset used to number the output files.

    Raises
    ------
    SystemExit
        If ``m`` is ``None``.
    """
    TSS = 1
    LOT = 'WB97X/6-31g*'  # High level of theory
    rdm = 'uniform'  # Random dist
    dist_type = 'nmrandom'  # written out under the key "type"
    Temp = '400.0'
    SCF = 'Tight'
    MEM = '2048'
    dpf = 'dipeptide-'
    #------- End Parameters ---------

    if m is None:
        print("Error: cannot open file!")
        # Same effect as exit(1), without depending on the site-provided
        # ``exit`` helper.
        raise SystemExit(1)

    m, _ = generateconformations(m, Nconf)

    # RMS of every conformer relative to the first one; entry k belongs to
    # conformer k + 1.
    rmslist = []
    AllChem.AlignMolConformers(m, RMSlist=rmslist)
    for rms in rmslist:
        print(rms)

    n_confs = m.GetNumConformers()
    n_atoms = m.GetNumAtoms()
    print(n_confs)

    # Loop-invariant: 3N - 6 for the molecule.
    DOF = 3 * n_atoms - 6

    # Context managers guarantee both the shared .xyz file and every .ipt
    # file are flushed and closed, even if a write fails half-way.
    with open(wdir + dpf + str(idx) + '.xyz', 'w') as confcrd:
        for i in range(n_confs):
            print("-------Conformer " + str(i) + "------")

            c = m.GetConformer(i)

            #---------- Write Input Variables ------------
            dfname = dpf + str(idx + i) + '_train.dat'
            vdfname = dpf + str(idx + i) + '_valid.dat'
            edfname = dpf + str(idx + i) + '_test.dat'

            filename = wdir + dpf + str(idx + i) + '.ipt'
            with open(filename, 'w') as f:
                f.write('TSS=' + str(int(TSS * DOF)) + ' \n')
                f.write('VSS=0 \n')
                f.write('ESS=0 \n')
                f.write('LOT=' + LOT + ' \n')
                f.write('rdm=' + rdm + '\n')
                f.write('type=' + dist_type + '\n')
                f.write('Temp=' + Temp + '\n')
                f.write('mem=' + MEM + '\n')
                f.write('SCF=' + SCF + '\n')
                f.write('dfname=' + dfname + ' \n')
                f.write('vdfname=' + vdfname + ' \n')
                f.write('edfname=' + edfname + ' \n')
                f.write('optimize=1 \n')
                f.write('frequency=1 \n')

                f.write('\n')
                if i == 0:
                    # The first conformer is the alignment reference.
                    f.write('#Conformer RMS from 0: 0.0 Mole: ' + fpf)
                else:
                    f.write('#Conformer RMS from 0: ' + str(rmslist[i - 1]) + ' Mole: ' + fpf)

                f.write('\n\n')
                f.write('$coordinates\n')
                confcrd.write("\n")
                confcrd.write(str(n_atoms) + "\n")
                for j in range(n_atoms):
                    pos = c.GetAtomPosition(j)
                    typ = m.GetAtomWithIdx(j).GetSymbol()
                    f.write(' ' + str(typ) + ' ' + str(typ) + ' ' + "{:.5f}".format(pos.x) + ' ' + "{:.5f}".format(pos.y) + ' ' + "{:.5f}".format(pos.z) + '\n')
                    confcrd.write(typ + ' ' + str(pos.x) + ' ' + str(pos.y) + ' ' + str(pos.z) + '\n')
                f.write('&\n\n')

                f.write('$connectivity\n')
                f.write(' NONE\n')
                f.write('&\n\n')

                f.write('$normalmodes\n')
                f.write(' NEED TO COMPUTE\n')
                f.write('&\n\n')
def calculate_charges(smiles, resp_type, overwrite=False):
    """Run the charge calculation for the molecule defined by smiles

    The function looks up the molecule's directory under ``LIB_PATH``,
    reads the RESP input via ``_initialize_resp``, embeds and sub-samples
    conformers with RDKit, geometry-optimizes each one, filters them by
    relative energy and finally runs the multi-conformer RESP fit (in
    vacuum, plus a second PCM fit for RESP2). Results are written by the
    helper functions; nothing is returned.

    Parameters
    ----------
    smiles: str
        the smiles string of the desired molecule
    resp_type: str
        the type of RESP to perform (RESP1 or RESP2); compared
        case-insensitively because it is upper-cased before use
    overwrite: boolean, optional, default=False
        overwrite the previous results if they exist

    Returns
    -------
    None

    Raises
    ------
    InvalidMoleculeError
        if the smiles string is invalid (presumably raised by
        ``canonicalize_smiles`` -- confirm against that helper)
    RESPLibraryError
        if the molecule directory does not exist, if a ``results``
        directory already exists and ``overwrite`` is False, or if
        ``resp_type`` is neither RESP1 nor RESP2
    ValueError
        if fewer conformers than ``n_conformers`` could be embedded
    AssertionError
        if the input file's smiles or RESP type disagree with the arguments
    """
    smiles = canonicalize_smiles(smiles)
    iupac_name = smiles_to_iupac(smiles)
    mol_path = Path.joinpath(LIB_PATH, iupac_name)
    if not mol_path.is_dir():
        raise RESPLibraryError(
            f"The directory for the molecule: '{smiles}' with name "
            f"{iupac_name} does not exist. Please run "
            "resp_library.prepare_charge_calculation() first.")

    # Initialize RESP stuff
    resp_type = resp_type.upper()
    resp_path = Path.joinpath(mol_path, resp_type)
    if not overwrite:
        # Refuse to clobber an earlier run unless explicitly requested.
        if Path.joinpath(resp_path, "results").is_dir():
            raise RESPLibraryError(
                "It appears that this partial charge calculation "
                "has already been attempted or performed. If you wish "
                "to re-run this calculation, please remove the 'results', "
                "'structures', and 'esp_grids' directories before proceeding.")

    inp, log = _initialize_resp(resp_path)

    # Molecule definition
    # NOTE(review): these consistency checks use assert and are therefore
    # skipped when Python runs with -O.
    assert inp['smiles'] == smiles
    assert inp['resp']['type'].upper() == resp_type

    # Parse the YAML file
    n_conformers = inp['conformer_generation']['n_conformers']
    rms_threshold = inp['conformer_generation']['rms_threshold']
    energy_threshold = inp['conformer_generation'][
        'energy_threshold']  # kJ/mol
    conformer_seed = inp['conformer_generation']['random_seed']
    charge_constraints = inp['resp']['charge_constraints']
    equality_constraints = inp['resp']['equality_constraints']
    point_density = inp['resp']['point_density']
    vdw_scale_factors = inp['resp']['vdw_scale_factors']

    # Method / basis set used for the ESP calculation depends on the RESP
    # flavour.
    if resp_type == "RESP1":
        esp_method = "hf"
        esp_basis_set = "6-31g*"
    elif resp_type == "RESP2":
        esp_method = "pw6b95"
        esp_basis_set = "aug-cc-pV(D+d)Z"
    else:
        raise RESPLibraryError(
            "Invalid RESP type. Only 'RESP1' and 'RESP2' are supported.")

    # Create the molecule, add H's
    rdmol = Chem.MolFromSmiles(smiles)
    rdmol = Chem.AddHs(rdmol)

    # Get the net charge
    net_charge = Chem.rdmolops.GetFormalCharge(rdmol)
    log.log(f"The net charge is {net_charge}.")

    # Get the elements
    elements = [a.GetSymbol() for a in rdmol.GetAtoms()]
    # One Bondi radius per distinct element symbol.
    vdw_radii = {
        elem_sym: ele.element_from_symbol(elem_sym).radius_bondi
        for elem_sym in elements
    }

    # Generate conformers
    # Up to 500 embeddings are attempted; pruning by rms_threshold may
    # leave fewer.
    cids = AllChem.EmbedMultipleConfs(rdmol,
                                      numConfs=500,
                                      pruneRmsThresh=rms_threshold,
                                      randomSeed=conformer_seed)
    AllChem.AlignMolConformers(rdmol)

    if len(cids) < n_conformers:
        raise ValueError(
            "Not enough conformers found. Please reduce the "
            "'rms_threshold' or the 'n_conformers'. For molecules "
            "with < 5 atoms it may be difficult to generate more "
            "than 2 conformers.")

    # Select n_conformers at random
    # NOTE(review): this reseeds NumPy's global random state.
    np.random.seed(conformer_seed)
    conformer_ids = np.random.choice([i for i in range(len(cids))],
                                     size=n_conformers,
                                     replace=False)
    # Positions 0..len(cids)-1 are used as conformer ids here; this
    # assumes the embedded ids are contiguous from 0 -- TODO confirm.
    remove = [i for i in range(len(cids)) if i not in conformer_ids]
    for idx in remove:
        rdmol.RemoveConformer(idx)
    # Renumber conformers
    for idx, c in enumerate(rdmol.GetConformers()):
        c.SetId(idx)

    optimized_p4molecules = []
    optimized_energies = []
    # For each conformer, geometry optimize with psi4
    for conformer in rdmol.GetConformers():
        p4mol = build_p4mol(elements, conformer.GetPositions(), net_charge)
        p4mol, energy = _geometry_optimize(p4mol, resp_type)
        # Save optimized structure and energy
        optimized_p4molecules.append(p4mol)
        optimized_energies.append(energy)
        # Extract optimized coordinates; update coordinates RDKIT molecule
        coords = p4mol.geometry().to_array() * BOHR_TO_ANGSTROM
        for i in range(rdmol.GetNumAtoms()):
            x, y, z = coords[i]
            conformer.SetAtomPosition(i, Point3D(x, y, z))

    # Check energies and remove high energy conformers
    _check_relative_conformer_energies(rdmol, optimized_p4molecules,
                                       optimized_energies, energy_threshold,
                                       log)

    # Align conformers for easier visual comparison
    _save_aligned_conformers(rdmol, log)

    # Save the conformers used for resp
    # NOTE(review): the path is relative to the current working directory;
    # presumably _initialize_resp changes into resp_path -- confirm.
    Path("structures/optimized_geometries.pdb").write_text(
        Chem.rdmolfiles.MolToPDBBlock(rdmol))
    log.log(
        "Wrote the final optimized gemoetries to 'optimized_geometries.pdb'.\n\n"
    )

    # Finally we do multi-conformer RESP
    # First fit: no PCM, results labelled "vacuum".
    pcm = False
    charges = _perform_resp(
        optimized_p4molecules,
        charge_constraints,
        equality_constraints,
        esp_method,
        esp_basis_set,
        pcm,
        point_density,
        vdw_radii,
        vdw_scale_factors,
        log,
    )
    _write_results(elements, charges, equality_constraints, "vacuum", log)

    # RESP2 additionally needs a second fit with PCM switched on.
    if resp_type == "RESP2":
        pcm = True
        charges = _perform_resp(
            optimized_p4molecules,
            charge_constraints,
            equality_constraints,
            esp_method,
            esp_basis_set,
            pcm,
            point_density,
            vdw_radii,
            vdw_scale_factors,
            log,
        )
        _write_results(elements, charges, equality_constraints, "pcm", log)

    # NOTE(review): the log is only closed on the success path.
    log.close()
def superpos(file_name_prefix, mol_list, amsol_dir, only_planar_rings=False, pattern_file=None, debug=False):
    """Superpose conformational ensembles on rings or SMARTS patterns.

    For every molecule whose title is present in the solvation table, the
    conformers are aligned on each ring system (or pattern match) in turn.
    Conformers whose alignment RMS is within the cut-off are grouped
    together; for each group a re-ordered solvation table and a multi-mol2
    file are written.

    ARGUMENTS:
       - file_name_prefix: prefix of file name
       - mol_list: list with ensembles of molecules
       - amsol_dir: directory under which xx*cav_CYC lies (bad name for variable)
       - only_planar_rings: superimpose only on planar rings (optional);
                            marked "currently not working" in the code
       - pattern_file: (optional) file with smarts for pattern matching;
                       marked "currently not working" in the code
       - debug: (optional) gives more output

    NOTE: an earlier version of this docstring also listed a ``zinc_title``
    argument; the function has no such parameter.

    RETURNS:
       dict mapping each molecule title to the list of new titles that
       were generated for it (the list is empty when the title is missing
       from the solvation table). Output files are written as a side
       effect.
    """

    #rot=DoubleArray(9)
    #trans=DoubleArray(3)
    if pattern_file != None:
        pattern_matching = True
        # NOTE(review): this file handle is never closed in this function.
        smarts_file = open(pattern_file, 'r')
        rms_cut_off = 0.001  # fewer atoms in pattern matching -> must be more accurate, otherwise mol2db screws up
        print("pattern matching")
    else:
        pattern_matching = False
        print("ring matching")
        rms_cut_off = 0.015  #used to be 0.01 with OE kits. Does 0.025 really work for mol2db?

    solv_dict = {}
    output_dict = {}
    #pattern matching is currently not working
    if pattern_matching:
        #some options from the original match.py file
        # Only atom_expr, bond_expr and max are used below; the remaining
        # names are assigned but never read in this function.
        usa = 1
        asym = 1
        exph = 0
        max = 0
        smartsfile = None
        verbose = 0
        qmolfile = None
        kekule = 0
        atom_expr = None
        bond_expr = None
        smartslist = None
        qmollist = None
        nowarn = 0
        smarts_list = chemistry.get_smartslist_file(smarts_file)
        patlist = chemistry.get_patlist(smarts_list, atom_expr, bond_expr, max)

    #out_file = file_name_prefix + '_om_mult_rings.mol2'
    out_file_tmp = file_name_prefix + '_mult_rings_tmp.mol2'
    solv_table_file_name = amsol_dir + '/' + file_name_prefix + '_amsol_cav_CYC/' + file_name_prefix + '_CYC.solv'
    #print solv_table_file_name
    new_solv_table_file_name = amsol_dir + '/' + file_name_prefix + '_amsol_cav_CYC/' + file_name_prefix + '_mult_rings_CYC.solv'
    solv_table_file = open(solv_table_file_name, 'r')
    #new_solv_talbe_file = open(new_solv_table_file_name, 'w')
    #read solvation_table, this might have to be changed, if tables get too long to be held in memory
    solv_dict = read_solv_table(solv_table_file)
    #print solv_dict.keys()

    for mol in mol_list:  #loop over all molecule ensembles in the list
        title = mol.GetProp("_Name")
        print(title)
        output_dict[title] = []
        #only go on if the solvation energy calculation was successful
        if title in solv_dict:
            if not pattern_matching:
                #get the atoms that have to be superimposed
                if only_planar_rings:  #currently not working
                    # NOTE(review): ringlist and count are not defined
                    # before this call within this function.
                    match_list, count = super_impose_only_on_planar_rings(
                        ringlist, mol, count)
                    #print count
                else:
                    match_list, count = fuse_rings(mol, debug)
            else:  #currently not working
                #print title
                match_list, count = get_matches(mol, patlist)
                #print match_list
            # NOTE(review): refmol is never used afterwards; looks like a
            # leftover from the OE-based version -- confirm and remove.
            refmol = OECreateOEMol()
            super_pos_counter = 0
            #print count, ' count'

            cids = [x.GetId() for x in mol.GetConformers()]  #get all conformer ids
            #if there is only one conformer, I don't need to align them
            if len(cids) == 1:
                count = 1  #no need to loop over all rings of only one conformer
            super_pos_counter = 0
            for i in range(0, count):  #loop over ring systems
                print(match_list, 'match_list')
                print('ring id:', i)
                saved = 0
                #write out amsol charges if this easily possible
                #align molecules on best reference molecule
                # found_ref collects every conformer id that already
                # belongs to some group for this ring system.
                found_ref = []
                for conf_id in cids:
                    if conf_id not in found_ref:
                        found_ref.append(conf_id)
                        align_list = [conf_id]  #align onto current conformer
                        for cid_2 in cids:  #find other conformers to align
                            if cid_2 not in found_ref:
                                align_list.append(
                                    cid_2
                                )  #align these conformers in this round
                        rms_values = []
                        AllChem.AlignMolConformers(mol,
                                                   confIds=align_list,
                                                   atomIds=match_list[i][0],
                                                   RMSlist=rms_values)
                        if debug:
                            print('cids:', cids, 'len rms_values',
                                  len(rms_values), rms_values)
                            print('align_list', align_list)
                        found_this_round = [conf_id]
                        for i_rms in range(
                                0, len(rms_values
                                       )):  #find conformers that aligned well
                            #first entry in rms_values list corresponds to alignment of 2nd entry in align list on first entry in align list and so on
                            if rms_values[i_rms] <= rms_cut_off:
                                found_ref.append(align_list[i_rms + 1])
                                found_this_round.append(align_list[i_rms + 1])
                        print('found this round:', found_this_round)

                        #write out aligned molecules
                        super_pos_counter = super_pos_counter + 1
                        print("super_pos_counter", super_pos_counter)
                        #for zinc, you have to adjust the titles before => don't destroy this adjustment here
                        new_title = gen_new_title(title, super_pos_counter)
                        #mol.SetProp("_Name", new_title)
                        print(mol.GetProp("_Name"), "name >----------")

                        #order atoms for mol2db
                        tmp_mol, index_list = OrderMol2db(
                            mol, match_list[i][0])
                        tmp_mol.SetProp("_Name", title)
                        print(index_list, "index_list")

                        #reorder solv table
                        #new_first_line = solv_dict[title][0].replace(title, new_title)
                        new_solv_table_file_name = amsol_dir + '/' + file_name_prefix + '_amsol_cav_CYC/' + new_title + '_mult_rings_CYC.solv'
                        new_solv_table_file = open(new_solv_table_file_name, 'w')
                        new_solv_table_file.write(solv_dict[title][0])
                        for index in index_list:  #sort solvation table according to atom indexes
                            new_solv_table_file.write(
                                solv_dict[title][1][index])
                        new_solv_table_file.close()

                        #problem with mol2db: aligned is not accurate enough, does not find identical atoms
                        #get coordinates from ref conformer
                        #use them for matched conformers
                        if len(
                                found_this_round
                        ) > 1:  #no need to resort if only one molecule in the list
                            coord_list = []
                            for atom_idx in match_list[i][0]:
                                pt1 = mol.GetConformer(
                                    found_this_round[0]).GetAtomPosition(
                                        atom_idx)
                                coord_list.append(pt1)

                        out_file = new_title + '_om_mult_rings.mol2'
                        output_dict[title].append(new_title)
                        for cid_3 in found_this_round:
                            #saved = saved + 1
                            if len(
                                    found_this_round
                            ) > 1:  #update coordinates, no need to do if only one molecule in the list
                                for pos in range(0, len(coord_list)
                                                 ):  #atoms are already sorted
                                    tmp_mol.GetConformer(
                                        cid_3).SetAtomPosition(
                                            pos, coord_list[pos])
                            #get amsol charges to write into mol2 file
                            for atom in tmp_mol.GetAtoms():
                                pos_unsorted_solv_table = index_list[
                                    atom.GetIdx()]
                                #print (pos_unsorted_solv_table, atom.GetIdx())
                                # The first whitespace-separated field of
                                # the solvation-table row is used as the
                                # charge string.
                                charge = solv_dict[title][1][
                                    pos_unsorted_solv_table].split()[0].strip(
                                    )
                                #print (charge)
                                atom.SetProp('_GasteigerCharge', charge)
                            Mol2Writer.MultiMolToMol2File([tmp_mol],
                                                          out_file,
                                                          confId=cid_3,
                                                          addHs=True,
                                                          append=True,
                                                          addCharges=False)
                            #print ('saved: ', saved, 'ring id:', i)
                            #saved = 0

    #new_solv_talbe_file.close()
    solv_table_file.close()
    print("done superpose_conf_ensemble!")
    return output_dict
def align_conformers(mol, clust_ids):
    """Align the selected conformers of ``mol`` and return their RMS values.

    Parameters
    ----------
    mol
        Molecule whose conformers are aligned in place.
    clust_ids
        Conformer ids handed to ``AllChem.AlignMolConformers`` as
        ``confIds``.

    Returns
    -------
    list
        The list that ``AlignMolConformers`` populated through its
        ``RMSlist`` argument.
    """
    rms_values = []
    AllChem.AlignMolConformers(mol, RMSlist=rms_values, confIds=clust_ids)
    return rms_values
def confsearchsmiles(name, smiles, Ew, NCONF, cmp, eout, optd):
    """Run a conformer search for one SMILES string and write the results.

    Steps: embed ``NCONF`` conformers, optimize them with MMFF, keep one
    representative per RMS cluster, filter by relative energy, optimize
    the survivors with ``cmp``, align them, sort them by energy and write
    them to ``optd + name + '.sdf'``. One summary line is appended to
    ``eout``.

    Parameters
    ----------
    name
        Label used in messages, in the SDF file name and in the summary
        line.
    smiles
        SMILES string of the molecule.
    Ew
        Energy window. NOTE(review): currently unused -- the window is
        hard-coded to 5.0 kcal/mol below; confirm whether ``Ew`` should
        replace it.
    NCONF
        Number of conformers to embed.
    cmp
        Object providing ``energy_rdkit_conformers`` and
        ``optimize_rdkit_molecule``.
    eout
        Writable object (``write`` / ``flush``) receiving the energy
        summary.
    optd
        Path prefix for the SDF output; concatenated directly with
        ``name``.

    Returns
    -------
    None
        The function returns early (after printing a message) when the
        SMILES cannot be parsed, the molecule has more than 50 heavy
        atoms, or no conformers are left.
    """
    # Create RDKit MOL
    m = Chem.MolFromSmiles(smiles)
    if m is None:
        # MolFromSmiles yields None for unparsable input; skip like the
        # other rejection paths instead of failing on an attribute lookup.
        print('Skipping ' + name + ': could not parse SMILES.')
        return

    print('Working on:', name, 'Heavyatoms(', m.GetNumHeavyAtoms(), ')')
    if m.GetNumHeavyAtoms() > 50:
        print('Skipping ' + name + ': more than 50 atoms.')
        return

    # Add hydrogens
    m = Chem.AddHs(m)

    # Embed NCONF random conformations
    cids = AllChem.EmbedMultipleConfs(m,
                                      useExpTorsionAnglePrefs=True,
                                      useBasicKnowledge=True,
                                      numConfs=NCONF)
    print(' -Confs:', len(cids), 'Total atoms:', m.GetNumAtoms())
    if len(cids) < 1:
        print('Skipping ' + name + ': not enough confs embedded.')
        return

    # Classical OPT
    for cid in cids:
        _ = AllChem.MMFFOptimizeMolecule(m, confId=cid)

    # Locate clusters
    dmat = AllChem.GetConformerRMSMatrix(m, prealigned=False)
    rms_clusters = Butina.ClusterData(dmat,
                                      m.GetNumConformers(),
                                      0.3,
                                      isDistData=True,
                                      reordering=True)
    keep_ids = [i[0] for i in rms_clusters]

    # In every mask below True marks a conformer to REMOVE.
    mask = np.ones(len(cids), dtype=bool)
    mask[keep_ids] = 0

    # Remove conformers
    RemoveConformers(m, mask)

    # Get new cids
    cids = [x.GetId() for x in m.GetConformers()]

    # Calculate energies
    E, V = cmp.energy_rdkit_conformers(m, cids)
    E = hdt.hatokcal * (E - E.min())

    # Build index < 5.0 kcal/mol
    low_energy = np.where(E < 5.0)[0]
    mask = np.ones(E.size, dtype=bool)
    if low_energy.size > 25:
        # Plenty of low-energy conformers: keep all of them.
        mask[low_energy] = 0
    elif E.size > 25:
        # Otherwise keep the 25 lowest-energy conformers. (The previous
        # index ``[25]`` kept only the single 26th-lowest conformer.)
        mask[np.argsort(E)[:25]] = 0
    else:
        # 25 or fewer conformers: keep everything.
        mask = np.zeros(E.size, dtype=bool)

    # Remove conformers
    RemoveConformers(m, mask)

    # Get new cids
    cids = [x.GetId() for x in m.GetConformers()]

    if len(cids) < 1:
        print('Skipping ' + name + ': not enough confs to continue.')
        return

    # ANI OPT
    ochk = np.zeros(len(cids), dtype=np.int64)
    for i, cid in enumerate(cids):
        #print(' -Optimizing confid:', cid)
        ochk[i] = cmp.optimize_rdkit_molecule(m, cid=cid, fmax=0.001)

    # Align conformers
    rmslist = []
    AllChem.AlignMolConformers(m, RMSlist=rmslist, maxIters=200)
    #print(rmslist)

    # Get energies and std dev.
    E, V = cmp.energy_rdkit_conformers(m, cids)
    E = hdt.hatokcal * E
    V = hdt.hatokcal * V

    # Sort by energy (low to high)
    idx = np.argsort(E)
    X, S = pya.__convert_rdkitconfs_to_nparr__(m)
    Xs = X[idx]
    Es = E[idx]
    Vs = V[idx]

    # Write the energy-sorted coordinates back so that conformer order in
    # the molecule matches the order of Es / Vs.
    natm = m.GetNumAtoms()
    for cid, x in zip(cids, Xs):
        conf = m.GetConformer(cid)
        for i in range(natm):
            conf.SetAtomPosition(
                i, [float(x[i][0]),
                    float(x[i][1]),
                    float(x[i][2])])

    # Write out conformations
    sdf = AllChem.SDWriter(optd + name + '.sdf')
    try:
        for cid in cids:
            sdf.write(m, confId=cid)
    finally:
        # Close the writer even if a write fails.
        sdf.close()

    # Write out energy and sigma data
    eout.write(name + ' ' + str(Es) + ' ' + str(Vs) + ' ' + str(ochk) + '\n')
    eout.flush()
    return