def __init__(self, mol_config: MolConfig, max_steps=200):
    """Initialise the environment around the molecule held by ``mol_config``.

    Parameters
    ----------
    mol_config : MolConfig
        Configuration object. It is deep-copied, and its ``mol`` and
        ``seed`` attributes are read here.
    max_steps : int, optional
        Stored on the instance as ``self.max_steps``.

    Raises
    ------
    Exception
        If the molecule had to be re-embedded and RDKit could not produce
        a conformer for it.
    """
    super(ConformerEnv, self).__init__()
    logging.info('initializing conformer environment')

    # Work on a private copy so the caller's config (and its molecule)
    # is not modified by the conformer clean-up below.
    self.config = copy.deepcopy(mol_config)
    self.max_steps = max_steps
    self.total_reward = 0
    self.current_step = 0

    self.step_info = {}
    self.episode_info = {}
    self.mol = self.config.mol

    # set mol to have exactly one conformer
    if self.mol.GetNumConformers() != 1:
        # logging.warn is a deprecated alias that was removed in
        # Python 3.13; logging.warning is the supported spelling.
        logging.warning(
            "Input molecule to environment should have exactly one conformer, none or more than one detected."
        )
        self.mol.RemoveAllConformers()
        embed_status = Chem.EmbedMolecule(
            self.mol, randomSeed=self.config.seed, useRandomCoords=True)
        if embed_status == -1:
            raise Exception(
                'Unable to embed molecule with conformer using rdkit')

    self.conf = self.mol.GetConformer()

    # Only the non-ring torsions are kept; for each one the first atom
    # tuple of the entry is stored as a list of atom indices.
    nonring, _ = TorsionFingerprints.CalculateTorsionLists(self.mol)
    self.nonring = [list(atoms[0]) for atoms, _ in nonring]

    self.reset()
def butina_clustering_m(rdkit_mol, difference_matrix='tfd', threshold=0.001):
    """Cluster the conformers of a molecule with RDKit's Butina algorithm.

    Parameters
    ----------
    rdkit_mol : RDKit Mol
        Molecule carrying the conformers to cluster.
    difference_matrix : str, optional
        Distance measure between conformers, case-insensitive:
        ``'tfd'`` (torsion fingerprint deviation) or ``'rms'``.
    threshold : float, optional
        Distance cutoff handed to ``Butina.ClusterData``.

    Returns
    -------
    RDKit Mol
        A deep copy of ``rdkit_mol`` that holds only the centroid
        conformer of every cluster, with freshly assigned conformer ids.

    Raises
    ------
    ValueError
        If ``difference_matrix`` is neither ``'tfd'`` nor ``'rms'``.
    """
    metric = difference_matrix.lower()
    # Reject unknown metrics up front; previously this fell through both
    # branches and failed later on an unbound ``diffmat``.
    if metric not in ('tfd', 'rms'):
        raise ValueError(
            "difference_matrix must be 'tfd' or 'rms', got %r"
            % (difference_matrix,))

    # calculate difference matrix
    if metric == 'tfd':
        diffmat = TorsionFingerprints.GetTFDMatrix(rdkit_mol)
    else:
        diffmat = AllChem.GetConformerRMSMatrix(rdkit_mol, prealigned=False)

    # cluster conformers
    num_confs = rdkit_mol.GetNumConformers()
    clusters = Butina.ClusterData(diffmat,
                                  num_confs,
                                  threshold,
                                  isDistData=True,
                                  reordering=True)

    # The first member of each cluster is its centroid.
    centroid_idx = [cluster[0] for cluster in clusters]

    # Cluster members are positions in the distance matrix, i.e. positions
    # in the molecule's conformer sequence, so select conformers by
    # position rather than assuming conformer ids run 0..n-1.
    conformers = list(rdkit_mol.GetConformers())

    new_rdkit_mol = copy.deepcopy(rdkit_mol)
    new_rdkit_mol.RemoveAllConformers()
    for idx in centroid_idx:
        new_rdkit_mol.AddConformer(conformers[idx], assignId=True)

    return new_rdkit_mol
def TFD_for_oemols(ref_mol, query_mol):
    """Return the torsion fingerprint deviation (TFD) between two OEMols.

    Both molecules are converted with ``rdmol_from_oemol`` and then
    compared with RDKit's ``TorsionFingerprints`` module.

    Args:
        ref_mol (oemol): reference molecule, already read in.
        query_mol (oemol): query molecule, already read in.

    Returns:
        tfd: the value from ``GetTFDBetweenMolecules``; ``-1`` when the
            converted molecules have different RDKit SMILES; ``0`` when
            the TFD calculation raises ``IndexError``.
    """
    # Make both molecules readable by RDKit (reference first, then query).
    reference_rd = rdmol_from_oemol(ref_mol)
    query_rd = rdmol_from_oemol(query_mol)

    # Differing SMILES are treated as a failed conversion.
    if Chem.MolToSmiles(query_rd) != Chem.MolToSmiles(reference_rd):
        return -1

    try:
        return TorsionFingerprints.GetTFDBetweenMolecules(reference_rd,
                                                          query_rd)
    except IndexError:
        return 0
def prune_last_conformer(
        mol: Chem.Mol, tfd_thresh: float,
        energies: List[float]) -> Tuple[Chem.Mol, List[float]]:
    """Decide whether the most recently added conformer survives pruning.

    The last conformer is compared, by TFD (Torsional Fingerprint
    Deviation), against every earlier conformer. If any earlier conformer
    that sorts at or below the last one's energy lies within
    ``tfd_thresh``, the last conformer is removed. Otherwise it is inserted
    at its energy rank, and higher-energy conformers closer than
    ``tfd_thresh`` to it are dropped.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule to prune. All conformers except the last are expected to
        be ordered by ascending energy.
    tfd_thresh : float
        Minimum TFD allowed between conformers. A negative value disables
        pruning.
    energies : list of float
        Energies of the conformers in ``mol``, in conformer order.

    Returns
    -------
    mol : RDKit Mol
        The molecule after pruning. This is the input object unless the
        last conformer was kept, in which case a new molecule is returned.
    energies : list of float
        Energies matching the conformers of the returned molecule.
    """
    if tfd_thresh < 0:
        return mol, energies
    num_confs = mol.GetNumConformers()
    if num_confs <= 1:
        return mol, energies

    last = num_confs - 1

    # Number of earlier conformers whose energy is <= the new one.
    rank = bisect.bisect(energies[:-1], energies[-1])

    deviations = np.array(
        TorsionFingerprints.GetTFDBetweenConformers(mol,
                                                    range(0, last), [last],
                                                    useWeights=False))

    # if lower energy conformer is within threshold, drop new conf
    if not np.all(deviations[:rank] >= tfd_thresh):
        mol.RemoveConformer(last)
        return mol, energies[:-1]

    # Keep everything below the new conformer, then the new conformer,
    # then only those higher-energy conformers that are far enough away.
    keep = list(range(0, rank))
    keep.append(last)
    keep.extend(
        i for i in range(rank, last) if deviations[i] >= tfd_thresh)

    pruned = Chem.Mol(mol)
    pruned.RemoveAllConformers()
    for conf_id in keep:
        pruned.AddConformer(mol.GetConformer(conf_id), assignId=True)

    return pruned, [energies[conf_id] for conf_id in keep]
def tfd_matrix(mol: Chem.Mol) -> np.array:
    """Return the full symmetric TFD matrix over all conformers of ``mol``.

    RDKit returns the pairwise TFDs as a flat lower-triangle list; this
    expands it into a square array with a zero diagonal.
    """
    flat = TorsionFingerprints.GetTFDMatrix(mol, useWeights=False)

    # Recover the matrix size from the length of the flat lower triangle.
    size = int(np.sqrt(len(flat) * 2)) + 1

    full = np.zeros((size, size))
    lower = np.tril_indices(size, k=-1, m=size)
    full[lower] = flat

    # Mirror the strict lower triangle onto the upper one.
    return full + np.transpose(full)
def cluster_conformers(mol, mode="RMSD", threshold=2.0):
    """Butina-cluster the conformers of ``mol``.

    ``mode == "TFD"`` clusters on the torsion fingerprint deviation
    matrix; any other value clusters on the conformer RMS matrix.
    Returns the clusters produced by ``Butina.ClusterData``.
    """
    distances = (
        TorsionFingerprints.GetTFDMatrix(mol)
        if mode == "TFD"
        else AllChem.GetConformerRMSMatrix(mol, prealigned=False)
    )
    return Butina.ClusterData(distances,
                              mol.GetNumConformers(),
                              threshold,
                              isDistData=True,
                              reordering=True)
def testTorsionFingerprintsColinearBonds(self):
    # Internal triple bond: with ignoreColinearBonds=True the single bonds
    # next to it yield neither torsions nor weights.
    internal_alkyne = Chem.MolFromSmiles('CCC#CCC')
    nonring, _ = TorsionFingerprints.CalculateTorsionLists(
        internal_alkyne, ignoreColinearBonds=True)
    self.assertEqual(len(nonring), 0)
    weights = TorsionFingerprints.CalculateTorsionWeights(
        internal_alkyne, ignoreColinearBonds=True)
    self.assertEqual(len(weights), 0)

    # With ignoreColinearBonds=False they are kept, using alternative
    # atoms to define the torsion.
    nonring, _ = TorsionFingerprints.CalculateTorsionLists(
        internal_alkyne, ignoreColinearBonds=False)
    self.assertEqual(len(nonring), 1)
    self.assertEqual(nonring[0][0][0], (0, 1, 4, 5))
    weights = TorsionFingerprints.CalculateTorsionWeights(
        internal_alkyne, ignoreColinearBonds=False)
    self.assertEqual(len(weights), 1)

    # Terminal triple bond: the adjacent single bond is ignored whatever
    # the flag says.
    terminal_alkyne = Chem.MolFromSmiles('C#CCC')
    for ignore_colinear in (True, False):
        nonring, _ = TorsionFingerprints.CalculateTorsionLists(
            terminal_alkyne, ignoreColinearBonds=ignore_colinear)
        self.assertEqual(len(nonring), 0)
def testTorsionFingerprintsAtomReordering(self):
    # The TFD between the 1DWD x-ray ligand (JCIM, 52, 1499, 2012) and an
    # atom-reordered copy of the same structure must be exactly zero.
    data_dir = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data')
    template = Chem.MolFromSmiles(
        'NC(=[NH2+])c1ccc(C[C@@H](NC(=O)CNS(=O)(=O)c2ccc3ccccc3c2)C(=O)N2CCCCC2)cc1')

    original = Chem.MolFromPDBFile(os.path.join(data_dir, '1DWD_ligand.pdb'))
    original = AllChem.AssignBondOrdersFromTemplate(template, original)

    reordered = Chem.MolFromPDBFile(
        os.path.join(data_dir, '1DWD_ligand_reordered.pdb'))
    reordered = AllChem.AssignBondOrdersFromTemplate(template, reordered)

    self.assertEqual(
        TorsionFingerprints.GetTFDBetweenMolecules(original, reordered), 0.0)
def calc_tfd(ref_mol, query_mol):
    """
    Calculate Torsion Fingerprint Deviation between two molecular structures.
    RDKit is required for TFD calculation.

    References
    ----------
    Modified from the following code:
    https://github.com/MobleyLab/benchmarkff/03_analysis/compare_ffs.py

    TFD reference:
    https://pubs.acs.org/doi/10.1021/ci2002318

    Parameters
    ----------
    ref_mol : RDKit RDMol
    query_mol : RDKit RDMol

    Returns
    -------
    tfd : float
        Torsion Fingerprint Deviation between ref and query molecules.
        NaN when the two molecules have different RDKit SMILES, or when
        the TFD calculation raises IndexError.
    """

    def _name_of(mol):
        # GetProp raises KeyError when the property is missing (e.g. for a
        # molecule built from SMILES); the diagnostics below must not turn
        # a NaN result into a crash.
        return mol.GetProp('_Name') if mol.HasProp('_Name') else '<unnamed>'

    # check if the molecules are the same
    # tfd requires the two molecules must be instances of the same molecule
    rsmiles = Chem.MolToSmiles(ref_mol)
    qsmiles = Chem.MolToSmiles(query_mol)
    if rsmiles != qsmiles:
        print(f"- WARNING: The reference mol {_name_of(ref_mol)} and "
              f"query mol {_name_of(query_mol)} do NOT have the same "
              f"SMILES strings as determined by RDKit MolToSmiles. "
              f"\n {rsmiles}\n {qsmiles}")
        return np.nan

    # calculate the TFD
    try:
        return TorsionFingerprints.GetTFDBetweenMolecules(ref_mol, query_mol)
    # triggered for molecules such as urea
    except IndexError:
        print(
            f"- Error calculating TFD on molecule {_name_of(ref_mol)}."
            " Possibly no non-terminal rotatable bonds found.")
        return np.nan
def rdkit_tfd(mol):
    """Return RDKit's TFD matrix for the conformers of ``mol``."""
    return TorsionFingerprints.GetTFDMatrix(mol)
# For every readable molecule: embed conformers, optimise them with UFF,
# cluster them by TFD and write the centroid of each cluster to its own SDF.
for mol in mols:
    # Entries that could not be read are skipped.
    if mol is None:
        continue

    mol = Chem.AddHs(mol)
    conf = AllChem.EmbedMultipleConfs(mol,
                                      numConfs=int(N),
                                      pruneRmsThresh=float(RMS),
                                      useExpTorsionAnglePrefs=True,
                                      useBasicKnowledge=True,
                                      numThreads=int(nbthread))
    if len(conf) == 0:
        # not able to make conformers
        print("Could not generate any conformers for %s" % (mol.GetProp("_Name")))
        continue

    Chem.rdMolAlign.AlignMolConformers(mol)
    AllChem.UFFOptimizeMoleculeConfs(mol, numThreads=int(nbthread))

    # Discard identical conformers around an axis of symmetry (not
    # supported by pruneRmsThresh in the embedding call above).
    matrix = TorsionFingerprints.GetTFDMatrix(mol,
                                              useWeights=False,
                                              maxDev='equal',
                                              symmRadius=2,
                                              ignoreColinearBonds=True)
    conf_clusters = Butina.ClusterData(matrix, len(conf), cutoff, True)

    # Output files are numbered from 1 in cluster order.
    for confnb, cluster in enumerate(conf_clusters, start=1):
        writer = Chem.SDWriter(output_folder + "/" + mol.GetProp("_Name") + "_conf_" + str(confnb) + ".sdf")
        try:
            # output only centroid
            writer.write(mol, confId=cluster[0])
        finally:
            # Close the file even if writing fails.
            writer.close()
def testGithub4720(self):
    # Regression test for GitHub issue 4720: torsion list calculation
    # raised exceptions for highly-coordinated atoms.
    hypervalent = Chem.MolFromSmiles('S(F)(F)(F)(F)(Cl)c1ccccc1')
    nonring, _ = TorsionFingerprints.CalculateTorsionLists(hypervalent)
    self.assertEqual(len(nonring), 1)
def calculate(self, mol):
    """Yield one tuple holding a ``Torsion`` per non-ring torsion of ``mol``.

    The torsions come from the first list returned by RDKit's
    ``CalculateTorsionLists``; for each entry, the atoms of its first
    index tuple are looked up on ``mol`` and passed to ``Torsion``.
    """
    nonring_entries = TorsionFingerprints.CalculateTorsionLists(
        mol.to_rdkit_mol())[0]

    torsions = []
    for atom_groups, _ in nonring_entries:
        torsions.append(Torsion(*mol.get_atoms(atom_groups[0])))

    yield tuple(torsions)
def calc_tfd(ref_mol, query_mol, conf_id_tag):
    """
    Compute the Torsion Fingerprint Deviation (TFD) between two OEMols.
    Both molecules are converted to RDKit molecules first; RDKit performs
    the TFD calculation.

    References
    ----------
    Modified from the following code:
    https://github.com/MobleyLab/off-ffcompare

    TFD reference:
    https://pubs.acs.org/doi/10.1021/ci2002318

    Parameters
    ----------
    ref_mol : OEMol
    query_mol : OEMol
    conf_id_tag : string
        label of the SD tag that should be the same for matching
        conformers in different files

    Returns
    -------
    tfd : float
        Torsion Fingerprint Deviation between ref and query molecules;
        NaN if the SMILES of the converted molecules differ or the TFD
        calculation raises IndexError.
    """
    # Convert reference, then query, into RDKit molecules.
    ref_rdmol = reader.rdmol_from_oemol(ref_mol)
    que_rdmol = reader.rdmol_from_oemol(query_mol)

    # TFD is only defined between instances of the same molecule, so the
    # RDKit SMILES of both sides must agree.
    rsmiles = Chem.MolToSmiles(ref_rdmol)
    qsmiles = Chem.MolToSmiles(que_rdmol)

    if rsmiles == qsmiles:
        try:
            return TorsionFingerprints.GetTFDBetweenMolecules(
                ref_rdmol, que_rdmol)
        # triggered for molecules such as urea
        except IndexError:
            print(
                f"- Error calculating TFD on molecule '{ref_mol.GetTitle()}'."
                " Possibly no non-terminal rotatable bonds found.")
            return np.nan

    print(f"- WARNING: The reference mol \'{ref_mol.GetTitle()}\' and "
          f"query mol \'{query_mol.GetTitle()}\' do NOT have the same "
          "SMILES strings as determined by RDKit MolToSmiles. It is "
          "possible that they did not have matching SMILES even before "
          "conversion from OEMol to RDKit mol. Listing in order the "
          "QCArchive SMILES string, RDKit SMILES for ref mol, and "
          "RDKit SMILES for query mol:"
          f"\n {oechem.OEGetSDData(ref_mol, conf_id_tag)}"
          f"\n {rsmiles}\n {qsmiles}")
    return np.nan
def get_tfd(mol):
    """Return the TFD list between conformer 0 and conformer 1 of ``mol``."""
    first_ids = [0]
    second_ids = [1]
    return TorsionFingerprints.GetTFDBetweenConformers(mol, first_ids,
                                                       second_ids)
def get_tfd(source_1, source_2, file_in, seed):
    """Tabulate TFDs between sampled conformers and each target's reference.

    For every row of ``file_in`` the first molecule of the per-method SDF
    files (``EI_bayes_<seed>``, ``LCB_bayes_<seed>``, ``uniform_<seed>``
    and, where applicable, ``confab``) under ``source_1/<target>/`` is
    compared against a reference molecule with
    ``TFP.GetTFDBetweenMolecules``.

    Parameters
    ----------
    source_1 : str
        Root directory of the per-target result folders.
    source_2 : str
        Alternative root used for the reference file when the reference
        name (column 1) contains ``"200"``.
    file_in : pandas.DataFrame
        Read positionally: column 0 is the target folder name, column 1 the
        reference file stem, column 2 a flag. When the flag equals
        ``"Yes"`` the reference is ``<target>/<stem>.sdf``; otherwise the
        reference is ``<target>/confab.sdf``.
    seed
        Value formatted into the per-method file names.

    Returns
    -------
    confab_data : pandas.DataFrame
        Rows whose reference is the confab structure; columns ``target``,
        ``Uniform``, ``EI``, ``LCB``, ``N_rot``.
    bo_data : pandas.DataFrame
        Rows with an explicit reference; columns ``target``, ``Uniform``,
        ``EI``, ``LCB``, ``Confab``, ``N_rot``.
    """

    def first_mol(root, target, stem):
        # First record of <root>/<target>/<stem>.sdf.
        path = os.path.join(root, "{}/{}.sdf".format(target, stem))
        return Chem.SDMolSupplier(path)[0]

    # File stem for every sampling method, keyed by output column name.
    method_stems = {
        "EI": "EI_bayes_{}".format(seed),
        "LCB": "LCB_bayes_{}".format(seed),
        "Uniform": "uniform_{}".format(seed),
        "Confab": "confab",
    }

    bo_rows = {"target": [], "Uniform": [], "EI": [], "LCB": [], "Confab": []}
    confab_rows = {"target": [], "Uniform": [], "EI": [], "LCB": []}

    rows = file_in.iloc[:, :3].itertuples(index=False, name=None)
    for target, ref_stem, has_reference in rows:
        print(target)
        if has_reference == "Yes":
            # References whose name contains "200" live under source_2.
            ref_root = source_2 if "200" in str(ref_stem) else source_1
            reference = first_mol(ref_root, target, ref_stem)
            table = bo_rows
        else:
            # No explicit reference: compare against the confab structure.
            reference = first_mol(source_1, target, "confab")
            table = confab_rows

        table["target"].append(target)
        for column in table:
            if column == "target":
                continue
            sampled = first_mol(source_1, target, method_stems[column])
            table[column].append(
                TFP.GetTFDBetweenMolecules(sampled, reference))

    bo_data = pd.DataFrame(
        dict(bo_rows, N_rot=5),
        columns=["target", "Uniform", "EI", "LCB", "Confab", "N_rot"])
    confab_data = pd.DataFrame(
        dict(confab_rows, N_rot=5),
        columns=["target", "Uniform", "EI", "LCB", "N_rot"])
    return confab_data, bo_data
def testTorsionFingerprints(self):
    # Reference structure: the 1DWD x-ray ligand from the TFD paper
    # (JCIM, 52, 1499, 2012).
    data_dir = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data')
    template = Chem.MolFromSmiles(
        'NC(=[NH2+])c1ccc(C[C@@H](NC(=O)CNS(=O)(=O)c2ccc3ccccc3c2)C(=O)N2CCCCC2)cc1')
    mol = Chem.MolFromPDBFile(os.path.join(data_dir, '1DWD_ligand.pdb'))
    mol = AllChem.AssignBondOrdersFromTemplate(template, mol)

    # --- torsion lists ---
    nonring, ring = TorsionFingerprints.CalculateTorsionLists(mol)
    self.assertEqual(len(nonring), 11)
    self.assertEqual(len(ring), 4)
    self.assertAlmostEqual(nonring[-1][1], 180.0, 4)

    nonring, ring = TorsionFingerprints.CalculateTorsionLists(mol, maxDev='spec')
    self.assertAlmostEqual(nonring[-1][1], 90.0, 4)

    with self.assertRaises(ValueError):
        TorsionFingerprints.CalculateTorsionLists(mol, maxDev='test')

    nonring, ring = TorsionFingerprints.CalculateTorsionLists(mol, symmRadius=0)
    self.assertEqual(len(nonring[0][0]), 2)

    # --- weights ---
    # The length check uses the symmRadius=0 lists computed just above.
    weights = TorsionFingerprints.CalculateTorsionWeights(mol)
    self.assertAlmostEqual(weights[4], 1.0)
    self.assertEqual(len(weights), len(nonring + ring))

    weights = TorsionFingerprints.CalculateTorsionWeights(mol, 15, 14)
    self.assertAlmostEqual(weights[3], 1.0)

    with self.assertRaises(ValueError):
        TorsionFingerprints.CalculateTorsionWeights(mol, 15, 3)

    # --- torsion angles ---
    # ``weights`` still holds the (15, 14) result at this point.
    nonring, ring = TorsionFingerprints.CalculateTorsionLists(mol)
    angles = TorsionFingerprints.CalculateTorsionAngles(mol, nonring, ring)
    self.assertEqual(len(weights), len(angles))
    self.assertAlmostEqual(angles[2][0][0], 232.5346, 4)

    # --- torsion fingerprint deviation ---
    self.assertAlmostEqual(TorsionFingerprints.CalculateTFD(angles, angles), 0.0)

    mol2 = Chem.MolFromPDBFile(os.path.join(data_dir, '1PPC_ligand.pdb'))
    mol2 = AllChem.AssignBondOrdersFromTemplate(template, mol2)
    angles2 = TorsionFingerprints.CalculateTorsionAngles(mol2, nonring, ring)

    weights = TorsionFingerprints.CalculateTorsionWeights(mol)
    weighted_tfd = TorsionFingerprints.CalculateTFD(angles, angles2, weights=weights)
    self.assertAlmostEqual(weighted_tfd, 0.0691, 4)
    unweighted_tfd = TorsionFingerprints.CalculateTFD(angles, angles2)
    self.assertAlmostEqual(unweighted_tfd, 0.1115, 4)

    # --- wrapper functions ---
    between_mols = TorsionFingerprints.GetTFDBetweenMolecules(mol, mol2)
    self.assertAlmostEqual(between_mols, 0.0691, 4)

    # Attach the 1PPC conformer twice so ``mol`` carries three conformers.
    mol.AddConformer(mol2.GetConformer(), assignId=True)
    mol.AddConformer(mol2.GetConformer(), assignId=True)

    between_confs = TorsionFingerprints.GetTFDBetweenConformers(
        mol, confIds1=[0], confIds2=[1, 2])
    self.assertEqual(len(between_confs), 2)
    self.assertAlmostEqual(between_confs[0], 0.0691, 4)

    tfd_matrix = TorsionFingerprints.GetTFDMatrix(mol)
    self.assertEqual(len(tfd_matrix), 3)
if pred is None: # in case of failure entry2RMSD[refEntry] = '' entry2TFD[refEntry] = '' continue predEntry = pred.GetProp('_Name') assert(refEntry == predEntry) try: rmsd = AllChem.GetBestRMS(ref, pred) except: rmsd = '' try: m = Chem.MolFromSmiles(Chem.MolToSmiles(ref)) ref = AllChem.AssignBondOrdersFromTemplate(m, ref) pred = AllChem.AssignBondOrdersFromTemplate(m, pred) tfd = TorsionFingerprints.GetTFDBetweenMolecules(ref, pred) except: tfd = '' entry2RMSD[refEntry] = rmsd entry2TFD[refEntry] = tfd # See https://baoilleach.blogspot.com/2010/11/automorphisms-isomorphisms-symmetry.html print("Entry,SMILES,RMSD,Bond error,Angle error,Torsion error,TFD,Stereo correct") for ref, pred in zip(pybel.readfile("sdf", refFileName), pybel.readfile("sdf", predFileName)): refMol = ref.OBMol predMol = pred.OBMol refEntry = refMol.GetTitle() predEntry = predMol.GetTitle() assert refEntry == predEntry