def TFD_for_oemols(ref_mol, query_mol):
    """
    Compute the torsion fingerprint deviation (TFD) between two OEMols.

    Both inputs are converted to RDKit molecules with rdmol_from_oemol and
    the TFD is then obtained from RDKit's TorsionFingerprints module.
    According to the original author's notes, the metric does not depend on
    which molecule is passed as the reference and which as the query.

    Args:
        ref_mol (oemol)
            An oemol that has already been read in.
        query_mol (oemol)
            An oemol that has already been read in.

    Returns:
        tfd (float)
            The torsion fingerprint deviation between ref and query.
            -1 is returned when the RDKit SMILES of the two converted
            molecules differ, and 0 is returned when the RDKit TFD call
            raises IndexError.
    """
    # Make both molecules readable by RDKit (reference first, then query).
    rd_reference = rdmol_from_oemol(ref_mol)
    rd_query = rdmol_from_oemol(query_mol)

    # Differing SMILES is treated as a conversion mistake: flag it with -1.
    if Chem.MolToSmiles(rd_query) != Chem.MolToSmiles(rd_reference):
        return -1

    try:
        return TorsionFingerprints.GetTFDBetweenMolecules(rd_reference, rd_query)
    except IndexError:
        # The original code maps this failure onto a TFD of 0.
        return 0
def testTorsionFingerprints(self):
    """Exercise the TorsionFingerprints API on the 1DWD / 1PPC ligands.

    Covers, in order: torsion-list generation (default, maxDev='spec',
    invalid maxDev, symmRadius=0), torsion weights, torsion angles, the raw
    TFD calculation, and the molecule / conformer / matrix wrappers.
    """
    TF = TorsionFingerprints
    data_dir = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data')

    # we use the xray structure from the paper (JCIM, 52, 1499, 2012): 1DWD
    template = Chem.MolFromSmiles(
        'NC(=[NH2+])c1ccc(C[C@@H](NC(=O)CNS(=O)(=O)c2ccc3ccccc3c2)C(=O)N2CCCCC2)cc1')
    ligand = AllChem.AssignBondOrdersFromTemplate(
        template, Chem.MolFromPDBFile(os.path.join(data_dir, '1DWD_ligand.pdb')))

    # --- torsion lists -----------------------------------------------------
    chain_tors, ring_tors = TF.CalculateTorsionLists(ligand)
    self.assertEqual(len(chain_tors), 11)
    self.assertEqual(len(ring_tors), 4)
    self.assertAlmostEqual(chain_tors[-1][1], 180.0, 4)

    chain_tors, ring_tors = TF.CalculateTorsionLists(ligand, maxDev='spec')
    self.assertAlmostEqual(chain_tors[-1][1], 90.0, 4)

    with self.assertRaises(ValueError):
        TF.CalculateTorsionLists(ligand, maxDev='test')

    chain_tors, ring_tors = TF.CalculateTorsionLists(ligand, symmRadius=0)
    self.assertEqual(len(chain_tors[0][0]), 2)

    # --- weights -----------------------------------------------------------
    tors_weights = TF.CalculateTorsionWeights(ligand)
    self.assertAlmostEqual(tors_weights[4], 1.0)
    # compared against the symmRadius=0 lists computed just above
    self.assertEqual(len(tors_weights), len(chain_tors + ring_tors))

    tors_weights = TF.CalculateTorsionWeights(ligand, 15, 14)
    self.assertAlmostEqual(tors_weights[3], 1.0)

    with self.assertRaises(ValueError):
        TF.CalculateTorsionWeights(ligand, 15, 3)

    # --- torsion angles ----------------------------------------------------
    chain_tors, ring_tors = TF.CalculateTorsionLists(ligand)
    angles = TF.CalculateTorsionAngles(ligand, chain_tors, ring_tors)
    self.assertEqual(len(tors_weights), len(angles))
    self.assertAlmostEqual(angles[2][0][0], 232.5346, 4)

    # --- torsion fingerprint deviation -------------------------------------
    deviation = TF.CalculateTFD(angles, angles)
    self.assertAlmostEqual(deviation, 0.0)

    other = AllChem.AssignBondOrdersFromTemplate(
        template, Chem.MolFromPDBFile(os.path.join(data_dir, '1PPC_ligand.pdb')))
    other_angles = TF.CalculateTorsionAngles(other, chain_tors, ring_tors)
    tors_weights = TF.CalculateTorsionWeights(ligand)

    deviation = TF.CalculateTFD(angles, other_angles, weights=tors_weights)
    self.assertAlmostEqual(deviation, 0.0691, 4)
    deviation = TF.CalculateTFD(angles, other_angles)
    self.assertAlmostEqual(deviation, 0.1115, 4)

    # --- wrapper functions -------------------------------------------------
    deviation = TF.GetTFDBetweenMolecules(ligand, other)
    self.assertAlmostEqual(deviation, 0.0691, 4)

    # add the second ligand's conformer twice so `ligand` carries three
    for _ in range(2):
        ligand.AddConformer(other.GetConformer(), assignId=True)

    conf_deviations = TF.GetTFDBetweenConformers(ligand, confIds1=[0], confIds2=[1, 2])
    self.assertEqual(len(conf_deviations), 2)
    self.assertAlmostEqual(conf_deviations[0], 0.0691, 4)

    tfd_matrix = TF.GetTFDMatrix(ligand)
    self.assertEqual(len(tfd_matrix), 3)
def testTorsionFingerprintsAtomReordering(self):
    """Check that the TFD between 1DWD and its reordered copy is 0.0.

    Both PDB files are given bond orders from the same SMILES template
    before the TFD is computed.
    """
    # we use the xray structure from the paper (JCIM, 52, 1499, 2012): 1DWD
    template = Chem.MolFromSmiles(
        'NC(=[NH2+])c1ccc(C[C@@H](NC(=O)CNS(=O)(=O)c2ccc3ccccc3c2)C(=O)N2CCCCC2)cc1')

    def load_ligand(pdb_name):
        # Read a PDB from the RDKit test-data directory and apply the template.
        pdb_path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', pdb_name)
        return AllChem.AssignBondOrdersFromTemplate(template, Chem.MolFromPDBFile(pdb_path))

    original = load_ligand('1DWD_ligand.pdb')
    reordered = load_ligand('1DWD_ligand_reordered.pdb')

    deviation = TorsionFingerprints.GetTFDBetweenMolecules(original, reordered)
    self.assertEqual(deviation, 0.0)
def _rdmol_display_name(mol):
    """Return the mol's '_Name' property, or '<unnamed>' when it is not set."""
    # GetProp raises KeyError for a missing property, so check first.
    return mol.GetProp('_Name') if mol.HasProp('_Name') else '<unnamed>'


def calc_tfd(ref_mol, query_mol):
    """
    Calculate Torsion Fingerprint Deviation between two molecular structures.
    RDKit is required for TFD calculation.

    References
    ----------
    Modified from the following code:
    https://github.com/MobleyLab/benchmarkff/03_analysis/compare_ffs.py

    TFD reference:
    https://pubs.acs.org/doi/10.1021/ci2002318

    Parameters
    ----------
    ref_mol : RDKit RDMol
    query_mol : RDKit RDMol

    Returns
    -------
    tfd : float
        Torsion Fingerprint Deviation between ref and query molecules.
        np.nan is returned (after printing a message) when the two
        molecules have different RDKit SMILES, or when the RDKit TFD call
        raises IndexError.

    Notes
    -----
    Molecules without a '_Name' property are reported as '<unnamed>' in the
    printed messages, so building a diagnostic can never raise KeyError.
    """
    # check if the molecules are the same
    # tfd requires the two molecules must be instances of the same molecule
    rsmiles = Chem.MolToSmiles(ref_mol)
    qsmiles = Chem.MolToSmiles(query_mol)

    if rsmiles != qsmiles:
        print(f"- WARNING: The reference mol {_rdmol_display_name(ref_mol)} and "
              f"query mol {_rdmol_display_name(query_mol)} do NOT have the same "
              f"SMILES strings as determined by RDKit MolToSmiles. "
              f"\n {rsmiles}\n {qsmiles}")
        return np.nan

    # calculate the TFD
    try:
        return TorsionFingerprints.GetTFDBetweenMolecules(ref_mol, query_mol)
    # triggered for molecules such as urea
    except IndexError:
        print(
            f"- Error calculating TFD on molecule {_rdmol_display_name(ref_mol)}."
            " Possibly no non-terminal rotatable bonds found.")
        return np.nan
def _first_sd_mol(root, relative_path):
    """Return the first record of the SD file at os.path.join(root, relative_path)."""
    return Chem.SDMolSupplier(os.path.join(root, relative_path))[0]


def get_tfd(source_1, source_2, file_in, seed):
    """
    Tabulate TFD values of sampled conformers against a reference structure.

    For every row of ``file_in`` the first column is printed and used as the
    per-target sub-directory, the second column is the base name of a
    reference SD file, and the third column selects the reference:

    * ``"Yes"``: the reference is ``<target>/<name>.sdf``, read from
      ``source_2`` when the name contains ``"200"`` and from ``source_1``
      otherwise.  TFDs are computed for the EI, LCB, uniform and confab
      files and collected in the second returned frame.
    * anything else: the reference is ``<target>/confab.sdf`` from
      ``source_1``.  TFDs are computed for the EI, LCB and uniform files and
      collected in the first returned frame.

    The compared files are always read from ``source_1`` and only the first
    molecule of each SD file is used.

    Parameters
    ----------
    source_1 : str
        Directory holding the per-target result folders.
    source_2 : str
        Directory used only for reference files whose name contains "200".
    file_in : pandas.DataFrame
        Table with at least three columns, accessed by position as
        described above.
    seed : int or str
        Value substituted into the ``EI_bayes_{seed}.sdf``,
        ``LCB_bayes_{seed}.sdf`` and ``uniform_{seed}.sdf`` file names.

    Returns
    -------
    confab_data : pandas.DataFrame
        Columns ``target, Uniform, EI, LCB, N_rot``.
    bo_data : pandas.DataFrame
        Columns ``target, Uniform, EI, LCB, Confab, N_rot``.
        In both frames ``N_rot`` is the constant 5.
    """
    bo_columns = {"target": [], "Uniform": [], "EI": [], "LCB": [], "Confab": []}
    confab_columns = {"target": [], "Uniform": [], "EI": [], "LCB": []}

    for target, ref_name, flag, *_ in file_in.itertuples(index=False, name=None):
        print(target)

        # SD files compared against the reference in either case.
        compared = {
            "EI": f"{target}/EI_bayes_{seed}.sdf",
            "LCB": f"{target}/LCB_bayes_{seed}.sdf",
            "Uniform": f"{target}/uniform_{seed}.sdf",
        }

        if flag == "Yes":
            # str() keeps the path building consistent with the "200" test
            # even when the column holds a non-string value.
            ref_root = source_2 if "200" in str(ref_name) else source_1
            ref_mol = _first_sd_mol(ref_root, f"{target}/{ref_name}.sdf")
            compared["Confab"] = f"{target}/confab.sdf"
            table = bo_columns
        else:
            ref_mol = _first_sd_mol(source_1, f"{target}/confab.sdf")
            table = confab_columns

        table["target"].append(target)
        for column, relative_path in compared.items():
            candidate = _first_sd_mol(source_1, relative_path)
            table[column].append(TFP.GetTFDBetweenMolecules(candidate, ref_mol))

    bo_data = pd.DataFrame(
        {**bo_columns, "N_rot": 5},
        columns=["target", "Uniform", "EI", "LCB", "Confab", "N_rot"])
    confab_data = pd.DataFrame(
        {**confab_columns, "N_rot": 5},
        columns=["target", "Uniform", "EI", "LCB", "N_rot"])
    return confab_data, bo_data
def calc_tfd(ref_mol, query_mol, conf_id_tag):
    """
    Calculate Torsion Fingerprint Deviation between two molecular structures.
    RDKit is required for TFD calculation.

    References
    ----------
    Modified from the following code:
    https://github.com/MobleyLab/off-ffcompare

    TFD reference:
    https://pubs.acs.org/doi/10.1021/ci2002318

    Parameters
    ----------
    ref_mol : OEMol
    query_mol : OEMol
    conf_id_tag : string
        label of the SD tag that should be the same for matching conformers
        in different files

    Returns
    -------
    tfd : float
        Torsion Fingerprint Deviation between ref and query molecules;
        np.nan (with a printed message) when the RDKit SMILES of the two
        converted molecules differ or the RDKit TFD call raises IndexError

    """
    # make both OEMols readable by RDKit (reference first, then query)
    ref_rdmol = reader.rdmol_from_oemol(ref_mol)
    que_rdmol = reader.rdmol_from_oemol(query_mol)

    # tfd requires the two molecules must be instances of the same molecule,
    # so compare their RDKit SMILES before doing anything else
    rsmiles = Chem.MolToSmiles(ref_rdmol)
    qsmiles = Chem.MolToSmiles(que_rdmol)

    if rsmiles != qsmiles:
        mismatch_report = (
            f"- WARNING: The reference mol \'{ref_mol.GetTitle()}\' and "
            f"query mol \'{query_mol.GetTitle()}\' do NOT have the same "
            "SMILES strings as determined by RDKit MolToSmiles. It is "
            "possible that they did not have matching SMILES even before "
            "conversion from OEMol to RDKit mol. Listing in order the "
            "QCArchive SMILES string, RDKit SMILES for ref mol, and "
            "RDKit SMILES for query mol:"
            f"\n {oechem.OEGetSDData(ref_mol, conf_id_tag)}"
            f"\n {rsmiles}\n {qsmiles}")
        print(mismatch_report)
        return np.nan

    # calculate the TFD
    try:
        return TorsionFingerprints.GetTFDBetweenMolecules(ref_rdmol, que_rdmol)
    # triggered for molecules such as urea
    except IndexError:
        print(
            f"- Error calculating TFD on molecule '{ref_mol.GetTitle()}'."
            " Possibly no non-terminal rotatable bonds found.")
        return np.nan
if pred is None: # in case of failure entry2RMSD[refEntry] = '' entry2TFD[refEntry] = '' continue predEntry = pred.GetProp('_Name') assert(refEntry == predEntry) try: rmsd = AllChem.GetBestRMS(ref, pred) except: rmsd = '' try: m = Chem.MolFromSmiles(Chem.MolToSmiles(ref)) ref = AllChem.AssignBondOrdersFromTemplate(m, ref) pred = AllChem.AssignBondOrdersFromTemplate(m, pred) tfd = TorsionFingerprints.GetTFDBetweenMolecules(ref, pred) except: tfd = '' entry2RMSD[refEntry] = rmsd entry2TFD[refEntry] = tfd # See https://baoilleach.blogspot.com/2010/11/automorphisms-isomorphisms-symmetry.html print("Entry,SMILES,RMSD,Bond error,Angle error,Torsion error,TFD,Stereo correct") for ref, pred in zip(pybel.readfile("sdf", refFileName), pybel.readfile("sdf", predFileName)): refMol = ref.OBMol predMol = pred.OBMol refEntry = refMol.GetTitle() predEntry = predMol.GetTitle() assert refEntry == predEntry