예제 #1
0
    def __init__(self, mol_config: MolConfig, max_steps=200):
        """Initialise the environment around the molecule in ``mol_config``.

        The configuration is deep-copied before use. If the molecule does not
        carry exactly one conformer, all of its conformers are removed and a
        fresh one is embedded from random starting coordinates.

        Parameters
        ----------
        mol_config : MolConfig
            Configuration whose ``mol`` and ``seed`` attributes are read here.
        max_steps : int, optional
            Stored as ``self.max_steps`` (default 200).

        Raises
        ------
        RuntimeError
            If the embedding call reports failure (return value ``-1``).
        """
        super(ConformerEnv, self).__init__()
        logging.info('initializing conformer environment')
        self.config = copy.deepcopy(mol_config)
        self.max_steps = max_steps
        self.total_reward = 0
        self.current_step = 0

        self.step_info = {}
        self.episode_info = {}

        self.mol = self.config.mol

        # set mol to have exactly one conformer
        if self.mol.GetNumConformers() != 1:
            # logging.warn is a deprecated alias; logging.warning is the
            # supported spelling.
            logging.warning(
                "Input molecule to environment should have exactly one conformer, none or more than one detected."
            )
            self.mol.RemoveAllConformers()
            embed_status = Chem.EmbedMolecule(self.mol,
                                              randomSeed=self.config.seed,
                                              useRandomCoords=True)
            if embed_status == -1:
                # RuntimeError is still caught by callers using
                # ``except Exception``.
                raise RuntimeError(
                    'Unable to embed molecule with conformer using rdkit')
        self.conf = self.mol.GetConformer()

        # Keep only the first atom tuple of every non-ring torsion; the ring
        # torsions and the per-torsion second element are not used here.
        nonring, _ = TorsionFingerprints.CalculateTorsionLists(self.mol)
        self.nonring = [list(atoms[0]) for atoms, _ in nonring]

        self.reset()
예제 #2
0
def butina_clustering_m(rdkit_mol, difference_matrix='tfd', threshold=0.001):
    """Cluster the conformers of a molecule with RDKit's Butina algorithm.

    Parameters
    ----------
    rdkit_mol : RDKit Mol
        Molecule carrying the conformers to cluster. It is not modified.
    difference_matrix : str, optional
        Distance metric, case-insensitive: ``'tfd'`` (torsion fingerprint
        deviation) or ``'rms'`` (conformer RMS matrix).
    threshold : float, optional
        Distance threshold passed to ``Butina.ClusterData``.

    Returns
    -------
    RDKit Mol
        A deep copy of ``rdkit_mol`` holding only the centroid conformer of
        each cluster, with conformer ids reassigned.

    Raises
    ------
    ValueError
        If ``difference_matrix`` is neither ``'tfd'`` nor ``'rms'``.
    """
    metric = difference_matrix.lower()
    # Validate first: previously an unknown metric left ``diffmat`` unbound
    # and failed later with an unrelated UnboundLocalError.
    if metric not in ('tfd', 'rms'):
        raise ValueError(
            "difference_matrix must be 'tfd' or 'rms', got %r" %
            (difference_matrix, ))

    # calculate difference matrix
    if metric == 'tfd':
        diffmat = TorsionFingerprints.GetTFDMatrix(rdkit_mol)
    else:
        diffmat = AllChem.GetConformerRMSMatrix(rdkit_mol, prealigned=False)

    # cluster conformers
    num_confs = rdkit_mol.GetNumConformers()
    clusters = Butina.ClusterData(diffmat,
                                  num_confs,
                                  threshold,
                                  isDistData=True,
                                  reordering=True)

    # The first member of each cluster is its centroid.
    centroid_idx = [members[0] for members in clusters]

    new_rdkit_mol = copy.deepcopy(rdkit_mol)
    new_rdkit_mol.RemoveAllConformers()

    # Cluster members are positions in the distance matrix, so look the
    # conformers up by position rather than by conformer id (the two only
    # coincide when ids are 0..n-1).
    conformers = list(rdkit_mol.GetConformers())
    for idx in centroid_idx:
        new_rdkit_mol.AddConformer(conformers[idx], assignId=True)

    return new_rdkit_mol
예제 #3
0
def TFD_for_oemols(ref_mol, query_mol):
    """
    Compute the torsion fingerprint deviation (TFD) between two OEMols.

    Both molecules are first converted with ``rdmol_from_oemol``; the TFD is
    then obtained from RDKit's TorsionFingerprints module. The original
    author notes that the metric is symmetric, so it does not matter which
    molecule is passed as reference and which as query.

    Args:
        ref_mol (oemol) An oemol that has already been read in.
        query_mol (oemol) An oemol that has already been read in.

    Returns:
        tfd (float) The torsion fingerprint deviation between ref and query.
            ``-1`` is returned when the converted molecules give different
            SMILES strings, and ``0`` when the TFD calculation raises
            ``IndexError``.
    """
    ref_rdmol = rdmol_from_oemol(ref_mol)
    query_rdmol = rdmol_from_oemol(query_mol)

    # Differing SMILES are treated as a conversion mistake and flagged as -1.
    smiles_match = Chem.MolToSmiles(query_rdmol) == Chem.MolToSmiles(ref_rdmol)
    if not smiles_match:
        return -1

    try:
        return TorsionFingerprints.GetTFDBetweenMolecules(
            ref_rdmol, query_rdmol)
    except IndexError:
        return 0
예제 #4
0
def prune_last_conformer(
        mol: Chem.Mol, tfd_thresh: float,
        energies: List[float]) -> Tuple[Chem.Mol, List[float]]:
    """Decide whether the newest (last) conformer of ``mol`` survives pruning.

    The last conformer is compared, by unweighted TFD (Torsional Fingerprint
    Deviation), against every other conformer. If any conformer that sorts
    before it by energy lies closer than `tfd_thresh`, the last conformer is
    removed. Otherwise it is kept, inserted at its energy-sorted position, and
    every higher-energy conformer closer than `tfd_thresh` to it is dropped.

    Parameters
    ----------
    mol : RDKit Mol
        The molecule to be pruned. All conformers except the last should be
        ordered by ascending energy.
    tfd_thresh : float
        The minimum TFD allowed between conformers. A negative value disables
        pruning.
    energies : list of float
        Energies of the conformers in `mol`, in conformer order.

    Returns
    -------
    mol : RDKit Mol
        The molecule after pruning. When the last conformer is rejected this
        is the input molecule modified in place; when it is kept this is a
        new molecule with conformer ids reassigned.
    energies : list of float
        The energies matching the conformers of the returned molecule.
    """
    if tfd_thresh < 0 or mol.GetNumConformers() <= 1:
        return mol, energies

    last = mol.GetNumConformers() - 1

    # Position the newest energy would take among the already sorted ones.
    split = bisect.bisect(energies[:-1], energies[-1])

    deviations = np.array(
        TorsionFingerprints.GetTFDBetweenConformers(mol,
                                                    range(0, last), [last],
                                                    useWeights=False))

    # A lower-energy conformer within the threshold makes the new one redundant.
    lower_all_distinct = np.all(deviations[:split] >= tfd_thresh)
    if not lower_all_distinct:
        remaining = energies[:-1]
        mol.RemoveConformer(last)
        return mol, remaining

    # Keep the lower-energy conformers, then the new one, then every
    # higher-energy conformer that is still far enough from the new one.
    survivors = list(range(0, split))
    survivors.append(last)
    for position in range(split, last):
        if deviations[position] >= tfd_thresh:
            survivors.append(position)

    pruned = Chem.Mol(mol)
    pruned.RemoveAllConformers()
    for conf_id in survivors:
        pruned.AddConformer(mol.GetConformer(conf_id), assignId=True)

    return pruned, [energies[conf_id] for conf_id in survivors]
예제 #5
0
def tfd_matrix(mol: Chem.Mol) -> np.ndarray:
    """Return the full symmetric TFD matrix for all conformers in a molecule.

    ``TorsionFingerprints.GetTFDMatrix`` is called with ``useWeights=False``
    and its flat result is written into the strict lower triangle of a square
    matrix, which is then mirrored onto the upper triangle. The diagonal
    stays zero.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule whose conformers are compared.

    Returns
    -------
    numpy.ndarray
        Square float matrix of pairwise TFD values.
    """
    condensed = TorsionFingerprints.GetTFDMatrix(mol, useWeights=False)
    # The flat list holds n * (n - 1) / 2 entries; recover n from its length.
    n = int(np.sqrt(len(condensed) * 2)) + 1
    lower = np.tril_indices(n, k=-1, m=n)
    matrix = np.zeros((n, n))
    matrix[lower] = condensed
    # Mirror the lower triangle onto the upper one.
    matrix += np.transpose(matrix)
    return matrix
예제 #6
0
def cluster_conformers(mol, mode="RMSD", threshold=2.0):
    """Cluster the conformers of ``mol`` with the Butina algorithm.

    ``mode == "TFD"`` selects the torsion fingerprint deviation matrix; any
    other value selects the conformer RMS matrix (computed with
    ``prealigned=False``). ``threshold`` is the distance threshold handed to
    ``Butina.ClusterData``, whose result is returned unchanged.
    """
    if mode == "TFD":
        distances = TorsionFingerprints.GetTFDMatrix(mol)
    else:
        distances = AllChem.GetConformerRMSMatrix(mol, prealigned=False)

    n_conformers = mol.GetNumConformers()
    return Butina.ClusterData(distances,
                              n_conformers,
                              threshold,
                              isDistData=True,
                              reordering=True)
예제 #7
0
  def testTorsionFingerprintsColinearBonds(self):
    """Check how torsion lists and weights treat bonds next to triple bonds."""
    torsion_lists = TorsionFingerprints.CalculateTorsionLists
    torsion_weights = TorsionFingerprints.CalculateTorsionWeights

    # test that single bonds adjacent to triple bonds are ignored
    internal_alkyne = Chem.MolFromSmiles('CCC#CCC')
    nonring, _ring = torsion_lists(internal_alkyne, ignoreColinearBonds=True)
    self.assertEqual(len(nonring), 0)
    alkyne_weights = torsion_weights(internal_alkyne, ignoreColinearBonds=True)
    self.assertEqual(len(alkyne_weights), 0)

    # test that they are not ignored, but alternative atoms searched for
    nonring, _ring = torsion_lists(internal_alkyne, ignoreColinearBonds=False)
    self.assertEqual(len(nonring), 1)
    self.assertEqual(nonring[0][0][0], (0, 1, 4, 5))
    alkyne_weights = torsion_weights(internal_alkyne, ignoreColinearBonds=False)
    self.assertEqual(len(alkyne_weights), 1)

    # test that single bonds adjacent to terminal triple bonds are always ignored
    terminal_alkyne = Chem.MolFromSmiles('C#CCC')
    nonring, _ring = torsion_lists(terminal_alkyne, ignoreColinearBonds=True)
    self.assertEqual(len(nonring), 0)
    nonring, _ring = torsion_lists(terminal_alkyne, ignoreColinearBonds=False)
    self.assertEqual(len(nonring), 0)
예제 #8
0
  def testTorsionFingerprintsAtomReordering(self):
    """TFD between a ligand and its atom-reordered copy must be exactly 0."""
    # we use the xray structure from the paper (JCIM, 52, 1499, 2012): 1DWD
    data_dir = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data')
    template = Chem.MolFromSmiles(
      'NC(=[NH2+])c1ccc(C[C@@H](NC(=O)CNS(=O)(=O)c2ccc3ccccc3c2)C(=O)N2CCCCC2)cc1')

    # Load both PDB files and give each the bond orders of the template.
    ligands = []
    for pdb_name in ('1DWD_ligand.pdb', '1DWD_ligand_reordered.pdb'):
      raw_mol = Chem.MolFromPDBFile(os.path.join(data_dir, pdb_name))
      ligands.append(AllChem.AssignBondOrdersFromTemplate(template, raw_mol))
    original, reordered = ligands

    deviation = TorsionFingerprints.GetTFDBetweenMolecules(original, reordered)
    self.assertEqual(deviation, 0.0)
예제 #9
0
def calc_tfd(ref_mol, query_mol):
    """
    Compute the Torsion Fingerprint Deviation (TFD) of two RDKit molecules.

    The two inputs must be instances of the same molecule; this is checked by
    comparing their RDKit SMILES strings. A mismatch, or an ``IndexError``
    from the TFD calculation, is reported on stdout and yields NaN.

    References
    ----------
    Modified from the following code:
    https://github.com/MobleyLab/benchmarkff/03_analysis/compare_ffs.py

    TFD reference:
    https://pubs.acs.org/doi/10.1021/ci2002318

    Parameters
    ----------
    ref_mol : RDKit RDMol
    query_mol : RDKit RDMol

    Returns
    -------
    tfd : float
        Torsion Fingerprint Deviation between ref and query molecules,
        or ``np.nan`` when it could not be computed

    """
    ref_smiles = Chem.MolToSmiles(ref_mol)
    query_smiles = Chem.MolToSmiles(query_mol)

    # TFD is only meaningful between instances of the same molecule.
    if ref_smiles != query_smiles:
        print(f"- WARNING: The reference mol {ref_mol.GetProp('_Name')} and "
              f"query mol {query_mol.GetProp('_Name')} do NOT have the same "
              f"SMILES strings as determined by RDKit MolToSmiles. "
              f"\n {ref_smiles}\n {query_smiles}")
        return np.nan

    try:
        return TorsionFingerprints.GetTFDBetweenMolecules(ref_mol, query_mol)
    # triggered for molecules such as urea
    except IndexError:
        print(
            f"- Error calculating TFD on molecule {ref_mol.GetProp('_Name')}."
            " Possibly no non-terminal rotatable bonds found.")
        return np.nan
예제 #10
0
def rdkit_tfd(mol):
    """Return the result of ``TorsionFingerprints.GetTFDMatrix`` for ``mol``."""
    return TorsionFingerprints.GetTFDMatrix(mol)
예제 #11
0
# For every readable molecule: add hydrogens, embed conformers, align and
# UFF-optimise them, cluster them by TFD and write one SD file per cluster
# centroid.
for mol in mols:
    if mol is None:
        continue

    mol = Chem.AddHs(mol)
    conf = AllChem.EmbedMultipleConfs(mol,
                                      numConfs=int(N),
                                      pruneRmsThresh=float(RMS),
                                      useExpTorsionAnglePrefs=True,
                                      useBasicKnowledge=True,
                                      numThreads=int(nbthread))
    if len(conf) == 0:
        # not able to make conformers
        print("Could not generate any conformers for %s" %
              (mol.GetProp("_Name")))
        continue

    Chem.rdMolAlign.AlignMolConformers(mol)
    AllChem.UFFOptimizeMoleculeConfs(mol, numThreads=int(nbthread))
    ## Here new code to discard identical conformers around an axis of symmetry (not supported by pruneRmsThresh in the previous fct)
    matrix = TorsionFingerprints.GetTFDMatrix(mol,
                                              useWeights=False,
                                              maxDev='equal',
                                              symmRadius=2,
                                              ignoreColinearBonds=True)
    conf_clusters = Butina.ClusterData(matrix,
                                       len(conf),
                                       cutoff,
                                       isDistData=True)
    confnb = 1
    for cluster in conf_clusters:
        writer = Chem.SDWriter(output_folder + "/" + mol.GetProp("_Name") +
                               "_conf_" + str(confnb) + ".sdf")
        # Close the writer even if writing fails so the file handle is
        # never leaked.
        try:
            writer.write(mol, confId=cluster[0])  # output only centroid
        finally:
            writer.close()
        confnb += 1
예제 #12
0
 def testGithub4720(self):
     """Torsion lists must be computable for a highly-coordinated sulfur."""
     # exceptions with highly-coordinated atoms
     hypervalent = Chem.MolFromSmiles('S(F)(F)(F)(F)(Cl)c1ccccc1')
     nonring_torsions, _ring_torsions = (
         TorsionFingerprints.CalculateTorsionLists(hypervalent))
     self.assertEqual(len(nonring_torsions), 1)
예제 #13
0
 def calculate(self, mol):
     """Yield a single tuple with one ``Torsion`` per entry of the first
     torsion list RDKit computes for ``mol``.

     Each ``Torsion`` is built from ``mol.get_atoms`` applied to the first
     atom-index tuple of the corresponding entry.
     """
     first_list = TorsionFingerprints.CalculateTorsionLists(
         mol.to_rdkit_mol())[0]
     torsions = []
     for atom_tuples, _ in first_list:
         torsions.append(Torsion(*mol.get_atoms(atom_tuples[0])))
     yield tuple(torsions)
예제 #14
0
def calc_tfd(ref_mol, query_mol, conf_id_tag):
    """
    Compute the Torsion Fingerprint Deviation (TFD) of two OEMol structures.

    Both molecules are converted with ``reader.rdmol_from_oemol`` and must
    give identical RDKit SMILES strings. A mismatch, or an ``IndexError``
    from the TFD calculation, is reported on stdout and yields NaN.

    References
    ----------
    Modified from the following code:
    https://github.com/MobleyLab/off-ffcompare

    TFD reference:
    https://pubs.acs.org/doi/10.1021/ci2002318

    Parameters
    ----------
    ref_mol : OEMol
    query_mol : OEMol
    conf_id_tag : string
        label of the SD tag that should be the same for matching conformers
        in different files; its value on ``ref_mol`` is printed in the
        mismatch warning

    Returns
    -------
    tfd : float
        Torsion Fingerprint Deviation between ref and query molecules,
        or ``np.nan`` when it could not be computed

    """
    ref_rdmol = reader.rdmol_from_oemol(ref_mol)
    que_rdmol = reader.rdmol_from_oemol(query_mol)

    ref_smiles = Chem.MolToSmiles(ref_rdmol)
    query_smiles = Chem.MolToSmiles(que_rdmol)

    # TFD is only meaningful between instances of the same molecule.
    if ref_smiles != query_smiles:
        print(f"- WARNING: The reference mol \'{ref_mol.GetTitle()}\' and "
              f"query mol \'{query_mol.GetTitle()}\' do NOT have the same "
              "SMILES strings as determined by RDKit MolToSmiles. It is "
              "possible that they did not have matching SMILES even before "
              "conversion from OEMol to RDKit mol. Listing in order the "
              "QCArchive SMILES string, RDKit SMILES for ref mol, and "
              "RDKit SMILES for query mol:"
              f"\n {oechem.OEGetSDData(ref_mol, conf_id_tag)}"
              f"\n {ref_smiles}\n {query_smiles}")
        return np.nan

    try:
        return TorsionFingerprints.GetTFDBetweenMolecules(
            ref_rdmol, que_rdmol)
    # triggered for molecules such as urea
    except IndexError:
        print(
            f"- Error calculating TFD on molecule '{ref_mol.GetTitle()}'."
            " Possibly no non-terminal rotatable bonds found.")
        return np.nan
def get_tfd(mol):
    """Return the TFD values between conformer 0 and conformer 1 of ``mol``.

    The result is whatever ``GetTFDBetweenConformers`` returns for the
    conformer-id lists ``[0]`` and ``[1]``.
    """
    first_ids = [0]
    second_ids = [1]
    return TorsionFingerprints.GetTFDBetweenConformers(mol, first_ids,
                                                       second_ids)
예제 #16
0
def _open_sdf(root, target, stem):
    """Open the SD file ``<root>/<target>/<stem>.sdf`` as a molecule supplier."""
    return Chem.SDMolSupplier(os.path.join(root, target + "/" + stem + ".sdf"))


def get_tfd(source_1, source_2, file_in, seed):
    """Tabulate TFD values of sampled structures against a reference.

    For every row of ``file_in`` the first molecule of the EI, LCB and
    uniform result files (and, for rows flagged ``"Yes"``, of ``confab.sdf``)
    is compared with the first molecule of the row's reference file.

    Parameters
    ----------
    source_1 : str
        Directory holding one sub-directory per target with
        ``EI_bayes_<seed>.sdf``, ``LCB_bayes_<seed>.sdf``,
        ``uniform_<seed>.sdf`` and ``confab.sdf``.
    source_2 : str
        Directory searched for the reference file when the reference name
        contains ``"200"``.
    file_in : pandas.DataFrame
        Accessed by position: column 0 is the target name, column 1 the
        reference file name without extension, column 2 a flag. Rows whose
        flag equals ``"Yes"`` use the named reference; all other rows use
        ``confab.sdf`` from ``source_1`` as the reference.
    seed
        Value formatted into the EI/LCB/uniform file names.

    Returns
    -------
    confab_data : pandas.DataFrame
        Columns ``target, Uniform, EI, LCB, N_rot`` for the rows whose
        reference is ``confab.sdf``.
    bo_data : pandas.DataFrame
        Columns ``target, Uniform, EI, LCB, Confab, N_rot`` for the rows
        flagged ``"Yes"``.
    """
    bo = {"target": [], "Uniform": [], "EI": [], "LCB": [], "Confab": []}
    confab = {"target": [], "Uniform": [], "EI": [], "LCB": []}

    for row in file_in.itertuples(index=False, name=None):
        target = row[0]
        print(target)
        ref_name = row[1]
        uses_named_reference = row[2] == "Yes"

        if uses_named_reference:
            # References whose name contains "200" live in source_2.
            ref_root = source_2 if "200" in str(ref_name) else source_1
            ref_supplier = _open_sdf(ref_root, target, ref_name)
        else:
            ref_supplier = _open_sdf(source_1, target, "confab")

        ei_supplier = _open_sdf(source_1, target,
                                "EI_bayes_{}".format(seed))
        lcb_supplier = _open_sdf(source_1, target,
                                 "LCB_bayes_{}".format(seed))
        uniform_supplier = _open_sdf(source_1, target,
                                     "uniform_{}".format(seed))
        confab_supplier = (_open_sdf(source_1, target, "confab")
                           if uses_named_reference else None)

        # Read the reference molecule once instead of once per comparison.
        reference = ref_supplier[0]
        table = bo if uses_named_reference else confab

        table["target"].append(target)
        table["EI"].append(
            TFP.GetTFDBetweenMolecules(ei_supplier[0], reference))
        table["LCB"].append(
            TFP.GetTFDBetweenMolecules(lcb_supplier[0], reference))
        if uses_named_reference:
            table["Confab"].append(
                TFP.GetTFDBetweenMolecules(confab_supplier[0], reference))
        table["Uniform"].append(
            TFP.GetTFDBetweenMolecules(uniform_supplier[0], reference))

    # N_rot is a constant column, as in the original implementation.
    bo_data = pd.DataFrame(
        {**bo, "N_rot": 5},
        columns=["target", "Uniform", "EI", "LCB", "Confab", "N_rot"])
    confab_data = pd.DataFrame(
        {**confab, "N_rot": 5},
        columns=["target", "Uniform", "EI", "LCB", "N_rot"])
    return confab_data, bo_data
예제 #17
0
  def testTorsionFingerprints(self):
    """Exercise the TorsionFingerprints API on the 1DWD and 1PPC ligands.

    Covers, in order: torsion lists, torsion weights, torsion angles, the
    TFD calculation itself, and the molecule/conformer/matrix wrappers.
    Several names (``tors_list``, ``weights``, ``tfd``) are rebound along the
    way, so the statement order matters.
    """
    # we use the xray structure from the paper (JCIM, 52, 1499, 2012): 1DWD
    refFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '1DWD_ligand.pdb')
    ref = Chem.MolFromSmiles(
      'NC(=[NH2+])c1ccc(C[C@@H](NC(=O)CNS(=O)(=O)c2ccc3ccccc3c2)C(=O)N2CCCCC2)cc1')
    mol = Chem.MolFromPDBFile(refFile)
    # the SMILES template supplies the bond orders for the PDB structure
    mol = AllChem.AssignBondOrdersFromTemplate(ref, mol)

    # the torsion lists
    tors_list, tors_list_rings = TorsionFingerprints.CalculateTorsionLists(mol)
    self.assertEqual(len(tors_list), 11)
    self.assertEqual(len(tors_list_rings), 4)
    # second element of the last non-ring entry: 180.0 by default, 90.0 with maxDev='spec'
    self.assertAlmostEqual(tors_list[-1][1], 180.0, 4)
    tors_list, tors_list_rings = TorsionFingerprints.CalculateTorsionLists(mol, maxDev='spec')
    self.assertAlmostEqual(tors_list[-1][1], 90.0, 4)
    # an unknown maxDev value must be rejected
    self.assertRaises(ValueError, TorsionFingerprints.CalculateTorsionLists, mol, maxDev='test')
    tors_list, tors_list_rings = TorsionFingerprints.CalculateTorsionLists(mol, symmRadius=0)
    # with symmRadius=0 the first entry carries two atom tuples
    self.assertEqual(len(tors_list[0][0]), 2)

    # the weights
    weights = TorsionFingerprints.CalculateTorsionWeights(mol)
    self.assertAlmostEqual(weights[4], 1.0)
    # tors_list / tors_list_rings here are still the symmRadius=0 results from above
    self.assertEqual(len(weights), len(tors_list + tors_list_rings))
    weights = TorsionFingerprints.CalculateTorsionWeights(mol, 15, 14)
    self.assertAlmostEqual(weights[3], 1.0)
    # this pair of extra arguments must be rejected
    self.assertRaises(ValueError, TorsionFingerprints.CalculateTorsionWeights, mol, 15, 3)

    # the torsion angles
    tors_list, tors_list_rings = TorsionFingerprints.CalculateTorsionLists(mol)
    torsions = TorsionFingerprints.CalculateTorsionAngles(mol, tors_list, tors_list_rings)
    self.assertEqual(len(weights), len(torsions))
    self.assertAlmostEqual(torsions[2][0][0], 232.5346, 4)

    # the torsion fingerprint deviation
    # comparing a torsion set with itself gives zero
    tfd = TorsionFingerprints.CalculateTFD(torsions, torsions)
    self.assertAlmostEqual(tfd, 0.0)
    refFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '1PPC_ligand.pdb')
    mol2 = Chem.MolFromPDBFile(refFile)
    mol2 = AllChem.AssignBondOrdersFromTemplate(ref, mol2)
    # the second structure reuses the torsion lists computed for mol
    torsions2 = TorsionFingerprints.CalculateTorsionAngles(mol2, tors_list, tors_list_rings)
    weights = TorsionFingerprints.CalculateTorsionWeights(mol)
    # weighted and unweighted TFD are expected to differ
    tfd = TorsionFingerprints.CalculateTFD(torsions, torsions2, weights=weights)
    self.assertAlmostEqual(tfd, 0.0691, 4)
    tfd = TorsionFingerprints.CalculateTFD(torsions, torsions2)
    self.assertAlmostEqual(tfd, 0.1115, 4)

    # the wrapper functions
    # expected to match the weighted value computed above
    tfd = TorsionFingerprints.GetTFDBetweenMolecules(mol, mol2)
    self.assertAlmostEqual(tfd, 0.0691, 4)

    # add the mol2 conformer to mol twice so that mol holds three conformers
    mol.AddConformer(mol2.GetConformer(), assignId=True)
    mol.AddConformer(mol2.GetConformer(), assignId=True)
    tfd = TorsionFingerprints.GetTFDBetweenConformers(mol, confIds1=[0], confIds2=[1, 2])
    self.assertEqual(len(tfd), 2)
    self.assertAlmostEqual(tfd[0], 0.0691, 4)

    # three conformers give three pairwise values
    tfdmat = TorsionFingerprints.GetTFDMatrix(mol)
    self.assertEqual(len(tfdmat), 3)
예제 #18
0
    if pred is None:  # in case of failure
        entry2RMSD[refEntry] = ''
        entry2TFD[refEntry] = ''
        continue

    predEntry = pred.GetProp('_Name')
    assert(refEntry == predEntry)
    try:
        rmsd = AllChem.GetBestRMS(ref, pred)
    except:
        rmsd = ''
    try:
        m = Chem.MolFromSmiles(Chem.MolToSmiles(ref))
        ref = AllChem.AssignBondOrdersFromTemplate(m, ref)
        pred = AllChem.AssignBondOrdersFromTemplate(m, pred)
        tfd = TorsionFingerprints.GetTFDBetweenMolecules(ref, pred)
    except:
        tfd = ''
    entry2RMSD[refEntry] = rmsd
    entry2TFD[refEntry] = tfd

# See https://baoilleach.blogspot.com/2010/11/automorphisms-isomorphisms-symmetry.html
print("Entry,SMILES,RMSD,Bond error,Angle error,Torsion error,TFD,Stereo correct")
for ref, pred in zip(pybel.readfile("sdf", refFileName),
                     pybel.readfile("sdf", predFileName)):
    refMol = ref.OBMol
    predMol = pred.OBMol

    refEntry = refMol.GetTitle()
    predEntry = predMol.GetTitle()
    assert refEntry == predEntry