Beispiel #1
0
 def test18TautomerEnumeratorResultIter(self):
     """Iterating an enumeration result must agree with indexing into it.

     The result is walked twice through a fresh iterator: once compared
     against indices counted up from 0, and once against negative indices
     counted up from ``-len(result)``.
     """
     smi = "Cc1nnc(NC(=O)N2CCN(Cc3ccc(F)cc3)C(=O)C2)s1"
     mol = Chem.MolFromSmiles(smi)
     self.assertIsNotNone(mol)
     enumerator = rdMolStandardize.TautomerEnumerator()
     result = enumerator.Enumerate(mol)

     # First pass: iterator order versus non-negative indexing.
     position = 0
     for taut in iter(result):
         self.assertEqual(Chem.MolToSmiles(taut),
                          Chem.MolToSmiles(result[position]))
         position += 1
     # The iterator must have produced exactly len(result) items.
     self.assertEqual(position, len(result))

     # Second pass: iterator order versus negative indexing.
     position = -len(result)
     for taut in iter(result):
         self.assertEqual(Chem.MolToSmiles(taut),
                          Chem.MolToSmiles(result[position]))
         position += 1
     # Counting up from -len(result) must end exactly at 0.
     self.assertEqual(position, 0)
 def testBasic(self):
     """The first entry of ReorderTautomers must match the canonical tautomer.

     Both molecules are compared through their SMILES strings.
     """
     m = Chem.MolFromSmiles('Oc1c(cccc3)c3nc2ccncc12')
     enumerator = rdMolStandardize.TautomerEnumerator()
     canon = enumerator.Canonicalize(m)
     reord = MolStandardize.ReorderTautomers(m)[0]
     canonSmile = Chem.MolToSmiles(canon)
     reordSmile = Chem.MolToSmiles(reord)
     # assertEquals is a deprecated alias that was removed in Python 3.12;
     # assertEqual is the supported spelling.
     self.assertEqual(canonSmile, reordSmile)
    def test13Tautomers(self):
        """Exercise canonicalization, enumeration and custom scoring functions.

        The same input SMILES is used throughout; the expected strings are
        its keto and enol forms as written by ``Chem.MolToSmiles``.
        """
        input_smiles = "C1(=CCCCC1)O"
        keto_smiles = "O=C1CCCCC1"
        enol_smiles = "OC1=CCCCC1"

        # Default-constructed enumerator.
        enumerator = rdMolStandardize.TautomerEnumerator()
        m = Chem.MolFromSmiles(input_smiles)
        canonical = enumerator.Canonicalize(m)
        self.assertEqual(Chem.MolToSmiles(canonical), keto_smiles)

        # Enumerator built from an untouched CleanupParameters object.
        params = rdMolStandardize.CleanupParameters()
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        m = Chem.MolFromSmiles(input_smiles)
        canonical = enumerator.Canonicalize(m)
        self.assertEqual(Chem.MolToSmiles(canonical), keto_smiles)

        # Enumeration yields both forms.
        tauts = enumerator.Enumerate(m)
        self.assertEqual(len(tauts), 2)
        enumerated_smiles = sorted(Chem.MolToSmiles(t) for t in tauts)
        self.assertEqual(enumerated_smiles, [keto_smiles, enol_smiles])

        def count_hydroxyls(mol):
            """Toy scoring function: number of '[OH]' matches."""
            pattern = Chem.MolFromSmarts('[OH]')
            return len(mol.GetSubstructMatches(pattern))

        def count_carbonyls(mol):
            """Toy scoring function: number of 'O=C' matches."""
            pattern = Chem.MolFromSmarts('O=C')
            return len(mol.GetSubstructMatches(pattern))

        # A user-supplied scoring function decides which form wins.
        m = Chem.MolFromSmiles(input_smiles)
        canonical = enumerator.Canonicalize(m, count_hydroxyls)
        self.assertEqual(Chem.MolToSmiles(canonical), enol_smiles)
        canonical = enumerator.Canonicalize(m, count_carbonyls)
        self.assertEqual(Chem.MolToSmiles(canonical), keto_smiles)

        # make sure lambdas work
        canonical = enumerator.Canonicalize(
            m, lambda x: len(x.GetSubstructMatches(Chem.MolFromSmarts('C=O'))))
        self.assertEqual(Chem.MolToSmiles(canonical), keto_smiles)

        # make sure we behave if we return something bogus from the scoring function
        with self.assertRaises(TypeError):
            enumerator.Canonicalize(m, lambda x: 'fail')

        # Built-in scoring of two specific tautomers.
        imine_form = Chem.MolFromSmiles('N=c1[nH]cccc1')
        self.assertEqual(enumerator.ScoreTautomer(imine_form), 99)
        amine_form = Chem.MolFromSmiles('Nc1ncccc1')
        self.assertEqual(enumerator.ScoreTautomer(amine_form), 100)
Beispiel #4
0
def enumerate_tautomers(mol: Chem.rdchem.Mol, n_variants: int = 20):
    """Return the tautomers that RDKit enumerates for a molecule.

    Original source: the `openff-toolkit` lib.

    Args:
        mol: The molecule whose tautomers should be enumerated. It is passed
            through `copy_mol` first and the copy is what gets enumerated.
        n_variants: Value handed to the enumerator's `SetMaxTautomers`.

    Returns:
        A list built from the enumeration result.
    """
    tautomer_enumerator = rdMolStandardize.TautomerEnumerator()
    tautomer_enumerator.SetMaxTautomers(n_variants)

    # Work on a copy rather than on the caller's molecule.
    working_copy = copy_mol(mol)
    return list(tautomer_enumerator.Enumerate(working_copy))
Beispiel #5
0
    def __init__(self,
                 max_num_atoms,
                 max_num_tautomers,
                 include_stereoinfo,
                 verbosity=0):
        """Store the settings and build the tautomer enumerator.

        Args:
            max_num_atoms: Stored as ``self.max_num_atoms``.
            max_num_tautomers: Stored as ``self.max_num_tautomers`` and passed
                to the enumerator's ``SetMaxTautomers``.
            include_stereoinfo: Stored as ``self.include_stereoinfo``.
            verbosity: Stored as ``self.verbosity``; defaults to 0.
        """
        self.max_num_atoms = max_num_atoms
        self.max_num_tautomers = max_num_tautomers
        self.include_stereoinfo = include_stereoinfo
        self.verbosity = verbosity

        # Build and configure the tautomer enumerator/canonicalizer before
        # publishing it on the instance.
        enumerator = rdMolStandardize.TautomerEnumerator()
        enumerator.SetMaxTautomers(self.max_num_tautomers)
        # Keep stereo information of keto/enol tautomerization.
        enumerator.SetRemoveSp3Stereo(False)
        self.tautomerizer = enumerator
Beispiel #6
0
    def test14TautomerDetails(self):
        """Check tautomer count and modified atom/bond indices on the result."""
        expected_atoms = (7, 9)
        expected_bonds = (7, 8, 9, 10, 11, 12, 14)

        enumerator = rdMolStandardize.TautomerEnumerator()
        m = Chem.MolFromSmiles("c1ccccc1CN=c1[nH]cccc1")

        # First run: count, modified atoms and modified bonds.
        result = enumerator.Enumerate(m)
        self.assertEqual(len(result.tautomers), 2)
        self.assertEqual(result.modifiedAtoms, expected_atoms)
        self.assertEqual(len(result.modifiedBonds), 7)
        self.assertEqual(result.modifiedBonds, expected_bonds)

        # Second run: only the modified atoms are inspected.
        result = enumerator.Enumerate(m)
        self.assertEqual(len(result.tautomers), 2)
        self.assertEqual(result.modifiedAtoms, expected_atoms)

        # Third run: only the modified bonds are inspected.
        result = enumerator.Enumerate(m)
        self.assertEqual(len(result.tautomers), 2)
        self.assertEqual(len(result.modifiedBonds), 7)
        self.assertEqual(result.modifiedBonds, expected_bonds)
Beispiel #7
0
def ReorderTautomers(molecule):
    """Returns the molecule's tautomers with the canonical one first.

    The canonical tautomer is the one returned by
    ``TautomerEnumerator.Canonicalize``; the remaining tautomers follow,
    ordered by sorting (SMILES, molecule) pairs.

    :param molecule: An RDKit Molecule object.
    :return: A list of Molecule objects.
    """
    enumerator = rdMolStandardize.TautomerEnumerator()
    canonical = enumerator.Canonicalize(molecule)
    canonical_smiles = Chem.MolToSmiles(canonical)

    # Pair every non-canonical tautomer with its SMILES so the pairs can
    # be sorted.
    others = []
    for taut in enumerator.Enumerate(molecule):
        taut_smiles = Chem.MolToSmiles(taut)
        if taut_smiles != canonical_smiles:
            others.append((taut_smiles, taut))
    others.sort()

    return [canonical] + [taut for _, taut in others]
def reorderTautomers(rdkit_mol):
    """Return the canonical tautomer followed by the other unique tautomers.

    Tautomers are de-duplicated by SMILES, and the non-canonical ones are
    ordered by descending ``ScoreTautomer`` value.
    """
    enumerator = rdMolStandardize.TautomerEnumerator()

    # The canonical tautomer always comes first.
    canonical = enumerator.Canonicalize(rdkit_mol)
    canonical_smiles = Chem.MolToSmiles(canonical)

    # Keep one tautomer per distinct SMILES (the first occurrence of each).
    enumerated = enumerator.Enumerate(rdkit_mol)
    all_smiles = [Chem.MolToSmiles(taut) for taut in enumerated]
    unique_smiles, first_seen = np.unique(all_smiles, return_index=True)
    unique_tauts = [enumerated[idx] for idx in first_seen.tolist()]

    # Rank by tautomer score, highest first.
    scored = [(smi, taut, enumerator.ScoreTautomer(taut))
              for smi, taut in zip(unique_smiles, unique_tauts)]
    ranked = sorted(scored, key=itemgetter(2), reverse=True)

    # The canonical tautomer is already at the front, so skip it here.
    remaining = [taut for smi, taut, _ in ranked if smi != canonical_smiles]
    return [canonical] + remaining
Beispiel #9
0
    def test14TautomerDetails(self):
        """Check the modified atom/bond indices reported through output lists."""
        expected_atoms = [7, 9]
        expected_bonds = [7, 8, 9, 10, 11, 12, 14]

        enumerator = rdMolStandardize.TautomerEnumerator()
        m = Chem.MolFromSmiles("c1ccccc1CN=c1[nH]cccc1")

        # Request both output lists in a single call.
        atoms_out, bonds_out = [], []
        tauts = enumerator.Enumerate(m,
                                     modifiedAtoms=atoms_out,
                                     modifiedBonds=bonds_out)
        self.assertEqual(len(tauts), 2)
        self.assertEqual(atoms_out, expected_atoms)
        self.assertEqual(len(bonds_out), 7)
        self.assertEqual(bonds_out, expected_bonds)

        # Request only the modified atoms.
        atoms_out = []
        tauts = enumerator.Enumerate(m, modifiedAtoms=atoms_out)
        self.assertEqual(len(tauts), 2)
        self.assertEqual(atoms_out, expected_atoms)

        # Request only the modified bonds.
        bonds_out = []
        tauts = enumerator.Enumerate(m, modifiedBonds=bonds_out)
        self.assertEqual(len(tauts), 2)
        self.assertEqual(len(bonds_out), 7)
        self.assertEqual(bonds_out, expected_bonds)
Beispiel #10
0
    def test17PickCanonicalCIPChangeOnChiralCenter(self):
        """Check chirality and CIP code of atom 5 in the best tautomer.

        Two molecules are run through ``Canonicalize`` and through
        ``Enumerate`` followed by picking the highest-scoring tautomer, under
        different ``tautomerRemoveSp3Stereo`` / ``tautomerReassignStereo``
        settings. For each scenario the chiral tag, the ``_CIPCode`` property
        of atom 5 and the output SMILES are verified.
        """
        clockwise = Chem.ChiralType.CHI_TETRAHEDRAL_CW
        unspecified = Chem.ChiralType.CHI_UNSPECIFIED
        completed = rdMolStandardize.TautomerEnumeratorStatus.Completed

        def build_enumerator(**settings):
            """Return an enumerator; keyword args are set on fresh CleanupParameters."""
            if not settings:
                return rdMolStandardize.TautomerEnumerator()
            params = rdMolStandardize.CleanupParameters()
            for attr_name, attr_value in settings.items():
                setattr(params, attr_name, attr_value)
            return rdMolStandardize.TautomerEnumerator(params)

        def highest_scoring(res):
            """Return the tautomer of ``res`` with the largest (score, index) pair."""
            score = rdMolStandardize.TautomerEnumerator.ScoreTautomer
            ranked = [(score(t), i) for i, t in enumerate(res.tautomers)]
            return res.tautomers[max(ranked)[1]]

        def verify_atom5(taut, chiral_tag, cip_code, smiles):
            """Assert tag, CIP code (None means: property absent) and SMILES."""
            self.assertIsNotNone(taut)
            atom = taut.GetAtomWithIdx(5)
            self.assertEqual(atom.GetChiralTag(), chiral_tag)
            if cip_code is None:
                self.assertFalse(atom.HasProp("_CIPCode"))
            else:
                self.assertEqual(atom.GetProp("_CIPCode"), cip_code)
            self.assertEqual(Chem.MolToSmiles(taut), smiles)

        def verify_canonical(mol, chiral_tag, cip_code, smiles, **settings):
            """Canonicalize ``mol`` and verify atom 5 of the outcome."""
            te = build_enumerator(**settings)
            verify_atom5(te.Canonicalize(mol), chiral_tag, cip_code, smiles)

        def verify_best_enumerated(mol, n_tautomers, chiral_tag, cip_code,
                                   smiles, **settings):
            """Enumerate ``mol``, check status/count, verify the best tautomer."""
            te = build_enumerator(**settings)
            res = te.Enumerate(mol)
            self.assertEqual(res.status, completed)
            self.assertEqual(len(res.tautomers), n_tautomers)
            verify_atom5(highest_scoring(res), chiral_tag, cip_code, smiles)

        # ---- first molecule -------------------------------------------
        mol = Chem.MolFromSmiles("CC\\C=C(/O)[C@@H](C)C(C)=O")
        self.assertIsNotNone(mol)
        self.assertEqual(mol.GetAtomWithIdx(5).GetChiralTag(), clockwise)
        self.assertEqual(mol.GetAtomWithIdx(5).GetProp("_CIPCode"), "R")

        achiral_smiles = "CCCC(=O)C(C)C(C)=O"
        chiral_smiles = "CCCC(=O)[C@@H](C)C(C)=O"

        # here the chirality disappears as the chiral center is itself involved in tautomerism
        verify_canonical(mol, unspecified, None, achiral_smiles)

        # here the chirality stays even if the chiral center is itself involved in tautomerism
        # because of the tautomerRemoveSp3Stereo parameter being set to false
        verify_canonical(mol, clockwise, "S", chiral_smiles,
                         tautomerRemoveSp3Stereo=False)

        # here the chirality disappears as the chiral center is itself involved in tautomerism
        # the reassignStereo setting has no influence
        verify_best_enumerated(mol, 8, unspecified, None, achiral_smiles)

        # here the chirality disappears as the chiral center is itself involved in tautomerism
        # the reassignStereo setting has no influence
        verify_best_enumerated(mol, 8, unspecified, None, achiral_smiles,
                               tautomerReassignStereo=False)

        # here the chirality stays even if the chiral center is itself involved in tautomerism
        # because of the tautomerRemoveSp3Stereo parameter being set to false
        # as reassignStereo by default is true, the CIP code has been recomputed
        # and therefore it is now S (correct)
        verify_best_enumerated(mol, 8, clockwise, "S", chiral_smiles,
                               tautomerRemoveSp3Stereo=False)

        # here the chirality stays even if the chiral center is itself involved in tautomerism
        # because of the tautomerRemoveSp3Stereo parameter being set to false
        # as reassignStereo is false, the CIP code has not been recomputed
        # and therefore it is still R (incorrect)
        verify_best_enumerated(mol, 8, clockwise, "R", chiral_smiles,
                               tautomerRemoveSp3Stereo=False,
                               tautomerReassignStereo=False)

        # ---- second molecule ------------------------------------------
        mol = Chem.MolFromSmiles("CC\\C=C(/O)[C@@](CC)(C)C(C)=O")
        self.assertIsNotNone(mol)
        self.assertEqual(mol.GetAtomWithIdx(5).GetProp("_CIPCode"), "S")
        self.assertEqual(mol.GetAtomWithIdx(5).GetChiralTag(), clockwise)

        quaternary_smiles = "CCCC(=O)[C@](C)(CC)C(C)=O"

        # here the chirality stays no matter how tautomerRemoveSp3Stereo
        # is set as the chiral center is not involved in tautomerism
        verify_canonical(mol, clockwise, "R", quaternary_smiles)
        verify_canonical(mol, clockwise, "R", quaternary_smiles,
                         tautomerRemoveSp3Stereo=False)

        # as reassignStereo by default is true, the CIP code has been recomputed
        # and therefore it is now R (correct)
        verify_best_enumerated(mol, 4, clockwise, "R", quaternary_smiles)

        # as reassignStereo is false, the CIP code has not been recomputed
        # and therefore it is still S (incorrect)
        verify_best_enumerated(mol, 4, clockwise, "S", quaternary_smiles,
                               tautomerReassignStereo=False)

        # as reassignStereo by default is true, the CIP code has been recomputed
        # and therefore it is now R (correct)
        verify_best_enumerated(mol, 4, clockwise, "R", quaternary_smiles,
                               tautomerRemoveSp3Stereo=False)

        # here the chirality stays even if the tautomerRemoveSp3Stereo parameter
        # is set to false as the chiral center is not involved in tautomerism
        # as reassignStereo is false, the CIP code has not been recomputed
        # and therefore it is still S (incorrect)
        verify_best_enumerated(mol, 4, clockwise, "S", quaternary_smiles,
                               tautomerRemoveSp3Stereo=False,
                               tautomerReassignStereo=False)
Beispiel #11
0
    def test16EnumeratorCallback(self):
        """Check enumeration with a time-limited callback and with broken callbacks.

        The enumeration is run with a 50 ms and a 10 s time budget; each run
        must either be reported as canceled with fewer than 375 tautomers or
        as completed with exactly 375. Callback subclasses without a usable
        ``__call__`` must be rejected by ``SetCallback`` with AttributeError.
        """
        status_enum = rdMolStandardize.TautomerEnumeratorStatus

        class MyTautomerEnumeratorCallback(
                rdMolStandardize.TautomerEnumeratorCallback):
            """Callback returning True only while its time budget is not used up."""

            def __init__(self, parent, timeout_ms):
                super().__init__()
                self._parent = parent
                budget = timedelta(milliseconds=timeout_ms)
                self._deadline = datetime.now() + budget

            def __call__(self, mol, res):
                self._parent.assertTrue(isinstance(mol, Chem.Mol))
                self._parent.assertTrue(
                    isinstance(res, rdMolStandardize.TautomerEnumeratorResult))
                return datetime.now() < self._deadline

        class MyBrokenCallback(rdMolStandardize.TautomerEnumeratorCallback):
            """Subclass that does not define __call__ at all."""

        class MyBrokenCallback2(rdMolStandardize.TautomerEnumeratorCallback):
            """Subclass whose __call__ is not callable."""
            __call__ = 1

        # Test a structure with hundreds of tautomers.
        smi68 = "[H][C](CO)(NC(=O)C1=C(O)C(O)=CC=C1)C(O)=O"
        m68 = Chem.MolFromSmiles(smi68)

        params = rdMolStandardize.CleanupParameters()
        params.maxTransforms = 10000
        params.maxTautomers = 10000

        def run_with_budget(timeout_ms, timeout_message):
            """Enumerate m68 under a time budget and check the outcome."""
            enumerator = rdMolStandardize.TautomerEnumerator(params)
            enumerator.SetCallback(
                MyTautomerEnumeratorCallback(self, timeout_ms))
            res68 = enumerator.Enumerate(m68)
            n_tautomers = len(res68.tautomers)
            hasReachedTimeout = (n_tautomers < 375
                                 and res68.status == status_enum.Canceled)
            hasCompleted = (n_tautomers == 375
                            and res68.status == status_enum.Completed)
            if hasReachedTimeout:
                print(timeout_message, file=sys.stderr)
            if hasCompleted:
                print("Enumeration has completed", file=sys.stderr)
            # Exactly one of the two outcomes must hold.
            self.assertTrue(hasReachedTimeout or hasCompleted)
            self.assertTrue(hasReachedTimeout ^ hasCompleted)

        # either the enumeration was canceled due to timeout
        # or it has completed very quickly
        run_with_budget(50.0,
                        "Enumeration was canceled due to timeout (50 ms)")

        # either the enumeration completed
        # or it ran very slowly and was canceled due to timeout
        run_with_budget(10000.0,
                        "Enumeration was canceled due to timeout (10 s)")

        # Callbacks lacking a usable __call__ are rejected.
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        for broken_cls in (MyBrokenCallback, MyBrokenCallback2):
            with self.assertRaises(AttributeError):
                enumerator.SetCallback(broken_cls())
Beispiel #12
0
    def test15EnumeratorParams(self):
        """Check how CleanupParameters affect enumeration and how results are accessed.

        Covers, in order: the default and a reduced tautomer limit on a
        molecule with many tautomers, removal/retention of sp3 stereo on
        (S)-alanine together with the different ways of reading an
        enumeration result (iteration, indexing, ``tautomers``, ``smiles``,
        ``smilesTautomerMap``, calling the result), and removal/retention of
        double-bond stereo on E and Z enols.
        """
        # Test a structure with hundreds of tautomers.
        smi68 = "[H][C](CO)(NC(=O)C1=C(O)C(O)=CC=C1)C(O)=O"
        m68 = Chem.MolFromSmiles(smi68)

        # With default settings the run is expected to stop with status
        # MaxTransformsReached after 292 tautomers.
        enumerator = rdMolStandardize.TautomerEnumerator()
        res68 = enumerator.Enumerate(m68)
        self.assertEqual(len(res68), 292)
        self.assertEqual(len(res68.tautomers), len(res68))
        self.assertEqual(
            res68.status,
            rdMolStandardize.TautomerEnumeratorStatus.MaxTransformsReached)

        # With maxTautomers lowered to 50 the run is expected to stop with
        # status MaxTautomersReached at exactly 50 tautomers.
        params = rdMolStandardize.CleanupParameters()
        params.maxTautomers = 50
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res68 = enumerator.Enumerate(m68)
        self.assertEqual(len(res68), 50)
        self.assertEqual(
            res68.status,
            rdMolStandardize.TautomerEnumeratorStatus.MaxTautomersReached)

        sAlaSmi = "C[C@H](N)C(=O)O"
        sAla = Chem.MolFromSmiles(sAlaSmi)
        # test remove (S)-Ala stereochemistry
        self.assertEqual(
            sAla.GetAtomWithIdx(1).GetChiralTag(),
            Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
        self.assertEqual(sAla.GetAtomWithIdx(1).GetProp("_CIPCode"), "S")
        params = rdMolStandardize.CleanupParameters()
        params.tautomerRemoveSp3Stereo = True
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res = enumerator.Enumerate(sAla)
        # Same stereo check twice: once iterating the result object itself,
        # once iterating its `tautomers` attribute.
        for taut in res:
            self.assertEqual(
                taut.GetAtomWithIdx(1).GetChiralTag(),
                Chem.ChiralType.CHI_UNSPECIFIED)
            self.assertFalse(taut.GetAtomWithIdx(1).HasProp("_CIPCode"))
        for taut in res.tautomers:
            self.assertEqual(
                taut.GetAtomWithIdx(1).GetChiralTag(),
                Chem.ChiralType.CHI_UNSPECIFIED)
            self.assertFalse(taut.GetAtomWithIdx(1).HasProp("_CIPCode"))
        # Iteration order of the result must match `res.tautomers`.
        for i, taut in enumerate(res):
            self.assertEqual(Chem.MolToSmiles(taut),
                             Chem.MolToSmiles(res.tautomers[i]))
        # All views of the result must report the same number of entries.
        self.assertEqual(len(res), len(res.smiles))
        self.assertEqual(len(res), len(res.tautomers))
        self.assertEqual(len(res), len(res()))
        self.assertEqual(len(res), len(res.smilesTautomerMap))
        # All views must agree entry by entry, whichever one drives the loop.
        for i, taut in enumerate(res.tautomers):
            self.assertEqual(Chem.MolToSmiles(taut), Chem.MolToSmiles(res[i]))
            self.assertEqual(Chem.MolToSmiles(taut), res.smiles[i])
            self.assertEqual(
                Chem.MolToSmiles(taut),
                Chem.MolToSmiles(res.smilesTautomerMap.values()[i].tautomer))
        for i, k in enumerate(res.smilesTautomerMap.keys()):
            self.assertEqual(k, res.smiles[i])
        for i, v in enumerate(res.smilesTautomerMap.values()):
            self.assertEqual(Chem.MolToSmiles(v.tautomer),
                             Chem.MolToSmiles(res[i]))
        for i, (k, v) in enumerate(res.smilesTautomerMap.items()):
            self.assertEqual(k, res.smiles[i])
            self.assertEqual(Chem.MolToSmiles(v.tautomer),
                             Chem.MolToSmiles(res[i]))
        for i, smiles in enumerate(res.smiles):
            self.assertEqual(smiles, Chem.MolToSmiles(res[i]))
            self.assertEqual(smiles, res.smilesTautomerMap.keys()[i])
        # Negative indices address entries from the end.
        self.assertEqual(Chem.MolToSmiles(res.tautomers[-1]),
                         Chem.MolToSmiles(res[-1]))
        self.assertEqual(Chem.MolToSmiles(res[-1]),
                         Chem.MolToSmiles(res[len(res) - 1]))
        self.assertEqual(Chem.MolToSmiles(res.tautomers[-1]),
                         Chem.MolToSmiles(res.tautomers[len(res) - 1]))
        # Out-of-range indices, positive or negative, must raise IndexError.
        with self.assertRaises(IndexError):
            res[len(res)]
        with self.assertRaises(IndexError):
            res[-len(res) - 1]
        with self.assertRaises(IndexError):
            res.tautomers[len(res)]
        with self.assertRaises(IndexError):
            res.tautomers[-len(res.tautomers) - 1]

        # test retain (S)-Ala stereochemistry
        self.assertEqual(
            sAla.GetAtomWithIdx(1).GetChiralTag(),
            Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
        self.assertEqual(sAla.GetAtomWithIdx(1).GetProp("_CIPCode"), "S")
        params = rdMolStandardize.CleanupParameters()
        params.tautomerRemoveSp3Stereo = False
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res = enumerator.Enumerate(sAla)
        for taut in res:
            tautAtom = taut.GetAtomWithIdx(1)
            # Stereo is only expected on atom 1 while it is still sp3.
            if (tautAtom.GetHybridization() == Chem.HybridizationType.SP3):
                self.assertEqual(tautAtom.GetChiralTag(),
                                 Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
                self.assertTrue(tautAtom.HasProp("_CIPCode"))
                self.assertEqual(tautAtom.GetProp("_CIPCode"), "S")
            else:
                self.assertFalse(tautAtom.HasProp("_CIPCode"))
                self.assertEqual(tautAtom.GetChiralTag(),
                                 Chem.ChiralType.CHI_UNSPECIFIED)

        eEnolSmi = "C/C=C/O"
        eEnol = Chem.MolFromSmiles(eEnolSmi)
        self.assertEqual(
            eEnol.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREOE)
        # test remove enol E stereochemistry
        params = rdMolStandardize.CleanupParameters()
        params.tautomerRemoveBondStereo = True
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res = enumerator.Enumerate(eEnol)
        for taut in res.tautomers:
            self.assertEqual(
                taut.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREONONE)
        # test retain enol E stereochemistry
        params = rdMolStandardize.CleanupParameters()
        params.tautomerRemoveBondStereo = False
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res = enumerator.Enumerate(eEnol)
        for taut in res.tautomers:
            # Only tautomers where bond 1 is still a double bond are checked.
            if (taut.GetBondWithIdx(1).GetBondType() == Chem.BondType.DOUBLE):
                self.assertEqual(
                    taut.GetBondWithIdx(1).GetStereo(),
                    Chem.BondStereo.STEREOE)

        zEnolSmi = "C/C=C\\O"
        zEnol = Chem.MolFromSmiles(zEnolSmi)
        self.assertEqual(
            zEnol.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREOZ)
        # test remove enol Z stereochemistry
        params = rdMolStandardize.CleanupParameters()
        params.tautomerRemoveBondStereo = True
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res = enumerator.Enumerate(zEnol)
        for taut in res:
            self.assertEqual(
                taut.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREONONE)
        # test retain enol Z stereochemistry
        params = rdMolStandardize.CleanupParameters()
        params.tautomerRemoveBondStereo = False
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res = enumerator.Enumerate(zEnol)
        for taut in res:
            # Only tautomers where bond 1 is still a double bond are checked.
            if (taut.GetBondWithIdx(1).GetBondType() == Chem.BondType.DOUBLE):
                self.assertEqual(
                    taut.GetBondWithIdx(1).GetStereo(),
                    Chem.BondStereo.STEREOZ)
    def test21UpdateFromJSON(self):
        """Check that parameters loaded from JSON are used by each operation.

        Every operation is run once with the JSON-updated parameters and once
        with defaults, and the resulting SMILES are compared to fixed strings.
        """
        params = rdMolStandardize.CleanupParameters()
        # note: these actual parameters aren't useful... they are for testing
        custom_json = """{
    "normalizationData":[
      {"name":"silly 1","smarts":"[Cl:1]>>[F:1]"},
      {"name":"silly 2","smarts":"[Br:1]>>[F:1]"}
    ],
    "acidbaseData":[
      {"name":"-CO2H","acid":"C(=O)[OH]","base":"C(=O)[O-]"},
      {"name":"phenol","acid":"c[OH]","base":"c[O-]"}
    ],
    "fragmentData":[
      {"name":"hydrogen", "smarts":"[H]"}, 
      {"name":"fluorine", "smarts":"[F]"}, 
      {"name":"chlorine", "smarts":"[Cl]"}
    ],
    "tautomerTransformData":[
      {"name":"1,3 (thio)keto/enol f","smarts":"[CX4!H0]-[C]=[O,S,Se,Te;X1]","bonds":"","charges":""},
      {"name":"1,3 (thio)keto/enol r","smarts":"[O,S,Se,Te;X2!H0]-[C]=[C]"}
    ]}"""
        rdMolStandardize.UpdateParamsFromJSON(params, custom_json)

        def expect_smiles(mol, expected):
            """Assert that ``mol`` is written as the SMILES ``expected``."""
            self.assertEqual(Chem.MolToSmiles(mol), expected)

        # --- tautomer enumeration / canonical tautomer ---
        m = Chem.MolFromSmiles("CCC=O")
        te = rdMolStandardize.TautomerEnumerator(params)
        enumerated = [Chem.MolToSmiles(t) for t in te.Enumerate(m)]
        self.assertEqual(enumerated, ["CC=CO", "CCC=O"])
        expect_smiles(rdMolStandardize.CanonicalTautomer(m, params), "CCC=O")
        # now with defaults
        te = rdMolStandardize.TautomerEnumerator()
        enumerated = [Chem.MolToSmiles(t) for t in te.Enumerate(m)]
        self.assertEqual(enumerated, ["CC=CO", "CCC=O"])
        expect_smiles(rdMolStandardize.CanonicalTautomer(m), "CCC=O")

        # --- normalization ---
        m = Chem.MolFromSmiles('ClCCCBr')
        expect_smiles(rdMolStandardize.Normalize(m, params), "FCCCF")
        # now with defaults
        expect_smiles(rdMolStandardize.Normalize(m), "ClCCCBr")

        # --- reionization, first molecule ---
        m = Chem.MolFromSmiles('c1cc([O-])cc(C(=O)O)c1')
        expect_smiles(rdMolStandardize.Reionize(m, params),
                      "O=C([O-])c1cccc(O)c1")
        # now with defaults
        expect_smiles(rdMolStandardize.Reionize(m), "O=C([O-])c1cccc(O)c1")

        # --- reionization, second molecule ---
        m = Chem.MolFromSmiles('C1=C(C=CC(=C1)[S]([O-])=O)[S](O)(=O)=O')
        expect_smiles(rdMolStandardize.Reionize(m, params),
                      "O=S([O-])c1ccc(S(=O)(=O)O)cc1")
        # now with defaults
        expect_smiles(rdMolStandardize.Reionize(m),
                      "O=S(O)c1ccc(S(=O)(=O)[O-])cc1")

        # --- fragment removal ---
        m = Chem.MolFromSmiles('[F-].[Cl-].[Br-].CC')
        expect_smiles(rdMolStandardize.RemoveFragments(m, params), "CC.[Br-]")
        # now with defaults
        expect_smiles(rdMolStandardize.RemoveFragments(m), "CC")
 def testLength(self):
     """ReorderTautomers must return as many entries as Enumerate produces."""
     m = Chem.MolFromSmiles('Oc1c(cccc3)c3nc2ccncc12')
     enumerator = rdMolStandardize.TautomerEnumerator()
     tauts = enumerator.Enumerate(m)
     reordtauts = MolStandardize.ReorderTautomers(m)
     # assertEquals is a deprecated alias that was removed in Python 3.12;
     # assertEqual is the supported spelling.
     self.assertEqual(len(reordtauts), len(tauts))
Beispiel #15
0
if __name__ == '__main__':
    # Command-line entry point: read one molecule from a mol file (-m) or a
    # SMILES string (-s), pass it (or, with -t, each of its enumerated
    # tautomers) through fixmol() and write the result to an SD file.
    import argparse

    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-m', '--mol')
    parser.add_argument('-s', '--smiles')
    parser.add_argument('-t',
                        '--tautomers',
                        help='Generate all tautomers',
                        action='store_true')
    args = parser.parse_args()

    # Without any input there is nothing to process; fail with a usage
    # message instead of a NameError further down.
    if args.mol is None and args.smiles is None:
        parser.error('one of -m/--mol or -s/--smiles is required')

    m = None
    if args.mol is not None:
        m = Chem.MolFromMolFile(args.mol)
        outname = os.path.splitext(args.mol)[0] + ".sdf"
    if args.smiles is not None:
        # When both inputs are given, the SMILES input takes precedence.
        m = Chem.MolFromSmiles(args.smiles)
        outname = args.smiles + ".sdf"
    # The RDKit readers hand back None for input they cannot parse.
    if m is None:
        parser.error('could not parse the input molecule')

    if args.tautomers:
        enumerator = rdMolStandardize.TautomerEnumerator()
        out_mols = [fixmol(taut) for taut in enumerator.Enumerate(m)]
    else:
        out_mols = [fixmol(m)]

    w = Chem.SDWriter(outname)
    try:
        for out_mol in out_mols:
            w.write(out_mol)
    finally:
        # Close explicitly so the output is flushed even if a write fails.
        w.close()
Beispiel #16
0
def structure_standardization(smi: str) -> str:
    """Standardize a SMILES string with RDKit and return the cleaned SMILES.

    The input is parsed into an RDKit molecule and rejected when its atom
    count (``mol.GetNumAtoms()``) exceeds the configured ``max_num_atoms``.
    Accepted molecules are reduced to their charge parent, have all isotope
    labels reset, optionally lose their stereochemistry, are canonicalized to
    a single tautomer and are finally cleaned up (sanitization, hydrogen
    removal, metal disconnection, normalization, reionization and
    stereochemistry reassignment) before being written back as SMILES.

    Settings come from the ``"standardization"`` section of
    ``ConfigDict.get_parameters()``: ``max_num_atoms``,
    ``max_num_tautomers`` and ``include_stereoinfo``.

    Failures are reported through ``logging.error`` and do not raise:
    an unreadable SMILES, a molecule that is too large, or a ``ValueError`` /
    ``AttributeError`` raised during standardization.

    Args:
        smi (str): Non-standardized SMILES string.

    Returns:
        str: Standardized SMILES string. On any of the failures listed above
        ``np.nan`` (a float) is returned instead.
    """
    standardization_param = ConfigDict.get_parameters()["standardization"]

    max_num_atoms = standardization_param["max_num_atoms"]
    max_num_tautomers = standardization_param["max_num_tautomers"]
    include_stereoinfo = standardization_param["include_stereoinfo"]

    # Tautomer enumerator/canonicalizer, limited to the configured number of
    # tautomers.
    tautomerizer = rdMolStandardize.TautomerEnumerator()
    tautomerizer.SetMaxTautomers(max_num_tautomers)
    # Keep stereo information of keto/enol tautomerization
    tautomerizer.SetRemoveSp3Stereo(False)

    def isotope_parent(mol: Chem.Mol) -> Chem.Mol:
        """Return a copy of ``mol`` with every atom's isotope label reset to 0.

        Args:
            mol (Chem.Mol): input rdkit mol object

        Returns:
            Chem.Mol: isotope parent rdkit mol object
        """
        mol = copy.deepcopy(mol)
        for atom in mol.GetAtoms():
            atom.SetIsotope(0)
        return mol

    def my_standardizer(mol: Chem.Mol) -> Chem.Mol:
        """Run the standardization steps on a copy of ``mol``.

        Steps, in order: sanitize, remove hydrogens, disconnect metals,
        normalize, reionize, reassign stereochemistry.

        Args:
            mol (Chem.Mol): non-standardized rdkit mol object

        Returns:
            Chem.Mol: standardized rdkit mol object
        """
        mol = copy.deepcopy(mol)
        Chem.SanitizeMol(mol)
        mol = Chem.RemoveHs(mol)
        mol = rdMolStandardize.MetalDisconnector().Disconnect(mol)
        mol = rdMolStandardize.Normalizer().normalize(mol)
        mol = rdMolStandardize.Reionizer().reionize(mol)
        Chem.AssignStereochemistry(mol, force=True, cleanIt=True)
        # TODO: Check this removes symmetric stereocenters
        return mol

    # MolFromSmiles returns None when the SMILES cannot be parsed.
    mol = MolFromSmiles(smi)
    if mol is None:
        logging.error("Reading Error, %s", smi)
        return np.nan

    # Size filter on the atom count of the parsed molecule.
    if mol.GetNumAtoms() > max_num_atoms:
        logging.error("Molecule too large, %s", smi)
        return np.nan

    try:
        mol = rdMolStandardize.ChargeParent(mol)
        mol = isotope_parent(mol)
        # Stereochemistry is only stripped when explicitly disabled (the
        # config value must be the literal False).
        if include_stereoinfo is False:
            Chem.RemoveStereochemistry(mol)
        mol = tautomerizer.Canonicalize(mol)
        return MolToSmiles(my_standardizer(mol))
    except (ValueError, AttributeError) as e:
        # Record molecules that failed during standardization in the log.
        logging.error("Standardization error, %s, Error Type: %s", smi, e)
        return np.nan