Beispiel #1
0
    def test10NormalizeFromData(self):
        data = """//	Name	SMIRKS
Nitro to N+(O-)=O	[N,P,As,Sb;X3:1](=[O,S,Se,Te:2])=[O,S,Se,Te:3]>>[*+1:1]([*-1:2])=[*:3]
Sulfone to S(=O)(=O)	[S+2:1]([O-:2])([O-:3])>>[S+0:1](=[O-0:2])(=[O-0:3])
Pyridine oxide to n+O-	[n:1]=[O:2]>>[n+:1][O-:2]
// Azide to N=N+=N-	[*,H:1][N:2]=[N:3]#[N:4]>>[*,H:1][N:2]=[N+:3]=[N-:4]
"""
        normalizer1 = rdMolStandardize.Normalizer()
        params = rdMolStandardize.CleanupParameters()
        normalizer2 = rdMolStandardize.NormalizerFromData(data, params)

        imol = Chem.MolFromSmiles("O=N(=O)CCN=N#N", sanitize=False)
        mol1 = normalizer1.normalize(imol)
        mol2 = normalizer2.normalize(imol)
        self.assertEqual(Chem.MolToSmiles(imol), "N#N=NCCN(=O)=O")
        self.assertEqual(Chem.MolToSmiles(mol1), "[N-]=[N+]=NCC[N+](=O)[O-]")
        self.assertEqual(Chem.MolToSmiles(mol2), "N#N=NCC[N+](=O)[O-]")
    def test13Tautomers(self):
        """Exercise tautomer canonicalization, enumeration and scoring.

        Covers the default enumerator, an enumerator built from
        CleanupParameters, custom scoring callables (named functions and
        lambdas), rejection of a non-numeric score, and ScoreTautomer.
        """
        enol_input = "C1(=CCCCC1)O"
        keto_smiles = "O=C1CCCCC1"
        enol_smiles = "OC1=CCCCC1"

        # default-constructed enumerator
        enumerator = rdMolStandardize.TautomerEnumerator()
        mol = Chem.MolFromSmiles(enol_input)
        self.assertEqual(
            Chem.MolToSmiles(enumerator.Canonicalize(mol)), keto_smiles)

        # enumerator constructed from (default) cleanup parameters
        enumerator = rdMolStandardize.TautomerEnumerator(
            rdMolStandardize.CleanupParameters())
        mol = Chem.MolFromSmiles(enol_input)
        self.assertEqual(
            Chem.MolToSmiles(enumerator.Canonicalize(mol)), keto_smiles)

        enumerated = enumerator.Enumerate(mol)
        self.assertEqual(len(enumerated), 2)
        self.assertEqual(
            sorted(Chem.MolToSmiles(taut) for taut in enumerated),
            ['O=C1CCCCC1', 'OC1=CCCCC1'])

        def count_hydroxyls(candidate):
            """Toy scoring function: number of [OH] matches."""
            pattern = Chem.MolFromSmarts('[OH]')
            return len(candidate.GetSubstructMatches(pattern))

        def count_carbonyls(candidate):
            """Toy scoring function: number of O=C matches."""
            pattern = Chem.MolFromSmarts('O=C')
            return len(candidate.GetSubstructMatches(pattern))

        # the scoring callable decides which tautomer is canonical
        mol = Chem.MolFromSmiles(enol_input)
        picked = enumerator.Canonicalize(mol, count_hydroxyls)
        self.assertEqual(Chem.MolToSmiles(picked), enol_smiles)
        picked = enumerator.Canonicalize(mol, count_carbonyls)
        self.assertEqual(Chem.MolToSmiles(picked), keto_smiles)

        # make sure lambdas work
        picked = enumerator.Canonicalize(
            mol,
            lambda x: len(x.GetSubstructMatches(Chem.MolFromSmarts('C=O'))))
        self.assertEqual(Chem.MolToSmiles(picked), keto_smiles)

        # make sure we behave if we return something bogus from the scoring function
        with self.assertRaises(TypeError):
            enumerator.Canonicalize(mol, lambda x: 'fail')

        # built-in scoring of two 2-aminopyridine tautomers
        imino_form = Chem.MolFromSmiles('N=c1[nH]cccc1')
        self.assertEqual(enumerator.ScoreTautomer(imino_form), 99)
        amino_form = Chem.MolFromSmiles('Nc1ncccc1')
        self.assertEqual(enumerator.ScoreTautomer(amino_form), 100)
Beispiel #3
0
 def test19NormalizeFromParams(self):
     """A normalizations file that does not exist must raise OSError."""
     cleanup = rdMolStandardize.CleanupParameters()
     cleanup.normalizationsFile = "ThisFileDoesNotExist.txt"
     # Callable form of assertRaises: the constructor is invoked with
     # `cleanup` and is expected to fail while loading the missing file.
     self.assertRaises(OSError, rdMolStandardize.NormalizerFromParams,
                       cleanup)
Beispiel #4
0
    def test17PickCanonicalCIPChangeOnChiralCenter(self):
        """Check chirality and CIP label of atom 5 across enumerator settings.

        Two molecules are used: one whose stereocenter takes part in
        tautomerism and one whose stereocenter does not.  For each, the
        canonical tautomer (via Canonicalize) and the best-scoring
        enumerated tautomer (via Enumerate + ScoreTautomer) are checked
        under combinations of tautomerRemoveSp3Stereo and
        tautomerReassignStereo.
        """
        cw = Chem.ChiralType.CHI_TETRAHEDRAL_CW
        unspecified = Chem.ChiralType.CHI_UNSPECIFIED
        completed = rdMolStandardize.TautomerEnumeratorStatus.Completed
        remove_sp3 = "tautomerRemoveSp3Stereo"
        reassign = "tautomerReassignStereo"

        def make_enumerator(settings):
            """Default enumerator, or one built from params with *settings* applied."""
            if not settings:
                return rdMolStandardize.TautomerEnumerator()
            params = rdMolStandardize.CleanupParameters()
            for attr, value in settings.items():
                setattr(params, attr, value)
            return rdMolStandardize.TautomerEnumerator(params)

        def best_scoring(result):
            """Return the enumerated tautomer with the highest (score, index)."""
            _, winner = max(
                (rdMolStandardize.TautomerEnumerator.ScoreTautomer(t), i)
                for i, t in enumerate(result.tautomers))
            return result.tautomers[winner]

        def check_center(taut, tag, cip, smiles):
            """Assert chiral tag, CIP code (None = property absent) and SMILES."""
            self.assertIsNotNone(taut)
            center = taut.GetAtomWithIdx(5)
            self.assertEqual(center.GetChiralTag(), tag)
            if cip is None:
                self.assertFalse(center.HasProp("_CIPCode"))
            else:
                self.assertEqual(center.GetProp("_CIPCode"), cip)
            self.assertEqual(Chem.MolToSmiles(taut), smiles)

        def check_canonical(mol, settings, tag, cip, smiles):
            """Canonicalize *mol* under *settings* and check atom 5."""
            te = make_enumerator(settings)
            check_center(te.Canonicalize(mol), tag, cip, smiles)

        def check_enumerated(mol, settings, count, tag, cip, smiles):
            """Enumerate *mol* under *settings*, then check the best-scoring tautomer."""
            te = make_enumerator(settings)
            result = te.Enumerate(mol)
            self.assertEqual(result.status, completed)
            self.assertEqual(len(result.tautomers), count)
            check_center(best_scoring(result), tag, cip, smiles)

        # ---- stereocenter that is itself involved in tautomerism ----
        mol = Chem.MolFromSmiles("CC\\C=C(/O)[C@@H](C)C(C)=O")
        self.assertIsNotNone(mol)
        self.assertEqual(mol.GetAtomWithIdx(5).GetChiralTag(), cw)
        self.assertEqual(mol.GetAtomWithIdx(5).GetProp("_CIPCode"), "R")

        flat = "CCCC(=O)C(C)C(C)=O"
        chiral = "CCCC(=O)[C@@H](C)C(C)=O"

        canonical_cases = (
            # default settings: the chirality disappears
            ({}, unspecified, None, flat),
            # tautomerRemoveSp3Stereo = False: the chirality stays
            ({remove_sp3: False}, cw, "S", chiral),
        )
        for settings, tag, cip, smiles in canonical_cases:
            check_canonical(mol, settings, tag, cip, smiles)

        enumerated_cases = (
            # the chirality disappears; the reassignStereo setting has
            # no influence
            ({}, unspecified, None, flat),
            ({reassign: False}, unspecified, None, flat),
            # the chirality stays; as reassignStereo is true by default the
            # CIP code has been recomputed and is now S (correct)
            ({remove_sp3: False}, cw, "S", chiral),
            # the chirality stays; as reassignStereo is false the CIP code
            # has not been recomputed and is still R (incorrect)
            ({remove_sp3: False, reassign: False}, cw, "R", chiral),
        )
        for settings, tag, cip, smiles in enumerated_cases:
            check_enumerated(mol, settings, 8, tag, cip, smiles)

        # ---- stereocenter that is not involved in tautomerism ----
        mol = Chem.MolFromSmiles("CC\\C=C(/O)[C@@](CC)(C)C(C)=O")
        self.assertIsNotNone(mol)
        self.assertEqual(mol.GetAtomWithIdx(5).GetProp("_CIPCode"), "S")
        self.assertEqual(mol.GetAtomWithIdx(5).GetChiralTag(), cw)

        quaternary = "CCCC(=O)[C@](C)(CC)C(C)=O"

        # the chirality stays no matter how tautomerRemoveSp3Stereo is set
        for settings in ({}, {remove_sp3: False}):
            check_canonical(mol, settings, cw, "R", quaternary)

        enumerated_cases = (
            # reassignStereo true (default): CIP recomputed, now R (correct)
            ({}, "R"),
            # reassignStereo false: CIP not recomputed, still S (incorrect)
            ({reassign: False}, "S"),
            # reassignStereo true (default): CIP recomputed, now R (correct)
            ({remove_sp3: False}, "R"),
            # reassignStereo false: CIP not recomputed, still S (incorrect)
            ({remove_sp3: False, reassign: False}, "S"),
        )
        for settings, cip in enumerated_cases:
            check_enumerated(mol, settings, 4, cw, cip, quaternary)
Beispiel #5
0
    def test16EnumeratorCallback(self):
        """Check that a Python callback can cancel tautomer enumeration.

        A callback with a time budget is attached to the enumerator; the run
        must end either Canceled with fewer than 375 tautomers or Completed
        with exactly 375.  Callback subclasses without a usable __call__
        must be rejected by SetCallback with AttributeError.
        """
        status = rdMolStandardize.TautomerEnumeratorStatus

        class MyTautomerEnumeratorCallback(
                rdMolStandardize.TautomerEnumeratorCallback):
            """Lets enumeration continue until the time budget is used up."""

            def __init__(self, parent, timeout_ms):
                super().__init__()
                self._parent = parent
                self._timeout = timedelta(milliseconds=timeout_ms)
                # the clock starts when the callback object is created
                self._start_time = datetime.now()

            def __call__(self, mol, res):
                self._parent.assertTrue(isinstance(mol, Chem.Mol))
                self._parent.assertTrue(
                    isinstance(res, rdMolStandardize.TautomerEnumeratorResult))
                elapsed = datetime.now() - self._start_time
                # True means "keep going"
                return (elapsed < self._timeout)

        class MyBrokenCallback(rdMolStandardize.TautomerEnumeratorCallback):
            """Subclass that does not define __call__ at all."""

        class MyBrokenCallback2(rdMolStandardize.TautomerEnumeratorCallback):
            """Subclass whose __call__ attribute is not callable."""
            __call__ = 1

        # Test a structure with hundreds of tautomers.
        m68 = Chem.MolFromSmiles("[H][C](CO)(NC(=O)C1=C(O)C(O)=CC=C1)C(O)=O")

        params = rdMolStandardize.CleanupParameters()
        params.maxTransforms = 10000
        params.maxTautomers = 10000

        def enumerate_with_budget(budget_ms, timeout_message):
            """Enumerate m68 under a time budget; accept exactly one outcome."""
            te = rdMolStandardize.TautomerEnumerator(params)
            te.SetCallback(MyTautomerEnumeratorCallback(self, budget_ms))
            result = te.Enumerate(m68)
            n_tautomers = len(result.tautomers)
            timed_out = (n_tautomers < 375
                         and result.status == status.Canceled)
            finished = (n_tautomers == 375
                        and result.status == status.Completed)
            if timed_out:
                print(timeout_message, file=sys.stderr)
            if finished:
                print("Enumeration has completed", file=sys.stderr)
            self.assertTrue(timed_out or finished)
            self.assertTrue(timed_out ^ finished)

        # either the enumeration was canceled due to timeout
        # or it has completed very quickly
        enumerate_with_budget(
            50.0, "Enumeration was canceled due to timeout (50 ms)")

        # either the enumeration completed
        # or it ran very slowly and was canceled due to timeout
        enumerate_with_budget(
            10000.0, "Enumeration was canceled due to timeout (10 s)")

        enumerator = rdMolStandardize.TautomerEnumerator(params)
        for broken_callback in (MyBrokenCallback, MyBrokenCallback2):
            with self.assertRaises(AttributeError):
                enumerator.SetCallback(broken_callback())
Beispiel #6
0
    def test15EnumeratorParams(self):
        """Exercise TautomerEnumerator limits, the result container and stereo flags.

        Sections, in order:
          * default limits vs. an explicit maxTautomers cap on a molecule
            with hundreds of tautomers;
          * the access protocol of the enumeration result (iteration,
            indexing, negative indices, call syntax, smiles list and
            smilesTautomerMap) on (S)-alanine;
          * tautomerRemoveSp3Stereo on (S)-alanine;
          * tautomerRemoveBondStereo on an E and a Z enol.
        """
        # Test a structure with hundreds of tautomers.
        smi68 = "[H][C](CO)(NC(=O)C1=C(O)C(O)=CC=C1)C(O)=O"
        m68 = Chem.MolFromSmiles(smi68)

        # with default settings the run stops on the transform limit
        enumerator = rdMolStandardize.TautomerEnumerator()
        res68 = enumerator.Enumerate(m68)
        self.assertEqual(len(res68), 292)
        self.assertEqual(len(res68.tautomers), len(res68))
        self.assertEqual(
            res68.status,
            rdMolStandardize.TautomerEnumeratorStatus.MaxTransformsReached)

        # an explicit cap on the number of tautomers is honoured and reported
        params = rdMolStandardize.CleanupParameters()
        params.maxTautomers = 50
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res68 = enumerator.Enumerate(m68)
        self.assertEqual(len(res68), 50)
        self.assertEqual(
            res68.status,
            rdMolStandardize.TautomerEnumeratorStatus.MaxTautomersReached)

        sAlaSmi = "C[C@H](N)C(=O)O"
        sAla = Chem.MolFromSmiles(sAlaSmi)
        # test remove (S)-Ala stereochemistry
        self.assertEqual(
            sAla.GetAtomWithIdx(1).GetChiralTag(),
            Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
        self.assertEqual(sAla.GetAtomWithIdx(1).GetProp("_CIPCode"), "S")
        params = rdMolStandardize.CleanupParameters()
        params.tautomerRemoveSp3Stereo = True
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res = enumerator.Enumerate(sAla)
        # iterating the result object directly ...
        for taut in res:
            self.assertEqual(
                taut.GetAtomWithIdx(1).GetChiralTag(),
                Chem.ChiralType.CHI_UNSPECIFIED)
            self.assertFalse(taut.GetAtomWithIdx(1).HasProp("_CIPCode"))
        # ... and iterating its .tautomers attribute must both work
        for taut in res.tautomers:
            self.assertEqual(
                taut.GetAtomWithIdx(1).GetChiralTag(),
                Chem.ChiralType.CHI_UNSPECIFIED)
            self.assertFalse(taut.GetAtomWithIdx(1).HasProp("_CIPCode"))
        # both iteration routes yield the same molecules in the same order
        for i, taut in enumerate(res):
            self.assertEqual(Chem.MolToSmiles(taut),
                             Chem.MolToSmiles(res.tautomers[i]))
        # every view of the result reports the same length
        self.assertEqual(len(res), len(res.smiles))
        self.assertEqual(len(res), len(res.tautomers))
        self.assertEqual(len(res), len(res()))
        self.assertEqual(len(res), len(res.smilesTautomerMap))
        # positional access agrees across tautomers, smiles and the map
        for i, taut in enumerate(res.tautomers):
            self.assertEqual(Chem.MolToSmiles(taut), Chem.MolToSmiles(res[i]))
            self.assertEqual(Chem.MolToSmiles(taut), res.smiles[i])
            self.assertEqual(
                Chem.MolToSmiles(taut),
                Chem.MolToSmiles(res.smilesTautomerMap.values()[i].tautomer))
        # keys(), values() and items() of the map follow the same order
        for i, k in enumerate(res.smilesTautomerMap.keys()):
            self.assertEqual(k, res.smiles[i])
        for i, v in enumerate(res.smilesTautomerMap.values()):
            self.assertEqual(Chem.MolToSmiles(v.tautomer),
                             Chem.MolToSmiles(res[i]))
        for i, (k, v) in enumerate(res.smilesTautomerMap.items()):
            self.assertEqual(k, res.smiles[i])
            self.assertEqual(Chem.MolToSmiles(v.tautomer),
                             Chem.MolToSmiles(res[i]))
        for i, smiles in enumerate(res.smiles):
            self.assertEqual(smiles, Chem.MolToSmiles(res[i]))
            self.assertEqual(smiles, res.smilesTautomerMap.keys()[i])
        # negative indices address elements from the end
        self.assertEqual(Chem.MolToSmiles(res.tautomers[-1]),
                         Chem.MolToSmiles(res[-1]))
        self.assertEqual(Chem.MolToSmiles(res[-1]),
                         Chem.MolToSmiles(res[len(res) - 1]))
        self.assertEqual(Chem.MolToSmiles(res.tautomers[-1]),
                         Chem.MolToSmiles(res.tautomers[len(res) - 1]))
        # out-of-range indices (in either direction) raise IndexError
        with self.assertRaises(IndexError):
            res[len(res)]
        with self.assertRaises(IndexError):
            res[-len(res) - 1]
        with self.assertRaises(IndexError):
            res.tautomers[len(res)]
        with self.assertRaises(IndexError):
            res.tautomers[-len(res.tautomers) - 1]

        # test retain (S)-Ala stereochemistry
        self.assertEqual(
            sAla.GetAtomWithIdx(1).GetChiralTag(),
            Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
        self.assertEqual(sAla.GetAtomWithIdx(1).GetProp("_CIPCode"), "S")
        params = rdMolStandardize.CleanupParameters()
        params.tautomerRemoveSp3Stereo = False
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res = enumerator.Enumerate(sAla)
        for taut in res:
            tautAtom = taut.GetAtomWithIdx(1)
            # stereo is only expected where atom 1 is still sp3 in this tautomer
            if (tautAtom.GetHybridization() == Chem.HybridizationType.SP3):
                self.assertEqual(tautAtom.GetChiralTag(),
                                 Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
                self.assertTrue(tautAtom.HasProp("_CIPCode"))
                self.assertEqual(tautAtom.GetProp("_CIPCode"), "S")
            else:
                self.assertFalse(tautAtom.HasProp("_CIPCode"))
                self.assertEqual(tautAtom.GetChiralTag(),
                                 Chem.ChiralType.CHI_UNSPECIFIED)

        eEnolSmi = "C/C=C/O"
        eEnol = Chem.MolFromSmiles(eEnolSmi)
        self.assertEqual(
            eEnol.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREOE)
        # test remove enol E stereochemistry
        params = rdMolStandardize.CleanupParameters()
        params.tautomerRemoveBondStereo = True
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res = enumerator.Enumerate(eEnol)
        for taut in res.tautomers:
            self.assertEqual(
                taut.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREONONE)
        # test retain enol E stereochemistry
        params = rdMolStandardize.CleanupParameters()
        params.tautomerRemoveBondStereo = False
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res = enumerator.Enumerate(eEnol)
        for taut in res.tautomers:
            # only tautomers where bond 1 is still a double bond are checked
            if (taut.GetBondWithIdx(1).GetBondType() == Chem.BondType.DOUBLE):
                self.assertEqual(
                    taut.GetBondWithIdx(1).GetStereo(),
                    Chem.BondStereo.STEREOE)

        zEnolSmi = "C/C=C\\O"
        zEnol = Chem.MolFromSmiles(zEnolSmi)
        self.assertEqual(
            zEnol.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREOZ)
        # test remove enol Z stereochemistry
        params = rdMolStandardize.CleanupParameters()
        params.tautomerRemoveBondStereo = True
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res = enumerator.Enumerate(zEnol)
        for taut in res:
            self.assertEqual(
                taut.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREONONE)
        # test retain enol Z stereochemistry
        params = rdMolStandardize.CleanupParameters()
        params.tautomerRemoveBondStereo = False
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        res = enumerator.Enumerate(zEnol)
        for taut in res:
            # only tautomers where bond 1 is still a double bond are checked
            if (taut.GetBondWithIdx(1).GetBondType() == Chem.BondType.DOUBLE):
                self.assertEqual(
                    taut.GetBondWithIdx(1).GetStereo(),
                    Chem.BondStereo.STEREOZ)
    def test21UpdateFromJSON(self):
        """Check that UpdateParamsFromJSON overrides the standardization data.

        CleanupParameters is loaded with custom normalization, acid/base,
        fragment and tautomer-transform tables from JSON; each standardizer
        is then run once with those parameters and once with the defaults.
        """
        custom = rdMolStandardize.CleanupParameters()
        # note: these actual parameters aren't useful... they are for testing
        rdMolStandardize.UpdateParamsFromJSON(
            custom, """{
    "normalizationData":[
      {"name":"silly 1","smarts":"[Cl:1]>>[F:1]"},
      {"name":"silly 2","smarts":"[Br:1]>>[F:1]"}
    ],
    "acidbaseData":[
      {"name":"-CO2H","acid":"C(=O)[OH]","base":"C(=O)[O-]"},
      {"name":"phenol","acid":"c[OH]","base":"c[O-]"}
    ],
    "fragmentData":[
      {"name":"hydrogen", "smarts":"[H]"}, 
      {"name":"fluorine", "smarts":"[F]"}, 
      {"name":"chlorine", "smarts":"[Cl]"}
    ],
    "tautomerTransformData":[
      {"name":"1,3 (thio)keto/enol f","smarts":"[CX4!H0]-[C]=[O,S,Se,Te;X1]","bonds":"","charges":""},
      {"name":"1,3 (thio)keto/enol r","smarts":"[O,S,Se,Te;X2!H0]-[C]=[C]"}
    ]}""")

        to_smiles = Chem.MolToSmiles

        # --- tautomer enumeration / canonical tautomer ---
        aldehyde = Chem.MolFromSmiles("CCC=O")
        enumerator = rdMolStandardize.TautomerEnumerator(custom)
        found = [to_smiles(taut) for taut in enumerator.Enumerate(aldehyde)]
        self.assertEqual(found, ["CC=CO", "CCC=O"])
        canonical = rdMolStandardize.CanonicalTautomer(aldehyde, custom)
        self.assertEqual(to_smiles(canonical), "CCC=O")
        # now with defaults
        enumerator = rdMolStandardize.TautomerEnumerator()
        found = [to_smiles(taut) for taut in enumerator.Enumerate(aldehyde)]
        self.assertEqual(found, ["CC=CO", "CCC=O"])
        canonical = rdMolStandardize.CanonicalTautomer(aldehyde)
        self.assertEqual(to_smiles(canonical), "CCC=O")

        def check_custom_then_default(operation, smiles, with_custom,
                                      with_defaults):
            """Run *operation* with the custom params, then with defaults."""
            mol = Chem.MolFromSmiles(smiles)
            self.assertEqual(to_smiles(operation(mol, custom)), with_custom)
            # now with defaults
            self.assertEqual(to_smiles(operation(mol)), with_defaults)

        # --- normalization: the "silly" rules turn Cl and Br into F ---
        check_custom_then_default(
            rdMolStandardize.Normalize, 'ClCCCBr', "FCCCF", "ClCCCBr")

        # --- reionization with the two-entry acid/base table ---
        check_custom_then_default(
            rdMolStandardize.Reionize, 'c1cc([O-])cc(C(=O)O)c1',
            "O=C([O-])c1cccc(O)c1", "O=C([O-])c1cccc(O)c1")
        check_custom_then_default(
            rdMolStandardize.Reionize,
            'C1=C(C=CC(=C1)[S]([O-])=O)[S](O)(=O)=O',
            "O=S([O-])c1ccc(S(=O)(=O)O)cc1", "O=S(O)c1ccc(S(=O)(=O)[O-])cc1")

        # --- fragment removal: the custom table has no bromine entry ---
        check_custom_then_default(
            rdMolStandardize.RemoveFragments, '[F-].[Cl-].[Br-].CC',
            "CC.[Br-]", "CC")
    def test7Fragment(self):
        """Exercise FragmentRemover and LargestFragmentChooser.

        Covers default fragment removal, skip_if_all_match, the
        preferOrganic constructor keyword, and the CleanupParameters
        switches that control how the "largest" fragment is chosen.
        """
        to_smiles = Chem.MolToSmiles

        # default fragment removal leaves only the amine
        remover = rdMolStandardize.FragmentRemover()
        amine_salt = Chem.MolFromSmiles("CN(C)C.Cl.Cl.Br")
        self.assertEqual(to_smiles(remover.remove(amine_salt)), "CN(C)C")

        # largest fragment with and without the preferOrganic keyword
        chooser = rdMolStandardize.LargestFragmentChooser()
        ion_pair = Chem.MolFromSmiles("[N+](=O)([O-])[O-].[CH3+]")
        self.assertEqual(to_smiles(chooser.choose(ion_pair)),
                         "O=[N+]([O-])[O-]")

        organic_chooser = rdMolStandardize.LargestFragmentChooser(
            preferOrganic=True)
        self.assertEqual(to_smiles(organic_chooser.choose(ion_pair)), "[CH3+]")

        # when every fragment matches, nothing is removed
        remover = rdMolStandardize.FragmentRemover(skip_if_all_match=True)
        all_salts = Chem.MolFromSmiles("[Na+].Cl.Cl.Br")
        kept = remover.remove(all_salts)
        self.assertEqual(kept.GetNumAtoms(), all_salts.GetNumAtoms())

        def largest_fragment(smiles, settings):
            """SMILES of the fragment chosen with *settings* set on fresh params."""
            params = rdMolStandardize.CleanupParameters()
            for attr, value in settings.items():
                setattr(params, attr, value)
            param_chooser = rdMolStandardize.LargestFragmentChooser(params)
            return to_smiles(param_chooser.choose(Chem.MolFromSmiles(smiles)))

        heavy_only = "largestFragmentChooserCountHeavyAtomsOnly"
        use_atom_count = "largestFragmentChooserUseAtomCount"

        smi3 = "CNC[C@@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O.c1cc2c(cc1C(=O)O)oc(n2)c3cc(cc(c3)Cl)Cl"
        sugar_amine = "CNC[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO"
        benzoxazole = "O=C(O)c1ccc2nc(-c3cc(Cl)cc(Cl)c3)oc2c1"
        cases = (
            ({}, sugar_amine),
            ({heavy_only: True}, benzoxazole),
            ({use_atom_count: False}, benzoxazole),
        )
        for settings, expected in cases:
            self.assertEqual(largest_fragment(smi3, settings), expected)

        smi4 = "CC.O=[Pb]=O"
        cases = (
            ({}, "CC"),
            ({heavy_only: True}, "O=[Pb]=O"),
            ({use_atom_count: False}, "O=[Pb]=O"),
            # preferOrganic overrides both size criteria
            ({heavy_only: True, "preferOrganic": True}, "CC"),
            ({use_atom_count: False, "preferOrganic": True}, "CC"),
        )
        for settings, expected in cases:
            self.assertEqual(largest_fragment(smi4, settings), expected)
Beispiel #9
0
Trivalent O	[*:1]=[O;X2;v3;+0:2]-[#6:3]>>[*:1]=[*+1:2]-[*:3]
Sulfoxide to -S+(O-)	[!O:1][S+0;D3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4]
// this form addresses a pathological case that came up a few times in testing:
Sulfoxide to -S+(O-) 2	[!O:1][SH1+1;D3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4]
Trivalent S	[O:1]=[S;D2;+0:2]-[#6:3]>>[*:1]=[*+1:2]-[*:3]
// Note that the next one doesn't work propertly because repeated appplications
// don't carry the cations from the previous rounds through. This should be
// fixed by implementing single-molecule transformations, but that's a longer-term
// project
//Alkaline oxide to ions	[Li,Na,K;+0:1]-[O+0:2]>>([*+1:1].[O-:2])
Bad amide tautomer1	[C:1]([OH1;D1:2])=;!@[NH1:3]>>[C:1](=[OH0:2])-[NH2:3]
Bad amide tautomer2	[C:1]([OH1;D1:2])=;!@[NH0:3]>>[C:1](=[OH0:2])-[NH1:3]
Halogen with no neighbors	[F,Cl,Br,I;X0;+0:1]>>[*-1:1]
Odd pyridine/pyridazine oxide structure	[C,N;-;D2,D3:1]-[N+2;D3:2]-[O-;D1:3]>>[*-0:1]=[*+1:2]-[*-:3]
"""
# Default-constructed cleanup parameters handed to the normalizer below.
_normalizer_params = rdMolStandardize.CleanupParameters()
# Module-level normalizer built from in-memory transform data rather than a
# transforms file.
# NOTE(review): _normalization_transforms is assigned above this excerpt;
# presumably it is the name/SMIRKS table that ends just before this line --
# confirm.
_normalizer = rdMolStandardize.NormalizerFromData(_normalization_transforms,
                                                  _normalizer_params)

# SMARTS query: an uncharged Li, Na or K atom single-bonded to an uncharged
# nitrogen or oxygen atom (despite the name, N is matched as well as O).
_alkoxide_pattern = Chem.MolFromSmarts('[Li,Na,K;+0]-[#7,#8;+0]')


def normalize_mol(m):
    """
    """
    Chem.FastFindRings(m)
    if m.HasSubstructMatch(_alkoxide_pattern):
        m = Chem.RWMol(m)
        for match in m.GetSubstructMatches(_alkoxide_pattern):
            m.RemoveBond(match[0], match[1])
            m.GetAtomWithIdx(match[0]).SetFormalCharge(1)