def test10NormalizeFromData(self):
    """Compare the default Normalizer with one built from an inline table.

    The inline table enables the nitro, sulfone and pyridine-oxide
    transforms and leaves the azide transform commented out, so the
    custom normalizer is expected to fix the nitro group but keep the
    azide as drawn, whereas the default normalizer fixes both.
    """
    # One record per line, name and SMIRKS separated by a tab; lines that
    # start with "//" are comments.  The separators are written as explicit
    # escapes so that editors or formatters cannot silently collapse them
    # into plain spaces (which would turn the whole table into one comment).
    transform_lines = (
        "//\tName\tSMIRKS",
        "Nitro to N+(O-)=O\t"
        "[N,P,As,Sb;X3:1](=[O,S,Se,Te:2])=[O,S,Se,Te:3]>>[*+1:1]([*-1:2])=[*:3]",
        "Sulfone to S(=O)(=O)\t"
        "[S+2:1]([O-:2])([O-:3])>>[S+0:1](=[O-0:2])(=[O-0:3])",
        "Pyridine oxide to n+O-\t"
        "[n:1]=[O:2]>>[n+:1][O-:2]",
        "// Azide to N=N+=N-\t"
        "[*,H:1][N:2]=[N:3]#[N:4]>>[*,H:1][N:2]=[N+:3]=[N-:4]",
    )
    data = "".join(line + "\n" for line in transform_lines)

    normalizer1 = rdMolStandardize.Normalizer()
    params = rdMolStandardize.CleanupParameters()
    normalizer2 = rdMolStandardize.NormalizerFromData(data, params)

    # sanitize=False keeps the pentavalent nitro / azide drawing intact
    imol = Chem.MolFromSmiles("O=N(=O)CCN=N#N", sanitize=False)
    mol1 = normalizer1.normalize(imol)
    mol2 = normalizer2.normalize(imol)

    # the input molecule itself must not be modified
    self.assertEqual(Chem.MolToSmiles(imol), "N#N=NCCN(=O)=O")
    # default transforms: both nitro and azide are normalized
    self.assertEqual(Chem.MolToSmiles(mol1), "[N-]=[N+]=NCC[N+](=O)[O-]")
    # custom transforms: nitro only, the azide rule is commented out
    self.assertEqual(Chem.MolToSmiles(mol2), "N#N=NCC[N+](=O)[O-]")
def test13Tautomers(self):
    """Exercise canonicalization, enumeration and scoring of tautomers.

    Uses cyclohexenol / cyclohexanone as the keto-enol pair and checks
    that custom scoring callables (functions and lambdas) steer the
    choice of canonical tautomer.
    """
    enol_input = "C1(=CCCCC1)O"
    keto = "O=C1CCCCC1"
    enol = "OC1=CCCCC1"

    # default-constructed enumerator
    enumerator = rdMolStandardize.TautomerEnumerator()
    m = Chem.MolFromSmiles(enol_input)
    self.assertEqual(Chem.MolToSmiles(enumerator.Canonicalize(m)), keto)

    # enumerator constructed from CleanupParameters
    params = rdMolStandardize.CleanupParameters()
    enumerator = rdMolStandardize.TautomerEnumerator(params)
    m = Chem.MolFromSmiles(enol_input)
    self.assertEqual(Chem.MolToSmiles(enumerator.Canonicalize(m)), keto)

    tauts = enumerator.Enumerate(m)
    self.assertEqual(len(tauts), 2)
    taut_smiles = sorted(Chem.MolToSmiles(t) for t in tauts)
    self.assertEqual(taut_smiles, [keto, enol])

    hydroxyl = Chem.MolFromSmarts('[OH]')
    carbonyl = Chem.MolFromSmarts('O=C')

    def prefer_hydroxyls(mol):
        """Toy scoring function: number of [OH] matches."""
        return len(mol.GetSubstructMatches(hydroxyl))

    def prefer_carbonyls(mol):
        """Toy scoring function: number of O=C matches."""
        return len(mol.GetSubstructMatches(carbonyl))

    m = Chem.MolFromSmiles(enol_input)
    picked = enumerator.Canonicalize(m, prefer_hydroxyls)
    self.assertEqual(Chem.MolToSmiles(picked), enol)
    picked = enumerator.Canonicalize(m, prefer_carbonyls)
    self.assertEqual(Chem.MolToSmiles(picked), keto)

    # make sure lambdas work
    picked = enumerator.Canonicalize(
        m, lambda x: len(x.GetSubstructMatches(Chem.MolFromSmarts('C=O'))))
    self.assertEqual(Chem.MolToSmiles(picked), keto)

    # make sure we behave if we return something bogus from the scoring
    # function
    with self.assertRaises(TypeError):
        enumerator.Canonicalize(m, lambda x: 'fail')

    # built-in scoring of the 2-aminopyridine tautomer pair
    imino_form = Chem.MolFromSmiles('N=c1[nH]cccc1')
    amino_form = Chem.MolFromSmiles('Nc1ncccc1')
    self.assertEqual(enumerator.ScoreTautomer(imino_form), 99)
    self.assertEqual(enumerator.ScoreTautomer(amino_form), 100)
def test19NormalizeFromParams(self):
    """NormalizerFromParams must raise OSError for a missing file."""
    bad_params = rdMolStandardize.CleanupParameters()
    bad_params.normalizationsFile = "ThisFileDoesNotExist.txt"
    self.assertRaises(OSError, rdMolStandardize.NormalizerFromParams,
                      bad_params)
def test17PickCanonicalCIPChangeOnChiralCenter(self):
    """Check chirality and CIP label of atom 5 in the canonical tautomer.

    Two molecules are used: one whose stereocenter (atom 5) carries an H
    and takes part in tautomerism, and one whose stereocenter is
    quaternary and does not.  For each, the canonical tautomer is
    obtained both through ``Canonicalize`` and by scoring the result of
    ``Enumerate``, under every combination of the
    ``tautomerRemoveSp3Stereo`` and ``tautomerReassignStereo`` settings
    exercised below.
    """
    cw = Chem.ChiralType.CHI_TETRAHEDRAL_CW
    unspecified = Chem.ChiralType.CHI_UNSPECIFIED
    completed = rdMolStandardize.TautomerEnumeratorStatus.Completed

    def get_canonical_taut(res):
        """Return the highest-scoring tautomer of an enumeration result."""
        # (score, index) tuples: ties on score resolve to the larger index
        best_idx = max(
            (rdMolStandardize.TautomerEnumerator.ScoreTautomer(t), i)
            for i, t in enumerate(res.tautomers))[1]
        return res.tautomers[best_idx]

    def make_enumerator(**settings):
        """Build an enumerator; CleanupParameters are used only if needed."""
        if not settings:
            return rdMolStandardize.TautomerEnumerator()
        params = rdMolStandardize.CleanupParameters()
        for name, value in settings.items():
            setattr(params, name, value)
        return rdMolStandardize.TautomerEnumerator(params)

    def check_atom5(taut, tag, cip, smiles):
        """Assert chiral tag, CIP code (None = absent) and SMILES."""
        self.assertIsNotNone(taut)
        self.assertEqual(taut.GetAtomWithIdx(5).GetChiralTag(), tag)
        if cip is None:
            self.assertFalse(taut.GetAtomWithIdx(5).HasProp("_CIPCode"))
        else:
            self.assertEqual(taut.GetAtomWithIdx(5).GetProp("_CIPCode"), cip)
        self.assertEqual(Chem.MolToSmiles(taut), smiles)

    def check_canonicalize(mol, tag, cip, smiles, **settings):
        """Canonicalize with the given settings and check atom 5."""
        te = make_enumerator(**settings)
        check_atom5(te.Canonicalize(mol), tag, cip, smiles)

    def check_enumerate(mol, count, tag, cip, smiles, **settings):
        """Enumerate, check status/count, then check the best tautomer."""
        te = make_enumerator(**settings)
        res = te.Enumerate(mol)
        self.assertEqual(res.status, completed)
        self.assertEqual(len(res.tautomers), count)
        check_atom5(get_canonical_taut(res), tag, cip, smiles)

    # ---- stereocenter that is itself involved in tautomerism ----
    smi = "CC\\C=C(/O)[C@@H](C)C(C)=O"
    mol = Chem.MolFromSmiles(smi)
    self.assertIsNotNone(mol)
    self.assertEqual(mol.GetAtomWithIdx(5).GetChiralTag(), cw)
    self.assertEqual(mol.GetAtomWithIdx(5).GetProp("_CIPCode"), "R")

    flat = "CCCC(=O)C(C)C(C)=O"
    chiral = "CCCC(=O)[C@@H](C)C(C)=O"

    # here the chirality disappears as the chiral center is itself
    # involved in tautomerism
    check_canonicalize(mol, unspecified, None, flat)

    # here the chirality stays even if the chiral center is itself
    # involved in tautomerism because of the tautomerRemoveSp3Stereo
    # parameter being set to false
    check_canonicalize(mol, cw, "S", chiral, tautomerRemoveSp3Stereo=False)

    # here the chirality disappears as the chiral center is itself
    # involved in tautomerism; the reassignStereo setting has no influence
    check_enumerate(mol, 8, unspecified, None, flat)
    check_enumerate(mol, 8, unspecified, None, flat,
                    tautomerReassignStereo=False)

    # here the chirality stays because tautomerRemoveSp3Stereo is false;
    # as reassignStereo by default is true, the CIP code has been
    # recomputed and therefore it is now S (correct)
    check_enumerate(mol, 8, cw, "S", chiral, tautomerRemoveSp3Stereo=False)

    # here the chirality stays because tautomerRemoveSp3Stereo is false;
    # as reassignStereo is false, the CIP code has not been recomputed
    # and therefore it is still R (incorrect)
    check_enumerate(mol, 8, cw, "R", chiral,
                    tautomerRemoveSp3Stereo=False,
                    tautomerReassignStereo=False)

    # ---- stereocenter that is not involved in tautomerism ----
    smi = "CC\\C=C(/O)[C@@](CC)(C)C(C)=O"
    mol = Chem.MolFromSmiles(smi)
    self.assertIsNotNone(mol)
    self.assertEqual(mol.GetAtomWithIdx(5).GetProp("_CIPCode"), "S")
    self.assertEqual(mol.GetAtomWithIdx(5).GetChiralTag(), cw)

    quaternary = "CCCC(=O)[C@](C)(CC)C(C)=O"

    # here the chirality stays no matter how tautomerRemoveSp3Stereo
    # is set as the chiral center is not involved in tautomerism
    check_canonicalize(mol, cw, "R", quaternary)
    check_canonicalize(mol, cw, "R", quaternary,
                       tautomerRemoveSp3Stereo=False)

    # as reassignStereo by default is true, the CIP code has been
    # recomputed and therefore it is now R (correct)
    check_enumerate(mol, 4, cw, "R", quaternary)

    # as reassignStereo is false, the CIP code has not been recomputed
    # and therefore it is still S (incorrect)
    check_enumerate(mol, 4, cw, "S", quaternary,
                    tautomerReassignStereo=False)

    # as reassignStereo by default is true, the CIP code has been
    # recomputed and therefore it is now R (correct)
    check_enumerate(mol, 4, cw, "R", quaternary,
                    tautomerRemoveSp3Stereo=False)

    # here the chirality stays even if the tautomerRemoveSp3Stereo
    # parameter is set to false as the chiral center is not involved in
    # tautomerism; as reassignStereo is false, the CIP code has not been
    # recomputed and therefore it is still S (incorrect)
    check_enumerate(mol, 4, cw, "S", quaternary,
                    tautomerRemoveSp3Stereo=False,
                    tautomerReassignStereo=False)
def test16EnumeratorCallback(self):
    """Check that a Python callback can cancel a tautomer enumeration.

    A callback with a time budget is installed on the enumerator; the
    enumeration must either finish with all 375 tautomers (status
    Completed) or stop early with fewer (status Canceled).  Callback
    classes without a usable ``__call__`` must be rejected by
    ``SetCallback`` with AttributeError.
    """
    # Function-scope import: elapsed time is measured with a monotonic
    # clock so that system clock adjustments cannot distort the budget.
    import time

    class MyTautomerEnumeratorCallback(
            rdMolStandardize.TautomerEnumeratorCallback):
        """Returns False (cancel) once ``timeout_ms`` have elapsed."""

        def __init__(self, parent, timeout_ms):
            super().__init__()
            self._parent = parent
            self._timeout_s = timeout_ms / 1000.0
            self._start_time = time.monotonic()

        def __call__(self, mol, res):
            self._parent.assertIsInstance(mol, Chem.Mol)
            self._parent.assertIsInstance(
                res, rdMolStandardize.TautomerEnumeratorResult)
            return time.monotonic() - self._start_time < self._timeout_s

    class MyBrokenCallback(rdMolStandardize.TautomerEnumeratorCallback):
        pass

    class MyBrokenCallback2(rdMolStandardize.TautomerEnumeratorCallback):
        __call__ = 1

    # Test a structure with hundreds of tautomers.
    smi68 = "[H][C](CO)(NC(=O)C1=C(O)C(O)=CC=C1)C(O)=O"
    m68 = Chem.MolFromSmiles(smi68)
    params = rdMolStandardize.CleanupParameters()
    params.maxTransforms = 10000
    params.maxTautomers = 10000

    def enumerate_with_budget(timeout_ms, timeout_message):
        """Enumerate m68 under a time budget and check the outcome."""
        enumerator = rdMolStandardize.TautomerEnumerator(params)
        enumerator.SetCallback(
            MyTautomerEnumeratorCallback(self, timeout_ms))
        res68 = enumerator.Enumerate(m68)
        hasReachedTimeout = (
            len(res68.tautomers) < 375 and res68.status
            == rdMolStandardize.TautomerEnumeratorStatus.Canceled)
        hasCompleted = (
            len(res68.tautomers) == 375 and res68.status
            == rdMolStandardize.TautomerEnumeratorStatus.Completed)
        if hasReachedTimeout:
            print(timeout_message, file=sys.stderr)
        if hasCompleted:
            print("Enumeration has completed", file=sys.stderr)
        # exactly one of the two outcomes must hold
        self.assertTrue(hasReachedTimeout or hasCompleted)
        self.assertTrue(hasReachedTimeout ^ hasCompleted)

    # either the enumeration was canceled due to timeout
    # or it has completed very quickly
    enumerate_with_budget(
        50.0, "Enumeration was canceled due to timeout (50 ms)")

    # either the enumeration completed
    # or it ran very slowly and was canceled due to timeout
    enumerate_with_budget(
        10000.0, "Enumeration was canceled due to timeout (10 s)")

    enumerator = rdMolStandardize.TautomerEnumerator(params)
    with self.assertRaises(AttributeError):
        enumerator.SetCallback(MyBrokenCallback())
    with self.assertRaises(AttributeError):
        enumerator.SetCallback(MyBrokenCallback2())
def test15EnumeratorParams(self):
    """Exercise TautomerEnumerator limits, stereo options and result access.

    Covers, in order: the tautomer-count / status reported with default
    parameters and with ``maxTautomers = 50``; removal vs. retention of
    sp3 stereo on (S)-alanine; the different ways of reading an
    enumeration result (iteration, indexing, ``res()``, ``tautomers``,
    ``smiles``, ``smilesTautomerMap``) including out-of-range indices;
    and removal vs. retention of double-bond stereo on E/Z enols.
    """
    # Test a structure with hundreds of tautomers.
    smi68 = "[H][C](CO)(NC(=O)C1=C(O)C(O)=CC=C1)C(O)=O"
    m68 = Chem.MolFromSmiles(smi68)
    # default parameters: enumeration stops on the transform limit
    enumerator = rdMolStandardize.TautomerEnumerator()
    res68 = enumerator.Enumerate(m68)
    self.assertEqual(len(res68), 292)
    self.assertEqual(len(res68.tautomers), len(res68))
    self.assertEqual(
        res68.status,
        rdMolStandardize.TautomerEnumeratorStatus.MaxTransformsReached)
    # capped at 50 tautomers: enumeration stops on the tautomer limit
    params = rdMolStandardize.CleanupParameters()
    params.maxTautomers = 50
    enumerator = rdMolStandardize.TautomerEnumerator(params)
    res68 = enumerator.Enumerate(m68)
    self.assertEqual(len(res68), 50)
    self.assertEqual(
        res68.status,
        rdMolStandardize.TautomerEnumeratorStatus.MaxTautomersReached)
    sAlaSmi = "C[C@H](N)C(=O)O"
    sAla = Chem.MolFromSmiles(sAlaSmi)
    # test remove (S)-Ala stereochemistry
    self.assertEqual(
        sAla.GetAtomWithIdx(1).GetChiralTag(),
        Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
    self.assertEqual(sAla.GetAtomWithIdx(1).GetProp("_CIPCode"), "S")
    params = rdMolStandardize.CleanupParameters()
    params.tautomerRemoveSp3Stereo = True
    enumerator = rdMolStandardize.TautomerEnumerator(params)
    res = enumerator.Enumerate(sAla)
    # iterate the result object directly ...
    for taut in res:
        self.assertEqual(
            taut.GetAtomWithIdx(1).GetChiralTag(),
            Chem.ChiralType.CHI_UNSPECIFIED)
        self.assertFalse(taut.GetAtomWithIdx(1).HasProp("_CIPCode"))
    # ... and through its tautomers attribute
    for taut in res.tautomers:
        self.assertEqual(
            taut.GetAtomWithIdx(1).GetChiralTag(),
            Chem.ChiralType.CHI_UNSPECIFIED)
        self.assertFalse(taut.GetAtomWithIdx(1).HasProp("_CIPCode"))
    # the access paths below must all agree on order and content
    for i, taut in enumerate(res):
        self.assertEqual(Chem.MolToSmiles(taut),
                         Chem.MolToSmiles(res.tautomers[i]))
    self.assertEqual(len(res), len(res.smiles))
    self.assertEqual(len(res), len(res.tautomers))
    self.assertEqual(len(res), len(res()))
    self.assertEqual(len(res), len(res.smilesTautomerMap))
    for i, taut in enumerate(res.tautomers):
        self.assertEqual(Chem.MolToSmiles(taut), Chem.MolToSmiles(res[i]))
        self.assertEqual(Chem.MolToSmiles(taut), res.smiles[i])
        self.assertEqual(
            Chem.MolToSmiles(taut),
            Chem.MolToSmiles(res.smilesTautomerMap.values()[i].tautomer))
    for i, k in enumerate(res.smilesTautomerMap.keys()):
        self.assertEqual(k, res.smiles[i])
    for i, v in enumerate(res.smilesTautomerMap.values()):
        self.assertEqual(Chem.MolToSmiles(v.tautomer),
                         Chem.MolToSmiles(res[i]))
    for i, (k, v) in enumerate(res.smilesTautomerMap.items()):
        self.assertEqual(k, res.smiles[i])
        self.assertEqual(Chem.MolToSmiles(v.tautomer),
                         Chem.MolToSmiles(res[i]))
    for i, smiles in enumerate(res.smiles):
        self.assertEqual(smiles, Chem.MolToSmiles(res[i]))
        self.assertEqual(smiles, res.smilesTautomerMap.keys()[i])
    # negative indices address the sequence from the end
    self.assertEqual(Chem.MolToSmiles(res.tautomers[-1]),
                     Chem.MolToSmiles(res[-1]))
    self.assertEqual(Chem.MolToSmiles(res[-1]),
                     Chem.MolToSmiles(res[len(res) - 1]))
    self.assertEqual(Chem.MolToSmiles(res.tautomers[-1]),
                     Chem.MolToSmiles(res.tautomers[len(res) - 1]))
    # one past either end must raise IndexError
    with self.assertRaises(IndexError):
        res[len(res)]
    with self.assertRaises(IndexError):
        res[-len(res) - 1]
    with self.assertRaises(IndexError):
        res.tautomers[len(res)]
    with self.assertRaises(IndexError):
        res.tautomers[-len(res.tautomers) - 1]
    # test retain (S)-Ala stereochemistry
    self.assertEqual(
        sAla.GetAtomWithIdx(1).GetChiralTag(),
        Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
    self.assertEqual(sAla.GetAtomWithIdx(1).GetProp("_CIPCode"), "S")
    params = rdMolStandardize.CleanupParameters()
    params.tautomerRemoveSp3Stereo = False
    enumerator = rdMolStandardize.TautomerEnumerator(params)
    res = enumerator.Enumerate(sAla)
    for taut in res:
        tautAtom = taut.GetAtomWithIdx(1)
        # stereo is only expected where atom 1 is still sp3 in the tautomer
        if (tautAtom.GetHybridization() == Chem.HybridizationType.SP3):
            self.assertEqual(tautAtom.GetChiralTag(),
                             Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
            self.assertTrue(tautAtom.HasProp("_CIPCode"))
            self.assertEqual(tautAtom.GetProp("_CIPCode"), "S")
        else:
            self.assertFalse(tautAtom.HasProp("_CIPCode"))
            self.assertEqual(tautAtom.GetChiralTag(),
                             Chem.ChiralType.CHI_UNSPECIFIED)
    eEnolSmi = "C/C=C/O"
    eEnol = Chem.MolFromSmiles(eEnolSmi)
    self.assertEqual(
        eEnol.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREOE)
    # test remove enol E stereochemistry
    params = rdMolStandardize.CleanupParameters()
    params.tautomerRemoveBondStereo = True
    enumerator = rdMolStandardize.TautomerEnumerator(params)
    res = enumerator.Enumerate(eEnol)
    for taut in res.tautomers:
        self.assertEqual(
            taut.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREONONE)
    # test retain enol E stereochemistry
    params = rdMolStandardize.CleanupParameters()
    params.tautomerRemoveBondStereo = False
    enumerator = rdMolStandardize.TautomerEnumerator(params)
    res = enumerator.Enumerate(eEnol)
    for taut in res.tautomers:
        # only tautomers in which bond 1 is still a double bond are checked
        if (taut.GetBondWithIdx(1).GetBondType() == Chem.BondType.DOUBLE):
            self.assertEqual(
                taut.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREOE)
    zEnolSmi = "C/C=C\\O"
    zEnol = Chem.MolFromSmiles(zEnolSmi)
    self.assertEqual(
        zEnol.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREOZ)
    # test remove enol Z stereochemistry
    params = rdMolStandardize.CleanupParameters()
    params.tautomerRemoveBondStereo = True
    enumerator = rdMolStandardize.TautomerEnumerator(params)
    res = enumerator.Enumerate(zEnol)
    for taut in res:
        self.assertEqual(
            taut.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREONONE)
    # test retain enol Z stereochemistry
    params = rdMolStandardize.CleanupParameters()
    params.tautomerRemoveBondStereo = False
    enumerator = rdMolStandardize.TautomerEnumerator(params)
    res = enumerator.Enumerate(zEnol)
    for taut in res:
        # only tautomers in which bond 1 is still a double bond are checked
        if (taut.GetBondWithIdx(1).GetBondType() == Chem.BondType.DOUBLE):
            self.assertEqual(
                taut.GetBondWithIdx(1).GetStereo(), Chem.BondStereo.STEREOZ)
def test21UpdateFromJSON(self):
    """Override CleanupParameters from JSON and compare with the defaults.

    Each standardization entry point is run twice on the same molecule:
    once with the JSON-updated parameters and once with default
    parameters, and the resulting SMILES are compared with fixed values.
    """
    # note: these actual parameters aren't useful... they are for testing
    json_overrides = (
        '{ "normalizationData":[ '
        '{"name":"silly 1","smarts":"[Cl:1]>>[F:1]"}, '
        '{"name":"silly 2","smarts":"[Br:1]>>[F:1]"} '
        '], "acidbaseData":[ '
        '{"name":"-CO2H","acid":"C(=O)[OH]","base":"C(=O)[O-]"}, '
        '{"name":"phenol","acid":"c[OH]","base":"c[O-]"} '
        '], "fragmentData":[ '
        '{"name":"hydrogen", "smarts":"[H]"}, '
        '{"name":"fluorine", "smarts":"[F]"}, '
        '{"name":"chlorine", "smarts":"[Cl]"} '
        '], "tautomerTransformData":[ '
        '{"name":"1,3 (thio)keto/enol f",'
        '"smarts":"[CX4!H0]-[C]=[O,S,Se,Te;X1]","bonds":"","charges":""}, '
        '{"name":"1,3 (thio)keto/enol r",'
        '"smarts":"[O,S,Se,Te;X2!H0]-[C]=[C]"} '
        ']}'
    )
    custom = rdMolStandardize.CleanupParameters()
    rdMolStandardize.UpdateParamsFromJSON(custom, json_overrides)

    # --- tautomers ---
    aldehyde = Chem.MolFromSmiles("CCC=O")
    enumerator = rdMolStandardize.TautomerEnumerator(custom)
    found = [Chem.MolToSmiles(t) for t in enumerator.Enumerate(aldehyde)]
    self.assertEqual(found, ["CC=CO", "CCC=O"])
    canonical = rdMolStandardize.CanonicalTautomer(aldehyde, custom)
    self.assertEqual(Chem.MolToSmiles(canonical), "CCC=O")
    # now with defaults
    enumerator = rdMolStandardize.TautomerEnumerator()
    found = [Chem.MolToSmiles(t) for t in enumerator.Enumerate(aldehyde)]
    self.assertEqual(found, ["CC=CO", "CCC=O"])
    canonical = rdMolStandardize.CanonicalTautomer(aldehyde)
    self.assertEqual(Chem.MolToSmiles(canonical), "CCC=O")

    # --- normalization ---
    dihalide = Chem.MolFromSmiles('ClCCCBr')
    outcome = rdMolStandardize.Normalize(dihalide, custom)
    self.assertEqual(Chem.MolToSmiles(outcome), "FCCCF")
    # now with defaults
    outcome = rdMolStandardize.Normalize(dihalide)
    self.assertEqual(Chem.MolToSmiles(outcome), "ClCCCBr")

    # --- reionization: phenolate vs. carboxylic acid ---
    phenolate_acid = Chem.MolFromSmiles('c1cc([O-])cc(C(=O)O)c1')
    outcome = rdMolStandardize.Reionize(phenolate_acid, custom)
    self.assertEqual(Chem.MolToSmiles(outcome), "O=C([O-])c1cccc(O)c1")
    # now with defaults
    outcome = rdMolStandardize.Reionize(phenolate_acid)
    self.assertEqual(Chem.MolToSmiles(outcome), "O=C([O-])c1cccc(O)c1")

    # --- reionization: sulfinate vs. sulfonic acid ---
    sulfur_acids = Chem.MolFromSmiles(
        'C1=C(C=CC(=C1)[S]([O-])=O)[S](O)(=O)=O')
    outcome = rdMolStandardize.Reionize(sulfur_acids, custom)
    self.assertEqual(Chem.MolToSmiles(outcome),
                     "O=S([O-])c1ccc(S(=O)(=O)O)cc1")
    # now with defaults
    outcome = rdMolStandardize.Reionize(sulfur_acids)
    self.assertEqual(Chem.MolToSmiles(outcome),
                     "O=S(O)c1ccc(S(=O)(=O)[O-])cc1")

    # --- fragment removal ---
    halide_mix = Chem.MolFromSmiles('[F-].[Cl-].[Br-].CC')
    outcome = rdMolStandardize.RemoveFragments(halide_mix, custom)
    self.assertEqual(Chem.MolToSmiles(outcome), "CC.[Br-]")
    # now with defaults
    outcome = rdMolStandardize.RemoveFragments(halide_mix)
    self.assertEqual(Chem.MolToSmiles(outcome), "CC")
def test7Fragment(self):
    """Exercise FragmentRemover and LargestFragmentChooser.

    Checks fragment removal (including ``skip_if_all_match``), the
    ``preferOrganic`` keyword of LargestFragmentChooser, and how the
    ``largestFragmentChooserCountHeavyAtomsOnly``,
    ``largestFragmentChooserUseAtomCount`` and ``preferOrganic``
    CleanupParameters settings change which fragment is chosen.
    """

    def largest_fragment(smiles, **settings):
        """SMILES of the fragment chosen under the given parameter settings."""
        lf_params = rdMolStandardize.CleanupParameters()
        for name, value in settings.items():
            setattr(lf_params, name, value)
        chooser = rdMolStandardize.LargestFragmentChooser(lf_params)
        return Chem.MolToSmiles(chooser.choose(Chem.MolFromSmiles(smiles)))

    # fragment removal
    remover = rdMolStandardize.FragmentRemover()
    amine_salt = Chem.MolFromSmiles("CN(C)C.Cl.Cl.Br")
    self.assertEqual(Chem.MolToSmiles(remover.remove(amine_salt)), "CN(C)C")

    # largest fragment, with and without preferring organic fragments
    by_size = rdMolStandardize.LargestFragmentChooser()
    nitrate_methyl = Chem.MolFromSmiles("[N+](=O)([O-])[O-].[CH3+]")
    self.assertEqual(Chem.MolToSmiles(by_size.choose(nitrate_methyl)),
                     "O=[N+]([O-])[O-]")
    organic_first = rdMolStandardize.LargestFragmentChooser(
        preferOrganic=True)
    self.assertEqual(Chem.MolToSmiles(organic_first.choose(nitrate_methyl)),
                     "[CH3+]")

    # with skip_if_all_match the molecule keeps all of its atoms
    remover = rdMolStandardize.FragmentRemover(skip_if_all_match=True)
    all_salts = Chem.MolFromSmiles("[Na+].Cl.Cl.Br")
    kept = remover.remove(all_salts)
    self.assertEqual(kept.GetNumAtoms(), all_salts.GetNumAtoms())

    # two organic fragments
    smi3 = "CNC[C@@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O.c1cc2c(cc1C(=O)O)oc(n2)c3cc(cc(c3)Cl)Cl"
    sugar_amine = "CNC[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO"
    benzoxazole = "O=C(O)c1ccc2nc(-c3cc(Cl)cc(Cl)c3)oc2c1"
    self.assertEqual(largest_fragment(smi3), sugar_amine)
    self.assertEqual(
        largest_fragment(
            smi3, largestFragmentChooserCountHeavyAtomsOnly=True),
        benzoxazole)
    self.assertEqual(
        largest_fragment(smi3, largestFragmentChooserUseAtomCount=False),
        benzoxazole)

    # organic vs. inorganic fragment
    smi4 = "CC.O=[Pb]=O"
    self.assertEqual(largest_fragment(smi4), "CC")
    self.assertEqual(
        largest_fragment(
            smi4, largestFragmentChooserCountHeavyAtomsOnly=True),
        "O=[Pb]=O")
    self.assertEqual(
        largest_fragment(smi4, largestFragmentChooserUseAtomCount=False),
        "O=[Pb]=O")
    self.assertEqual(
        largest_fragment(
            smi4,
            largestFragmentChooserCountHeavyAtomsOnly=True,
            preferOrganic=True),
        "CC")
    self.assertEqual(
        largest_fragment(
            smi4,
            largestFragmentChooserUseAtomCount=False,
            preferOrganic=True),
        "CC")
Trivalent O [*:1]=[O;X2;v3;+0:2]-[#6:3]>>[*:1]=[*+1:2]-[*:3] Sulfoxide to -S+(O-) [!O:1][S+0;D3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4] // this form addresses a pathological case that came up a few times in testing: Sulfoxide to -S+(O-) 2 [!O:1][SH1+1;D3:2](=[O:3])[!O:4]>>[*:1][S+1:2]([O-:3])[*:4] Trivalent S [O:1]=[S;D2;+0:2]-[#6:3]>>[*:1]=[*+1:2]-[*:3] // Note that the next one doesn't work propertly because repeated appplications // don't carry the cations from the previous rounds through. This should be // fixed by implementing single-molecule transformations, but that's a longer-term // project //Alkaline oxide to ions [Li,Na,K;+0:1]-[O+0:2]>>([*+1:1].[O-:2]) Bad amide tautomer1 [C:1]([OH1;D1:2])=;!@[NH1:3]>>[C:1](=[OH0:2])-[NH2:3] Bad amide tautomer2 [C:1]([OH1;D1:2])=;!@[NH0:3]>>[C:1](=[OH0:2])-[NH1:3] Halogen with no neighbors [F,Cl,Br,I;X0;+0:1]>>[*-1:1] Odd pyridine/pyridazine oxide structure [C,N;-;D2,D3:1]-[N+2;D3:2]-[O-;D1:3]>>[*-0:1]=[*+1:2]-[*-:3] """ _normalizer_params = rdMolStandardize.CleanupParameters() _normalizer = rdMolStandardize.NormalizerFromData(_normalization_transforms, _normalizer_params) _alkoxide_pattern = Chem.MolFromSmarts('[Li,Na,K;+0]-[#7,#8;+0]') def normalize_mol(m): """ """ Chem.FastFindRings(m) if m.HasSubstructMatch(_alkoxide_pattern): m = Chem.RWMol(m) for match in m.GetSubstructMatches(_alkoxide_pattern): m.RemoveBond(match[0], match[1]) m.GetAtomWithIdx(match[0]).SetFormalCharge(1)