def test3Reactions(self):
    """Build a hierarchy whose records carry a fourth, reaction column.

    The inline data defines one top-level group (BoronicAcid) with two
    dotted-label children.  The test checks the shape of the result and
    that ``rxnSmarts`` is non-empty on the root and on its first child.
    """
    # One tab-separated record per line: label, SMARTS, name, reaction.
    txt = """BoronicAcid\t[$(B-!@[#6])](O)(O)\tBoronic Acid\t[#6:1][B:2]([O:3])[O:4]>>[#6:1].[B:2]([O:3])[O:4]
BoronicAcid.Aromatic\t[$(B-!@c)](O)(O)\tAromatic\t[c:1][B:2]([O:3])[O:4]>>[c:1].[B:2]([O:3])[O:4]
BoronicAcid.Aliphatic\t[$(B-!@C)](O)(O)\tAliphatic\t[C:1][B:2]([O:3])[O:4]>>[C:1].[B:2]([O:3])[O:4]
"""
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
    self.assertTrue(hierarchy)
    self.assertEqual(len(hierarchy), 1)

    root = hierarchy[0]
    self.assertEqual(len(root.children), 2)
    self.assertNotEqual(root.rxnSmarts, '')
    self.assertNotEqual(root.children[0].rxnSmarts, '')
def test3Reactions(self):
    """Check that records with a reaction column yield non-empty rxnSmarts.

    The inline data holds three tab-separated records (label, SMARTS,
    name, reaction): a BoronicAcid root and two dotted-label children.
    The test expects a single top-level node with two children, and a
    non-empty ``rxnSmarts`` on the root and on its first child.
    """
    txt = """BoronicAcid\t[$(B-!@[#6])](O)(O)\tBoronic Acid\t[#6:1][B:2]([O:3])[O:4]>>[#6:1].[B:2]([O:3])[O:4]
BoronicAcid.Aromatic\t[$(B-!@c)](O)(O)\tAromatic\t[c:1][B:2]([O:3])[O:4]>>[c:1].[B:2]([O:3])[O:4]
BoronicAcid.Aliphatic\t[$(B-!@C)](O)(O)\tAliphatic\t[C:1][B:2]([O:3])[O:4]>>[C:1].[B:2]([O:3])[O:4]
"""
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
    # The failUnless* aliases were removed from unittest in Python 3.12;
    # the specific assert* methods also report both operands on failure.
    self.assertTrue(hierarchy)
    self.assertEqual(len(hierarchy), 1)
    self.assertEqual(len(hierarchy[0].children), 2)
    self.assertNotEqual(hierarchy[0].rxnSmarts, '')
    self.assertNotEqual(hierarchy[0].children[0].rxnSmarts, '')
def test2Comments(self):
    """Build a hierarchy from data that contains a ``//``-prefixed record.

    The data matches the six-record amine/acid-chloride set except that
    the ``Amine.Primary.Aromatic`` record is prefixed with ``//``.  The
    test expects two top-level groups of length 2 and 3 respectively.
    """
    # One tab-separated record per line: label, SMARTS, name.
    txt = """
AcidChloride\tC(=O)Cl\tAcid Chloride
AcidChloride.Benzoyl\tC(=O)(Cl)c1ccccc1\tBenzoyl
Amine\tN\tAmine
Amine.Primary\t[N;H2]\tPrimary
//Amine.Primary.Aromatic\t[N;H2][a]\tPrimary Aromatic
Amine.Aromatic\tN[a]\tAromatic
"""
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
    self.assertTrue(hierarchy)
    self.assertEqual(len(hierarchy), 2)

    # Expected length of each top-level group, in order.
    for position, expectedSize in enumerate((2, 3)):
        self.assertEqual(len(hierarchy[position]), expectedSize)
def test4Hs(self):
    """Fingerprints must be the same with and without explicit Hs.

    Reads SMILES from the first comma-separated column of
    ``NCI_5K_TPSA.csv`` (skipping lines starting with ``#``) and, for
    each molecule, compares the fingerprint of the molecule with the
    fingerprint of its ``Chem.AddHs`` counterpart.
    """
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy()
    csvPath = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'NCI_5K_TPSA.csv')

    # Parse every molecule up front so the file is closed before the checks.
    mols = []
    with open(csvPath, 'r') as handle:
        for row in handle:
            if row[0] == '#':
                continue
            smiles = row.split(',')[0]
            mols.append(Chem.MolFromSmiles(smiles))

    for mol in mols:
        molWithHs = Chem.AddHs(mol)
        fpPlain = FunctionalGroups.CreateMolFingerprint(mol, hierarchy)
        fpWithHs = FunctionalGroups.CreateMolFingerprint(molWithHs, hierarchy)
        if fpPlain != fpWithHs:
            # Dump diagnostics before the assertion below fails.
            print(Chem.MolToSmiles(mol))
            print(fpPlain.ToBitString())
            print(fpWithHs.ToBitString())
        self.assertEqual(fpPlain, fpWithHs)
def test4Hs(self):
    """Fingerprints must be the same with and without explicit Hs.

    Reads SMILES from the first comma-separated column of
    ``NCI_5K_TPSA.csv`` (skipping lines starting with ``#``) and, for
    each molecule, compares the fingerprint of the molecule with the
    fingerprint of its ``Chem.AddHs`` counterpart.
    """
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy()
    inName = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'NCI_5K_TPSA.csv')
    # Context manager closes the handle; the original left it open.
    with open(inName, 'r') as inF:
        ms = [Chem.MolFromSmiles(x.split(',')[0]) for x in inF if x[0] != '#']
    for m in ms:
        mh = Chem.AddHs(m)
        fp = FunctionalGroups.CreateMolFingerprint(m, hierarchy)
        fph = FunctionalGroups.CreateMolFingerprint(mh, hierarchy)
        if fp != fph:
            # print() calls instead of Python 2 print statements, which
            # are a SyntaxError on Python 3.
            print(fp.ToBitString())
            print(fph.ToBitString())
        # failUnlessEqual was removed from unittest in Python 3.12.
        self.assertEqual(fp, fph)
def RetrieveFunctionalGroupsInfo():
    """Retrieve functional groups information.

    Builds the functional group hierarchy from the file named by
    OptionsInfo["GroupNamesFile"], resets the 'Names' and 'SMARTSPattern'
    entries of FunctionalGroupsMap, and hands the hierarchy nodes to
    RetrieveDataFromFunctionalGroupsHierarchy.  Calls MiscUtil.PrintError
    when no names end up in FunctionalGroupsMap['Names'], then reports
    the number of names through MiscUtil.PrintInfo.
    """

    MiscUtil.PrintInfo("\nRetrieving data from default RDKit functional groups hierarchy file Functional_Group_Hierarchy.txt...")

    HierarchyFile = OptionsInfo["GroupNamesFile"]
    HierarchyNodes = FunctionalGroups.BuildFuncGroupHierarchy(HierarchyFile)

    # Start from empty containers before collecting data from the nodes.
    FunctionalGroupsMap['Names'] = []
    FunctionalGroupsMap['SMARTSPattern'] = {}

    RetrieveDataFromFunctionalGroupsHierarchy(HierarchyNodes)

    NamesCount = len(FunctionalGroupsMap['Names'])
    if NamesCount == 0:
        MiscUtil.PrintError("Failed to retrieve any functional group names and SMARTS patterns...")

    MiscUtil.PrintInfo("Total number of functional groups present functional group hierarchy: %d" % NamesCount)
from rdkit.Chem import Draw from rdkit.Chem import FunctionalGroups from time import time import csv start = time() # open target csv file csvfile = open("fg_smils.csv", 'a') header = ["pubchemId","functionalGroup", "SMILES"] writer = csv.writer(csvfile) writer.writerow(header) smilesfile = "CID-SMILES" fgs = FunctionalGroups.BuildFuncGroupHierarchy() #for filename in filenames: suppl = Chem.SmilesMolSupplier(smilesfile, delimiter=",", smilesColumn=1, nameColumn=0, titleLine=False ) mols = [x for x in suppl if x is not None] del suppl print("processing %s with %d valid compounds" % (filename, (len(mols)))) #Draw.MolsToGridImage(mols[:20], molsPerRow=4, legends=[x.GetProp('_Name') for x in mols]) '''
def test1Basics(self):
    """Exercise hierarchy construction, fingerprints and repeated builds.

    Builds a hierarchy from six inline tab-separated records, checks its
    shape and root-node attributes, fingerprints two molecules against
    it, and then checks how repeated ``BuildFuncGroupHierarchy`` calls
    behave after the returned list or the module-level
    ``FunctionalGroups.hierarchy`` list has been edited.
    """
    # One tab-separated record per line: label, SMARTS, name.
    txt = """
AcidChloride\tC(=O)Cl\tAcid Chloride
AcidChloride.Benzoyl\tC(=O)(Cl)c1ccccc1\tBenzoyl
Amine\tN\tAmine
Amine.Primary\t[N;H2]\tPrimary
Amine.Primary.Aromatic\t[N;H2][a]\tPrimary Aromatic
Amine.Aromatic\tN[a]\tAromatic
"""

    def checkUnedited(groups):
        # Shape and root attributes expected from an unmodified build.
        self.assertTrue(groups)
        self.assertEqual(len(groups), 2)
        self.assertEqual(len(groups[0]), 2)
        self.assertEqual(len(groups[1]), 4)
        self.assertEqual(groups[0].name, 'Acid Chloride')
        self.assertEqual(groups[0].children[0].name, 'Benzoyl')
        self.assertEqual(groups[0].label, 'AcidChloride')
        self.assertEqual(groups[0].rxnSmarts, '')

    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
    checkUnedited(hierarchy)

    mol = Chem.MolFromSmiles('ClC(=O)CCCNc1ccccc1')
    fp = FunctionalGroups.CreateMolFingerprint(mol, hierarchy)
    self.assertEqual(fp, [1, 0, 1, 0, 0, 1])

    mol = Chem.MolFromSmiles('OC(=O)CCC')
    fp = FunctionalGroups.CreateMolFingerprint(mol, hierarchy)
    self.assertEqual(fp, [0, 0, 0, 0, 0, 0])

    # make sure we get the same hierarchy on the second call:
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
    checkUnedited(hierarchy)

    # if we edit this hierarchy it doesn't affect the global one:
    hierarchy.pop(0)
    self.assertEqual(len(hierarchy[0]), 4)
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
    checkUnedited(hierarchy)

    # and if we edit the global one and don't force, we get the edited one:
    FunctionalGroups.hierarchy.pop(0)
    self.assertEqual(len(FunctionalGroups.hierarchy[0]), 4)
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
    self.assertTrue(hierarchy)
    self.assertEqual(len(hierarchy), 1)
    self.assertEqual(len(hierarchy[0]), 4)

    # but a force gets us back:
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt, force=True)
    self.assertEqual(len(hierarchy), 2)
    self.assertEqual(len(hierarchy[0]), 2)
    self.assertEqual(len(hierarchy[1]), 4)