def test4Hs(self):
    """Check that fingerprints agree with and without added hydrogens.

    The first comma-separated field of every line of NCI_5K_TPSA.csv that
    does not start with '#' is parsed as SMILES. For each parsed molecule
    the fingerprint of the molecule itself and of its Chem.AddHs()
    counterpart are computed against the default hierarchy and must be
    equal. On a mismatch the SMILES and both bit strings are printed
    before the assertion fails.
    """
    groups = FunctionalGroups.BuildFuncGroupHierarchy()

    csvPath = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'NCI_5K_TPSA.csv')
    mols = []
    with open(csvPath, 'r') as handle:
        for row in handle:
            # Lines starting with '#' are skipped.
            if row[0] == '#':
                continue
            mols.append(Chem.MolFromSmiles(row.split(',')[0]))

    for mol in mols:
        molWithHs = Chem.AddHs(mol)
        plainFp = FunctionalGroups.CreateMolFingerprint(mol, groups)
        withHsFp = FunctionalGroups.CreateMolFingerprint(molWithHs, groups)
        if plainFp != withHsFp:
            # Emit diagnostics first; the assertion below reports the failure.
            print(Chem.MolToSmiles(mol))
            print(plainFp.ToBitString())
            print(withHsFp.ToBitString())
        self.assertEqual(plainFp, withHsFp)
    def test4Hs(self):
        """Check that fingerprints agree with and without added hydrogens.

        SMILES are taken from the first comma-separated field of every line
        of NCI_5K_TPSA.csv that does not start with '#'. The fingerprint of
        each parsed molecule must equal the fingerprint of its Chem.AddHs()
        counterpart; both bit strings are printed when they differ.
        """
        hierarchy = FunctionalGroups.BuildFuncGroupHierarchy()

        inName = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                              'NCI_5K_TPSA.csv')
        # The context manager closes the file even if SMILES parsing raises.
        with open(inName, 'r') as inF:
            ms = [
                Chem.MolFromSmiles(x.split(',')[0]) for x in inF if x[0] != '#'
            ]
        for m in ms:
            mh = Chem.AddHs(m)
            fp = FunctionalGroups.CreateMolFingerprint(m, hierarchy)
            fph = FunctionalGroups.CreateMolFingerprint(mh, hierarchy)
            if fp != fph:
                # print() with a single argument behaves the same on
                # Python 2 and Python 3.
                print(fp.ToBitString())
                print(fph.ToBitString())
            # assertEqual replaces the removed failUnlessEqual alias.
            self.assertEqual(fp, fph)
 def test3Reactions(self):
   """Check that entries with a reaction field end up with rxnSmarts set.

   The inline definition holds one root entry and two dotted child entries,
   each with four tab-separated fields. The test expects a single root with
   two children and a non-empty rxnSmarts on the root and its first child.
   """
   definitions = """BoronicAcid\t[$(B-!@[#6])](O)(O)\tBoronic Acid\t[#6:1][B:2]([O:3])[O:4]>>[#6:1].[B:2]([O:3])[O:4]
 BoronicAcid.Aromatic\t[$(B-!@c)](O)(O)\tAromatic\t[c:1][B:2]([O:3])[O:4]>>[c:1].[B:2]([O:3])[O:4]
 BoronicAcid.Aliphatic\t[$(B-!@C)](O)(O)\tAliphatic\t[C:1][B:2]([O:3])[O:4]>>[C:1].[B:2]([O:3])[O:4]
 """
   tree = FunctionalGroups.BuildFuncGroupHierarchy(data=definitions)
   self.assertTrue(tree)
   self.assertEqual(len(tree), 1)

   root = tree[0]
   self.assertEqual(len(root.children), 2)
   self.assertNotEqual(root.rxnSmarts, '')
   self.assertNotEqual(root.children[0].rxnSmarts, '')
   def test3Reactions(self):
       txt = """BoronicAcid\t[$(B-!@[#6])](O)(O)\tBoronic Acid\t[#6:1][B:2]([O:3])[O:4]>>[#6:1].[B:2]([O:3])[O:4]
 BoronicAcid.Aromatic\t[$(B-!@c)](O)(O)\tAromatic\t[c:1][B:2]([O:3])[O:4]>>[c:1].[B:2]([O:3])[O:4]
 BoronicAcid.Aliphatic\t[$(B-!@C)](O)(O)\tAliphatic\t[C:1][B:2]([O:3])[O:4]>>[C:1].[B:2]([O:3])[O:4]
 """
       hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
       self.failUnless(hierarchy)
       self.failUnless(len(hierarchy) == 1)
       self.failUnless(len(hierarchy[0].children) == 2)
       self.failUnless(hierarchy[0].rxnSmarts != '')
       self.failUnless(hierarchy[0].children[0].rxnSmarts != '')
  def test2Comments(self):
    txt = """
AcidChloride\tC(=O)Cl\tAcid Chloride
  AcidChloride.Benzoyl\tC(=O)(Cl)c1ccccc1\tBenzoyl
Amine\tN\tAmine
  Amine.Primary\t[N;H2]\tPrimary
    //Amine.Primary.Aromatic\t[N;H2][a]\tPrimary Aromatic
  Amine.Aromatic\tN[a]\tAromatic
"""
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
    self.assertTrue(hierarchy)
    self.assertEqual(len(hierarchy), 2)
    self.assertEqual(len(hierarchy[0]), 2)
    self.assertEqual(len(hierarchy[1]), 3)
# Example #6
0
def RetrieveFunctionalGroupsInfo():
    """Build the functional group hierarchy and repopulate FunctionalGroupsMap.

    The hierarchy is built by FunctionalGroups.BuildFuncGroupHierarchy from
    OptionsInfo["GroupNamesFile"]. The 'Names' and 'SMARTSPattern' entries of
    FunctionalGroupsMap are reset to empty containers before
    RetrieveDataFromFunctionalGroupsHierarchy is called on the hierarchy
    nodes (presumably it fills both entries; verify against that helper).
    MiscUtil.PrintError is called when no names were collected, and the
    final count is reported through MiscUtil.PrintInfo.
    """

    MiscUtil.PrintInfo(
        "\nRetrieving data from default RDKit functional groups hierarchy file Functional_Group_Hierarchy.txt..."
    )

    HierarchyNodes = FunctionalGroups.BuildFuncGroupHierarchy(
        OptionsInfo["GroupNamesFile"])

    # Reset both containers before walking the hierarchy.
    FunctionalGroupsMap['Names'] = []
    FunctionalGroupsMap['SMARTSPattern'] = {}

    RetrieveDataFromFunctionalGroupsHierarchy(HierarchyNodes)

    NamesCount = len(FunctionalGroupsMap['Names'])
    if NamesCount == 0:
        MiscUtil.PrintError(
            "Failed to retrieve any functional group names and SMARTS patterns..."
        )

    MiscUtil.PrintInfo(
        "Total number of functional groups present functional group hierarchy: %d"
        % NamesCount)
from rdkit.Chem import Draw
from rdkit.Chem import FunctionalGroups

from time import time
import csv

# Wall-clock start of the run.
start = time()

# Open the target CSV file in append mode and write the column header.
# The handle is left open here; presumably the writer is used further down
# in the script (not visible in this section).
# NOTE(review): the header row is written on every run, so appending to an
# existing file repeats it. Confirm whether that is intended.
csvfile = open("fg_smils.csv", 'a')
header = ["pubchemId", "functionalGroup", "SMILES"]
writer = csv.writer(csvfile)
writer.writerow(header)

smilesfile = "CID-SMILES"

fgs = FunctionalGroups.BuildFuncGroupHierarchy()


# Only a single input file is processed; the former per-file loop
# (`for filename in filenames:`) is disabled.

# Input rows are comma-separated: name in column 0, SMILES in column 1,
# with no title line.
suppl = Chem.SmilesMolSupplier(smilesfile, delimiter=",",
                               smilesColumn=1, nameColumn=0, titleLine=False)

# Entries the supplier yields as None are dropped.
mols = [x for x in suppl if x is not None]
del suppl

# `filename` only existed inside the disabled loop, so referencing it here
# raised NameError; report the file that was actually read.
print("processing %s with %d valid compounds" % (smilesfile, len(mols)))

#Draw.MolsToGridImage(mols[:20], molsPerRow=4, legends=[x.GetProp('_Name') for x in mols])

'''
  def test1Basics(self):
    """Exercise hierarchy construction, fingerprinting and rebuild behaviour.

    A two-root hierarchy is built from inline tab-separated text. The test
    checks node attributes (name, label, children, rxnSmarts) and the
    fingerprints of two molecules, then checks how repeated
    BuildFuncGroupHierarchy calls interact with edits to the returned list
    and to FunctionalGroups.hierarchy.

    NOTE: the steps are order-dependent because the test mutates
    FunctionalGroups.hierarchy part-way through.
    """
    txt = """
AcidChloride\tC(=O)Cl\tAcid Chloride
  AcidChloride.Benzoyl\tC(=O)(Cl)c1ccccc1\tBenzoyl
Amine\tN\tAmine
  Amine.Primary\t[N;H2]\tPrimary
    Amine.Primary.Aromatic\t[N;H2][a]\tPrimary Aromatic
  Amine.Aromatic\tN[a]\tAromatic
"""
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
    self.assertTrue(hierarchy)
    self.assertEqual(len(hierarchy), 2)
    self.assertEqual(len(hierarchy[0]), 2)
    self.assertEqual(len(hierarchy[1]), 4)
    self.assertEqual(hierarchy[0].name, 'Acid Chloride')
    self.assertEqual(hierarchy[0].children[0].name, 'Benzoyl')
    self.assertEqual(hierarchy[0].label, 'AcidChloride')
    self.assertEqual(hierarchy[0].rxnSmarts, '')
    # Six expected entries; presumably one per definition line, in order
    # (TODO confirm against CreateMolFingerprint).
    m = Chem.MolFromSmiles('ClC(=O)CCCNc1ccccc1')
    fp = FunctionalGroups.CreateMolFingerprint(m, hierarchy)
    self.assertEqual(fp, [1, 0, 1, 0, 0, 1])

    # This molecule is expected to set no entries at all.
    m = Chem.MolFromSmiles('OC(=O)CCC')
    fp = FunctionalGroups.CreateMolFingerprint(m, hierarchy)
    self.assertEqual(fp, [0, 0, 0, 0, 0, 0])

    # make sure we get the same hierarchy on the second call:
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
    self.assertTrue(hierarchy)
    self.assertEqual(len(hierarchy), 2)
    self.assertEqual(len(hierarchy[0]), 2)
    self.assertEqual(len(hierarchy[1]), 4)
    self.assertEqual(hierarchy[0].name, 'Acid Chloride')
    self.assertEqual(hierarchy[0].children[0].name, 'Benzoyl')
    self.assertEqual(hierarchy[0].label, 'AcidChloride')
    self.assertEqual(hierarchy[0].rxnSmarts, '')

    # if we edit this hierarchy it doesn't affect the global one:
    hierarchy.pop(0)
    self.assertEqual(len(hierarchy[0]), 4)
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
    self.assertTrue(hierarchy)
    self.assertEqual(len(hierarchy), 2)
    self.assertEqual(len(hierarchy[0]), 2)
    self.assertEqual(len(hierarchy[1]), 4)
    self.assertEqual(hierarchy[0].name, 'Acid Chloride')
    self.assertEqual(hierarchy[0].children[0].name, 'Benzoyl')
    self.assertEqual(hierarchy[0].label, 'AcidChloride')
    self.assertEqual(hierarchy[0].rxnSmarts, '')

    # and if we edit the global one and don't force, we get the edited one:
    FunctionalGroups.hierarchy.pop(0)
    self.assertEqual(len(FunctionalGroups.hierarchy[0]), 4)
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt)
    self.assertTrue(hierarchy)
    self.assertEqual(len(hierarchy), 1)
    self.assertEqual(len(hierarchy[0]), 4)

    # but a force gets us back:
    hierarchy = FunctionalGroups.BuildFuncGroupHierarchy(data=txt, force=True)
    self.assertEqual(len(hierarchy), 2)
    self.assertEqual(len(hierarchy[0]), 2)
    self.assertEqual(len(hierarchy[1]), 4)