Ejemplo n.º 1
0
 def test3FPgenerator(self):
     """Check fragment fingerprints against the expected on-bit lists.

     A catalog (FragCatParams arguments 1 and 6) is filled from every
     molecule supplied from ``self.smiName``.  Each molecule is then rebuilt
     from its SMILES and fingerprinted against the catalog.  Expected values
     exist only for the first ``len(obits)`` / ``len(obls)`` molecules; any
     further molecules are fingerprinted without being checked.
     """
     fparams = FragmentCatalog.FragCatParams(1, 6, self.fName)
     fcat = FragmentCatalog.FragCatalog(fparams)
     fgen = FragmentCatalog.FragCatGenerator()
     # The supplier reads self.smiName itself; no separate open() is needed.
     suppl = Chem.SmilesMolSupplier(self.smiName, " ", 0, 1, 0)
     smiles = []
     for mol in suppl:
         fgen.AddFragsFromMol(mol, fcat)
         smiles.append(Chem.MolToSmiles(mol))
     assert fcat.GetNumEntries() == 21
     assert fcat.GetFPLength() == 21, fcat.GetFPLength()
     fpgen = FragmentCatalog.FragFPGenerator()
     # Expected number of on bits per molecule, in supplier order.
     obits = [3, 2, 3, 3, 2, 3, 5, 5, 5, 4, 5, 6]
     # Expected on-bit indices per molecule, in supplier order.
     obls = [(0, 1, 2), (1, 3), (1, 4, 5), (1, 6, 7), (0, 8), (0, 6, 9),
             (0, 1, 2, 3, 10), (0, 1, 2, 8, 11), (1, 3, 4, 5, 12),
             (1, 4, 5, 13), (1, 3, 6, 7, 14), (0, 1, 6, 7, 9, 15)]
     for i, smi in enumerate(smiles):
         mol = Chem.MolFromSmiles(smi)
         fp = fpgen.GetFPForMol(mol, fcat)
         if i < len(obits):
             assert fp.GetNumOnBits() == obits[i], '%s: %s' % (
                 smi, str(fp.GetOnBits()))
         obl = fp.GetOnBits()
         if i < len(obls):
             assert tuple(obl) == obls[i], '%s: %s' % (smi, obl)
Ejemplo n.º 2
0
 def test4Serialize(self):
     """Check that a pickled catalog round-trips unchanged.

     A catalog is filled from the molecules supplied from ``self.smiName``,
     pickled and unpickled, and the copy must report the same size and
     produce the same fingerprint as the original for every molecule.
     """
     fparams = FragmentCatalog.FragCatParams(1, 6, self.fName)
     fcat = FragmentCatalog.FragCatalog(fparams)
     fgen = FragmentCatalog.FragCatGenerator()
     # The supplier reads self.smiName itself; no separate open() is needed.
     suppl = Chem.SmilesMolSupplier(self.smiName, " ", 0, 1, 0)
     smiles = []
     for mol in suppl:
         fgen.AddFragsFromMol(mol, fcat)
         smiles.append(Chem.MolToSmiles(mol))
     assert fcat.GetNumEntries() == 21
     assert fcat.GetFPLength() == 21, fcat.GetFPLength()
     # Round-trip through pickle; the data is produced locally by this test.
     pkl = cPickle.dumps(fcat)
     fcat2 = cPickle.loads(pkl)
     assert fcat2.GetNumEntries() == 21
     assert fcat2.GetFPLength() == 21, fcat2.GetFPLength()
     fpgen = FragmentCatalog.FragFPGenerator()
     for smi in smiles:
         mol = Chem.MolFromSmiles(smi)
         fp1 = fpgen.GetFPForMol(mol, fcat)
         fp2 = fpgen.GetFPForMol(mol, fcat2)
         assert fp1.GetNumOnBits() == fp2.GetNumOnBits()
         obl1 = fp1.GetOnBits()
         obl2 = fp2.GetOnBits()
         assert tuple(obl1) == tuple(obl2)
0
  def setUp(self):
    """Create the SMILES fixtures, expected values and catalog objects used by the tests."""
    self.smiList = [
      "S(SC1=NC2=CC=CC=C2S1)C3=NC4=C(S3)C=CC=C4",
      "CC1=CC(=O)C=CC1=O",
      "OC1=C(Cl)C=C(C=C1[N+]([O-])=O)[N+]([O-])=O",
      "[O-][N+](=O)C1=CNC(=N)S1",
      "NC1=CC2=C(C=C1)C(=O)C3=C(C=CC=C3)C2=O",
      "OC(=O)C1=C(C=CC=C1)C2=C3C=CC(=O)C(=C3OC4=C2C=CC(=C4Br)O)Br",
      "CN(C)C1=C(Cl)C(=O)C2=C(C=CC=C2)C1=O",
      "CC1=C(C2=C(C=C1)C(=O)C3=CC=CC=C3C2=O)[N+]([O-])=O",
      "CC(=NO)C(C)=NO",
    ]
    self.smiList2 = [
      'OCCC', 'CCC', 'C=CC', 'OC=CC',
      'CC(O)C', 'C=C(O)C', 'OCCCC', 'CC(O)CC',
      'C=CCC', 'CC=CC', 'OC=CCC', 'CC=C(O)C',
      'OCC=CC', 'C=C(O)CC', 'C=CC(O)C', 'C=CCCO',
    ]
    # One value per entry of smiList2 (presumably activity labels -- confirm
    # against the tests that consume it).
    self.list2Acts = [
      1, 0, 0, 1,
      1, 1, 1, 1,
      0, 0, 1, 1,
      1, 1, 1, 1,
    ]
    # Tuples of bit indices (presumably the expected on bits for the leading
    # entries of smiList2 -- confirm against the tests that consume it).
    self.list2Obls = [
      (0, 1, 2),
      (1, 3),
      (1, 4, 5),
      (1, 6, 7),
      (0, 8),
      (0, 6, 9),
      (0, 1, 2, 3, 10),
      (0, 1, 2, 8, 11),
      (1, 3, 4, 5, 12),
      (1, 4, 5, 13),
      (1, 3, 6, 7, 14),
      (0, 1, 6, 7, 9, 15),
    ]

    # Catalog parameters use the functional-group file from the RDKit data dir.
    groupFile = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
    self.catParams = FragmentCatalog.FragCatParams(1, 6, groupFile)
    self.fragCat = FragmentCatalog.FragCatalog(self.catParams)
    self.fgen = FragmentCatalog.FragCatGenerator()
Ejemplo n.º 4
0
def calculate_fragments(smiles):
    """
    Return the descriptions of the catalog fragments generated for one SMILES.

    The approach follows the fragment-catalog example in RDKit's "Getting
    Started in Python" guide. A fresh parameter object, catalog and generator
    are created on every call; the original author observed that fewer
    fragments than expected were produced when this setup was not repeated
    for each SMILES.

    :param smiles: SMILES string of the molecule to fragment
    :return: list with one catalog entry description per fragment counted by
             ``AddFragsFromMol``
    """
    group_file = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
    # The 0 and 4 arguments still need more research and tuning.
    params = FragmentCatalog.FragCatParams(0, 4, group_file)
    # The fragments are stored as entries of the catalog.
    catalog = FragmentCatalog.FragCatalog(params)
    generator = FragmentCatalog.FragCatGenerator()
    n_frags = generator.AddFragsFromMol(MolFromSmiles(smiles), catalog)
    # print("This SMILES, %s, has %d fragments" % (smiles, n_frags))
    return [catalog.GetEntryDescription(idx) for idx in range(n_frags)]
Ejemplo n.º 5
0
def generate_geneset():
    """Build the GeneSet from hard-coded atoms and RDKit fragment objects.

    Returns:
        A GeneSet constructed from the atom list, the FragCatParams created
        from RDKit's FunctionalGroups.txt, and a FragCatalog to which the
        fragments of the molecule 'CCCC' have been added.
    """
    # Presumably atomic numbers (C, N, O, F, B, P, S, Cl) -- confirm with GeneSet.
    atom_list = [6, 7, 8, 9, 5, 15, 16, 17]
    group_file = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
    frag_params = FragmentCatalog.FragCatParams(1, 5, group_file)
    frag_catalog = FragmentCatalog.FragCatalog(frag_params)
    generator = FragmentCatalog.FragCatGenerator()
    # Seed the catalog with the fragments of a single molecule.
    generator.AddFragsFromMol(Chem.MolFromSmiles('CCCC'), frag_catalog)
    return GeneSet(atom_list, frag_params, frag_catalog)
Ejemplo n.º 6
0
 def test6DownEntries(self):
     """Verify the catalog size and the down-entry ids of entries 0 and 1."""
     params = FragmentCatalog.FragCatParams(1, 6, self.fName, 1.0e-8)
     catalog = FragmentCatalog.FragCatalog(params)
     generator = FragmentCatalog.FragCatGenerator()
     for mol in Chem.SmilesMolSupplier(self.smiName, " ", 0, 1, 0):
         generator.AddFragsFromMol(mol, catalog)
     assert catalog.GetNumEntries() == 21
     assert catalog.GetFPLength() == 21
     # Expected down ids, indexed by catalog entry id.
     expected_down_ids = ((2, 8, 9, 16), (2, 3, 5, 7))
     for entry_id, down_ids in enumerate(expected_down_ids):
         assert tuple(catalog.GetEntryDownIds(entry_id)) == down_ids
Ejemplo n.º 7
0
 def test8Issue118(self):
   """Issue 118 check: one molecule must give a single bit with the expected description."""
   molText = '\n'.join(['CCN(C(N)=O)N=O'])
   groupFile = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
   supplier = Chem.SmilesMolSupplierFromText(molText, ',', 0, -1, 0)
   params = FragmentCatalog.FragCatParams(2, 4, groupFile, 1.0e-8)
   catalog = FragmentCatalog.FragCatalog(params)
   generator = FragmentCatalog.FragCatGenerator()
   for mol in supplier:
     generator.AddFragsFromMol(mol, catalog)
   self.assertEqual(catalog.GetFPLength(), 1)
   self.assertEqual(catalog.GetBitDescription(0), 'CCN(<-C(=O)N>)<-N=O>')
Ejemplo n.º 8
0
def BuildCatalog(suppl,
                 maxPts=-1,
                 groupFileName=None,
                 minPath=2,
                 maxPath=6,
                 reportFreq=10):
    """ builds a fragment catalog from the molecules provided by a supplier

    **Arguments**

      - suppl: a mol supplier (any iterable of molecules)

      - maxPts: (optional) if positive, this will set an upper bound on the
        number of molecules to be considered

      - groupFileName: (optional) name of the file containing functional group
        information; defaults to FunctionalGroups.txt in RDConfig.RDDataDir

      - minPath, maxPath: (optional) the minimum and maximum path lengths
        to be considered

      - reportFreq: (optional) how often (in molecules) to display status
        information; 0 (or None) disables status reporting

    **Returns**

      a FragmentCatalog

    """
    if groupFileName is None:
        groupFileName = os.path.join(RDConfig.RDDataDir,
                                     "FunctionalGroups.txt")

    fpParams = FragmentCatalog.FragCatParams(minPath, maxPath, groupFileName)
    catalog = FragmentCatalog.FragCatalog(fpParams)
    fgen = FragmentCatalog.FragCatGenerator()

    # nPts is the number of molecules to process, or -1 when it is unknown
    # (no positive maxPts and the supplier has no length).
    if maxPts > 0:
        nPts = maxPts
    elif hasattr(suppl, '__len__'):
        nPts = len(suppl)
    else:
        nPts = -1

    for i, mol in enumerate(suppl):
        if i == nPts:
            break
        # Checking reportFreq first keeps a value of 0 from raising
        # ZeroDivisionError in the modulo below.
        if reportFreq and i and not i % reportFreq:
            if nPts > -1:
                message('Done %d of %d, %d paths\n' %
                        (i, nPts, catalog.GetFPLength()))
            else:
                message('Done %d, %d paths\n' % (i, catalog.GetFPLength()))
        fgen.AddFragsFromMol(mol, catalog)
    return catalog
Ejemplo n.º 9
0
 def test5FPsize(self):
     """Check that a catalog built with FragCatParams(6, 6, ...) from one molecule has one bit.

     The single bit must have order 6, the expected description and the
     expected functional-group ids.  The molecule comes from an inline
     SMILES, so no fixture file is read.
     """
     fparams = FragmentCatalog.FragCatParams(6, 6, self.fName)
     fcat = FragmentCatalog.FragCatalog(fparams)
     fgen = FragmentCatalog.FragCatGenerator()
     suppl = [Chem.MolFromSmiles('C1CCCOC1O')]
     for mol in suppl:
         fgen.AddFragsFromMol(mol, fcat)
     assert fcat.GetFPLength() == 1
     for i in range(fcat.GetFPLength()):
         assert fcat.GetBitOrder(i) == 6
         assert fcat.GetBitDescription(
             i) == "C1CCOC<-O>C1", fcat.GetBitDescription(i)
         assert tuple(fcat.GetBitFuncGroupIds(i)) == (1, )
Ejemplo n.º 10
0
 def test2Generator(self):
   """Check that the entry-indexed and bit-indexed catalog accessors agree.

   After filling a catalog from the molecules supplied from ``self.smiName``,
   every entry id must equal its bit id, and order, description and
   functional-group ids must match between the two accessor families.
   """
   params = FragmentCatalog.FragCatParams(1, 6, self.fName, 1.0e-8)
   catalog = FragmentCatalog.FragCatalog(params)
   generator = FragmentCatalog.FragCatGenerator()
   for mol in Chem.SmilesMolSupplier(self.smiName, " ", 0, 1, 0):
     generator.AddFragsFromMol(mol, catalog)
   self.assertEqual(catalog.GetNumEntries(), 21)
   self.assertEqual(catalog.GetFPLength(), 21)
   for entryIdx in range(catalog.GetNumEntries()):
     self.assertEqual(catalog.GetEntryBitId(entryIdx), entryIdx)
     self.assertEqual(catalog.GetEntryOrder(entryIdx), catalog.GetBitOrder(entryIdx))
     self.assertEqual(catalog.GetEntryDescription(entryIdx),
                      catalog.GetBitDescription(entryIdx))
     entryGroups = tuple(catalog.GetEntryFuncGroupIds(entryIdx))
     bitGroups = tuple(catalog.GetBitFuncGroupIds(entryIdx))
     self.assertEqual(entryGroups, bitGroups)
Ejemplo n.º 11
0
 def test5FPsize(self):
   """Check that a catalog built with FragCatParams(6, 6, ...) from one molecule has one bit.

   The single bit must have order 6, the expected description and the
   expected functional-group ids.  The molecule comes from an inline SMILES,
   so no fixture file is read.
   """
   fparams = FragmentCatalog.FragCatParams(6, 6, self.fName)
   fcat = FragmentCatalog.FragCatalog(fparams)
   fgen = FragmentCatalog.FragCatGenerator()
   suppl = [Chem.MolFromSmiles('C1CCCOC1O')]
   for mol in suppl:
     fgen.AddFragsFromMol(mol, fcat)
   self.assertEqual(fcat.GetFPLength(), 1)
   for i in range(fcat.GetFPLength()):
     self.assertEqual(fcat.GetBitOrder(i), 6)
     self.assertEqual(fcat.GetBitDescription(i), "C1CC<-O>OCC1")
     self.assertEqual(tuple(fcat.GetBitFuncGroupIds(i)), (1, ))
Ejemplo n.º 12
0
  def test7Issue116(self):
    """Issue 116 check: catalog bits and fingerprints for a toluene-derived catalog.

    The catalog built from 'Cc1ccccc1' must contain two bits; fingerprints of
    two molecules are then compared bit by bit with the expected values.
    """
    molText = '\n'.join(['Cc1ccccc1'])
    supplier = Chem.SmilesMolSupplierFromText(molText, ',', 0, -1, 0)
    params = FragmentCatalog.FragCatParams(2, 2, self.fName, 1.0e-8)
    catalog = FragmentCatalog.FragCatalog(params)
    generator = FragmentCatalog.FragCatGenerator()
    for mol in supplier:
      generator.AddFragsFromMol(mol, catalog)
    self.assertEqual(catalog.GetFPLength(), 2)
    self.assertEqual(catalog.GetBitDescription(0), 'ccC')

    fpGenerator = FragmentCatalog.FragFPGenerator()
    # (SMILES, expected values of fingerprint bits 0 and 1)
    expectations = (
      ('Cc1ccccc1', (1, 1)),
      ('c1ccccc1-c1ccccc1', (0, 1)),
    )
    for smi, expectedBits in expectations:
      fp = fpGenerator.GetFPForMol(Chem.MolFromSmiles(smi), catalog)
      for bitIdx, expected in enumerate(expectedBits):
        self.assertEqual(fp[bitIdx], expected)
Ejemplo n.º 13
0
def generate_geneset():
    """
    Build the GeneSet of atoms and fragments used by the engine.

    The atoms and fragment sources are currently hard-coded here; the
    original author expected this to be adapted in future versions.

    Parameters
    ----------
    None

    Returns
    ----------
    GeneSet : object
        an instance of the GeneSet class constructed from the atom list,
        the RDKit fragment parameters, and the custom fragment catalog
    """
    # Presumably atomic numbers (C, N, O, F, B, P, S, Cl) -- confirm with GeneSet.
    atom_list = [6, 7, 8, 9, 5, 15, 16, 17]
    group_file = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
    frag_params = FragmentCatalog.FragCatParams(1, 5, group_file)
    frag_catalog = FragmentCatalog.FragCatalog(frag_params)
    generator = FragmentCatalog.FragCatGenerator()
    # Seed the custom catalog with the fragments of a single molecule.
    generator.AddFragsFromMol(Chem.MolFromSmiles('CCCC'), frag_catalog)
    return GeneSet(atom_list, frag_params, frag_catalog)
Ejemplo n.º 14
0
#!/usr/bin/env python

import os

from rdkit import Chem
from rdkit import RDConfig
from rdkit.Chem import FragmentCatalog

# Demo: build a fragment catalog for one molecule and print every fragment.

# Functional-group definitions from the RDKit data directory.
fName = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
fparams = FragmentCatalog.FragCatParams(1, 6, fName)
print('found %d functional groups in catalog' % (fparams.GetNumFuncGroups()))

fcat = FragmentCatalog.FragCatalog(fparams)
fcgen = FragmentCatalog.FragCatGenerator()

smiles = 'OCC=CC(=O)O'
m = Chem.MolFromSmiles(smiles)
print('examining molecule: ' + smiles)
frag_count = fcgen.AddFragsFromMol(m, fcat)
print('identified %d fragments' % (frag_count))

# Use a separate index name so the molecule bound to `m` is not overwritten.
for frag_idx in range(frag_count):
    print(fcat.GetEntryDescription(frag_idx))