Ejemplo n.º 1
0
 def test3FPgenerator(self):
     """Check fingerprints generated from a freshly built fragment catalog.

     A catalog is populated from every molecule read from ``self.smiName``
     and is expected to hold 21 entries (fingerprint length 21).  Each
     molecule is then re-parsed from its SMILES and fingerprinted against
     the catalog; the number of on bits and the exact on-bit tuple are
     compared with the reference values for the first 12 molecules.
     """
     fparams = FragmentCatalog.FragCatParams(1, 6, self.fName)
     fcat = FragmentCatalog.FragCatalog(fparams)
     fgen = FragmentCatalog.FragCatGenerator()
     # The supplier reads the SMILES file itself; no separate open() is
     # needed (the previous one leaked a file handle and was never used).
     suppl = Chem.SmilesMolSupplier(self.smiName, " ", 0, 1, 0)
     smiles = []
     for mol in suppl:
         fgen.AddFragsFromMol(mol, fcat)
         smiles.append(Chem.MolToSmiles(mol))
     assert fcat.GetNumEntries() == 21
     assert fcat.GetFPLength() == 21, fcat.GetFPLength()
     fpgen = FragmentCatalog.FragFPGenerator()
     # Reference data: expected on-bit count and on-bit ids per molecule.
     obits = [3, 2, 3, 3, 2, 3, 5, 5, 5, 4, 5, 6]
     obls = [(0, 1, 2), (1, 3), (1, 4, 5), (1, 6, 7), (0, 8), (0, 6, 9),
             (0, 1, 2, 3, 10), (0, 1, 2, 8, 11), (1, 3, 4, 5, 12),
             (1, 4, 5, 13), (1, 3, 6, 7, 14), (0, 1, 6, 7, 9, 15)]
     for i, smi in enumerate(smiles):
         mol = Chem.MolFromSmiles(smi)
         fp = fpgen.GetFPForMol(mol, fcat)
         # Molecules beyond the reference lists are fingerprinted but not checked.
         if i < len(obits):
             assert fp.GetNumOnBits() == obits[i], '%s: %s' % (
                 smi, str(fp.GetOnBits()))
         obl = fp.GetOnBits()
         if i < len(obls):
             assert tuple(obl) == obls[i], '%s: %s' % (smi, obl)
Ejemplo n.º 2
0
 def test4Serialize(self):
     """Check that a fragment catalog survives a pickle round trip.

     A catalog is populated from every molecule read from ``self.smiName``,
     pickled and unpickled.  The copy must report the same number of
     entries and fingerprint length (21), and must yield the same on bits
     as the original catalog for every molecule.
     """
     fparams = FragmentCatalog.FragCatParams(1, 6, self.fName)
     fcat = FragmentCatalog.FragCatalog(fparams)
     fgen = FragmentCatalog.FragCatGenerator()
     # The supplier reads the SMILES file itself; no separate open() is
     # needed (the previous one leaked a file handle and was never used).
     suppl = Chem.SmilesMolSupplier(self.smiName, " ", 0, 1, 0)
     smiles = []
     for mol in suppl:
         fgen.AddFragsFromMol(mol, fcat)
         smiles.append(Chem.MolToSmiles(mol))
     assert fcat.GetNumEntries() == 21
     assert fcat.GetFPLength() == 21, fcat.GetFPLength()
     # Round trip through pickle; the data is produced right here, so
     # unpickling it is safe.
     pkl = cPickle.dumps(fcat)
     fcat2 = cPickle.loads(pkl)
     assert fcat2.GetNumEntries() == 21
     assert fcat2.GetFPLength() == 21, fcat2.GetFPLength()
     fpgen = FragmentCatalog.FragFPGenerator()
     for smi in smiles:
         mol = Chem.MolFromSmiles(smi)
         fp1 = fpgen.GetFPForMol(mol, fcat)
         fp2 = fpgen.GetFPForMol(mol, fcat2)
         assert fp1.GetNumOnBits() == fp2.GetNumOnBits()
         obl1 = fp1.GetOnBits()
         obl2 = fp2.GetOnBits()
         assert tuple(obl1) == tuple(obl2)
Ejemplo n.º 3
0
def ScoreMolecules(suppl, catalog, maxPts=-1, actName='', acts=None, nActs=2, reportFreq=10):
  """ scores the compounds in a supplier using a catalog

    **Arguments**

      - suppl: a mol supplier; it must support indexing (suppl[0]),
        reset() and iteration

      - catalog: the FragmentCatalog

      - maxPts: (optional) the maximum number of molecules to be
        considered.
        NOTE(review): this argument is currently not used by the
        implementation; every molecule in the supplier is processed.

      - actName: (optional) the name of the molecule's activity property.
        If this is not provided, the last property name of the first
        molecule in the supplier will be used.

      - acts: (optional) a sequence of activity values (integers).
        If not provided, the activities will be read from the molecules.

      - nActs: (optional) number of possible activity values

      - reportFreq: (optional) how often to display status information

    **Returns**

      a 2-tuple:

        1) the results table (a 3D array of ints nBits x 2 x nActs).
           For each molecule, every row's [row, 0, act] count is
           incremented, and for each on bit the count is moved from
           column 0 to column 1.

        2) a list containing the on bit lists for each molecule
           (an empty list for molecules that evaluate as false)

  """
  nBits = catalog.GetFPLength()
  # builtin int is what the removed numpy.int alias referred to
  resTbl = numpy.zeros((nBits, 2, nActs), int)
  obls = []

  if not actName and not acts:
    actName = suppl[0].GetPropNames()[-1]

  fpgen = FragmentCatalog.FragFPGenerator()
  suppl.reset()
  # molecules are counted from 1, both for progress messages and for the
  # acts[i - 1] lookup
  for i, mol in enumerate(suppl, 1):
    if not i % reportFreq:
      message('Done %d.\n' % (i))
    if not mol:
      # keep obls aligned with the supplier even for unusable molecules
      obls.append([])
      continue
    if not acts:
      act = int(mol.GetProp(actName))
    else:
      act = acts[i - 1]
    fp = fpgen.GetFPForMol(mol, catalog)
    onBits = list(fp.GetOnBits())
    obls.append(onBits)
    # start by counting the molecule as "bit off" for every bit ...
    resTbl[:, 0, act] += 1
    # ... then move the count to "bit on" for the bits that are set.
    # NOTE(review): rows are addressed as id_ - 1, so an on-bit id of 0
    # lands on the last row via negative indexing -- confirm that on-bit
    # ids are meant to be treated as 1-based here.
    for id_ in onBits:
      resTbl[id_ - 1, 0, act] -= 1
      resTbl[id_ - 1, 1, act] += 1
  return resTbl, obls
Ejemplo n.º 4
0
def CalcGains(suppl,catalog,topN=-1,actName='',acts=None,
              nActs=2,reportFreq=10,biasList=None,collectFps=0):
  """ calculates info gains by constructing fingerprints

    **Arguments**

      - suppl: a mol supplier (must support indexing if neither actName
        nor acts is provided)

      - catalog: the FragmentCatalog used to build the fingerprints

      - topN: (optional) number of top bits to request from the ranker;
        a negative value means the full fingerprint length

      - actName: (optional) the name of the molecule's activity property.
        If neither this nor acts is provided, the last property name of
        the first molecule in the supplier is used.

      - acts: (optional) a sequence of activity values, indexed by the
        molecule's position in the supplier

      - nActs: (optional) number of possible activity values

      - reportFreq: (optional) how often to display status information

      - biasList: (optional) if provided, a BIASENTROPY ranker is used
        and this list is passed to its SetBiasList()

      - collectFps: (optional) if true, the fingerprints are collected
        and returned

    **Returns**

      a 2-tuple:
        1) gains matrix (the result of the ranker's GetTopN(topN))
        2) list of fingerprints (empty unless collectFps is set)

    **Raises**

      KeyError if activities are read from the molecules and a molecule
      lacks the activity property

  """
  nBits = catalog.GetFPLength()
  if topN < 0:
    topN = nBits
  if not actName and not acts:
    actName = suppl[0].GetPropNames()[-1]

  # the total is only used to make the progress messages more informative
  if hasattr(suppl,'__len__'):
    nMols = len(suppl)
  else:
    nMols = -1
  fpgen = FragmentCatalog.FragFPGenerator()
  if biasList:
    ranker = InfoTheory.InfoBitRanker(nBits,nActs,InfoTheory.InfoType.BIASENTROPY)
    ranker.SetBiasList(biasList)
  else:
    ranker = InfoTheory.InfoBitRanker(nBits,nActs,InfoTheory.InfoType.ENTROPY)
  fps = []
  for i,mol in enumerate(suppl):
    if not acts:
      try:
        act = int(mol.GetProp(actName))
      except KeyError:
        # tell the user what is available before giving up
        message('ERROR: Molecule has no property: %s\n'%(actName))
        message('\tAvailable properties are: %s\n'%(str(mol.GetPropNames())))
        raise KeyError(actName)
    else:
      act = acts[i]
    if i and not i%reportFreq:
      if nMols>0:
        message('Done %d of %d.\n'%(i,nMols))
      else:
        message('Done %d.\n'%(i))
    fp = fpgen.GetFPForMol(mol,catalog)
    ranker.AccumulateVotes(fp,act)
    if collectFps:
      fps.append(fp)
  gains = ranker.GetTopN(topN)
  return gains,fps
Ejemplo n.º 5
0
 def _test5MoreComplex(self):
   """Check that each molecule's fingerprint sets the bits it contributed.

   While the molecules from ``self.smiList`` are added to ``self.fragCat``,
   the index range implied by the count returned from AddFragsFromMol is
   recorded per molecule.  A second pass fingerprints every molecule
   against the finished catalog and asserts that each recorded bit is set.
   """
   smilesText = '\n'.join(self.smiList)
   suppl = Chem.SmilesMolSupplierFromText(smilesText, ',', 0, -1, 0)

   # molecule index -> bit indices attributed to that molecule
   bitRanges = {}
   nextBit = 0
   for molIdx, mol in enumerate(suppl):
     nAdded = self.fgen.AddFragsFromMol(mol, self.fragCat)
     upper = nextBit + nAdded
     bitRanges[molIdx] = range(nextBit, upper)
     nextBit = upper

   # second pass: those bits must show up in the fingerprints
   fpgen = FragmentCatalog.FragFPGenerator()
   for molIdx, mol in enumerate(suppl):
     fp = fpgen.GetFPForMol(mol, self.fragCat)
     for bit in bitRanges[molIdx]:
       assert fp[bit], '%s: %s' % (Chem.MolToSmiles(mol), str(bit))
Ejemplo n.º 6
0
 def _testBits(self, fragCat):
   """Compare fingerprints built from ``fragCat`` with reference on bits.

   Every molecule parsed from ``self.smiList2`` is fingerprinted against
   ``fragCat``.  The number of on bits is checked against a fixed list of
   12 expected counts and the on-bit tuple against ``self.list2Obls``;
   molecules beyond the end of either reference list skip that check.
   """
   fpgen = FragmentCatalog.FragFPGenerator()
   obits = [3, 2, 3, 3, 2, 3, 5, 5, 5, 4, 5, 6]
   obls = self.list2Obls
   suppl = Chem.SmilesMolSupplierFromText('\n'.join(self.smiList2), ',', 0, -1, 0)
   for i, mol in enumerate(suppl):
     fp = fpgen.GetFPForMol(mol, fragCat)
     # Computed for every molecule: both assertion messages use it, and the
     # two reference lists are not guaranteed to have the same length.
     smi = Chem.MolToSmiles(mol)
     if i < len(obits):
       assert fp.GetNumOnBits() == obits[i], '%s: %s' % (smi, str(fp.GetOnBits()))
     obl = fp.GetOnBits()
     if i < len(obls):
       assert tuple(obl) == obls[i], '%s: %s' % (smi, obl)
Ejemplo n.º 7
0
 def test9Issue116(self):
   """Regression check for issue 116 using BuildCatalog.

   A catalog built from 'Cc1ccccc1' with minPath=2 and maxPath=2 must
   have fingerprint length 2 with bit 0 described as 'ccC'.  The source
   molecule must set both bits; 'c1ccccc1-c1ccccc1' must set only bit 1.
   """
   smilesText = '\n'.join(['Cc1ccccc1'])
   suppl = Chem.SmilesMolSupplierFromText(smilesText, ',', 0, -1, 0)
   cat = BuildFragmentCatalog.BuildCatalog(suppl, minPath=2, maxPath=2)
   assert cat.GetFPLength() == 2
   assert cat.GetBitDescription(0) == 'ccC'

   fpgen = FragmentCatalog.FragFPGenerator()
   # (SMILES, whether bit 0 is expected to be set); bit 1 is always expected
   cases = (
     ('Cc1ccccc1', True),
     ('c1ccccc1-c1ccccc1', False),
   )
   for smi, bit0Expected in cases:
     fp = fpgen.GetFPForMol(Chem.MolFromSmiles(smi), cat)
     assert bool(fp[0]) is bit0Expected
     assert fp[1]
Ejemplo n.º 8
0
  def test7Issue116(self):
    """Regression check for issue 116 using a hand-built catalog.

    A catalog created with FragCatParams(2, 2, self.fName, 1.0e-8) and
    filled from 'Cc1ccccc1' must have fingerprint length 2 with bit 0
    described as 'ccC'.  The source molecule must set both bits;
    'c1ccccc1-c1ccccc1' must set only bit 1.
    """
    smilesText = '\n'.join(['Cc1ccccc1'])
    suppl = Chem.SmilesMolSupplierFromText(smilesText, ',', 0, -1, 0)
    params = FragmentCatalog.FragCatParams(2, 2, self.fName, 1.0e-8)
    cat = FragmentCatalog.FragCatalog(params)
    generator = FragmentCatalog.FragCatGenerator()
    for mol in suppl:
      generator.AddFragsFromMol(mol, cat)
    self.assertEqual(cat.GetFPLength(), 2)
    self.assertEqual(cat.GetBitDescription(0), 'ccC')

    fpgen = FragmentCatalog.FragFPGenerator()
    # (SMILES, expected value of bit 0, expected value of bit 1)
    cases = (
      ('Cc1ccccc1', 1, 1),
      ('c1ccccc1-c1ccccc1', 0, 1),
    )
    for smi, bit0, bit1 in cases:
      fp = fpgen.GetFPForMol(Chem.MolFromSmiles(smi), cat)
      self.assertEqual(fp[0], bit0)
      self.assertEqual(fp[1], bit1)
Ejemplo n.º 9
0
# Tutorial: build fragment fingerprints from a fragment catalog.
# NOTE(review): `fparams` and `fcgen` are not defined in this excerpt --
# presumably they are created earlier in the script; confirm.
ms = [Chem.MolFromSmiles('OCC(NC1CC1)CCC'), Chem.MolFromSmiles('OCC=CC(=O)O')]
# Fragment catalog (the container that stores the fragments)
fcat = FragmentCatalog.FragCatalog(fparams)

# Fragment generator: add the fragments of every molecule to the catalog
for m in ms:
    fcgen.AddFragsFromMol(m, fcat)

# Check how many fragments the catalog holds
num_entries = fcat.GetNumEntries()
print(num_entries)  # 17

# Once the catalog has collected all fragments, use it to generate molecular fingerprints

# Create a fragment fingerprint generator: FragFPGenerator()
fpgen = FragmentCatalog.FragFPGenerator()
# Pass in a molecule and the catalog to generate a fingerprint: GetFPForMol(mol, fcat)
fp1 = fpgen.GetFPForMol(ms[1], fcat)
# View the fingerprint as a string: ToBitString()
print(fp1.ToBitString())  # 10000000000000011

# See which bits of the fingerprint are set: GetOnBits()
print(list(fp1.GetOnBits()))  # [0, 15, 16]

# Fragment fingerprints can be handled like ordinary molecular fingerprints, e.g. to find shared fragments

# First apply the bitwise "&" to the fingerprints: a bit is 1 only when it is 1 in both fingerprints, otherwise 0
# Get the fragments that appear in both fingerprints: GetOnBits()
# Inspect the fragment information: GetEntryDescription()

fp0 = fpgen.GetFPForMol(ms[0], fcat)