Beispiel #1
0
 def Reset(self):
     """Recompute the cached OR-combination of the stored vectors.

     Nothing happens unless the needs-reset flag is set.  The cached
     vector is first cleared; if no vectors are stored the method returns
     at that point and the flag stays set.  Otherwise the cache becomes a
     copy of the first stored vector OR-ed with every other stored vector,
     the bit count is taken from that copy, and the flag is cleared.
     """
     if not self.__needReset:
         return
     self.__orVect = None
     if not self.__vects:
         return
     keys = list(iterkeys(self.__vects))
     head, tail = keys[0], keys[1:]
     # start from a copy so the stored vector itself is never modified
     self.__orVect = copy.copy(self.__vects[head])
     self.__numBits = self.__orVect.GetNumBits()
     for key in tail:
         self.__orVect |= self.__vects[key]
     self.__needReset = False
Beispiel #2
0
 def Reset(self):
   """Rebuild the cached union (bitwise OR) of all stored vectors.

   Only acts when the needs-reset flag is set.  The cache is cleared
   first; with no stored vectors it stays cleared and the flag is left
   set.  With stored vectors, the cache is a copy of the first one OR-ed
   with the rest, the bit count is read from it, and the flag is cleared.
   """
   if self.__needReset:
     self.__orVect = None
     if self.__vects:
       allKeys = list(iterkeys(self.__vects))
       # copy the first vector so the in-place OR below leaves it untouched
       self.__orVect = copy.copy(self.__vects[allKeys[0]])
       self.__numBits = self.__orVect.GetNumBits()
       for key in allKeys[1:]:
         self.__orVect |= self.__vects[key]
       self.__needReset = False
Beispiel #3
0
#
from __future__ import print_function
from rdkit.six.moves import cPickle
from rdkit.six import iterkeys
from rdkit import DataStructs,Chem
from rdkit import Chem

# Maps each similarity method name to the DataStructs vector class
# associated with it.
similarityMethods = {
  'RDK': DataStructs.ExplicitBitVect,
  'AtomPairs': DataStructs.IntSparseIntVect,
  'TopologicalTorsions': DataStructs.LongSparseIntVect,
  'Pharm2D': DataStructs.SparseBitVect,
  'Gobbi2D': DataStructs.SparseBitVect,
  'Morgan': DataStructs.UIntSparseIntVect,
  'Avalon': DataStructs.ExplicitBitVect,
}
# The method names on their own, in the dictionary's iteration order.
supportedSimilarityMethods = list(similarityMethods)


class LayeredOptions:
  loadLayerFlags=0xFFFFFFFF
  searchLayerFlags=0x7
  minPath=1
  maxPath=6
  fpSize=1024
  wordSize=32
  nWords=fpSize//wordSize
  @staticmethod
  def GetFingerprint(mol,query=True):
    if query:
      flags=LayeredOptions.searchLayerFlags
    else:
Beispiel #4
0
def RecapDecompose(mol,
                   allNodes=None,
                   minFragmentSize=0,
                   onlyUseReactions=None):
    """ returns the recap decomposition for a molecule

    Arguments:
      - mol: the molecule to decompose
      - allNodes: optional dictionary mapping SMILES to the hierarchy node
        built for it. It is updated in place; if the SMILES of mol is
        already a key, the stored node is returned without doing any work.
      - minFragmentSize: when > 0, a product set is rejected if any of its
        fragments has fewer than this many atoms (explicit atom count minus
        the number of '*' characters in its SMILES). When 0, a product set
        is rejected instead if any fragment, with its '[*]' dummies and
        empty branches removed, is '', 'C', 'CC' or 'CCC'.
      - onlyUseReactions: optional container of indices into the module's
        reaction list; when non-empty only those reactions are applied.

    Returns the hierarchy node for mol; fragments are reachable through
    the children dictionaries of the nodes.
    """
    mSmi = Chem.MolToSmiles(mol, 1)

    if allNodes is None:
        allNodes = {}
    if mSmi in allNodes:
        return allNodes[mSmi]

    res = RecapHierarchyNode(mol)
    res.smiles = mSmi
    activePool = {mSmi: res}
    allNodes[mSmi] = res
    while activePool:
        nSmi = next(iterkeys(activePool))
        node = activePool.pop(nSmi)
        if not node.mol:
            continue
        for rxnIdx, reaction in enumerate(reactions):
            if onlyUseReactions and rxnIdx not in onlyUseReactions:
                continue
            ps = reaction.RunReactants((node.mol, ))
            if not ps:
                continue
            for prodSeq in ps:
                seqOk = True
                # we want to disqualify small fragments, so sort the product sequence by size
                # and then look for "forbidden" fragments
                tSeq = sorted((prod.GetNumAtoms(onlyExplicit=True), idx)
                              for idx, prod in enumerate(prodSeq))
                # Only products that sanitize cleanly are recorded here, so
                # the bookkeeping loop below never touches a product whose
                # SMILES was never computed.
                kept = []
                for nats, idx in tSeq:
                    prod = prodSeq[idx]
                    try:
                        Chem.SanitizeMol(prod)
                    except Exception:
                        # best effort: ignore products that cannot be sanitized
                        continue
                    pSmi = Chem.MolToSmiles(prod, 1)
                    if minFragmentSize > 0:
                        nDummies = pSmi.count('*')
                        if nats - nDummies < minFragmentSize:
                            seqOk = False
                            break
                    # don't forget after replacing dummy atoms to remove any empty
                    # branches:
                    elif pSmi.replace('[*]', '').replace('()', '') in ('', 'C', 'CC', 'CCC'):
                        seqOk = False
                        break
                    prod.pSmi = pSmi
                    kept.append((pSmi, prod))
                if not seqOk:
                    continue
                for pSmi, prod in kept:
                    if pSmi not in allNodes:
                        # first time this fragment is seen: create its node
                        # and queue it for further decomposition
                        pNode = RecapHierarchyNode(prod)
                        pNode.smiles = pSmi
                        activePool[pSmi] = pNode
                        allNodes[pSmi] = pNode
                    else:
                        pNode = allNodes[pSmi]
                    # weak proxy so the child -> parent link does not create
                    # a strong reference cycle
                    pNode.parents[nSmi] = weakref.proxy(node)
                    node.children[pSmi] = pNode
    return res
Beispiel #5
0
def BRICSDecompose(mol,allNodes=None,minFragmentSize=1,onlyUseReactions=None,
                   silent=True,keepNonLeafNodes=False,singlePass=False,returnMols=False):
  """ returns the BRICS decomposition for a molecule

  Arguments:
    - mol: the molecule to decompose
    - allNodes: optional set of SMILES that have already been seen. It is
      updated in place; if the SMILES of mol is already in it an empty set
      is returned.
    - minFragmentSize: when > 0, a product set is rejected if any of its
      fragments has fewer than this many atoms (explicit atom count minus
      the number of '*' characters in its SMILES).
    - onlyUseReactions: optional container of (groupIndex, reactionIndex)
      tuples; when non-empty only those reactions are applied.
    - silent: when False, progress information is printed.
    - keepNonLeafNodes: when True, everything in allNodes is returned, not
      only the pieces that are left in the pool at the end.
    - singlePass: when True, newly found fragments are not fed back in for
      further decomposition; the result is built as for keepNonLeafNodes.
    - returnMols: when True, molecules are returned instead of SMILES.

  Returns a set of SMILES, or the values of a SMILES -> molecule
  dictionary when returnMols is True.

  >>> from rdkit import Chem
  >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
  >>> res = list(BRICSDecompose(m))
  >>> sorted(res)
  ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

  >>> res = list(BRICSDecompose(m,returnMols=True))
  >>> res[0]
  <rdkit.Chem.rdchem.Mol object ...>
  >>> smis = [Chem.MolToSmiles(x,True) for x in res]
  >>> sorted(smis)
  ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

  nexavar, an example from the paper (corrected):
  >>> m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
  >>> res = list(BRICSDecompose(m))
  >>> sorted(res)
  ['[1*]C([1*])=O', '[1*]C([6*])=O', '[14*]c1cc([16*])ccn1', '[16*]c1ccc(Cl)c([16*])c1', '[16*]c1ccc([16*])cc1', '[3*]O[3*]', '[5*]NC', '[5*]N[5*]', '[8*]C(F)(F)F']

  it's also possible to keep pieces that haven't been fully decomposed:
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
  >>> sorted(res)
  ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[3*]O[3*]', '[4*]CC', '[4*]CCC']

  >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
  >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
  >>> sorted(res)
  ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[16*]c1cccc([16*])c1', '[3*]OCCC', '[3*]OC[8*]', '[3*]OCc1cccc(-c2ccccn2)c1', '[3*]OCc1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]', '[4*]Cc1cccc(-c2ccccn2)c1', '[4*]Cc1cccc([16*])c1', '[8*]COCCC']

  or to only do a single pass of decomposition:
  >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
  >>> res = list(BRICSDecompose(m,singlePass=True))
  >>> sorted(res)
  ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[3*]OCCC', '[3*]OCc1cccc(-c2ccccn2)c1', '[4*]CCC', '[4*]Cc1cccc(-c2ccccn2)c1', '[8*]COCCC']

  setting a minimum size for the fragments:
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=2))
  >>> sorted(res)
  ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=3))
  >>> sorted(res)
  ['CCCOCC', '[3*]OCC', '[4*]CCC']
  >>> res = list(BRICSDecompose(m,minFragmentSize=2))
  >>> sorted(res)
  ['[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']


  """
  mSmi = Chem.MolToSmiles(mol,1)

  if allNodes is None:
    allNodes=set()

  if mSmi in allNodes:
    return set()

  activePool={mSmi:mol}
  allNodes.add(mSmi)
  foundMols={mSmi:mol}
  # the reaction groups are applied one after the other; whatever survives
  # one group is the input pool for the next
  for gpIdx,reactionGp in enumerate(reactions):
    newPool = {}
    while activePool:
      matched=False
      nSmi = next(iterkeys(activePool))
      mol = activePool.pop(nSmi)
      for rxnIdx,reaction in enumerate(reactionGp):
        if onlyUseReactions and (gpIdx,rxnIdx) not in onlyUseReactions:
          continue
        if not silent:
          print('--------')
          print(smartsGps[gpIdx][rxnIdx])
        ps = reaction.RunReactants((mol,))
        if not ps:
          continue
        if not silent: print(nSmi,'->',len(ps),'products')
        for prodSeq in ps:
          seqOk=True
          # we want to disqualify small fragments, so sort the product sequence by size
          tSeq = sorted((prod.GetNumAtoms(onlyExplicit=True),idx) for idx,prod in enumerate(prodSeq))
          # Only products that sanitize cleanly are recorded here, so the
          # bookkeeping loop below never touches a product whose SMILES
          # was never computed.
          kept = []
          for nats,idx in tSeq:
            prod = prodSeq[idx]
            try:
              Chem.SanitizeMol(prod)
            except Exception:
              # best effort: ignore products that cannot be sanitized
              continue
            pSmi = Chem.MolToSmiles(prod,1)
            if minFragmentSize>0:
              nDummies = pSmi.count('*')
              if nats-nDummies<minFragmentSize:
                seqOk=False
                break
            prod.pSmi = pSmi
            kept.append((pSmi,prod))
          if not seqOk:
            continue
          matched=True
          for pSmi,prod in kept:
            if pSmi not in allNodes:
              if not singlePass:
                activePool[pSmi] = prod
              allNodes.add(pSmi)
              foundMols[pSmi]=prod
      # keep the piece for the next reaction group if nothing cut it, or if
      # the caller asked to keep intermediates
      if singlePass or keepNonLeafNodes or not matched:
        newPool[nSmi]=mol
    activePool = newPool
  if not (singlePass or keepNonLeafNodes):
    if not returnMols:
      res = set(activePool.keys())
    else:
      res = activePool.values()
  else:
    if not returnMols:
      res = allNodes
    else:
      res = foundMols.values()
  return res
Beispiel #6
0
#
from __future__ import print_function
from rdkit.six.moves import cPickle
from rdkit.six import iterkeys
from rdkit import DataStructs, Chem
from rdkit import Chem

# Lookup table: similarity method name -> the DataStructs vector class
# associated with that method.
similarityMethods = {
    'RDK': DataStructs.ExplicitBitVect,
    'AtomPairs': DataStructs.IntSparseIntVect,
    'TopologicalTorsions': DataStructs.LongSparseIntVect,
    'Pharm2D': DataStructs.SparseBitVect,
    'Gobbi2D': DataStructs.SparseBitVect,
    'Morgan': DataStructs.UIntSparseIntVect,
}
# Just the method names, in the table's iteration order.
supportedSimilarityMethods = list(similarityMethods)


class LayeredOptions:
    loadLayerFlags = 0xFFFFFFFF
    searchLayerFlags = 0x7
    minPath = 1
    maxPath = 6
    fpSize = 1024
    wordSize = 32
    nWords = fpSize // wordSize

    @staticmethod
    def GetFingerprint(mol, query=True):
        if query:
            flags = LayeredOptions.searchLayerFlags
Beispiel #7
0
def RecapDecompose(mol,allNodes=None,minFragmentSize=0,onlyUseReactions=None):
  """ returns the recap decomposition for a molecule

  Arguments:
    - mol: the molecule to decompose
    - allNodes: optional dictionary mapping SMILES to the hierarchy node
      built for it. It is updated in place; if the SMILES of mol is
      already a key, the stored node is returned without doing any work.
    - minFragmentSize: when > 0, a product set is rejected if any of its
      fragments has fewer than this many atoms (explicit atom count minus
      the number of '*' characters in its SMILES). When 0, a product set
      is rejected instead if any fragment, with its '[*]' dummies and
      empty branches removed, is '', 'C', 'CC' or 'CCC'.
    - onlyUseReactions: optional container of indices into the module's
      reaction list; when non-empty only those reactions are applied.

  Returns the hierarchy node for mol; fragments are reachable through
  the children dictionaries of the nodes.
  """
  mSmi = Chem.MolToSmiles(mol,1)

  if allNodes is None:
    allNodes={}
  if mSmi in allNodes:
    return allNodes[mSmi]

  res = RecapHierarchyNode(mol)
  res.smiles =mSmi
  activePool={mSmi:res}
  allNodes[mSmi]=res
  while activePool:
    nSmi = next(iterkeys(activePool))
    node = activePool.pop(nSmi)
    if not node.mol:
      continue
    for rxnIdx,reaction in enumerate(reactions):
      if onlyUseReactions and rxnIdx not in onlyUseReactions:
        continue
      ps = reaction.RunReactants((node.mol,))
      if not ps:
        continue
      for prodSeq in ps:
        seqOk=True
        # we want to disqualify small fragments, so sort the product sequence by size
        # and then look for "forbidden" fragments
        tSeq = sorted((prod.GetNumAtoms(onlyExplicit=True),idx) for idx,prod in enumerate(prodSeq))
        # Only products that sanitize cleanly are recorded here, so the
        # bookkeeping loop below never touches a product whose SMILES
        # was never computed.
        kept = []
        for nats,idx in tSeq:
          prod = prodSeq[idx]
          try:
            Chem.SanitizeMol(prod)
          except Exception:
            # best effort: ignore products that cannot be sanitized
            continue
          pSmi = Chem.MolToSmiles(prod,1)
          if minFragmentSize>0:
            nDummies = pSmi.count('*')
            if nats-nDummies<minFragmentSize:
              seqOk=False
              break
          # don't forget after replacing dummy atoms to remove any empty
          # branches:
          elif pSmi.replace('[*]','').replace('()','') in ('','C','CC','CCC'):
            seqOk=False
            break
          prod.pSmi = pSmi
          kept.append((pSmi,prod))
        if not seqOk:
          continue
        for pSmi,prod in kept:
          if pSmi not in allNodes:
            # first time this fragment is seen: create its node and queue
            # it for further decomposition
            pNode = RecapHierarchyNode(prod)
            pNode.smiles=pSmi
            activePool[pSmi] = pNode
            allNodes[pSmi]=pNode
          else:
            pNode=allNodes[pSmi]
          # weak proxy so the child -> parent link does not create a strong
          # reference cycle
          pNode.parents[nSmi]=weakref.proxy(node)
          node.children[pSmi]=pNode
  return res
Beispiel #8
0
def BRICSDecompose(mol,
                   allNodes=None,
                   minFragmentSize=1,
                   onlyUseReactions=None,
                   silent=True,
                   keepNonLeafNodes=False,
                   singlePass=False,
                   returnMols=False):
    """ returns the BRICS decomposition for a molecule

  Arguments:
    - mol: the molecule to decompose
    - allNodes: optional set of SMILES that have already been seen. It is
      updated in place; if the SMILES of mol is already in it an empty set
      is returned.
    - minFragmentSize: when > 0, a product set is rejected if any of its
      fragments has fewer than this many atoms (explicit atom count minus
      the number of '*' characters in its SMILES).
    - onlyUseReactions: optional container of (groupIndex, reactionIndex)
      tuples; when non-empty only those reactions are applied.
    - silent: when False, progress information is printed.
    - keepNonLeafNodes: when True, everything in allNodes is returned, not
      only the pieces that are left in the pool at the end.
    - singlePass: when True, newly found fragments are not fed back in for
      further decomposition; the result is built as for keepNonLeafNodes.
    - returnMols: when True, molecules are returned instead of SMILES.

  Returns a set of SMILES, or the values of a SMILES -> molecule
  dictionary when returnMols is True.

  >>> from rdkit import Chem
  >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
  >>> res = list(BRICSDecompose(m))
  >>> sorted(res)
  ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

  >>> res = list(BRICSDecompose(m,returnMols=True))
  >>> res[0]
  <rdkit.Chem.rdchem.Mol object ...>
  >>> smis = [Chem.MolToSmiles(x,True) for x in res]
  >>> sorted(smis)
  ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

  nexavar, an example from the paper (corrected):
  >>> m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
  >>> res = list(BRICSDecompose(m))
  >>> sorted(res)
  ['[1*]C([1*])=O', '[1*]C([6*])=O', '[14*]c1cc([16*])ccn1', '[16*]c1ccc(Cl)c([16*])c1', '[16*]c1ccc([16*])cc1', '[3*]O[3*]', '[5*]NC', '[5*]N[5*]', '[8*]C(F)(F)F']

  it's also possible to keep pieces that haven't been fully decomposed:
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
  >>> sorted(res)
  ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[3*]O[3*]', '[4*]CC', '[4*]CCC']

  >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
  >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
  >>> sorted(res)
  ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[16*]c1cccc([16*])c1', '[3*]OCCC', '[3*]OC[8*]', '[3*]OCc1cccc(-c2ccccn2)c1', '[3*]OCc1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]', '[4*]Cc1cccc(-c2ccccn2)c1', '[4*]Cc1cccc([16*])c1', '[8*]COCCC']

  or to only do a single pass of decomposition:
  >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
  >>> res = list(BRICSDecompose(m,singlePass=True))
  >>> sorted(res)
  ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[3*]OCCC', '[3*]OCc1cccc(-c2ccccn2)c1', '[4*]CCC', '[4*]Cc1cccc(-c2ccccn2)c1', '[8*]COCCC']

  setting a minimum size for the fragments:
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=2))
  >>> sorted(res)
  ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=3))
  >>> sorted(res)
  ['CCCOCC', '[3*]OCC', '[4*]CCC']
  >>> res = list(BRICSDecompose(m,minFragmentSize=2))
  >>> sorted(res)
  ['[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']


  """
    mSmi = Chem.MolToSmiles(mol, 1)

    if allNodes is None:
        allNodes = set()

    if mSmi in allNodes:
        return set()

    activePool = {mSmi: mol}
    allNodes.add(mSmi)
    foundMols = {mSmi: mol}
    # the reaction groups are applied one after the other; whatever survives
    # one group is the input pool for the next
    for gpIdx, reactionGp in enumerate(reactions):
        newPool = {}
        while activePool:
            matched = False
            nSmi = next(iterkeys(activePool))
            mol = activePool.pop(nSmi)
            for rxnIdx, reaction in enumerate(reactionGp):
                if onlyUseReactions and (gpIdx, rxnIdx) not in onlyUseReactions:
                    continue
                if not silent:
                    print('--------')
                    print(smartsGps[gpIdx][rxnIdx])
                ps = reaction.RunReactants((mol, ))
                if not ps:
                    continue
                if not silent:
                    print(nSmi, '->', len(ps), 'products')
                for prodSeq in ps:
                    seqOk = True
                    # we want to disqualify small fragments, so sort the product sequence by size
                    tSeq = sorted((prod.GetNumAtoms(onlyExplicit=True), idx)
                                  for idx, prod in enumerate(prodSeq))
                    # Only products that sanitize cleanly are recorded here,
                    # so the bookkeeping loop below never touches a product
                    # whose SMILES was never computed.
                    kept = []
                    for nats, idx in tSeq:
                        prod = prodSeq[idx]
                        try:
                            Chem.SanitizeMol(prod)
                        except Exception:
                            # best effort: ignore products that cannot be sanitized
                            continue
                        pSmi = Chem.MolToSmiles(prod, 1)
                        if minFragmentSize > 0:
                            nDummies = pSmi.count('*')
                            if nats - nDummies < minFragmentSize:
                                seqOk = False
                                break
                        prod.pSmi = pSmi
                        kept.append((pSmi, prod))
                    if not seqOk:
                        continue
                    matched = True
                    for pSmi, prod in kept:
                        if pSmi not in allNodes:
                            if not singlePass:
                                activePool[pSmi] = prod
                            allNodes.add(pSmi)
                            foundMols[pSmi] = prod
            # keep the piece for the next reaction group if nothing cut it,
            # or if the caller asked to keep intermediates
            if singlePass or keepNonLeafNodes or not matched:
                newPool[nSmi] = mol
        activePool = newPool
    if not (singlePass or keepNonLeafNodes):
        if not returnMols:
            res = set(activePool.keys())
        else:
            res = activePool.values()
    else:
        if not returnMols:
            res = allNodes
        else:
            res = foundMols.values()
    return res