Ejemplo n.º 1
0
def GetMorganFingerprint(mol, atomId=-1, radius=2, fpType='bv', nBits=2048, useFeatures=False):
  """
  Calculates the Morgan fingerprint with the counts of atomId removed.

  The full fingerprint and a per-atom bit map are computed on the first call
  and cached on the molecule as ``mol._fpInfo``; later calls reuse that cache.
  NOTE(review): the cache is not keyed on radius/fpType/nBits/useFeatures, so
  a molecule that already carries ``_fpInfo`` is served from it even when
  called with different settings.

  Parameters:
    mol -- the molecule of interest
    atomId -- the atom to remove the counts for (if -1, no count is removed)
    radius -- the maximum radius
    fpType -- the type of Morgan fingerprint: 'count' or 'bv'
    nBits -- the size of the bit vector (only for fpType = 'bv')
    useFeatures -- if false: ConnectivityMorgan, if true: FeatureMorgan

  Returns:
    the cached fingerprint if atomId < 0, otherwise a new fingerprint with
    the contributions of atomId removed

  Raises:
    ValueError -- unknown fpType, atomId out of range, or malformed _fpInfo
  """
  if fpType not in ['bv', 'count']: raise ValueError("Unknown Morgan fingerprint type")
  if not hasattr(mol, '_fpInfo'):
    info = {}
    numAtoms = mol.GetNumAtoms()
    # get the fingerprint and an empty per-atom bit map of the matching kind
    if fpType == 'bv':
      molFp = rdMD.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits, useFeatures=useFeatures, bitInfo=info)
      bitmap = [DataStructs.ExplicitBitVect(nBits) for _ in range(numAtoms)]
    else:
      molFp = rdMD.GetMorganFingerprint(mol, radius, useFeatures=useFeatures, bitInfo=info)
      bitmap = [[] for _ in range(numAtoms)]
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only)
    for bit, es in info.items():
      for at1, rad in es:
        if rad == 0: # for radius 0 only the central atom contributes
          atoms = [at1]
        else: # for radii > 0 every atom of the environment contributes
          env = Chem.FindAtomEnvironmentOfRadiusN(mol, rad, at1)
          amap = {}
          # called only to fill amap with the atoms of the environment
          Chem.PathToSubmol(mol, env, atomMap=amap)
          atoms = amap.keys()
        for at2 in atoms:
          if fpType == 'bv': bitmap[at2][bit] = 1
          else: bitmap[at2].append(bit)
    mol._fpInfo = (molFp, bitmap)

  if atomId < 0:
    return mol._fpInfo[0]

  # remove the bits of atomId
  if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms")
  if len(mol._fpInfo) != 2: raise ValueError("_fpInfo not set")
  if fpType == 'bv':
    molFp = mol._fpInfo[0] ^ mol._fpInfo[1][atomId] # xor
  else: # count
    molFp = copy.deepcopy(mol._fpInfo[0])
    # delete the bits with atomId
    for bit in mol._fpInfo[1][atomId]:
      molFp[bit] -= 1
  return molFp
Ejemplo n.º 2
0
    def test10BulkOps2(self):
        """Bulk similarity calls on a tuple of bit vectors agree with the pairwise calls.

        Uses assertTrue rather than the failUnless alias, which no longer
        exists in current unittest versions.
        """
        nbits = 10000
        bvs = []
        for _ in range(10):
            bv = DataStructs.ExplicitBitVect(nbits)
            for _ in range(nbits):
                bv.SetBit(random.randrange(0, nbits))
            bvs.append(bv)
        bvs = tuple(bvs)
        query = bvs[0]

        sims = DataStructs.BulkTanimotoSimilarity(query, bvs)
        for i, bv in enumerate(bvs):
            sim = DataStructs.TanimotoSimilarity(query, bv)
            self.assertTrue(feq(sim, sims[i]))

        sims = DataStructs.BulkDiceSimilarity(query, bvs)
        for i, bv in enumerate(bvs):
            sim = DataStructs.DiceSimilarity(query, bv)
            self.assertTrue(feq(sim, sims[i]))

        sims = DataStructs.BulkAllBitSimilarity(query, bvs)
        for i, bv in enumerate(bvs):
            sim = DataStructs.AllBitSimilarity(query, bv)
            self.assertTrue(feq(sim, sims[i]))

        sims = DataStructs.BulkOnBitSimilarity(query, bvs)
        for i, bv in enumerate(bvs):
            sim = DataStructs.OnBitSimilarity(query, bv)
            self.assertTrue(feq(sim, sims[i]))

        sims = DataStructs.BulkRogotGoldbergSimilarity(query, bvs)
        for i, bv in enumerate(bvs):
            sim = DataStructs.RogotGoldbergSimilarity(query, bv)
            self.assertTrue(feq(sim, sims[i]))

        # Tversky with weights (1, 1) is also compared against Tanimoto
        sims = DataStructs.BulkTverskySimilarity(query, bvs, 1, 1)
        for i, bv in enumerate(bvs):
            sim = DataStructs.TverskySimilarity(query, bv, 1, 1)
            self.assertTrue(feq(sim, sims[i]))
            sim = DataStructs.TanimotoSimilarity(query, bv)
            self.assertTrue(feq(sim, sims[i]))

        # Tversky with weights (.5, .5) is also compared against Dice
        sims = DataStructs.BulkTverskySimilarity(query, bvs, .5, .5)
        for i, bv in enumerate(bvs):
            sim = DataStructs.TverskySimilarity(query, bv, .5, .5)
            self.assertTrue(feq(sim, sims[i]))
            sim = DataStructs.DiceSimilarity(query, bv)
            self.assertTrue(feq(sim, sims[i]))
Ejemplo n.º 3
0
  def test7FPS(self):
    """Round-trip a 32-bit vector through the FPS text representation."""
    fpsText = "03008280"
    bv = DataStructs.ExplicitBitVect(32)
    for bitId in (0, 1, 17, 23, 31):
      bv.SetBit(bitId)

    self.assertEqual(DataStructs.BitVectToFPSText(bv), fpsText)
    restored = DataStructs.CreateFromFPSText(fpsText)
    self.assertEqual(bv, restored)

    # the same text with one extra character appended must be rejected
    with self.assertRaises(ValueError):
      DataStructs.CreateFromFPSText("030082801")

    # empty text is expected to produce a vector with zero bits
    emptyBv = DataStructs.CreateFromFPSText("")
    self.assertEqual(emptyBv.GetNumBits(), 0)
Ejemplo n.º 4
0
    def test10BulkOps3(self):
        """Bulk similarity calls on a numpy array of bit vectors agree with the pairwise calls."""
        nbits = 10000
        count = 10
        bvs = numpy.empty((count, ), DataStructs.ExplicitBitVect)
        for slot in range(count):
            fp = DataStructs.ExplicitBitVect(nbits)
            for _ in range(nbits):
                fp.SetBit(random.randrange(0, nbits))
            bvs[slot] = fp

        def verify(bulk, *pairwise):
            # every pairwise function must reproduce the bulk value at each position
            for pos in range(len(bvs)):
                for func in pairwise:
                    self.assertTrue(feq(func(bvs[0], bvs[pos]), bulk[pos]))

        def tversky(a, b):
            # pairwise Tversky similarity with fixed weights
            return lambda x, y: DataStructs.TverskySimilarity(x, y, a, b)

        verify(DataStructs.BulkTanimotoSimilarity(bvs[0], bvs),
               DataStructs.TanimotoSimilarity)
        verify(DataStructs.BulkDiceSimilarity(bvs[0], bvs),
               DataStructs.DiceSimilarity)
        verify(DataStructs.BulkAllBitSimilarity(bvs[0], bvs),
               DataStructs.AllBitSimilarity)
        verify(DataStructs.BulkOnBitSimilarity(bvs[0], bvs),
               DataStructs.OnBitSimilarity)
        verify(DataStructs.BulkRogotGoldbergSimilarity(bvs[0], bvs),
               DataStructs.RogotGoldbergSimilarity)
        # Tversky (1, 1) is additionally compared against Tanimoto
        verify(DataStructs.BulkTverskySimilarity(bvs[0], bvs, 1, 1),
               tversky(1, 1), DataStructs.TanimotoSimilarity)
        # Tversky (.5, .5) is additionally compared against Dice
        verify(DataStructs.BulkTverskySimilarity(bvs[0], bvs, .5, .5),
               tversky(.5, .5), DataStructs.DiceSimilarity)
Ejemplo n.º 5
0
    def testNonUniqueCrash(self):
        """Pickers must cope with a pool in which every fingerprint appears twice."""
        from rdkit import DataStructs
        sz = 300
        nbits = 40
        nBitsToSet = int(nbits * .3)
        N = 8
        vs = []
        for _ in range(sz):
            bv = DataStructs.ExplicitBitVect(nbits)
            for _ in range(nBitsToSet):
                val = int(nbits * random.random())
                bv.SetBit(val)
            # append the same vector twice so the pool contains exact duplicates
            vs.append(bv)
            vs.append(bv)

        def taniFunc(i, j, bvs=vs):
            # distance = 1 - similarity
            d = 1 - DataStructs.FingerprintSimilarity(bvs[i], bvs[j])
            return d

        picker = rdSimDivPickers.MaxMinPicker()
        mm1 = picker.LazyPick(taniFunc, len(vs), N)
        self.assertEqual(len(mm1), N)
        picker = None

        picker = rdSimDivPickers.MaxMinPicker()
        mm2 = picker.LazyBitVectorPick(vs, len(vs), N)
        self.assertEqual(len(mm2), N)

        picker = rdSimDivPickers.MaxMinPicker()
        mm3 = picker.LazyBitVectorPick(vs, len(vs), N)
        self.assertEqual(len(mm3), N)

        # we get the occasional dupe randomly,
        # make sure we don't get three dupes in a row
        # (the whole "or" expression must be inside assertTrue; previously
        # only the first comparison was asserted)
        self.assertTrue((tuple(mm2) != tuple(mm1)) or (tuple(mm3) != tuple(mm1)))
        picker = None

        # condensed (upper-triangle) distance matrix for the hierarchical picker
        ds = []
        nvs = len(vs)
        for i in range(nvs):
            for j in range(i + 1, nvs):
                d = taniFunc(i, j)
                ds.append(d)
        m = numpy.array(ds)
        picker = rdSimDivPickers.HierarchicalClusterPicker(
            rdSimDivPickers.ClusterMethod.WARD)
        p1 = list(picker.Pick(m, nvs, N))
Ejemplo n.º 6
0
def process_one_target(file, targets, top_n_matches, outfile=None):
    """Score the fingerprints stored in ``file`` against every target.

    :param file: path of a pickle file holding an iterable of
                 ``(smiles, identifier, base64_fingerprint)`` records
    :param targets: mapping of target SMILES -> target fingerprint
    :param top_n_matches: number of best-scoring records kept per target;
                          values <= 0 keep nothing
    :param outfile: optional path; when given, the results are pickled there
    :return: ``outfile`` when it was given, otherwise a dict mapping each
             target SMILES to a list of ``(smiles, score, identifier)``
             tuples sorted by ascending score
    """
    import base64
    import pickle
    from operator import itemgetter

    from rdkit import DataStructs

    # NOTE(security): pickle.load can execute arbitrary code; only feed this
    # function files from a trusted source.
    with open(file, 'rb') as handle:
        records = pickle.load(handle)

    fingerprint_set = []
    for sm, identifier, fp in records:
        # some stored fingerprints do not convert; keep them as None
        try:
            bv = DataStructs.ExplicitBitVect(base64.b64decode(fp))
        except Exception:
            bv = None
        fingerprint_set.append((sm, bv, identifier))

    target_results = {}
    for smile_target in targets:
        bit_target = targets[smile_target]

        scores = []
        for smile, fingerprint, identifier in fingerprint_set:
            if fingerprint is None:
                continue
            # best effort: a fingerprint that cannot be compared is skipped
            try:
                score = DataStructs.TanimotoSimilarity(fingerprint, bit_target)
            except Exception:
                continue
            scores.append((smile, score, identifier))

        scores.sort(key=itemgetter(1))
        # guard against [-0:], which would return the whole list
        target_results[smile_target] = scores[-top_n_matches:] if top_n_matches > 0 else []

    if outfile:
        with open(outfile, 'wb') as f:
            pickle.dump(target_results, f)
        return outfile
    return target_results
Ejemplo n.º 7
0
    def generateIFP(self, ligand, protein):
        """Build the full IFP by concatenating the bitstring of each residue.

        Residues are visited in the order given by get_resnumber. The result is
        stored on the ligand (as text and as bit vector) and the bit vector is
        returned.
        """
        totalBits = len(self.interactions) * len(protein.residues)
        IFPvector = DataStructs.ExplicitBitVect(totalBits)
        chunks = []
        start = 0
        for residue in sorted(protein.residues, key=get_resnumber):
            bitstring = self.generateBitstring(ligand,
                                               protein.residues[residue])
            # switch on the vector bits matching the '1' characters of this chunk
            for shift, bit in enumerate(bitstring):
                if bit == '1':
                    IFPvector.SetBit(start + shift)
            start += len(bitstring)
            chunks.append(bitstring)
        ligand.setIFP(''.join(chunks), IFPvector)

        return IFPvector
Ejemplo n.º 8
0
    def MapToClusterFP(self, fp):
        """ Map the fingerprint to a smaller sized (= number of clusters) fingerprint

        Every cluster owns one bit of the new fingerprint; that bit is set as
        soon as one of the cluster's bits is on in the original fingerprint."""
        ebv = DataStructs.ExplicitBitVect(self._nClusters)
        for clusterIdx, members in enumerate(self._clusters):
            # any() stops at the first bit that is on, like the original break
            if any(fp[bid] for bid in members):
                ebv.SetBit(clusterIdx)
        return ebv
Ejemplo n.º 9
0
 def _BuildFp(self,data):
   """Build a fingerprint from one database row.

   The column at self.fpCol holds the serialized fingerprint; the remaining
   columns are attached to the result as ``_fieldsFromDb``.  Returns None
   (after printing the traceback) when the fingerprint cannot be built.
   """
   data = list(data)
   pkl = str(data[self.fpCol])
   del data[self.fpCol]
   self._numProcessed += 1
   try:
     if self._usePickles:
       # NOTE(security): unpickling executes arbitrary code; the database
       # contents must be trusted.
       newFp = cPickle.loads(pkl)
     else:
       newFp = DataStructs.ExplicitBitVect(pkl)
   except Exception:
     # best effort: report the problem and skip this row, but let
     # KeyboardInterrupt/SystemExit propagate
     import traceback
     traceback.print_exc()
     newFp = None
   if newFp:
     newFp._fieldsFromDb = data
   return newFp
Ejemplo n.º 10
0
 def _BuildFp(self, data):
     """Turn one database row into a fingerprint object.

     The entry at self.fpCol is the serialized fingerprint (encoded to bytes
     as Latin1); all other entries end up in ``_fieldsFromDb`` of the result.
     On any failure the traceback is printed and None is returned.
     """
     fields = list(data)
     raw = bytes(fields.pop(self.fpCol), encoding='Latin1')
     self._numProcessed += 1
     try:
         newFp = (pickle.loads(raw, encoding='bytes')
                  if self._usePickles else DataStructs.ExplicitBitVect(raw))
     except Exception:
         import traceback
         traceback.print_exc()
         newFp = None
     if newFp:
         newFp._fieldsFromDb = fields
     return newFp
Ejemplo n.º 11
0
    def test4ebv(self):
        """Tanimoto similarity and distance matrices must sum to 1 entry by entry."""
        n = 30
        m = 2048
        dm = 800

        def randomVect():
            # m-bit vector with dm random SetBit calls (collisions allowed)
            vect = DataStructs.ExplicitBitVect(m)
            for _ in range(dm):
                vect.SetBit(random.randrange(0, m))
            return vect

        lst = [randomVect() for _ in range(n)]

        dMat = rdmmc.GetTanimotoDistMat(lst)
        sMat = rdmmc.GetTanimotoSimMat(lst)

        # n * (n - 1) / 2 entries: one per unordered pair
        nPairs = n * (n - 1) // 2
        for k in range(nPairs):
            assert feq(sMat[k] + dMat[k], 1.0)
Ejemplo n.º 12
0
def CalculateFP3Fingerprint(mol: Chem.Mol,
                            rtype: str = 'bitstring') -> Tuple[str, dict, Any]:
    """Calculate FP3 fingerprints (55 bits).

    The molecule is converted to SMILES and handed to pybel for the FP3
    calculation.

    :param rtype: Type of output, may either be:
                  bitstring (default), returns a binary string
                  rdkit, return the native rdkit DataStructs
                  dict, for a dict of bits turned on
    """
    # NOTE(review): the return annotation reads as a tuple, but exactly one of
    # the three kinds (str, dict or bit vector) is returned.
    onBits = pybel.readstring('smi', Chem.MolToSmiles(mol)).calcfp('FP3').bits
    if rtype == 'dict':
        return {f'FP3_{i}': 1 for i in onBits}
    # presumably pybel reports 1-based bit positions, hence the shift by one
    fp = DataStructs.ExplicitBitVect(55)
    fp.SetBitsFromList([x - 1 for x in onBits])
    return fp if rtype == 'rdkit' else fp.ToBitString()
Ejemplo n.º 13
0
 def __setstate__(self, pkl):
     """Rebuild the container from its binary state.

     Layout read here: a little-endian unsigned int with the number of
     vectors, then for each vector an unsigned int key, an unsigned int byte
     length and that many bytes, which are passed to ExplicitBitVect.
     """
     self.__vects = {}
     self.__orVect = None
     self.__numBits = -1
     self.__needReset = True
     uintSize = struct.calcsize('I')

     def readUInt(pos):
         # value stored at pos plus the position right after it
         value = struct.unpack('<I', pkl[pos:pos + uintSize])[0]
         return value, pos + uintSize

     nToRead, cursor = readUInt(0)
     for _ in range(nToRead):
         key, cursor = readUInt(cursor)
         length, cursor = readUInt(cursor)
         fmt = '%ds' % length
         stop = cursor + struct.calcsize(fmt)
         payload, = struct.unpack(fmt, pkl[cursor:stop])
         bv = DataStructs.ExplicitBitVect(payload)
         cursor = stop
         self.AddVect(key, bv)
Ejemplo n.º 14
0
  def __setstate__(self, pkl):
    """Restore the container from its binary state.

    On Python 3 a str state is first encoded to bytes as Latin1.  The state
    holds a little-endian unsigned int count followed, per vector, by an
    unsigned int key, an unsigned int byte length and the vector's bytes.
    """
    if six.PY3 and isinstance(pkl, str):
      pkl = bytes(pkl, encoding='Latin1')

    self.__vects = {}
    self.__orVect = None
    self.__numBits = -1
    self.__needReset = True

    word = struct.calcsize('I')

    def uintAt(pos):
      # little-endian unsigned int starting at pos
      return struct.unpack('<I', pkl[pos:pos + word])[0]

    nToRead = uintAt(0)
    cursor = word
    for _ in range(nToRead):
      key = uintAt(cursor)
      cursor += word
      nBytes = uintAt(cursor)
      cursor += word
      fmt = '%ds' % nBytes
      stop = cursor + struct.calcsize(fmt)
      raw, = struct.unpack(fmt, pkl[cursor:stop])
      bv = DataStructs.ExplicitBitVect(raw)
      cursor = stop
      self.AddVect(key, bv)
Ejemplo n.º 15
0
    def testNonUniqueCrash(self):
        """LazyPick must not crash on a pool in which every fingerprint appears twice."""
        from rdkit import DataStructs
        sz = 10
        nbits = 20
        nBitsToSet = int(nbits * .3)
        N = 12
        vs = []
        for _ in range(sz):
            bv = DataStructs.ExplicitBitVect(nbits)
            for _ in range(nBitsToSet):
                val = int(nbits * random.random())
                bv.SetBit(val)
            # append the same vector twice so the pool contains exact duplicates
            vs.append(bv)
            vs.append(bv)

        def taniFunc(i, j, bvs=vs):
            # distance = 1 - similarity
            d = 1 - DataStructs.FingerprintSimilarity(bvs[i], bvs[j])
            return d

        picker = rdSimDivPickers.MaxMinPicker()
        try:
            mm = picker.LazyPick(taniFunc, len(vs), N)
        except Exception as exc:
            # still reported as a test failure, but now with the cause attached
            self.fail('LazyPick raised %r' % (exc, ))
        self.assertEqual(len(mm), N)
        picker = None

        # condensed (upper-triangle) distance matrix for the hierarchical picker
        ds = []
        nvs = len(vs)
        for i in range(nvs):
            for j in range(i + 1, nvs):
                d = taniFunc(i, j)
                ds.append(d)
        m = numpy.array(ds)
        picker = rdSimDivPickers.HierarchicalClusterPicker(
            rdSimDivPickers.ClusterMethod.WARD)
        p1 = list(picker.Pick(m, nvs, N))
Ejemplo n.º 16
0
 def test11BulkNeighbors(self):
   """For each metric, the *SimilarityNeighbors result must match a search built on Bulk*Similarity."""
   nbits = 2048
   bvs = []
   for _ in range(1000):
     fp = DataStructs.ExplicitBitVect(nbits)
     for _ in range(nbits):
       fp.SetBit(random.randrange(0, nbits))
     bvs.append(fp)
   # first ten vectors are the queries, the rest is the database
   qs, db = bvs[:10], bvs[10:]
   metrics = ('Tanimoto', 'Cosine', 'Kulczynski', 'Dice', 'Sokal',
              'McConnaughey', 'Asymmetric', 'BraunBlanquet', 'Russel',
              'RogotGoldberg')
   for metric in metrics:
     bulkSim = getattr(DataStructs,f'Bulk{metric}Similarity')
     nbrSim = getattr(DataStructs,f'{metric}SimilarityNeighbors')
     tgts = []
     for q in qs:
       # negated positions make max() prefer the lowest index among ties
       ranked = [(value, -pos) for pos, value in enumerate(bulkSim(q, db))]
       bestValue, negPos = max(ranked)
       tgts.append((-negPos, bestValue))
     self.assertEqual(tgts, nbrSim(qs, db))
Ejemplo n.º 17
0
    def test1Cluster(self):
        """Cluster the bits pairwise and check the cluster scores and cluster fingerprint.

        Integer division (//) and explicit list(range(...)) keep the sizes
        integral and the bit list concatenable on Python 3 as well as Python 2.
        """
        if BitClusterer is None:
            return
        half = self.nbits // 2
        quarter = self.nbits // 4

        cmg = rdInfoTheory.BitCorrMatGenerator()
        cmg.SetBitList(self.blist)
        for fp in self.fps:
            cmg.CollectVotes(fp)

        corrMat = cmg.GetCorrMatrix()

        bcl = BitClusterer.BitClusterer(self.blist, half)
        bcl.ClusterBits(corrMat)
        cls = bcl.GetClusters()
        # every cluster is expected to pair bit i with bit i + nbits/2
        for cl in cls:
            assert len(cl) == 2
            assert (cl[0] + half) == cl[1]

        # turn on the first quarter of the bits and their partners
        tfp = DataStructs.ExplicitBitVect(self.nbits)
        obits = list(range(0, quarter)) + list(range(half, 3 * self.nbits // 4))
        tfp.SetBitsFromList(obits)
        rvc = bcl.MapToClusterScores(tfp)
        assert len(rvc) == half
        for i in range(half):
            if i < quarter:
                assert rvc[i] == 2
            else:
                assert rvc[i] == 0

        nfp = bcl.MapToClusterFP(tfp)
        assert len(nfp) == half
        for i in range(half):
            if i < quarter:
                assert nfp[i]
            else:
                assert not nfp[i]
Ejemplo n.º 18
0
    def test1Cluster(self):
        """Cluster the bits pairwise and check SetClusters, the cluster scores and the cluster fingerprint."""
        half = self.nbits // 2
        quarter = self.nbits // 4

        cmg = rdInfoTheory.BitCorrMatGenerator()
        cmg.SetBitList(self.blist)
        for fp in self.fps:
            cmg.CollectVotes(fp)
        corrMat = cmg.GetCorrMatrix()

        bcl = BitClusterer.BitClusterer(self.blist, half)
        bcl.ClusterBits(corrMat)
        cls = bcl.GetClusters()
        # every cluster is expected to pair bit i with bit i + nbits // 2
        for cl in cls:
            self.assertEqual(len(cl), 2)
            self.assertEqual((cl[0] + half), cl[1])
        # setting the full cluster list is accepted, a shortened one is not
        bcl.SetClusters(cls)
        self.assertRaises(AssertionError, bcl.SetClusters, cls[:-1])

        # turn on the first quarter of the bits and their partners
        tfp = DataStructs.ExplicitBitVect(self.nbits)
        obits = [*range(0, quarter), *range(half, 3 * self.nbits // 4)]
        tfp.SetBitsFromList(obits)

        rvc = bcl.MapToClusterScores(tfp)
        self.assertEqual(len(rvc), half)
        for i in range(half):
            expected = 2 if i < quarter else 0
            self.assertEqual(rvc[i], expected)

        nfp = bcl.MapToClusterFP(tfp)
        self.assertEqual(len(nfp), half)
        for i in range(half):
            check = self.assertTrue if i < quarter else self.assertFalse
            check(nfp[i])
Ejemplo n.º 19
0
 def partialSimilarity(atomID):
     """ Tversky similarity (weights 0 and 1) between subsFp and the bits recorded for atomID """
     # 1024-bit fingerprint holding only the bits listed in aBits[atomID]
     atomFP = DataStructs.ExplicitBitVect(1024)
     atomFP.SetBitsFromList(aBits[atomID])
     score = DataStructs.TverskySimilarity(subsFp, atomFP, 0, 1)
     return score
Ejemplo n.º 20
0
    # One 2048-bit RDKit path fingerprint per molecule in MyMolList
    # (paths of at least 7 bonds, bond orders ignored, one bit per hash).
    fpq = [
        Chem.RDKFingerprint(x,
                            fpSize=2048,
                            minPath=7,
                            useBondOrder=False,
                            nBitsPerHash=1,
                            minSize=2048,
                            useHs=False) for x in MyMolList
    ]

    # Alternative fingerprint kept for reference (disabled):
    #fpq = [Chem.PatternFingerprint(x,fpSize=2048) for x in MyMolList]
    # Collect the on-bits of every query fingerprint ...
    AllQBits = [list(fpqe.GetOnBits()) for fpqe in fpq]
    # ... and flatten them into one de-duplicated list (order is not preserved).
    AllQBits = list(set([j for i in AllQBits for j in i]))
    # Fall back to the last bit when nothing is on
    # (presumably so the ensemble vector is never empty -- TODO confirm).
    if len(AllQBits) == 0:
        AllQBits = [2047]
    # Ensemble bit vector: the union of the on-bits of all query fingerprints.
    bv = DataStructs.ExplicitBitVect(2048)
    bv.SetBitsFromList(AllQBits)

    # Diagnostics (Python 2 print statements).
    print " the number of compounds generated from the compound used to query: ", len(
        MyMolList)
    print len(AllQBits)
    print Chem.MolToSmiles(MyMol)
    print len(MyMolFp.GetOnBits())

    ################################  End Query Ensemble  Creation ################################################################
    ############################  Find the AntiSmash compounds with the most overlap with the query ensemble fingerprint  #########

    print " Culling the AntiSmash compounds "
    print " examining fingerprints : ", len(fps)
    # Initial state for the search that follows this excerpt.
    MySearchList = []
    # NOTE(review): looks like a similarity cut-off; its use is outside this excerpt.
    similarity = 0.17
Ejemplo n.º 21
0
 def test3Bounds(self):
     """Indexing inside the vector works; index 11 on a 10-bit vector raises IndexError."""
     bv = DataStructs.ExplicitBitVect(10)
     bv[0]  # in range: must not raise
     with self.assertRaisesRegex(IndexError, ""):
         bv[11]
Ejemplo n.º 22
0
 def test01BVWithAllOnes(self):
     """A vector built with True as second constructor argument must have every bit on."""
     size = 10
     allOnes = DataStructs.ExplicitBitVect(size, True)
     for pos in range(size):
         assert allOnes.GetBit(pos) == 1