Example #1
0
def calculate_sims(fps1, fps2, simtype='tanimoto'):
    sim_mat = np.zeros((len(fps1), len(fps2)))  # ,dtype=np.float32)
    for i in range(len(fps1)):
        fp_i = fps1[i]
        if simtype == 'tanimoto':
            sims = DataStructs.BulkTanimotoSimilarity(fp_i, fps2)
        elif simtype == 'dice':
            sims = DataStructs.BulkDiceSimilarity(fp_i, fps2)
        sim_mat[i, :] = sims
    return sim_mat
def ClusterFps(Fp, cutoff):
    from rdkit import DataStructs
    from rdkit.ML.Cluster import Butina

    # first generate the distance matrix:
    dists = []
    nfps = len(Fp)
    for i in range(1,nfps):
#        sims = DataStructs.BulkTanimotoSimilarity(Fp[i], Fp[:i])
        sims = DataStructs.BulkDiceSimilarity(Fp[i], Fp[:i])
        dists.extend([1-x for x in sims])

    # now cluster the data:
    cs = Butina.ClusterData(dists, nfps, cutoff, isDistData=True)
    return cs
Example #3
0
    def test10BulkOps2(self):
        nbits = 10000
        bvs = []
        for bvi in range(10):
            bv = DataStructs.ExplicitBitVect(nbits)
            for j in range(nbits):
                x = random.randrange(0, nbits)
                bv.SetBit(x)
            bvs.append(bv)
        bvs = tuple(bvs)
        sims = DataStructs.BulkTanimotoSimilarity(bvs[0], bvs)
        for i in range(len(bvs)):
            sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i])
            self.failUnless(feq(sim, sims[i]))

        sims = DataStructs.BulkDiceSimilarity(bvs[0], bvs)
        for i in range(len(bvs)):
            sim = DataStructs.DiceSimilarity(bvs[0], bvs[i])
            self.failUnless(feq(sim, sims[i]))

        sims = DataStructs.BulkAllBitSimilarity(bvs[0], bvs)
        for i in range(len(bvs)):
            sim = DataStructs.AllBitSimilarity(bvs[0], bvs[i])
            self.failUnless(feq(sim, sims[i]))

        sims = DataStructs.BulkOnBitSimilarity(bvs[0], bvs)
        for i in range(len(bvs)):
            sim = DataStructs.OnBitSimilarity(bvs[0], bvs[i])
            self.failUnless(feq(sim, sims[i]))

        sims = DataStructs.BulkRogotGoldbergSimilarity(bvs[0], bvs)
        for i in range(len(bvs)):
            sim = DataStructs.RogotGoldbergSimilarity(bvs[0], bvs[i])
            self.failUnless(feq(sim, sims[i]))

        sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, 1, 1)
        for i in range(len(bvs)):
            sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], 1, 1)
            self.failUnless(feq(sim, sims[i]))
            sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i])
            self.failUnless(feq(sim, sims[i]))

        sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, .5, .5)
        for i in range(len(bvs)):
            sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], .5, .5)
            self.failUnless(feq(sim, sims[i]))
            sim = DataStructs.DiceSimilarity(bvs[0], bvs[i])
            self.failUnless(feq(sim, sims[i]))
Example #4
0
    def test10BulkOps3(self):
        nbits = 10000
        bvs = numpy.empty((10, ), DataStructs.ExplicitBitVect)
        for bvi in range(10):
            bv = DataStructs.ExplicitBitVect(nbits)
            for j in range(nbits):
                x = random.randrange(0, nbits)
                bv.SetBit(x)
            bvs[bvi] = bv
        sims = DataStructs.BulkTanimotoSimilarity(bvs[0], bvs)
        for i in range(len(bvs)):
            sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i])
            self.assertTrue(feq(sim, sims[i]))

        sims = DataStructs.BulkDiceSimilarity(bvs[0], bvs)
        for i in range(len(bvs)):
            sim = DataStructs.DiceSimilarity(bvs[0], bvs[i])
            self.assertTrue(feq(sim, sims[i]))

        sims = DataStructs.BulkAllBitSimilarity(bvs[0], bvs)
        for i in range(len(bvs)):
            sim = DataStructs.AllBitSimilarity(bvs[0], bvs[i])
            self.assertTrue(feq(sim, sims[i]))

        sims = DataStructs.BulkOnBitSimilarity(bvs[0], bvs)
        for i in range(len(bvs)):
            sim = DataStructs.OnBitSimilarity(bvs[0], bvs[i])
            self.assertTrue(feq(sim, sims[i]))

        sims = DataStructs.BulkRogotGoldbergSimilarity(bvs[0], bvs)
        for i in range(len(bvs)):
            sim = DataStructs.RogotGoldbergSimilarity(bvs[0], bvs[i])
            self.assertTrue(feq(sim, sims[i]))

        sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, 1, 1)
        for i in range(len(bvs)):
            sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], 1, 1)
            self.assertTrue(feq(sim, sims[i]))
            sim = DataStructs.TanimotoSimilarity(bvs[0], bvs[i])
            self.assertTrue(feq(sim, sims[i]))

        sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, .5, .5)
        for i in range(len(bvs)):
            sim = DataStructs.TverskySimilarity(bvs[0], bvs[i], .5, .5)
            self.assertTrue(feq(sim, sims[i]))
            sim = DataStructs.DiceSimilarity(bvs[0], bvs[i])
            self.assertTrue(feq(sim, sims[i]))
Example #5
0
def GetNeighborLists(probes,
                     topN,
                     pool,
                     simMetric=DataStructs.DiceSimilarity,
                     simThresh=-1.,
                     silent=False,
                     **kwargs):
    probeFps = [x[1] for x in probes]
    validProbes = [x for x in range(len(probeFps)) if probeFps[x] is not None]
    validFps = [probeFps[x] for x in validProbes]
    from rdkit.DataStructs.TopNContainer import TopNContainer
    if simThresh <= 0:
        nbrLists = [TopNContainer(topN) for x in range(len(probeFps))]
    else:
        nbrLists = [TopNContainer(-1) for x in range(len(probeFps))]

    nDone = 0
    for nm, fp in pool:
        nDone += 1
        if not silent and not nDone % 1000:
            logger.info('  searched %d rows' % nDone)
        if (simMetric == DataStructs.DiceSimilarity):
            scores = DataStructs.BulkDiceSimilarity(fp, validFps)
            for i, score in enumerate(scores):
                if score > simThresh:
                    nbrLists[validProbes[i]].Insert(score, nm)
        elif (simMetric == DataStructs.TanimotoSimilarity):
            scores = DataStructs.BulkTanimotoSimilarity(fp, validFps)
            for i, score in enumerate(scores):
                if score > simThresh:
                    nbrLists[validProbes[i]].Insert(score, nm)
        elif (simMetric == DataStructs.TverskySimilarity):
            av = float(kwargs.get('tverskyA', 0.5))
            bv = float(kwargs.get('tverskyB', 0.5))
            scores = DataStructs.BulkTverskySimilarity(fp, validFps, av, bv)
            for i, score in enumerate(scores):
                if score > simThresh:
                    nbrLists[validProbes[i]].Insert(score, nm)
        else:
            for i in range(len(probeFps)):
                pfp = probeFps[i]
                if pfp is not None:
                    score = simMetric(probeFps[i], fp)
                    if score > simThresh:
                        nbrLists[validProbes[i]].Insert(score, nm)
    return nbrLists
Example #6
0
  def test6BulkDice(self):
    """

    """
    sz = 10
    nToSet = 5
    nVs = 6
    import random
    vs = []
    for i in range(nVs):
      v = ds.IntSparseIntVect(sz)
      for j in range(nToSet):
        v[random.randint(0, sz - 1)] = random.randint(1, 10)
      vs.append(v)

    baseDs = [ds.DiceSimilarity(vs[0], vs[x]) for x in range(1, nVs)]
    bulkDs = ds.BulkDiceSimilarity(vs[0], vs[1:])
    for i in range(len(baseDs)):
      self.assertTrue(feq(baseDs[i], bulkDs[i]))
    '''Gets the fingerprints from the fingerprint library
    and stores them in a dictioanry'''
    fp_dict = {}
    for fp in fp_names:
        fp_dict[fp] = fingerprint_lib.CalculateFP(fp, smiles)
    return fp_dict


def getFP(fp_name, smiles):
    '''Gets fingerprint from fingerprint library'''
    return fingerprint_lib.CalculateFP(fp_name, smiles)


# dictionary for similarity measures
simil_dict = {}
simil_dict['Dice'] = lambda x, y: sorted(DataStructs.BulkDiceSimilarity(x, y),
                                         reverse=True)
simil_dict['Tanimoto'] = lambda x, y: sorted(
    DataStructs.BulkTanimotoSimilarity(x, y), reverse=True)
simil_dict['Cosine'] = lambda x, y: sorted(
    DataStructs.BulkCosineSimilarity(x, y), reverse=True)
simil_dict['Russel'] = lambda x, y: sorted(
    DataStructs.BulkRusselSimilarity(x, y), reverse=True)
simil_dict['Kulczynski'] = lambda x, y: sorted(
    DataStructs.BulkKulczynskiSimilarity(x, y), reverse=True)
simil_dict['McConnaughey'] = lambda x, y: sorted(
    DataStructs.BulkMcConnaugheySimilarity(x, y), reverse=True)
simil_dict['Manhattan'] = lambda x, y: sorted(
    DataStructs.BulkAllBitSimilarity(x, y), reverse=True)
simil_dict['RogotGoldberg'] = lambda x, y: sorted(
    DataStructs.BulkRogotGoldbergSimilarity(x, y), reverse=True)