Exemple #1
0
  def _bulkTest(self, bvs):
    """Check every Bulk*Similarity function against its pairwise counterpart.

    For each of the Tanimoto, Dice, AllBit, OnBit and RogotGoldberg metrics,
    the bulk call ``Bulk<metric>Similarity(bvs[0], bvs)`` must return exactly
    the values produced by calling ``<metric>Similarity(bvs[0], bvs[i])`` one
    vector at a time, both as similarities and (``returnDistance=True``) as
    distances.  The pairwise function is also called with the second vector
    given as ``bvs[i].ToBinary()`` and must yield the same value.

    Tversky with weights (1, 1) is then checked against both its pairwise
    form and Tanimoto, again as similarity and as distance.

    Args:
      bvs: indexable collection of bit vectors; ``bvs[0]`` is the query.
    """
    for metric in 'Tanimoto', 'Dice', 'AllBit', 'OnBit', 'RogotGoldberg':
      bulk = getattr(DataStructs, f'Bulk{metric}Similarity')
      single = getattr(DataStructs, f'{metric}Similarity')

      # The checks below must live inside the metric loop; otherwise only the
      # last metric bound to `bulk`/`single` would ever be exercised.
      sims = bulk(bvs[0], bvs)
      for i, bv in enumerate(bvs):
        sim = single(bvs[0], bv)
        self.assertEqual(sim, sims[i])
        self.assertEqual(sim, single(bvs[0], bv.ToBinary()))

      dists = bulk(bvs[0], bvs, returnDistance=True)
      for i, bv in enumerate(bvs):
        dist = single(bvs[0], bv, returnDistance=True)
        self.assertEqual(dist, dists[i])
        self.assertEqual(dist, single(bvs[0], bv.ToBinary(), returnDistance=True))

    # Tversky(1, 1) is asserted equal to Tanimoto, as similarities ...
    sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, 1, 1)
    for i, bv in enumerate(bvs):
      sim = DataStructs.TverskySimilarity(bvs[0], bv, 1, 1)
      self.assertEqual(sim, sims[i])
      sim = DataStructs.TanimotoSimilarity(bvs[0], bv)
      self.assertEqual(sim, sims[i])

    # ... and as distances.
    dists = DataStructs.BulkTverskySimilarity(bvs[0], bvs, 1, 1, returnDistance=True)
    for i, bv in enumerate(bvs):
      dist = DataStructs.TverskySimilarity(bvs[0], bv, 1, 1, returnDistance=True)
      self.assertEqual(dist, dists[i])
      dist = DataStructs.TanimotoSimilarity(bvs[0], bv, returnDistance=True)
      self.assertEqual(dist, dists[i])
Exemple #2
0
    def test10BulkOps2(self):
        """Bulk similarity functions must agree with the pairwise ones (tuple input).

        Builds ten 10000-bit ExplicitBitVects with randomly chosen bits set,
        passes them to each Bulk*Similarity function as a tuple, and checks
        every returned value against the corresponding pairwise call using
        ``feq``.  Tversky is additionally checked against Tanimoto for
        weights (1, 1) and against Dice for weights (.5, .5).
        """
        nbits = 10000
        bvs = []
        for _ in range(10):
            bv = DataStructs.ExplicitBitVect(nbits)
            for _ in range(nbits):
                bv.SetBit(random.randrange(0, nbits))
            bvs.append(bv)
        # The bulk functions are deliberately handed a tuple here.
        bvs = tuple(bvs)
        query = bvs[0]

        # (bulk function, pairwise function) for each plain metric.
        metric_pairs = (
            (DataStructs.BulkTanimotoSimilarity, DataStructs.TanimotoSimilarity),
            (DataStructs.BulkDiceSimilarity, DataStructs.DiceSimilarity),
            (DataStructs.BulkAllBitSimilarity, DataStructs.AllBitSimilarity),
            (DataStructs.BulkOnBitSimilarity, DataStructs.OnBitSimilarity),
            (DataStructs.BulkRogotGoldbergSimilarity,
             DataStructs.RogotGoldbergSimilarity),
        )
        for bulk, single in metric_pairs:
            sims = bulk(query, bvs)
            for i, bv in enumerate(bvs):
                # assertTrue replaces the failUnless alias, which was removed
                # from unittest in Python 3.12.
                self.assertTrue(feq(single(query, bv), sims[i]))

        # (alpha, beta, metric that Tversky is asserted to equal for them).
        tversky_cases = (
            (1, 1, DataStructs.TanimotoSimilarity),
            (.5, .5, DataStructs.DiceSimilarity),
        )
        for alpha, beta, equivalent in tversky_cases:
            sims = DataStructs.BulkTverskySimilarity(query, bvs, alpha, beta)
            for i, bv in enumerate(bvs):
                sim = DataStructs.TverskySimilarity(query, bv, alpha, beta)
                self.assertTrue(feq(sim, sims[i]))
                sim = equivalent(query, bv)
                self.assertTrue(feq(sim, sims[i]))
Exemple #3
0
  def test6BulkTversky(self):
    """Bulk Tversky on IntSparseIntVects must match pairwise, Dice and Tanimoto.

    Six vectors of length 10 are filled with up to five random entries each.
    Using the first vector as the query against the remaining five:

    * BulkTverskySimilarity with weights (0.5, 0.5) is compared with the
      pairwise TverskySimilarity and with DiceSimilarity;
    * BulkTverskySimilarity with weights (1.0, 1.0) is compared with the
      pairwise and the bulk TanimotoSimilarity.

    All comparisons go through ``feq``.
    """
    import random
    vec_len, n_assign, n_vecs = 10, 5, 6

    vecs = []
    for _ in range(n_vecs):
      vec = ds.IntSparseIntVect(vec_len)
      for _ in range(n_assign):
        vec[random.randint(0, vec_len - 1)] = random.randint(1, 10)
      vecs.append(vec)
    query, others = vecs[0], vecs[1:]

    # Tversky(0.5, 0.5): pairwise vs bulk vs Dice.
    pairwise = [ds.TverskySimilarity(query, other, .5, .5) for other in others]
    bulk = ds.BulkTverskySimilarity(query, others, 0.5, 0.5)
    dice = [ds.DiceSimilarity(query, other) for other in others]
    for idx, expected in enumerate(pairwise):
      self.assertTrue(feq(expected, bulk[idx]))
      self.assertTrue(feq(expected, dice[idx]))

    # Tversky(1.0, 1.0): bulk vs pairwise Tanimoto, then vs bulk Tanimoto.
    bulk = ds.BulkTverskySimilarity(query, others, 1.0, 1.0)
    tanimoto = [ds.TanimotoSimilarity(query, other) for other in others]
    for idx, value in enumerate(bulk):
      self.assertTrue(feq(value, tanimoto[idx]))
    tanimoto = ds.BulkTanimotoSimilarity(query, others)
    for idx, value in enumerate(bulk):
      self.assertTrue(feq(value, tanimoto[idx]))
Exemple #4
0
 def getSimilarity(self, reference, method='tanimoto', alpha=None, beta=None):
     """Return the similarity between ``reference.IFPvector`` and ``self.IFPvector``.

     Args:
         reference: object exposing an ``IFPvector`` attribute; it is passed
             as the first argument of the similarity function.
         method: one of ``'tanimoto'``, ``'dice'`` or ``'tversky'``.
         alpha, beta: forwarded as the two weight arguments of
             ``DataStructs.TverskySimilarity``; only used for ``'tversky'``.

     Returns:
         The value returned by the selected ``DataStructs`` function, or
         ``None`` when ``method`` is not one of the three names above.
     """
     if method == 'tversky':
         return DataStructs.TverskySimilarity(
             reference.IFPvector, self.IFPvector, alpha, beta)
     if method == 'dice':
         return DataStructs.DiceSimilarity(reference.IFPvector, self.IFPvector)
     if method == 'tanimoto':
         return DataStructs.TanimotoSimilarity(reference.IFPvector, self.IFPvector)
     # Unrecognised method names yield None rather than raising.
     return None
Exemple #5
0
    def test10BulkOps3(self):
        """Bulk similarity functions must agree with the pairwise ones (numpy input).

        Fills a 10-element numpy array (created with ExplicitBitVect as its
        dtype argument) with 10000-bit vectors whose bits are set at random,
        hands that array to each Bulk*Similarity function, and compares every
        returned value with the corresponding pairwise call using ``feq``.
        Tversky is additionally compared with Tanimoto for weights (1, 1) and
        with Dice for weights (.5, .5).
        """
        nbits = 10000
        bvs = numpy.empty((10, ), DataStructs.ExplicitBitVect)
        for slot in range(10):
            vect = DataStructs.ExplicitBitVect(nbits)
            for _ in range(nbits):
                vect.SetBit(random.randrange(0, nbits))
            bvs[slot] = vect

        # Plain metrics, checked in this order.
        for name in ('Tanimoto', 'Dice', 'AllBit', 'OnBit', 'RogotGoldberg'):
            sims = getattr(DataStructs, 'Bulk' + name + 'Similarity')(bvs[0], bvs)
            for idx in range(len(bvs)):
                sim = getattr(DataStructs, name + 'Similarity')(bvs[0], bvs[idx])
                self.assertTrue(feq(sim, sims[idx]))

        # Tversky weights paired with the metric they are asserted to equal.
        for alpha, beta, twin in ((1, 1, 'Tanimoto'), (.5, .5, 'Dice')):
            sims = DataStructs.BulkTverskySimilarity(bvs[0], bvs, alpha, beta)
            for idx in range(len(bvs)):
                sim = DataStructs.TverskySimilarity(bvs[0], bvs[idx], alpha, beta)
                self.assertTrue(feq(sim, sims[idx]))
                sim = getattr(DataStructs, twin + 'Similarity')(bvs[0], bvs[idx])
                self.assertTrue(feq(sim, sims[idx]))
Exemple #6
0
    def partialFP(atomID, tverskyThresh):
        """Flag ``atomID`` in ``marked`` when its Tversky score is below the threshold.

        A fresh 1024-bit ExplicitBitVect gets the bits listed in
        ``aBits[atomID]`` and is scored against ``subsFp`` with
        ``TverskySimilarity(subsFp, fp, 0, 1)``.  ``aBits``, ``subsFp`` and
        ``marked`` are defined outside this function.

        Args:
            atomID: key into ``aBits`` and ``marked``.
            tverskyThresh: scores strictly below this set ``marked[atomID] = 1``.
        """
        atom_fp = DataStructs.ExplicitBitVect(1024)
        atom_fp.SetBitsFromList(aBits[atomID])

        score = DataStructs.TverskySimilarity(subsFp, atom_fp, 0, 1)
        if score < tverskyThresh:
            marked[atomID] = 1
Exemple #7
0
 def partialSimilarity(atomID):
     """Return the Tversky score of ``subsFp`` against the bits of one atom.

     A new 1024-bit ExplicitBitVect is populated from ``aBits[atomID]`` and
     passed to ``TverskySimilarity(subsFp, fp, 0, 1)``.  ``aBits`` and
     ``subsFp`` are defined outside this function.
     """
     atom_fp = DataStructs.ExplicitBitVect(1024)
     atom_fp.SetBitsFromList(aBits[atomID])
     score = DataStructs.TverskySimilarity(subsFp, atom_fp, 0, 1)
     return score
Exemple #8
0
#         'path':OEFPType_Path}

# Fingerprint name -> callable taking a molecule and returning its fingerprint.
fpcodes = {
    'rdkit': FingerprintMols.GetRDKFingerprint,
    'maccs': MACCSkeys.GenMACCSKeys,
    'morgan':
    # `morganradius` is looked up when the lambda is called, not when this
    # table is built.
    lambda mol: AllChem.GetMorganFingerprintAsBitVect(mol, morganradius),
    # NOTE(review): confirm that Chem exposes GetAtomPairFingerPrint under
    # this exact spelling in the RDKit version in use.
    'atompairs': lambda mol: Chem.GetAtomPairFingerPrint(mol, 2)
}

# Similarity-metric name -> callable taking two fingerprints.
# tversky(F1, F2) = F1@F2 / ( a*sum(F1) + b*sum(F2) + (1-a-b)*F1@F2 )
# With this sign on the last term, a = b = 1 reduces to Tanimoto and
# a = b = 0.5 reduces to Dice, as annotated on the entries below.
dmetrics = {
    'tanimoto': DataStructs.TanimotoSimilarity,  # a, b = 1, 1
    'dice': DataStructs.DiceSimilarity,  # a, b = 0.5, 0.5
    'cosine': DataStructs.CosineSimilarity,
    # Tversky is fixed at a = b = 0.5 here (the same weights as 'dice').
    'tversky': lambda m1, m2: DataStructs.TverskySimilarity(m1, m2, 0.5, 0.5),
    'sokal': DataStructs.SokalSimilarity
}

#########################
# Module initialization #
#########################


def Init():

    global dmetric, fpcode

    #check requested fingerprint existance
    if not fpcodes.has_key(fp):
        print 'Unrecognized fingerprint (mprms.fp): ' + fp
Exemple #9
0
    count = 0
    for i in range(fpA.GetNumBits()):
        if fpA.GetBit(i): count += 1
    print(count, size)
    if fper == GenMACCSKeys:
        onbits = tuple(fpA.GetOnBits())
        print(str(onbits))
        for i in onbits:
            print(Chem.MACCSkeys.smartsPatts[i][0])

    fpB = fper(molB)
    fptxtB = DataStructs.BitVectToText(fpB)
    print(fptxtB)
    size = fpB.GetNumBits()
    count = 0
    for i in range(fpB.GetNumBits()):
        if fpB.GetBit(i): count += 1
    print(count, size)
    if fper == GenMACCSKeys:
        onbits = tuple(fpB.GetOnBits())
        print(str(onbits))
        for i in onbits:
            print(Chem.MACCSkeys.smartsPatts[i][0])

    sim = DataStructs.TanimotoSimilarity(fpA, fpB)
    print(sim)
    sim = DataStructs.TverskySimilarity(fpA, fpB, 0.9, 0.1)
    print(sim)
    bcom = DataStructs.NumBitsInCommon(fpA, fpB)
    print(bcom)
Exemple #10
0
# Bit-vector measure name -> name of the DataStructs attribute implementing it.
# Names are resolved with getattr only when a pair is actually scored.
_RDKIT_BIT_METRIC_ATTRS = {
    'allbit': 'AllBitSimilarity',
    'asymmetric': 'AsymmetricSimilarity',
    'asymmetric2': 'AsymmetricSimilarity',
    'braunblanquet': 'BraunBlanquetSimilarity',
    'cosine': 'CosineSimilarity',
    'dice': 'DiceSimilarity',
    'kulczynski': 'KulczynskiSimilarity',
    'mcconnaughey': 'McConnaugheySimilarity',
    'rogotgoldberg': 'RogotGoldbergSimilarity',
    'russel': 'RusselSimilarity',
    'sokal': 'SokalSimilarity',
    'tanimoto': 'TanimotoSimilarity',
}


def _similarity_matrix(score, fps1, fps2):
    """Return np.array of score(fp1, fp2): rows follow fps1, columns fps2."""
    return np.array([[score(fp1, fp2) for fp2 in fps2] for fp1 in fps1])


def Similarities(fps1, fps2, fingerprint, measure='tanimoto'):
    """Compute all pairwise similarities between two fingerprint collections.

    The fingerprint family is selected by membership of ``fingerprint`` in
    the module-level collections ``indigofps``, ``rdkitfps``,
    ``rdkitnonbitfps`` and ``rdkitestatefps`` (checked in that order), each
    paired with its own collection of allowed measures.  A family whose
    check passes but which has no branch for ``measure`` falls through to
    the next family.

    Args:
        fps1: fingerprints forming the rows of the result.
        fps2: fingerprints forming the columns of the result.
        fingerprint: fingerprint type name.
        measure: similarity measure name.

    Returns:
        ``np.array`` built from one row per element of ``fps1`` and one
        entry per element of ``fps2``; ``None`` when the fingerprint/measure
        combination is not handled, or when a ``'pearson'`` result contains
        NaN.
    """
    # Indigo fingerprints
    if fingerprint in indigofps and measure in indigosims:

        def indigo_score(x, y):
            return indigo.similarity(x, y, measure)

        return _similarity_matrix(indigo_score, fps1, fps2)

    # RDKit fingerprints
    if fingerprint in rdkitfps and measure in rdkitsims:

        if measure == 'tversky':

            def tversky_score(x, y):
                return DataStructs.TverskySimilarity(x, y, a=0.5, b=0.5)

            return _similarity_matrix(tversky_score, fps1, fps2)

        attr_name = _RDKIT_BIT_METRIC_ATTRS.get(measure)
        if attr_name is not None:
            # 'asymmetric2' is the asymmetric metric with its arguments swapped.
            swap = measure == 'asymmetric2'

            def bit_score(x, y):
                if swap:
                    return DataStructs.FingerprintSimilarity(
                        y, x, metric=getattr(DataStructs, attr_name))
                return DataStructs.FingerprintSimilarity(
                    x, y, metric=getattr(DataStructs, attr_name))

            return _similarity_matrix(bit_score, fps1, fps2)

    # RDKit non-bit (integer or float) fingerprints
    if fingerprint in rdkitnonbitfps and measure in rdkitnonbitsims:

        if measure == 'dice':

            def dice_score(x, y):
                return DataStructs.DiceSimilarity(x, y)

            return _similarity_matrix(dice_score, fps1, fps2)

        if measure == 'tanimoto':

            def tanimoto_score(x, y):
                return DataStructs.TanimotoSimilarity(x, y)

            return _similarity_matrix(tanimoto_score, fps1, fps2)

    # E-state fingerprints
    if fingerprint in rdkitestatefps and measure in rdkitestatesims:

        if measure == 'pearson':

            def pearson_score(x, y):
                return np.corrcoef(x, y)[0][1]

            sims = _similarity_matrix(pearson_score, fps1, fps2)
            # Any NaN coefficient invalidates the whole matrix.
            if np.isnan(sims).any():
                return None
            return sims

    # unknown fingerprint and/or similarity measure
    return None