예제 #1
0
파일: ClusterMols.py 프로젝트: kozo2/rdkit
def GetDistanceMatrix(data, metric, isSimilarity=1):
    """Build the condensed pairwise distance matrix for a set of fingerprints.

    Arguments:
      - data: a sequence of tuples with the fingerprint in position 1
        (the rest of the elements of each tuple are not important)
      - metric: a callable taking two fingerprints and returning a number
      - isSimilarity: if true, the metric value is treated as a similarity
        and ``1 - value`` is stored; otherwise the value is stored unchanged

    Returns a 1D numpy float64 array with nPts*(nPts-1)/2 entries, filled
    in the order (1,0), (2,0), (2,1), (3,0), ...
    (see ML.Cluster.Resemblance for layout documentation)

    When the two fingerprints of a pair differ in length, the longer one is
    folded down by the integer ratio of the two lengths before the metric
    is evaluated.
    """
    nPts = len(data)
    # Floor division keeps the array size an int on Python 3; numpy.float64
    # replaces the numpy.float alias, which newer NumPy releases removed.
    res = numpy.zeros(nPts * (nPts - 1) // 2, dtype=numpy.float64)
    nSoFar = 0
    for col in range(1, nPts):
        for row in range(col):
            fp1 = data[col][1]
            fp2 = data[row][1]
            nBits1 = fp1.GetNumBits()
            nBits2 = fp2.GetNumBits()
            # The fold factor must be an integer, hence floor division.
            if nBits1 > nBits2:
                fp1 = DataStructs.FoldFingerprint(fp1, nBits1 // nBits2)
            elif nBits2 > nBits1:
                fp2 = DataStructs.FoldFingerprint(fp2, nBits2 // nBits1)
            value = metric(fp1, fp2)
            if isSimilarity:
                value = 1. - value
            res[nSoFar] = value
            nSoFar += 1
    return res
예제 #2
0
def GetDistanceMatrix(data, metric, isSimilarity=1):
    """Build the condensed pairwise distance matrix for a set of fingerprints.

    Arguments:
      - data: a sequence of tuples with the fingerprint in position 1
        (the rest of the elements of each tuple are not important)
      - metric: a callable taking two fingerprints and returning a number
      - isSimilarity: if true, the metric value is treated as a similarity
        and ``1 - value`` is stored; otherwise the value is stored unchanged

    Returns a 1D numpy float64 array with nPts*(nPts-1)/2 entries, filled
    in the order (1,0), (2,0), (2,1), (3,0), ...
    (see ML.Cluster.Resemblance for layout documentation)

    When the two fingerprints of a pair differ in length, the longer one is
    folded down by the integer ratio of the two lengths before the metric
    is evaluated.
    """
    nPts = len(data)
    distsMatrix = numpy.zeros((nPts * (nPts - 1) // 2), dtype=numpy.float64)
    nSoFar = 0
    for col in range(1, nPts):
        colFp = data[col][1]
        nBits1 = colFp.GetNumBits()
        for row in range(col):
            # Start every pair from the unfolded column fingerprint so that a
            # fold done for one row does not leak into later rows while
            # nBits1 still describes the original size.
            fp1 = colFp
            fp2 = data[row][1]
            nBits2 = fp2.GetNumBits()
            # The fold factor must be an integer, hence floor division.
            if nBits1 > nBits2:
                fp1 = DataStructs.FoldFingerprint(fp1, nBits1 // nBits2)
            elif nBits2 > nBits1:
                fp2 = DataStructs.FoldFingerprint(fp2, nBits2 // nBits1)

            value = metric(fp1, fp2)
            if isSimilarity:
                value = 1.0 - value
            distsMatrix[nSoFar] = value
            nSoFar += 1
    return distsMatrix
예제 #3
0
    def test6(self):
        """Check that folding a signature matches generating it at the smaller size.

        For each molecule, the on bits of a size-4096 fingerprint folded by 2
        are compared with those of a size-2048 fingerprint; folding once more
        by 2, and folding the original by 4, are both compared with a
        size-1024 fingerprint.
        """
        smiles_set = ('CCC(O)C(=O)O', 'c1ccccc1', 'C1CCCCC1', 'C1NCCCC1', 'CNCNCNC')
        for smiles in smiles_set:
            mol = Chem.MolFromSmiles(smiles)
            full_fp = Chem.RDKFingerprint(mol, 2, 7, 4096)

            # 4096 folded in half vs. a native 2048 fingerprint
            folded = DataStructs.FoldFingerprint(full_fp, 2)
            reference = Chem.RDKFingerprint(mol, 2, 7, 2048)
            assert tuple(folded.GetOnBits()) == tuple(reference.GetOnBits())

            # folded in half again vs. a native 1024 fingerprint
            folded = DataStructs.FoldFingerprint(folded, 2)
            reference = Chem.RDKFingerprint(mol, 2, 7, 1024)
            assert tuple(folded.GetOnBits()) == tuple(reference.GetOnBits())

            # 4096 folded by four in one step vs. the same 1024 fingerprint
            folded = DataStructs.FoldFingerprint(full_fp, 4)
            assert tuple(folded.GetOnBits()) == tuple(reference.GetOnBits())
예제 #4
0
def fingerprint_reactions(reactions, fp_dim):
    """Return one folded structural fingerprint per reaction SMARTS.

    Each entry of ``reactions`` is parsed with ``AllChem.ReactionFromSmarts``
    and fingerprinted with ``CreateStructuralFingerprintForReaction``. The
    fingerprint is then folded by ``GetNumBits() // fp_dim``.

    The result is a list in the same order as ``reactions``.
    """

    def _folded_fingerprint(smarts):
        # Parse, fingerprint, then fold by the integer ratio to fp_dim.
        reaction = AllChem.ReactionFromSmarts(smarts)
        full_fp = AllChem.CreateStructuralFingerprintForReaction(reaction)
        factor = full_fp.GetNumBits() // fp_dim
        return DataStructs.FoldFingerprint(full_fp, factor)

    return [_folded_fingerprint(smarts) for smarts in reactions]
예제 #5
0
def FoldFingerprintToTargetDensity(fp, **fpArgs):
    """Fold ``fp`` in half repeatedly until it is dense enough.

    Keyword arguments read from ``fpArgs``:
      - tgtDensity: folding stops once (on bits / total bits) is no longer
        below this value
      - minSize: folding also stops when half the current size would not be
        greater than this value

    Returns the (possibly folded) fingerprint; the input object itself is
    returned when no fold was needed.
    """
    on_count = fp.GetNumOnBits()
    total_count = fp.GetNumBits()
    while float(on_count) / total_count < fpArgs['tgtDensity']:
        # Stop before folding below the minimum allowed size.
        if not total_count / 2 > fpArgs['minSize']:
            break
        fp = DataStructs.FoldFingerprint(fp, 2)
        on_count, total_count = fp.GetNumOnBits(), fp.GetNumBits()
    return fp
예제 #6
0
파일: testBV.py 프로젝트: lmmentel/rdkit
 def test15FoldFingerprint(self):
   """Fold an 8-bit vector with bits 0, 1 and 6 set, for both bit vector types.

   With the default fold factor the test expects bits 0, 1 and 2 of the
   folded vector to be set and bit 3 to be clear.
   """
   for bvType in (DataStructs.ExplicitBitVect, DataStructs.SparseBitVect):
     bv = bvType(8)
     for idx in (0, 1, 6):
       bv[idx] = 1
     folded = DataStructs.FoldFingerprint(bv)
     for idx in (0, 1, 2):
       self.assertTrue(folded[idx])
     self.assertFalse(folded[3])
예제 #7
0
def SMILE2Matrix(smile_list):
    """Turn (id, SMILES) pairs into ids and folded Morgan fingerprint arrays.

    Each item of ``smile_list`` is indexed as ``item[0]`` (identifier) and
    ``item[1]`` (SMILES string). For every item a 1024-bit Morgan bit vector
    of radius 3 (labelled ECFP6 in the original code) is built and folded by
    a factor of 4. The folded vector is then written into a numpy array via
    ``DataStructs.ConvertToNumpyArray``.

    Returns a tuple ``(ids, np_fps)`` of two lists in input order.
    """
    ids = []
    np_fps = []
    for id_smile in smile_list:
        cid, smile = id_smile[0], id_smile[1]
        mol = Chem.MolFromSmiles(smile)
        bit_vect = AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=1024)

        # Placeholder array that ConvertToNumpyArray fills with the folded bits.
        arr = np.zeros((1, ))
        folded = DataStructs.FoldFingerprint(bit_vect, 4)
        DataStructs.ConvertToNumpyArray(folded, arr)

        ids.append(cid)
        np_fps.append(arr)

    return ids, np_fps