def GetDistanceMatrix(data, metric, isSimilarity=1):
    """Build the condensed pairwise distance matrix for a set of fingerprints.

    data should be a list of tuples with fingerprints in position 1
    (the rest of the elements of the tuple are not important)

    Arguments:
      - data: sequence of tuples; element 1 of each tuple must provide
        GetNumBits() and be acceptable to `metric`.
      - metric: callable taking two fingerprints and returning a number.
      - isSimilarity: if true, `metric` is treated as a similarity and the
        stored value is 1 - metric(fp1, fp2); otherwise the metric value is
        stored unchanged.

    Returns the symmetric distance matrix as a 1-D float64 array holding
    nPts * (nPts - 1) / 2 entries, filled column by column: for each col in
    1..nPts-1, the entries for row 0..col-1
    (see ML.Cluster.Resemblance for layout documentation)
    """
    nPts = len(data)
    # Integer division: numpy.zeros() needs an integral size.
    res = numpy.zeros(nPts * (nPts - 1) // 2, dtype=numpy.float64)
    nSoFar = 0
    for col in range(1, nPts):
        for row in range(col):
            fp1 = data[col][1]
            fp2 = data[row][1]
            nBits1 = fp1.GetNumBits()
            nBits2 = fp2.GetNumBits()
            # When the lengths differ, fold the longer fingerprint so both
            # have the same number of bits; the fold factor must be an int.
            if nBits1 > nBits2:
                fp1 = DataStructs.FoldFingerprint(fp1, nBits1 // nBits2)
            elif nBits2 > nBits1:
                fp2 = DataStructs.FoldFingerprint(fp2, nBits2 // nBits1)
            sim = metric(fp1, fp2)
            if isSimilarity:
                sim = 1. - sim
            res[nSoFar] = sim
            nSoFar += 1
    return res
def GetDistanceMatrix(data, metric, isSimilarity=1):
    """Build the condensed pairwise distance matrix for a set of fingerprints.

    data should be a list of tuples with fingerprints in position 1
    (the rest of the elements of the tuple are not important)

    Arguments:
      - data: sequence of tuples; element 1 of each tuple must provide
        GetNumBits() and be acceptable to `metric`.
      - metric: callable taking two fingerprints and returning a number.
      - isSimilarity: if true, `metric` is treated as a similarity and the
        stored value is 1.0 - metric(fp1, fp2); otherwise the metric value
        is stored unchanged.

    Returns the symmetric distance matrix as a 1-D float64 array holding
    nPts * (nPts - 1) / 2 entries, filled column by column: for each col in
    1..nPts-1, the entries for row 0..col-1
    (see ML.Cluster.Resemblance for layout documentation)
    """
    nPts = len(data)
    distsMatrix = numpy.zeros((nPts * (nPts - 1) // 2), dtype=numpy.float64)
    nSoFar = 0
    for col in range(1, nPts):
        colFp = data[col][1]
        nBits1 = colFp.GetNumBits()
        for row in range(col):
            # Start every pair from the unfolded column fingerprint so that
            # nBits1 always matches the fingerprint being compared; a fold
            # done for one row must not leak into the following rows.
            fp1 = colFp
            fp2 = data[row][1]
            nBits2 = fp2.GetNumBits()
            # Fold the longer fingerprint down to the shorter length; the
            # fold factor has to be an integer, hence floor division.
            if nBits1 > nBits2:
                fp1 = DataStructs.FoldFingerprint(fp1, nBits1 // nBits2)
            elif nBits2 > nBits1:
                fp2 = DataStructs.FoldFingerprint(fp2, nBits2 // nBits1)

            value = metric(fp1, fp2)
            distsMatrix[nSoFar] = 1.0 - value if isSimilarity else value
            nSoFar += 1
    return distsMatrix
def test6(self):
    """ check that the bits in a signature of size N which has been folded
    in half are the same as those in a signature of size N/2

    For each molecule the 4096-sized fingerprint is folded by 2, by 2
    again, and directly by 4, and each result is compared (by on-bits)
    with a fingerprint generated with 2048 or 1024 as the last argument.
    """

    def on_bits(fp):
        # Comparable snapshot of the set bits of a fingerprint.
        return tuple(fp.GetOnBits())

    for smiles in ['CCC(O)C(=O)O', 'c1ccccc1', 'C1CCCCC1', 'C1NCCCC1', 'CNCNCNC']:
        mol = Chem.MolFromSmiles(smiles)
        full = Chem.RDKFingerprint(mol, 2, 7, 4096)

        # 4096 folded once vs. 2048 generated directly
        halved = DataStructs.FoldFingerprint(full, 2)
        reference = Chem.RDKFingerprint(mol, 2, 7, 2048)
        assert on_bits(halved) == on_bits(reference)

        # the folded result folded again vs. 1024 generated directly
        quartered = DataStructs.FoldFingerprint(halved, 2)
        reference = Chem.RDKFingerprint(mol, 2, 7, 1024)
        assert on_bits(quartered) == on_bits(reference)

        # 4096 folded by 4 in one step vs. the same 1024 reference
        quartered = DataStructs.FoldFingerprint(full, 4)
        assert on_bits(quartered) == on_bits(reference)
def fingerprint_reactions(reactions, fp_dim):
    """Return one folded structural fingerprint per reaction SMARTS.

    Arguments:
      - reactions: iterable of reaction strings handed to
        AllChem.ReactionFromSmarts.
      - fp_dim: divisor used to derive the fold factor; each fingerprint is
        folded by GetNumBits() // fp_dim.

    Returns a list of folded fingerprints in input order.
    """

    def _folded_fingerprint(smarts):
        # Parse, fingerprint, then fold by the integer ratio of sizes.
        reaction = AllChem.ReactionFromSmarts(smarts)
        full_fp = AllChem.CreateStructuralFingerprintForReaction(reaction)
        return DataStructs.FoldFingerprint(full_fp, full_fp.GetNumBits() // fp_dim)

    return [_folded_fingerprint(smarts) for smarts in reactions]
def FoldFingerprintToTargetDensity(fp, **fpArgs):
    """Fold `fp` by a factor of 2 until it reaches a target bit density.

    Keyword arguments read from fpArgs:
      - tgtDensity: folding stops once on-bits / total-bits is no longer
        below this value.
      - minSize: folding also stops once half the current size would not
        be greater than this value (only looked up when the density is
        still below the target).

    Returns the (possibly folded) fingerprint; the input object itself is
    returned when no fold is performed.
    """
    while True:
        onCount = fp.GetNumOnBits()
        bitCount = fp.GetNumBits()

        stillTooSparse = float(onCount) / bitCount < fpArgs['tgtDensity']
        if not stillTooSparse:
            return fp

        # Another fold is only allowed while the halved size stays above
        # the minimum.
        roomToFold = bitCount / 2 > fpArgs['minSize']
        if not roomToFold:
            return fp

        fp = DataStructs.FoldFingerprint(fp, 2)
def test15FoldFingerprint(self):
    """Check FoldFingerprint with its default fold factor on both vector types.

    An 8-bit vector with bits 0, 1 and 6 set is folded; the result is
    expected to have bits 0, 1 and 2 set and bit 3 clear.
    """
    for vectType in (DataStructs.ExplicitBitVect, DataStructs.SparseBitVect):
        original = vectType(8)
        for bit in (0, 1, 6):
            original[bit] = 1

        folded = DataStructs.FoldFingerprint(original)

        for bit in (0, 1, 2):
            self.assertTrue(folded[bit])
        self.assertFalse(folded[3])
def SMILE2Matrix(smile_list):
    """Convert (id, SMILES) entries into folded Morgan fingerprint arrays.

    Arguments:
      - smile_list: iterable of indexable entries with an identifier at
        position 0 and a SMILES string at position 1.

    Each SMILES is turned into a radius-3 Morgan bit vector (ECFP6) with
    nBits=1024, folded by a factor of 4 and written into a numpy array
    through DataStructs.ConvertToNumpyArray.

    Returns a tuple (ids, np_fps) of two parallel lists in input order.
    """
    ids = []
    np_fps = []
    for entry in smile_list:
        # To ECFP6
        cid = entry[0]
        mol = Chem.MolFromSmiles(entry[1])
        ecfp = AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=1024)

        # Placeholder array handed to the converter as the destination.
        vector = np.zeros((1, ))
        folded = DataStructs.FoldFingerprint(ecfp, 4)
        DataStructs.ConvertToNumpyArray(folded, vector)

        ids.append(cid)
        np_fps.append(vector)
    return ids, np_fps