def testRootedAtomPairs(self):
    """Rooted atom pairs never exceed the counts of the unrestricted fingerprint.

    Builds the atom-pair fingerprint of 'Oc1ccccc1' twice: once without
    restrictions and once with ``fromAtoms=(0, )``.  Every code present in
    the rooted fingerprint must also be present in the full one, with a
    count that is not larger.
    """
    phenol = Chem.MolFromSmiles('Oc1ccccc1')
    full_fp = rdMD.GetAtomPairFingerprint(phenol)
    rooted_fp = rdMD.GetAtomPairFingerprint(phenol, fromAtoms=(0, ))
    full_counts = full_fp.GetNonzeroElements()
    rooted_counts = rooted_fp.GetNonzeroElements()
    for pair_code in rooted_counts:
        # A code missing from the full fingerprint surfaces as a KeyError,
        # which also fails the test.
        self.assertTrue(rooted_counts[pair_code] <= full_counts[pair_code])
def testAtomPairs(self):
    """Check how minLength/maxLength limit the atom pairs found for 'CCC'.

    The default fingerprint and the one restricted to path lengths 1..2 are
    each expected to contain two distinct pair codes; restricting to path
    length 1 only is expected to leave a single code.
    """
    propane = Chem.MolFromSmiles('CCC')
    unbounded_fp = rdMD.GetAtomPairFingerprint(propane)
    up_to_two_fp = rdMD.GetAtomPairFingerprint(propane, minLength=1, maxLength=2)
    self.assertEqual(len(unbounded_fp.GetNonzeroElements()), 2)
    self.assertEqual(len(up_to_two_fp.GetNonzeroElements()), 2)

    length_one_fp = rdMD.GetAtomPairFingerprint(propane, minLength=1, maxLength=1)
    self.assertEqual(len(length_one_fp.GetNonzeroElements()), 1)
def testAtomPairTypesChirality(self):
    """Exercise the includeChirality option of atom codes, pair codes and fingerprints.

    Three molecules are used: 'CC(F)Cl' (no stereo label on atom 1) and the
    two SMILES that label atom 1 with '@@' and '@'.  The test asserts that:

    - without includeChirality, atom 1 gets the same atom code in all three;
    - for the unlabelled molecule, the atom code of atom 1 is the same with
      and without includeChirality;
    - with includeChirality, the atom codes of atom 1 differ pairwise
      between the three molecules;
    - for every molecule, the non-chiral pair code of atoms 0 and 1 (distance
      1) is in the default fingerprint, while the chiral pair code is absent
      from the default fingerprint and present in the chiral one.
    """

    def plain_code(mol, idx):
        # Atom code computed with the default (non-chiral) settings.
        return rdMD.GetAtomPairAtomCode(mol.GetAtomWithIdx(idx))

    def chiral_code(mol, idx):
        # Atom code computed with chirality taken into account.
        return rdMD.GetAtomPairAtomCode(mol.GetAtomWithIdx(idx), includeChirality=True)

    smiles = ("CC(F)Cl", "C[C@@H](F)Cl", "C[C@H](F)Cl")
    mols = [Chem.MolFromSmiles(smi) for smi in smiles]
    unlabelled, labelled_a, labelled_b = mols

    self.assertEqual(plain_code(unlabelled, 1), plain_code(labelled_a, 1))
    self.assertEqual(plain_code(unlabelled, 1), plain_code(labelled_b, 1))
    self.assertEqual(chiral_code(unlabelled, 1), plain_code(unlabelled, 1))

    distinct_pairs = (
        (unlabelled, labelled_a),
        (unlabelled, labelled_b),
        (labelled_a, labelled_b),
    )
    for left, right in distinct_pairs:
        self.assertNotEqual(chiral_code(left, 1), chiral_code(right, 1))

    plain_fps = [rdMD.GetAtomPairFingerprint(mol) for mol in mols]
    chiral_fps = [
        rdMD.GetAtomPairFingerprint(mol, includeChirality=True) for mol in mols
    ]
    for mol, plain_fp, chiral_fp in zip(mols, plain_fps, chiral_fps):
        plain_pair = rdMD.GetAtomPairCode(plain_code(mol, 0), plain_code(mol, 1), 1)
        self.assertTrue(plain_pair in plain_fp.GetNonzeroElements())

        chiral_pair = rdMD.GetAtomPairCode(
            chiral_code(mol, 0), chiral_code(mol, 1), 1, includeChirality=True)
        self.assertFalse(chiral_pair in plain_fp.GetNonzeroElements())
        self.assertTrue(chiral_pair in chiral_fp.GetNonzeroElements())
def GetAtomPairFingerprintAsBitVect(mol):
    """ Builds a presence-only atom-pair fingerprint for a molecule.

    Each atom-pair code that occurs in the molecule sets one bit in a
    SparseBitVect of length fpLen.  Note that this doesn't match the standard
    definition of atom pairs, which uses counts of the pairs, not just
    their presence.

    **Arguments**:

      - mol: a molecule

    **Returns**: a SparseBitVect

    >>> m = Chem.MolFromSmiles('CCC')
    >>> v = [ pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1),
    ...       pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2),
    ...     ]
    >>> v.sort()
    >>> fp = GetAtomPairFingerprintAsBitVect(m)
    >>> list(fp.GetOnBits())==v
    True

    """
    bit_vect = DataStructs.SparseBitVect(fpLen)
    pair_counts = rdMolDescriptors.GetAtomPairFingerprint(mol).GetNonzeroElements()
    # Only the codes matter here; the per-pair counts are discarded.
    for pair_code in pair_counts:
        bit_vect.SetBit(pair_code)
    return bit_vect
def GenerateAtomPairsFingerprints(Mols):
    """Generate AtomPairs fingerprints for a sequence of molecules.

    Path-length limits and the chirality flag are read from
    OptionsInfo["FingerprintsParams"]["AtomPairs"].  When
    OptionsInfo["GenerateBitVectFingerints"] is true, hashed bit vector
    fingerprints with 2048 bits and 4 bits per entry are produced; otherwise
    the unhashed atom-pair fingerprints are produced.

    Arguments:
        Mols: iterable of molecules.

    Returns:
        list: One fingerprint per molecule, in input order.
    """

    MiscUtil.PrintInfo("\nGenerating AtomPairs fingerprints...")

    AtomPairsParams = OptionsInfo["FingerprintsParams"]["AtomPairs"]
    # Keyword arguments shared by both fingerprint flavours.
    SharedArgs = {
        "minLength": AtomPairsParams["MinLength"],
        "maxLength": AtomPairsParams["MaxLength"],
        "includeChirality": AtomPairsParams["UseChirality"],
    }

    if not OptionsInfo["GenerateBitVectFingerints"]:
        # Unhashed fingerprints...
        return [
            rdMolDescriptors.GetAtomPairFingerprint(Mol, **SharedArgs)
            for Mol in Mols
        ]

    # Hashed bit vector fingerprints with a fixed size...
    return [
        rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            Mol, nBits=2048, nBitsPerEntry=4, **SharedArgs)
        for Mol in Mols
    ]
def testPairValues(self):
    """Compare freshly computed atom-pair fingerprints with stored references.

    Each entry of ``testD`` pairs a SMILES string with a base64-encoded
    pickle of an IntSparseIntVect.  The fingerprint computed for the SMILES
    must have a Dice similarity of 1.0 to the reference and compare equal
    to it.
    """
    import base64
    testD = (
        ('CCCO',
         b'AQAAAAQAAAAAAIAABgAAACGECAABAAAAIoQIAAEAAABBhAgAAQAAACNEGAABAAAAQUQYAAEAAABC\nRBgAAQAAAA==\n'),
        ('CNc1ccco1',
         b'AQAAAAQAAAAAAIAAEAAAACOECgABAAAAJIQKAAIAAABBhQoAAgAAAEKFCgABAAAAIsQKAAEAAABB\nxQoAAQAAAELFCgACAAAAIYQQAAEAAABChRAAAQAAAEOFEAACAAAAYYUQAAEAAAAjhBoAAQAAAEGF\nGgABAAAAQoUaAAIAAABhhRoAAQAAAEKIGgABAAAA\n'),
    )
    for smi, txt in testD:
        # base64.decodestring() was removed in Python 3.9; decodebytes() is
        # the supported function with the same behaviour for bytes input.
        pkl = base64.decodebytes(txt)
        fp = rdMD.GetAtomPairFingerprint(Chem.MolFromSmiles(smi))
        fp2 = DataStructs.IntSparseIntVect(pkl)
        self.assertEqual(DataStructs.DiceSimilarity(fp, fp2), 1.0)
        self.assertEqual(fp, fp2)
def testAtomPairOptions(self):
    """Check the atomInvariants option of the atom-pair and torsion fingerprints.

    For 'c1ccccc1' and 'c1ccccn1', and for each of the four fingerprint
    functions exercised below, the test asserts that:

    - the default fingerprints of the two molecules differ;
    - giving every atom the invariant 1 in both molecules makes the
      fingerprints equal;
    - giving one molecule all-1 invariants and the other all-2 invariants
      makes them differ again.
    """
    benzene = Chem.MolFromSmiles('c1ccccc1')
    pyridine = Chem.MolFromSmiles('c1ccccn1')

    fingerprinters = (
        rdMD.GetAtomPairFingerprint,
        rdMD.GetHashedAtomPairFingerprintAsBitVect,
        rdMD.GetTopologicalTorsionFingerprint,
        rdMD.GetHashedTopologicalTorsionFingerprintAsBitVect,
    )
    for make_fp in fingerprinters:
        self.assertNotEqual(make_fp(benzene), make_fp(pyridine))

        self.assertEqual(
            make_fp(benzene, atomInvariants=[1] * 6),
            make_fp(pyridine, atomInvariants=[1] * 6))

        self.assertNotEqual(
            make_fp(benzene, atomInvariants=[1] * 6),
            make_fp(pyridine, atomInvariants=[2] * 6))
def GenerateAtomPairsFingerprints(Mols):
    """Generate AtomPairs fingerprints for a sequence of molecules.

    All parameters are read from OptionsInfo: the requested fingerprint
    type from "SpecifiedFingerprintsType", and the path-length limits,
    chirality flag, fingerprint size and bits per hash from
    OptionsInfo["FingerprintsParams"]["AtomPairs"].  When the requested type
    matches "BitVect" (case-insensitive), hashed bit vector fingerprints are
    produced; otherwise the unhashed atom-pair fingerprints are produced.

    Arguments:
        Mols: iterable of molecules.

    Returns:
        list: One fingerprint per molecule, in input order.
    """

    FingerprintsType = OptionsInfo["SpecifiedFingerprintsType"]
    MiscUtil.PrintInfo("\nGenerating AtomPairs %s fingerprints..." % FingerprintsType)

    AtomPairsParams = OptionsInfo["FingerprintsParams"]["AtomPairs"]
    # Keyword arguments shared by both fingerprint flavours.
    SharedArgs = {
        "minLength": AtomPairsParams["MinLength"],
        "maxLength": AtomPairsParams["MaxLength"],
        "includeChirality": AtomPairsParams["UseChirality"],
    }
    # Looked up unconditionally, so both keys must exist for either type.
    FPSize = AtomPairsParams["FPSize"]
    BitsPerHash = AtomPairsParams["BitsPerHash"]

    if not re.match("^BitVect$", FingerprintsType, re.I):
        # Unhashed fingerprints...
        return [
            rdMolDescriptors.GetAtomPairFingerprint(Mol, **SharedArgs)
            for Mol in Mols
        ]

    # Hashed bit vector fingerprints...
    MiscUtil.PrintInfo("FPSize: %s; BitsPerHash: %s" % (FPSize, BitsPerHash))
    return [
        rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            Mol, nBits=FPSize, nBitsPerEntry=BitsPerHash, **SharedArgs)
        for Mol in Mols
    ]
and fingerprint function. Parameters: probeMol -- the probe molecule fpFunction -- the fingerprint function predictionFunction -- the prediction function of the ML model kwargs -- additional arguments for drawing """ weights = GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction) weights, maxWeight = GetStandardizedWeights(weights) fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs) return fig, maxWeight apDict = {} apDict['normal'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetAtomPairFingerprint(m, minLength=minl, maxLength=maxl, ignoreAtoms=ia) apDict['hashed'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprint(m, nBits=bits, minLength=minl, maxLength=maxl, ignoreAtoms=ia) apDict['bv'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprintAsBitVect(m, nBits=bits, minLength=minl, maxLength=maxl, nBitsPerEntry=bpe, ignoreAtoms=ia) # usage: lambda m,i: GetAPFingerprint(m, i, fpType, nBits, minLength, maxLength, nBitsPerEntry) def GetAPFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, minLength=1, maxLength=30, nBitsPerEntry=4): """ Calculates the atom pairs fingerprint with the torsions of atomId removed. Parameters: mol -- the molecule of interest atomId -- the atom to remove the pairs for (if -1, no pair is removed) fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv') nBits -- the size of the bit vector (only for fpType='bv') minLength -- the minimum path length for an atom pair maxLength -- the maxmimum path length for an atom pair
def calculateMol(self, m, smiles, internalParsing=False):
    """Return the atom-pair fingerprint of ``m`` converted to a list.

    The path-length limits come from ``self.minPathLen`` and
    ``self.maxPathLen`` and the size from ``self.nbits``.  ``smiles`` and
    ``internalParsing`` are accepted but not used by this method.

    NOTE(review): if ``rd`` is RDKit's rdMolDescriptors, the documented
    signature of GetAtomPairFingerprint has no ``nBits`` keyword (the hashed
    variant does) -- confirm which function is intended.
    """
    fingerprint = rd.GetAtomPairFingerprint(
        m,
        minLength=self.minPathLen,
        maxLength=self.maxPathLen,
        nBits=self.nbits,
    )
    return list(fingerprint)
embed_graph = graph.values #molecular fingerprint #https://www.rdkit.org/UGM/2012/Landrum_RDKit_UGM.Fingerprints.Final.pptx.pdf finger_mqn = [] finger_morgan = [] finger_maccs = [] finger_ap = [] for i in smiles: mol = AllChem.MolFromSmiles(i) finger_mqn.append(np.array(Descriptors.MQNs_(mol))) finger_maccs.append(np.array(Descriptors.GetMACCSKeysFingerprint((mol)))) #finger_morgan.append(np.array(Descriptors.GetMorganFingerprint((mol)))) finger_ap.append(np.array(Descriptors.GetAtomPairFingerprint((mol)))) ### names = 'vec_spec,vec_smiles,embed_fn,finger_mqn,finger_maccs,finger_ap,embed_graph'.split( ',') data = [ vec_spec, vec_smiles, embed_fn, finger_mqn, finger_maccs, finger_ap, embed_graph ] counter = 0 for i in data: try: res = do_pca(i) plt.scatter(res[0], res[1], label=counter, alpha=.4) plt.savefig('figs/pca_%s.pdf' % names[counter]) plt.clf()