def testCountBounds(self):
    """Custom ``countBounds`` must change the number of on bits.

    For each of the RDKit, topological-torsion, Morgan and atom-pair
    generators, a 2048-bit count-simulated fingerprint is built once with
    the default bounds and once with ``countBounds=(1, 8, 16, 32)``; the two
    on-bit counts are required to differ.
    """
    mol = Chem.MolFromSmiles(
        'COc1ccc(CCNC(=O)c2ccccc2C(=O)NCCc2ccc(OC)cc2)cc1')
    generator_factories = (
        rdFingerprintGenerator.GetRDKitFPGenerator,
        rdFingerprintGenerator.GetTopologicalTorsionGenerator,
        rdFingerprintGenerator.GetMorganGenerator,
        rdFingerprintGenerator.GetAtomPairGenerator,
    )
    for make_generator in generator_factories:
        default_fp = make_generator(
            fpSize=2048, countSimulation=True).GetFingerprint(mol)
        bounded_fp = make_generator(
            fpSize=2048, countSimulation=True,
            countBounds=(1, 8, 16, 32)).GetFingerprint(mol)
        self.assertNotEqual(default_fp.GetNumOnBits(),
                            bounded_fp.GetNumOnBits())
def make_fp_generator(fp_type, settings):
    """Build an RDKit fingerprint generator for the named fingerprint type.

    Args:
        fp_type: One of ``'morgan'``, ``'atom_pair'``, ``'rdkit'`` or
            ``'toplogical'`` (historical spelling, kept for existing
            callers). The correctly spelt ``'topological'`` is accepted as
            an alias.
        settings: Mapping of keyword arguments forwarded to the generator
            factory. Entries override the per-type defaults
            (``includeChirality=True`` for every type except ``'rdkit'``).

    Returns:
        The generator object, or ``False`` when ``fp_type`` is not
        recognised.
    """
    # fp_type -> (factory attribute on rdFingerprintGenerator, default kwargs).
    # Factories are resolved by name only after fp_type is known to be valid,
    # so an unknown type returns False without touching RDKit.
    torsion = ('GetTopologicalTorsionGenerator', {'includeChirality': True})
    factories = {
        'morgan': ('GetMorganGenerator', {'includeChirality': True}),
        'atom_pair': ('GetAtomPairGenerator', {'includeChirality': True}),
        'rdkit': ('GetRDKitFPGenerator', {}),
        'toplogical': torsion,
        'topological': torsion,
    }
    entry = factories.get(fp_type)
    if entry is None:
        return False
    factory_name, defaults = entry
    # Caller-supplied settings win over the defaults.
    arguments = {**defaults, **settings}
    return getattr(rdFingerprintGenerator, factory_name)(**arguments)
def _get_morgan(self):
    """Return Morgan fingerprints of ``self.df['mols']`` as a sparse matrix.

    Each molecule is fingerprinted with a default Morgan generator and
    converted to a NumPy array; the stacked result is returned as an
    integer ``scipy.sparse.csr_matrix``.
    """
    generator = rdFingerprintGenerator.GetMorganGenerator()
    rows = [np.array(generator.GetFingerprint(mol)) for mol in self.df['mols']]
    stacked = np.array(rows)
    return sparse.csr_matrix(stacked).astype('int')
def get_morgan_features(mols):
    """Return feature-invariant Morgan fingerprints as a sparse matrix.

    Args:
        mols: Iterable of molecules to fingerprint.

    Returns:
        An integer ``scipy.sparse.csr_matrix`` built from the stacked
        per-molecule fingerprint arrays.
    """
    feature_invariants = rdFingerprintGenerator.GetMorganFeatureAtomInvGen()
    generator = rdFingerprintGenerator.GetMorganGenerator(
        atomInvariantsGenerator=feature_invariants)
    rows = [np.array(generator.GetFingerprint(mol)) for mol in mols]
    stacked = np.array(rows)
    return sparse.csr_matrix(stacked).astype('int')
def generate_fingeprints(smiles):
    """Compute base64-encoded fingerprints for one SMILES string.

    Args:
        smiles: SMILES string to parse and fingerprint.

    Returns:
        A dict with the keys ``'morgan_fp'``, ``'rdkit_fp'``,
        ``'atompair_fp'`` and ``'tt_fp'``, each holding the ``ToBase64()``
        encoding of the corresponding fingerprint. If anything raises while
        parsing or fingerprinting, a message is printed and an empty dict is
        returned.
    """
    # Load these here so they're only needed on the worker machines.
    from rdkit import Chem
    from rdkit.Chem import rdFingerprintGenerator

    try:
        mol = Chem.MolFromSmiles(smiles)
        # Still TODO: Feature Morgan, Layered, MACCS, Pattern, E-state.
        # NOTE: add any new fingerprints to fingerprint_columns.
        generator_factories = (
            ('morgan_fp', rdFingerprintGenerator.GetMorganGenerator),
            ('rdkit_fp', rdFingerprintGenerator.GetRDKitFPGenerator),
            ('atompair_fp', rdFingerprintGenerator.GetAtomPairGenerator),
            ('tt_fp', rdFingerprintGenerator.GetTopologicalTorsionGenerator),
        )
        fingerprints = {
            column: make_generator().GetFingerprint(mol).ToBase64()
            for column, make_generator in generator_factories
        }
    except Exception as e:
        print(f'Exception {e} processing {smiles}')
        return {}
    return fingerprints
def testMorganGenerator(self):
    """Exercise Morgan generators with custom atom and bond invariants.

    First checks the number of non-zero sparse-count elements for butanoic
    acid with the default, Morgan and feature atom invariants. Then checks
    that two unsanitized pyridazine resonance forms differ when bond types
    are used, match when they are not, and that a bond invariant generator
    with ``useBondTypes=False`` reproduces the ``useBondTypes=False``
    generator.
    """
    acid = Chem.MolFromSmiles('CCCC(=O)O')

    generator = rdFingerprintGenerator.GetMorganGenerator(3)
    elements = generator.GetSparseCountFingerprint(acid).GetNonzeroElements()
    self.assertEqual(len(elements), 14)

    invariant_cases = (
        (rdFingerprintGenerator.GetMorganAtomInvGen, 14),
        (rdFingerprintGenerator.GetMorganFeatureAtomInvGen, 13),
    )
    for make_invariants, expected_count in invariant_cases:
        generator = rdFingerprintGenerator.GetMorganGenerator(
            radius=3, atomInvariantsGenerator=make_invariants())
        elements = generator.GetSparseCountFingerprint(
            acid).GetNonzeroElements()
        self.assertEqual(len(elements), expected_count)

    # Parsed without sanitization, so only the property cache and ring
    # information are computed explicitly below.
    form_a, form_b = [
        Chem.MolFromSmiles(smi, sanitize=False)
        for smi in ('C1=CC=CN=N1', 'C1C=CC=NN=1')
    ]
    for form in (form_a, form_b):
        form.UpdatePropertyCache()
        Chem.GetSymmSSSR(form)

    typed = rdFingerprintGenerator.GetMorganGenerator(radius=2,
                                                      useBondTypes=True)
    self.assertNotEqual(typed.GetSparseCountFingerprint(form_a),
                        typed.GetSparseCountFingerprint(form_b))

    untyped = rdFingerprintGenerator.GetMorganGenerator(radius=2,
                                                        useBondTypes=False)
    self.assertEqual(untyped.GetSparseCountFingerprint(form_a),
                     untyped.GetSparseCountFingerprint(form_b))

    bond_invariants = rdFingerprintGenerator.GetMorganBondInvGen(
        useBondTypes=False)
    via_invariants = rdFingerprintGenerator.GetMorganGenerator(
        radius=2, bondInvariantsGenerator=bond_invariants)
    for form in (form_a, form_b):
        self.assertEqual(untyped.GetSparseCountFingerprint(form),
                         via_invariants.GetSparseCountFingerprint(form))
def write_fingerprints(sesh):
    """Render the fingerprint picker and, on request, fill ``sesh.fp``.

    Shows a select box and, once 'Morgan' is chosen, a button. Pressing the
    button fingerprints every molecule in ``sesh.df['mols']`` while updating
    a progress bar, then stores the result in ``sesh.fp`` as a NumPy array.

    The fingerprints are collected in a fresh list rather than appended to
    ``sesh.fp``: after one run ``sesh.fp`` is an ndarray, which has no
    ``append``, so regenerating would otherwise fail.

    Args:
        sesh: Session object exposing ``df`` (with a ``'mols'`` column) and a
            writable ``fp`` attribute.
    """
    option = st.selectbox('What fingerprint?', (
        'choose one',
        'Morgan',
    ))
    # 'MACCS' is accepted here although the select box does not offer it.
    if option not in ['Morgan', 'MACCS']:
        return
    if not st.button('Generate fingerprints'):
        return

    pbar = st.progress(0)
    # NOTE(review): the first positional parameter of GetMorganGenerator is
    # the radius, so 512 is passed as a radius here. Confirm whether
    # fpSize=512 was intended; behaviour is left unchanged.
    gen_mo = rdFingerprintGenerator.GetMorganGenerator(512)
    total = len(sesh.df)
    fingerprints = []
    for count, mol in enumerate(sesh.df['mols']):
        fingerprints.append(gen_mo.GetFingerprint(mol))
        pbar.progress(int((count + 1) / total * 100))
    sesh.fp = np.array(fingerprints)
def calculate_fingerprint(mol, method='morgan'):
    """Compute one 64-bit fingerprint per atom of ``mol``.

    Only the generator selected by ``method`` is constructed, and it is
    constructed once rather than looked up for every atom.

    Args:
        mol: Molecule object accepted by ``to_rdkit_Mol`` and exposing an
            ``atoms`` sequence.
        method: One of ``'rdkit'``, ``'morgan'``, ``'topological-torsion'``
            or ``'atom-pairs'``.

    Returns:
        A NumPy array stacking one converted fingerprint array per atom, in
        atom-index order.

    Raises:
        KeyError: If ``method`` is not one of the supported names. This is
            now raised even when the molecule has no atoms.
    """
    rdmol = to_rdkit_Mol(mol)
    # Non-strict property cache update followed by ring perception, done
    # before any fingerprint is requested.
    rdmol.UpdatePropertyCache(strict=False)
    Chem.GetSymmSSSR(rdmol)

    factories = {
        'rdkit': rdFingerprintGenerator.GetRDKitFPGenerator,
        'morgan': rdFingerprintGenerator.GetMorganGenerator,
        'topological-torsion':
            rdFingerprintGenerator.GetTopologicalTorsionGenerator,
        'atom-pairs': rdFingerprintGenerator.GetAtomPairGenerator,
    }
    generator = factories[method](fpSize=64)

    rep = []
    for atom_index in range(len(mol.atoms)):
        # fromAtoms limits the fingerprint to this single atom index.
        atomic_fp = generator.GetFingerprint(rdmol, fromAtoms=[atom_index])
        # Placeholder destination; presumably ConvertToNumpyArray resizes it
        # to the fingerprint length - TODO confirm.
        arr = np.zeros((1, ))
        DataStructs.ConvertToNumpyArray(atomic_fp, arr)
        rep.append(arr)
    return np.array(rep)
def generate_fingerprints_and_create_list(self):
    """Score the predicted ligands and store the scores on ``self.predicted``.

    Adds four columns to ``self.predicted``: ``'similarities'`` (highest
    Tanimoto similarity of each predicted molecule to any molecule in
    ``self.true_pos``), ``'sa_score'``, ``'qeds'`` and ``'logp'``. Also sets
    ``self.brenk`` to a boolean array marking molecules that match the BRENK
    filter catalog, and prints the ``logp < 6`` mask.
    """
    # Generate fingerprints of predicted ligands and known ligands.
    generator = rdFingerprintGenerator.GetMorganGenerator(fpSize=2048,
                                                          radius=2)
    predicted_mols = self.predicted['molecules']
    predicted_fps = [generator.GetFingerprint(mol) for mol in predicted_mols]
    true_fps = [
        generator.GetFingerprint(mol) for mol in self.true_pos['molecules']
    ]

    # For every predicted ligand keep its best similarity to a known ligand.
    similarities = []
    for predicted_fp in predicted_fps:
        scores = [
            DataStructs.TanimotoSimilarity(predicted_fp, known_fp)
            for known_fp in true_fps
        ]
        similarities.append(scores[np.argmax(scores)])

    # Module code is in:
    # https://github.com/rdkit/rdkit/tree/master/Contrib/SA_Score
    sa_score = [sascorer.calculateScore(mol) for mol in predicted_mols]

    # QED drug-likeness score; reference: https://doi.org/10.1038/nchem.1243
    qeds = [qed(mol) for mol in predicted_mols]

    logp = [Descriptors.MolLogP(mol) for mol in predicted_mols]

    # Filter catalog usage instructions are here:
    # https://github.com/rdkit/rdkit/pull/536
    params = FilterCatalogParams()
    params.AddCatalog(FilterCatalogParams.FilterCatalogs.BRENK)
    catalog = FilterCatalog(params)
    self.brenk = np.array([catalog.HasMatch(mol) for mol in predicted_mols])

    # Add these lists as columns to the 'predicted' pd.DataFrame.
    self.predicted['similarities'] = similarities
    self.predicted['sa_score'] = sa_score
    self.predicted['qeds'] = qeds
    self.predicted['logp'] = logp

    print(self.predicted['logp'] < 6)

    # NOTE(review): this mask is built but not used or returned within the
    # visible code - confirm whether a follow-up step is missing.
    is_novel = self.predicted['similarities'] < 0.2
    is_synthesizable = self.predicted['sa_score'] < 4
    is_druglike = self.predicted['qeds'] > 0.25
    is_low_logp = self.predicted['logp'] < 6
    shortlist_mask = (is_novel & is_synthesizable & is_druglike
                      & is_low_logp & (~self.brenk))
def fingerprint_morgan(radius, fpSize=2048, count=False):
    """Return a function computing Morgan fingerprints (list of int).

    Args:
        radius: The number of iterations to grow the fingerprint.
        fpSize: Size of the generated fingerprint (defaults to 2048).
        count: False (the default) produces fingerprint bits (0 or 1); True
            produces the count of each fingerprint value.

    Returns:
        The fingerprint function, with ``__name__`` set to a string that
        records the radius, fpSize and count it was built with.
    """
    generator = rdFingerprintGenerator.GetMorganGenerator(radius=radius,
                                                          fpSize=fpSize)
    wrap = _fingerprint_fn_count if count else _fingerprint_fn_bits
    fingerprint_fn = wrap(generator)
    fingerprint_fn.__name__ = (
        f'fingerprint_morgan(radius={radius},fpSize={fpSize},count={count})')
    return fingerprint_fn
def __init__(self, radius, fpSize, IC50function, molFile):
    """Load molecules from a SMILES file and rank their BRICS bond labels.

    Args:
        radius: Radius passed to the Morgan fingerprint generator.
        fpSize: Fingerprint size passed to the Morgan fingerprint generator.
        IC50function: Callable stored as ``self.getIC50``.
        molFile: Path of the SMILES file; its first 1000 newline-separated
            entries are parsed.
    """
    self.fpgen = rdFingerprintGenerator.GetMorganGenerator(radius=radius,
                                                           fpSize=fpSize)
    self.getIC50 = IC50function
    self.molFile = molFile

    # Read the SMILES file and turn each entry into an RDKit molecule,
    # then reduce every molecule to its fragment parent.
    with open(self.molFile) as handle:
        entries = handle.read().split("\n")
    parsed = [Chem.MolFromSmiles(smiles) for smiles in entries[:1000]]
    self.ms = [rdMolStandardize.FragmentParent(mol) for mol in parsed]

    # Count how often each BRICS bond label occurs across the molecules.
    label_counts = Counter()
    for mol in self.ms:
        label_counts.update(
            labels for _atom_ids, labels in BRICS.FindBRICSBonds(mol))

    # Sort by (count, label) descending, so ties fall back to label order.
    ranked = sorted(
        [(count, labels) for labels, count in label_counts.items()],
        reverse=True)
    # Every label is kept, most frequent first.
    # NOTE(review): an earlier comment said "top 10" but no limit is applied.
    self.bondsToKeep = [labels for _count, labels in ranked]
def testBulk(self):
    """Bulk fingerprint functions must match per-molecule generator calls.

    For each bulk function (sparse count, sparse bit and count
    fingerprints) and each fingerprint type (atom pair, Morgan with radius
    2, RDKit, topological torsion), the bulk result for three molecules is
    compared element by element with the matching generator method, and the
    result length is checked.
    """
    mols = [Chem.MolFromSmiles(smi) for smi in ('CCC', 'OCCCCC', 'CCCCC')]

    # (generator factory, positional args, bulk fingerprint type constant)
    generator_specs = (
        (rdFingerprintGenerator.GetAtomPairGenerator, (),
         rdFingerprintGenerator.AtomPairFP),
        (rdFingerprintGenerator.GetMorganGenerator, (2, ),
         rdFingerprintGenerator.MorganFP),
        (rdFingerprintGenerator.GetRDKitFPGenerator, (),
         rdFingerprintGenerator.RDKitFP),
        (rdFingerprintGenerator.GetTopologicalTorsionGenerator, (),
         rdFingerprintGenerator.TopologicalTorsionFP),
    )
    # (bulk function, name of the equivalent single-molecule method)
    bulk_specs = (
        (rdFingerprintGenerator.GetSparseCountFPs,
         'GetSparseCountFingerprint'),
        (rdFingerprintGenerator.GetSparseFPs, 'GetSparseFingerprint'),
        (rdFingerprintGenerator.GetCountFPs, 'GetCountFingerprint'),
    )

    for bulk_fn, single_name in bulk_specs:
        for make_generator, args, fp_type in generator_specs:
            generator = make_generator(*args)
            results = bulk_fn(mols, fp_type)
            fingerprint_one = getattr(generator, single_name)
            for index, mol in enumerate(mols):
                self.assertEqual(results[index], fingerprint_one(mol))
            self.assertEqual(len(results), 3)
def testMorganGenerator(self):
    """A radius-3 Morgan sparse count fingerprint of pentane has 7 entries."""
    pentane = Chem.MolFromSmiles('CCCCC')
    generator = rdFingerprintGenerator.GetMorganGenerator(3)
    nonzero = generator.GetSparseCountFingerprint(pentane).GetNonzeroElements()
    self.assertEqual(len(nonzero), 7)
def fingerprint_molecules(mols):
    """Return the default Morgan fingerprint of every molecule in ``mols``.

    A single generator is built and reused for all molecules instead of
    constructing a new one per molecule.

    Args:
        mols: Iterable of molecules to fingerprint.

    Returns:
        A list of fingerprints in the same order as ``mols``; an empty list
        when ``mols`` is empty.
    """
    mols = list(mols)
    if not mols:
        # Nothing to fingerprint, so no generator is needed.
        return []
    generator = rdFingerprintGenerator.GetMorganGenerator()
    return [generator.GetFingerprint(mol) for mol in mols]