Ejemplo n.º 1
0
 def generate_fingerprints_and_create_list(self):
     """Score the predicted ligands and build the shortlist mask.

     Reads the molecules in ``self.predicted['molecules']`` and
     ``self.true_pos['molecules']`` and, for every predicted molecule,
     computes:

     * the highest Tanimoto similarity (Morgan fingerprints, radius 2,
       2048 bits) to any known ligand,
     * the synthetic accessibility score,
     * the QED drug-likeness score,
     * logP,
     * whether it matches the Brenk filter catalog.

     Side effects: adds the columns ``similarities``, ``sa_score``,
     ``qeds`` and ``logp`` to the ``self.predicted`` DataFrame and stores
     the Brenk matches as a boolean array in ``self.brenk``.
     """
     molecules = self.predicted['molecules']

     #generate fingerprints of predicted ligands and known ligands:
     gen_mo = rdFingerprintGenerator.GetMorganGenerator(fpSize=2048,
                                                        radius=2)
     predicted_fps = [gen_mo.GetFingerprint(mol) for mol in molecules]
     true_fps = [
         gen_mo.GetFingerprint(mol) for mol in self.true_pos['molecules']
     ]
     #keep, for each predicted ligand, its highest similarity to any known
     #ligand; with no known ligands there is nothing to resemble, so the
     #value falls back to 0.0 instead of failing on an empty list
     similarities = [
         max(DataStructs.BulkTanimotoSimilarity(fp, true_fps), default=0.0)
         for fp in predicted_fps
     ]
     #module code is in: https://github.com/rdkit/rdkit/tree/master/Contrib/SA_Score
     sa_score = [sascorer.calculateScore(mol) for mol in molecules]
     #create a list holding the QED drug-likeness score
     #reference: https://doi.org/10.1038/nchem.1243
     qeds = [qed(mol) for mol in molecules]
     #create a list holding logp:
     logp = [Descriptors.MolLogP(mol) for mol in molecules]
     #filter catalog usage instructions are here: https://github.com/rdkit/rdkit/pull/536
     params = FilterCatalogParams()
     params.AddCatalog(FilterCatalogParams.FilterCatalogs.BRENK)
     catalog = FilterCatalog(params)
     #explicit bool dtype keeps `~self.brenk` valid even when there are no
     #predicted molecules (an empty array would otherwise default to float)
     self.brenk = np.array([catalog.HasMatch(mol) for mol in molecules],
                           dtype=bool)
     #add these lists as columns to the 'predicted' pd.DataFrame
     self.predicted['similarities'] = similarities
     self.predicted['sa_score'] = sa_score
     self.predicted['qeds'] = qeds
     self.predicted['logp'] = logp
     #a molecule is shortlisted when it differs from every known ligand
     #(similarity < 0.2), has sa_score < 4, QED > 0.25, logP < 6 and does
     #not match the Brenk catalog
     shortlist_mask = ((self.predicted['similarities'] < 0.2) &
                       (self.predicted['sa_score'] < 4) &
                       (self.predicted['qeds'] > 0.25) &
                       (self.predicted['logp'] < 6) & (~self.brenk))
Ejemplo n.º 2
0
# Configure the PAINS_A / PAINS_B / PAINS_C parameter sets; params[0] is set
# up before this point (presumably the combined PAINS catalog -- confirm
# against the code above this snippet).
params[1].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_A)
params[2].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_B)
params[3].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_C)
catalog_pains = FilterCatalog(params[0])
catalog_painsA = FilterCatalog(params[1])
catalog_painsB = FilterCatalog(params[2])
catalog_painsC = FilterCatalog(params[3])

# Positions in `data` of the molecules flagged by each catalog, plus the
# positions flagged by at least one of them.
entries_pains = []
entries_painsA = []
entries_painsB = []
entries_painsC = []
entries_painsAll = []

# Positional indexing is kept on purpose: the container type of `data` is not
# visible here, so `data[i]` is the only access pattern known to be valid.
for i in range(len(data)):
    mol = data[i]
    # Query every catalog exactly once per molecule and reuse the answers
    # for both the per-catalog lists and the combined list.
    hit = catalog_pains.HasMatch(mol)
    hit_a = catalog_painsA.HasMatch(mol)
    hit_b = catalog_painsB.HasMatch(mol)
    hit_c = catalog_painsC.HasMatch(mol)
    if hit:
        entries_pains.append(i)
    if hit_a:
        entries_painsA.append(i)
    if hit_b:
        entries_painsB.append(i)
    if hit_c:
        entries_painsC.append(i)
    if hit or hit_a or hit_b or hit_c:
        entries_painsAll.append(i)

# Number of flagged molecules per catalog, then for any catalog.
print(len(entries_pains), len(entries_painsA), len(entries_painsB),
      len(entries_painsC), len(entries_painsAll))
print(