def testRemoveEntry(self):
    """Check that RemoveEntry drops exactly one entry from the ZINC catalog.

    The entry at index 10 is removed; afterwards the catalog must be one
    entry shorter and contain one fewer entry carrying that description.
    """
    params = FilterCatalog.FilterCatalogParams(
        FilterCatalogParams.FilterCatalogs.ZINC)
    catalog = FilterCatalog.FilterCatalog(params)

    def descriptions():
        # Snapshot of the description of every entry currently in the catalog.
        return [
            catalog.GetEntryWithIdx(i).GetDescription()
            for i in range(catalog.GetNumEntries())
        ]

    entry = catalog.GetEntryWithIdx(10)
    desc = entry.GetDescription()

    before = descriptions()
    count = before.count(desc)
    print("Count", count)
    sz = catalog.GetNumEntries()
    print("*" * 44)

    self.assertTrue(catalog.RemoveEntry(entry))
    # The Python handle is released before the catalog is queried again
    # (kept from the original test; presumably exercises object lifetime).
    del entry
    self.assertEqual(catalog.GetNumEntries(), sz - 1)

    after = descriptions()
    print(set(before) - set(after))
    self.assertEqual(count, after.count(desc) + 1)
def testThreadedRunner(self):
    """Run the PAINS A/B/C catalogs over pains.smi via RunFilterCatalog.

    Checks the first reported description for each of the three SMILES and
    that an unparsable SMILES yields a single "no valid RDKit molecule" entry.
    """
    path = os.path.join(os.environ['RDBASE'], 'Code', 'GraphMol', 'test_data',
                        'pains.smi')
    with open(path) as f:
        # The first line of the file is skipped.
        smiles = [line.strip() for line in f][1:]

    self.assertEqual(len(smiles), 3)

    params = FilterCatalog.FilterCatalogParams()
    params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_A)
    params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_B)
    params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_C)
    fc = FilterCatalog.FilterCatalog(params)

    results = FilterCatalog.RunFilterCatalog(fc, smiles)
    self.assertEqual(len(results), 3)

    descriptions = [
        "hzone_phenol_A(479)", "cyano_imine_B(17)", "keto_keto_gamma(5)"
    ]
    for i, res in enumerate(results):
        self.assertGreater(len(res), 0)
        self.assertEqual(res[0].GetDescription(), descriptions[i])

    # Test with some bad input
    smiles = ['mydoghasfleas']
    results = FilterCatalog.RunFilterCatalog(fc, smiles, numThreads=3)
    self.assertEqual(len(results[0]), 1)
    self.assertEqual(results[0][0].GetDescription(),
                     "no valid RDKit molecule")
def test0FilterCatalogEntry(self):
    """Exercise SmartsMatcher / FilterCatalogEntry basics.

    Covers validity before and after a pattern is set, count defaults,
    description get/set, matching against benzene, the unnamed-matcher
    string form, and an ExclusionList.
    """
    matcher = FilterCatalog.SmartsMatcher("Aromatic carbon chain")
    # A matcher with a name but no pattern is not valid yet.
    self.assertFalse(matcher.IsValid())

    pat = Chem.MolFromSmarts("c:c:c:c:c")
    matcher.SetPattern(pat)
    matcher.SetMinCount(1)

    entry = FilterCatalog.FilterCatalogEntry("Bar", matcher)
    if FilterCatalog.FilterCatalogCanSerialize():
        # Only checks that serialization does not raise; the payload was
        # never inspected by this test.
        entry.Serialize()

    self.assertEqual(entry.GetDescription(), "Bar")
    self.assertEqual(matcher.GetMinCount(), 1)
    self.assertEqual(matcher.GetMaxCount(), 2**32 - 1)
    self.assertTrue(matcher.IsValid())

    entry.SetDescription("Foo")
    self.assertEqual(entry.GetDescription(), "Foo")

    mol = Chem.MolFromSmiles("c1ccccc1")
    self.assertTrue(matcher.HasMatch(mol))

    matcher = FilterCatalog.SmartsMatcher(pat)
    self.assertEqual(str(matcher), "Unnamed SmartsMatcher")
    self.assertEqual(matcher.GetMinCount(), 1)
    self.assertTrue(matcher.HasMatch(mol))
    # Called for its side effects only; the result is not checked here.
    matcher.GetMatches(mol)

    # NOTE(review): the exclusion list is handed *itself* as its only
    # exclusion pattern. test1FilterMatchOps passes a SmartsMatcher instead;
    # confirm whether the self-reference is intentional before changing it.
    matcher = FilterCatalog.ExclusionList()
    matcher.SetExclusionPatterns([matcher])
    self.assertFalse(matcher.HasMatch(mol))
def testFunctionalGroupHierarchy(self):
    """Check halogen hits reported by the functional-group hierarchy catalog.

    The same expectations are checked against the catalog itself and, when
    serialization is available, against a catalog rebuilt from its pickle.
    """
    fc = FilterCatalog.GetFunctionalGroupHierarchy()

    matches = [
        (Chem.MolFromSmiles("CCl"),
         ['Halogen.Aliphatic', 'Halogen.NotFluorine.Aliphatic']),
        (Chem.MolFromSmiles("c1ccccc1Cl"),
         ['Halogen.Aromatic', 'Halogen.NotFluorine.Aromatic']),
        (Chem.MolFromSmiles("c1ccccc1F"), ['Halogen.Aromatic']),
        (Chem.MolFromSmiles("CBr"),
         ['Halogen.Aliphatic', 'Halogen.NotFluorine.Aliphatic',
          'Halogen.Bromine.Aliphatic']),
    ]

    catalogs = [fc]
    if FilterCatalog.FilterCatalogCanSerialize():
        serialized = fc.Serialize()
        catalogs.append(FilterCatalog.FilterCatalog(serialized))

    # A distinct loop name avoids silently rebinding the original catalog.
    for catalog in catalogs:
        # test GetMatches API
        for mol, res in matches:
            entries = list(catalog.GetMatches(mol))
            for entry in entries:
                hits = [
                    match.filterMatch.GetName()
                    for match in entry.GetFilterMatches(mol)
                ]
                self.assertEqual(res, hits)

        # test GetFilterMatches API
        for mol, res in matches:
            self.assertEqual(res, [
                match.filterMatch.GetName()
                for match in catalog.GetFilterMatches(mol)
            ])
def testAddEntry(self):
    """Add a SmartsMatcher-backed entry to an empty catalog, then tear down.

    There are no assertions: the test passes as long as none of the calls
    (including the explicit deletions) raise.
    """
    sm = FilterCatalog.SmartsMatcher("Too many carbons", "[#6]", 40 + 1)
    entry = FilterCatalog.FilterCatalogEntry("Bar", sm)
    fc = FilterCatalog.FilterCatalog()
    fc.AddEntry(entry)
    # The entry handle is deleted before the catalog that received it;
    # presumably this ordering checks ownership/lifetime handling - keep it.
    del entry
    del fc
def testThreadedPythonFilter(self):
    """Run a Python-defined FilterMatcher through the threaded runner.

    The matcher flags molecules whose exact molecular weight lies outside
    [minMw, maxMw]. No assertion is made on the runner's results; the test
    passes if RunFilterCatalog completes with numThreads=3.
    """

    class MWFilter(FilterCatalog.FilterMatcher):

        def __init__(self, minMw, maxMw):
            FilterCatalog.FilterMatcher.__init__(self, "MW violation")
            self.minMw = minMw
            self.maxMw = maxMw

        def IsValid(self):
            return True

        def HasMatch(self, mol):
            mw = rdMolDescriptors.CalcExactMolWt(mol)
            res = not self.minMw <= mw <= self.maxMw
            # Invalid SMILES plus an explicit error message on every call;
            # presumably there to exercise RDKit logging from the worker
            # threads - keep both calls.
            Chem.MolFromSmiles("---")
            Chem.LogErrorMsg("dasfsadf")
            return res

    path = os.path.join(os.environ['RDBASE'], 'Code', 'GraphMol', 'test_data',
                        'pains.smi')
    with open(path) as f:
        # The first line of the file is skipped.
        smiles = [line.strip() for line in f][1:]

    print("1")
    self.assertEqual(len(smiles), 3)

    print("2")
    entry = FilterCatalog.FilterCatalogEntry("MW Violation",
                                             MWFilter(100, 500))
    fc = FilterCatalog.FilterCatalog()
    fc.AddEntry(entry)
    self.assertEqual(entry.GetDescription(), "MW Violation")

    print("running")
    # The results were never inspected; only completion matters.
    FilterCatalog.RunFilterCatalog(fc, smiles * 10, numThreads=3)
def testFlattenedFunctionalGroupHierarchy(self):
    """Check halogen group names from the flattened functional-group hierarchy.

    Runs the same substructure checks twice: once with the default names and
    once with ``normalized=True``, where the expected names are the
    lower-cased forms of the default ones.
    """

    def assert_hits(query_defs, expected):
        # Names are compared in sorted order of the (name, pattern) items.
        items = sorted(query_defs.items())
        for mol, res in expected:
            hits = [name for name, pat in items if mol.HasSubstructMatch(pat)]
            self.assertEqual(hits, res)

    matches = [
        (Chem.MolFromSmiles("CCl"), [
            'Halogen', 'Halogen.Aliphatic', 'Halogen.NotFluorine',
            'Halogen.NotFluorine.Aliphatic'
        ]),
        (Chem.MolFromSmiles("c1ccccc1Cl"), [
            'Halogen', 'Halogen.Aromatic', 'Halogen.NotFluorine',
            'Halogen.NotFluorine.Aromatic'
        ]),
        (Chem.MolFromSmiles("c1ccccc1F"), ['Halogen', 'Halogen.Aromatic']),
        (Chem.MolFromSmiles("CBr"), [
            'Halogen',
            'Halogen.Aliphatic',
            'Halogen.Bromine',
            'Halogen.Bromine.Aliphatic',
            'Halogen.NotFluorine',
            'Halogen.NotFluorine.Aliphatic',
        ]),
    ]

    # test the default (non-normalized) group names
    assert_hits(FilterCatalog.GetFlattenedFunctionalGroupHierarchy(), matches)

    # test the normalized groups
    normalized_matches = [
        (Chem.MolFromSmiles("CCl"), [
            'halogen', 'halogen.aliphatic', 'halogen.notfluorine',
            'halogen.notfluorine.aliphatic'
        ]),
        (Chem.MolFromSmiles("c1ccccc1Cl"), [
            'halogen', 'halogen.aromatic', 'halogen.notfluorine',
            'halogen.notfluorine.aromatic'
        ]),
        (Chem.MolFromSmiles("c1ccccc1F"), ['halogen', 'halogen.aromatic']),
        (Chem.MolFromSmiles("CBr"), [
            'halogen',
            'halogen.aliphatic',
            'halogen.bromine',
            'halogen.bromine.aliphatic',
            'halogen.notfluorine',
            'halogen.notfluorine.aliphatic',
        ]),
    ]
    assert_hits(
        FilterCatalog.GetFlattenedFunctionalGroupHierarchy(normalized=True),
        normalized_matches)
def testZinc(self):
    """Check the ZINC catalog on a 41-carbon chain and on ("CN" * 20).

    The 41-carbon chain must hit the "Non-Hydrogen_atoms" entry first; the
    40-atom alternating C/N chain must not match any entry.
    """
    params = FilterCatalog.FilterCatalogParams(
        FilterCatalogParams.FilterCatalogs.ZINC)
    catalog = FilterCatalog.FilterCatalog(params)
    self.assertTrue(catalog.GetNumEntries())

    m = Chem.MolFromSmiles("C" * 41)
    entry = catalog.GetFirstMatch(m)
    # The original used assertTrue(description, "Non-Hydrogen_atoms"), which
    # treats the second argument as the failure message and therefore never
    # compared the description at all.
    self.assertEqual(entry.GetDescription(), "Non-Hydrogen_atoms")

    m = Chem.MolFromSmiles("CN" * 20)
    self.assertIsNone(catalog.GetFirstMatch(m))
def test4CountTests(self):
    """Check SmartsMatcher min/max count handling for the carbon pattern.

    With counts (0, 2) molecules holding zero, one or two carbons match and
    three carbons do not; with counts (1, 2) a carbon-free molecule does not
    match.
    """
    zero_to_two = FilterCatalog.SmartsMatcher("Carbon", "[#6]", 0, 2)
    cases = (("N", True), ("C", True), ("CC", True), ("CCC", False))
    for smi, should_match in cases:
        probe = Chem.MolFromSmiles(smi)
        check = self.assertTrue if should_match else self.assertFalse
        check(zero_to_two.HasMatch(probe))

    one_to_two = FilterCatalog.SmartsMatcher("Carbon", "[#6]", 1, 2)
    probe = Chem.MolFromSmiles("N")
    self.assertFalse(one_to_two.HasMatch(probe))
def testSmartsMatcherAPI(self):
    """Check count-limited SmartsMatchers and FilterMatchOps.Not on carbon chains.

    A 40-carbon chain is accepted by the "ok" matcher only; a 41-carbon chain
    trips the "too many" matcher and the negated "ok" matcher.
    """
    too_many = FilterCatalog.SmartsMatcher("Too many carbons", "[#6]", 40 + 1)
    within_limit = FilterCatalog.SmartsMatcher("ok # carbons", "[#6]", 0, 40)
    not_within_limit = FilterCatalog.FilterMatchOps.Not(within_limit)

    # (chain length, expected results for too_many / within_limit / negation)
    table = (
        (40, (False, True, False)),
        (41, (True, False, True)),
    )
    matchers = (too_many, within_limit, not_within_limit)
    for n_carbons, expectations in table:
        chain = Chem.MolFromSmiles("C" * n_carbons)
        for candidate, should_match in zip(matchers, expectations):
            check = self.assertTrue if should_match else self.assertFalse
            check(candidate.HasMatch(chain))
def get_filters_list(self):
    """
    This loads in the filters which will be used.

    Returns:
    :returns: list filters_list: four RDKit FilterCatalog objects, built
        from the PAINS_A, PAINS_B, PAINS_C and PAINS catalogs, in that order
    """

    # PAINS should include PAINS_A, PAINS_B, and PAINS_C, but because RDKit
    # documentation doesn't specify this explicitly all 4 of the PAINS
    # FilterCatalogs are included for precaution.
    catalog_ids = (
        FilterCatalogParams.FilterCatalogs.PAINS_A,
        FilterCatalogParams.FilterCatalogs.PAINS_B,
        FilterCatalogParams.FilterCatalogs.PAINS_C,
        FilterCatalogParams.FilterCatalogs.PAINS,
    )

    filters_list = []
    for catalog_id in catalog_ids:
        # One parameter object per catalog so each FilterCatalog holds
        # exactly one PAINS family.
        params = FilterCatalogParams()
        params.AddCatalog(catalog_id)
        filters_list.append(FilterCatalog.FilterCatalog(params))

    return filters_list
def test17bAddRecursiveQueriesToReaction(self):
    """Expand a named query on a reactant atom using functional-group SMARTS.

    The first atom of the first reactant template is tagged with the
    'carboxylicacid' query; after AddRecursiveQueriesToReaction the template
    must match a carboxylate but not plain methane.
    """
    from rdkit.Chem import FilterCatalog

    rxn = rdChemReactions.ReactionFromSmarts("[C:1][O:2].[N:3]>>[C:1][N:2]")
    self.assertTrue(rxn)
    rxn.Initialize()
    rxn.GetReactantTemplate(0).GetAtomWithIdx(0).SetProp(
        'query', 'carboxylicacid')

    hierarchy = FilterCatalog.GetFlattenedFunctionalGroupHierarchy()
    self.assertIn('CarboxylicAcid', hierarchy)
    # The atom property uses the lower-cased name, so the lookup table is
    # keyed by lower-cased group names.
    querydefs = {k.lower(): v for k, v in hierarchy.items()}

    rxn.AddRecursiveQueriesToReaction(querydefs, 'query')
    q = rxn.GetReactantTemplate(0)

    m = Chem.MolFromSmiles('C(=O)[O-].N')
    self.assertTrue(m.HasSubstructMatch(q))
    m = Chem.MolFromSmiles('C.N')
    self.assertFalse(m.HasSubstructMatch(q))
def pains(self):
    """
    Baell and Holloway (2010) New Substructure Filters for Removal of Pan
    Assay Interference Compounds (PAINS) from Screening Libraries and for
    Their Exclusion in Bioassays

    Screens ``self.mol`` against RDKit's PAINS catalog, which flags
    promiscuous compounds likely to show activity regardless of the target.

    Returns:
        Boolean of whether the molecule triggers the PAINS filter.
    """
    pains_id = FilterCatalog.FilterCatalogParams.FilterCatalogs.PAINS
    pains_params = FilterCatalog.FilterCatalogParams()
    pains_params.AddCatalog(pains_id)
    # A fresh catalog is built on every call.
    return FilterCatalog.FilterCatalog(pains_params).HasMatch(self.mol)
def test3ExclusionFilter(self):
    """Check that an ExclusionList rejects what its pattern matches.

    The aromatic-carbon matcher hits benzene; an ExclusionList holding that
    matcher must report no match and return no matches.
    """
    mol = Chem.MolFromSmiles("c1ccccc1")

    pat = Chem.MolFromSmarts("c:c:c:c:c")
    matcher = FilterCatalog.SmartsMatcher("Five aromatic carbons", pat)
    self.assertEqual(matcher.GetMinCount(), 1)
    self.assertTrue(matcher.HasMatch(mol))
    matches = matcher.GetMatches(mol)

    exclusionFilter = FilterCatalog.ExclusionList()
    exclusionFilter.AddPattern(matcher)
    self.assertFalse(exclusionFilter.HasMatch(mol))
    matches2 = exclusionFilter.GetMatches(mol)

    # The plain matcher yields matches, the exclusion list yields none.
    self.assertTrue(matches)
    self.assertFalse(matches2)
def brenk(self):
    """
    Brenk (2008) Lessons Learnt from Assembling Screening Libraries for
    Drug Discovery for Neglected Diseases

    Screens ``self.mol`` against RDKit's BRENK catalog. Brenk's Structural
    Alert filter finds fragments "putatively toxic, chemically reactive,
    metabolically unstable or to bear properties responsible for poor
    pharmacokinetics."

    Returns:
        Boolean of whether the molecule triggers the Brenk filter.
    """
    brenk_id = FilterCatalog.FilterCatalogParams.FilterCatalogs.BRENK
    brenk_params = FilterCatalog.FilterCatalogParams()
    brenk_params.AddCatalog(brenk_id)
    # A fresh catalog is built on every call.
    return FilterCatalog.FilterCatalog(brenk_params).HasMatch(self.mol)
def generate_fingerprints_and_create_list(self):
    """Score the predicted molecules and build a shortlist mask.

    Reads ``self.predicted['molecules']`` and ``self.true_pos['molecules']``,
    adds the columns 'similarities', 'sa_score', 'qeds' and 'logp' to
    ``self.predicted``, stores the per-molecule Brenk match flags in
    ``self.brenk`` and computes ``shortlist_mask``.

    NOTE(review): ``shortlist_mask`` is neither used nor returned in the
    visible code - the function may continue beyond this excerpt.
    """
    # Generate Morgan fingerprints (2048 bits, radius 2) of the predicted
    # ligands and of the known ligands.
    gen_mo = rdFingerprintGenerator.GetMorganGenerator(fpSize=2048, radius=2)
    predicted_fps = [
        gen_mo.GetFingerprint(mol) for mol in self.predicted['molecules']
    ]
    true_fps = [
        gen_mo.GetFingerprint(mol) for mol in self.true_pos['molecules']
    ]
    # For each predicted fingerprint keep the highest Tanimoto similarity to
    # any known-ligand fingerprint. (`count` is not used in the loop body.)
    similarities = list()
    for count, mol in enumerate(predicted_fps):
        tanimoto_values = ([
            DataStructs.TanimotoSimilarity(mol, i) for i in true_fps
        ])
        index_of_highest = np.argmax(tanimoto_values)
        similarities.append(tanimoto_values[index_of_highest])
    # Synthetic-accessibility score; module code is in:
    # https://github.com/rdkit/rdkit/tree/master/Contrib/SA_Score
    sa_score = [
        sascorer.calculateScore(i) for i in list(self.predicted['molecules'])
    ]
    # Create a list holding the QED drug-likeness score.
    # Reference: https://doi.org/10.1038/nchem.1243
    qeds = [qed(mol) for mol in self.predicted['molecules']]
    # Create a list holding logP.
    logp = [Descriptors.MolLogP(m) for m in self.predicted['molecules']]
    # Filter catalog usage instructions are here:
    # https://github.com/rdkit/rdkit/pull/536
    params = FilterCatalogParams()
    params.AddCatalog(FilterCatalogParams.FilterCatalogs.BRENK)
    catalog = FilterCatalog(params)
    # True where the molecule matches at least one Brenk catalog entry.
    self.brenk = np.array(
        [catalog.HasMatch(m) for m in self.predicted['molecules']])
    # Add these lists as columns to the 'predicted' pd.DataFrame.
    self.predicted['similarities'] = similarities
    self.predicted['sa_score'] = sa_score
    self.predicted['qeds'] = qeds
    self.predicted['logp'] = logp
    # NOTE(review): looks like a leftover debug print - confirm it is wanted.
    print(self.predicted['logp'] < 6)
    # Keep molecules with similarity < 0.2, SA score < 4, QED > 0.25,
    # logP < 6 and no Brenk match.
    shortlist_mask = ((self.predicted['similarities'] < 0.2) &
                      (self.predicted['sa_score'] < 4) &
                      (self.predicted['qeds'] > 0.25) &
                      (self.predicted['logp'] < 6) & (~self.brenk))
def testPyFilter(self):
    """Exercise a Python-defined FilterMatcher through Or/Not and a catalog.

    The matcher always matches and reports the single atom pair (1, 1).
    The ``Not`` wrapper is checked again after the Python handle to the
    wrapped matcher has been deleted.
    """

    class MyFilterMatcher(FilterCatalog.FilterMatcher):

        def IsValid(self):
            return True

        def HasMatch(self, mol):
            return True

        def GetMatches(self, mol, vect):
            v = FilterCatalog.MatchTypeVect()
            v.append(FilterCatalog.IntPair(1, 1))
            match = FilterCatalog.FilterMatch(self, v)
            vect.append(match)
            return True

    def atom_pairs(filter_matches):
        # Convert each match's atom pairs into plain tuples for comparison.
        return [[tuple(x) for x in filtermatch.atomPairs]
                for filtermatch in filter_matches]

    func = MyFilterMatcher("FilterMatcher")
    self.assertEqual(func.GetName(), "FilterMatcher")
    mol = Chem.MolFromSmiles("c1ccccc1")
    self.assertEqual(func.HasMatch(mol), True)

    or_match = FilterMatchOps.Or(func, func)
    # Both operands are the same matcher, so the pair is reported twice.
    self.assertEqual(atom_pairs(or_match.GetMatches(mol)),
                     [[(1, 1)], [(1, 1)]])

    not_match = FilterMatchOps.Not(func)
    print(not_match)
    self.assertEqual(not_match.HasMatch(mol), False)
    # test memory
    del func

    self.assertEqual(not_match.HasMatch(mol), False)
    self.assertEqual(atom_pairs(not_match.GetMatches(mol)), [])

    entry = FilterCatalog.FilterCatalogEntry(
        "Bar", MyFilterMatcher("FilterMatcher"))
    fc = FilterCatalog.FilterCatalog()
    fc.AddEntry(entry)

    catalogEntry = fc.GetFirstMatch(mol)
    print(catalogEntry.GetDescription())
def pains(filtered_df):
    """Drop PAINS-matching compounds and label the remainder by activity.

    Args:
        filtered_df: DataFrame whose rows provide ``molecule_chembl_id``,
            ``smiles`` and ``pIC50``.

    Returns:
        A new DataFrame with columns 'ChEMBL_ID', 'smiles' and 'pIC50' for
        the rows whose molecule matches no PAINS entry, whatever
        ``create_mol`` adds, and an 'active' column holding 1.0 where
        pIC50 >= 6.3 and 0.0 otherwise.
    """
    params = FilterCatalogParams()
    # Build a catalog from all PAINS (A, B and C)
    params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS)
    catalog = FilterCatalog(params)

    # Collect the PAINS-free rows first and build the frame once; appending
    # row by row with .loc is quadratic in the number of rows.
    clean_rows = []
    for _, row in filtered_df.iterrows():
        cur_mol = Chem.MolFromSmiles(row.smiles)
        # NOTE(review): MolFromSmiles returns None for unparsable SMILES and
        # that None is passed straight to the catalog, as before - confirm
        # the input is pre-validated.
        if catalog.GetFirstMatch(cur_mol) is None:
            clean_rows.append(
                [row.molecule_chembl_id, row.smiles, row.pIC50])

    df_new = pd.DataFrame(clean_rows, columns=['ChEMBL_ID', 'smiles', 'pIC50'])

    # Create molecules from smiles and their fingerprints (create_mol is
    # defined elsewhere; presumably it adds columns to df_new in place).
    create_mol(df_new, 2048)

    # Mark every molecule with a pIC50 of at least 6.3 as active.
    df_new['active'] = (df_new['pIC50'] >= 6.3).astype(float)
    return df_new
def buildFilterCatalog(
        filter_csv='SubstructureFilter_HitTriaging_wPubChemExamples.csv'):
    """Build a FilterCatalog from a CSV table of SMARTS substructure filters.

    Args:
        filter_csv: Path of the CSV file to read. It must provide the columns
            MIN_COUNT, PATTERN_NAME, SET_NAME, SEVERITY_SCORE, COVALENT,
            SPECIAL_MOL and SMARTS. Defaults to the file name that was
            previously hard-coded.

    Returns:
        A FilterCatalog with one SmartsMatcher entry per CSV row. Each entry
        is named ``<pattern>_min(<count>)__<severity>__<covalent>__<special>``
        and carries the row's SET_NAME in its 'Scope' property.
    """
    inhousefilter = pd.read_csv(filter_csv)
    inhouseFiltersCat = FilterCatalog.FilterCatalog()
    for i, row in enumerate(inhousefilter.itertuples(index=False)):
        # A MIN_COUNT of 0 in the table means "at least one match".
        mincount = int(row.MIN_COUNT) if row.MIN_COUNT != 0 else 1
        pname_final = '{0}_min({1})__{2}__{3}__{4}'.format(
            row.PATTERN_NAME, mincount, row.SEVERITY_SCORE, row.COVALENT,
            row.SPECIAL_MOL)
        fil = FilterCatalog.SmartsMatcher(pname_final, row.SMARTS, mincount)
        inhouseFiltersCat.AddEntry(
            FilterCatalog.FilterCatalogEntry(pname_final, fil))
        # The property is set on the entry as stored in the catalog; rows
        # are added in order, so the i-th row is the i-th entry.
        inhouseFiltersCat.GetEntry(i).SetProp('Scope', row.SET_NAME)
    return inhouseFiltersCat
def painspredict(thefile, theoutput):
    """Screen a SMILES file against the NIH catalog and write a report.

    For every molecule the supplier can parse, one line is written to
    *theoutput*: the running index, the matching split input line, either
    "PAINS" plus the first matching entry's description or "PAINS OK", and
    the fraction of sp3 carbons.

    Args:
        thefile: Path of the SMILES input file. The file is modified: its
            first line is appended to its end on every call.
        theoutput: Path of the report file; it is overwritten.
    """
    # Remove a stale fixed-name output file, tolerating its absence (the
    # unconditional os.remove crashed when the file did not exist).
    if os.path.exists('output.txt'):
        os.remove('output.txt')

    with open(theoutput, 'w+') as f1:
        params = FilterCatalogParams()
        params.AddCatalog(FilterCatalogParams.FilterCatalogs.NIH)
        catalog = FilterCatalog(params)

        # The supplier is created before the input file is appended to but
        # only iterated afterwards; this ordering is kept from the original.
        suppl = Chem.SmilesMolSupplier(thefile)

        with open(thefile, 'r') as inf:
            first_line = inf.readline()
        # NOTE(review): this mutates the caller's input file on every call;
        # presumably a workaround for the supplier skipping the first line
        # as a title - confirm before removing.
        with open(thefile, 'a') as inf:
            inf.write(first_line)
        with open(thefile, 'r') as inf:
            sub_strct = [line.rstrip().split(" ") for line in inf]

        ms = [x for x in suppl if x is not None]
        # NOTE(review): unparsable molecules are dropped from `ms` but their
        # lines stay in `sub_strct`, so the two can drift out of step.
        for i, mol in enumerate(ms):
            entry = catalog.GetFirstMatch(mol)
            sphybrid = Chem.rdMolDescriptors.CalcFractionCSP3(mol)
            if entry is not None:
                print(i, sub_strct[i], "PAINS", entry.GetDescription(),
                      "Fsp3", sphybrid, file=f1)
            else:
                print(i, sub_strct[i], "PAINS OK", "Fsp3", sphybrid, file=f1)
def testMWFilter(self):
    """Add a Python molecular-weight matcher to a catalog and query it.

    The matcher flags molecules whose exact molecular weight lies outside
    [minMw, maxMw]. Only the entry description is asserted; the catalog
    query just has to complete without raising.
    """

    class MWFilter(FilterCatalog.FilterMatcher):

        def __init__(self, minMw, maxMw):
            FilterCatalog.FilterMatcher.__init__(self, "MW violation")
            self.minMw = minMw
            self.maxMw = maxMw

        def IsValid(self):
            return True

        def HasMatch(self, mol):
            mw = rdMolDescriptors.CalcExactMolWt(mol)
            return not self.minMw <= mw <= self.maxMw

    entry = FilterCatalog.FilterCatalogEntry("MW Violation",
                                             MWFilter(100, 500))
    fc = FilterCatalog.FilterCatalog()
    fc.AddEntry(entry)
    self.assertEqual(entry.GetDescription(), "MW Violation")

    mol = Chem.MolFromSmiles("c1ccccc1")
    # The result was never inspected by this test.
    fc.GetFirstMatch(mol)
def test1FilterMatchOps(self):
    """Check And / Or / Not combinations of a matcher and its exclusion list.

    The SmartsMatcher hits benzene and the ExclusionList built from it does
    not, so And is False, Not(And) is True and Or is True.
    """
    mol = Chem.MolFromSmiles("c1ccccc1")

    pat = Chem.MolFromSmarts("c:c:c:c:c")
    matcher = FilterCatalog.SmartsMatcher("Five aromatic carbons", pat)
    self.assertEqual(matcher.GetMinCount(), 1)
    self.assertTrue(matcher.HasMatch(mol))
    # Called for its side effects only; the result is not checked here.
    matcher.GetMatches(mol)

    matcher2 = FilterCatalog.ExclusionList()
    matcher2.SetExclusionPatterns([matcher])
    self.assertFalse(matcher2.HasMatch(mol))

    and_match = FilterMatchOps.And(matcher, matcher2)
    self.assertFalse(and_match.HasMatch(mol))
    not_match = FilterMatchOps.Not(and_match)
    self.assertTrue(not_match.HasMatch(mol))
    or_match = FilterMatchOps.Or(matcher, matcher2)
    self.assertTrue(or_match.HasMatch(mol))

    print(and_match)
    print(or_match)
    print(not_match)
def get_filters(self):
    """
    Build the filter catalog which will be used.

    Returns:
    :returns: rdkit.Chem.rdfiltercatalog.FilterCatalog filters: A catalog
        holding the NIH filters
    """

    # Parameters selecting only the NIH catalog.
    nih_params = FilterCatalogParams()
    nih_params.AddCatalog(FilterCatalogParams.FilterCatalogs.NIH)

    # The catalog built from those parameters is the set of NIH filters.
    return FilterCatalog.FilterCatalog(nih_params)
# NOTE(review): this excerpt starts and ends mid-script. The statement below
# presumably belongs to a loop over input rows whose header is not visible;
# its original nesting cannot be determined from here.
smiles_.append(row[0])
# NOTE(review): the rows were collected into `smiles_`, but `smiles` is what
# gets passed on - confirm both names are intended.
data = get_data_from_smiles(smiles)
data = data.mols(flatten=True)
# One parameter object per PAINS catalog variant.
params = [
    FilterCatalogParams(),
    FilterCatalogParams(),
    FilterCatalogParams(),
    FilterCatalogParams()
]
params[0].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS)
params[1].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_A)
params[2].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_B)
params[3].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_C)
catalog_pains = FilterCatalog(params[0])
catalog_painsA = FilterCatalog(params[1])
catalog_painsB = FilterCatalog(params[2])
catalog_painsC = FilterCatalog(params[3])
# Indices into `data` of the molecules that match each catalog.
entries_pains = []
entries_painsA = []
entries_painsB = []
entries_painsC = []
entries_painsAll = []
for i in range(len(data)):
    if catalog_pains.HasMatch(data[i]):
        entries_pains.append(i)
    if catalog_painsA.HasMatch(data[i]):
        entries_painsA.append(i)
    # NOTE(review): the PAINS_B / PAINS_C checks are presumably cut off after
    # this point; their catalogs and lists are otherwise unused here.
def hierarchy(matcher):
    """Wrap *matcher* in a FilterHierarchyMatcher node and return the node.

    Asserts that the node reports the same name as the wrapped matcher.
    ``self`` is not a parameter: it is resolved from the enclosing scope,
    so this helper only works where a test-case ``self`` is visible.
    """
    node = FilterCatalog.FilterHierarchyMatcher(matcher)
    self.assertEqual(matcher.GetName(), node.GetName())
    return node
def testFilterHierarchyMatcher(self):
    """Build a halogen matcher hierarchy by hand and check what it reports.

    The root "Halogen" matcher gets the children Halogen.Aromatic,
    Halogen.NotFluorine (with aliphatic/aromatic children) and
    Halogen.Bromine (with aliphatic/aromatic/bromoketone children). The
    names returned by ``root.GetMatches`` are then checked for five
    halogenated molecules.
    """
    root = FilterCatalog.FilterHierarchyMatcher()
    h = FilterCatalog.SmartsMatcher(
        "Halogen", "[$([F,Cl,Br,I]-!@[#6]);!$([F,Cl,Br,I]"
        "-!@C-!@[F,Cl,Br,I]);!$([F,Cl,Br,I]-[C,S]"
        "(=[O,S,N]))]", 1)
    root.SetPattern(h)

    def hierarchy(matcher):
        # Wrap a matcher in a hierarchy node that keeps the matcher's name.
        node = FilterCatalog.FilterHierarchyMatcher(matcher)
        self.assertEqual(matcher.GetName(), node.GetName())
        return node

    def names(filter_matches):
        return [match.filterMatch.GetName() for match in filter_matches]

    sm = FilterCatalog.SmartsMatcher("Halogen.Aromatic",
                                     "[F,Cl,Br,I;$(*-!@c)]")
    root.AddChild(hierarchy(sm))

    sm = FilterCatalog.SmartsMatcher(
        "Halogen.NotFluorine", "[$([Cl,Br,I]-!@[#6]);!$([Cl,Br,I]"
        "-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S]"
        "(=[O,S,N]))]")
    node = hierarchy(sm)
    halogen_notf_children = [
        hierarchy(x) for x in [
            FilterCatalog.SmartsMatcher(
                "Halogen.NotFluorine.Aliphatic",
                "[$([Cl,Br,I]-!@C);!$([Cl,Br,I]"
                "-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S](=[O,S,N]))]"),
            FilterCatalog.SmartsMatcher("Halogen.NotFluorine.Aromatic",
                                        "[$([Cl,Br,I]-!@c)]")
        ]
    ]
    for child in halogen_notf_children:
        node.AddChild(child)
    root.AddChild(node)

    sm = FilterCatalog.SmartsMatcher(
        "Halogen.Bromine", "[Br;$([Br]-!@[#6]);!$([Br]-!@C-!@[F,Cl,Br,I])"
        ";!$([Br]-[C,S](=[O,S,N]))]", 1)
    node = hierarchy(sm)
    halogen_bromine_children = [
        hierarchy(x) for x in [
            FilterCatalog.SmartsMatcher(
                "Halogen.Bromine.Aliphatic",
                "[Br;$(Br-!@C);!$(Br-!@C-!@[F,Cl,Br,I]);"
                "!$(Br-[C,S](=[O,S,N]))]"),
            FilterCatalog.SmartsMatcher("Halogen.Bromine.Aromatic",
                                        "[Br;$(Br-!@c)]"),
            FilterCatalog.SmartsMatcher("Halogen.Bromine.BromoKetone",
                                        "[Br;$(Br-[CH2]-C(=O)-[#6])]")
        ]
    ]
    for child in halogen_bromine_children:
        node.AddChild(child)
    root.AddChild(node)

    # unittest assertions replace the bare `assert` statements, which are
    # stripped when Python runs with -O.
    m = Chem.MolFromSmiles("CCl")
    self.assertTrue(h.HasMatch(m))
    res = root.GetMatches(m)
    self.assertEqual(len(res), 1)
    self.assertEqual(names(res), ['Halogen.NotFluorine.Aliphatic'])

    m = Chem.MolFromSmiles("c1ccccc1Cl")
    self.assertTrue(h.HasMatch(m))
    res = root.GetMatches(m)
    self.assertEqual(len(res), 2)

    m = Chem.MolFromSmiles("c1ccccc1Br")
    self.assertTrue(h.HasMatch(m))
    res = root.GetMatches(m)
    self.assertEqual(len(res), 3)
    self.assertEqual(names(res), [
        'Halogen.Aromatic', 'Halogen.NotFluorine.Aromatic',
        'Halogen.Bromine.Aromatic'
    ])

    m = Chem.MolFromSmiles("c1ccccc1F")
    self.assertTrue(h.HasMatch(m))
    res = root.GetMatches(m)
    self.assertEqual(len(res), 1)
    self.assertEqual(names(res), ['Halogen.Aromatic'])

    m = Chem.MolFromSmiles("CBr")
    self.assertTrue(h.HasMatch(m))
    res = root.GetMatches(m)
    self.assertEqual(
        names(res),
        ['Halogen.NotFluorine.Aliphatic', 'Halogen.Bromine.Aliphatic'])
def GetMatches(self, mol, vect):
    """Append one fixed FilterMatch to *vect* and report success.

    The match always carries the single atom pair (1, 1); *mol* is not
    inspected.

    Returns:
        Always True.
    """
    atom_pairs = FilterCatalog.MatchTypeVect()
    atom_pairs.append(FilterCatalog.IntPair(1, 1))
    vect.append(FilterCatalog.FilterMatch(self, atom_pairs))
    return True
def __init__(self, n_cores=-1):
    """Store *n_cores* and build the PAINS filter catalog.

    Args:
        n_cores: Stored unchanged on ``self.n_cores``; defaults to -1.
    """
    self.n_cores = n_cores

    pains_id = FilterCatalog.FilterCatalogParams.FilterCatalogs.PAINS
    pains_params = FilterCatalog.FilterCatalogParams()
    pains_params.AddCatalog(pains_id)
    self.filter = FilterCatalog.FilterCatalog(pains_params)
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import FilterCatalog
from rdkit.Chem.FilterCatalog import FilterCatalogParams
import json, gzip
from ijson import items
from django.core.exceptions import ValidationError
from threading import Thread
import CloseableQueue
#from multiprocessing.dummy import Pool
from mol_parsing.functions import find_lib_type, read_input, write_json_results, write_results
from mol_parsing.rdkit_parse import parse_mol_json, generate_mols_from_json
# NOTE: the top-level `StringIO` module exists only on Python 2.
from StringIO import StringIO

# Module-level catalog combining the PAINS_A, PAINS_B and PAINS_C families;
# it is built once, at import time.
params = FilterCatalog.FilterCatalogParams()
params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_A)
params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_B)
params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_C)
catalog = FilterCatalog.FilterCatalog(params)


def request_params(request):
    """Read the screening input and validate the optional "filter" parameter.

    Raises:
        ValidationError: if ``request.GET["filter"]`` is present and is not
            one of "INCLUDE_MATCHING", "INCLUDE_NON_MATCHING" or
            "INCLUDE_ALL".

    NOTE(review): nothing is returned in the visible code - the function
    presumably continues beyond this excerpt.
    """
    #screen_lib, mol_type = find_lib_type(request)
    screen_lib, mol_type = read_input(request)
    if "filter" in request.GET:
        # NOTE(review): this local name shadows the builtin `filter`.
        filter = request.GET["filter"]
        if not (filter == "INCLUDE_MATCHING" or filter == "INCLUDE_NON_MATCHING" or filter == "INCLUDE_ALL"):
            raise ValidationError("Invalid filter value " + filter)
def test2FilterCatalogTest(self):
    """Check entry counts and known hits for each PAINS catalog variant.

    Every variant is built from params, from its pickle (when serialization
    is available) and directly from the catalog id. All builds must hold
    the expected number of entries and flag a known example molecule.
    """
    tests = ((FilterCatalogParams.FilterCatalogs.PAINS_A, 16),
             (FilterCatalogParams.FilterCatalogs.PAINS_B, 55),
             (FilterCatalogParams.FilterCatalogs.PAINS_C, 409),
             (FilterCatalogParams.FilterCatalogs.PAINS, 409 + 16 + 55))

    for catalog_idx, num in tests:
        params = FilterCatalog.FilterCatalogParams()
        print("*" * 44)
        print("Testing:", catalog_idx, int(catalog_idx))
        self.assertTrue(params.AddCatalog(catalog_idx))
        catalog1 = FilterCatalog.FilterCatalog(params)

        if FilterCatalog.FilterCatalogCanSerialize():
            serialized = catalog1.Serialize()
            catalog2 = FilterCatalog.FilterCatalog(serialized)
            catalogs = [catalog1, catalog2]
        else:
            catalogs = [catalog1]

        catalogs.append(FilterCatalog.FilterCatalog(catalog_idx))
        for catalog in catalogs:
            self.assertEqual(catalog.GetNumEntries(), num)

            if catalog_idx in [
                    FilterCatalogParams.FilterCatalogs.PAINS_A,
                    FilterCatalogParams.FilterCatalogs.PAINS
            ]:
                # http://chemistrycompass.com/chemsearch/58909/
                mol = Chem.MolFromSmiles(
                    "O=C(Cn1cnc2c1c(=O)n(C)c(=O)n2C)N/N=C/c1c(O)ccc2c1cccc2"
                )
                entry = catalog.GetFirstMatch(mol)
                for key in entry.GetPropList():
                    if key == "Reference":
                        self.assertEqual(
                            entry.GetProp(key),
                            "Baell JB, Holloway GA. New Substructure Filters for "
                            "Removal of Pan Assay Interference Compounds (PAINS) "
                            "from Screening Libraries and for Their Exclusion in "
                            "Bioassays. J Med Chem 53 (2010) 2719D40. "
                            "doi:10.1021/jm901137j.")
                    elif key == "Scope":
                        self.assertEqual(entry.GetProp(key),
                                         "PAINS filters (family A)")

                self.assertEqual(entry.GetDescription(),
                                 "hzone_phenol_A(479)")
                result = catalog.GetMatches(mol)
                self.assertEqual(len(result), 1)

                for entry in result:
                    for filtermatch in entry.GetFilterMatches(mol):
                        self.assertEqual(str(filtermatch.filterMatch),
                                         "hzone_phenol_A(479)")
                        atomPairs = [
                            tuple(x) for x in filtermatch.atomPairs
                        ]
                        self.assertEqual(atomPairs, [(0, 23), (1, 22),
                                                     (2, 20), (3, 19),
                                                     (4, 25), (5, 24),
                                                     (6, 18), (7, 17),
                                                     (8, 16), (9, 21)])

            elif catalog_idx == FilterCatalogParams.FilterCatalogs.PAINS_B:
                mol = Chem.MolFromSmiles(
                    "FC(F)(F)Oc1ccc(NN=C(C#N)C#N)cc1")  # CHEMBL457504
                entry = catalog.GetFirstMatch(mol)
                self.assertTrue(entry)
                self.assertEqual(entry.GetDescription(), "cyano_imine_B(17)")

            elif catalog_idx == FilterCatalogParams.FilterCatalogs.PAINS_C:
                mol = Chem.MolFromSmiles(
                    "O=C1C2OC2C(=O)c3cc4CCCCc4cc13")  # CHEMBL476649
                entry = catalog.GetFirstMatch(mol)
                self.assertTrue(entry)
                self.assertEqual(entry.GetDescription(),
                                 "keto_keto_gamma(5)")