Ejemplo n.º 1
0
    def testRemoveEntry(self):
        """Removing one entry shrinks the ZINC catalog by exactly one.

        Counts how many entries share the description of the entry at
        index 10, removes that entry, and then checks that both the catalog
        size and the number of entries with that description dropped by one.
        """
        params = FilterCatalog.FilterCatalogParams(
            FilterCatalogParams.FilterCatalogs.ZINC)
        catalog = FilterCatalog.FilterCatalog(params)
        entry = catalog.GetEntryWithIdx(10)
        desc = entry.GetDescription()

        # Collect the descriptions once; both the count and the set
        # difference printed below are derived from this list.
        descs_before = [
            catalog.GetEntryWithIdx(i).GetDescription()
            for i in range(catalog.GetNumEntries())
        ]
        count = descs_before.count(desc)
        print("Count", count)
        sz = catalog.GetNumEntries()
        print("*" * 44)
        self.assertTrue(catalog.RemoveEntry(entry))
        # Release the Python reference to the removed entry before the
        # catalog is queried again.
        del entry
        self.assertEqual(catalog.GetNumEntries(), sz - 1)

        descs_after = [
            catalog.GetEntryWithIdx(i).GetDescription()
            for i in range(catalog.GetNumEntries())
        ]
        # Descriptions that disappeared completely from the catalog.
        print(set(descs_before) - set(descs_after))

        # assertEqual, not the alias assertEquals (removed in Python 3.12).
        self.assertEqual(count, descs_after.count(desc) + 1)
Ejemplo n.º 2
0
    def testThreadedRunner(self):
        """RunFilterCatalog flags the PAINS examples and reports bad SMILES.

        Reads ``pains.smi`` from the RDKit test data (located through the
        ``RDBASE`` environment variable), runs the combined PAINS A/B/C
        catalog over its SMILES and checks the first hit of every result.
        A second run feeds an unparsable string and checks the reported
        description.
        """
        path = os.path.join(os.environ['RDBASE'], 'Code', 'GraphMol',
                            'test_data', 'pains.smi')
        with open(path) as f:
            # The first line of the file is skipped.
            smiles = [line.strip() for line in f.readlines()][1:]

        self.assertEqual(len(smiles), 3)
        params = FilterCatalog.FilterCatalogParams()
        params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_A)
        params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_B)
        params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_C)
        fc = FilterCatalog.FilterCatalog(params)

        results = FilterCatalog.RunFilterCatalog(fc, smiles)
        self.assertEqual(len(results), 3)

        descriptions = [
            "hzone_phenol_A(479)", "cyano_imine_B(17)", "keto_keto_gamma(5)"
        ]

        # results and descriptions have equal length (asserted above).
        for expected, res in zip(descriptions, results):
            self.assertTrue(len(res) > 0)
            self.assertEqual(res[0].GetDescription(), expected)

        # Test with some bad input
        smiles = ['mydoghasfleas']
        results = FilterCatalog.RunFilterCatalog(fc, smiles, numThreads=3)
        self.assertEqual(len(results[0]), 1)
        self.assertEqual(results[0][0].GetDescription(),
                         "no valid RDKit molecule")
Ejemplo n.º 3
0
    def test0FilterCatalogEntry(self):
        """Exercise the basics of SmartsMatcher, FilterCatalogEntry and
        ExclusionList on benzene."""
        matcher = FilterCatalog.SmartsMatcher("Aromatic carbon chain")
        # No pattern has been set yet, so the matcher reports itself invalid.
        self.assertFalse(matcher.IsValid())

        pat = Chem.MolFromSmarts("c:c:c:c:c")
        matcher.SetPattern(pat)
        matcher.SetMinCount(1)

        entry = FilterCatalog.FilterCatalogEntry("Bar", matcher)
        if FilterCatalog.FilterCatalogCanSerialize():
            # Only checks that serialization does not raise; the payload
            # itself is not inspected.
            entry.Serialize()

        self.assertEqual(entry.GetDescription(), "Bar")
        self.assertEqual(matcher.GetMinCount(), 1)
        self.assertEqual(matcher.GetMaxCount(), 2**32 - 1)
        self.assertTrue(matcher.IsValid())

        entry.SetDescription("Foo")
        self.assertEqual(entry.GetDescription(), "Foo")

        mol = Chem.MolFromSmiles("c1ccccc1")
        self.assertTrue(matcher.HasMatch(mol))

        # A matcher built directly from a pattern gets a default name.
        matcher = FilterCatalog.SmartsMatcher(pat)
        self.assertEqual(str(matcher), "Unnamed SmartsMatcher")
        self.assertEqual(matcher.GetMinCount(), 1)
        self.assertTrue(matcher.HasMatch(mol))
        self.assertTrue(matcher.GetMatches(mol))

        # The exclusion list must wrap the SMARTS matcher. Previously the
        # freshly created (empty) ExclusionList was rebound to ``matcher``
        # and then passed to itself as its own exclusion pattern.
        exclusion = FilterCatalog.ExclusionList()
        exclusion.SetExclusionPatterns([matcher])
        self.assertFalse(exclusion.HasMatch(mol))
Ejemplo n.º 4
0
  def testFunctionalGroupHierarchy(self):
    """Check the functional-group hierarchy hits for four halogenated molecules.

    The same expectations are checked against the catalog returned by
    GetFunctionalGroupHierarchy() and, when serialization is available,
    against a catalog rebuilt from its serialized form.
    """
    hierarchy = FilterCatalog.GetFunctionalGroupHierarchy()

    # (molecule, expected filter-match names in reported order)
    matches = [
      (Chem.MolFromSmiles("CCl"),
       ['Halogen.Aliphatic', 'Halogen.NotFluorine.Aliphatic']),
      (Chem.MolFromSmiles("c1ccccc1Cl"),
       ['Halogen.Aromatic', 'Halogen.NotFluorine.Aromatic']),
      (Chem.MolFromSmiles("c1ccccc1F"),
       ['Halogen.Aromatic']),
      (Chem.MolFromSmiles("CBr"),
       ['Halogen.Aliphatic', 'Halogen.NotFluorine.Aliphatic',
        'Halogen.Bromine.Aliphatic']),
    ]

    catalogs = [hierarchy]
    if FilterCatalog.FilterCatalogCanSerialize():
      catalogs.append(FilterCatalog.FilterCatalog(hierarchy.Serialize()))

    for catalog in catalogs:
      # test GetMatches API
      for mol, res in matches:
        for entry in catalog.GetMatches(mol):
          hits = [match.filterMatch.GetName() for match in entry.GetFilterMatches(mol)]
          self.assertEqual(res, hits)

      # test GetFilterMatches API
      for mol, res in matches:
        self.assertEqual(
          res, [match.filterMatch.GetName() for match in catalog.GetFilterMatches(mol)])
Ejemplo n.º 5
0
 def testAddEntry(self):
     """Smoke test: add one entry to an empty catalog, then release both.

     There are no assertions; the test passes when construction, AddEntry
     and the deletions complete without raising.
     """
     too_many_carbons = FilterCatalog.SmartsMatcher("Too many carbons", "[#6]", 41)
     catalog_entry = FilterCatalog.FilterCatalogEntry("Bar", too_many_carbons)
     catalog = FilterCatalog.FilterCatalog()
     catalog.AddEntry(catalog_entry)
     # The entry reference is dropped first, the catalog second.
     del catalog_entry, catalog
Ejemplo n.º 6
0
    def testThreadedPythonFilter(self):
        """Run a Python-defined matcher through RunFilterCatalog on 3 threads.

        The matcher flags molecules whose exact molecular weight lies outside
        a range. Its HasMatch also parses an invalid SMILES and logs an error
        message on every call, presumably to exercise logging from worker
        threads. The results themselves are not inspected.
        """
        class MWFilter(FilterCatalog.FilterMatcher):
            def __init__(self, minMw, maxMw):
                FilterCatalog.FilterMatcher.__init__(self, "MW violation")
                self.minMw, self.maxMw = minMw, maxMw

            def IsValid(self):
                return True

            def HasMatch(self, mol):
                weight = rdMolDescriptors.CalcExactMolWt(mol)
                violates = not (self.minMw <= weight <= self.maxMw)
                Chem.MolFromSmiles("---")
                Chem.LogErrorMsg("dasfsadf")
                return violates

        pains_file = os.path.join(os.environ['RDBASE'], 'Code', 'GraphMol',
                                  'test_data', 'pains.smi')
        with open(pains_file) as handle:
            stripped = [line.strip() for line in handle]
        # The first line of the file is skipped.
        smiles = stripped[1:]

        print("1")
        self.assertEqual(len(smiles), 3)

        print("2")
        mw_entry = FilterCatalog.FilterCatalogEntry("MW Violation",
                                                    MWFilter(100, 500))
        catalog = FilterCatalog.FilterCatalog()
        catalog.AddEntry(mw_entry)
        self.assertTrue(mw_entry.GetDescription() == "MW Violation")

        print("running")
        threaded_results = FilterCatalog.RunFilterCatalog(
            catalog, smiles * 10, numThreads=3)
Ejemplo n.º 7
0
    def testFlattenedFunctionalGroupHierarchy(self):
        """Check the flattened functional-group query definitions.

        For four halogenated molecules, the sorted names of all query
        patterns that match must equal the expected list, first with the
        default names and then with ``normalized=True`` (where the expected
        names are all lower case).
        """
        # test the default (non-normalized) groups
        queryDefs = FilterCatalog.GetFlattenedFunctionalGroupHierarchy()
        items = sorted(queryDefs.items())

        matches = [
            (Chem.MolFromSmiles("CCl"), [
                'Halogen', 'Halogen.Aliphatic', 'Halogen.NotFluorine',
                'Halogen.NotFluorine.Aliphatic'
            ]),
            (Chem.MolFromSmiles("c1ccccc1Cl"), [
                'Halogen', 'Halogen.Aromatic', 'Halogen.NotFluorine',
                'Halogen.NotFluorine.Aromatic'
            ]),
            (Chem.MolFromSmiles("c1ccccc1F"),
             ['Halogen', 'Halogen.Aromatic']),
            (Chem.MolFromSmiles("CBr"), [
                'Halogen',
                'Halogen.Aliphatic',
                'Halogen.Bromine',
                'Halogen.Bromine.Aliphatic',
                'Halogen.NotFluorine',
                'Halogen.NotFluorine.Aliphatic',
            ]),
        ]

        for mol, res in matches:
            hits = [name for name, pat in items if mol.HasSubstructMatch(pat)]
            self.assertEqual(hits, res)

        # test the normalized groups
        queryDefs = FilterCatalog.GetFlattenedFunctionalGroupHierarchy(
            normalized=True)
        items = sorted(queryDefs.items())

        matches = [
            (Chem.MolFromSmiles("CCl"), [
                'halogen', 'halogen.aliphatic', 'halogen.notfluorine',
                'halogen.notfluorine.aliphatic'
            ]),
            (Chem.MolFromSmiles("c1ccccc1Cl"), [
                'halogen', 'halogen.aromatic', 'halogen.notfluorine',
                'halogen.notfluorine.aromatic'
            ]),
            (Chem.MolFromSmiles("c1ccccc1F"),
             ['halogen', 'halogen.aromatic']),
            (Chem.MolFromSmiles("CBr"), [
                'halogen',
                'halogen.aliphatic',
                'halogen.bromine',
                'halogen.bromine.aliphatic',
                'halogen.notfluorine',
                'halogen.notfluorine.aliphatic',
            ]),
        ]

        for mol, res in matches:
            hits = [name for name, pat in items if mol.HasSubstructMatch(pat)]
            self.assertEqual(hits, res)
Ejemplo n.º 8
0
  def testZinc(self):
    """The ZINC catalog flags a 41-carbon chain and passes a 40-atom C/N chain."""
    params = FilterCatalog.FilterCatalogParams(FilterCatalogParams.FilterCatalogs.ZINC)
    catalog = FilterCatalog.FilterCatalog(params)
    self.assertTrue(catalog.GetNumEntries())

    m = Chem.MolFromSmiles("C" * 41)
    entry = catalog.GetFirstMatch(m)
    # Compare the description. The previous assertTrue(desc, "...") used the
    # expected text only as the failure message, so any non-empty
    # description passed.
    self.assertEqual(entry.GetDescription(), "Non-Hydrogen_atoms")

    m = Chem.MolFromSmiles("CN" * 20)
    self.assertIsNone(catalog.GetFirstMatch(m))
Ejemplo n.º 9
0
    def test4CountTests(self):
        """SmartsMatcher results for carbon counts against two count ranges.

        With the trailing arguments (0, 2) the matcher hits molecules with
        zero, one or two carbons but not three; with (1, 2) it misses a
        molecule with no carbon.
        """
        zero_to_two = FilterCatalog.SmartsMatcher("Carbon", "[#6]", 0, 2)
        for smi in ("N", "C", "CC"):
            self.assertTrue(zero_to_two.HasMatch(Chem.MolFromSmiles(smi)))
        self.assertFalse(zero_to_two.HasMatch(Chem.MolFromSmiles("CCC")))

        one_to_two = FilterCatalog.SmartsMatcher("Carbon", "[#6]", 1, 2)
        self.assertFalse(one_to_two.HasMatch(Chem.MolFromSmiles("N")))
Ejemplo n.º 10
0
    def testSmartsMatcherAPI(self):
        """Check three carbon-count matchers on chains of 40 and 41 carbons.

        ``too_many`` and the negation of ``in_range`` must hit only the
        41-carbon chain; ``in_range`` must hit only the 40-carbon chain.
        """
        too_many = FilterCatalog.SmartsMatcher("Too many carbons", "[#6]", 41)
        in_range = FilterCatalog.SmartsMatcher("ok # carbons", "[#6]", 0, 40)
        negated = FilterCatalog.FilterMatchOps.Not(in_range)

        for n_carbons, over_limit in ((40, False), (41, True)):
            chain = Chem.MolFromSmiles("C" * n_carbons)
            # Pick the assertion pair once per chain length.
            if over_limit:
                expect_hit, expect_miss = self.assertTrue, self.assertFalse
            else:
                expect_hit, expect_miss = self.assertFalse, self.assertTrue
            expect_hit(too_many.HasMatch(chain))
            expect_miss(in_range.HasMatch(chain))
            expect_hit(negated.HasMatch(chain))
Ejemplo n.º 11
0
    def get_filters_list(self):
        """
        Build one RDKit FilterCatalog per PAINS catalog.

        PAINS should already include PAINS_A, PAINS_B and PAINS_C, but
        because the RDKit documentation does not state this explicitly, all
        four PAINS FilterCatalogs are loaded as a precaution.

        Returns:
        :returns: list filters_list: four
            rdkit.Chem.rdfiltercatalog.FilterCatalog objects, for PAINS_A,
            PAINS_B, PAINS_C and PAINS, in that order
        """
        pains_catalogs = (
            FilterCatalogParams.FilterCatalogs.PAINS_A,
            FilterCatalogParams.FilterCatalogs.PAINS_B,
            FilterCatalogParams.FilterCatalogs.PAINS_C,
            FilterCatalogParams.FilterCatalogs.PAINS,
        )

        filters_list = []
        for pains_catalog in pains_catalogs:
            # Each catalog gets its own parameter object so the resulting
            # FilterCatalogs stay separate.
            params = FilterCatalogParams()
            params.AddCatalog(pains_catalog)
            filters_list.append(FilterCatalog.FilterCatalog(params))

        return filters_list
Ejemplo n.º 12
0
  def test17bAddRecursiveQueriesToReaction(self):
    """Expand a 'query' atom property using lower-cased functional-group names.

    The first atom of the first reactant template is tagged with the
    lower-case name 'carboxylicacid'. After AddRecursiveQueriesToReaction is
    called with a lower-cased copy of the flattened functional-group
    hierarchy, the template must match 'C(=O)[O-].N' and must not match
    'C.N'.
    """
    from rdkit.Chem import FilterCatalog
    reaction = rdChemReactions.ReactionFromSmarts("[C:1][O:2].[N:3]>>[C:1][N:2]")
    self.assertTrue(reaction)
    reaction.Initialize()
    reaction.GetReactantTemplate(0).GetAtomWithIdx(0).SetProp('query', 'carboxylicacid')
    lowered_defs = dict(
      (name.lower(), pattern)
      for name, pattern in FilterCatalog.GetFlattenedFunctionalGroupHierarchy().items())

    # The hierarchy itself uses the mixed-case key.
    self.assertTrue('CarboxylicAcid' in FilterCatalog.GetFlattenedFunctionalGroupHierarchy())
    reaction.AddRecursiveQueriesToReaction(lowered_defs, 'query')
    template = reaction.GetReactantTemplate(0)
    acid = Chem.MolFromSmiles('C(=O)[O-].N')
    self.assertTrue(acid.HasSubstructMatch(template))
    plain = Chem.MolFromSmiles('C.N')
    self.assertFalse(plain.HasSubstructMatch(template))
Ejemplo n.º 13
0
    def pains(self):
        """
        Check ``self.mol`` against RDKit's PAINS filter catalog.

        PAINS (Pan Assay Interference Compounds) filters come from Baell and
        Holloway (2010), "New Substructure Filters for Removal of Pan Assay
        Interference Compounds (PAINS) from Screening Libraries and for Their
        Exclusion in Bioassays". They target promiscuous compounds that are
        likely to show activity regardless of the target.

        A new catalog is built on every call.

        Returns:
            Boolean of whether the molecule triggers the PAINS filter.
        """
        pains_params = FilterCatalog.FilterCatalogParams()
        pains_family = FilterCatalog.FilterCatalogParams.FilterCatalogs.PAINS
        pains_params.AddCatalog(pains_family)
        return FilterCatalog.FilterCatalog(pains_params).HasMatch(self.mol)
Ejemplo n.º 14
0
    def test3ExclusionFilter(self):
        """An ExclusionList wrapping a matching pattern reports no match.

        The five-aromatic-carbon matcher hits benzene and returns matches;
        the exclusion list built from it neither matches nor returns matches.
        """
        benzene = Chem.MolFromSmiles("c1ccccc1")

        aromatic_chain = Chem.MolFromSmarts("c:c:c:c:c")
        smarts_matcher = FilterCatalog.SmartsMatcher("Five aromatic carbons",
                                                     aromatic_chain)
        self.assertTrue(smarts_matcher.GetMinCount() == 1)
        self.assertTrue(smarts_matcher.HasMatch(benzene))
        direct_hits = smarts_matcher.GetMatches(benzene)

        excluder = FilterCatalog.ExclusionList()
        excluder.AddPattern(smarts_matcher)
        self.assertFalse(excluder.HasMatch(benzene))

        excluded_hits = excluder.GetMatches(benzene)

        self.assertTrue(direct_hits)
        self.assertFalse(excluded_hits)
Ejemplo n.º 15
0
    def brenk(self):
        """
        Check ``self.mol`` against RDKit's Brenk structural-alert catalog.

        From Brenk (2008), "Lessons Learnt from Assembling Screening Libraries
        for Drug Discovery for Neglected Diseases". The structural alerts
        find fragments "putatively toxic, chemically reactive, metabolically
        unstable or to bear properties responsible for poor
        pharmacokinetics."

        A new catalog is built on every call.

        Returns:
            Boolean of whether the molecule triggers the Brenk filter.
        """
        brenk_params = FilterCatalog.FilterCatalogParams()
        brenk_family = FilterCatalog.FilterCatalogParams.FilterCatalogs.BRENK
        brenk_params.AddCatalog(brenk_family)
        return FilterCatalog.FilterCatalog(brenk_params).HasMatch(self.mol)
Ejemplo n.º 16
0
 def generate_fingerprints_and_create_list(self):
     """Score the predicted molecules and store the scores on ``self``.

     For every molecule in ``self.predicted['molecules']`` this computes the
     highest Tanimoto similarity to any molecule in
     ``self.true_pos['molecules']`` (Morgan fingerprints, radius 2, 2048
     bits), the synthetic-accessibility score, the QED score and logP. These
     are written to the ``similarities``, ``sa_score``, ``qeds`` and ``logp``
     columns of ``self.predicted``. Brenk catalog matches are stored as a
     boolean array in ``self.brenk``. A combined shortlist mask is computed
     last; nothing in the visible code uses it afterwards.
     """
     # Morgan fingerprints of the predicted and the known ligands.
     morgan = rdFingerprintGenerator.GetMorganGenerator(fpSize=2048,
                                                        radius=2)
     predicted_fps = [
         morgan.GetFingerprint(m) for m in self.predicted['molecules']
     ]
     true_fps = [
         morgan.GetFingerprint(m) for m in self.true_pos['molecules']
     ]

     # For each predicted ligand keep its best similarity to a known one.
     similarities = []
     for fp in predicted_fps:
         scores = [
             DataStructs.TanimotoSimilarity(fp, known) for known in true_fps
         ]
         best = np.argmax(scores)
         similarities.append(scores[best])

     # module code is in: https://github.com/rdkit/rdkit/tree/master/Contrib/SA_Score
     sa_score = [
         sascorer.calculateScore(m)
         for m in list(self.predicted['molecules'])
     ]
     # QED drug-likeness score, reference: https://doi.org/10.1038/nchem.1243
     qeds = [qed(m) for m in self.predicted['molecules']]
     logp = [Descriptors.MolLogP(m) for m in self.predicted['molecules']]

     # filter catalog usage instructions are here: https://github.com/rdkit/rdkit/pull/536
     brenk_params = FilterCatalogParams()
     brenk_params.AddCatalog(FilterCatalogParams.FilterCatalogs.BRENK)
     brenk_catalog = FilterCatalog(brenk_params)
     self.brenk = np.array(
         [brenk_catalog.HasMatch(m) for m in self.predicted['molecules']])

     # Attach the score lists as columns of the 'predicted' DataFrame.
     self.predicted['similarities'] = similarities
     self.predicted['sa_score'] = sa_score
     self.predicted['qeds'] = qeds
     self.predicted['logp'] = logp
     print(self.predicted['logp'] < 6)

     dissimilar = self.predicted['similarities'] < 0.2
     synthesizable = self.predicted['sa_score'] < 4
     drug_like = self.predicted['qeds'] > 0.25
     low_logp = self.predicted['logp'] < 6
     shortlist_mask = (dissimilar & synthesizable & drug_like & low_logp &
                       (~self.brenk))
Ejemplo n.º 17
0
    def testPyFilter(self):
        """A FilterMatcher subclassed in Python works with Or/Not and catalogs.

        The custom matcher always matches and reports a single (1, 1) atom
        pair. The test combines it with FilterMatchOps.Or and
        FilterMatchOps.Not, deletes the Python reference to check that the
        combined matcher still works, and finally uses a fresh instance
        inside a FilterCatalog.
        """
        class MyFilterMatcher(FilterCatalog.FilterMatcher):
            def IsValid(self):
                return True

            def HasMatch(self, mol):
                return True

            def GetMatches(self, mol, vect):
                v = FilterCatalog.MatchTypeVect()
                v.append(FilterCatalog.IntPair(1, 1))
                match = FilterCatalog.FilterMatch(self, v)
                vect.append(match)
                return True

        func = MyFilterMatcher("FilterMatcher")
        self.assertEqual(func.GetName(), "FilterMatcher")
        mol = Chem.MolFromSmiles("c1ccccc1")
        self.assertTrue(func.HasMatch(mol))

        # Or() of the matcher with itself reports the match once per operand.
        or_match = FilterMatchOps.Or(func, func)
        self.assertEqual([[tuple(x) for x in filtermatch.atomPairs]
                          for filtermatch in or_match.GetMatches(mol)],
                         [[(1, 1)], [(1, 1)]])

        not_match = FilterMatchOps.Not(func)
        print(not_match)
        self.assertFalse(not_match.HasMatch(mol))
        # test memory
        del func

        self.assertFalse(not_match.HasMatch(mol))
        self.assertEqual([[tuple(x) for x in filtermatch.atomPairs]
                          for filtermatch in not_match.GetMatches(mol)], [])

        entry = FilterCatalog.FilterCatalogEntry(
            "Bar", MyFilterMatcher("FilterMatcher"))
        fc = FilterCatalog.FilterCatalog()
        fc.AddEntry(entry)

        catalogEntry = fc.GetFirstMatch(mol)
        print(catalogEntry.GetDescription())
Ejemplo n.º 18
0
def pains(filtered_df):
    """Drop PAINS-matching compounds and label the rest as active/inactive.

    Each row of ``filtered_df`` is read through its ``smiles``,
    ``molecule_chembl_id`` and ``pIC50`` attributes. Rows whose molecule has
    no match in the RDKit PAINS catalog are collected in a new DataFrame
    with columns ``ChEMBL_ID``, ``smiles`` and ``pIC50``. That frame is
    passed to ``create_mol(df, 2048)`` (defined elsewhere; presumably adds
    molecule and fingerprint columns) and gets an ``active`` column set to
    1.0 where ``pIC50 >= 6.3`` and 0.0 otherwise.

    Returns:
        The DataFrame of PAINS-free compounds with the ``active`` column.
    """
    params = FilterCatalogParams()
    # Build a catalog from all PAINS (A, B and C)
    params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS)
    catalog = FilterCatalog(params)

    rdkit_noPAINS = pd.DataFrame(columns=('ChEMBL_ID', 'smiles', 'pIC50'))
    for _, row in filtered_df.iterrows():
        curMol = Chem.MolFromSmiles(row.smiles)
        # Only PAINS-free compounds are kept. The frames that used to
        # collect the matching compounds were never returned or read, so
        # they are no longer built.
        if catalog.GetFirstMatch(curMol) is not None:
            continue
        rdkit_noPAINS.loc[len(rdkit_noPAINS)] = [
            row.molecule_chembl_id, row.smiles, row.pIC50
        ]

    df_new = rdkit_noPAINS
    # Create molecules from smiles and their fingerprints
    create_mol(df_new, 2048)
    # Add column for activity
    df_new['active'] = np.zeros(len(df_new))
    # Mark every molecule with a pIC50 of >= 6.3 as active
    df_new.loc[df_new.pIC50 >= 6.3, 'active'] = 1.0
    return df_new
Ejemplo n.º 19
0
def buildFilterCatalog(
        csv_path='SubstructureFilter_HitTriaging_wPubChemExamples.csv'):
    """Build a FilterCatalog from a CSV table of SMARTS substructure filters.

    The CSV must provide the columns ``PATTERN_NAME``, ``SET_NAME``,
    ``SMARTS``, ``MIN_COUNT``, ``SEVERITY_SCORE``, ``COVALENT`` and
    ``SPECIAL_MOL``. Every row becomes one catalog entry backed by a
    SmartsMatcher. The entry name encodes the pattern name, the minimum
    count, the severity score and the covalent / special-molecule values.
    The row's ``SET_NAME`` is stored in the entry's ``Scope`` property.

    Args:
        csv_path: Path of the filter table. Defaults to the file name that
            was previously hard-coded, so existing callers are unaffected.

    Returns:
        The populated FilterCatalog.
    """
    inhousefilter = pd.read_csv(csv_path)
    inhouseFiltersCat = FilterCatalog.FilterCatalog()
    for i in range(len(inhousefilter)):
        # A MIN_COUNT of 0 in the table means "use the default of 1".
        raw_min = inhousefilter['MIN_COUNT'][i]
        mincount = int(raw_min) if raw_min != 0 else 1
        pname = inhousefilter['PATTERN_NAME'][i]
        sname = inhousefilter['SET_NAME'][i]
        pname_final = '{0}_min({1})__{2}__{3}__{4}'.format(
            pname, mincount, inhousefilter['SEVERITY_SCORE'][i],
            inhousefilter['COVALENT'][i], inhousefilter['SPECIAL_MOL'][i])
        fil = FilterCatalog.SmartsMatcher(pname_final,
                                          inhousefilter['SMARTS'][i], mincount)
        inhouseFiltersCat.AddEntry(
            FilterCatalog.FilterCatalogEntry(pname_final, fil))
        # The property is set on the entry as stored in the catalog; this
        # relies on entry i being the one just added.
        inhouseFiltersCat.GetEntry(i).SetProp('Scope', sname)
    return inhouseFiltersCat
Ejemplo n.º 20
0
def painspredict(thefile, theoutput):
    """Screen the molecules in ``thefile`` and write one line each to ``theoutput``.

    Every molecule that RDKit can parse from ``thefile`` is checked against
    the NIH filter catalog. Each output line holds a running index, the
    whitespace-split input line at that index, either "PAINS" plus the
    description of the first matching entry or "PAINS OK", and the
    fraction of sp3 carbons.

    Side effects:
        Removes ``output.txt`` from the working directory if it exists,
        truncates ``theoutput``, and appends a copy of the first line of
        ``thefile`` to the end of ``thefile``.
    """
    # A left-over 'output.txt' is removed; a missing file is not an error.
    try:
        os.remove('output.txt')
    except FileNotFoundError:
        pass

    # All files are opened with ``with`` so they are closed even when a
    # later step raises.
    with open(theoutput, 'w+') as f1:
        params = FilterCatalogParams()
        params.AddCatalog(FilterCatalogParams.FilterCatalogs.NIH)
        catalog = FilterCatalog(params)
        # The supplier is created before the input file is modified below.
        suppl = Chem.SmilesMolSupplier(thefile)

        with open(thefile, 'r') as inf:
            first_line = inf.readline()

        # NOTE(review): this appends the first line of the input to the end
        # of the input file, so the file grows on every call. It looks like
        # a workaround to line up ``sub_strct`` with the supplier -- confirm
        # that it is intended before changing it.
        with open(thefile, 'a') as inf:
            inf.write(first_line)

        with open(thefile, 'r') as inf:
            sub_strct = [line.rstrip().split(" ") for line in inf]

        ms = [x for x in suppl if x is not None]

        # NOTE(review): ``i`` indexes the parsed molecules, while
        # ``sub_strct`` holds every input line (including unparsable ones),
        # so the two can drift apart -- verify against real input.
        for i, mol in enumerate(ms):
            entry = catalog.GetFirstMatch(mol)
            sphybrid = Chem.rdMolDescriptors.CalcFractionCSP3(mol)
            if entry is not None:
                print(i,
                      sub_strct[i],
                      "PAINS",
                      entry.GetDescription(),
                      "Fsp3",
                      sphybrid,
                      file=f1)
            else:
                print(i, sub_strct[i], "PAINS OK", "Fsp3", sphybrid, file=f1)
Ejemplo n.º 21
0
    def testMWFilter(self):
        """Add a Python molecular-weight matcher to a catalog and query it.

        The matcher reports a hit when the exact molecular weight lies
        outside [minMw, maxMw]. The catalog lookup for benzene is executed
        but its result is not asserted.
        """
        class MWFilter(FilterCatalog.FilterMatcher):
            def __init__(self, minMw, maxMw):
                FilterCatalog.FilterMatcher.__init__(self, "MW violation")
                self.minMw, self.maxMw = minMw, maxMw

            def IsValid(self):
                return True

            def HasMatch(self, mol):
                weight = rdMolDescriptors.CalcExactMolWt(mol)
                in_range = self.minMw <= weight <= self.maxMw
                return not in_range

        mw_entry = FilterCatalog.FilterCatalogEntry("MW Violation",
                                                    MWFilter(100, 500))
        catalog = FilterCatalog.FilterCatalog()
        catalog.AddEntry(mw_entry)
        self.assertTrue(mw_entry.GetDescription() == "MW Violation")

        benzene = Chem.MolFromSmiles("c1ccccc1")
        first_hit = catalog.GetFirstMatch(benzene)
Ejemplo n.º 22
0
    def test1FilterMatchOps(self):
        """Combine a matcher and its exclusion list with And, Or and Not.

        On benzene the SMARTS matcher hits and the exclusion list built from
        it does not. Their And therefore misses, the Not of that And hits,
        and their Or hits. The three combined matchers are printed at the
        end.
        """
        benzene = Chem.MolFromSmiles("c1ccccc1")

        aromatic_chain = Chem.MolFromSmarts("c:c:c:c:c")
        includes = FilterCatalog.SmartsMatcher("Five aromatic carbons",
                                               aromatic_chain)
        self.assertTrue(includes.GetMinCount() == 1)
        self.assertTrue(includes.HasMatch(benzene))
        direct_hits = includes.GetMatches(benzene)

        excludes = FilterCatalog.ExclusionList()
        excludes.SetExclusionPatterns([includes])
        self.assertTrue(not excludes.HasMatch(benzene))

        both = FilterMatchOps.And(includes, excludes)
        self.assertTrue(not both.HasMatch(benzene))
        not_both = FilterMatchOps.Not(both)
        self.assertTrue(not_both.HasMatch(benzene))
        either = FilterMatchOps.Or(includes, excludes)
        self.assertTrue(either.HasMatch(benzene))

        for combined in (both, either, not_both):
            print(combined)
Ejemplo n.º 23
0
    def get_filters(self):
        """
        Build the RDKit filter catalog that holds the NIH filters.

        Returns:
        :returns: rdkit.Chem.rdfiltercatalog.FilterCatalog filters: A set of
            RDKit Filters
        """
        # Parameters selecting only the NIH catalog.
        nih_params = FilterCatalogParams()
        nih_params.AddCatalog(FilterCatalogParams.FilterCatalogs.NIH)
        return FilterCatalog.FilterCatalog(nih_params)
Ejemplo n.º 24
0
        smiles_.append(row[0])

# Build the data object from the SMILES, then replace it with the result of
# its `mols(flatten=True)` call (indexed per molecule in the loop below).
# NOTE(review): the preceding line appends to `smiles_`, but this call reads
# `smiles` -- confirm which name is intended.
data = get_data_from_smiles(smiles)
data = data.mols(flatten=True)

# One parameter object per catalog: PAINS, PAINS_A, PAINS_B, PAINS_C.
params = [
    FilterCatalogParams(),
    FilterCatalogParams(),
    FilterCatalogParams(),
    FilterCatalogParams()
]
params[0].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS)
params[1].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_A)
params[2].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_B)
params[3].AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_C)
catalog_pains = FilterCatalog(params[0])
catalog_painsA = FilterCatalog(params[1])
catalog_painsB = FilterCatalog(params[2])
catalog_painsC = FilterCatalog(params[3])

# Indices into `data` of the molecules that match each catalog.
# NOTE(review): only the PAINS and PAINS_A lists are filled in the code
# visible here; the B, C and All lists stay empty unless later code
# fills them.
entries_pains = []
entries_painsA = []
entries_painsB = []
entries_painsC = []
entries_painsAll = []

for i in range(len(data)):
    if catalog_pains.HasMatch(data[i]):
        entries_pains.append(i)
    if catalog_painsA.HasMatch(data[i]):
        entries_painsA.append(i)
Ejemplo n.º 25
0
 def hierarchy(matcher):
     """Wrap ``matcher`` in a FilterHierarchyMatcher and check the name carries over.

     ``self`` is not a parameter; it must come from the enclosing test
     method's scope.
     """
     node = FilterCatalog.FilterHierarchyMatcher(matcher)
     # assertEqual, not the alias assertEquals (removed in Python 3.12).
     self.assertEqual(matcher.GetName(), node.GetName())
     return node
Ejemplo n.º 26
0
    def testFilterHierarchyMatcher(self):
        """Build a small halogen hierarchy by hand and check its matches.

        The root node holds the generic "Halogen" pattern. Its children are
        "Halogen.Aromatic", "Halogen.NotFluorine" (with aliphatic and
        aromatic children) and "Halogen.Bromine" (with aliphatic, aromatic
        and bromo-ketone children). For several molecules the names returned
        by ``root.GetMatches`` are compared with the expected names.
        """
        root = FilterCatalog.FilterHierarchyMatcher()
        # ``h`` keeps a handle on the plain root pattern for the direct
        # HasMatch checks below; ``sm`` is rebound for every new node.
        sm = h = FilterCatalog.SmartsMatcher(
            "Halogen", "[$([F,Cl,Br,I]-!@[#6]);!$([F,Cl,Br,I]"
            "-!@C-!@[F,Cl,Br,I]);!$([F,Cl,Br,I]-[C,S]"
            "(=[O,S,N]))]", 1)
        root.SetPattern(sm)

        def hierarchy(matcher):
            """Wrap ``matcher`` in a hierarchy node that keeps its name."""
            node = FilterCatalog.FilterHierarchyMatcher(matcher)
            self.assertEqual(matcher.GetName(), node.GetName())
            return node

        sm = FilterCatalog.SmartsMatcher("Halogen.Aromatic",
                                         "[F,Cl,Br,I;$(*-!@c)]")
        root.AddChild(hierarchy(sm))

        sm = FilterCatalog.SmartsMatcher(
            "Halogen.NotFluorine", "[$([Cl,Br,I]-!@[#6]);!$([Cl,Br,I]"
            "-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S]"
            "(=[O,S,N]))]")
        node = hierarchy(sm)
        halogen_notf_children = [
            hierarchy(x) for x in [
                FilterCatalog.SmartsMatcher(
                    "Halogen.NotFluorine.Aliphatic",
                    "[$([Cl,Br,I]-!@C);!$([Cl,Br,I]"
                    "-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S](=[O,S,N]))]"),
                FilterCatalog.SmartsMatcher("Halogen.NotFluorine.Aromatic",
                                            "[$([Cl,Br,I]-!@c)]")
            ]
        ]
        for child in halogen_notf_children:
            node.AddChild(child)
        root.AddChild(node)

        sm = FilterCatalog.SmartsMatcher(
            "Halogen.Bromine", "[Br;$([Br]-!@[#6]);!$([Br]-!@C-!@[F,Cl,Br,I])"
            ";!$([Br]-[C,S](=[O,S,N]))]", 1)
        node = hierarchy(sm)
        halogen_bromine_children = [
            hierarchy(x) for x in [
                FilterCatalog.SmartsMatcher(
                    "Halogen.Bromine.Aliphatic",
                    "[Br;$(Br-!@C);!$(Br-!@C-!@[F,Cl,Br,I]);"
                    "!$(Br-[C,S](=[O,S,N]))]"),
                FilterCatalog.SmartsMatcher("Halogen.Bromine.Aromatic",
                                            "[Br;$(Br-!@c)]"),
                FilterCatalog.SmartsMatcher("Halogen.Bromine.BromoKetone",
                                            "[Br;$(Br-[CH2]-C(=O)-[#6])]")
            ]
        ]
        for child in halogen_bromine_children:
            node.AddChild(child)

        root.AddChild(node)

        # unittest assertions replace the former bare ``assert`` statements,
        # which are skipped when Python runs with -O.
        m = Chem.MolFromSmiles("CCl")
        self.assertTrue(h.HasMatch(m))
        res = root.GetMatches(m)
        self.assertEqual(len(res), 1)
        self.assertEqual([match.filterMatch.GetName() for match in res],
                         ['Halogen.NotFluorine.Aliphatic'])

        m = Chem.MolFromSmiles("c1ccccc1Cl")
        self.assertTrue(h.HasMatch(m))
        res = root.GetMatches(m)
        self.assertEqual(len(res), 2)

        m = Chem.MolFromSmiles("c1ccccc1Br")
        self.assertTrue(h.HasMatch(m))
        res = root.GetMatches(m)
        self.assertEqual(len(res), 3)

        self.assertEqual([match.filterMatch.GetName() for match in res], [
            'Halogen.Aromatic', 'Halogen.NotFluorine.Aromatic',
            'Halogen.Bromine.Aromatic'
        ])

        m = Chem.MolFromSmiles("c1ccccc1F")
        self.assertTrue(h.HasMatch(m))
        res = root.GetMatches(m)
        self.assertEqual(len(res), 1)

        self.assertEqual([match.filterMatch.GetName() for match in res],
                         ['Halogen.Aromatic'])

        m = Chem.MolFromSmiles("CBr")
        self.assertTrue(h.HasMatch(m))
        res = root.GetMatches(m)

        self.assertEqual(
            [match.filterMatch.GetName() for match in res],
            ['Halogen.NotFluorine.Aliphatic', 'Halogen.Bromine.Aliphatic'])
Ejemplo n.º 27
0
 def GetMatches(self, mol, vect):
     """Append one fixed match to ``vect`` and return True.

     The match is attributed to this matcher and carries a single
     ``IntPair(1, 1)``; ``mol`` is not inspected.
     """
     pairs = FilterCatalog.MatchTypeVect()
     pairs.append(FilterCatalog.IntPair(1, 1))
     vect.append(FilterCatalog.FilterMatch(self, pairs))
     return True
Ejemplo n.º 28
0
 def __init__(self, n_cores=-1):
     """Store ``n_cores`` and build the PAINS filter catalog as ``self.filter``."""
     self.n_cores = n_cores
     pains_params = FilterCatalog.FilterCatalogParams()
     pains_params.AddCatalog(FilterCatalog.FilterCatalogParams.FilterCatalogs.PAINS)
     self.filter = FilterCatalog.FilterCatalog(pains_params)
Ejemplo n.º 29
0
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import FilterCatalog
from rdkit.Chem.FilterCatalog import FilterCatalogParams
import json, gzip
from ijson import items
from django.core.exceptions import ValidationError
from threading import Thread
import CloseableQueue
#from multiprocessing.dummy import Pool
from mol_parsing.functions import find_lib_type, read_input, write_json_results, write_results
from mol_parsing.rdkit_parse import parse_mol_json, generate_mols_from_json

from StringIO import StringIO

# Module-level PAINS catalog, built once at import time from the three
# PAINS families (A, B and C).
params = FilterCatalog.FilterCatalogParams()
params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_A)
params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_B)
params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS_C)
catalog = FilterCatalog.FilterCatalog(params)


def request_params(request):
    """Read the request input and validate its optional ``filter`` GET parameter.

    Raises:
        ValidationError: if ``filter`` is present in ``request.GET`` and is
            not one of "INCLUDE_MATCHING", "INCLUDE_NON_MATCHING" or
            "INCLUDE_ALL".
    """
    #screen_lib, mol_type = find_lib_type(request)
    # NOTE(review): screen_lib and mol_type are not used in the code visible
    # here and nothing is returned -- the function may continue beyond this
    # excerpt.
    screen_lib, mol_type = read_input(request)
    if "filter" in request.GET:
        # NOTE(review): this local shadows the built-in ``filter``.
        filter = request.GET["filter"]
        if not (filter == "INCLUDE_MATCHING" or filter
                == "INCLUDE_NON_MATCHING" or filter == "INCLUDE_ALL"):
            raise ValidationError("Invalid filter value " + filter)
Ejemplo n.º 30
0
    def test2FilterCatalogTest(self):
        """Check entry counts and one known hit for each PAINS catalog.

        Every catalog is built three ways: from a parameter object, from its
        serialized form (when serialization is available), and directly from
        the catalog enum value. All variants must have the expected number
        of entries and flag the reference molecule of their family with the
        expected description.
        """
        tests = (
            (FilterCatalogParams.FilterCatalogs.PAINS_A, 16),
            (FilterCatalogParams.FilterCatalogs.PAINS_B, 55),
            (FilterCatalogParams.FilterCatalogs.PAINS_C, 409),
            (FilterCatalogParams.FilterCatalogs.PAINS, 409 + 16 + 55),
        )

        for catalog_idx, num in tests:
            params = FilterCatalog.FilterCatalogParams()
            print("*" * 44)
            print("Testing:", catalog_idx, int(catalog_idx))
            self.assertTrue(params.AddCatalog(catalog_idx))
            catalog1 = FilterCatalog.FilterCatalog(params)

            catalogs = [catalog1]
            if FilterCatalog.FilterCatalogCanSerialize():
                # Round-trip through the serialized form.
                serialized = catalog1.Serialize()
                catalogs.append(FilterCatalog.FilterCatalog(serialized))

            catalogs.append(FilterCatalog.FilterCatalog(catalog_idx))
            for catalog in catalogs:
                self.assertEqual(catalog.GetNumEntries(), num)

                if catalog_idx in [
                        FilterCatalogParams.FilterCatalogs.PAINS_A,
                        FilterCatalogParams.FilterCatalogs.PAINS
                ]:
                    # http://chemistrycompass.com/chemsearch/58909/
                    mol = Chem.MolFromSmiles(
                        "O=C(Cn1cnc2c1c(=O)n(C)c(=O)n2C)N/N=C/c1c(O)ccc2c1cccc2"
                    )
                    entry = catalog.GetFirstMatch(mol)
                    for key in entry.GetPropList():
                        if key == "Reference":
                            self.assertEqual(
                                entry.GetProp(key),
                                "Baell JB, Holloway GA. New Substructure Filters for "
                                "Removal of Pan Assay Interference Compounds (PAINS) "
                                "from Screening Libraries and for Their Exclusion in "
                                "Bioassays. J Med Chem 53 (2010) 2719D40. "
                                "doi:10.1021/jm901137j.")
                        elif key == "Scope":
                            self.assertEqual(entry.GetProp(key),
                                             "PAINS filters (family A)")

                    self.assertEqual(entry.GetDescription(),
                                     "hzone_phenol_A(479)")
                    result = catalog.GetMatches(mol)
                    self.assertEqual(len(result), 1)

                    for entry in result:
                        for filtermatch in entry.GetFilterMatches(mol):
                            self.assertEqual(str(filtermatch.filterMatch),
                                             "hzone_phenol_A(479)")
                            atomPairs = [
                                tuple(x) for x in filtermatch.atomPairs
                            ]
                            self.assertEqual(atomPairs, [(0, 23), (1, 22),
                                                         (2, 20), (3, 19),
                                                         (4, 25), (5, 24),
                                                         (6, 18), (7, 17),
                                                         (8, 16), (9, 21)])

                elif catalog_idx == FilterCatalogParams.FilterCatalogs.PAINS_B:
                    mol = Chem.MolFromSmiles(
                        "FC(F)(F)Oc1ccc(NN=C(C#N)C#N)cc1")  # CHEMBL457504
                    entry = catalog.GetFirstMatch(mol)
                    self.assertTrue(entry)
                    self.assertEqual(entry.GetDescription(),
                                     "cyano_imine_B(17)")

                elif catalog_idx == FilterCatalogParams.FilterCatalogs.PAINS_C:
                    mol = Chem.MolFromSmiles(
                        "O=C1C2OC2C(=O)c3cc4CCCCc4cc13")  # CHEMBL476649
                    entry = catalog.GetFirstMatch(mol)
                    self.assertTrue(entry)
                    self.assertEqual(entry.GetDescription(),
                                     "keto_keto_gamma(5)")