Python QueryService.efetch Examples, eutils.queryservice.QueryService.efetch Python Examples

Example #1

0

Show file

File: client.py Project: Grindizer/eutils

class Client(object):
    """Object-style front end to a QueryService.

    Each method forwards its arguments to the QueryService method of the
    same name and wraps the reply in a result class.
    """

    def __init__(self, cache_path=default_cache_path, request_interval=0.4):
        """Create the underlying QueryService.

        :param cache_path: forwarded to QueryService as ``cache_path``
        :param request_interval: forwarded to QueryService as
            ``request_interval``
        """
        service = QueryService(cache_path=cache_path,
                               request_interval=request_interval)
        self._qs = service
        #self.databases = self.einfo().databases

    def einfo(self, db=None):
        """query the einfo endpoint

        :param db: string (optional)
        :rtype: EInfo or EInfoDB object

        With a db, the reply for that database is requested with
        ``version`` set to ``'2.0'`` and returned in an EInfoDB object.

        Without a db, the plain einfo reply is returned in an EInfo
        object.
        """
        if db is not None:
            args = {'db': db, 'version': '2.0'}
            return EInfoDB(self._qs.einfo(args))
        return EInfo(self._qs.einfo())

    def esearch(self, db, term, **kw):
        """query the esearch endpoint

        :param db: sent as the ``db`` argument
        :param term: sent as the ``term`` argument
        :param kw: any further arguments to send with the query
        :rtype: ESearchResults object
        """
        kw['db'] = db
        kw['term'] = term
        reply = self._qs.esearch(kw)
        return ESearchResults(reply)

    def efetch(self, db, id=None, webenv=None, query_key=None, **kw):
        """query the efetch endpoint

        :param db: database name; lower-cased before it is used
        :param id: sent as ``id`` when truthy
        :param webenv: sent (together with ``query_key``) when ``id`` is falsy
        :param query_key: sent (together with ``webenv``) when ``id`` is falsy
        :param kw: any further arguments to send with the query
        :rtype: Gene, GBSet, PubMedArticleSet or ExchangeSet object
        :raises EutilsError: if db is not one of gene, nuccore, pubmed, snp
            (raised only after the query has been issued)
        """
        db = db.lower()
        kw['db'] = db
        if id:
            kw['id'] = id
        else:
            kw['webenv'] = webenv
            kw['query_key'] = query_key

        # The query is sent before the database is checked for support.
        xml = self._qs.efetch(kw)

        if db == 'gene':
            return Gene(xml)
        elif db == 'nuccore':
            # TODO: GBSet is misnamed; it should be GBSeq and get the GBSeq XML node as root (see gbset.py)
            return GBSet(xml)
        elif db == 'pubmed':
            return PubMedArticleSet(xml)
        elif db == 'snp':
            return ExchangeSet(xml)

        message = 'database {db} is not currently supported by eutils'.format(db=db)
        raise EutilsError(message)

Example #2

0

Show file

File: client.py Project: Grindizer/eutils

class Client(object):
    """Thin wrapper that turns QueryService replies into result objects."""

    def __init__(self, cache_path=default_cache_path, request_interval=0.4):
        """Build the QueryService used by every other method.

        :param cache_path: passed through to QueryService
        :param request_interval: passed through to QueryService
        """
        self._qs = QueryService(
            cache_path=cache_path,
            request_interval=request_interval,
        )
        #self.databases = self.einfo().databases

    def einfo(self, db=None):
        """query the einfo endpoint

        :param db: string (optional)
        :rtype: EInfo or EInfoDB object

        If db is None, the einfo reply is wrapped in an EInfo object.

        If db is not None, the reply for that database is wrapped in an
        EInfoDB object.  (``version`` ``'2.0'`` is always requested in
        this case.)
        """
        if db is None:
            reply = self._qs.einfo()
            return EInfo(reply)

        reply = self._qs.einfo({'db': db, 'version': '2.0'})
        return EInfoDB(reply)

    def esearch(self, db, term, **kw):
        """query the esearch endpoint

        ``db`` and ``term`` are merged into the extra keyword arguments
        and the combined dict is handed to the query service; the reply
        is returned in an ESearchResults object.
        """
        kw.update(db=db, term=term)
        return ESearchResults(self._qs.esearch(kw))

    def efetch(self, db, id=None, webenv=None, query_key=None, **kw):
        """query the efetch endpoint

        The records are selected by ``id`` when it is truthy, otherwise
        by ``webenv`` and ``query_key``.  ``db`` is lower-cased, the
        query is issued, and the reply is wrapped according to db:

        * gene    -> Gene
        * nuccore -> GBSet
        * pubmed  -> PubMedArticleSet
        * snp     -> ExchangeSet

        :raises EutilsError: for any other db (after the query was sent)
        """
        db = db.lower()
        if id:
            selector = {'id': id}
        else:
            selector = {'webenv': webenv, 'query_key': query_key}
        kw.update({'db': db})
        kw.update(selector)

        xml = self._qs.efetch(kw)

        if db == 'gene':
            return Gene(xml)
        if db == 'nuccore':
            # TODO: GBSet is misnamed; it should be GBSeq and get the GBSeq XML node as root (see gbset.py)
            return GBSet(xml)
        if db == 'pubmed':
            return PubMedArticleSet(xml)
        if db == 'snp':
            return ExchangeSet(xml)
        raise EutilsError(
            'database {db} is not currently supported by eutils'.format(db=db))

Example #3

0

Show file

File: test_eutils_queryservice.py Project: biocommons/eutils

class TestEutilsQueries(unittest.TestCase):
    """Tests for the QueryService endpoint methods.

    Every test is wrapped in ``vcr.use_cassette``; the last one also
    replaces ``eutils.queryservice.requests`` with a mock so that a failed
    HTTP response can be simulated.
    """

    def setUp(self):
        self.qs = QueryService()

    @vcr.use_cassette
    def test_efetch(self):
        '''Testing efetch.fcgi by looking up a known pubmed article.'''

        pmid = 1234567
        result = self.qs.efetch(args={'db': 'pubmed', 'id': pmid})
        assert_in_xml(result, str(pmid))

    @vcr.use_cassette
    def test_esearch(self):
        '''Testing esearch.fcgi by searching medgen db for concepts related to OCRL gene.'''
        # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=medgen&term=OCRL
        result = self.qs.esearch({'db': 'medgen', 'term': 'OCRL'})

        # eSearchResult should contain something like this:
        #<IdList><Id>763754</Id><Id>336867</Id><Id>336322</Id><Id>168056</Id><Id>18145</Id></IdList>
        assert_in_xml(result, 'IdList')

    @vcr.use_cassette
    def test_elink(self):
        '''Testing elink.fcgi by looking up related pmids in pubmed.'''
        #   Expected response should contain the following information:

        #    * pubmed    (all related links)
        #    * citedin   (papers that cited this paper)
        #    * five      (the "five" that pubmed displays as the top related results)
        #    * reviews   (review papers that cite this paper)
        #    * combined  (?)

        # Only the 'pubmed' key is asserted below; the other keys listed
        # above are not checked by this test.
        xmlstr = self.qs.elink({'dbfrom': 'pubmed', 'id': 1234567, 'cmd': 'neighbor'})

        resd = parse_related_pmids_result(xmlstr)
        assert 'pubmed' in resd.keys()

    @vcr.use_cassette
    def test_esummary(self):
        '''Testing esummary.fcgi by looking up a known medgen concept'''

        # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=medgen&id=336867
        result = self.qs.esummary({'db': 'medgen', 'id': 336867})
        assert_in_xml(result, 'ConceptId')

    @vcr.use_cassette
    @patch('eutils.queryservice.requests')
    def test_handles_malformed_xml_errors(self, mock_requests):
        '''A failed response with a non-XML body must raise EutilsNCBIError.'''
        post_return_value = MagicMock()
        post_return_value.status_code = 404
        post_return_value.reason = 'dunno'
        # A requests response exposes its body as ``text``.  This was
        # previously assigned to ``test``, which MagicMock accepts silently,
        # so the 'Bad XML' body was never what the code under test read.
        post_return_value.text = 'Bad XML'
        post_return_value.ok = False
        mock_requests.post.return_value = post_return_value

        pmid = 1234569
        # Keep only the call under test inside the assertRaises block.
        with self.assertRaises(EutilsNCBIError):
            self.qs.efetch(args={'db': 'pubmed', 'id': pmid})

Example #4

0

Show file

class Client(object):
    """class-based access to NCBI E-Utilities, returning Python classes
    with rich data accessors

    """

    def __init__(self, cache_path=default_cache_path):
        """
        :param str cache_path: full path to sqlite database file (created if necessary)
        :raises EutilsError: if the cache directory couldn't be created
        """
        cache_dir = os.path.dirname(cache_path)
        # A cache_path without a directory part yields an empty dirname;
        # there is nothing to create then, and os.mkdir('') would fail.
        if cache_dir and not os.path.exists(cache_dir):
            try:
                # makedirs (unlike mkdir) also creates missing parents.
                os.makedirs(cache_dir)
                logger.info("Made cache directory " + cache_dir)
            except OSError:
                # The directory may have been created concurrently between
                # the exists() check and makedirs(); only fail if it is
                # still not there.
                if not os.path.isdir(cache_dir):
                    raise EutilsError("Failed to make cache directory " +
                                      cache_dir)
        self._qs = QueryService(cache_path=cache_path)

    @property
    def databases(self):
        """
        list of databases available from eutils (per einfo query)

        The einfo query is made on first access only; the result is kept
        in ``self._databases`` and reused afterwards.
        """
        try:
            return self._databases
        except AttributeError:
            self._databases = self.einfo().databases
            return self._databases

    def einfo(self, db=None):
        """query the einfo endpoint

        :param db: string (optional)
        :returns: ``dblist`` or ``dbinfo`` of the EInfoResult built from
            the reply

        If db is None, the plain einfo reply is parsed and its ``dblist``
        is returned (the ``databases`` property of this class reads the
        ``databases`` attribute of that object).

        If db is not None, the reply for the specified database is parsed
        and its ``dbinfo`` is returned.  (Version 2.0 data is
        automatically requested.)
        """

        if db is None:
            return EInfoResult(self._qs.einfo()).dblist
        return EInfoResult(self._qs.einfo({'db': db, 'version': '2.0'})).dbinfo

    def esearch(self, db, term):
        """query the esearch endpoint

        :param db: database to search
        :param term: search term
        :returns: ESearchResult built from the reply

        A warning is logged when the reported ``count`` exceeds ``retmax``.
        """
        esr = ESearchResult(self._qs.esearch({'db': db, 'term': term}))
        if esr.count > esr.retmax:
            # Logger.warn is a deprecated alias (removed in Python 3.13);
            # warning() is the supported spelling.
            logger.warning(
                "NCBI found {esr.count} results, but we truncated the reply at {esr.retmax}"
                " results; see https://github.com/biocommons/eutils/issues/124/"
                .format(esr=esr))
        return esr

    def efetch(self, db, id):
        """query the efetch endpoint

        :param db: database name; lower-cased before use
        :param id: record id; converted with ``str()`` for the query
        :returns: EntrezgeneSet (gene), GBSet (nuccore), PubmedArticleSet
            (pubmed), ExchangeSet (snp) or PubmedCentralArticleSet (pmc)
        :raises EutilsError: if db is none of the above (raised only after
            the query has been issued and the reply parsed)
        """
        db = db.lower()
        xml = self._qs.efetch({'db': db, 'id': str(id)})
        doc = le.XML(xml)
        if db in ['gene']:
            return EntrezgeneSet(doc)
        if db in ['nuccore']:
            # TODO: GBSet is misnamed; it should be GBSeq and get the GBSeq XML node as root (see gbset.py)
            return GBSet(doc)
        if db in ['pubmed']:
            return PubmedArticleSet(doc)
        if db in ['snp']:
            # NOTE(review): unlike the other branches this passes the raw
            # reply rather than the parsed doc -- confirm this is what
            # ExchangeSet expects.
            return ExchangeSet(xml)
        if db in ['pmc']:
            return PubmedCentralArticleSet(doc)
        raise EutilsError(
            'database {db} is not currently supported by eutils'.format(db=db))

Example #5

0

Show file

File: client.py Project: cariaso/eutils

class Client(object):
    """class-based access to NCBI E-Utilities, returning Python classes
    with rich data accessors

    """

    def __init__(self, cache_path=default_cache_path):
        """
        :param str cache_path: full path to sqlite database file (created if necessary)
        :raises EutilsError: if the cache directory couldn't be created
        """
        cache_dir = os.path.dirname(cache_path)
        # An empty dirname (cache_path is a bare filename) needs no
        # directory; os.mkdir('') would raise and be misreported.
        if cache_dir and not os.path.exists(cache_dir):
            try:
                # makedirs creates intermediate directories as well.
                os.makedirs(cache_dir)
                logger.info("Made cache directory " + cache_dir)
            except OSError:
                # Tolerate the directory appearing between the exists()
                # check and makedirs(); fail only if it is still missing.
                if not os.path.isdir(cache_dir):
                    raise EutilsError("Failed to make cache directory " + cache_dir)
        self._qs = QueryService(cache_path=cache_path)

    @property
    def databases(self):
        """
        list of databases available from eutils (per einfo query)

        Looked up through einfo() on first access and then served from
        ``self._databases``.
        """
        try:
            return self._databases
        except AttributeError:
            self._databases = self.einfo().databases
            return self._databases

    def einfo(self, db=None):
        """query the einfo endpoint

        :param db: string (optional)
        :returns: ``dblist`` or ``dbinfo`` of the EInfoResult built from
            the reply

        If db is None, the plain einfo reply is parsed and its ``dblist``
        is returned (the ``databases`` property of this class reads the
        ``databases`` attribute of that object).

        If db is not None, the reply for the specified database is parsed
        and its ``dbinfo`` is returned.  (Version 2.0 data is
        automatically requested.)
        """

        if db is None:
            return EInfoResult(self._qs.einfo()).dblist
        return EInfoResult(self._qs.einfo({'db': db, 'version': '2.0'})).dbinfo

    def esearch(self, db, term):
        """query the esearch endpoint

        :param db: database to search
        :param term: search term
        :returns: ESearchResult built from the reply

        A warning is logged when the reported ``count`` exceeds ``retmax``.
        """
        esr = ESearchResult(self._qs.esearch({'db': db, 'term': term}))
        if esr.count > esr.retmax:
            # warning() replaces the deprecated warn() alias, which was
            # removed in Python 3.13.
            logger.warning("NCBI found {esr.count} results, but we truncated the reply at {esr.retmax}"
                           " results; see https://github.com/biocommons/eutils/issues/124/".format(esr=esr))
        return esr

    def efetch(self, db, id):
        """query the efetch endpoint

        :param db: database name; lower-cased before use
        :param id: record id; converted with ``str()`` for the query
        :returns: EntrezgeneSet (gene), GBSet (nuccore, nucest),
            PubmedArticleSet (pubmed), ExchangeSet (snp) or
            PubmedCentralArticleSet (pmc)
        :raises EutilsError: if db is none of the above (raised only after
            the query has been issued and the reply parsed)
        """
        db = db.lower()
        xml = self._qs.efetch({'db': db, 'id': str(id)})
        doc = le.XML(xml)
        if db in ['gene']:
            return EntrezgeneSet(doc)
        if db in ['nuccore', 'nucest']:
            # TODO: GBSet is misnamed; it should be GBSeq and get the GBSeq XML node as root (see gbset.py)
            return GBSet(doc)
        if db in ['pubmed']:
            return PubmedArticleSet(doc)
        if db in ['snp']:
            # NOTE(review): this branch hands over the raw reply, not the
            # parsed doc used everywhere else -- confirm ExchangeSet
            # expects that.
            return ExchangeSet(xml)
        if db in ['pmc']:
            return PubmedCentralArticleSet(doc)
        raise EutilsError('database {db} is not currently supported by eutils'.format(db=db))

Example #6

0

Show file

class TestEutilsQueries(unittest.TestCase):
    """Tests for the QueryService endpoint methods.

    Each test runs under ``vcr.use_cassette``.  The error-handling test
    additionally patches ``eutils.queryservice.requests`` so the response
    returned by ``post`` can be faked.
    """

    def setUp(self):
        self.qs = QueryService()

    @vcr.use_cassette
    def test_efetch(self):
        '''Testing efetch.fcgi by looking up a known pubmed article.'''

        pmid = 1234567
        result = self.qs.efetch(args={'db': 'pubmed', 'id': pmid})
        assert_in_xml(result, str(pmid))

    @vcr.use_cassette
    def test_esearch(self):
        '''Testing esearch.fcgi by searching medgen db for concepts related to OCRL gene.'''
        # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=medgen&term=OCRL
        result = self.qs.esearch({'db': 'medgen', 'term': 'OCRL'})

        # eSearchResult should contain something like this:
        #<IdList><Id>763754</Id><Id>336867</Id><Id>336322</Id><Id>168056</Id><Id>18145</Id></IdList>
        assert_in_xml(result, 'IdList')

    @vcr.use_cassette
    def test_elink(self):
        '''Testing elink.fcgi by looking up related pmids in pubmed.'''
        #   Expected response should contain the following information:

        #    * pubmed    (all related links)
        #    * citedin   (papers that cited this paper)
        #    * five      (the "five" that pubmed displays as the top related results)
        #    * reviews   (review papers that cite this paper)
        #    * combined  (?)

        # Of the keys listed above, only 'pubmed' is actually asserted.
        xmlstr = self.qs.elink({
            'dbfrom': 'pubmed',
            'id': 1234567,
            'cmd': 'neighbor'
        })

        resd = parse_related_pmids_result(xmlstr)
        assert 'pubmed' in resd.keys()

    @vcr.use_cassette
    def test_esummary(self):
        '''Testing esummary.fcgi by looking up a known medgen concept'''

        # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=medgen&id=336867
        result = self.qs.esummary({'db': 'medgen', 'id': 336867})
        assert_in_xml(result, 'ConceptId')

    @vcr.use_cassette
    @patch('eutils.queryservice.requests')
    def test_handles_malformed_xml_errors(self, mock_requests):
        '''A failed response with a non-XML body must raise EutilsNCBIError.'''
        post_return_value = MagicMock()
        post_return_value.status_code = 404
        post_return_value.reason = 'dunno'
        # The body of a requests response lives in ``text``; the earlier
        # ``test`` attribute was a typo that MagicMock accepted without
        # complaint, leaving ``text`` as an auto-generated mock.
        post_return_value.text = 'Bad XML'
        post_return_value.ok = False
        mock_requests.post.return_value = post_return_value

        pmid = 1234569
        # Only the call expected to raise belongs inside assertRaises.
        with self.assertRaises(EutilsNCBIError):
            self.qs.efetch(args={'db': 'pubmed', 'id': pmid})