Exemple #1
0
    def _query(self, path, args=None, skip_cache=False, skip_sleep=False):
        """return results for a NCBI query, possibly from the cache

        :param: path: relative query path (e.g., 'einfo.fcgi')
        :param: args: dictionary of query args
        :param: skip_cache: whether to bypass the cache on reading
        :param: skip_sleep: whether to bypass query throttling
        :rtype: xml string

        The args are joined with args required by NCBI (tool and email
        address) and with the default args declared when instantiating
        the client.
        """
        if args is None:
            args = {}

        def _cacheable(r):
            """return False if r shouldn't be cached (contains a no-cache meta
            line); True otherwise"""
            return not ("no-cache" in r  # obviate parsing, maybe
                        and
                        lxml.etree.XML(r).xpath("//meta/@content='no-cache'"))

        # cache key: the key associated with this endpoint and args. The
        # key intentionally excludes the identifying args (tool and email)
        # and is independent of the request method (GET/POST); args are
        # sorted for canonicalization

        url = url_base + path

        # merge per-call args over client defaults; identifying args are
        # added only for the request itself, never for the cache key
        defining_args = dict(
            list(self.default_args.items()) + list(args.items()))
        full_args = dict(
            list(self._ident_args.items()) + list(defining_args.items()))
        cache_key = hashlib.md5(
            pickle.dumps((url, sorted(defining_args.items())))).hexdigest()

        # human-readable arg strings for log messages only
        sqas = ';'.join([k + '=' + str(v) for k, v in sorted(args.items())])
        full_args_str = ';'.join(
            [k + '=' + str(v) for k, v in sorted(full_args.items())])

        # use the module logger consistently (was a stray logging.debug)
        _logger.debug("CACHE:" + str(skip_cache) + "//" + str(self._cache))
        if not skip_cache and self._cache:
            try:
                v = self._cache[cache_key]
                _logger.debug(
                    'cache hit for key {cache_key} ({url}, {sqas}) '.format(
                        cache_key=cache_key, url=url, sqas=sqas))
                return v
            except KeyError:
                # cache miss: fall through to the network request
                _logger.debug(
                    'cache miss for key {cache_key} ({url}, {sqas}) '.format(
                        cache_key=cache_key, url=url, sqas=sqas))

        if self.api_key:
            # appended after cache-key computation so cached entries are
            # shared regardless of api key
            url += '?api_key={self.api_key}'.format(self=self)

        # --

        if not skip_sleep:
            req_int = self.request_interval
            # time.clock() was removed in Python 3.8; time.monotonic() is the
            # correct monotonic timer for measuring elapsed wall time
            sleep_time = req_int - (time.monotonic() - self._last_request_clock)
            if sleep_time > 0:
                _logger.debug(
                    'sleeping {sleep_time:.3f}'.format(sleep_time=sleep_time))
                time.sleep(sleep_time)

        r = requests.post(url, full_args)
        self._last_request_clock = time.monotonic()
        _logger.debug(
            'post({url}, {fas}): {r.status_code} {r.reason}, {len})'.format(
                url=url, fas=full_args_str, r=r, len=len(r.text)))

        if not r.ok:
            # TODO: discriminate between types of errors
            if r.headers["Content-Type"] == "application/json":
                json = r.json()
                raise EutilsRequestError(
                    '{r.reason} ({r.status_code}): {error}'.format(
                        r=r, error=json["error"]))
            try:
                xml = lxml.etree.fromstring(r.text.encode('utf-8'))
                raise EutilsRequestError(
                    '{r.reason} ({r.status_code}): {error}'.format(
                        r=r, error=xml.find('ERROR').text))
            except Exception as ex:
                raise EutilsNCBIError(
                    'Error parsing response object from NCBI: {}'.format(ex))

        # an HTTP-200 response may still carry an embedded NCBI error element
        if any(bad_word in r.text for bad_word in ['<error>', '<ERROR>']):
            if r.text is not None:
                try:
                    xml = lxml.etree.fromstring(r.text.encode('utf-8'))
                    raise EutilsRequestError(
                        '{r.reason} ({r.status_code}): {error}'.format(
                            r=r, error=xml.find('ERROR').text))
                except Exception as ex:
                    raise EutilsNCBIError(
                        'Error parsing response object from NCBI: {}'.format(
                            ex))

        if '<h1 class="error">Access Denied</h1>' in r.text:
            raise EutilsRequestError('Access Denied: {url}'.format(url=url))

        if self._cache and _cacheable(r.text):
            # N.B. we cache results even when skip_cache (read) is true
            self._cache[cache_key] = r.content
            _logger.info(
                'cached results for key {cache_key} ({url}, {sqas}) '.format(
                    cache_key=cache_key, url=url, sqas=sqas))

        return r.content
Exemple #2
0
def val_to(obj, compress):
    """Serialize *obj* with pickle; zlib-compress the bytes when *compress* is true."""
    serialized = pickle.dumps(obj)
    if compress:
        return zlib.compress(serialized)
    return serialized
Exemple #3
0
    def _query(self, path, args=None, skip_cache=False, skip_sleep=False):
        """return results for a NCBI query, possibly from the cache

        :param: path: relative query path (e.g., 'einfo.fcgi')
        :param: args: dictionary of query args
        :param: skip_cache: whether to bypass the cache on reading
        :param: skip_sleep: whether to bypass query throttling
        :rtype: xml string

        The args are joined with args required by NCBI (tool and email
        address) and with the default args declared when instantiating
        the client.
        """
        # avoid the shared-mutable-default-argument pitfall (was args={})
        if args is None:
            args = {}

        def _cacheable(r):
            """return False if r shouldn't be cached (contains a no-cache meta
            line); True otherwise"""
            return not ("no-cache" in r  # obviate parsing, maybe
                        and lxml.etree.XML(r).xpath("//meta/@content='no-cache'"))

        # cache key: the key associated with this endpoint and args. The
        # key intentionally excludes the identifying args (tool and email)
        # and is independent of the request method (GET/POST); args are
        # sorted for canonicalization

        url = url_base + path

        # merge per-call args over client defaults; identifying args are
        # added only for the request itself, never for the cache key
        defining_args = dict(list(self.default_args.items()) + list(args.items()))
        full_args = dict(list(self._ident_args.items()) + list(defining_args.items()))
        cache_key = hashlib.md5(pickle.dumps((url, sorted(defining_args.items())))).hexdigest()

        # human-readable arg strings for log messages only
        sqas = ';'.join([k + '=' + str(v) for k, v in sorted(args.items())])
        full_args_str = ';'.join([k + '=' + str(v) for k, v in sorted(full_args.items())])

        # use the module logger consistently (was a stray logging.debug)
        logger.debug("CACHE:" + str(skip_cache) + "//" + str(self._cache))
        if not skip_cache and self._cache:
            try:
                v = self._cache[cache_key]
                logger.debug('cache hit for key {cache_key} ({url}, {sqas}) '.format(
                    cache_key=cache_key,
                    url=url,
                    sqas=sqas))
                return v
            except KeyError:
                # cache miss: fall through to the network request
                logger.debug('cache miss for key {cache_key} ({url}, {sqas}) '.format(
                    cache_key=cache_key,
                    url=url,
                    sqas=sqas))

        if not skip_sleep:
            # request_interval may be a fixed number or a callable returning
            # one; callable() replaces collections.Callable, which was
            # removed in Python 3.10 (moved to collections.abc)
            req_int = self.request_interval() if callable(self.request_interval) else self.request_interval
            # time.clock() was removed in Python 3.8; time.monotonic() is the
            # correct monotonic timer for measuring elapsed wall time
            sleep_time = req_int - (time.monotonic() - self._last_request_clock)
            if sleep_time > 0:
                logger.debug('sleeping {sleep_time:.3f}'.format(sleep_time=sleep_time))
                time.sleep(sleep_time)
        r = requests.post(url, full_args)
        self._last_request_clock = time.monotonic()
        logger.debug('post({url}, {fas}): {r.status_code} {r.reason}, {len})'.format(
            url=url,
            fas=full_args_str,
            r=r,
            len=len(r.text)))

        if not r.ok:
            # TODO: discriminate between types of errors
            try:
                xml = lxml.etree.fromstring(r.text.encode('utf-8'))
                raise EutilsRequestError('{r.reason} ({r.status_code}): {error}'.format(r=r, error=xml.find('ERROR').text))
            except Exception as ex:
                raise EutilsNCBIError('Error parsing response object from NCBI: {}'.format(ex))

        # an HTTP-200 response may still carry an embedded NCBI error element
        if any(bad_word in r.text for bad_word in ['<error>', '<ERROR>']):
            if r.text is not None:
                try:
                    xml = lxml.etree.fromstring(r.text.encode('utf-8'))
                    raise EutilsRequestError('{r.reason} ({r.status_code}): {error}'.format(r=r, error=xml.find('ERROR').text))
                except Exception as ex:
                    raise EutilsNCBIError('Error parsing response object from NCBI: {}'.format(ex))

        if '<h1 class="error">Access Denied</h1>' in r.text:
            raise EutilsRequestError('Access Denied: {url}'.format(url=url))

        if self._cache and _cacheable(r.text):
            # N.B. we cache results even when skip_cache (read) is true
            self._cache[cache_key] = r.content
            logger.info('cached results for key {cache_key} ({url}, {sqas}) '.format(
                cache_key=cache_key,
                url=url,
                sqas=sqas))

        return r.content
Exemple #4
0
def key_to(obj):
    """Serialize *obj* to bytes with pickle (e.g., for use as a cache key)."""
    pickled = pickle.dumps(obj)
    return pickled