def _query(self, path, args=None, skip_cache=False, skip_sleep=False):
    """return results for an NCBI query, possibly from the cache

    :param path: relative query path (e.g., 'einfo.fcgi')
    :param args: dictionary of query args
    :param skip_cache: whether to bypass the cache on reading
    :param skip_sleep: whether to bypass query throttling
    :rtype: xml string

    The args are joined with args required by NCBI (tool and email
    address) and with the default args declared when instantiating
    the client.
    """

    if args is None:
        args = {}

    def _cacheable(r):
        """return False if r shouldn't be cached (contains a no-cache meta
        line); True otherwise"""
        return not ("no-cache" in r  # obviate parsing, maybe
                    and lxml.etree.XML(r).xpath("//meta/@content='no-cache'"))

    # cache key: the key associated with this endpoint and args.  The
    # key intentionally excludes the identifying args (tool and email)
    # and is independent of the request method (GET/POST); args are
    # sorted for canonicalization.

    url = url_base + path

    # next 3 lines converted by 2to3 -nm
    defining_args = dict(
        list(self.default_args.items()) + list(args.items()))
    full_args = dict(
        list(self._ident_args.items()) + list(defining_args.items()))
    cache_key = hashlib.md5(
        pickle.dumps((url, sorted(defining_args.items())))).hexdigest()

    sqas = ';'.join([k + '=' + str(v) for k, v in sorted(args.items())])
    full_args_str = ';'.join(
        [k + '=' + str(v) for k, v in sorted(full_args.items())])

    logging.debug("CACHE:" + str(skip_cache) + "//" + str(self._cache))
    if not skip_cache and self._cache:
        try:
            v = self._cache[cache_key]
            _logger.debug(
                'cache hit for key {cache_key} ({url}, {sqas}) '.format(
                    cache_key=cache_key, url=url, sqas=sqas))
            return v
        except KeyError:
            _logger.debug(
                'cache miss for key {cache_key} ({url}, {sqas}) '.format(
                    cache_key=cache_key, url=url, sqas=sqas))
            pass

    if self.api_key:
        url += '?api_key={self.api_key}'.format(self=self)

    if not skip_sleep:
        req_int = self.request_interval
        sleep_time = req_int - (time.clock() - self._last_request_clock)
        if sleep_time > 0:
            _logger.debug(
                'sleeping {sleep_time:.3f}'.format(sleep_time=sleep_time))
            time.sleep(sleep_time)

    r = requests.post(url, full_args)
    self._last_request_clock = time.clock()
    _logger.debug(
        'post({url}, {fas}): {r.status_code} {r.reason}, {len})'.format(
            url=url, fas=full_args_str, r=r, len=len(r.text)))

    if not r.ok:
        # TODO: discriminate between types of errors
        if r.headers["Content-Type"] == "application/json":
            json = r.json()
            raise EutilsRequestError(
                '{r.reason} ({r.status_code}): {error}'.format(
                    r=r, error=json["error"]))
        try:
            xml = lxml.etree.fromstring(r.text.encode('utf-8'))
            raise EutilsRequestError(
                '{r.reason} ({r.status_code}): {error}'.format(
                    r=r, error=xml.find('ERROR').text))
        except Exception as ex:
            raise EutilsNCBIError(
                'Error parsing response object from NCBI: {}'.format(ex))

    if any(bad_word in r.text for bad_word in ['<error>', '<ERROR>']):
        if r.text is not None:
            try:
                xml = lxml.etree.fromstring(r.text.encode('utf-8'))
                raise EutilsRequestError(
                    '{r.reason} ({r.status_code}): {error}'.format(
                        r=r, error=xml.find('ERROR').text))
            except Exception as ex:
                raise EutilsNCBIError(
                    'Error parsing response object from NCBI: {}'.format(ex))

    if '<h1 class="error">Access Denied</h1>' in r.text:
        raise EutilsRequestError('Access Denied: {url}'.format(url=url))

    if self._cache and _cacheable(r.text):
        # N.B. we cache results even when skip_cache (read) is true
        self._cache[cache_key] = r.content
        _logger.info(
            'cached results for key {cache_key} ({url}, {sqas}) '.format(
                cache_key=cache_key, url=url, sqas=sqas))

    return r.content
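

# --- Illustrative sketch (not part of the client) --------------------------
# The cache key above deliberately canonicalizes requests: defining args are
# sorted before hashing and the identifying args (tool, email) are excluded,
# so equivalent queries map to the same cache entry regardless of arg order.
# The base URL below is assumed for illustration only.
import hashlib
import pickle

_demo_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + "einfo.fcgi"
_args_a = {"db": "gene", "retmode": "xml"}
_args_b = {"retmode": "xml", "db": "gene"}          # same args, different order
_key_a = hashlib.md5(pickle.dumps((_demo_url, sorted(_args_a.items())))).hexdigest()
_key_b = hashlib.md5(pickle.dumps((_demo_url, sorted(_args_b.items())))).hexdigest()
assert _key_a == _key_b                             # sorting makes the key order-independent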
def val_to(obj, compress):
    """serialize obj with pickle, zlib-compressing the result if compress is true"""
    pobj = pickle.dumps(obj)
    return zlib.compress(pobj) if compress else pobj
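

# --- Illustrative sketch --------------------------------------------------
# Round-trip for val_to: a value compressed on the way in must be zlib-
# decompressed before unpickling on the way out.  val_from is a hypothetical
# counterpart written here for illustration; it is not part of this module.
import pickle
import zlib

def val_from(blob, compressed):
    pobj = zlib.decompress(blob) if compressed else blob
    return pickle.loads(pobj)

_value = {"esearchresult": {"count": "3"}}
assert val_from(val_to(_value, compress=True), compressed=True) == _value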
def _query(self, path, args=None, skip_cache=False, skip_sleep=False):
    """return results for an NCBI query, possibly from the cache

    :param path: relative query path (e.g., 'einfo.fcgi')
    :param args: dictionary of query args
    :param skip_cache: whether to bypass the cache on reading
    :param skip_sleep: whether to bypass query throttling
    :rtype: xml string

    The args are joined with args required by NCBI (tool and email
    address) and with the default args declared when instantiating
    the client.
    """

    if args is None:
        args = {}

    def _cacheable(r):
        """return False if r shouldn't be cached (contains a no-cache meta
        line); True otherwise"""
        return not ("no-cache" in r  # obviate parsing, maybe
                    and lxml.etree.XML(r).xpath("//meta/@content='no-cache'"))

    # cache key: the key associated with this endpoint and args.  The
    # key intentionally excludes the identifying args (tool and email)
    # and is independent of the request method (GET/POST); args are
    # sorted for canonicalization.

    url = url_base + path

    # next 3 lines converted by 2to3 -nm
    defining_args = dict(list(self.default_args.items()) + list(args.items()))
    full_args = dict(list(self._ident_args.items()) + list(defining_args.items()))
    cache_key = hashlib.md5(pickle.dumps((url, sorted(defining_args.items())))).hexdigest()

    sqas = ';'.join([k + '=' + str(v) for k, v in sorted(args.items())])
    full_args_str = ';'.join([k + '=' + str(v) for k, v in sorted(full_args.items())])

    logging.debug("CACHE:" + str(skip_cache) + "//" + str(self._cache))
    if not skip_cache and self._cache:
        try:
            v = self._cache[cache_key]
            logger.debug('cache hit for key {cache_key} ({url}, {sqas}) '.format(
                cache_key=cache_key, url=url, sqas=sqas))
            return v
        except KeyError:
            logger.debug('cache miss for key {cache_key} ({url}, {sqas}) '.format(
                cache_key=cache_key, url=url, sqas=sqas))
            pass

    if not skip_sleep:
        req_int = (self.request_interval()
                   if isinstance(self.request_interval, collections.Callable)
                   else self.request_interval)
        sleep_time = req_int - (time.clock() - self._last_request_clock)
        if sleep_time > 0:
            logger.debug('sleeping {sleep_time:.3f}'.format(sleep_time=sleep_time))
            time.sleep(sleep_time)

    r = requests.post(url, full_args)
    self._last_request_clock = time.clock()
    logger.debug('post({url}, {fas}): {r.status_code} {r.reason}, {len})'.format(
        url=url, fas=full_args_str, r=r, len=len(r.text)))

    if not r.ok:
        # TODO: discriminate between types of errors
        try:
            xml = lxml.etree.fromstring(r.text.encode('utf-8'))
            raise EutilsRequestError('{r.reason} ({r.status_code}): {error}'.format(
                r=r, error=xml.find('ERROR').text))
        except Exception as ex:
            raise EutilsNCBIError('Error parsing response object from NCBI: {}'.format(ex))

    if any(bad_word in r.text for bad_word in ['<error>', '<ERROR>']):
        if r.text is not None:
            try:
                xml = lxml.etree.fromstring(r.text.encode('utf-8'))
                raise EutilsRequestError('{r.reason} ({r.status_code}): {error}'.format(
                    r=r, error=xml.find('ERROR').text))
            except Exception as ex:
                raise EutilsNCBIError('Error parsing response object from NCBI: {}'.format(ex))

    if '<h1 class="error">Access Denied</h1>' in r.text:
        raise EutilsRequestError('Access Denied: {url}'.format(url=url))

    if self._cache and _cacheable(r.text):
        # N.B. we cache results even when skip_cache (read) is true
        self._cache[cache_key] = r.content
        logger.info('cached results for key {cache_key} ({url}, {sqas}) '.format(
            cache_key=cache_key, url=url, sqas=sqas))

    return r.content
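

# --- Illustrative sketch --------------------------------------------------
# Standalone version of the throttling arithmetic used in _query, assuming
# time.monotonic() in place of the deprecated time.clock().  request_interval
# may be a plain number or a zero-argument callable, mirroring the
# isinstance(..., collections.Callable) check above.  The 0.334 s default
# reflects NCBI's guideline of at most ~3 requests/second without an API key.
import time

class _ThrottleSketch(object):
    def __init__(self, request_interval=0.334):
        self.request_interval = request_interval
        self._last_request_clock = 0.0

    def wait(self):
        req_int = (self.request_interval()
                   if callable(self.request_interval)
                   else self.request_interval)
        sleep_time = req_int - (time.monotonic() - self._last_request_clock)
        if sleep_time > 0:
            time.sleep(sleep_time)
        self._last_request_clock = time.monotonic()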
def key_to(obj):
    """serialize obj with pickle for use as a cache key"""
    return pickle.dumps(obj)
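

# --- Illustrative sketch --------------------------------------------------
# key_to and val_to read like (de)serialization hooks for a persistent,
# dict-like cache.  PickleCacheSketch below is a hypothetical in-memory
# stand-in showing how they might be paired; it is not part of this module.
import pickle
import zlib

class PickleCacheSketch(object):
    def __init__(self, compress=True):
        self._store = {}
        self._compress = compress

    def __setitem__(self, key, value):
        self._store[key_to(key)] = val_to(value, self._compress)

    def __getitem__(self, key):
        blob = self._store[key_to(key)]          # raises KeyError on a miss
        pobj = zlib.decompress(blob) if self._compress else blob
        return pickle.loads(pobj)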