def clear_cache(self):
    """Clear the cache database."""
    msg = 'Cached Data has been cleared'
    requests_cache.get_cache().clear()
    xbmcgui.Dialog().notification(_plugin, msg, _icon, 3000, False)
def cache_initiated():
    """Hackish function to test if there is an existing requests_cache"""
    try:
        requests_cache.get_cache()
        return True
    except AttributeError:
        return False
def uninstall_cache():
    """Deactivates the cache"""
    try:
        requests_cache.get_cache()
    except AttributeError:
        pass
    else:
        requests_cache.uninstall_cache()
def get_one_page(self, url, usecache=True):
    if not usecache:
        if requests_cache.get_cache().has_url(url):
            requests_cache.get_cache().delete_url(url)
    print("# fetching: %s" % url)
    i = requests.get(url, auth=(self.username, self.password))
    # print("# fetched: %s" % url)
    return i
def get_comments(self, usecache=True):
    for k in self.datadict.keys():
        if 'comments_url' in self.datadict[k]:
            if not usecache:
                if requests_cache.get_cache().has_url(self.datadict[k]['comments_url']):
                    requests_cache.get_cache().delete_url(self.datadict[k]['comments_url'])
            i = self.get_one_page(self.datadict[k]['comments_url'])
            idict = json.loads(i.content)
            self.datadict[k]['comments'] = idict
def has_url(url):
    try:
        cache = requests_cache.get_cache()
    except AttributeError:
        return False
    return cache.has_url(url)
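# A hedged usage sketch for the helper above: has_url() degrades to False when
# no cache is installed instead of raising AttributeError.
import requests
import requests_cache

requests_cache.install_cache('demo_cache')
requests.get('http://httpbin.org/get')
print(has_url('http://httpbin.org/get'))     # True once the response is cached
print(has_url('http://example.com/absent'))  # False: never requested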
def load_annotations(data_folder):
    contents = os.listdir(path=data_folder)
    try:
        infile = [i for i in contents if '.tsv' in i][0]
    except IndexError:
        raise Exception(f"No .tsv found in {contents}")
    # os.listdir returns bare filenames, so join with the data folder
    with open(os.path.join(data_folder, infile), 'r') as litcovid_tsv:
        tsv_reader = csv.reader(litcovid_tsv, delimiter='\t')
        for i in range(32):
            next(tsv_reader)
        pmids = [line[0] for line in tsv_reader]
    doc_id_set = set()
    requests_cache.install_cache('litcovid_cache')
    requests_cache.clear()
    s = requests_cache.CachedSession()
    s.hooks = {'response': throttle}
    logging.debug("requests_cache: %s", requests_cache.get_cache().responses.filename)
    for i, pmid in enumerate(pmids, start=1):
        # NCBI eutils API limits requests to 10/sec
        if i % 100 == 0:
            logging.info("litcovid.parser.load_annotations progress %s", i)
        doc = getPubMedDataFor(pmid, session=s)
        if doc['_id'] not in doc_id_set:
            yield doc
        doc_id_set.add(doc['_id'])
    remove_expired(s)
def get_timestamp(url):
    """
    Get the timestamp of an HTTP GET request.

    :param url: the URL of the request
    :return: the timestamp of the request, or None if the request is not in the cache
    """
    def _to_bytes(s, encoding='utf-8'):
        return bytes(s, encoding)

    def create_key(request):
        url, body = request.url, request.body
        key = hashlib.sha256()
        key.update(_to_bytes(request.method.upper()))
        key.update(_to_bytes(url))
        if request.body:
            key.update(_to_bytes(body))
        return key.hexdigest()

    def url_to_key(url):
        session = requests.Session()
        return create_key(session.prepare_request(requests.Request('GET', url)))

    # Get the cache from requests_cache
    results = requests_cache.get_cache()
    # Create the key according to the URL
    key_url = url_to_key(url)
    # results.responses is a dictionary with the following format:
    # { 'key': (requests_cache.backends object, timestamp), ... }
    # for example:
    # '4c28e3e4a61e325e520d9c02e0caee99e30c00951a223e67':
    #     (<requests_cache.backends.base._Store object at 0x12697e630>,
    #      datetime.datetime(2018, 10, 16, 0, 19, 8, 130204))
    if key_url in results.responses:
        back_obj, timestamp = results.responses[key_url]
        return timestamp
    return None
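# Hedged usage sketch for get_timestamp(): install a cache, make a request,
# then read back the stored timestamp. Assumes the sha256 key scheme above
# matches the installed requests_cache version (older releases keyed on
# method + url + body exactly as create_key() does).
import requests
import requests_cache

requests_cache.install_cache('timestamps')
requests.get('http://httpbin.org/get')
print(get_timestamp('http://httpbin.org/get'))  # datetime of the cached entry, or None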
def __init__(self, cache_name="cache", expire_after=300, http_get_timeout=5,
             delay_between_requests=8, headers=HEADERS):
    self.http_get_timeout = http_get_timeout
    self.delay_between_requests = delay_between_requests
    self.headers = headers
    self.expire_after = expire_after
    self.cache_name = cache_name
    # Install cache
    requests_cache.install_cache(self.cache_name, backend='sqlite',
                                 expire_after=self.expire_after)
    self._cache = requests_cache.get_cache()
    # Establish a session to be used for the GET requests.
    # IMPORTANT: the session must be established after installing the cache
    # if you want all your responses to be cached, i.e. after monkey-patching
    # requests. Ref.: https://bit.ly/2MCDCeD
    self._req_session = requests.Session()
    # Add headers to the request session
    self._req_session.headers = self.headers
    self._last_request_time = -sys.float_info.max
    self.response = None
def load_annotations(data_folder):
    infile = os.path.join(data_folder, "litcovid2BioCJSON.gz")
    assert os.path.exists(infile)
    with open_anyfile(infile, mode='r') as file:
        a = file.read()
        data_list = json.loads(a)
    # First item is a comment by the provider
    data = data_list[1]
    doc_id_set = set()
    with requests_cache.enabled('litcovid_cache', expire_after=expire_after):
        logging.debug("requests_cache: %s", requests_cache.get_cache().responses.filename)
        for i, rec in enumerate(data, start=1):
            # NCBI eutils API limits requests to 10/sec
            if i % 100 == 0:
                logging.info("litcovid.parser.load_annotations progress %s", i)
            doc = getPubMedDataFor(rec["pmid"])
            if not doc['from_cache']:
                time.sleep(.2)
            doc.pop('from_cache')
            if doc['_id'] not in doc_id_set:
                yield doc
            doc_id_set.add(doc['_id'])
    requests_cache.core.remove_expired_responses()
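# Minimal sketch of the requests_cache.enabled() pattern used above: caching
# is active only inside the `with` block, so the second request below goes to
# the network again.
import requests
import requests_cache

with requests_cache.enabled('scratch_cache'):
    requests.get('http://httpbin.org/get')  # stored in scratch_cache
requests.get('http://httpbin.org/get')      # not cached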
def weather():
    lat = request.form['lat']
    lon = request.form['lon']
    t_threshold = request.form['t_threshold']
    w_threshold = request.form['w_threshold']
    metric = request.form['metric']
    r = requests.get(
        'https://api.darksky.net/forecast/25526a9ace577c46efe91103749d39ed/'
        + lat + ',' + lon + '?units=' + metric
        + '&exclude=minutely,flags,hourly')
    old_stdout = sys.stdout
    log_file = open("cache.log", "w")
    sys.stdout = log_file
    print('Cache enabled for this request:', r.from_cache)
    print('Request:', requests_cache.get_cache())
    sys.stdout = old_stdout
    log_file.close()
    json = r.json()
    epoch = json['currently']['time']
    currenttime = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.localtime(epoch))
    return render_template('newweather.html', json=json, currenttime=currenttime,
                           t_threshold=t_threshold, w_threshold=w_threshold)
def install(self):
    """install cache"""
    requests  # require `import requests` in case this is essential for monkey-patching by requests_cache
    requests_cache.install_cache(self.path, include_get_headers=True)
    self.cache = requests_cache.get_cache()
    logging.info(
        f"requests-cache starting with {len(self.cache.responses)} cached responses"
    )
def _wait_for_limiting(self):
    # https://api.github.com/users/whatever
    url = 'https://api.github.com/users/' + self.username
    sleeptime = True
    while sleeptime:
        if requests_cache.get_cache().has_url(url):
            requests_cache.get_cache().delete_url(url)
        i = requests.get(url, auth=(self.username, self.password))
        sleeptime = i.headers.get('X-RateLimit-Reset', None)
        if sleeptime:
            sleeptime = calendar.timegm(time.gmtime()) - int(sleeptime)
            if sleeptime < 0:
                sleeptime = sleeptime * -1
            n_time = time.time()
            n_time = datetime.fromtimestamp(n_time).strftime('%Y-%m-%d %H:%M:%S')
            print("# %s sleeping %s" % (n_time, sleeptime))
            time.sleep(sleeptime)
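# A simpler hedged sketch of the same idea: sleep until the epoch given in
# GitHub's X-RateLimit-Reset header. This is an illustration, not the author's
# method; sleep_until_rate_limit_reset is a hypothetical helper.
import time

def sleep_until_rate_limit_reset(resp):
    reset = resp.headers.get('X-RateLimit-Reset')
    if reset:
        time.sleep(max(0, int(reset) - int(time.time())))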
def setup(self):
    ds_args = dict(id=self.name)
    defaults = dict(name='HelMet-kirjastot')
    self.data_source, _ = DataSource.objects.get_or_create(
        defaults=defaults, **ds_args)
    self.tprek_data_source = DataSource.objects.get(id='tprek')
    self.ahjo_data_source = DataSource.objects.get(id='ahjo')
    system_data_source_defaults = {'user_editable': True}
    self.system_data_source, _ = DataSource.objects.get_or_create(
        id=settings.SYSTEM_DATA_SOURCE_ID, defaults=system_data_source_defaults)
    org_args = dict(origin_id='u4804001010', data_source=self.ahjo_data_source)
    defaults = dict(name='Helsingin kaupunginkirjasto')
    self.organization, _ = Organization.objects.get_or_create(defaults=defaults, **org_args)
    org_args = dict(origin_id='00001', data_source=self.ahjo_data_source)
    defaults = dict(name='Helsingin kaupunki')
    self.city, _ = Organization.objects.get_or_create(defaults=defaults, **org_args)

    # Build a cached list of Places
    loc_id_list = [l[1] for l in LOCATIONS.values()]
    place_list = Place.objects.filter(
        data_source=self.tprek_data_source
    ).filter(origin_id__in=loc_id_list)
    self.tprek_by_id = {p.origin_id: p.id for p in place_list}

    # Create "Tapahtuma vain internetissä" location if not present
    defaults = dict(data_source=self.system_data_source,
                    publisher=self.city,
                    name='Internet',
                    description='Tapahtuma vain internetissä.')
    self.internet_location, _ = Place.objects.get_or_create(
        id=INTERNET_LOCATION_ID, defaults=defaults)

    try:
        yso_data_source = DataSource.objects.get(id='yso')
    except DataSource.DoesNotExist:
        yso_data_source = None
    if yso_data_source:
        # Build a cached list of YSO keywords
        cat_id_set = set()
        for yso_val in YSO_KEYWORD_MAPS.values():
            if isinstance(yso_val, tuple):
                for t_v in yso_val:
                    cat_id_set.add('yso:' + t_v)
            else:
                cat_id_set.add('yso:' + yso_val)
        keyword_list = Keyword.objects.filter(data_source=yso_data_source).\
            filter(id__in=cat_id_set)
        self.yso_by_id = {p.id: p for p in keyword_list}
    else:
        self.yso_by_id = {}

    if self.options['cached']:
        requests_cache.install_cache('helmet')
        self.cache = requests_cache.get_cache()
    else:
        self.cache = None
def get_one_page(self, url, usecache=True, ignoreerrors=True):
    limited = True
    while limited:
        if not usecache:
            if requests_cache.get_cache().has_url(url):
                requests_cache.get_cache().delete_url(url)
        if self.username and self.password:
            print("# fetching (BASIC): %s" % url)
            i = requests.get(url, auth=(self.username, self.password))
        elif self.token:
            print("# fetching (TOKEN): %s" % url)
            i = requests.get(url, headers={'Authorization': "token %s" % self.token})
        else:
            print("# fetching (NONE): %s" % url)
            i = requests.get(url)
        if not i.ok:
            print("# ERROR: %s for %s" % (i.reason, url))
            # import epdb; epdb.st()
            if 'rate limit exceeded' in i.text:
                self._wait_for_limiting()
            elif i.reason == 'Not Found':
                limited = False
            else:
                import epdb; epdb.st()
                sys.exit(1)
        else:
            data = None
            try:
                data = json.loads(i.content)
            except ValueError:
                pass
            if data:
                if 'documentation_url' in data:
                    limited = True
                    print("# hit rate limit, sleeping 300s")
                    time.sleep(300)
                else:
                    limited = False
            else:
                limited = False
    return i
def run(self):
    def _parse_date(s):
        if s is None:
            return datetime.now()
        return datetime(*parsedate(s)[:6])

    self.start_time = clock()
    try:
        requests_cache.install_cache('.cache')
        if not self.enable_cache:
            log.debug("removing '%s' from cache" % self.url)
            requests_cache.get_cache().delete_url(self.url)
        log.debug("fetching '%s'" % self.url)
        if self.url.startswith('file://'):
            path = self.url[7:]
            if not os.path.exists(path):
                raise IOError("file not found: %s" % path)
            with open(path, 'r') as fd:
                self.result = fd.read()
            self.cached = False
            self.date = datetime.now()
            self.last_modified = datetime.fromtimestamp(os.stat(path).st_mtime)
        else:
            self.resp = requests.get(self.url, timeout=60, verify=False)
            self.last_modified = _parse_date(
                self.resp.headers.get('last-modified',
                                      self.resp.headers.get('date', None)))
            self.date = _parse_date(self.resp.headers['date'])
            self.cached = getattr(self.resp, 'from_cache', False)
            self.status = self.resp.status_code
            if self.resp.status_code != 200:
                raise IOError(self.resp.reason)
            self.result = self.resp.content
        log.debug("got %d bytes from '%s'" % (len(self.result), self.url))
    except Exception as ex:
        traceback.print_exc()
        log.warn("unable to fetch '%s': %s" % (self.url, ex))
        self.ex = ex
        self.result = None
def main():
    # The real request will only be made once; afterward, the cached response is used
    for i in range(5):
        response = requests.get('http://httpbin.org/get')

    # This is more obvious when calling a slow endpoint
    for i in range(5):
        response = requests.get('http://httpbin.org/delay/2')

    # Caching can be disabled if we want to get a fresh page and not cache it
    with requests_cache.disabled():
        print(requests.get('http://httpbin.org/ip').text)

    # Get some debugging info about the cache
    print(requests_cache.get_cache())
    print('Cached URLs:', requests_cache.get_cache().urls)

    # Uninstall to remove caching from all requests functions
    requests_cache.uninstall_cache()
def _url_in_cache(url):
    """
    If requests_cache is in use, return whether or not the URL is in the cache.
    If not, return False.
    """
    try:
        return requests_cache.get_cache().has_url(url)
    except AttributeError as e:
        # requests_cache not enabled
        if str(e) == "'Session' object has no attribute 'cache'":
            return False
        raise
def generate_csl_items(args, citekeys_df):
    """
    Generate CSL (citeproc) items for standard_citekeys in citekeys_df.
    Writes references.json to disk and logs warnings for potential problems.
    """
    # Read manual references (overrides) in JSON CSL
    manual_refs = load_manual_references(args.manual_references_paths)

    requests_cache.install_cache(args.requests_cache_path, include_get_headers=True)
    cache = requests_cache.get_cache()
    if args.clear_requests_cache:
        logging.info('Clearing requests-cache')
        requests_cache.clear()
    logging.info(
        f'requests-cache starting with {len(cache.responses)} cached responses'
    )

    csl_items = list()
    failures = list()
    for standard_citekey in citekeys_df.standard_citekey.unique():
        if standard_citekey in manual_refs:
            csl_items.append(manual_refs[standard_citekey])
            continue
        elif standard_citekey.startswith('raw:'):
            logging.error(
                f'CSL JSON Data with a standard_citekey of {standard_citekey!r} not found in manual-references.json. '
                'Metadata must be provided for raw citekeys.')
            failures.append(standard_citekey)
        try:
            csl_item = citekey_to_csl_item(standard_citekey)
            csl_items.append(csl_item)
        except Exception:
            logging.exception(f'Citeproc retrieval failure for {standard_citekey!r}')
            failures.append(standard_citekey)

    logging.info(
        f'requests-cache finished with {len(cache.responses)} cached responses'
    )
    requests_cache.uninstall_cache()

    if failures:
        message = 'CSL JSON Data retrieval failed for the following standardized citation keys:\n{}'.format(
            '\n'.join(failures))
        logging.error(message)

    # Write JSON CSL bibliography for Pandoc.
    with args.references_path.open('w', encoding='utf-8') as write_file:
        json.dump(csl_items, write_file, indent=2, ensure_ascii=False)
        write_file.write('\n')
    return csl_items
def main():
    # Once cached, the delayed page will be served from the cache;
    # redirects are also handled
    for i in range(5):
        requests.get('http://httpbin.org/delay/2')
    r = requests.get('http://httpbin.org/redirect/5')
    print(r.text)

    # And if we need to get a fresh page or don't want to cache it?
    with requests_cache.disabled():
        print(requests.get('http://httpbin.org/ip').text)

    # Debugging info about the cache
    print(requests_cache.get_cache())
def setup(self):
    ds_args = dict(id=self.name)
    defaults = dict(name='HelMet-kirjastot')
    self.data_source, _ = DataSource.objects.get_or_create(
        defaults=defaults, **ds_args)
    self.tprek_data_source = DataSource.objects.get(id='tprek')
    ahjo_ds, _ = DataSource.objects.get_or_create(defaults=defaults, **ds_args)
    org_args = dict(id='ahjo:45400')
    defaults = dict(name='Helsingin kaupunginkirjasto', data_source=ahjo_ds)
    self.organization, _ = Organization.objects.get_or_create(
        defaults=defaults, **org_args)

    # Build a cached list of Places
    loc_id_list = [l[1] for l in LOCATIONS.values()]
    place_list = Place.objects.filter(
        data_source=self.tprek_data_source).filter(origin_id__in=loc_id_list)
    self.tprek_by_id = {p.origin_id: p.id for p in place_list}

    try:
        yso_data_source = DataSource.objects.get(id='yso')
    except DataSource.DoesNotExist:
        yso_data_source = None
    if yso_data_source:
        # Build a cached list of YSO keywords
        cat_id_set = set()
        for yso_val in YSO_KEYWORD_MAPS.values():
            if isinstance(yso_val, tuple):
                for t_v in yso_val:
                    cat_id_set.add('yso:' + t_v)
            else:
                cat_id_set.add('yso:' + yso_val)
        keyword_list = Keyword.objects.filter(data_source=yso_data_source).\
            filter(id__in=cat_id_set)
        self.yso_by_id = {p.id: p for p in keyword_list}
    else:
        self.yso_by_id = {}

    if self.options['cached']:
        requests_cache.install_cache('helmet')
        self.cache = requests_cache.get_cache()
    else:
        self.cache = None
def _is_response_cached(method, full_url):
    """Returns True if the response to a GET request is in requests_cache.

    Args:
        method (str): HTTP verb ('GET', 'POST', etc.)
        full_url (str): URL, including the protocol

    Returns:
        is_cached (bool):
    """
    if method != 'GET':
        return False  # pragma: no cover
    try:
        cache = requests_cache.get_cache()
    except AttributeError:  # pragma: no cover
        cache = None
    return cache.has_url(full_url) if cache else False
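# Hedged usage of the helper above: non-GET methods are never reported as
# cached, and the check is safe to call before any cache is installed.
import requests
import requests_cache

print(_is_response_cached('POST', 'http://httpbin.org/post'))  # always False
requests_cache.install_cache('check_cache')
requests.get('http://httpbin.org/get')
print(_is_response_cached('GET', 'http://httpbin.org/get'))    # True after caching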
def generate_csl_items(args, citation_df):
    """
    Generate CSL (citeproc) items for standard_citations in citation_df.
    Writes references.json to disk and logs warnings for potential problems.
    """
    # Read manual references (overrides) in JSON CSL
    manual_refs = read_manual_references(args.manual_references_path)

    requests_cache.install_cache(args.requests_cache_path, include_get_headers=True)
    cache = requests_cache.get_cache()
    if args.clear_requests_cache:
        logging.info('Clearing requests-cache')
        requests_cache.clear()
    logging.info(
        f'requests-cache starting with {len(cache.responses)} cached responses'
    )

    csl_items = list()
    failures = list()
    for citation in citation_df.standard_citation.unique():
        if citation in manual_refs:
            csl_items.append(manual_refs[citation])
            continue
        try:
            citeproc = citation_to_citeproc(citation)
            csl_items.append(citeproc)
        except Exception:
            logging.exception(f'Citeproc retrieval failure for {citation}')
            failures.append(citation)

    logging.info(
        f'requests-cache finished with {len(cache.responses)} cached responses'
    )
    requests_cache.uninstall_cache()

    if failures:
        message = 'Citeproc retrieval failed for:\n{}'.format('\n'.join(failures))
        logging.error(message)

    # Write JSON CSL bibliography for Pandoc.
    with args.references_path.open('w') as write_file:
        json.dump(csl_items, write_file, indent=2, ensure_ascii=False)
        write_file.write('\n')
    return csl_items
def call_api(url, doi):
    req = requests.Request('GET', url + doi)
    cache = requests_cache.get_cache()
    prepped = requests.Session().prepare_request(req)
    cache_key = cache.create_key(prepped)
    try:
        response = cache.get_response(cache_key)
    except (ImportError, TypeError):
        response = None
    if response:
        return response.json()
    return call_api_server(url, doi)
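# Hypothetical companion for call_api() above: call_api_server is not part of
# requests_cache, so this is one plausible fallback that fetches from the live
# API on a cache miss.
import requests

def call_api_server(url, doi):
    resp = requests.get(url + doi)
    resp.raise_for_status()
    return resp.json()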
def get_cached_ids(self):
    with requests_cache.enabled(cache_name=GENAPI_CACHE, backend=CACHE_BACKEND):
        cached_object = requests_cache.get_cache()
        responses = [cached_object.get_response(response)
                     for response in cached_object.responses]
        gen_ids = []
        for url in [response.url for response in responses]:
            gen_id = re.search(r'{}(.*?)/'.format(self._data_endpoint), url)
            if gen_id is not None:
                gen_ids.append(gen_id.group(1))
        return gen_ids
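# Quick illustration of the ID extraction above, using a hypothetical
# _data_endpoint value of 'data/' and a sample cached URL.
import re

url = 'https://server.example.com/data/12345/download'
match = re.search(r'{}(.*?)/'.format('data/'), url)
print(match.group(1))  # '12345'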
def setup(self):
    self.tprek_data_source = DataSource.objects.get(id='tprek')
    ds_args = dict(id=self.name)
    ds_defaults = dict(name='City of Espoo')
    self.data_source, _ = DataSource.objects.get_or_create(
        defaults=ds_defaults, **ds_args)
    org_args = dict(origin_id='kaupunki', data_source=self.data_source)
    org_defaults = dict(name='Espoon kaupunki')
    self.organization, _ = Organization.objects.get_or_create(
        defaults=org_defaults, **org_args)
    self._build_cache_places()
    self._cache_yso_keywords()
    if self.options['cached']:
        requests_cache.install_cache('espoo')
        self.cache = requests_cache.get_cache()
    else:
        self.cache = None
def setup(self):
    self.tprek_data_source = DataSource.objects.get(id='tprek')
    ds_args = dict(id=self.name)
    ds_defaults = dict(name='City of Espoo')
    self.data_source, _ = DataSource.objects.get_or_create(
        defaults=ds_defaults, **ds_args)
    org_args = dict(id='espoo:kaupunki')
    org_defaults = dict(name='Espoon kaupunki', data_source=self.data_source)
    self.organization, _ = Organization.objects.get_or_create(
        defaults=org_defaults, **org_args)
    self._build_cache_places()
    self._cache_yso_keywords()
    if self.options['cached']:
        requests_cache.install_cache('espoo')
        self.cache = requests_cache.get_cache()
    else:
        self.cache = None
def _is_url_in_cache(*args, **kwargs):
    """
    Return True if the request has been cached or False otherwise.
    """
    # Only include allowed arguments for a PreparedRequest.
    # (getfullargspec replaces the getargspec call removed in Python 3.11.)
    allowed_args = inspect.getfullargspec(
        requests.models.PreparedRequest.prepare).args
    # 'self' is in there since .prepare() is a method.
    allowed_args.remove('self')
    kwargs_cleaned = {}
    for key, value in dict(kwargs).items():
        if key in allowed_args:
            kwargs_cleaned[key] = value
    prepared_request = _prepare(*args, **kwargs_cleaned)
    request_hash = _get_hash(prepared_request)
    try:
        return requests_cache.get_cache().has_key(request_hash)
    except AttributeError as e:
        # requests_cache not enabled
        if str(e) == "'Session' object has no attribute 'cache'":
            return False
        raise
def main(argv):
    parser = build_cli_parser()
    opts, args = parser.parse_args(argv)
    if not opts.url or not opts.token:
        parser.print_help()
        sys.exit(-1)

    global cache_file_name
    cache_file_name = opts.cache_name
    requests_cache.install_cache(cache_file_name, allowable_methods=('GET', 'POST'))

    global cbserverurl
    cbserverurl = opts.url

    # Build a cbapi object
    global cb
    cb = cbapi.CbApi(opts.url, token=opts.token, ssl_verify=opts.ssl_verify)

    # Run tests to get cached responses
    large_process_search()
    large_binary_search()
    test_sensors()
    test_watchlist()
    test_base_endpoints()
    test_feeds()

    cache = requests_cache.get_cache()
def extract_datareader(tickers, data_source="av-daily-adjusted", pause=None):
    """
    Retrieve daily data with web.DataReader.

    Parameters
    ----------
    tickers : str or list of strs
        Ticker or tickers to extract
    data_source : str, optional
        The data source ("quandl", "av-daily-adjusted", "iex", "fred", "ff", etc.)
    pause : float, optional
        Time, in seconds, to pause between consecutive queries of chunks

    Warns
    -----
    On remote data error or invalid ticker. If using the AlphaVantage API, an
    extra pause is made in those cases (see notes).

    Yields
    ------
    ticker : str
        Short name for the extracted data
    data : dataframe
        A dataframe with all extracted data
    metadata : dict
        Extra information about the extracted data

    Notes
    -----
    The index is forced to datetime. An attempt is made not to go over
    AlphaVantage API limits (maximum of five calls per minute for non-premium
    accounts). This means that some pauses are made, but they are not on the
    conservative side, so be aware of remote data errors and try again later.

    Examples
    --------
    >>> for ticker, data, metadata in extract_datareader("^BVSP"):
    ...     print(ticker, metadata)
    ^BVSP {'price_column': 'adjusted close'}

    >>> for ticker, data, metadata in extract_datareader(["PETR4.SAO", "ITUB4.SAO"], pause=1.0):
    ...     print(ticker, metadata)
    PETR4.SAO {'price_column': 'adjusted close'}
    ITUB4.SAO {'price_column': 'adjusted close'}

    >>> for ticker, data, metadata in extract_datareader("BCB/11", data_source="quandl"):
    ...     print(ticker, metadata)
    BCB/11 {'price_column': 'Value'}
    """
    global _last_api_call

    if isinstance(tickers, str):
        tickers = [tickers]
    if pause is None:
        if data_source == "av-daily-adjusted":
            pause = 12.0  # max. 5 calls per minute for non-premium accounts
        else:
            pause = 1.0
    extra_pause = 0.25 * pause

    if data_source == "quandl":
        metadata = {"price_column": "Value"}
    else:
        metadata = {"price_column": "adjusted close"}

    for ticker in tickers:
        end = timer()
        if end - _last_api_call > pause:
            _last_api_call = end
        else:
            time_interval = end - _last_api_call + np.random.uniform(0.0, extra_pause)
            logging.info(f"Waiting {time_interval:.3f} seconds.")
            time.sleep(time_interval)
            _last_api_call = timer()

        logging.info(f"Attempting to retrieve {ticker}.")
        try:
            # TODO: support start_date and end_date
            data = web.DataReader(ticker, data_source)
        except ValueError as e:
            logging.warning(e)
            time_interval = np.random.uniform(0.0, 2.0 * pause)
            logging.info(f"Waiting {time_interval:.3f} extra seconds.")
            time.sleep(time_interval)
            continue
        except pandas_datareader._utils.RemoteDataError as e:
            logging.warning(f"Remote data error {e} for {ticker}.")
            time_interval = np.random.uniform(0.0, 2.0 * pause)
            logging.info(f"Waiting {time_interval:.3f} extra seconds.")
            time.sleep(time_interval)
            requests_cache.get_cache().remove_old_entries(datetime.datetime.now())
            continue

        data.index = pd.to_datetime(data.index)
        yield ticker, data, metadata
from pandas import read_csv
import time
import datetime  # import the module itself so datetime.timedelta below resolves
import json
from biothings.utils.common import open_anyfile
from biothings import config
logger = config.logger
import requests_cache

expire_after = datetime.timedelta(days=7)
requests_cache.install_cache('litcovidtopics_cache', expire_after=expire_after)
logger.debug("requests_cache: %s", requests_cache.get_cache().responses.filename)


def get_pmids(res):
    data = []
    litcovid_data = res.text.split('\n')[34:]
    for line in litcovid_data:
        if line.startswith('#') or line.startswith('p'):
            continue
        if len(line.strip()) < 5:
            continue
        data.append('pmid' + line.split('\t')[0])
    return data


def get_topics():
def schema_validate(instance, options):
    """Perform STIX JSON Schema validation against the input JSON.
    Find the correct schema by looking at the 'type' property of the
    `instance` JSON object.

    Args:
        instance: A STIX JSON string.
        options: ValidationOptions instance with validation options for this
            validation run.

    Returns:
        A dictionary of validation results
    """
    if 'type' not in instance:
        raise ValidationError("Input must be an object with a 'type' property.")

    # Find and load the schema
    try:
        schema_path = find_schema(options.schema_dir, instance['type'])
        schema = load_schema(schema_path)
    except (KeyError, TypeError):
        # Assume a custom object with no schema
        try:
            schema_path = find_schema(options.schema_dir, 'core')
            schema = load_schema(schema_path)
        except (KeyError, TypeError):
            raise SchemaInvalidError("Cannot locate a schema for the object's "
                                     "type, nor the base schema (core.json).")

    # Validate against schemas for specific object types later
    if instance['type'] == 'bundle':
        schema['properties']['objects'] = {
            "objects": {
                "type": "array",
                "minItems": 1
            }
        }
    elif instance['type'] == 'observed-data':
        schema['allOf'][1]['properties']['objects'] = {
            "objects": {
                "type": "object",
                "minProperties": 1
            }
        }

    # Validate the schema first
    try:
        CustomDraft4Validator.check_schema(schema)
    except schema_exceptions.SchemaError as e:
        raise SchemaInvalidError('Invalid JSON schema: ' + str(e))

    # Cache data from external sources; used in some checks
    if not options.no_cache:
        requests_cache.install_cache(expire_after=datetime.timedelta(weeks=1))
    if options.refresh_cache:
        now = datetime.datetime.utcnow()
        requests_cache.get_cache().remove_old_entries(now)

    validator = load_validator(schema_path, schema, options)
    output.info("Running the following additional checks: %s."
                % ", ".join(x.__name__ for x in validator.get_list()))

    # Actual validation of JSON document
    try:
        some_errors = validator.iter_errors(instance)
        more_errors = validator.iter_errors_more(instance)
        warnings = validator.iter_errors_more(instance, False)

        if options.strict:
            chained_errors = chain(some_errors, more_errors, warnings)
            warnings = []
        else:
            chained_errors = chain(some_errors, more_errors)
            warnings = [pretty_error(x, options.verbose) for x in warnings]
    except schema_exceptions.RefResolutionError:
        raise SchemaInvalidError('Invalid JSON schema: a JSON reference '
                                 'failed to resolve')

    # List of error generators and message prefixes (to denote which object
    # the error comes from)
    error_gens = [(chained_errors, '')]

    # Validate each object in a bundle separately
    if instance['type'] == 'bundle' and 'objects' in instance:
        for sdo in instance['objects']:
            object_validate(sdo, options, error_gens)
    else:
        object_validate(instance, options, error_gens)

    # Clear requests cache if the command-line flag was set
    if options.clear_cache:
        now = datetime.datetime.utcnow()
        requests_cache.get_cache().remove_old_entries(now)

    # Prepare the list of errors
    error_list = []
    for gen, prefix in error_gens:
        for error in gen:
            msg = prefix + pretty_error(error, options.verbose)
            error_list.append(SchemaError(msg))

    valid = not error_list
    return ValidationResults(is_valid=valid, errors=error_list,
                             warnings=warnings)
def install_cache(path='cache'):
    """Activates the cache located at path"""
    try:
        requests_cache.get_cache()
    except AttributeError:
        requests_cache.install_cache(path)
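# A minimal sketch of how these helpers compose, assuming the definitions of
# cache_initiated(), uninstall_cache(), and install_cache() shown earlier.
install_cache('demo')    # installs only if no cache is active yet
assert cache_initiated()
install_cache('other')   # no-op: a cache is already installed
uninstall_cache()
assert not cache_initiated()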
def generate_csl_items(
    citekeys: list,
    manual_refs: dict = {},
    requests_cache_path: Optional[str] = None,
    clear_requests_cache: Optional[bool] = False,
) -> list:
    """
    Generate CSL (citeproc) items for standard_citekeys in citekeys_df.

    Parameters:
    - citekeys: list of standard_citekeys
    - manual_refs: mapping from standard_citekey to csl_item for manual references
    - requests_cache_path: path for the requests cache database.
      Passed as cache_name to `requests_cache.install_cache`.
      requests_cache may append an extension to this path, so it is not always
      the exact path to the cache. If None, do not use requests_cache.
    - clear_requests_cache: If True, clear the requests cache before
      generating citekey metadata.
    """
    # Deduplicate citations
    citekeys = list(dict.fromkeys(citekeys))

    # Install cache
    if requests_cache_path is not None:
        requests  # require `import requests` in case this is essential for monkey-patching by requests_cache
        requests_cache.install_cache(requests_cache_path, include_get_headers=True)
        cache = requests_cache.get_cache()
        if clear_requests_cache:
            logging.info("Clearing requests-cache")
            requests_cache.clear()
        logging.info(
            f"requests-cache starting with {len(cache.responses)} cached responses"
        )

    csl_items = list()
    failures = list()
    for standard_citekey in citekeys:
        if standard_citekey in manual_refs:
            csl_items.append(manual_refs[standard_citekey])
            continue
        elif standard_citekey.startswith("raw:"):
            logging.error(
                f"CSL JSON Data with a standard_citekey of {standard_citekey!r} not found in manual-references.json. "
                "Metadata must be provided for raw citekeys.")
            failures.append(standard_citekey)
        try:
            csl_item = citekey_to_csl_item(standard_citekey)
            csl_items.append(csl_item)
        except Exception:
            logging.exception(f"Citeproc retrieval failure for {standard_citekey!r}")
            failures.append(standard_citekey)

    # Uninstall cache
    if requests_cache_path is not None:
        logging.info(
            f"requests-cache finished with {len(cache.responses)} cached responses"
        )
        requests_cache.uninstall_cache()

    if failures:
        message = "CSL JSON Data retrieval failed for the following standardized citation keys:\n{}".format(
            "\n".join(failures))
        logging.error(message)

    return csl_items
from datetime import datetime, timedelta

import requests
import requests_cache

requests_cache.install_cache('demo_cache')
CACHE = requests_cache.get_cache()

from django.core.management.base import BaseCommand
from django.utils.text import slugify

from organisations.models import (Organisation, OrganisationDivision,
                                  OrganisationDivisionSet)
from organisations.constants import PARENT_TO_CHILD_AREAS


class Command(BaseCommand):
    skip_gss = [
        "E12000007",
        "W08000001",  # See import_welsh_areas
    ]
    BASE = "http://mapit.mysociety.org"

    def add_arguments(self, parser):
        parser.add_argument('--always_pick_option', action='store',
                            type=int, default=0)

    def handle(self, **options):
        self.always_pick_option = int(options['always_pick_option'])
def delete_cached_url(url):
    """Deletes the given URL from the cache"""
    try:
        requests_cache.get_cache().delete_url(url)
    except AttributeError:
        pass
def clear_requests_cache():
    """
    Clears all cached responses.
    """
    now = datetime.datetime.utcnow()
    requests_cache.get_cache().remove_old_entries(now)
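# Hedged usage sketch: remove_old_entries(utcnow) drops every entry created
# before "now", which effectively empties the cache; requests_cache also
# exposes requests_cache.clear() for the same effect.
import requests
import requests_cache

requests_cache.install_cache('tmp_cache')
requests.get('http://httpbin.org/get')
clear_requests_cache()  # cache is now empty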
import time

import grequests
import requests
import requests_cache

# The original example used the legacy requests_cache.configure() alias;
# install_cache() is the equivalent call used elsewhere in these examples.
requests_cache.install_cache("example_cache")


def main():
    # Once cached, the delayed page will be taken from the cache;
    # redirects are also handled
    for i in range(5):
        requests.get("http://httpbin.org/delay/2")
    r = requests.get("http://httpbin.org/redirect/5")
    print(r.text)

    # What about async? Concurrent requests are cached too. The original
    # example used the long-removed `requests.async` module; grequests, its
    # successor, is substituted here.
    rs = [grequests.get("http://httpbin.org/delay/%s" % i) for i in range(5)]
    for r in grequests.map(rs):
        print(r.text)

    # And if we need to get a fresh page or don't want to cache it?
    with requests_cache.disabled():
        print(requests.get("http://httpbin.org/ip").text)

    # Debugging info about the cache
    print(requests_cache.get_cache())


if __name__ == "__main__":
    t = time.time()
    main()
    print("Elapsed: %.3f seconds" % (time.time() - t))