def overpass_nsrid(nsrid='*',
                   bbox_scandinavia='[bbox=3.33984375,57.468589192089325,38.408203125,81.1203884020757]'):
    """Query the Overpass XAPI for objects tagged no-barnehage:nsrid=<nsrid>.

    bbox_scandinavia limits the request size; it should contain all of Norway.
    The raw XML response is cached to a local file for 1 day.

    Returns the XML response body (also on a fresh cache hit), or None if the
    request returned a non-200 status code.
    """
    filename = 'overpass_api_cache_%s_%s.xml' % (nsrid, bbox_scandinavia)
    # Sanitize the cache filename: drop characters that are awkward in
    # filenames and map the wildcard '*' to '-'.
    for old, new in ((',', ''), ('*', '-'), ('[', ''), (']', ''), ('=', '')):
        filename = filename.replace(old, new)
    logger.debug('cached overpass filename "%s"', filename)

    cached, outdated = file_util.cached_file(filename, old_age_days=1)
    if cached is not None and not outdated:
        return cached

    r = request_session.get('http://www.overpass-api.de/api/xapi_meta?*[no-barnehage:nsrid=%s]%s'
                            % (nsrid, bbox_scandinavia))
    ret = r.content
    if r.status_code == 200:
        file_util.write_file(filename, ret)
        return ret
    else:
        logger.error('Invalid status code %s', r.status_code)
        return None
def barnehagefakta_get_json(nbr_id, old_age_days=5, cache_dir='data', keep_history=True):
    """Returns json string for the given nbr_id, caches result to file in
    directory cache_dir. If the cached result is older than old_age_days a new
    version is fetched. By default (if keep_history is True) changes in the
    response will be detected and archived for further processing.

    In other words:
    (1) The first time this is called, barnehagefakta.no/api/barnehage/{nbr_id}
    is visited, the response is stored in
    cache_dir/barnehagefakta_no_nbrId{nbr_id}.json; the file may consist of
    only the string '404' if the request returned 404.
    (2a) Calling the function again with the same {nbr_id} within old_age_days
    will simply return the content of the previously stored file.
    (2b) Calling the function again with the same {nbr_id} after old_age_days
    has passed will visit barnehagefakta again, refreshing and returning the
    local .json file. If the response has changed since last time, the previous
    result is archived as
    cache_dir/barnehagefakta_no_nbrId{nbr_id}-{%Y-%m-%d}-OUTDATED.json

    May raise requests.ConnectionError if the connection fails.
    """
    filename = os.path.join(cache_dir, 'barnehagefakta_no_nbrId{0}.json'.format(nbr_id))
    cached, outdated = file_util.cached_file(filename, old_age_days)
    if cached is not None and not outdated:
        return cached

    url = 'http://barnehagefakta.no/api/barnehage/{0}'.format(nbr_id)
    r = request_session.get(url)  # may raise requests.ConnectionError, per docstring
    logger.info('requested %s, got %s', url, r)

    ret = None
    if r.status_code == 200:
        ret = r.content
    elif r.status_code == 404:
        # Cache the 404 too, so known-missing ids are not re-requested
        # until the cache expires.
        ret = '404'
    else:
        logger.error('Unknown status code %s', r.status_code)

    if ret is not None:
        if keep_history and cached is not None and not equal_json_responses(ret, cached):
            # Avoid overwriting the previous cache: archive it first.
            # Note: the date records when we *discovered* the change, which is
            # not all that logical, but we just need a unique filename
            # (assuming old_age_days > 1).
            d = datetime.utcnow()
            logger.warning('Change in response for id=%s, archiving old result', nbr_id)
            file_util.rename_file(filename, d.strftime("-%Y-%m-%d-OUTDATED"))
        file_util.write_file(filename, ret)
    return ret
def overpass_xml(xml, old_age_days=7, conflate_cache_filename=None): ''' Query the OverpassAPI with the given xml query, cache result for old_age_days days in file conflate_cache_filename (defaults to conflate_cache_<md5(xml)>.osm) ''' if conflate_cache_filename is None: filename = 'conflate_cache_' + hashlib.md5(xml).hexdigest() + '.osm' else: filename = conflate_cache_filename cached, outdated = file_util.cached_file(filename, old_age_days=old_age_days) if cached is not None and not(outdated): print 'Using overpass responce stored as "%s". Delete this file if you want an updated version' % filename return osmapis.OSMnsrid.from_xml(cached) o = osmapis.OverpassAPI() osm = o.interpreter(query) print 'Overpass responce stored as %s' % filename osm.save(filename) return osm
def wrapper_cached(self, callback, url, cache_filename, old_age_days=30, file_mode='', **kwargs):
    """Return the content of cache_filename if it is fresher than
    old_age_days; otherwise fetch url via callback, write the body to the
    cache file and return it.

    callback is a requests-style getter (e.g. self.get); **kwargs are passed
    through to it. file_mode is appended to 'r'/'w' when opening the cache
    file (e.g. 'b' for binary).

    Returns None on connection failure or a non-200 status code.
    """
    cached, outdated = file_util.cached_file(cache_filename, old_age_days, mode='r' + file_mode)
    if cached is not None and not outdated:
        return cached

    try:
        r = callback(url, **kwargs)
    except requests.ConnectionError as e:
        logger.error('Could not connect to %s, try again later? %s', url, e)
        return None

    logger.info('requested %s %s, got %s', url, cache_filename, r)
    if r.status_code == 200:
        ret = r.content
        file_util.write_file(cache_filename, ret, mode='w' + file_mode)
        return ret
    else:
        logger.error('Invalid status code %s', r.status_code)
        return None