Esempio n. 1
0
def overpass_nsrid(nsrid='*',
                   bbox_scandinavia = '[bbox=3.33984375,57.468589192089325,38.408203125,81.1203884020757]'):
    """Return the Overpass xapi response for elements tagged no-barnehage:nsrid.

    nsrid is inserted into the ``no-barnehage:nsrid=`` predicate of the query
    (the default '*' is the wildcard).
    bbox_scandinavia is appended to the query to limit the request size,
    the default box should contain all of Norway.

    The result is looked up through file_util.cached_file (old_age_days=1)
    before any request is made. Returns the cached or downloaded content,
    or None if the server answered with a status code other than 200.
    """
    # Build the cache filename from the arguments: the '*' wildcard becomes
    # '-' and the punctuation of the query syntax is dropped.
    cache_name = 'overpass_api_cache_%s_%s.xml' % (nsrid, bbox_scandinavia)
    cache_name = cache_name.replace('*', '-')
    for unwanted in (',', '[', ']', '='):
        cache_name = cache_name.replace(unwanted, '')
    logger.debug('cached overpass filename "%s"', cache_name)

    cached, outdated = file_util.cached_file(cache_name, old_age_days=1)
    if not (cached is None or outdated):
        return cached

    url = 'http://www.overpass-api.de/api/xapi_meta?*[no-barnehage:nsrid=%s]%s' % (nsrid, bbox_scandinavia)
    response = request_session.get(url)
    body = response.content

    if response.status_code != 200:
        logger.error('Invalid status code %s', response.status_code)
        return None

    # Only successful responses are written to the cache.
    file_util.write_file(cache_name, body)
    return body
def barnehagefakta_get_json(nbr_id, old_age_days=5, cache_dir='data', keep_history=True):
    """Return the json string for the given nbr_id, cached on disk in cache_dir.

    If the cached result is older than old_age_days a new version is fetched.
    By default (keep_history is True) a change in the response is detected
    and the previous result is archived for further processing.

    In other words:
    (1) The first time this is called, barnehagefakta.no/api/barnehage/{nbr_id}
    is visited and the response is stored in
    cache_dir/barnehagefakta_no_nbrId{nbr_id}.json,
    the file consists of only the string '404' if the request returned 404.
    (2a) Calling the function again with the same nbr_id within old_age_days
    simply returns the content of the previously stored file.
    (2b) Calling the function again with the same nbr_id after old_age_days
    has passed visits barnehagefakta again, refreshing and returning the
    local .json file. If the response has changed since last time, the
    previous result is archived as
    cache_dir/barnehagefakta_no_nbrId{nbr_id}-{%Y-%m-%d}-OUTDATED.json

    Returns None (and leaves the cache untouched) for any status code other
    than 200 and 404.

    May raise requests.ConnectionError if the connection fails.
    """
    basename = 'barnehagefakta_no_nbrId{0}.json'.format(nbr_id)
    filename = os.path.join(cache_dir, basename)

    cached, outdated = file_util.cached_file(filename, old_age_days)
    if not (cached is None or outdated):
        return cached

    url = 'http://barnehagefakta.no/api/barnehage/{0}'.format(nbr_id)
    # A connection failure is not caught here, it propagates to the caller.
    r = request_session.get(url)
    logger.info('requested %s, got %s', url, r)

    status = r.status_code
    if status == 200:
        ret = r.content
    elif status == 404:
        ret = '404'
    else:
        logger.error('Unknown status code %s', status)
        return None

    # Archive the previous cache file instead of overwriting it when the
    # response differs from what was stored.
    changed = keep_history and cached is not None and not equal_json_responses(ret, cached)
    if changed:
        # note: the date is the day the change was discovered, not the day the
        # old data became outdated, it only needs to give a unique filename
        # (assuming old_age_days > 1).
        archive_suffix = datetime.utcnow().strftime("-%Y-%m-%d-OUTDATED")
        logger.warning('Change in response for id=%s, archiving old result', nbr_id)
        file_util.rename_file(filename, archive_suffix)

    file_util.write_file(filename, ret)
    return ret
Esempio n. 3
0
def overpass_xml(xml, old_age_days=7, conflate_cache_filename=None):
    ''' Query the OverpassAPI with the given xml query, cache result for old_age_days days
    in file conflate_cache_filename (defaults to conflate_cache_<md5(xml)>.osm)

    Returns the cached content parsed with osmapis.OSMnsrid.from_xml when
    file_util.cached_file reports a cache entry that is not outdated,
    otherwise the object returned by osmapis.OverpassAPI().interpreter(xml),
    after it has been saved to the cache file.
    '''
    if conflate_cache_filename is None:
        # hashlib only accepts bytes: encode text queries so that non-ascii
        # content (and python3 str) can be hashed, byte strings pass through.
        if isinstance(xml, type(u'')):
            xml_bytes = xml.encode('utf-8')
        else:
            xml_bytes = xml
        filename = 'conflate_cache_' + hashlib.md5(xml_bytes).hexdigest() + '.osm'
    else:
        filename = conflate_cache_filename

    cached, outdated = file_util.cached_file(filename, old_age_days=old_age_days)
    if cached is not None and not(outdated):
        # single-argument print(...) behaves the same as a statement and as a function
        print('Using overpass responce stored as "%s". Delete this file if you want an updated version' % filename)
        return osmapis.OSMnsrid.from_xml(cached)

    o = osmapis.OverpassAPI()
    # the query to run is the xml argument (was the undefined name 'query')
    osm = o.interpreter(xml)

    print('Overpass responce stored as %s' % filename)
    osm.save(filename)

    return osm
Esempio n. 4
0
    def wrapper_cached(self,
                       callback,
                       url,
                       cache_filename,
                       old_age_days=30,
                       file_mode='',
                       **kwargs):
        """Return the content of url, using cache_filename as an on-disk cache.

        The cache is consulted through file_util.cached_file with
        old_age_days, a hit that is not outdated is returned directly.
        Otherwise callback(url, **kwargs) is called, it must return a
        response object with status_code and content attributes.
        On status code 200 the content is written to cache_filename and
        returned.

        file_mode is appended to the 'r' and 'w' modes used for reading
        and writing the cache file.

        Returns None if the connection fails (requests.ConnectionError)
        or the status code is not 200, both cases are logged as errors.
        """
        read_mode = 'r' + file_mode
        cached, outdated = file_util.cached_file(cache_filename,
                                                 old_age_days,
                                                 mode=read_mode)
        if not (cached is None or outdated):
            return cached

        # note: downloading with urlretrieve was tried here earlier because of
        # half-downloaded files with the requests library, it is not in use.
        try:
            response = callback(url, **kwargs)
        except requests.ConnectionError as err:
            logger.error('Could not connect to %s, try again later? %s', url,
                         err)
            return None

        logger.info('requested %s %s, got %s', url, cache_filename, response)
        if response.status_code != 200:
            logger.error('Invalid status code %s', response.status_code)
            return None

        # Only successful responses end up in the cache file.
        content = response.content
        file_util.write_file(cache_filename, content, mode='w' + file_mode)
        return content