def get_pause_duration(recursive_delay=5, default_duration=10):
    """
    Check the Overpass API status endpoint to determine how long to wait until
    next slot is available.

    Parameters
    ----------
    recursive_delay : int
        how long to wait between recursive calls if server is currently
        running a query
    default_duration : int
        if fatal error, function falls back on returning this value

    Returns
    -------
    pause_duration : int
        0 if a slot is reported as available, otherwise the number of
        seconds (at least 1) until the reported slot time, or
        default_duration if the status could not be fetched or understood
    """
    try:
        response = requests.get('http://overpass-api.de/api/status')
        # the code reads the slot information from the 4th line of the
        # status text; its first token decides which case we are in
        status = response.text.split('\n')[3]
        status_first_token = status.split(' ')[0]
    except Exception:
        # if status endpoint cannot be reached or output parsed, log error
        # and return default duration
        log('Unable to query http://overpass-api.de/api/status',
            level=lg.ERROR)
        return default_duration

    try:
        # if first token is numeric, it indicates the number of slots
        # available - no wait required
        int(status_first_token)
        return 0
    except ValueError:
        pass

    if status_first_token == 'Slot':
        # if first token is 'Slot', it tells you when your slot will be free
        try:
            utc_time_str = status.split(' ')[3]
            utc_time = date_parser.parse(utc_time_str).replace(tzinfo=None)
        except (IndexError, ValueError, OverflowError):
            # a malformed 'Slot' line is treated like any other
            # unrecognized status instead of propagating a parsing error
            log('Unrecognized server status: "{}"'.format(status),
                level=lg.ERROR)
            return default_duration
        seconds_to_wait = math.ceil(
            (utc_time - dt.datetime.utcnow()).total_seconds())
        # never report less than one second when a slot time was given
        return max(seconds_to_wait, 1)

    if status_first_token == 'Currently':
        # if first token is 'Currently', it is currently running a query so
        # check back in recursive_delay seconds, keeping the caller's
        # settings for the follow-up call
        time.sleep(recursive_delay)
        return get_pause_duration(recursive_delay=recursive_delay,
                                  default_duration=default_duration)

    # any other status is unrecognized - log an error and return
    # default duration
    log('Unrecognized server status: "{}"'.format(status), level=lg.ERROR)
    return default_duration
def network_from_bbox(lat_min=None, lng_min=None, lat_max=None, lng_max=None,
                      bbox=None, network_type='walk', two_way=True,
                      timeout=180, memory=None,
                      max_query_area_size=50 * 1000 * 50 * 1000,
                      custom_osm_filter=None):
    """
    Make a graph network from a bounding lat/lon box composed of nodes and
    edges for use in Pandana street network accessibility calculations.
    You may either enter a lat/long box via the four lat_min,
    lng_min, lat_max, lng_max parameters or the bbox parameter as a tuple.

    Parameters
    ----------
    lat_min : float
        southern latitude of bounding box, if this parameter is used the bbox
        parameter should be None.
    lng_min : float
        eastern longitude of bounding box, if this parameter is used the bbox
        parameter should be None.
    lat_max : float
        northern latitude of bounding box, if this parameter is used the bbox
        parameter should be None.
    lng_max : float
        western longitude of bounding box, if this parameter is used the bbox
        parameter should be None.
    bbox : tuple
        Bounding box formatted as a 4 element tuple:
        (lng_max, lat_min, lng_min, lat_max)
        example: (-122.304611,37.798933,-122.263412,37.822802)
        a bbox can be extracted for an area using: the CSV format bbox from
        http://boundingbox.klokantech.com/. If this parameter is used the
        lat_min, lng_min, lat_max, lng_max parameters in this function
        should be None.
    network_type : {'walk', 'drive'}, optional
        Specify the network type where value of 'walk' includes roadways where
        pedestrians are allowed and pedestrian pathways and 'drive' includes
        driveable roadways. Default is walk.
    two_way : bool, optional
        Whether the routes are two-way. If True, node pairs will only
        occur once.
    timeout : int, optional
        the timeout interval for requests and to pass to Overpass API
    memory : int, optional
        server memory allocation size for the query, in bytes.
        If none, server will use its default allocation size
    max_query_area_size : float, optional
        max area for any part of the geometry, in the units the geometry is
        in: any polygon bigger will get divided up for multiple queries to
        Overpass API (default is 50,000 * 50,000 units (ie, 50km x 50km in
        area, if units are meters))
    custom_osm_filter : string, optional
        specify custom arguments for the query to OSM

    Returns
    -------
    nodesfinal, edgesfinal : pandas.DataFrame
        nodesfinal holds the columns 'x', 'y' and 'id' for every node that
        takes part in an edge; edgesfinal is the node pair table with its
        'from_id' and 'to_id' columns renamed to 'from' and 'to'.

    Raises
    ------
    AssertionError
        if bbox is not a 4 element tuple, if bbox is combined with any of the
        four coordinate parameters, or if a coordinate is None or not a float
    """
    start_time = time.time()

    if bbox is not None:
        assert isinstance(bbox, tuple) \
            and len(bbox) == 4, 'bbox must be a 4 element tuple'
        assert (lat_min is None) and (lng_min is None) and \
            (lat_max is None) and (lng_max is None), \
            'lat_min, lng_min, lat_max and lng_max must be None ' \
            'if you are using bbox'

        lng_max, lat_min, lng_min, lat_max = bbox

    assert lat_min is not None, 'lat_min cannot be None'
    assert lng_min is not None, 'lng_min cannot be None'
    assert lat_max is not None, 'lat_max cannot be None'
    assert lng_max is not None, 'lng_max cannot be None'
    assert isinstance(lat_min, float) and isinstance(lng_min, float) and \
        isinstance(lat_max, float) and isinstance(lng_max, float), \
        'lat_min, lng_min, lat_max, and lng_max must be floats'

    nodes, ways, waynodes = ways_in_bbox(
        lat_min=lat_min, lng_min=lng_min, lat_max=lat_max, lng_max=lng_max,
        network_type=network_type, timeout=timeout,
        memory=memory, max_query_area_size=max_query_area_size,
        custom_osm_filter=custom_osm_filter)
    log('Returning OSM data with {:,} nodes and {:,} ways...'.format(
        len(nodes), len(ways)))

    edgesfinal = node_pairs(nodes, ways, waynodes, two_way=two_way)

    # make the unique set of nodes that ended up in pairs
    node_ids = sorted(
        set(edgesfinal['from_id'].unique()).union(
            set(edgesfinal['to_id'].unique())))

    # select and rename in one expression: rename() hands back a new frame,
    # so adding the 'id' column below does not write into a slice of `nodes`
    nodesfinal = nodes.loc[node_ids, ['lon', 'lat']].rename(
        columns={'lon': 'x', 'lat': 'y'})
    nodesfinal['id'] = nodesfinal.index

    edgesfinal.rename(columns={'from_id': 'from', 'to_id': 'to'},
                      inplace=True)
    log('Returning processed graph with {:,} nodes and {:,} edges...'.format(
        len(nodesfinal), len(edgesfinal)))
    log('Completed OSM data download and Pandana node and edge table '
        'creation in {:,.2f} seconds'.format(time.time() - start_time))

    return nodesfinal, edgesfinal
def node_pairs(nodes, ways, waynodes, two_way=True):
    """
    Create a table of node pairs with the distances between them.

    Parameters
    ----------
    nodes : pandas.DataFrame
        Must have 'lat' and 'lon' columns.
    ways : pandas.DataFrame
        Table of way metadata.
    waynodes : pandas.DataFrame
        Table linking way IDs to node IDs. Way IDs should be in the index,
        with a column called 'node_id'.
    two_way : bool, optional
        Whether the routes are two-way. If True, node pairs will only
        occur once. Default is True.

    Returns
    -------
    pairs : pandas.DataFrame
        Will have columns of 'from_id', 'to_id', and 'distance', plus one
        column per tag in config.settings.keep_osm_tags found on the ways.
        The index will be a MultiIndex of (from id, to id).
        The distance metric is in meters.
        If no pair could be built the table is empty but still carries the
        'from_id', 'to_id' and 'distance' columns.
    """
    start_time = time.time()

    intersections = intersection_nodes(waynodes)
    waymap = waynodes.groupby(level=0, sort=False)
    keep_tags = config.settings.keep_osm_tags
    records = []

    for way_id, row in ways.iterrows():
        nodes_in_way = [node for node
                        in waymap.get_group(way_id).node_id.values
                        if node in intersections]

        if len(nodes_in_way) < 2:
            # no nodes to connect in this way
            continue

        # the way's tags are the same for every edge built from it, so
        # collect them once; tags missing from this way are skipped
        way_tags = {}
        for tag in keep_tags:
            try:
                way_tags[tag] = row[tag]
            except KeyError:
                pass

        # walk over consecutive node pairs along the way
        for from_node, to_node in zip(nodes_in_way, nodes_in_way[1:]):
            if from_node == to_node:
                continue

            fn = nodes.loc[from_node]
            tn = nodes.loc[to_node]
            distance = round(gcd(fn.lat, fn.lon, tn.lat, tn.lon), 6)

            forward = {'from_id': from_node,
                       'to_id': to_node,
                       'distance': distance}
            forward.update(way_tags)
            records.append(forward)

            if not two_way:
                # also store the reverse direction as its own record
                backward = {'from_id': to_node,
                            'to_id': from_node,
                            'distance': distance}
                backward.update(way_tags)
                records.append(backward)

    if records:
        pairs = pd.DataFrame.from_records(records)
    else:
        # from_records([]) has no columns at all, which would make the
        # index construction below fail with a KeyError
        pairs = pd.DataFrame(columns=['from_id', 'to_id', 'distance'])

    pairs.index = pd.MultiIndex.from_arrays(
        [pairs['from_id'].values, pairs['to_id'].values])
    log('Edge node pairs completed. Took {:,.2f} seconds'
        .format(time.time() - start_time))

    return pairs
def osm_net_download(lat_min=None, lng_min=None, lat_max=None, lng_max=None,
                     network_type='walk', timeout=180, memory=None,
                     max_query_area_size=50 * 1000 * 50 * 1000,
                     custom_osm_filter=None):
    """
    Download OSM ways and nodes within a bounding box from the Overpass API.

    Parameters
    ----------
    lat_min : float
        southern latitude of bounding box
    lng_min : float
        eastern longitude of bounding box
    lat_max : float
        northern latitude of bounding box
    lng_max : float
        western longitude of bounding box
    network_type : string
        Specify the network type where value of 'walk' includes roadways
        where pedestrians are allowed and pedestrian
        pathways and 'drive' includes driveable roadways.
    timeout : int
        the timeout interval for requests and to pass to Overpass API
    memory : int
        server memory allocation size for the query, in bytes. If none,
        server will use its default allocation size
    max_query_area_size : float
        max area for any part of the geometry, in the units the geometry is
        in: any polygon bigger will get divided up for multiple queries to
        Overpass API (default is 50,000 * 50,000 units (ie, 50km x 50km in
        area, if units are meters))
    custom_osm_filter : string, optional
        specify custom arguments for the query to OSM

    Returns
    -------
    response_json : dict
        a dict with a single 'elements' key holding the de-duplicated node
        and way records of all requests (an empty list if the requests
        returned no elements)
    """
    # create a filter to exclude certain kinds of ways based on the requested
    # network_type
    if custom_osm_filter is None:
        request_filter = osm_filter(network_type)
    else:
        request_filter = custom_osm_filter

    # server memory allocation in bytes formatted for Overpass API query
    maxsize = '' if memory is None else '[maxsize:{}]'.format(memory)

    # turn bbox into a polygon and project to local UTM
    polygon = Polygon([(lng_max, lat_min), (lng_min, lat_min),
                       (lng_min, lat_max), (lng_max, lat_max)])
    geometry_proj, crs_proj = project_geometry(polygon,
                                               crs={'init': 'epsg:4326'})

    # subdivide the bbox area poly if it exceeds the max area size
    # (in meters), then project back to WGS84
    geometry_proj_consolidated_subdivided = consolidate_subdivide_geometry(
        geometry_proj, max_query_area_size=max_query_area_size)
    geometry, _ = project_geometry(geometry_proj_consolidated_subdivided,
                                   crs=crs_proj, to_latlong=True)
    log('Requesting network data within bounding box from Overpass API '
        'in {:,} request(s)'.format(len(geometry)))
    start_time = time.time()

    # define the Overpass API query
    # way["highway"] denotes ways with highway keys and {filters} returns
    # ways with the requested key/value. the '>' makes it recurse so we get
    # ways and way nodes. maxsize is in bytes.
    query_template = '[out:json][timeout:{timeout}]{maxsize};' \
                     '(way["highway"]' \
                     '{filters}({lat_min:.8f},{lng_max:.8f},' \
                     '{lat_max:.8f},{lng_min:.8f});>;);out;'

    # loop through each polygon in the geometry
    response_jsons_list = []
    for poly in geometry:
        # represent bbox as lng_max, lat_min, lng_min, lat_max and round
        # lat-longs to 8 decimal places to create
        # consistent URL strings
        lng_max, lat_min, lng_min, lat_max = poly.bounds
        query_str = query_template.format(lat_max=lat_max, lat_min=lat_min,
                                          lng_min=lng_min, lng_max=lng_max,
                                          filters=request_filter,
                                          timeout=timeout, maxsize=maxsize)
        response_json = overpass_request(data={'data': query_str},
                                         timeout=timeout)
        response_jsons_list.append(response_json)

    log('Downloaded OSM network data within bounding box from Overpass '
        'API in {:,} request(s) and'
        ' {:,.2f} seconds'.format(len(geometry), time.time() - start_time))

    # stitch together individual json results; responses without an
    # 'elements' key contribute nothing
    response_jsons = []
    for single_response in response_jsons_list:
        try:
            response_jsons.extend(single_response['elements'])
        except KeyError:
            pass

    if not response_jsons:
        # with no records there is no 'id' field to index on, so the
        # de-duplication below would fail with a KeyError; hand back an
        # empty result in the documented shape instead
        log('No OSM elements were returned for the requested bounding box')
        return {'elements': []}

    # remove duplicate records resulting from the json stitching
    start_time = time.time()
    record_count = len(response_jsons)
    response_jsons_df = pd.DataFrame.from_records(response_jsons, index='id')
    # node ids and way ids are de-duplicated separately, so a node and a
    # way sharing the same id are both kept
    nodes = response_jsons_df[response_jsons_df['type'] == 'node']
    nodes = nodes[~nodes.index.duplicated(keep='first')]
    ways = response_jsons_df[response_jsons_df['type'] == 'way']
    ways = ways[~ways.index.duplicated(keep='first')]
    response_jsons_df = pd.concat([nodes, ways], axis=0)
    response_jsons_df.reset_index(inplace=True)
    response_jsons = response_jsons_df.to_dict(orient='records')
    if record_count - len(response_jsons) > 0:
        log('{:,} duplicate records removed. Took {:,.2f} seconds'.format(
            record_count - len(response_jsons), time.time() - start_time))

    return {'elements': response_jsons}
def project_gdf(gdf, to_latlong=False, verbose=False):
    """
    Project a GeoDataFrame to the UTM zone appropriate for its geometries'
    centroid. The calculation works well for most latitudes,
    however it will not work well for some far northern locations.

    Parameters
    ----------
    gdf : GeoDataFrame
        the gdf to be projected to UTM
    to_latlong : bool
        if True, projects to WGS84 instead of to UTM
    verbose : bool
        if True, log which projection was applied and how long it took

    Returns
    -------
    gdf : GeoDataFrame
        the projected GeoDataFrame, or the input itself if it is already
        in a UTM projection and to_latlong is False

    Raises
    ------
    AssertionError
        if gdf is empty
    """
    assert len(gdf) > 0, 'You cannot project an empty GeoDataFrame.'
    start_time = time.time()

    if to_latlong:
        # if to_latlong is True, project the gdf to WGS84
        latlong_crs = {'init': 'epsg:4326'}
        projected_gdf = gdf.to_crs(latlong_crs)
        target_label = 'EPSG 4326'
    else:
        # else, project the gdf to UTM
        # if GeoDataFrame is already in UTM, return it
        if (gdf.crs is not None) and ('proj' in gdf.crs) \
                and (gdf.crs['proj'] == 'utm'):
            return gdf

        # calculate the centroid of the union of all the geometries in the
        # GeoDataFrame
        avg_longitude = gdf['geometry'].unary_union.centroid.x

        # calculate the UTM zone from this avg longitude and define the
        # UTM CRS to project
        utm_zone = int(math.floor((avg_longitude + 180) / 6.) + 1)
        # UTM zones run from 1 to 60; a longitude of exactly 180 would
        # otherwise produce zone 61
        utm_zone = min(max(utm_zone, 1), 60)
        utm_crs = {
            'datum': 'NAD83',
            'ellps': 'GRS80',
            'proj': 'utm',
            'zone': utm_zone,
            'units': 'm'
        }

        # project the GeoDataFrame to the UTM CRS
        projected_gdf = gdf.to_crs(utm_crs)
        target_label = 'UTM-{}'.format(utm_zone)

    # make sure the input carries a name so it can be logged and copied
    # over to the projected result
    if not hasattr(gdf, 'name'):
        gdf.name = 'unnamed'
    if verbose:
        log('Projected the GeoDataFrame "{}" to {} in {:,.2f} '
            'seconds'.format(gdf.name, target_label,
                             time.time() - start_time))

    projected_gdf.name = gdf.name
    return projected_gdf
def overpass_request(data, pause_duration=None, timeout=180,
                     error_pause_duration=None):
    """
    Send a request to the Overpass API via HTTP POST and return the
    JSON response

    Parameters
    ----------
    data : dict or OrderedDict
        key-value pairs of parameters to post to Overpass API
    pause_duration : int
        how long to pause in seconds before requests, if None, will query
        Overpass API status endpoint
        to find when next slot is available. Within this function the value
        is only forwarded to the retry request.
    timeout : int
        the timeout interval for the requests library
    error_pause_duration : int
        how long to pause in seconds before re-trying requests if error;
        if None, the pause is obtained from get_pause_duration() before
        each retry

    Returns
    -------
    response_json : dict

    Raises
    ------
    Exception
        if the response holds no JSON data and its status code is neither
        429 nor 504
    """
    # define the Overpass API URL the data is posted to
    url = 'http://www.overpass-api.de/api/interpreter'

    start_time = time.time()
    log('Posting to {} with timeout={}, "{}"'.format(url, timeout, data))
    response = requests.post(url, data=data, timeout=timeout)

    # get the response size and the domain, log result
    size_kb = len(response.content) / 1000.
    # the inline (?s) flag has to lead the pattern: a global flag in the
    # middle of an expression is rejected by Python 3.11+
    domain = re.findall(r'(?s)//(.*?)/', url)[0]
    log('Downloaded {:,.1f}KB from {} in {:,.2f} seconds'.format(
        size_kb, domain, time.time() - start_time))

    try:
        response_json = response.json()
        if 'remark' in response_json:
            # level belongs to log(), not to str.format()
            log('Server remark: "{}"'.format(response_json['remark']),
                level=lg.WARNING)

    except Exception:
        # 429 = 'too many requests' and 504 = 'gateway timeout' from server
        # overload. handle these errors by recursively
        # calling overpass_request until a valid response is achieved
        if response.status_code in [429, 504]:
            # pause for error_pause_duration seconds before re-trying request
            retry_pause = error_pause_duration
            if retry_pause is None:
                retry_pause = get_pause_duration()
            log('Server at {} returned status code {} and no JSON data. '
                'Re-trying request in {:.2f} seconds.'.format(
                    domain, response.status_code, retry_pause),
                level=lg.WARNING)
            time.sleep(retry_pause)
            # forward the caller's own error_pause_duration so an explicit
            # value is honoured on every retry, while None keeps consulting
            # the status endpoint each time
            response_json = overpass_request(
                data=data,
                pause_duration=pause_duration,
                timeout=timeout,
                error_pause_duration=error_pause_duration)

        # else, this was an unhandled status_code, throw an exception
        else:
            log('Server at {} returned status code {} and no JSON data'
                .format(domain, response.status_code), level=lg.ERROR)
            raise Exception('Server returned no JSON data.\n{} {}\n{}'.format(
                response, response.reason, response.text))

    return response_json
def test_logging():
    """Send one test message through `log` at each of four levels."""
    # (message, level) pairs, emitted in order: debug, info, warning, error
    samples = (
        ('test debug message', lg.DEBUG),
        ('test info message', lg.INFO),
        ('test warning message', lg.WARNING),
        ('test error message', lg.ERROR),
    )
    for message, severity in samples:
        log(message, level=severity)