Example #1
0
def get_pause_duration(recursive_delay=5, default_duration=10):
    """
    Check the Overpass API status endpoint to determine how long to wait until
    next slot is available.

    The fourth line of the status text is inspected. Its first
    space-separated token decides the outcome:

    * a number - slots are available, no wait is required
    * 'Slot' - the fourth token of the line is parsed as the time at which
      a slot frees up, and the seconds remaining until then are returned
      (at least 1)
    * 'Currently' - a query is running; sleep ``recursive_delay`` seconds
      and check again
    * anything else - log an error and return ``default_duration``

    Parameters
    ----------
    recursive_delay : int
        how long to wait between recursive calls if server is currently
        running a query
    default_duration : int
        if fatal error, function falls back on returning this value

    Returns
    -------
    pause_duration : int
    """
    try:
        response = requests.get('http://overpass-api.de/api/status')
        status = response.text.split('\n')[3]
        status_first_token = status.split(' ')[0]
    except Exception:
        # if status endpoint cannot be reached or output parsed, log error
        # and return default duration
        log('Unable to query http://overpass-api.de/api/status',
            level=lg.ERROR)
        return default_duration

    # if first token is numeric, it indicates the number of slots
    # available - no wait required
    try:
        int(status_first_token)
    except ValueError:
        pass
    else:
        return 0

    # if first token is 'Slot', it tells you when your slot will be free
    if status_first_token == 'Slot':
        try:
            utc_time_str = status.split(' ')[3]
            utc_time = date_parser.parse(utc_time_str).replace(tzinfo=None)
        except (IndexError, ValueError, OverflowError):
            # a 'Slot' line without a parseable timestamp is treated like
            # any other unrecognized status instead of crashing the caller
            log('Unrecognized server status: "{}"'.format(status),
                level=lg.ERROR)
            return default_duration

        # NOTE(review): the timestamp is compared against utcnow(), so it
        # is presumably reported in UTC - confirm against the API docs
        pause_duration = math.ceil(
            (utc_time - dt.datetime.utcnow()).total_seconds())
        return max(pause_duration, 1)

    # if first token is 'Currently', it is currently running a query so
    # check back in recursive_delay seconds, keeping the caller's settings
    if status_first_token == 'Currently':
        time.sleep(recursive_delay)
        return get_pause_duration(recursive_delay=recursive_delay,
                                  default_duration=default_duration)

    # any other status is unrecognized - log an error and return
    # default duration
    log('Unrecognized server status: "{}"'.format(status),
        level=lg.ERROR)
    return default_duration
Example #2
0
def network_from_bbox(lat_min=None,
                      lng_min=None,
                      lat_max=None,
                      lng_max=None,
                      bbox=None,
                      network_type='walk',
                      two_way=True,
                      timeout=180,
                      memory=None,
                      max_query_area_size=50 * 1000 * 50 * 1000,
                      custom_osm_filter=None):
    """
    Make a graph network from a bounding lat/lon box composed of nodes and
    edges for use in Pandana street network accessibility calculations.
    You may either enter a lat/long box via the four lat_min,
    lng_min, lat_max, lng_max parameters or the bbox parameter as a tuple.

    Parameters
    ----------
    lat_min : float
        southern latitude of bounding box, if this parameter is used the bbox
        parameter should be None.
    lng_min : float
        eastern longitude of bounding box, if this parameter is used the bbox
        parameter should be None.
    lat_max : float
        northern latitude of bounding box, if this parameter is used the bbox
        parameter should be None.
    lng_max : float
        western longitude of bounding box, if this parameter is used the bbox
        parameter should be None.
    bbox : tuple
        Bounding box formatted as a 4 element tuple:
        (lng_max, lat_min, lng_min, lat_max)
        example: (-122.304611,37.798933,-122.263412,37.822802)
        a bbox can be extracted for an area using: the CSV format bbox from
        http://boundingbox.klokantech.com/. If this parameter is used the
        lat_min, lng_min, lat_max, lng_max parameters in this function
        should be None.
    network_type : {'walk', 'drive'}, optional
        Specify the network type where value of 'walk' includes roadways where
        pedestrians are allowed and pedestrian pathways and 'drive' includes
        driveable roadways. Default is walk.
    two_way : bool, optional
        Whether the routes are two-way. If True, node pairs will only
        occur once.
    timeout : int, optional
        the timeout interval for requests and to pass to Overpass API
    memory : int, optional
        server memory allocation size for the query, in bytes. If none,
        server will use its default allocation size
    max_query_area_size : float, optional
        max area for any part of the geometry, in the units the geometry is
        in: any polygon bigger will get divided up for multiple queries to
        Overpass API (default is 50,000 * 50,000 units (ie, 50km x 50km in
        area, if units are meters))
    custom_osm_filter : string, optional
        specify custom arguments for the query to OSM

    Returns
    -------
    nodesfinal, edgesfinal : pandas.DataFrame
        nodesfinal has columns 'x', 'y' and 'id' and only contains nodes
        that appear in at least one edge; edgesfinal has its node columns
        named 'from' and 'to'.

    Raises
    ------
    AssertionError
        if bbox is not a 4 element tuple, if bbox is combined with any of
        the four coordinate parameters, or if any coordinate is missing or
        is not a float.
    """

    start_time = time.time()

    if bbox is not None:
        assert isinstance(bbox, tuple) \
               and len(bbox) == 4, 'bbox must be a 4 element tuple'
        assert (lat_min is None) and (lng_min is None) and \
               (lat_max is None) and (lng_max is None), \
            'lat_min, lng_min, lat_max and lng_max must be None ' \
            'if you are using bbox'

        lng_max, lat_min, lng_min, lat_max = bbox

    assert lat_min is not None, 'lat_min cannot be None'
    assert lng_min is not None, 'lng_min cannot be None'
    assert lat_max is not None, 'lat_max cannot be None'
    assert lng_max is not None, 'lng_max cannot be None'
    assert isinstance(lat_min, float) and isinstance(lng_min, float) and \
        isinstance(lat_max, float) and isinstance(lng_max, float), \
        'lat_min, lng_min, lat_max, and lng_max must be floats'

    nodes, ways, waynodes = ways_in_bbox(
        lat_min=lat_min,
        lng_min=lng_min,
        lat_max=lat_max,
        lng_max=lng_max,
        network_type=network_type,
        timeout=timeout,
        memory=memory,
        max_query_area_size=max_query_area_size,
        custom_osm_filter=custom_osm_filter)
    log('Returning OSM data with {:,} nodes and {:,} ways...'.format(
        len(nodes), len(ways)))

    edgesfinal = node_pairs(nodes, ways, waynodes, two_way=two_way)

    # make the unique set of nodes that ended up in pairs
    node_ids = sorted(
        set(edgesfinal['from_id'].unique()).union(
            set(edgesfinal['to_id'].unique())))

    # select rows and columns in one .loc call and rename into a new frame,
    # so the 'id' assignment below writes to an independent object rather
    # than to a slice of ``nodes`` (avoids pandas' chained-assignment
    # warning)
    nodesfinal = nodes.loc[node_ids, ['lon', 'lat']].rename(
        columns={'lon': 'x', 'lat': 'y'})
    nodesfinal['id'] = nodesfinal.index
    edgesfinal = edgesfinal.rename(columns={'from_id': 'from', 'to_id': 'to'})
    log('Returning processed graph with {:,} nodes and {:,} edges...'.format(
        len(nodesfinal), len(edgesfinal)))
    log('Completed OSM data download and Pandana node and edge table '
        'creation in {:,.2f} seconds'.format(time.time() - start_time))

    return nodesfinal, edgesfinal
Example #3
0
def node_pairs(nodes, ways, waynodes, two_way=True):
    """
    Create a table of node pairs with the distances between them.

    For every way, the nodes that are also intersection nodes are taken in
    order and each consecutive pair of distinct nodes becomes one record.
    Way tags listed in ``config.settings.keep_osm_tags`` that exist on the
    way are copied onto each of its records.

    Parameters
    ----------
    nodes : pandas.DataFrame
        Must have 'lat' and 'lon' columns.
    ways : pandas.DataFrame
        Table of way metadata.
    waynodes : pandas.DataFrame
        Table linking way IDs to node IDs. Way IDs should be in the index,
        with a column called 'node_id'.
    two_way : bool, optional
        Whether the routes are two-way. If True, node pairs will only
        occur once. If False, each pair is also added in the reverse
        direction. Default is True.

    Returns
    -------
    pairs : pandas.DataFrame
        Will have columns of 'from_id', 'to_id', and 'distance'.
        The index will be a MultiIndex of (from id, to id).
        The distance metric is in meters.
        If no pairs are found the table is empty but still has these
        columns.

    """
    start_time = time.time()

    intersections = intersection_nodes(waynodes)
    waymap = waynodes.groupby(level=0, sort=False)
    keep_tags = config.settings.keep_osm_tags
    records = []

    for way_id, row in ways.iterrows():
        nodes_in_way = [x for x in waymap.get_group(way_id).node_id.values
                        if x in intersections]

        if len(nodes_in_way) < 2:
            # no nodes to connect in this way
            continue

        # the tags are a property of the way, so collect them once per way
        # rather than once per node pair
        way_tags = {}
        for tag in keep_tags:
            try:
                way_tags[tag] = row[tag]
            except KeyError:
                # this way table simply does not carry that tag
                pass

        for from_node, to_node in zip(nodes_in_way, nodes_in_way[1:]):
            if from_node == to_node:
                continue

            fn = nodes.loc[from_node]
            tn = nodes.loc[to_node]
            distance = round(gcd(fn.lat, fn.lon, tn.lat, tn.lon), 6)

            forward = {
                'from_id': from_node,
                'to_id': to_node,
                'distance': distance
            }
            forward.update(way_tags)
            records.append(forward)

            if not two_way:
                backward = {
                    'from_id': to_node,
                    'to_id': from_node,
                    'distance': distance
                }
                backward.update(way_tags)
                records.append(backward)

    if records:
        pairs = pd.DataFrame.from_records(records)
    else:
        # keep the documented columns so the index construction below and
        # downstream column lookups do not raise KeyError on an empty result
        pairs = pd.DataFrame(columns=['from_id', 'to_id', 'distance'])

    pairs.index = pd.MultiIndex.from_arrays(
        [pairs['from_id'].values, pairs['to_id'].values])
    log('Edge node pairs completed. Took {:,.2f} seconds'.format(time.time() -
                                                                 start_time))

    return pairs
Example #4
0
def osm_net_download(lat_min=None,
                     lng_min=None,
                     lat_max=None,
                     lng_max=None,
                     network_type='walk',
                     timeout=180,
                     memory=None,
                     max_query_area_size=50 * 1000 * 50 * 1000,
                     custom_osm_filter=None):
    """
    Download OSM ways and nodes within a bounding box from the Overpass API.

    The bounding box is projected, split into pieces no larger than
    ``max_query_area_size``, and projected back; one Overpass request is
    sent per piece. The 'elements' of all responses are concatenated and
    duplicate nodes and duplicate ways (by 'id') are dropped.

    Parameters
    ----------
    lat_min : float
        southern latitude of bounding box
    lng_min : float
        eastern longitude of bounding box
    lat_max : float
        northern latitude of bounding box
    lng_max : float
        western longitude of bounding box
    network_type : string
        Specify the network type where value of 'walk' includes roadways
        where pedestrians are allowed and pedestrian
        pathways and 'drive' includes driveable roadways.
    timeout : int
        the timeout interval for requests and to pass to Overpass API
    memory : int
        server memory allocation size for the query, in bytes. If none,
        server will use its default allocation size
    max_query_area_size : float
        max area for any part of the geometry, in the units the geometry is
        in: any polygon bigger will get divided up for multiple queries to
        Overpass API (default is 50,000 * 50,000 units (ie, 50km x 50km in
        area, if units are meters))
    custom_osm_filter : string, optional
        specify custom arguments for the query to OSM

    Returns
    -------
    response_json : dict
        a dict with the single key 'elements' holding the list of
        de-duplicated node and way records (empty if nothing was returned)
    """

    # create a filter to exclude certain kinds of ways based on the requested
    # network_type
    if custom_osm_filter is None:
        request_filter = osm_filter(network_type)
    else:
        request_filter = custom_osm_filter

    # server memory allocation in bytes formatted for Overpass API query
    if memory is None:
        maxsize = ''
    else:
        maxsize = '[maxsize:{}]'.format(memory)

    # define the Overpass API query
    # way["highway"] denotes ways with highway keys and {filters} returns
    # ways with the requested key/value. the '>' makes it recurse so we get
    # ways and way nodes. maxsize is in bytes.
    # lat-longs are rounded to 8 decimal places to create consistent URL
    # strings. the template does not depend on the polygon, so build it once.
    query_template = '[out:json][timeout:{timeout}]{maxsize};' \
                     '(way["highway"]' \
                     '{filters}({lat_min:.8f},{lng_max:.8f},' \
                     '{lat_max:.8f},{lng_min:.8f});>;);out;'

    # turn bbox into a polygon and project to local UTM
    polygon = Polygon([(lng_max, lat_min), (lng_min, lat_min),
                       (lng_min, lat_max), (lng_max, lat_max)])
    geometry_proj, crs_proj = project_geometry(polygon,
                                               crs={'init': 'epsg:4326'})

    # subdivide the bbox area poly if it exceeds the max area size
    # (in meters), then project back to WGS84
    geometry_proj_consolidated_subdivided = consolidate_subdivide_geometry(
        geometry_proj, max_query_area_size=max_query_area_size)
    geometry, crs = project_geometry(geometry_proj_consolidated_subdivided,
                                     crs=crs_proj,
                                     to_latlong=True)
    log('Requesting network data within bounding box from Overpass API '
        'in {:,} request(s)'.format(len(geometry)))
    start_time = time.time()

    # loop through each polygon in the geometry
    responses = []
    for poly in geometry:
        # represent this piece's bbox as lng_max, lat_min, lng_min, lat_max;
        # separate names keep the function's own parameters intact
        poly_lng_max, poly_lat_min, poly_lng_min, poly_lat_max = poly.bounds
        query_str = query_template.format(lat_max=poly_lat_max,
                                          lat_min=poly_lat_min,
                                          lng_min=poly_lng_min,
                                          lng_max=poly_lng_max,
                                          filters=request_filter,
                                          timeout=timeout,
                                          maxsize=maxsize)
        responses.append(overpass_request(data={'data': query_str},
                                          timeout=timeout))

    log('Downloaded OSM network data within bounding box from Overpass '
        'API in {:,} request(s) and'
        ' {:,.2f} seconds'.format(len(geometry),
                                  time.time() - start_time))

    # stitch together individual json results
    elements = []
    for response in responses:
        try:
            elements.extend(response['elements'])
        except KeyError:
            # a response without 'elements' contributes nothing
            continue

    if not elements:
        # nothing to de-duplicate; building the table below would fail on
        # the missing 'id' and 'type' fields
        return {'elements': []}

    # remove duplicate records resulting from the json stitching
    start_time = time.time()
    record_count = len(elements)

    elements_df = pd.DataFrame.from_records(elements, index='id')
    # nodes and ways are de-duplicated separately because 'id' is only
    # compared within each element type here
    nodes = elements_df[elements_df['type'] == 'node']
    nodes = nodes[~nodes.index.duplicated(keep='first')]
    ways = elements_df[elements_df['type'] == 'way']
    ways = ways[~ways.index.duplicated(keep='first')]
    elements_df = pd.concat([nodes, ways], axis=0)
    elements_df.reset_index(inplace=True)
    elements = elements_df.to_dict(orient='records')
    if record_count - len(elements) > 0:
        log('{:,} duplicate records removed. Took {:,.2f} seconds'.format(
            record_count - len(elements),
            time.time() - start_time))

    return {'elements': elements}
Example #5
0
def project_gdf(gdf, to_latlong=False, verbose=False):
    """
    Reproject a GeoDataFrame, either to a UTM zone or back to WGS84.

    The UTM zone is derived from the longitude of the centroid of the union
    of all geometries. This works well for most latitudes, but not for some
    far northern locations.

    Parameters
    ----------
    gdf : GeoDataFrame
        the gdf to be projected; must not be empty. If it has no ``name``
        attribute it is given the name 'unnamed'.
    to_latlong : bool
        if True, projects to WGS84 (EPSG 4326) instead of to UTM
    verbose : bool
        if True, log how long the projection took

    Returns
    -------
    gdf : GeoDataFrame
        the projected copy carrying the same ``name`` as the input, or the
        input itself when UTM was requested and its crs is already UTM
    """
    assert len(gdf) > 0, 'You cannot project an empty GeoDataFrame.'
    began = time.time()

    if not to_latlong:
        # nothing to do when the frame already carries a UTM projection
        current_crs = gdf.crs
        if current_crs is not None and 'proj' in current_crs \
                and current_crs['proj'] == 'utm':
            return gdf

        # longitude of the centroid of all geometries picks the UTM zone
        centroid_lon = gdf['geometry'].unary_union.centroid.x
        zone = int(math.floor((centroid_lon + 180) / 6.) + 1)
        target_crs = {
            'datum': 'NAD83',
            'ellps': 'GRS80',
            'proj': 'utm',
            'zone': zone,
            'units': 'm'
        }
        template = 'Projected the GeoDataFrame "{}" to UTM-{} in {:,.2f} ' \
                   'seconds'
        template_args = (zone,)
    else:
        target_crs = {'init': 'epsg:4326'}
        template = 'Projected the GeoDataFrame "{}" to EPSG 4326 in {:,.2f} ' \
                   'seconds'
        template_args = ()

    result = gdf.to_crs(target_crs)

    # make sure there is a name to report and to hand on to the result
    if not hasattr(gdf, 'name'):
        gdf.name = 'unnamed'

    if verbose:
        elapsed = time.time() - began
        log(template.format(gdf.name, *(template_args + (elapsed,))))

    result.name = gdf.name
    return result
Example #6
0
def overpass_request(data,
                     pause_duration=None,
                     timeout=180,
                     error_pause_duration=None):
    """
    Send a request to the Overpass API via HTTP POST and return the
    JSON response

    If the response body cannot be decoded as JSON and the status code is
    429 or 504, the request is retried after a pause; any other status code
    raises.

    Parameters
    ----------
    data : dict or OrderedDict
        key-value pairs of parameters to post to Overpass API
    pause_duration : int
        accepted for interface compatibility; this function does not pause
        before the request and only forwards the value to retries
    timeout : int
        the timeout interval for the requests library
    error_pause_duration : int
        how long to pause in seconds before re-trying requests if error;
        if None, the Overpass API status endpoint is queried before each
        retry to find when the next slot is available

    Returns
    -------
    response_json : dict

    Raises
    ------
    Exception
        if the server returned no JSON data with a status code other than
        429 or 504
    """

    # define the Overpass API URL
    url = 'http://www.overpass-api.de/api/interpreter'

    start_time = time.time()
    log('Posting to {} with timeout={}, "{}"'.format(url, timeout, data))
    response = requests.post(url, data=data, timeout=timeout)

    # get the response size and the domain, log result
    size_kb = len(response.content) / 1000.
    # the inline (?s) flag must lead the pattern: Python 3.11+ rejects
    # global flags placed anywhere else
    domain = re.findall(r'(?s)//(.*?)/', url)[0]
    log('Downloaded {:,.1f}KB from {} in {:,.2f} seconds'.format(
        size_kb, domain,
        time.time() - start_time))

    try:
        response_json = response.json()
        if 'remark' in response_json:
            log('Server remark: "{}"'.format(response_json['remark']),
                level=lg.WARNING)

    except Exception:
        # 429 = 'too many requests' and 504 = 'gateway timeout' from server
        # overload. handle these errors by recursively
        # calling overpass_request until a valid response is achieved
        if response.status_code in [429, 504]:
            # pause before re-trying request; keep the caller's own
            # error_pause_duration untouched so it can be forwarded as given
            retry_pause = error_pause_duration
            if retry_pause is None:
                retry_pause = get_pause_duration()
            log('Server at {} returned status code {} and no JSON data. '
                'Re-trying request in {:.2f} seconds.'.format(
                    domain, response.status_code, retry_pause),
                level=lg.WARNING)
            time.sleep(retry_pause)
            response_json = overpass_request(
                data=data,
                pause_duration=pause_duration,
                timeout=timeout,
                error_pause_duration=error_pause_duration)

        # else, this was an unhandled status_code, throw an exception
        else:
            log('Server at {} returned status code {} and no JSON data'.format(
                domain, response.status_code),
                level=lg.ERROR)
            raise Exception('Server returned no JSON data.\n{} {}\n{}'.format(
                response, response.reason, response.text))

    return response_json
Example #7
0
def test_logging():
    """Send one message through ``log`` at each of four logging levels."""
    # ordered from least to most severe
    messages_by_level = (
        (lg.DEBUG, 'test debug message'),
        (lg.INFO, 'test info message'),
        (lg.WARNING, 'test warning message'),
        (lg.ERROR, 'test error message'),
    )
    for level, message in messages_by_level:
        log(message, level=level)