Beispiel #1
0
def invalid_multipoly_handler(gdf, relation, way_ids):
    """
    Build a MultiPolygon from a GeoDataFrame that may contain missing geometries.

    Rows whose geometry is missing (NaN) are dropped before the remaining
    geometries are combined into a single MultiPolygon.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        GeoDataFrame with Polygon geometries that should be converted into a
        MultiPolygon object.
    relation : dict
        OSM 'relation' dictionary; only its 'id' is read, for the log message.
    way_ids : list
        A list of 'way' ids that should be converted into a MultiPolygon
        object. Only used in the log message.

    Returns
    -------
    MultiPolygon or None
        The combined geometry, or None when it could not be built (the failure
        is logged).
    """
    try:
        gdf_clean = gdf.dropna(subset=['geometry'])
        return MultiPolygon(list(gdf_clean['geometry']))
    except Exception:
        # One placeholder per value: the relation id and the list of way ids.
        log("Invalid geometry at relation id %s.\nWay-ids of the invalid MultiPolygon: %s"
            % (relation['id'], str(way_ids)))
        return None
def parse_polygonal_poi(coords, response):
    """
    Build an areal POI from a single OSM 'way' element.

    Parameters
    ----------
    coords : dict
        dict of node IDs and their lat, lon coordinates
    response : dict
        one OSM element; only elements whose 'type' is 'way' are parsed

    Returns
    -------
    dict or None
        POI with its 'nodes', polygon 'geometry', 'osmid' and one entry per
        OSM tag. None if the element is not a way or the polygon could not be
        built (the latter is logged).
    """
    if response.get('type') != 'way':
        return None

    nodes = response['nodes']
    try:
        ring = [(coords[node]['lon'], coords[node]['lat']) for node in nodes]
        poi = {
            'nodes': nodes,
            'geometry': Polygon(ring),
            'osmid': response['id'],
        }
        # Tags are copied last, so a tag may overwrite one of the keys above.
        if 'tags' in response:
            poi.update(response['tags'])
        return poi
    except Exception:
        log('Polygon has invalid geometry: {}'.format(nodes))

    return None
Beispiel #3
0
def parse_osm_node(response):
    """
    Parse a point POI from a single OSM node element.

    Parameters
    ----------
    response : dict
        One OSM node element; 'lon', 'lat' and 'id' are read, and every entry
        of the optional 'tags' dict is copied into the result.

    Returns
    -------
    dict or None
        POI with its 'osmid', point 'geometry' and one entry per OSM tag.
        None if the point could not be built (the failure is logged).
    """
    try:
        point = Point(response['lon'], response['lat'])

        poi = {'osmid': response['id'], 'geometry': point}
        if 'tags' in response:
            poi.update(response['tags'])
        return poi

    except Exception:
        # The element may be the very thing that is malformed, so read the id
        # defensively instead of risking a second exception in the handler.
        osmid = response.get('id') if isinstance(response, dict) else None
        log('Point has invalid geometry: {}'.format(osmid))
        # Return explicitly: `poi` may never have been bound at this point.
        return None
Beispiel #4
0
    def add_footprints(self, osm_data, category):
        """
        Add the footprints of one OSM category to this object's axes.

        Parameters
        ----------
        osm_data : dict
            OSM footprint data keyed by category; each value is expected to be
            a dict holding the raw OSM elements under 'elements'.
        category : string
            item category to plot; also the key used to look up the plot
            format in ``self.plot_format_dict``

        Returns
        -------
        self
            Returned in every case, including when there is nothing to plot.
        """

        # A missing (or None) category entry is treated like missing elements.
        osm_elements = (osm_data.get(category) or {}).get('elements')
        if osm_elements is None:
            print('No OSM elements to plot')
            return self

        plot_format = self.plot_format_dict[category]

        # Generate GeoPandas DataFrame
        gdf = self.create_footprint_gdf(osm_elements)
        if gdf is None:
            log('Empty GDF for {}'.format(category))
            return self

        # Store existing xlims and ylims to reset the adjusted lims later
        xlim = self.ax.get_xlim()
        ylim = self.ax.get_ylim()

        # Extract each polygon as a descartes patch for a matplotlib patch
        # collection. Other geometry types are ignored.
        patches = []
        for geometry in gdf['geometry']:
            if isinstance(geometry, Polygon):
                patches.append(PolygonPatch(geometry))
            elif isinstance(geometry, MultiPolygon):
                # `.geoms` exposes the constituent polygons; iterating the
                # MultiPolygon itself is not supported by Shapely 2.
                patches.extend(PolygonPatch(subpolygon)
                               for subpolygon in geometry.geoms)

        pc = PatchCollection(patches,
                             facecolor=plot_format['facecolor'],
                             edgecolor=plot_format['edge_color'],
                             linewidth=plot_format['linewidth'],
                             alpha=plot_format['alpha'],
                             zorder=plot_format['zorder'])
        self.ax.add_collection(pc)

        # Reset the lims to original lims
        self.ax.set_xlim(xlim)
        self.ax.set_ylim(ylim)

        return self
Beispiel #5
0
    def multithreading_get_venues(self, category_dict, bbox_mapping):
        '''
        Run one Foursquare venue search per category, each in its own thread.

        Parameters
        ----------
        category_dict : dict
            Maps each category to a dict whose values are the subcategory ids
            sent as ``categoryId``.
        bbox_mapping : dict
            Maps each category to a pair of coordinate sequences: element 0 is
            sent as the ``sw`` corner and element 1 as the ``ne`` corner.

        Returns
        -------
        list
            All venues returned across the categories. Each venue is tagged
            with its category under ``'recommendation_category'``. Categories
            whose request failed contribute no venues.
        '''

        log('Multiprocessing get_venue search with Foursquare API...')
        start_time = time.time()

        # Threads share memory and every worker writes only its own key, so a
        # plain dict is sufficient to collect the results.
        return_dict = {}

        # Worker executed by each thread
        def single_thread_get_venues(category, subcategories, bb):
            try:
                result = self.fs_client.venues.search(params={'intent': self.intent,
                                                      'sw': ','.join(str(i) for i in bb[0]),
                                                      'ne': ','.join(str(i) for i in bb[1]),
                                                      'categoryId': ','.join(i for i in subcategories.values()),
                                                      'limit': self.limit})
                # A response without 'venues' simply has nothing to tag.
                for venue in result.get('venues') or []:
                    venue['recommendation_category'] = category
            except Exception:
                result = {}
                log('get_venues request fails for category {}...'.format(category))
            return_dict[category] = result

        jobs = []
        for category, subcategories in category_dict.items():
            thread = threading.Thread(name=category,
                                      target=single_thread_get_venues,
                                      args=(category, subcategories,
                                            bbox_mapping[category]))
            jobs.append(thread)
            thread.start()

        for j in jobs:
            j.join()

        # Flatten the per-category results; failed categories hold an empty dict
        venues = [venue
                  for category_result in return_dict.values()
                  for venue in category_result.get('venues') or []]

        log('Downloaded {:,} venues in {:,.2f} seconds'.format(len(venues),
                                                                time.time()-start_time))

        return venues
Beispiel #6
0
def get_extract_population_data(city_ref, data_source, pop_shapefile=None, pop_data_file=None, to_crs={'init': 'epsg:4326'}, df_osm_built=None):
    """
    Get the population data extract of the desired data source for a city.

    If an extract was already stored for this city and data source, it is read
    back and returned. Otherwise the extract is computed for the convex hull
    of the input buildings, stored as an ESRI Shapefile and returned.

    Parameters
    ----------
    city_ref : string
        name of input city
    data_source : string
        desired population data source; must be one of DATA_SOURCES
    pop_shapefile : string
        population count shapefile (required when no stored extract exists)
    pop_data_file : string
        population data additional file (required for the 'insee' source)
    to_crs : dict
        desired coordinate reference system; passed through unchanged to
        get_population_df
    df_osm_built : geopandas.GeoDataFrame
        buildings for input region of interest (required when no stored
        extract exists)

    Returns
    ----------
    geopandas.GeoDataFrame
        returns the extracted population data

    Raises
    ------
    AssertionError
        If the data source is unknown, or a required input is missing when the
        extract has to be computed.
    """
    # Input data source type given?
    assert data_source in DATA_SOURCES

    extract_filename = get_population_extract_filename(city_ref, data_source)

    # Population extract exists?
    if os.path.exists(extract_filename):
        log("Population extract exists for input city: " + city_ref)
        return gpd.read_file(extract_filename)

    # Input buildings given? They are the only source of the region of interest.
    assert df_osm_built is not None
    # Input population shapefile given?
    assert pop_shapefile is not None
    # All input files given?
    assert not ((data_source == 'insee') and (pop_data_file is None))

    # Get buildings convex hull
    polygon = GeometryCollection(df_osm_built.geometry.values.tolist()).convex_hull
    # Convert to geo-dataframe with defined CRS
    poly_gdf = gpd.GeoDataFrame([polygon], columns=["geometry"], crs=df_osm_built.crs)

    # Compute extract
    df_pop = get_population_df(pop_shapefile, pop_data_file, data_source, to_crs, poly_gdf)

    # Save to shapefile
    df_pop.to_file(extract_filename, driver='ESRI Shapefile')
    return df_pop
Beispiel #7
0
 def single_thread_get_venues(category, subcategories, bb):
     """
     Search Foursquare venues for one category and store the response.

     The response is stored in the enclosing scope's ``return_dict`` under
     ``category``; an empty dict is stored when the request fails.

     Parameters
     ----------
     category : hashable
         Category label; used as result key and as venue tag.
     subcategories : dict
         Its values are joined with commas and sent as ``categoryId``.
     bb : sequence
         Element 0 is sent as the ``sw`` corner, element 1 as the ``ne`` corner.
     """
     try:
         result = self.fs_client.venues.search(params={'intent': self.intent,
                                               'sw': ','.join(str(i) for i in bb[0]),
                                               'ne': ','.join(str(i) for i in bb[1]),
                                               'categoryId': ','.join(i for i in subcategories.values()),
                                               'limit': self.limit})
         # A response without 'venues' simply has nothing to tag.
         for venue in result.get('venues') or []:
             venue['recommendation_category'] = category
     except Exception:
         result = {}
         log('get_venues request fails for category {}...'.format(category))
     return_dict[category] = result
Beispiel #8
0
def print_processing_time(start_time, message, id=None, logging=True):
    """
    Print (and optionally log) the time elapsed since ``start_time``.

    Parameters
    ----------
    start_time : float
        Timestamp, as returned by ``time.time()``, to measure from.
    message : string
        Label printed in front of the elapsed time.
    id : int, optional
        Running counter appended to the message; returned incremented by one.
    logging : bool
        If True, the printed line is also passed to ``log``.

    Returns
    -------
    tuple
        (current timestamp, incremented id or None)
    """
    stop = time.time()

    next_id = id
    if id is not None:
        message = message + ' (id = {})'.format(id)
        next_id = id + 1

    # Pad the label to 52 characters so the timings line up in a column;
    # longer labels are left unpadded.
    padding = ' ' * max(52 - len(message), 0)
    elapsed = stop - start_time
    print_message = "{}{} --- {:.2f} seconds ---".format(message, padding,
                                                       elapsed)
    print(print_message)
    if logging:
        log(print_message)
    return stop, next_id
Beispiel #9
0
def get_nearest_edge(G, point, return_geom=False, return_dist=False):
    """
    Find the graph edge closest to a point, by minimum euclidean distance.

    Parameters
    ----------
    G : networkx.MultiDiGraph
        input graph
    point : tuple
        the (lat, lng) or (y, x) point for which we will find the nearest edge
        in the graph
    return_geom : bool
        Optionally return the geometry of the nearest edge
    return_dist : bool
        Optionally return the distance in graph's coordinates' units between
        the point and the nearest edge

    Returns
    -------
    tuple
        (u, v, key) identifying the edge, followed by the edge geometry if
        return_geom is True, followed by the distance if return_dist is True.
    """
    # one row of (u, v, key, geometry) per graph edge
    gdf_edges = utils_graph.graph_to_gdfs(G,
                                          nodes=False,
                                          fill_edge_geometry=True)
    edge_rows = gdf_edges[["u", "v", "key", "geometry"]].values

    # shapely expects x/y order, so flip the lat/lng point
    xy_point = Point(reversed(point))

    # pair every edge with its distance to the point and keep the closest one
    candidates = ((row, xy_point.distance(row[3])) for row in edge_rows)
    nearest_row, dist = min(candidates, key=lambda pair: pair[1])
    u, v, key, geom = nearest_row
    utils.log(f"Found nearest edge ({u, v, key}) to point {point}")

    # append only the extras the caller asked for
    result = (u, v, key)
    if return_geom:
        result += (geom,)
    if return_dist:
        result += (dist,)
    return result
Beispiel #10
0
    def create_footprint_gdf(self, osm_elements):
        """
        Turn raw OSM elements into a GeoDataFrame of footprint polygons.

        Parameters
        ----------
        osm_elements : list of dict
            OSM footprint data for specific category; 'node' elements supply
            the coordinates and 'way' elements become polygons

        Returns
        -------
        GeoDataFrame or None
            One row per way with a valid geometry, or None when no way could
            be converted into a polygon.
        """

        # coordinates of every node, keyed by node id
        vertices = {
            element['id']: {'lat': element['lat'], 'lon': element['lon']}
            for element in osm_elements
            if element.get('type') == 'node'
        }

        # one footprint per way, keyed by way id
        footprints = {}
        for element in osm_elements:
            if element.get('type') != 'way':
                continue

            nodes = element['nodes']
            try:
                outline = [(vertices[node]['lon'], vertices[node]['lat'])
                           for node in nodes]
                footprint = {'nodes': nodes, 'geometry': Polygon(outline)}

                # tags are copied last, so a tag may overwrite a key above
                if 'tags' in element:
                    footprint.update(element['tags'])

                footprints[element['id']] = footprint
            except Exception:
                log('Polygon of has invalid geometry: {}'.format(nodes))

        if not footprints:
            return None

        # footprints are stored per id, so transpose to get one row per way
        gdf = gpd.GeoDataFrame(footprints).T

        # drop all invalid geometries
        return gdf[gdf['geometry'].is_valid]
Beispiel #11
0
    def wrapper(*args, **kwargs):
        """Call the wrapped function, log how long the call took and return its result."""
        started = time.time()

        result = func(*args, **kwargs)

        # Label the timing with the fully qualified function name, padded to
        # 52 characters so the timings line up; longer labels are not padded.
        label = '.'.join((func.__module__, func.__name__))
        padding = ' ' * max(52 - len(label), 0)
        print_message = "{}{} --- {:.2f} seconds ---".format(label, padding,
                                                           time.time() - started)
        log(print_message)

        return result
Beispiel #12
0
        def single_thread_get_details(venue):
            """
            Fetch the details of one venue and collect the extracted fields.

            The detail response is stored on the venue under 'details'
            (an empty dict when the request fails). Every entry of a fresh
            ``fs_data_dict()`` then gets its 'values' filled with whatever
            ``get_nested`` finds in the venue at that entry's 'location', and
            the filled dict is appended to the enclosing scope's
            ``return_list``.

            Parameters
            ----------
            venue : dict
                Venue record; its 'id' is used for the detail request.
            """
            data_dict = fs_data_dict()
            venue_id = venue.get('id')
            try:
                result = self.fs_client.venues(venue_id).get('venue', None)
            except Exception:
                # Best effort: a failed request must not abort the other venues.
                result = {}
                log('get_details request fails for venue_id {}...'.format(venue_id))

            venue['details'] = result
            for field in data_dict.values():
                field['values'] = get_nested(venue, field['location'])
            return_list.append(data_dict)
Beispiel #13
0
def get_map(city, state):
    """
    Return the drive network of a US city, plain and projected.

    Both graphs are cached as pickle files in the current working directory
    ("<city><state>.pkl" and "<city><state>_projected.pkl"). When both files
    exist they are loaded; otherwise the graph is downloaded, enriched with
    node elevations and edge grades, projected, and both files are written.

    Parameters
    ----------
    city : string
        City name used in the place query and in the cache file names.
    state : string
        State name used in the place query and in the cache file names.

    Returns
    -------
    tuple
        (graph, projected graph)
    """
    file_name = "%s%s.pkl" % (city, state)
    projected_file_name = "%s%s_projected.pkl" % (city, state)

    if Path(projected_file_name).is_file() and Path(file_name).is_file():
        # NOTE(review): unpickling can execute arbitrary code, so these cache
        # files must only ever come from a trusted source.
        with open(file_name, "rb") as graph_file:
            graph = pkl.load(graph_file)
        with open(projected_file_name, "rb") as projected_file:
            graph_proj = pkl.load(projected_file)
        return graph, graph_proj

    query = {'city': city, 'state': state, 'country': 'USA'}
    graph = ox.graph_from_place(query, network_type='drive')
    graph = add_node_elevations_open(graph)
    graph = ox.add_edge_grades(graph)

    # Debug output for one specific node; guarded because that node only
    # exists in some graphs and an unguarded lookup would raise KeyError.
    debug_node_id = 5637885552
    if debug_node_id in graph.nodes:
        log(graph.nodes[debug_node_id])

    with open(file_name, "wb") as graph_file:
        pkl.dump(graph, graph_file)
    graph_proj = ox.project_graph(graph)
    with open(projected_file_name, "wb") as projected_file:
        pkl.dump(graph_proj, projected_file)
    return graph, graph_proj
Beispiel #14
0
    def single_process_overpass_request(poly_bbox, query_key, query_dict):
        """
        Query Overpass for one bounding box and collect the response.

        A dict ``{query_key: response}`` is appended to the enclosing scope's
        ``result_list``; the response is an empty dict when the request fails.

        Parameters
        ----------
        poly_bbox : tuple
            Bounding box as (west, south, east, north).
        query_key : hashable
            Key under which the response is stored.
        query_dict : dict
            Query settings; 'key', 'value', 'way', 'relation' and 'node' are
            required, 'filter_type' is optional.
        """
        # The bbox arrives as west,south,east,north and is passed on by name,
        # so the ordering expected by the query builder cannot be mixed up.
        west, south, east, north = poly_bbox
        query_str = get_osm_query(north=north,
                                  east=east,
                                  south=south,
                                  west=west,
                                  key=query_dict['key'],
                                  value=query_dict['value'],
                                  way=query_dict['way'],
                                  relation=query_dict['relation'],
                                  node=query_dict['node'],
                                  filter_type=query_dict.get('filter_type'))

        try:
            response_json = overpass_request(data={'data': query_str},
                                             timeout=timeout)
            result = {query_key: response_json}
        except Exception:
            # Best effort: a failed query yields an empty result for its key.
            log('No OSM data found for {}'.format(query_key))
            result = {query_key: {}}
        result_list.append(result)
Beispiel #15
0
def get_Y_X_features_population_data(cities_selection=None, cities_skip=None):
    """
    Return the Y and X arrays for training/testing population downscaling estimates.

    Gathers the stored data of every city found in "data/training", limited
    to a selection of cities and/or excluding a list of cities to skip.

    Y contains vectors with the correspondent population densities
    X contains vectors with normalised urban features
    X_columns columns referring to X values
    Numpy arrays are previously stored

    Parameters
    ----------
    cities_selection : list of string, optional
        cities to select; None selects every stored city
    cities_skip : list of string, optional
        cities to skip (retrieve the rest)

    Returns
    ----------
    np.array, np.array, np.array
        Y vector, X vector, X column names vector

    Raises
    ------
    ValueError
        If no stored city data matches the request.
    """
    arr_X, arr_Y = [], []
    city_X_cols = None

    # Get the complete training-testing dataset
    for Y_X_data_city in os.listdir("data/training"):
        # Only files with the expected extension hold stored arrays
        if not Y_X_data_city.endswith('.npz'):
            continue

        # Get city's name
        city_ref = Y_X_data_city.replace('_X_Y.npz', '')

        # Only retrieve data from cities_selection (if ever given)
        if (cities_selection is not None) and (city_ref not in cities_selection):
            log('Skipping city: {}'.format(city_ref))
            continue

        # Skip cities data from cities_skip (if ever given)
        if (cities_skip is not None) and (city_ref in cities_skip):
            log('Skipping city: {}'.format(city_ref))
            continue

        log('Retrieving data: {}'.format(city_ref))

        # Get stored data
        city_Y, city_X, city_X_cols = get_training_testing_data(city_ref)
        # Append values
        arr_Y.append(city_Y)
        arr_X.append(city_X)

    if not arr_Y:
        raise ValueError('No stored training data found in data/training '
                         'for the requested cities')

    # Assumption: All generated testing-training data contain the same X columns
    return np.concatenate(arr_Y), np.concatenate(arr_X), city_X_cols
def get_paths_to_simplify(G, strict=True):
    """
    Collect every path between endpoint nodes that should be simplified.

    Each path runs from one endpoint, through the interstitial nodes, to the
    next endpoint.

    Parameters
    ----------
    G : graph
    strict : bool
        passed through to is_endpoint; if False, allow nodes to be end points
        even if they fail all other rules but have edges with different OSM IDs

    Returns
    -------
    paths_to_simplify : list
    """

    # first identify all the nodes that are endpoints
    start_time = time.time()
    endpoints = {node for node in G.nodes()
                 if is_endpoint(G, node, strict=strict)}
    elapsed = time.time() - start_time
    log('Identified {:,} edge endpoints in {:,.2f} seconds'.format(
        len(endpoints), elapsed))

    start_time = time.time()
    paths_to_simplify = []

    # walk from each endpoint along every successor that is not an endpoint
    for node in endpoints:
        for successor in G.successors(node):
            if successor in endpoints:
                # endpoint-to-endpoint edges have nothing to simplify
                continue

            try:
                path = build_path(G,
                                  successor,
                                  endpoints,
                                  path=[node, successor])
            except RuntimeError:
                # Recursion errors occur if some connected component is a
                # self-contained ring in which no node is an endpoint. Such a
                # component is left untouched. RecursionError (Python 3.5+)
                # is a subtype of RuntimeError, so it is handled here too.
                log('Recursion error: exceeded max depth, moving on to next endpoint successor',
                    level=lg.WARNING)
            else:
                paths_to_simplify.append(path)

    log('Constructed all paths to simplify in {:,.2f} seconds'.format(
        time.time() - start_time))
    return paths_to_simplify
Beispiel #17
0
def create_graphGeoJson(geoJson, name='unnamed', retain_all=True, 
                        network_type='all_private', valid_road_types=set([]),
                        roadTypeField='type',
                        verbose=True, osmidx=0, osmNodeidx=0):
    """
    Build a networkx graph from the nodes and paths parsed out of a file.

    Parameters
    ----------
    geoJson : geoJsonFile Name
        will support any file format supported by OGR
    name : string
        the name of the graph
    retain_all : bool
        if True, return the entire graph even if it is not connected
    network_type : string
        what type of network to create
    valid_road_types : set
        passed through to parse_OGR_nodes_paths
    roadTypeField : string
        passed through to parse_OGR_nodes_paths
    verbose : bool
        if True, print the parsed nodes and paths
    osmidx : int
        passed through to parse_OGR_nodes_paths
    osmNodeidx : int
        passed through to parse_OGR_nodes_paths

    Returns
    -------
    networkx multidigraph
        An empty graph is returned when no nodes were parsed.
    """

    log('Creating networkx graph from downloaded OSM data...')
    start_time = time.time()

    # create the graph as a MultiDiGraph and set the original CRS to EPSG 4326
    G = nx.MultiDiGraph(name=name, crs={'init':'epsg:4326'})

    # extract nodes and paths from the input file
    nodes_temp, paths_temp = parse_OGR_nodes_paths(
        geoJson,
        valid_road_types=valid_road_types,
        verbose=verbose,
        osmidx=osmidx,
        osmNodeidx=osmNodeidx,
        roadTypeField=roadTypeField)

    # nothing parsed: hand back the empty graph
    if len(nodes_temp) == 0:
        return G

    if verbose:
        print("nodes_temp:", nodes_temp)
        print("paths_temp:", paths_temp)

    # work on copies of the parsed dicts
    nodes = dict(nodes_temp.items())
    if verbose:
        for key, value in nodes.items():
            print("node key:", key)
            print("  node value:", value)

    paths = dict(paths_temp.items())
    if verbose:
        for key, value in paths.items():
            print("path key:", key)
            print("  path value:", value)

    # add each osm node to the graph
    for node, data in nodes.items():
        G.add_node(node, **data)

    # add each osm way (aka, path) to the graph
    if verbose:
        print("paths:", paths)
    G = core.add_paths(G, paths, network_type)

    # retain only the largest connected component, if caller did not set retain_all=True
    if not retain_all:
        G = core.get_largest_component(G)

    node_count = len(list(G.nodes()))
    edge_count = len(list(G.edges()))
    log('Created graph with {:,} nodes and {:,} edges in {:,.2f} seconds'.format(node_count, edge_count, time.time()-start_time))

    # add length (great circle distance between nodes) attribute to each edge to use as weight
    return core.add_edge_lengths(G)
Beispiel #18
0
def compute_grid_dispersion(df_indices,
                            df_osm_built,
                            kwargs={
                                "radius_search": 750,
                                "use_median": True,
                                "K_nearest": 50
                            }):
    """
    Calculate the dispersion index for every reference point.

    The result is written in place into a new "dispersion" column of
    ``df_indices``; nothing is returned.

    Parameters
    ----------
    df_indices : geopandas.GeoDataFrame
        data frame containing the (x,y) reference points to calculate indices
    df_osm_built : geopandas.GeoDataFrame
        data frame containing the building's geometries
    kwargs : dict
        additional keyword arguments for the indices calculation; any key
        left out falls back to its default
            radius_search : int
                circle radius to consider the dispersion calculation at a
                local point (default 750)
            use_median : bool
                denotes whether the median or mean should be used to
                calculate the indices (default True)
            K_nearest : int
                number of neighboring buildings to consider in evaluation
                (default 50)

    Returns
    ----------
    None
    """
    log("Dispersion calculation")
    start = time.time()

    # Fill in defaults so callers may override only some of the options.
    options = {"radius_search": 750, "use_median": True, "K_nearest": 50}
    options.update(kwargs or {})

    # Circle radius to consider the dispersion calculation at a local point
    radius_search = options["radius_search"]

    # Assign dispersion calculation method: median or mean
    if options["use_median"]:
        _calculate_dispersion = closest_building_distance_median
    else:
        _calculate_dispersion = closest_building_distance_average

    # Calculate the closest distance for each building within K_nearest
    # centroid buildings.
    # NOTE(review): presumably this adds the 'closest_d' column read below -
    # confirm against the helper.
    _apply_polygon_closest_distance_neighbor(df_osm_built,
                                             K_nearest=options["K_nearest"])

    # For dispersion calculation approximation, create KDTree with the
    # centroids of the buildings that have a closest distance.
    has_distance = df_osm_built.closest_d.notnull()
    coords_data = [
        centroid.coords[0]
        for centroid in df_osm_built.loc[has_distance].geometry.apply(
            lambda geom: geom.centroid)
    ]
    tree = spatial.KDTree(coords_data)

    # Compute dispersion indices
    # NOTE(review): the tree is built from the filtered rows while the full
    # 'closest_d' column is passed on - verify the helper accounts for this.
    index_column = "dispersion"
    df_indices[index_column] = df_indices.geometry.apply(
        lambda x: _calculate_dispersion(x, tree, df_osm_built.closest_d,
                                        radius_search))

    # Remove added column
    df_osm_built.drop('closest_d', axis=1, inplace=True)

    end = time.time()
    log("Dispersion calculation time: " + str(end - start))
def simplify_graph(G_, strict=True):
    """
    Simplify a graph's topology by removing its interstitial nodes.

    Every path returned by get_paths_to_simplify is replaced by one edge
    between the path's end points. The new edge keeps the attributes of the
    edges it replaces, a geometry through all nodes of the path, and the
    summed length. The input graph is not modified.

    Parameters
    ----------
    G_ : graph
    strict : bool
        passed through to get_paths_to_simplify; if False, allow nodes to be
        end points even if they fail all other rules but have edges with
        different OSM IDs

    Returns
    -------
    G : graph

    Raises
    ------
    Exception
        If the graph has already been simplified.
    """

    if is_simplified(G_):
        raise Exception(
            'This graph has already been simplified, cannot simplify it again.'
        )

    G = G_.copy()
    initial_node_count = len(list(G.nodes()))
    initial_edge_count = len(list(G.edges()))
    nodes_to_remove = []
    edges_to_add = []

    # construct a list of all the paths that need to be simplified
    paths = get_paths_to_simplify(G, strict=strict)

    start_time = time.time()
    for path in paths:

        # gather, per attribute, the values of all edges along the path
        edge_attributes = {}
        for u, v in zip(path[:-1], path[1:]):

            # there shouldn't be multiple edges between interstitial nodes
            edges = G.edge[u][v]
            if len(edges) != 1:
                log('Multiple edges between "{}" and "{}" found when simplifying'
                    .format(u, v),
                    level=lg.WARNING)

            # MultiGraphs key parallel edges with ints from 0 and up; the
            # first one is used even when the warning above was logged
            edge = edges[0]
            for key in edge:
                edge_attributes.setdefault(key, []).append(edge[key])

        for key in edge_attributes:
            unique_values = set(edge_attributes[key])
            if key == 'length':
                # lengths are kept as a list and summed below
                continue
            if len(unique_values) == 1:
                # a single distinct value collapses to that value
                edge_attributes[key] = edge_attributes[key][0]
            else:
                # otherwise keep one of each distinct value
                edge_attributes[key] = list(unique_values)

        # construct the geometry and sum the lengths of the segments
        path_points = [Point((G.node[node]['x'], G.node[node]['y']))
                       for node in path]
        edge_attributes['geometry'] = LineString(path_points)
        edge_attributes['length'] = sum(edge_attributes['length'])

        # defer graph changes until every path has been processed
        nodes_to_remove.extend(path[1:-1])
        edges_to_add.append({
            'origin': path[0],
            'destination': path[-1],
            'attr_dict': edge_attributes
        })

    # create the new edge between each origin and destination
    for new_edge in edges_to_add:
        G.add_edge(new_edge['origin'], new_edge['destination'],
                   **new_edge['attr_dict'])

    # finally remove all the interstitial nodes between the new edges
    G.remove_nodes_from(set(nodes_to_remove))

    msg = 'Simplified graph (from {:,} to {:,} nodes and from {:,} to {:,} edges) in {:,.2f} seconds'
    log(
        msg.format(initial_node_count, len(list(G.nodes())),
                   initial_edge_count, len(list(G.edges())),
                   time.time() - start_time))
    return G
Beispiel #20
0
        def overpass_request(data,
                             pause_duration=None,
                             timeout=180,
                             error_pause_duration=None):
            """
            Send a request to the Overpass API via HTTP POST and return the JSON
            response.

            Responses are cached under the equivalent GET-style URL; a cached
            response is returned without making an HTTP call.

            Parameters
            ----------
            data : dict or OrderedDict
                key-value pairs of parameters to post to the API
            pause_duration : int
                how long to pause in seconds before requests, if None, the
                pause is obtained from get_pause_duration()
            timeout : int
                the timeout interval for the requests library
            error_pause_duration : int
                how long to pause in seconds before re-trying requests if
                error, if None, the pause is obtained from
                get_pause_duration()

            Returns
            -------
            dict

            Raises
            ------
            Exception
                If the server returns no JSON data and the status code is
                neither 429 nor 504.
            """

            # define the Overpass API URL, then construct a GET-style URL as a string to
            # hash to look up/save to cache
            url = settings.overpass_endpoint.rstrip('/') + '/interpreter'
            prepared_url = requests.Request('GET', url,
                                            params=data).prepare().url
            cached_response_json = get_from_cache(prepared_url)

            if cached_response_json is not None:
                # found this request in the cache, just return it instead of making a
                # new HTTP call
                return cached_response_json

            # this URL is not in the cache: pause, then request it. An
            # explicitly given pause_duration takes precedence.
            if pause_duration is None:
                this_pause_duration = get_pause_duration()
            else:
                this_pause_duration = pause_duration
            log('Pausing {:,.2f} seconds before making API POST request'.
                format(this_pause_duration))
            time.sleep(this_pause_duration)
            start_time = time.time()
            log('Posting to {} with timeout={}, "{}"'.format(
                url, timeout, data))
            response = requests.post(url,
                                     data=data,
                                     timeout=timeout,
                                     headers=get_http_headers())

            # get the response size and the domain, log result
            size_kb = len(response.content) / 1000.
            domain = re.findall(r'(?s)//(.*?)/', url)[0]
            log('Downloaded {:,.1f}KB from {} in {:,.2f} seconds'.format(
                size_kb, domain,
                time.time() - start_time))

            try:
                response_json = response.json()
                if 'remark' in response_json:
                    log('Server remark: "{}"'.format(response_json['remark']),
                        level=lg.WARNING)
                save_to_cache(prepared_url, response_json)
            except Exception:
                # 429 is 'too many requests' and 504 is 'gateway timeout' from server
                # overload - handle these errors by recursively calling
                # overpass_request until we get a valid response
                if response.status_code in [429, 504]:
                    # pause before re-trying the request; the parameter itself
                    # is left untouched so it can be handed on to the retry
                    if error_pause_duration is None:
                        this_error_pause = get_pause_duration()
                    else:
                        this_error_pause = error_pause_duration
                    log('Server at {} returned status code {} and no JSON data. Re-trying request in {:.2f} seconds.'
                        .format(domain, response.status_code,
                                this_error_pause),
                        level=lg.WARNING)
                    time.sleep(this_error_pause)
                    response_json = overpass_request(
                        data=data,
                        pause_duration=pause_duration,
                        timeout=timeout,
                        error_pause_duration=error_pause_duration)

                # else, this was an unhandled status_code, throw an exception
                else:
                    log('Server at {} returned status code {} and no JSON data'
                        .format(domain, response.status_code),
                        level=lg.ERROR)
                    raise Exception(
                        'Server returned no JSON data.\n{} {}\n{}'.format(
                            response, response.reason, response.text))

            return response_json
Beispiel #21
0
def run_sim_and_graph(G,
                      bbox=None,
                      fig_height=6,
                      fig_width=None,
                      margin=0.02,
                      axis_off=True,
                      equal_aspect=False,
                      bgcolor='w',
                      show=True,
                      save=False,
                      close=True,
                      file_format='png',
                      filename='temp',
                      dpi=600,
                      annotate=False,
                      node_color='#66ccff',
                      node_size=15,
                      node_alpha=1,
                      node_edgecolor='none',
                      node_zorder=1,
                      edge_color='#999999',
                      edge_linewidth=1,
                      edge_alpha=1,
                      prior_iters=0,
                      use_geom=True,
                      num_iters=20,
                      draw_freq=50,
                      show_congestion=False):
    """
    Run random simulations on a graph and plot its edges colored by the result.

    Mostly lifted from osmnx's graph plotting, adapted so the graph can be
    redrawn as more simulation samples are taken. The function calls
    simulate_random(G) num_iters times, then draws every edge colored by its
    'traversals' attribute (or 'congestion' if show_congestion is True).

    Parameters
    ----------
    G : networkx multidigraph
    bbox : tuple
        bounding box as north,south,east,west - if None will calculate from
        spatial extents of data. if passing a bbox, you probably also want to
        pass margin=0 to constrain it.
    fig_height : int
        matplotlib figure height in inches
    fig_width : int
        matplotlib figure width in inches; if None it is derived from
        fig_height and the bounding box aspect ratio
    margin : float
        relative margin around the figure
    axis_off : bool
        if True turn off the matplotlib axis
    equal_aspect : bool
        if True set the axis aspect ratio equal
    bgcolor : string
        the background color of the figure and axis
    show : bool
        if True, show the figure (only consulted when save is True, because
        save_and_show is only called in that case)
    save : bool
        if True, save the figure as an image file to disk
    close : bool
        close the figure (only if show equals False) to prevent display
    file_format : string
        the format of the file to save (e.g., 'jpg', 'png', 'svg')
    filename : string
        the name of the file if saving
    dpi : int
        the resolution of the image file if saving
    annotate : bool
        if True, annotate the nodes in the figure
    node_color, node_size, node_alpha, node_edgecolor, node_zorder
        accepted for signature compatibility; currently unused because the
        node scatter plot is disabled
    edge_color : string
        accepted for signature compatibility; currently unused because edge
        colors come from get_edge_colors_by_attribute
    edge_linewidth : float
        the width of the edges' lines
    edge_alpha : float
        the opacity of the edges' lines
    prior_iters : int
        number of iterations run before this call; only used for the title
    use_geom : bool
        if True, use the spatial geometry attribute of the edges to draw
        geographically accurate edges, rather than just lines straight from node
        to node
    num_iters : int
        number of times simulate_random(G) is called
    draw_freq : int
        currently unused (the periodic redraw is disabled)
    show_congestion : bool
        if True, call eval_congestion(G) and color edges by 'congestion'
        instead of 'traversals'

    Returns
    -------
    G, fig, ax : tuple
    """

    log('Begin plotting the graph...')
    node_Ys = [float(y) for _, y in G.nodes(data='y')]

    # get north, south, east, west values either from bbox parameter or from the
    # spatial extent of the edges' geometries
    if bbox is None:
        edges = ox.graph_to_gdfs(G, nodes=False, fill_edge_geometry=True)
        west, south, east, north = edges.total_bounds
    else:
        north, south, east, west = bbox

    # if caller did not pass in a fig_width, calculate it proportionately from
    # the fig_height and bounding box aspect ratio
    if fig_width is None:
        bbox_aspect_ratio = (north - south) / (east - west)
        fig_width = fig_height / bbox_aspect_ratio

    # create the figure and axis
    fig, ax = plt.subplots(figsize=(fig_width, fig_height), facecolor=bgcolor)
    ax.set_facecolor(bgcolor)

    # collect one polyline per edge
    start_time = time.time()
    lines = []
    for u, v, data in G.edges(keys=False, data=True):
        if 'geometry' in data and use_geom:
            # if it has a geometry attribute, use its coordinates
            xs, ys = data['geometry'].xy
            lines.append(list(zip(xs, ys)))
        else:
            # otherwise the edge is a straight line from node to node
            start_node = G.nodes[u]
            end_node = G.nodes[v]
            lines.append([(start_node['x'], start_node['y']),
                          (end_node['x'], end_node['y'])])

    log('Drew the graph edges in {:,.2f} seconds'.format(time.time() -
                                                         start_time))

    # set the extent of the figure
    margin_ns = (north - south) * margin
    margin_ew = (east - west) * margin
    ax.set_ylim((south - margin_ns, north + margin_ns))
    ax.set_xlim((west - margin_ew, east + margin_ew))

    # configure axis appearance
    xaxis = ax.get_xaxis()
    yaxis = ax.get_yaxis()
    xaxis.get_major_formatter().set_useOffset(False)
    yaxis.get_major_formatter().set_useOffset(False)

    # run the simulation batch
    for _ in range(num_iters):
        simulate_random(G)

    # clear this axis explicitly (not "whatever axis is current") and restore
    # the settings that clearing resets
    ax.cla()
    ax.set_facecolor(bgcolor)
    ax.set_ylim((south - margin_ns, north + margin_ns))
    ax.set_xlim((west - margin_ew, east + margin_ew))
    xaxis.get_major_formatter().set_useOffset(False)
    yaxis.get_major_formatter().set_useOffset(False)
    ax.set_title(num_iters + prior_iters)
    # NOTE(review): these two calls are unconditional, so axis_off=False does
    # not actually keep the axis visible - confirm whether that is intended
    ax.set_aspect('equal')
    ax.axis('off')

    if show_congestion:
        eval_congestion(G)
        color_attribute = 'congestion'
    else:
        color_attribute = 'traversals'
    eColors = get_edge_colors_by_attribute(G, color_attribute, num_bins=250)

    # add the lines to the axis as a linecollection
    lc = LineCollection(lines,
                        colors=eColors,
                        linewidths=edge_linewidth,
                        alpha=edge_alpha,
                        zorder=2)
    ax.add_collection(lc)

    # if axis_off, turn off the axis display set the margins to zero and point
    # the ticks in so there's no space around the plot
    if axis_off:
        ax.axis('off')
        ax.margins(0)
        ax.tick_params(which='both', direction='in')
        xaxis.set_visible(False)
        yaxis.set_visible(False)

    if equal_aspect:
        # make everything square
        ax.set_aspect('equal')
    elif G.graph['crs'] == settings.default_crs:
        # if the graph is not projected, conform the aspect ratio to not
        # stretch the plot
        coslat = np.cos((min(node_Ys) + max(node_Ys)) / 2. / 180. * np.pi)
        ax.set_aspect(1. / coslat)

    # annotate the axis with node IDs if annotate=True
    if annotate:
        for node, data in G.nodes(data=True):
            ax.annotate(node, xy=(data['x'], data['y']))

    # save and show the figure as specified
    if save:
        fig, ax = save_and_show(fig, ax, save, show, close, filename,
                                file_format, dpi, axis_off)
    return G, fig, ax
Beispiel #22
0
def osm_net_download(bbox,
                     query_dict_collection,
                     timeout=180,
                     memory=None,
                     max_query_area_size=50 * 1000 * 50 * 1000):
    """
    Download OSM data for a bounding box, one Overpass request per query.

    The bounding box is projected, subdivided if it exceeds
    max_query_area_size, projected back to lat-long, and every resulting
    polygon is queried once per entry of query_dict_collection. The requests
    run concurrently, one thread each.

    Parameters
    ----------
    bbox : tuple
        bounding box as ((south, west), (north, east))
    query_dict_collection : dict
        maps a query key to a dict with the entries 'key', 'value', 'way',
        'relation' and 'node' (and optionally 'filter_type') that are passed
        on to get_osm_query
    timeout : int
        timeout passed to overpass_request
    memory : int
        accepted for signature compatibility; it is currently not forwarded
        to the query
    max_query_area_size : float
        max area for any part of the geometry, passed to
        consolidate_subdivide_geometry

    Returns
    -------
    dict
        maps each query key to the JSON response of its request, or to an
        empty dict if that request failed
    """

    # turn bbox into a polygon and project to local UTM
    (south, west), (north, east) = bbox
    polygon = Polygon([(west, south), (east, south), (east, north),
                       (west, north)])
    geom_proj, crs_proj = project_geometry(polygon)

    # subdivide it if it exceeds the max area size (in meters), then project
    # back to lat-long
    geom_proj_consol_sub = consolidate_subdivide_geometry(
        geom_proj, max_query_area_size=max_query_area_size)
    geometry, _ = project_geometry(geom_proj_consol_sub,
                                   crs=crs_proj,
                                   to_latlong=True)
    log(('Requesting footprint data within bounding '
         'box from API in {:,} request(s)').format(len(geometry)))
    start_time = time()

    # Worker threads share this process's memory, so a plain list is enough;
    # list.append is atomic in CPython and no manager process is needed.
    result_list = []

    def single_process_overpass_request(poly_bbox, query_key, query_dict):
        # shapely bounds come as (minx, miny, maxx, maxy)
        west, south, east, north = poly_bbox
        query_str = get_osm_query(north=north,
                                  east=east,
                                  south=south,
                                  west=west,
                                  key=query_dict['key'],
                                  value=query_dict['value'],
                                  way=query_dict['way'],
                                  relation=query_dict['relation'],
                                  node=query_dict['node'],
                                  filter_type=query_dict.get('filter_type'))

        try:
            response_json = overpass_request(data={'data': query_str},
                                             timeout=timeout)
            result = {query_key: response_json}
        except Exception:
            # best effort: a failed query yields an empty result
            log('No OSM data found for {}'.format(query_key))
            result = {query_key: {}}
        result_list.append(result)

    # start one thread per (polygon, query) pair, then wait for all of them
    jobs = []
    for poly in geometry:
        for query_key, query_dict in query_dict_collection.items():
            thread = threading.Thread(target=single_process_overpass_request,
                                      args=(poly.bounds, query_key,
                                            query_dict))
            jobs.append(thread)
            thread.start()

    for job in jobs:
        job.join()

    # NOTE(review): results are keyed by query_key only, so when the bbox was
    # subdivided into several polygons the responses for the same key
    # overwrite one another here - confirm whether they should be merged.
    response_jsons = {
        key: value
        for response in result_list for key, value in response.items()
    }

    msg = ('Got all OSM data within bounding box from '
           'API in {:,} request(s) and {:,.2f} seconds')

    log(msg.format(len(geometry), time() - start_time))

    return response_jsons
def parse_osm_relations(relations, osm_way_df):
    """
    Parses the osm relations (multipolygons) from osm
    ways and nodes. See more information about relations
    from OSM documentation: http://wiki.openstreetmap.org/wiki/Relation

    Relations that have no tags, or whose 'type' tag is not 'multipolygon',
    are skipped. A relation that cannot be processed is logged and skipped.

    Parameters
    ----------
    relations : list
        OSM 'relation' items (dictionaries) in a list.
    osm_way_df : gpd.GeoDataFrame
        OSM 'way' features as a GeoDataFrame that contains all the
        'way' features that will constitute the multipolygon relations.

    Returns
    -------
    gpd.GeoDataFrame
        A GeoDataFrame with MultiPolygon representations of the
        relations and the attributes associated with them. The member ways
        of every parsed relation are removed from the result.
    """
    # Local import keeps this block self-contained; DataFrame.append was
    # removed in pandas 2.0, so the frames are combined with pd.concat.
    import pandas as pd

    relation_frames = []

    # Iterate over relations and extract the items
    for relation in relations:
        tags = relation.get('tags') or {}
        if tags.get('type') != 'multipolygon':
            continue
        try:
            # Parse member 'way' ids
            member_way_ids = [
                member['ref'] for member in relation['members']
                if member['type'] == 'way'
            ]
            # Extract the ways (ids missing from osm_way_df become NaN rows)
            member_ways = osm_way_df.reindex(member_way_ids)
            # Extract the nodes of those ways
            member_nodes = list(member_ways['nodes'].values)
            try:
                # Create MultiPolygon from geometries
                multipoly = MultiPolygon(list(member_ways['geometry']))
            except Exception:
                # Retry without the rows that have no geometry
                multipoly = invalid_multipoly_handler(
                    gdf=member_ways,
                    relation=relation,
                    way_ids=member_way_ids)

            if multipoly:
                # Create GeoDataFrame with the tags and the MultiPolygon and its 'ways' (ids), and the 'nodes' of those ways
                geo = gpd.GeoDataFrame(tags, index=[relation['id']])
                # Initialize columns (needed for .loc inserts)
                geo = geo.assign(geometry=None,
                                 ways=None,
                                 nodes=None,
                                 element_type=None,
                                 osmid=None)
                # Add attributes
                geo.loc[relation['id'], 'geometry'] = multipoly
                geo.loc[relation['id'], 'ways'] = member_way_ids
                geo.loc[relation['id'], 'nodes'] = member_nodes
                geo.loc[relation['id'], 'element_type'] = 'relation'
                geo.loc[relation['id'], 'osmid'] = relation['id']

                # Remove such 'ways' from 'osm_way_df' that are part of the
                # 'relation'. Ids that reindex() tolerated as missing must not
                # make drop() fail, hence errors='ignore'.
                osm_way_df = osm_way_df.drop(member_way_ids, errors='ignore')
                # Only record the relation once its ways have been removed
                relation_frames.append(geo)
        except Exception:
            log("Could not handle OSM 'relation': {}".format(
                relation.get('id')))

    # Merge 'osm_way_df' and the relation frames in a single concat
    return pd.concat([osm_way_df] + relation_frames, sort=False)
Beispiel #24
0
    def multithreading_get_details(self, venues):
        '''
        Fetch the details of every venue from the Foursquare API, using one
        thread per venue, and collect them into a DataFrame.

        Each venue dict is modified in place: the API result (or an empty
        dict if the request failed) is stored under its 'details' key.

        Parameters
        ----------
        venues: list
            List of venues retrieved from Foursquare API get_venues request

        Returns
        -------
        DataFrame with venue details used for recommendations. It has one
        column per key of fs_data_dict() and one row per venue, in the order
        of the input. The 'chain' column is converted to 1/0.
        '''

        log('Multiprocessing get_details search with Foursquare API...')
        start_time = time.time()

        venue_list = list(venues)
        # One slot per venue: every thread writes only its own slot, so rows
        # keep the input order no matter which request finishes first.
        collected = [None] * len(venue_list)

        # Define thread function
        def single_thread_get_details(position, venue):
            data_dict = fs_data_dict()
            venue_id = venue.get('id')
            try:
                result = self.fs_client.venues(venue_id).get('venue', None)
            except Exception:
                # best effort: keep going with empty details for this venue
                result = {}
                log('get_details request fails for venue_id {}...'.format(venue_id))

            venue['details'] = result
            for key, value in data_dict.items():
                data_dict[key]['values'] = get_nested(venue, value['location'])
            collected[position] = data_dict

        jobs = []
        for position, venue in enumerate(venue_list):
            thread = threading.Thread(name=venue.get('id'),
                                      target=single_thread_get_details,
                                      args=(position, venue))
            jobs.append(thread)
            thread.start()

        for job in jobs:
            job.join()

        # Generate data dict to convert to dataframe
        data_dict = fs_data_dict()
        for venue_values in collected:
            if venue_values is None:
                # the thread for this venue died before storing a result
                continue
            for key in data_dict:
                data_dict[key]['values'].append(venue_values[key]['values'])
        venue_df = pd.DataFrame({key: value['values']
                                 for key, value in data_dict.items()})

        def has_chain(chain_value):
            # values without a length (e.g. None for a venue whose request
            # failed) count as "no chain" instead of raising
            try:
                return 1 if len(chain_value) > 0 else 0
            except TypeError:
                return 0

        venue_df['chain'] = venue_df['chain'].map(has_chain)
        log('Downloaded details for {:,} venues in {:,.2f} seconds'.format(venue_df.shape[0],
                                                                time.time()-start_time))

        return venue_df
Beispiel #25
0
def add_node_elevations_open(G, max_locations_per_batch=180,
                             pause_duration=0.02):  # pragma: no cover
    """
    Add an 'elevation' attribute to every node, using the open-elevation API.

    Node coordinates are sent in batches; responses are read from and written
    to the cache via get_from_cache / save_to_cache.

    Parameters
    ----------
    G : networkx graph
        graph whose nodes carry 'x' and 'y' attributes; 'y' is sent before
        'x' in each "y,x" location string
    max_locations_per_batch : int
        max number of node locations to submit in a single request URL
    pause_duration : float
        seconds to sleep before each request that is not served from cache

    Returns
    -------
    G : networkx graph
        the same graph, with an 'elevation' node attribute rounded to 3
        decimal places

    Raises
    ------
    Exception
        if a request or the decoding of its JSON fails (the original error is
        logged and re-raised), or if the number of results received differs
        from the number of nodes
    """

    url_template = 'https://api.open-elevation.com/api/v1/lookup?locations={}'

    node_points = pd.Series({node: '{:.5f},{:.5f}'.format(data['y'], data['x'])
                             for node, data in G.nodes(data=True)})
    log('Requesting node elevations from the API in {} calls.'.format(
        math.ceil(len(node_points) / max_locations_per_batch)))

    results = []
    for i in range(0, len(node_points), max_locations_per_batch):
        chunk = node_points.iloc[i: i + max_locations_per_batch]
        url = url_template.format('|'.join(chunk))
        log(len(url))

        # check if this request is already in the cache (if global use_cache=True)
        response_json = get_from_cache(url)
        if response_json is None:
            response = None
            try:
                # request the elevations from the API
                log('Requesting node elevations: {}'.format(url))
                time.sleep(pause_duration)
                response = requests.get(url)
                response_json = response.json()
            except Exception as e:
                log(e)
                # response is still None if the request itself failed
                if response is not None:
                    log('Server responded with {}: {}'.format(response.status_code, response.reason))
                # Never fall through: continuing here would either hit an
                # undefined name or silently reuse the previous batch's
                # results and assign wrong elevations.
                raise

            # caching stays best effort: a failed cache write must not
            # discard a valid response
            try:
                save_to_cache(url, response_json)
            except Exception as e:
                log(e)

        # append these elevation results to the list of all results
        results.extend(response_json['results'])

    # sanity check that all our vectors have the same number of elements
    if not (len(results) == len(G.nodes()) == len(node_points)):
        raise Exception('Graph has {} nodes but we received {} results from the elevation API.'.format(len(G.nodes()),
                                                                                                       len(results)))
    else:
        log('Graph has {} nodes and we received {} results from the elevation API.'.format(len(G.nodes()),
                                                                                           len(results)))

    # add elevation as an attribute to the nodes; results are matched to
    # nodes by position, in the order the locations were sent
    df = pd.DataFrame(node_points, columns=['node_points'])
    df['elevation'] = [result['elevation'] for result in results]
    log(df['elevation'])
    df['elevation'] = df['elevation'].round(3)  # round to millimeter
    nx.set_node_attributes(G, name='elevation', values=df['elevation'].to_dict())
    log('Added elevation data to all nodes.')

    return G
Beispiel #26
0
def compute_grid_landusemix(df_indices, df_osm_built, df_osm_pois, kw_args=None):
	"""
	Calculate land use mix indices on input grid

	The results are written into df_indices in place, as the columns
	'residential_pdf', 'activity_pdf', optionally one '<category>_pdf' column
	per activity category, 'landusemix' and 'landuse_intensity'.

	Parameters
	----------
	df_indices : data frame
		grid whose 'geometry' column holds the reference points where the
		indices are calculated; receives the result columns
	df_osm_built : data frame
		buildings; the columns 'containing_poi', 'classification' and
		'landuses_m2' are read, plus 'activity_category' when activity types
		are computed
	df_osm_pois : data frame
		points of interest; the columns 'classification' and
		'activity_category' are read, and its index is matched against the
		values found in df_osm_built.containing_poi
	kw_args: dict
		additional keyword arguments for the indices calculation; missing
		entries fall back to the defaults shown below
			walkable_distance : int
				the bandwidth assumption for Kernel Density Estimation calculations (meters), default 600
			compute_activity_types_kde : bool
				determines if the densities for each activity type should be computed, default True
			weighted_kde : bool
				use Weighted Kernel Density Estimation or classic version, default True
			pois_weight : int
				Points of interest weight equivalence with buildings (squared meter), default 9
			log_weighted : bool
				apply natural logarithmic function to surface weights, default True

	Returns
	----------
	None
		df_indices is modified in place
	"""
	# Build the options from fresh defaults so a partial kw_args works and no
	# dict is shared between calls
	options = {'walkable_distance':600,'compute_activity_types_kde':True,'weighted_kde':True,'pois_weight':9,'log_weighted':True}
	if kw_args:
		options.update(kw_args)

	log("Land use mix calculation")
	start = time.time()

	# Get the bandwidth, related to 'walkable distances'
	bandwidth = options["walkable_distance"]
	# Compute a weighted KDE?
	weighted_kde = options["weighted_kde"]
	pois_weight = options["pois_weight"]
	log_weighted = options["log_weighted"]
	X_weights = None

	# Get full list of contained POIs
	contained_lists = df_osm_built.containing_poi[ df_osm_built.containing_poi.notnull() ]
	contained_pois = list({element for list_ in contained_lists for element in list_})
	# Get the POIs not contained by any building
	df_osm_pois_not_contained = df_osm_pois[ ~ df_osm_pois.index.isin( contained_pois) ]

	############
	### Calculate land use density estimations
	############

	####
	# Residential
	####
	df_osm_built_indexed = df_osm_built[ df_osm_built.classification.isin(["residential","mixed"]) ]
	if weighted_kde:
		X_weights = df_osm_built_indexed.landuses_m2.apply(lambda x: x["residential"] )

	df_indices["residential_pdf"] = calculate_kde(df_indices.geometry, df_osm_built_indexed, None, bandwidth, X_weights, pois_weight, log_weighted )
	log("Residential density estimation done")

	####
	# Activities
	####
	df_osm_built_indexed = df_osm_built[ df_osm_built.classification.isin(["activity","mixed"]) ]
	df_osm_pois_not_cont_indexed = df_osm_pois_not_contained[ df_osm_pois_not_contained.classification.isin(["activity","mixed"]) ]
	if weighted_kde:
		X_weights = df_osm_built_indexed.landuses_m2.apply(lambda x: x["activity"] )

	df_indices["activity_pdf"] = calculate_kde(df_indices.geometry, df_osm_built_indexed, df_osm_pois_not_cont_indexed, bandwidth, X_weights, pois_weight, log_weighted )
	log("Activity density estimation done")

	####
	# Compute activity types densities
	####
	if options["compute_activity_types_kde"]:
		assert 'activity_category' in df_osm_built.columns

		# Get unique category values; entries that are not lists are ignored
		categories = set()
		for frame in (df_osm_built, df_osm_pois_not_cont_indexed):
			for entry in frame.activity_category.values:
				if isinstance(entry, list):
					categories.update(entry)

		# Sorted so the '<category>_pdf' columns are added in a stable order
		for cat in sorted(categories):
			# Buildings and POIs within that category
			in_category = lambda x: (isinstance(x,list)) and (cat in x)
			df_built_category = df_osm_built_indexed[ df_osm_built_indexed.activity_category.apply(in_category) ]
			df_pois_category = df_osm_pois_not_cont_indexed[ df_osm_pois_not_cont_indexed.activity_category.apply(in_category) ]
			if weighted_kde:
				X_weights = df_built_category.landuses_m2.apply(lambda x: x[ cat ] )

			df_indices[ cat + "_pdf" ] = calculate_kde( df_indices.geometry, df_built_category, df_pois_category, bandwidth, X_weights, pois_weight, log_weighted )

		log("Activity grouped by types density estimation done")

	# Compute land use mix indices
	index_column = "landusemix"
	df_indices[index_column] = df_indices.apply(lambda x: _land_use_mix(x.activity_pdf, x.residential_pdf), axis=1 )
	# Mean of both densities, computed column-wise instead of row by row
	df_indices["landuse_intensity"] = (df_indices["activity_pdf"] + df_indices["residential_pdf"]) / 2.

	end = time.time()
	log("Land use mix calculation time: "+str(end-start))
def graph_to_gdfs_pix(G,
                      nodes=True,
                      edges=True,
                      node_geometry=True,
                      fill_edge_geometry=True):
    """
    Build node and/or edge GeoDataFrames from a graph, using the pixel
    coordinates ('x_pix', 'y_pix') of the nodes.

    Parameters
    ----------
    G : networkx multidigraph
    nodes : bool
        if True, convert graph nodes to a GeoDataFrame and return it
    edges : bool
        if True, convert graph edges to a GeoDataFrame and return it
    node_geometry : bool
        if True, create a 'geometry_pix' column from node x_pix and y_pix data
    fill_edge_geometry : bool
        if True, fill in missing edge 'geometry_pix' fields using origin and
        destination nodes; if False, missing ones are set to NaN

    Returns
    -------
    GeoDataFrame or tuple
        gdf_nodes or gdf_edges or both as a tuple

    Raises
    ------
    ValueError
        if neither nodes nor edges are requested
    """

    if not nodes and not edges:
        raise ValueError('You must request nodes or edges, or both.')

    frames = []

    if nodes:
        tic = time.time()

        # one column per node, transposed so that every node becomes a row
        node_attrs = dict(G.nodes(data=True))
        gdf_nodes = gpd.GeoDataFrame(node_attrs).T

        if node_geometry:
            def pixel_point(row):
                return Point(row['x_pix'], row['y_pix'])

            gdf_nodes['geometry_pix'] = gdf_nodes.apply(pixel_point, axis=1)

        gdf_nodes.crs = G.graph['crs']
        gdf_nodes.gdf_name = '{}_nodes'.format(G.graph['name'])
        gdf_nodes['osmid'] = gdf_nodes['osmid'].astype(np.int64).map(make_str)

        frames.append(gdf_nodes)
        elapsed = time.time() - tic
        log('Created GeoDataFrame "{}" from graph in {:,.2f} seconds'.format(
            gdf_nodes.gdf_name, elapsed))

    if edges:
        tic = time.time()

        edge_rows = []
        for u, v, key, data in G.edges(keys=True, data=True):
            # start from the edge identifiers, then copy every edge attribute
            row = {'u': u, 'v': v, 'key': key, **data}

            # edges without pixel geometry get a straight line between their
            # end nodes, or NaN when filling is disabled
            if 'geometry_pix' not in data:
                if not fill_edge_geometry:
                    row['geometry_pix'] = np.nan
                else:
                    origin = Point((G.nodes[u]['x_pix'], G.nodes[u]['y_pix']))
                    destination = Point(
                        (G.nodes[v]['x_pix'], G.nodes[v]['y_pix']))
                    row['geometry_pix'] = LineString([origin, destination])

            edge_rows.append(row)

        # create a GeoDataFrame from the list of edges and set the CRS
        gdf_edges = gpd.GeoDataFrame(edge_rows)
        gdf_edges.crs = G.graph['crs']
        gdf_edges.gdf_name = '{}_edges'.format(G.graph['name'])

        frames.append(gdf_edges)
        elapsed = time.time() - tic
        log('Created GeoDataFrame "{}" from graph in {:,.2f} seconds'.format(
            gdf_edges.gdf_name, elapsed))

    return tuple(frames) if len(frames) > 1 else frames[0]
def plot_graph_pix(G,
                   im=None,
                   bbox=None,
                   fig_height=6,
                   fig_width=None,
                   margin=0.02,
                   axis_off=True,
                   equal_aspect=False,
                   bgcolor='w',
                   show=True,
                   save=False,
                   close=True,
                   file_format='png',
                   filename='temp',
                   default_dpi=300,
                   annotate=False,
                   node_color='#66ccff',
                   node_size=15,
                   node_alpha=1,
                   node_edgecolor='none',
                   node_zorder=1,
                   edge_color='#999999',
                   edge_linewidth=1,
                   edge_alpha=1,
                   edge_width_key='speed_mph',
                   edge_width_mult=1. / 25,
                   use_geom=True):
    """
    Plot a networkx spatial graph using its pixel coordinates.

    Node positions are read from the 'x_pix' / 'y_pix' node attributes and
    edge shapes from the 'geometry_pix' edge attribute (when present).

    Parameters
    ----------
    G : networkx multidigraph
        graph to plot; every node must carry 'x_pix' and 'y_pix'
    im : array-like, optional
        image drawn beneath the graph with ``ax.imshow``; must expose a
        ``shape`` attribute. If None, a blank figure with background color
        ``bgcolor`` is used instead.
    bbox : tuple
        bounding box as north,south,east,west - if None will calculate from
        spatial extents of data. if passing a bbox, you probably also want to
        pass margin=0 to constrain it.
    fig_height : int
        matplotlib figure height in inches
    fig_width : int
        matplotlib figure width in inches; if None it is derived from
        fig_height and the bounding box aspect ratio
    margin : float
        relative margin around the figure
    axis_off : bool
        if True turn off the matplotlib axis
    equal_aspect : bool
        if True set the axis aspect ratio equal
    bgcolor : string
        the background color of the figure and axis (only used when im is
        None)
    show : bool
        if True, show the figure
    save : bool
        if True, save the figure as an image file to disk
    close : bool
        close the figure (only if show equals False) to prevent display
    file_format : string
        the format of the file to save (e.g., 'jpg', 'png', 'svg')
    filename : string
        the name of the file if saving
    default_dpi : int
        the resolution of the image file if saving. Used as-is when im is
        None; when an image is given the dpi may be raised towards the
        image's native resolution and is capped for large figures.
    annotate : bool
        if True, annotate the nodes in the figure
    node_color : string
        the color of the nodes
    node_size : int
        the size of the nodes
    node_alpha : float
        the opacity of the nodes
    node_edgecolor : string
        the color of the node's marker's border
    node_zorder : int
        zorder to plot nodes, edges are always 2, so make node_zorder 1 to plot
        nodes beneath them or 3 to plot nodes atop them
    edge_color : string
        the color of the edges' lines
    edge_linewidth : float
        the width of the edges' lines
    edge_alpha : float
        the opacity of the edges' lines
    edge_width_key : str
        optional: key in edge properties to determine edge width,
        supersedes edge_linewidth, defaults to "speed_mph"
    edge_width_mult : float
        factor to rescale width for plotting, default to 1./25, which gives
        a line width of 1 for 25 mph speed limit.
    use_geom : bool
        if True, use the spatial geometry attribute of the edges to draw
        geographically accurate edges, rather than just lines straight from node
        to node

    Returns
    -------
    fig, ax : tuple
        the figure and axis as returned by ``save_and_show``
    """

    log('Begin plotting the graph...')
    node_Xs = [float(x) for _, x in G.nodes(data='x_pix')]
    node_Ys = [float(y) for _, y in G.nodes(data='y_pix')]

    # get north, south, east, west values either from bbox parameter or from the
    # spatial extent of the edges' geometries
    if bbox is None:
        edges = graph_to_gdfs_pix(G, nodes=False, fill_edge_geometry=True)
        west, south, east, north = gpd.GeoSeries(
            edges['geometry_pix']).total_bounds
    else:
        north, south, east, west = bbox

    # if caller did not pass in a fig_width, calculate it proportionately from
    # the fig_height and bounding box aspect ratio
    bbox_aspect_ratio = (north - south) / (east - west)
    if fig_width is None:
        fig_width = fig_height / bbox_aspect_ratio

    # create the figure and axis; with an image the picture itself is the
    # background, otherwise paint figure and axis with bgcolor
    if im is not None:
        fig, ax = plt.subplots(figsize=(fig_width, fig_height))
        ax.imshow(im)
    else:
        fig, ax = plt.subplots(figsize=(fig_width, fig_height),
                               facecolor=bgcolor)
        ax.set_facecolor(bgcolor)

    # draw the edges as lines from node to node
    start_time = time.time()
    lines = []
    widths = []
    for u, v, data in G.edges(keys=False, data=True):
        if 'geometry_pix' in data and use_geom:
            # if it has a geometry attribute (a list of line segments), add them
            # to the list of lines to plot
            xs, ys = data['geometry_pix'].xy
            lines.append(list(zip(xs, ys)))
        else:
            # if it doesn't have a geometry attribute, the edge is a straight
            # line from node to node
            x1 = G.nodes[u]['x_pix']
            y1 = G.nodes[u]['y_pix']
            x2 = G.nodes[v]['x_pix']
            y2 = G.nodes[v]['y_pix']
            lines.append([(x1, y1), (x2, y2)])

        # per-edge width: scaled attribute value if available, otherwise the
        # uniform edge_linewidth
        if edge_width_key in data:
            width = int(np.rint(data[edge_width_key] * edge_width_mult))
        else:
            width = edge_linewidth
        widths.append(width)

    # add the lines to the axis as a linecollection
    lc = LineCollection(lines,
                        colors=edge_color,
                        linewidths=widths,
                        alpha=edge_alpha,
                        zorder=2)
    ax.add_collection(lc)
    log('Drew the graph edges in {:,.2f} seconds'.format(time.time() -
                                                         start_time))

    # scatter plot the nodes
    ax.scatter(node_Xs,
               node_Ys,
               s=node_size,
               c=node_color,
               alpha=node_alpha,
               edgecolor=node_edgecolor,
               zorder=node_zorder)

    # set the extent of the figure
    margin_ns = (north - south) * margin
    margin_ew = (east - west) * margin
    ax.set_ylim((south - margin_ns, north + margin_ns))
    ax.set_xlim((west - margin_ew, east + margin_ew))

    # configure axis appearance
    xaxis = ax.get_xaxis()
    yaxis = ax.get_yaxis()

    xaxis.get_major_formatter().set_useOffset(False)
    yaxis.get_major_formatter().set_useOffset(False)

    # if axis_off, turn off the axis display set the margins to zero and point
    # the ticks in so there's no space around the plot
    if axis_off:
        ax.axis('off')
        ax.margins(0)
        ax.tick_params(which='both', direction='in')
        xaxis.set_visible(False)
        yaxis.set_visible(False)
        fig.canvas.draw()

    if equal_aspect:
        # make everything square
        ax.set_aspect('equal')
        fig.canvas.draw()
    else:
        # if the graph is not projected, conform the aspect ratio to not stretch the plot
        # NOTE(review): node_Ys are pixel coordinates here, yet they are fed
        # to a latitude-based correction - confirm this branch is intended.
        if G.graph['crs'] == ox_settings.default_crs:
            coslat = np.cos((min(node_Ys) + max(node_Ys)) / 2. / 180. * np.pi)
            ax.set_aspect(1. / coslat)
            fig.canvas.draw()

    # annotate the axis with node IDs if annotate=True
    if annotate:
        for node, data in G.nodes(data=True):
            ax.annotate(node, xy=(data['x_pix'], data['y_pix']))

    # Start from the caller's dpi so the value is always defined; previously
    # it was only assigned when an image was supplied, which made the
    # save_and_show call below fail with a NameError for im=None.
    dpi = default_dpi
    if im is not None:
        # mpl can handle a max of 2^29 pixels, or 23170 on a side, so cap the
        # dpi according to the larger figure dimension
        max_dpi = int(23000 / max(fig_height, fig_width))
        h = im.shape[0]
        # try to set dpi to native resolution of imagery
        desired_dpi = max(default_dpi, 1.0 * h / fig_height)
        dpi = int(np.min([max_dpi, desired_dpi]))

    # save and show the figure as specified
    fig, ax = save_and_show(fig, ax, save, show, close, filename, file_format,
                            dpi, axis_off)
    return fig, ax
Beispiel #29
0
def create_graph(mrt_response_json, name='unnamed', retain_all=True, bidirectional=False):
    """
    Build a networkx MultiDiGraph from one Overpass API JSON response.

    Parameters
    ----------
    mrt_response_json : dict
        a single JSON response from the Overpass API; its 'elements' entry
        must contain at least one element
    name : string
        the name of the graph
    retain_all : bool
        if True, return the entire graph even if it is not connected;
        if False, only the largest connected component is kept
    bidirectional : bool
        if True, create bidirectional edges for one-way streets

    Returns
    -------
    networkx multidigraph

    Raises
    ------
    EmptyOverpassResponse
        if the response holds no elements
    """

    log('Creating networkx graph from downloaded OSM data...')
    began = time.time()

    # refuse to continue when the server handed back nothing
    osm_elements = list(mrt_response_json['elements'])
    if not osm_elements:
        raise EmptyOverpassResponse('There are no data elements in the response JSON objects')

    # the graph starts out in the default (unprojected) CRS
    G = nx.MultiDiGraph(name=name, crs=settings.default_crs)

    # pull nodes and ways out of the response, working on private copies
    parsed_nodes, parsed_paths = parse_osm_nodes_paths(mrt_response_json)
    node_attrs = dict(parsed_nodes.items())
    way_paths = dict(parsed_paths.items())

    # populate the graph: nodes first, then the ways connecting them
    for osm_id, attrs in node_attrs.items():
        G.add_node(osm_id, **attrs)
    G = ox.add_paths(G, way_paths, bidirectional=bidirectional)

    # optionally discard everything but the largest connected component
    if not retain_all:
        G = get_largest_component(G)

    node_count = G.number_of_nodes()
    edge_count = G.number_of_edges()
    elapsed = time.time() - began
    log('Created graph with {:,} nodes and {:,} edges in {:,.2f} seconds'.format(
        node_count, edge_count, elapsed))

    # attach an edge length attribute (via ox.add_edge_lengths) to use as
    # weight, but only when there are edges to measure
    if G.number_of_edges() > 0:
        G = ox.add_edge_lengths(G)

    return G
Beispiel #30
0
def get_nearest_edges(G, X, Y, method=None, dist=0.0001):
    """
    Return the graph edges nearest to a list of points.

    Pass in points as separate vectors of X and Y coordinates. The 'kdtree'
    method is by far the fastest with large data sets, but only finds
    approximate nearest edges if working in unprojected coordinates like
    lat-lng (it precisely finds the nearest edge if working in projected
    coordinates). The 'balltree' method is second fastest with large data
    sets, but it is precise if working in unprojected coordinates like
    lat-lng. As a rule of thumb, if you have a small graph just use
    method=None. If you have a large graph with lat-lng coordinates, use
    method='balltree'. If you have a large graph with projected coordinates,
    use method='kdtree'. Note that if you are working in units of lat-lng,
    the X vector corresponds to longitude and the Y vector corresponds
    to latitude. The tree-based methods create equally distanced points
    along the edges of the network. Then, these points are used in a kdTree
    or BallTree search to identify which is nearest. Note that this will not
    give the exact perpendicular point along the edge, but the smaller the
    *dist* parameter, the closer the solution will be.

    Parameters
    ----------
    G : networkx.MultiDiGraph
        input graph
    X : list-like
        The vector of longitudes or x's for which we will find the nearest
        edge in the graph. For projected graphs, use the projected coordinates,
        usually in meters.
    Y : list-like
        The vector of latitudes or y's for which we will find the nearest
        edge in the graph. For projected graphs, use the projected coordinates,
        usually in meters.
    method : string {None, 'kdtree', 'balltree'}
        Which method to use for finding nearest edge to each point.
        If None, we manually find each edge one at a time using
        get_nearest_edge. If 'kdtree' we use
        scipy.spatial.cKDTree for very fast euclidean search. Recommended for
        projected graphs. If 'balltree', we use sklearn.neighbors.BallTree for
        fast haversine search. Recommended for unprojected graphs.
    dist : float
        spacing length along edges. Units are the same as the geom; Degrees for
        unprojected geometries and meters for projected geometries. The smaller
        the value, the more points are created. Ignored when method is None.

    Returns
    -------
    ne : np.array
        array of nearest edges represented by u and v (the IDs of the nodes
        they link) and key

    Raises
    ------
    ImportError
        if the optional package backing the requested method is unavailable
    ValueError
        if method is not one of None, 'kdtree' or 'balltree'
    """
    if method is None:
        # calculate nearest edge one at a time for each (y, x) point
        ne = [get_nearest_edge(G, (y, x)) for x, y in tqdm(zip(X, Y))]

    elif method in ("kdtree", "balltree"):

        # check that the optional dependency for this method was imported
        if method == "kdtree" and not cKDTree:
            raise ImportError(
                "The scipy package must be installed to use this optional feature."
            )
        if method == "balltree" and not BallTree:
            raise ImportError(
                "The scikit-learn package must be installed to use this optional feature."
            )

        # transform graph into DataFrame
        edges = utils_graph.graph_to_gdfs(G,
                                          nodes=False,
                                          fill_edge_geometry=True)

        # transform edges into evenly spaced points
        edges["points"] = edges.apply(
            lambda row: utils_geo.redistribute_vertices(row.geometry, dist),
            axis=1)

        # one row per sampled point, carrying the data of the edge it lies on;
        # the "index" column refers back to the row labels of `edges`
        extended = (edges["points"].apply([pd.Series]).stack().reset_index(
            level=1, drop=True).join(edges).reset_index())

        sample_xs = extended["Series"].apply(lambda pt: pt.x)
        sample_ys = extended["Series"].apply(lambda pt: pt.y)

        if method == "kdtree":
            # build a k-d tree for euclidean nearest node search
            nbdata = np.array(list(zip(sample_xs, sample_ys)))
            btree = cKDTree(data=nbdata,
                            compact_nodes=True,
                            balanced_tree=True)

            # query the tree for nearest sampled point to each input point;
            # the distances are discarded so the `dist` parameter is not
            # clobbered
            points = np.array([X, Y]).T
            _, idx = btree.query(points, k=1)

        else:
            # haversine requires data in form of [lat, lng] and
            # inputs/outputs in units of radians
            nodes = pd.DataFrame({"x": sample_xs, "y": sample_ys})
            # builtin float replaces the np.float alias removed from NumPy
            nodes_rad = np.deg2rad(nodes[["y", "x"]].values.astype(float))
            points_rad = np.deg2rad(np.array([Y, X]).T)

            # build a ball tree for haversine nearest node search
            tree = BallTree(nodes_rad, metric="haversine")

            # only the single nearest neighbor is needed; asking for more
            # fails on trees holding fewer points than k
            idx = tree.query(points_rad, k=1, return_distance=False)[:, 0]

        # map nearest sampled points back to the edges they were drawn from
        eidx = extended.loc[idx, "index"]
        ne = edges.loc[eidx, ["u", "v", "key"]]

    else:
        raise ValueError("You must pass a valid method name, or None.")

    utils.log(f"Found nearest edges to {len(X)} points")

    return np.array(ne)