def invalid_multipoly_handler(gdf, relation, way_ids):
    """
    Handle invalid multipolygon geometries, e.g. when a feature has no
    geometry (geometry == NaN).

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        GeoDataFrame with Polygon geometries that should be converted into
        a MultiPolygon object.
    relation : dict
        OSM 'relation' dictionary
    way_ids : list
        A list of 'way' ids that should be converted into a MultiPolygon object.
    """
    try:
        gdf_clean = gdf.dropna(subset=['geometry'])
        multipoly = MultiPolygon(list(gdf_clean['geometry']))
        return multipoly
    except Exception:
        log("Invalid geometry at relation id %s.\nWay-ids of the invalid MultiPolygon: %s"
            % (relation['id'], str(way_ids)))
        return None
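# A minimal sketch (hypothetical data, not from the code above) showing why
# dropping NaN geometries matters: MultiPolygon() fails on a None member but
# succeeds once the empty rows are removed.
import geopandas as gpd
from shapely.geometry import Polygon, MultiPolygon

gdf = gpd.GeoDataFrame({'geometry': [Polygon([(0, 0), (1, 0), (1, 1)]), None]})
gdf_clean = gdf.dropna(subset=['geometry'])
multipoly = MultiPolygon(list(gdf_clean['geometry']))
print(multipoly.is_valid)  # True: the NaN feature no longer breaks construction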
def parse_polygonal_poi(coords, response):
    """
    Parse areal POI way polygons from OSM node coords.

    Parameters
    ----------
    coords : dict
        dict of node IDs and their lat, lon coordinates
    response : dict
        OSM 'way' element from the Overpass response

    Returns
    -------
    dict of POIs containing each's nodes, polygon geometry, and osmid
    """
    if 'type' in response and response['type'] == 'way':
        nodes = response['nodes']
        try:
            polygon = Polygon([(coords[node]['lon'], coords[node]['lat']) for node in nodes])
            poi = {'nodes': nodes,
                   'geometry': polygon,
                   'osmid': response['id']}
            if 'tags' in response:
                for tag in response['tags']:
                    poi[tag] = response['tags'][tag]
            return poi
        except Exception:
            log('Polygon has invalid geometry: {}'.format(nodes))
    return None
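# Hedged usage sketch with hypothetical data: three nodes closing back on the
# first form a triangular way, which parse_polygonal_poi turns into a POI dict.
coords = {1: {'lat': 0.0, 'lon': 0.0},
          2: {'lat': 0.0, 'lon': 1.0},
          3: {'lat': 1.0, 'lon': 1.0}}
response = {'type': 'way', 'id': 42, 'nodes': [1, 2, 3, 1],
            'tags': {'amenity': 'cafe'}}
poi = parse_polygonal_poi(coords, response)
print(poi['osmid'], poi['amenity'])  # 42 cafe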
def parse_osm_node(response):
    """
    Parse points from OSM nodes.

    Parameters
    ----------
    response : dict
        Node from the OSM response.

    Returns
    -------
    Dict with the node's osmid, Point geometry, and tags, or None if the
    geometry is invalid.
    """
    try:
        point = Point(response['lon'], response['lat'])
        poi = {'osmid': response['id'], 'geometry': point}
        if 'tags' in response:
            for tag in response['tags']:
                poi[tag] = response['tags'][tag]
    except Exception:
        log('Point has invalid geometry: {}'.format(response['id']))
        poi = None
    return poi
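# Minimal sketch (hypothetical node) of the expected input shape: a raw
# Overpass node element with id, lon/lat, and optional tags.
node = {'type': 'node', 'id': 7, 'lat': 60.17, 'lon': 24.94,
        'tags': {'amenity': 'bench'}}
poi = parse_osm_node(node)
print(poi['geometry'])  # POINT (24.94 60.17)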
def add_footprints(self, osm_data, category):
    """
    Plot a GeoDataFrame of footprints on the object's axes.

    Parameters
    ----------
    osm_data : JSON
        OSM footprint data for the given category
    category : string
        item category to plot

    Returns
    -------
    self, with the footprint patches added to self.ax
    """
    osm_elements = osm_data.get(category).get('elements')
    plot_format = self.plot_format_dict[category]
    if osm_elements is None:
        print('No OSM elements to plot')
        return self

    # Generate GeoPandas DataFrame
    gdf = self.create_footprint_gdf(osm_elements)
    if gdf is None:
        log('Empty GDF for {}'.format(category))
        return self

    # Store existing xlims and ylims to reset the adjusted lims later
    xlim = self.ax.get_xlim()
    ylim = self.ax.get_ylim()

    # extract each polygon as a descartes patch, and add to a matplotlib
    # patch collection
    patches = []
    for geometry in gdf['geometry']:
        if isinstance(geometry, Polygon):
            patches.append(PolygonPatch(geometry))
        elif isinstance(geometry, MultiPolygon):
            # if geometry is a multipolygon, go through each constituent subpolygon
            for subpolygon in geometry.geoms:
                patches.append(PolygonPatch(subpolygon))
    pc = PatchCollection(patches,
                         facecolor=plot_format['facecolor'],
                         edgecolor=plot_format['edge_color'],
                         linewidth=plot_format['linewidth'],
                         alpha=plot_format['alpha'],
                         zorder=plot_format['zorder'])
    self.ax.add_collection(pc)

    # Reset the lims to original lims
    self.ax.set_xlim(xlim)
    self.ax.set_ylim(ylim)
    return self
def multithreading_get_venues(self, category_dict, bbox_mapping):
    '''
    Multi-threads the get_venues Foursquare API requests

    Parameters
    ----------
    category_dict : dict
        Category dictionary from user profile
    bbox_mapping : dict
        Maps each category to a tuple with (0) south-west and (1) north-east
        lat/lon coordinates

    Returns
    -------
    List of all venues across subcategories in category dictionary
    '''
    log('Multithreading get_venue search with Foursquare API...')
    start_time = time.time()

    # Function to run in each thread
    def single_thread_get_venues(category, subcategories, bb):
        try:
            result = self.fs_client.venues.search(
                params={'intent': self.intent,
                        'sw': ','.join(str(i) for i in bb[0]),
                        'ne': ','.join(str(i) for i in bb[1]),
                        'categoryId': ','.join(i for i in subcategories.values()),
                        'limit': self.limit})
            for venue in result.get('venues', []):
                venue['recommendation_category'] = category
        except Exception:
            result = {}
            log('get_venues request fails for category {}...'.format(category))
        return_dict[category] = result

    jobs = []
    manager = mp.Manager()
    return_dict = manager.dict()
    for category, subcategories in category_dict.items():
        thread = threading.Thread(name=category,
                                  target=single_thread_get_venues,
                                  args=(category, subcategories, bbox_mapping[category]))
        jobs.append(thread)
        thread.start()
    for j in jobs:
        j.join()

    # Collect thread results from the shared dictionary
    venues = [venue for category in return_dict.values()
              for venue in category.get('venues', [])]
    log('Downloaded {:,} venues in {:,.2f} seconds'.format(len(venues), time.time() - start_time))
    return venues
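# Hedged sketch of the same fan-out pattern with a stub worker (no Foursquare
# client needed): one thread per category writes into a shared Manager dict.
# A plain dict would also work across threads; Manager.dict() mirrors the code above.
import threading
import multiprocessing as mp

manager = mp.Manager()
return_dict = manager.dict()

def stub_worker(category):
    # stand-in for the API call: record a fake result under this category
    return_dict[category] = {'venues': [{'id': category + '-1'}]}

jobs = [threading.Thread(target=stub_worker, args=(cat,)) for cat in ('food', 'arts')]
for t in jobs:
    t.start()
for t in jobs:
    t.join()
print(dict(return_dict))  # results keyed by category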
def get_extract_population_data(city_ref, data_source, pop_shapefile=None,
                                pop_data_file=None, to_crs={'init': 'epsg:4326'},
                                df_osm_built=None):
    """
    Get population data extract of the desired data source for the input city.
    The population data frame is projected to the desired coordinate reference
    system. Stores the extracted shapefile. Returns the stored population data
    for the input 'data source' and 'city reference' if it was previously stored.

    Parameters
    ----------
    city_ref : string
        name of input city
    data_source : string
        desired population data source
    pop_shapefile : string
        population count shapefile
    pop_data_file : string
        population data additional file (required for INSEE format)
    to_crs : dict
        desired coordinate reference system
    df_osm_built : geopandas.GeoDataFrame
        buildings for input region of interest

    Returns
    ----------
    geopandas.GeoDataFrame
        returns the extracted population data
    """
    # Input data source type given?
    assert data_source in DATA_SOURCES

    # Population extract exists?
    if os.path.exists(get_population_extract_filename(city_ref, data_source)):
        log("Population extract exists for input city: " + city_ref)
        return gpd.read_file(get_population_extract_filename(city_ref, data_source))

    # Input buildings given?
    assert df_osm_built is not None
    # Input population shapefile given?
    assert pop_shapefile is not None
    # All input files given?
    assert not ((data_source == 'insee') and (pop_data_file is None))

    # Get buildings convex hull
    polygon = GeometryCollection(df_osm_built.geometry.values.tolist()).convex_hull
    # Convert to geo-dataframe with defined CRS
    poly_gdf = gpd.GeoDataFrame([polygon], columns=["geometry"], crs=df_osm_built.crs)

    # Compute extract
    df_pop = get_population_df(pop_shapefile, pop_data_file, data_source, to_crs, poly_gdf)

    # Save to shapefile
    df_pop.to_file(get_population_extract_filename(city_ref, data_source),
                   driver='ESRI Shapefile')
    return df_pop
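# Minimal sketch (hypothetical geometries) of the convex-hull step above:
# collect all building footprints and take one hull around them.
from shapely.geometry import GeometryCollection, Polygon

buildings = [Polygon([(0, 0), (1, 0), (1, 1)]), Polygon([(5, 5), (6, 5), (6, 6)])]
hull = GeometryCollection(buildings).convex_hull
print(hull.geom_type)  # Polygon covering the whole region of interest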
def print_processing_time(start_time, message, id=None, logging=True):
    """Print (and optionally log) the elapsed time since start_time."""
    stop = time.time()
    if id is not None:
        message += ' (id = {})'.format(id)
        id += 1
    # Assumes max message length of 52 chars and 8 chars per tab
    space = (52 - len(message)) * ' '
    print_message = "{}{} --- {:.2f} seconds ---".format(message, space, stop - start_time)
    print(print_message)
    if logging:
        log(print_message)
    return stop, id
def get_nearest_edge(G, point, return_geom=False, return_dist=False):
    """
    Return the nearest edge to a point, by minimum Euclidean distance.

    Parameters
    ----------
    G : networkx.MultiDiGraph
        input graph
    point : tuple
        the (lat, lng) or (y, x) point for which we will find the nearest edge
        in the graph
    return_geom : bool
        Optionally return the geometry of the nearest edge
    return_dist : bool
        Optionally return the distance in graph's coordinates' units between
        the point and the nearest edge

    Returns
    -------
    tuple
        Graph edge unique identifier as a tuple of (u, v, key).
        Or a tuple of (u, v, key, geom) if return_geom is True.
        Or a tuple of (u, v, key, dist) if return_dist is True.
        Or a tuple of (u, v, key, geom, dist) if return_geom and return_dist
        are True.
    """
    # get u, v, key, geom from all the graph edges
    gdf_edges = utils_graph.graph_to_gdfs(G, nodes=False, fill_edge_geometry=True)
    edges = gdf_edges[["u", "v", "key", "geometry"]].values

    # convert lat/lng point to x/y for shapely distance operation
    xy_point = Point(reversed(point))

    # calculate euclidean distance from each edge's geometry to this point
    edge_distances = [(edge, xy_point.distance(edge[3])) for edge in edges]

    # the nearest edge minimizes the distance to the point
    (u, v, key, geom), dist = min(edge_distances, key=lambda x: x[1])
    utils.log(f"Found nearest edge ({u, v, key}) to point {point}")

    # return results requested by caller
    if return_dist and return_geom:
        return u, v, key, geom, dist
    elif return_dist:
        return u, v, key, dist
    elif return_geom:
        return u, v, key, geom
    else:
        return u, v, key
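# The core of the search above, in isolation: shapely's point-to-line distance.
# Hypothetical coordinates; units are whatever the graph's CRS uses.
from shapely.geometry import Point, LineString

edge_geom = LineString([(0, 0), (1, 0)])
print(Point(0.5, 0.2).distance(edge_geom))  # 0.2, the perpendicular distance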
def create_footprint_gdf(self, osm_elements):
    """
    Assemble OSM footprint data into a GeoDataFrame.

    Parameters
    ----------
    osm_elements : JSON
        OSM footprint data for specific category

    Returns
    -------
    GeoDataFrame
    """
    vertices = {}
    for result in osm_elements:
        if 'type' in result and result['type'] == 'node':
            vertices[result['id']] = {'lat': result['lat'], 'lon': result['lon']}

    footprints = {}
    for result in osm_elements:
        if 'type' in result and result['type'] == 'way':
            nodes = result['nodes']
            try:
                polygon = Polygon([(vertices[node]['lon'], vertices[node]['lat'])
                                   for node in nodes])
                footprint = {'nodes': nodes, 'geometry': polygon}
                if 'tags' in result:
                    for tag in result['tags']:
                        footprint[tag] = result['tags'][tag]
                footprints[result['id']] = footprint
            except Exception:
                log('Polygon has invalid geometry: {}'.format(nodes))

    if footprints != {}:
        gdf = gpd.GeoDataFrame(footprints).T
        # drop all invalid geometries
        gdf = gdf[gdf['geometry'].is_valid]
        return gdf
    else:
        return None
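# Sketch of the dict-of-dicts -> GeoDataFrame pattern used above (hypothetical
# footprint): outer keys are way ids, and the transpose makes each one a row.
import geopandas as gpd
from shapely.geometry import Polygon

footprints = {10: {'geometry': Polygon([(0, 0), (1, 0), (1, 1)]), 'building': 'yes'}}
gdf = gpd.GeoDataFrame(footprints).T
print(gdf.loc[10, 'building'])  # yes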
def wrapper(*args, **kwargs):
    int_time = time.time()
    ret = func(*args, **kwargs)
    # Assumes max message length of 52 chars and 8 chars per tab
    message = '.'.join([func.__module__, func.__name__])
    space = (52 - len(message)) * ' '
    print_message = "{}{} --- {:.2f} seconds ---".format(message, space, time.time() - int_time)
    log(print_message)
    return ret
def get_map(city, state):
    """Load a cached street network for city/state, building and caching it on first use."""
    file_name = "%s%s.pkl" % (city, state)
    projected_file_name = "%s%s_projected.pkl" % (city, state)
    projected_file_path = Path(projected_file_name)
    file_path = Path(file_name)
    if projected_file_path.is_file() and file_path.is_file():
        with open(file_name, "rb") as f, open(projected_file_name, "rb") as g:
            return pkl.load(f), pkl.load(g)
    else:
        query = {'city': city, 'state': state, 'country': 'USA'}
        graph = ox.graph_from_place(query, network_type='drive')
        graph = add_node_elevations_open(graph)
        graph = ox.add_edge_grades(graph)
        with open(file_name, "wb") as f:
            pkl.dump(graph, f)
        graph_proj = ox.project_graph(graph)
        with open(projected_file_name, "wb") as f:
            pkl.dump(graph_proj, f)
        return graph, graph_proj
def get_Y_X_features_population_data(cities_selection=None, cities_skip=None):
    """
    Return the Y and X arrays for training/testing population downscaling estimates.
    Gathers either a selection of cities, or all stored cities except a given
    list to skip.
    Y contains vectors with the corresponding population densities
    X contains vectors with normalised urban features
    X_columns contains the column names referring to the X values
    The numpy arrays have been previously stored

    Parameters
    ----------
    cities_selection : list
        list of cities to select
    cities_skip : list
        list of cities to skip (retrieve the rest)

    Returns
    ----------
    np.array, np.array, np.array
        Y vector, X vector, X column names vector
    """
    arr_X, arr_Y = [], []

    # Get the complete training-testing dataset
    for Y_X_data_city in os.listdir("data/training"):
        # Only if it contains a valid extension
        if '.npz' not in Y_X_data_city:
            continue
        # Get city's name
        city_ref = Y_X_data_city.replace('_X_Y.npz', '')
        # Only retrieve data from cities_selection (if ever given)
        if (cities_selection is not None) and (city_ref not in cities_selection):
            log('Skipping city: ' + city_ref)
            continue
        # Skip cities from cities_skip (if ever given)
        if (cities_skip is not None) and (city_ref in cities_skip):
            log('Skipping city: ' + city_ref)
            continue

        log('Retrieving data: ' + city_ref)
        # Get stored data
        city_Y, city_X, city_X_cols = get_training_testing_data(city_ref)
        # Append values
        arr_Y.append(city_Y)
        arr_X.append(city_X)

    # Assumption: all generated training-testing data contain the same X columns
    return np.concatenate(arr_Y), np.concatenate(arr_X), city_X_cols
def get_paths_to_simplify(G, strict=True):
    """
    Create a list of all the paths to be simplified between endpoint nodes.

    The path is ordered from the first endpoint, through the interstitial nodes,
    to the second endpoint.

    Parameters
    ----------
    G : graph
    strict : bool
        if False, allow nodes to be end points even if they fail all other
        rules but have edges with different OSM IDs

    Returns
    -------
    paths_to_simplify : list
    """
    # first identify all the nodes that are endpoints
    start_time = time.time()
    endpoints = set([node for node in G.nodes() if is_endpoint(G, node, strict=strict)])
    log('Identified {:,} edge endpoints in {:,.2f} seconds'.format(
        len(endpoints), time.time() - start_time))

    start_time = time.time()
    paths_to_simplify = []

    # for each endpoint node, look at each of its successor nodes
    for node in endpoints:
        for successor in G.successors(node):
            if successor not in endpoints:
                # if the successor is not an endpoint, build a path from the
                # endpoint node to the next endpoint node
                try:
                    path = build_path(G, successor, endpoints, path=[node, successor])
                    paths_to_simplify.append(path)
                except RuntimeError:
                    # recursion errors occur if some connected component is a
                    # self-contained ring in which all nodes are not endpoints;
                    # handle it by just ignoring that component and letting its
                    # topology remain intact (this should be a rare occurrence).
                    # RuntimeError is what Python <3.5 will throw, Py3.5+ throws
                    # RecursionError but it is a subtype of RuntimeError so it
                    # still gets handled
                    log('Recursion error: exceeded max depth, moving on to next endpoint successor',
                        level=lg.WARNING)

    log('Constructed all paths to simplify in {:,.2f} seconds'.format(time.time() - start_time))
    return paths_to_simplify
def create_graphGeoJson(geoJson, name='unnamed', retain_all=True,
                        network_type='all_private', valid_road_types=set([]),
                        roadTypeField='type', verbose=True,
                        osmidx=0, osmNodeidx=0):
    """
    Create a networkx graph from OSM data.

    Parameters
    ----------
    geoJson : str
        path to the GeoJSON file (any file format supported by OGR will work)
    name : string
        the name of the graph
    retain_all : bool
        if True, return the entire graph even if it is not connected
    network_type : string
        what type of network to create

    Returns
    -------
    networkx multidigraph
    """
    log('Creating networkx graph from downloaded OSM data...')
    start_time = time.time()

    # create the graph as a MultiDiGraph and set the original CRS to EPSG 4326
    G = nx.MultiDiGraph(name=name, crs={'init': 'epsg:4326'})

    # extract nodes and paths from the downloaded osm data
    nodes = {}
    paths = {}
    nodes_temp, paths_temp = parse_OGR_nodes_paths(geoJson,
                                                   valid_road_types=valid_road_types,
                                                   verbose=verbose,
                                                   osmidx=osmidx,
                                                   osmNodeidx=osmNodeidx,
                                                   roadTypeField=roadTypeField)
    if len(nodes_temp) == 0:
        return G
    if verbose:
        print("nodes_temp:", nodes_temp)
        print("paths_temp:", paths_temp)

    for key, value in list(nodes_temp.items()):
        nodes[key] = value
        if verbose:
            print("node key:", key)
            print("  node value:", value)
    for key, value in list(paths_temp.items()):
        paths[key] = value
        if verbose:
            print("path key:", key)
            print("  path value:", value)

    # add each osm node to the graph
    for node, data in list(nodes.items()):
        G.add_node(node, **data)

    # add each osm way (aka, path) to the graph
    if verbose:
        print("paths:", paths)
    G = core.add_paths(G, paths, network_type)

    # retain only the largest connected component, if caller did not set retain_all=True
    if not retain_all:
        G = core.get_largest_component(G)

    log('Created graph with {:,} nodes and {:,} edges in {:,.2f} seconds'.format(
        len(list(G.nodes())), len(list(G.edges())), time.time() - start_time))

    # add length (great circle distance between nodes) attribute to each edge to use as weight
    G = core.add_edge_lengths(G)

    return G
def compute_grid_dispersion(df_indices, df_osm_built,
                            kwargs={"radius_search": 750, "use_median": True, "K_nearest": 50}):
    """
    Creates grid and calculates dispersion indices.

    Parameters
    ----------
    df_indices : geopandas.GeoDataFrame
        data frame containing the (x,y) reference points to calculate indices
    df_osm_built : geopandas.GeoDataFrame
        data frame containing the buildings' geometries
    kwargs : dict
        additional keyword arguments for the indices calculation
        radius_search : int
            circle radius to consider the dispersion calculation at a local point
        use_median : bool
            denotes whether the median or mean should be used to calculate the indices
        K_nearest : int
            number of neighboring buildings to consider in evaluation

    Returns
    ----------
    geopandas.GeoDataFrame
        data frame with the added column for dispersion indices
    """
    log("Dispersion calculation")
    start = time.time()

    # Circle radius to consider the dispersion calculation at a local point
    radius_search = kwargs["radius_search"]
    # Assign dispersion calculation method: median or mean
    if kwargs["use_median"]:
        _calculate_dispersion = closest_building_distance_median
    else:
        _calculate_dispersion = closest_building_distance_average

    # Calculate the closest distance for each building within K_nearest centroid buildings
    _apply_polygon_closest_distance_neighbor(df_osm_built, K_nearest=kwargs["K_nearest"])

    # For dispersion calculation approximation, create KDTree with buildings centroid
    coords_data = [point.coords[0] for point in
                   df_osm_built.loc[df_osm_built.closest_d.notnull()].geometry.apply(lambda x: x.centroid)]
    # Create KDTree
    tree = spatial.KDTree(coords_data)

    # Compute dispersion indices
    index_column = "dispersion"
    df_indices[index_column] = df_indices.geometry.apply(
        lambda x: _calculate_dispersion(x, tree, df_osm_built.closest_d, radius_search))

    # Remove added column
    df_osm_built.drop('closest_d', axis=1, inplace=True)

    end = time.time()
    log("Dispersion calculation time: " + str(end - start))
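# Isolated sketch of the KDTree query backing the dispersion estimate
# (hypothetical centroids): find the nearest building centroids to a point.
import numpy as np
from scipy import spatial

centroids = np.array([[0.0, 0.0], [1.0, 1.0], [5.0, 5.0]])
tree = spatial.KDTree(centroids)
dist, idx = tree.query([0.2, 0.1], k=2)
print(dist, idx)  # distances and indices of the 2 nearest centroids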
def simplify_graph(G_, strict=True):
    """
    Simplify a graph's topology by removing all nodes that are not
    intersections or dead-ends. Create an edge directly between the end points
    that encapsulate them, but retain the geometry of the original edges,
    saved as an attribute in the new edge.

    Parameters
    ----------
    G_ : graph
    strict : bool
        if False, allow nodes to be end points even if they fail all other
        rules but have edges with different OSM IDs

    Returns
    -------
    G : graph
    """
    if is_simplified(G_):
        raise Exception('This graph has already been simplified, cannot simplify it again.')

    G = G_.copy()
    initial_node_count = len(list(G.nodes()))
    initial_edge_count = len(list(G.edges()))
    all_nodes_to_remove = []
    all_edges_to_add = []

    # construct a list of all the paths that need to be simplified
    paths = get_paths_to_simplify(G, strict=strict)

    start_time = time.time()
    for path in paths:
        # add the interstitial edges we're removing to a list so we can retain
        # their spatial geometry
        edge_attributes = {}
        for u, v in zip(path[:-1], path[1:]):
            # there shouldn't be multiple edges between interstitial nodes
            edges = G.get_edge_data(u, v)
            if not len(edges) == 1:
                log('Multiple edges between "{}" and "{}" found when simplifying'.format(u, v),
                    level=lg.WARNING)

            # the only element in this dict as long as the above assertion is
            # True (MultiGraphs use keys (the 0 here), indexed with ints from 0 and up)
            edge = edges[0]
            for key in edge:
                if key in edge_attributes:
                    # if this key already exists in the dict, append it to the value list
                    edge_attributes[key].append(edge[key])
                else:
                    # if this key doesn't already exist, set the value to a list
                    # containing the one value
                    edge_attributes[key] = [edge[key]]

        for key in edge_attributes:
            # don't touch the length attribute, we'll sum it at the end
            if len(set(edge_attributes[key])) == 1 and not key == 'length':
                # if there's only 1 unique value in this attribute list,
                # consolidate it to the single value (the zero-th)
                edge_attributes[key] = edge_attributes[key][0]
            elif not key == 'length':
                # otherwise, if there are multiple values, keep one of each value
                edge_attributes[key] = list(set(edge_attributes[key]))

        # construct the geometry and sum the lengths of the segments
        edge_attributes['geometry'] = LineString(
            [Point((G.nodes[node]['x'], G.nodes[node]['y'])) for node in path])
        edge_attributes['length'] = sum(edge_attributes['length'])

        # add the nodes and edges to their lists for processing at the end
        all_nodes_to_remove.extend(path[1:-1])
        all_edges_to_add.append({'origin': path[0],
                                 'destination': path[-1],
                                 'attr_dict': edge_attributes})

    # for each edge to add in the list we assembled, create a new edge between
    # the origin and destination
    for edge in all_edges_to_add:
        G.add_edge(edge['origin'], edge['destination'], **edge['attr_dict'])

    # finally remove all the interstitial nodes between the new edges
    G.remove_nodes_from(set(all_nodes_to_remove))

    msg = 'Simplified graph (from {:,} to {:,} nodes and from {:,} to {:,} edges) in {:,.2f} seconds'
    log(msg.format(initial_node_count, len(list(G.nodes())),
                   initial_edge_count, len(list(G.edges())), time.time() - start_time))
    return G
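# Tiny sketch of the attribute-consolidation rule above, with hypothetical
# values: single unique values collapse to a scalar, multiple values are
# deduplicated into a list, and 'length' gets summed.
edge_attributes = {'highway': ['residential', 'residential'],
                   'name': ['Main St', 'Main Street'],
                   'length': [10.0, 12.5]}
for key in edge_attributes:
    if len(set(edge_attributes[key])) == 1 and key != 'length':
        edge_attributes[key] = edge_attributes[key][0]
    elif key != 'length':
        edge_attributes[key] = list(set(edge_attributes[key]))
edge_attributes['length'] = sum(edge_attributes['length'])
print(edge_attributes)  # highway -> 'residential', name -> list of 2, length -> 22.5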
def overpass_request(data, pause_duration=None, timeout=180, error_pause_duration=None):
    """
    Send a request to the Overpass API via HTTP POST and return the JSON response.

    Parameters
    ----------
    data : dict or OrderedDict
        key-value pairs of parameters to post to the API
    pause_duration : int
        how long to pause in seconds before requests; if None, will query API
        status endpoint to find when next slot is available
    timeout : int
        the timeout interval for the requests library
    error_pause_duration : int
        how long to pause in seconds before re-trying requests if error

    Returns
    -------
    dict
    """
    # define the Overpass API URL, then construct a GET-style URL as a string
    # to hash to look up/save to cache
    url = settings.overpass_endpoint.rstrip('/') + '/interpreter'
    prepared_url = requests.Request('GET', url, params=data).prepare().url
    cached_response_json = get_from_cache(prepared_url)

    if cached_response_json is not None:
        # found this request in the cache, just return it instead of making a
        # new HTTP call
        return cached_response_json
    else:
        # if this URL is not already in the cache, pause, then request it
        this_pause_duration = pause_duration if pause_duration is not None else get_pause_duration()
        log('Pausing {:,.2f} seconds before making API POST request'.format(this_pause_duration))
        time.sleep(this_pause_duration)
        start_time = time.time()
        log('Posting to {} with timeout={}, "{}"'.format(url, timeout, data))
        response = requests.post(url, data=data, timeout=timeout, headers=get_http_headers())

        # get the response size and the domain, log result
        size_kb = len(response.content) / 1000.
        domain = re.findall(r'(?s)//(.*?)/', url)[0]
        log('Downloaded {:,.1f}KB from {} in {:,.2f} seconds'.format(
            size_kb, domain, time.time() - start_time))

        try:
            response_json = response.json()
            if 'remark' in response_json:
                log('Server remark: "{}"'.format(response_json['remark']), level=lg.WARNING)
            save_to_cache(prepared_url, response_json)
        except Exception:
            # 429 is 'too many requests' and 504 is 'gateway timeout' from
            # server overload - handle these errors by recursively calling
            # overpass_request until we get a valid response
            if response.status_code in [429, 504]:
                # pause for error_pause_duration seconds before re-trying request
                if error_pause_duration is None:
                    error_pause_duration = get_pause_duration()
                log('Server at {} returned status code {} and no JSON data. Re-trying request in {:.2f} seconds.'.format(
                    domain, response.status_code, error_pause_duration), level=lg.WARNING)
                time.sleep(error_pause_duration)
                response_json = overpass_request(data=data, pause_duration=pause_duration, timeout=timeout)
            # else, this was an unhandled status_code, throw an exception
            else:
                log('Server at {} returned status code {} and no JSON data'.format(
                    domain, response.status_code), level=lg.ERROR)
                raise Exception('Server returned no JSON data.\n{} {}\n{}'.format(
                    response, response.reason, response.text))

        return response_json
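# Sketch of the cache-key construction above: the POST body is encoded as a
# GET-style URL purely so identical queries hash to the same cache entry.
# (No network call is made; endpoint shown for illustration only.)
import requests

url = 'https://overpass-api.de/api/interpreter'
prepared_url = requests.Request('GET', url, params={'data': '[out:json];node(1);out;'}).prepare().url
print(prepared_url)  # deterministic string usable as a cache key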
def run_sim_and_graph(G, bbox=None, fig_height=6, fig_width=None, margin=0.02,
                      axis_off=True, equal_aspect=False, bgcolor='w', show=True,
                      save=False, close=True, file_format='png', filename='temp',
                      dpi=600, annotate=False, node_color='#66ccff', node_size=15,
                      node_alpha=1, node_edgecolor='none', node_zorder=1,
                      edge_color='#999999', edge_linewidth=1, edge_alpha=1,
                      prior_iters=0, use_geom=True, num_iters=20, draw_freq=50,
                      show_congestion=False):
    # This is mostly lifted directly from osmnx. Using it to enable real-time
    # graphing as more samples are taken.
    """
    Plot a networkx spatial graph.

    Parameters
    ----------
    G : networkx multidigraph
    bbox : tuple
        bounding box as north,south,east,west - if None will calculate from
        spatial extents of data. if passing a bbox, you probably also want to
        pass margin=0 to constrain it.
    fig_height : int
        matplotlib figure height in inches
    fig_width : int
        matplotlib figure width in inches
    margin : float
        relative margin around the figure
    axis_off : bool
        if True turn off the matplotlib axis
    equal_aspect : bool
        if True set the axis aspect ratio equal
    bgcolor : string
        the background color of the figure and axis
    show : bool
        if True, show the figure
    save : bool
        if True, save the figure as an image file to disk
    close : bool
        close the figure (only if show equals False) to prevent display
    file_format : string
        the format of the file to save (e.g., 'jpg', 'png', 'svg')
    filename : string
        the name of the file if saving
    dpi : int
        the resolution of the image file if saving
    annotate : bool
        if True, annotate the nodes in the figure
    node_color : string
        the color of the nodes
    node_size : int
        the size of the nodes
    node_alpha : float
        the opacity of the nodes
    node_edgecolor : string
        the color of the node's marker's border
    node_zorder : int
        zorder to plot nodes, edges are always 2, so make node_zorder 1 to
        plot nodes beneath them or 3 to plot nodes atop them
    edge_color : string
        the color of the edges' lines
    edge_linewidth : float
        the width of the edges' lines
    edge_alpha : float
        the opacity of the edges' lines
    prior_iters : int
        number of simulation iterations already run (used in the title)
    use_geom : bool
        if True, use the spatial geometry attribute of the edges to draw
        geographically accurate edges, rather than just lines straight from
        node to node
    num_iters : int
        number of simulation iterations to run
    draw_freq : int
        redraw the plot every draw_freq iterations
    show_congestion : bool
        if True, color edges by 'congestion' instead of 'traversals'

    Returns
    -------
    G, fig, ax : tuple
    """
    log('Begin plotting the graph...')
    node_Xs = [float(x) for _, x in G.nodes(data='x')]
    node_Ys = [float(y) for _, y in G.nodes(data='y')]

    # get north, south, east, west values either from bbox parameter or from
    # the spatial extent of the edges' geometries
    if bbox is None:
        edges = ox.graph_to_gdfs(G, nodes=False, fill_edge_geometry=True)
        west, south, east, north = edges.total_bounds
    else:
        north, south, east, west = bbox

    # if caller did not pass in a fig_width, calculate it proportionately from
    # the fig_height and bounding box aspect ratio
    bbox_aspect_ratio = (north - south) / (east - west)
    if fig_width is None:
        fig_width = fig_height / bbox_aspect_ratio

    # create the figure and axis
    fig, ax = plt.subplots(figsize=(fig_width, fig_height), facecolor=bgcolor)
    ax.set_facecolor(bgcolor)

    # draw the edges as lines from node to node
    start_time = time.time()
    lines = []
    for u, v, data in G.edges(keys=False, data=True):
        if 'geometry' in data and use_geom:
            # if it has a geometry attribute (a list of line segments), add
            # them to the list of lines to plot
            xs, ys = data['geometry'].xy
            lines.append(list(zip(xs, ys)))
        else:
            # if it doesn't have a geometry attribute, the edge is a straight
            # line from node to node
            x1 = G.nodes[u]['x']
            y1 = G.nodes[u]['y']
            x2 = G.nodes[v]['x']
            y2 = G.nodes[v]['y']
            line = [(x1, y1), (x2, y2)]
            lines.append(line)

    log('Drew the graph edges in {:,.2f} seconds'.format(time.time() - start_time))

    # set the extent of the figure
    margin_ns = (north - south) * margin
    margin_ew = (east - west) * margin
    ax.set_ylim((south - margin_ns, north + margin_ns))
    ax.set_xlim((west - margin_ew, east + margin_ew))

    # configure axis appearance
    xaxis = ax.get_xaxis()
    yaxis = ax.get_yaxis()
    xaxis.get_major_formatter().set_useOffset(False)
    yaxis.get_major_formatter().set_useOffset(False)

    # run the simulation, redrawing the edge linecollection every draw_freq iterations
    for i in range(1, num_iters + 1):
        simulate_random(G)
        if i % draw_freq == 0 or i == num_iters:
            plt.cla()
            ax.set_facecolor(bgcolor)
            ax.set_ylim((south - margin_ns, north + margin_ns))
            ax.set_xlim((west - margin_ew, east + margin_ew))
            xaxis.get_major_formatter().set_useOffset(False)
            yaxis.get_major_formatter().set_useOffset(False)
            ax.set_title(str(num_iters + prior_iters))
            ax.set_aspect('equal')
            ax.axis('off')
            if show_congestion:
                eval_congestion(G)
                eColors = get_edge_colors_by_attribute(G, 'congestion', num_bins=250)
            else:
                eColors = get_edge_colors_by_attribute(G, 'traversals', num_bins=250)
            lc = LineCollection(lines, colors=eColors, linewidths=edge_linewidth,
                                alpha=edge_alpha, zorder=2)
            ax.add_collection(lc)

    # if axis_off, turn off the axis display, set the margins to zero and
    # point the ticks in so there's no space around the plot
    if axis_off:
        ax.axis('off')
        ax.margins(0)
        ax.tick_params(which='both', direction='in')
        xaxis.set_visible(False)
        yaxis.set_visible(False)

    if equal_aspect:
        # make everything square
        ax.set_aspect('equal')
    else:
        # if the graph is not projected, conform the aspect ratio to not stretch the plot
        if G.graph['crs'] == settings.default_crs:
            coslat = np.cos((min(node_Ys) + max(node_Ys)) / 2. / 180. * np.pi)
            ax.set_aspect(1. / coslat)

    # annotate the axis with node IDs if annotate=True
    if annotate:
        for node, data in G.nodes(data=True):
            ax.annotate(node, xy=(data['x'], data['y']))

    # save and show the figure as specified
    if save:
        fig, ax = save_and_show(fig, ax, save, show, close, filename,
                                file_format, dpi, axis_off)
    return G, fig, ax
def osm_net_download(bbox, query_dict_collection, timeout=180, memory=None,
                     max_query_area_size=50 * 1000 * 50 * 1000):
    """
    Download OSM data for each query in query_dict_collection within bbox,
    subdividing the bounding box if it exceeds max_query_area_size.
    """
    if memory is None:
        maxsize = ''
    else:
        maxsize = '[maxsize:{}]'.format(memory)

    # turn bbox into a polygon and project to local UTM
    (south, west), (north, east) = bbox
    polygon = Polygon([(west, south), (east, south), (east, north), (west, north)])
    geom_proj, crs_proj = project_geometry(polygon)

    # subdivide it if it exceeds the max area size (in meters), then project
    # back to lat-long
    geom_proj_consol_sub = consolidate_subdivide_geometry(
        geom_proj, max_query_area_size=max_query_area_size)
    geometry, _ = project_geometry(geom_proj_consol_sub, crs=crs_proj, to_latlong=True)
    log(('Requesting footprint data within bounding '
         'box from API in {:,} request(s)').format(len(geometry)))
    start_time = time()

    # Function to run in each thread
    def single_process_overpass_request(poly_bbox, query_key, query_dict):
        # represent bbox as south,west,north,east and round lat-longs to 8
        # decimal places (ie, within 1 mm) so URL strings aren't different
        # due to float rounding issues (for consistent caching)
        west, south, east, north = poly_bbox
        query_str = get_osm_query(north=north, east=east, south=south, west=west,
                                  key=query_dict['key'],
                                  value=query_dict['value'],
                                  way=query_dict['way'],
                                  relation=query_dict['relation'],
                                  node=query_dict['node'],
                                  filter_type=query_dict.get('filter_type'))
        try:
            response_json = overpass_request(data={'data': query_str}, timeout=timeout)
            result = {query_key: response_json}
        except Exception:
            log('No OSM data found for {}'.format(query_key))
            result = {query_key: {}}
        result_list.append(result)

    # Set up a list of threads that we want to run
    jobs = []
    manager = mp.Manager()
    result_list = manager.list()
    for poly in geometry:
        for query_key, query_dict in query_dict_collection.items():
            thread = threading.Thread(target=single_process_overpass_request,
                                      args=(poly.bounds, query_key, query_dict))
            jobs.append(thread)
            thread.start()
    for j in jobs:
        j.join()

    # Collect thread results from the shared list
    response_jsons = {key: value for response in result_list
                      for key, value in response.items()}
    msg = ('Got all OSM data within bounding box from '
           'API in {:,} request(s) and {:,.2f} seconds')
    log(msg.format(len(geometry), time() - start_time))
    return response_jsons
def parse_osm_relations(relations, osm_way_df):
    """
    Parse the osm relations (multipolygons) from osm ways and nodes.
    See more information about relations from OSM documentation:
    http://wiki.openstreetmap.org/wiki/Relation

    Parameters
    ----------
    relations : list
        OSM 'relation' items (dictionaries) in a list.
    osm_way_df : gpd.GeoDataFrame
        OSM 'way' features as a GeoDataFrame that contains all the 'way'
        features that will constitute the multipolygon relations.

    Returns
    -------
    gpd.GeoDataFrame
        A GeoDataFrame with MultiPolygon representations of the relations and
        the attributes associated with them.
    """
    gdf_relations = gpd.GeoDataFrame()

    # Iterate over relations and extract the items
    for relation in relations:
        if relation['tags']['type'] == 'multipolygon':
            try:
                # Parse member 'way' ids
                member_way_ids = [member['ref'] for member in relation['members']
                                  if member['type'] == 'way']
                # Extract the ways
                member_ways = osm_way_df.reindex(member_way_ids)
                # Extract the nodes of those ways
                member_nodes = list(member_ways['nodes'].values)
                try:
                    # Create MultiPolygon from geometries (exclude NaNs)
                    multipoly = MultiPolygon(list(member_ways['geometry']))
                except Exception:
                    multipoly = invalid_multipoly_handler(gdf=member_ways,
                                                          relation=relation,
                                                          way_ids=member_way_ids)

                if multipoly:
                    # Create GeoDataFrame with the tags and the MultiPolygon and
                    # its 'ways' (ids), and the 'nodes' of those ways
                    geo = gpd.GeoDataFrame(relation['tags'], index=[relation['id']])
                    # Initialize columns (needed for .loc inserts)
                    geo = geo.assign(geometry=None, ways=None, nodes=None,
                                     element_type=None, osmid=None)
                    # Add attributes
                    geo.loc[relation['id'], 'geometry'] = multipoly
                    geo.loc[relation['id'], 'ways'] = member_way_ids
                    geo.loc[relation['id'], 'nodes'] = member_nodes
                    geo.loc[relation['id'], 'element_type'] = 'relation'
                    geo.loc[relation['id'], 'osmid'] = relation['id']

                    # Append to relation GeoDataFrame
                    gdf_relations = gdf_relations.append(geo, sort=False)
                    # Remove such 'ways' from 'osm_way_df' that are part of the 'relation'
                    osm_way_df = osm_way_df.drop(member_way_ids)
            except Exception:
                log("Could not handle OSM 'relation': {}".format(relation['id']))

    # Merge 'osm_way_df' and the 'gdf_relations'
    osm_way_df = osm_way_df.append(gdf_relations, sort=False)
    return osm_way_df
def multithreading_get_details(self, venues):
    '''
    Multi-threads the get_details Foursquare API requests

    Parameters
    ----------
    venues : list
        List of venues retrieved from Foursquare API get_venues request

    Returns
    -------
    DataFrame with venue details used for recommendations
    '''
    log('Multithreading get_details search with Foursquare API...')
    start_time = time.time()

    # Function to run in each thread
    def single_thread_get_details(venue):
        data_dict = fs_data_dict()
        venue_id = venue.get('id')
        try:
            result = self.fs_client.venues(venue_id).get('venue', None)
        except Exception:
            result = {}
            log('get_details request fails for venue_id {}...'.format(venue_id))
        venue['details'] = result
        for key, value in data_dict.items():
            insert_value = get_nested(venue, value['location'])
            data_dict[key]['values'] = insert_value
        venue = None
        return_list.append(data_dict)

    jobs = []
    manager = mp.Manager()
    return_list = manager.list()
    for venue in venues:
        thread = threading.Thread(name=venue.get('id'),
                                  target=single_thread_get_details,
                                  args=(venue, ))
        jobs.append(thread)
        thread.start()
    for j in jobs:
        j.join()

    # Generate data dict to convert to dataframe
    data_dict = fs_data_dict()
    for venue in return_list:
        for key, value in data_dict.items():
            data_dict[key]['values'].append(venue[key]['values'])

    venue_df = pd.DataFrame({key: value['values'] for key, value in data_dict.items()})
    venue_df['chain'] = venue_df['chain'].map(lambda x: 1 if len(x) > 0 else 0)
    log('Downloaded details for {:,} venues in {:,.2f} seconds'.format(
        venue_df.shape[0], time.time() - start_time))
    return venue_df
def add_node_elevations_open(G, max_locations_per_batch=180, pause_duration=0.02):  # pragma: no cover
    url_template = 'https://api.open-elevation.com/api/v1/lookup?locations={}'
    node_points = pd.Series({node: '{:.5f},{:.5f}'.format(data['y'], data['x'])
                             for node, data in G.nodes(data=True)})
    log('Requesting node elevations from the API in {} calls.'.format(
        math.ceil(len(node_points) / max_locations_per_batch)))

    results = []
    for i in range(0, len(node_points), max_locations_per_batch):
        chunk = node_points.iloc[i: i + max_locations_per_batch]
        locations = '|'.join(chunk)
        url = url_template.format(locations)
        log('URL length: {}'.format(len(url)))

        # check if this request is already in the cache (if global use_cache=True)
        cached_response_json = get_from_cache(url)
        if cached_response_json is not None:
            response_json = cached_response_json
        else:
            try:
                # request the elevations from the API
                log('Requesting node elevations: {}'.format(url))
                time.sleep(pause_duration)
                response = requests.get(url)
                response_json = response.json()
                save_to_cache(url, response_json)
            except Exception as e:
                log(e)
                log('Server responded with {}: {}'.format(response.status_code, response.reason))

        # append these elevation results to the list of all results
        results.extend(response_json['results'])

    # sanity check that all our vectors have the same number of elements
    if not (len(results) == len(G.nodes()) == len(node_points)):
        raise Exception('Graph has {} nodes but we received {} results from the elevation API.'.format(
            len(G.nodes()), len(results)))
    else:
        log('Graph has {} nodes and we received {} results from the elevation API.'.format(
            len(G.nodes()), len(results)))

    # add elevation as an attribute to the nodes
    df = pd.DataFrame(node_points, columns=['node_points'])
    df['elevation'] = [result['elevation'] for result in results]
    df['elevation'] = df['elevation'].round(3)  # round to millimeter
    nx.set_node_attributes(G, name='elevation', values=df['elevation'].to_dict())
    log('Added elevation data to all nodes.')

    return G
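# Batching sketch (hypothetical points): 450 locations at 180 per call yields
# ceil(450 / 180) = 3 API requests, mirroring the chunking loop above.
import math

node_points = ['{:.5f},{:.5f}'.format(1.0, 2.0)] * 450
max_locations_per_batch = 180
batches = [node_points[i:i + max_locations_per_batch]
           for i in range(0, len(node_points), max_locations_per_batch)]
print(len(batches), math.ceil(len(node_points) / max_locations_per_batch))  # 3 3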
def compute_grid_landusemix(df_indices, df_osm_built, df_osm_pois,
                            kw_args={'walkable_distance': 600,
                                     'compute_activity_types_kde': True,
                                     'weighted_kde': True,
                                     'pois_weight': 9,
                                     'log_weighted': True}):
    """
    Calculate land use mix indices on input grid.

    Parameters
    ----------
    df_indices : geopandas.GeoDataFrame
        data frame containing the (x,y) reference points to calculate indices
    df_osm_built : geopandas.GeoDataFrame
        buildings within the region of interest
    df_osm_pois : geopandas.GeoDataFrame
        points of interest within the region of interest
    kw_args : dict
        additional keyword arguments for the indices calculation
        walkable_distance : int
            the bandwidth assumption for Kernel Density Estimation calculations (meters)
        compute_activity_types_kde : bool
            determines if the densities for each activity type should be computed
        weighted_kde : bool
            use Weighted Kernel Density Estimation or classic version
        pois_weight : int
            Points of interest weight equivalence with buildings (squared meter)
        log_weighted : bool
            apply natural logarithmic function to surface weights

    Returns
    ----------
    pandas.DataFrame
        land use mix indices
    """
    log("Land use mix calculation")
    start = time.time()

    # Get the bandwidth, related to 'walkable distances'
    bandwidth = kw_args["walkable_distance"]
    # Compute a weighted KDE?
    weighted_kde = kw_args["weighted_kde"]
    X_weights = None

    # Get full list of contained POIs
    contained_pois = list(set([element for list_ in
                               df_osm_built.containing_poi[df_osm_built.containing_poi.notnull()]
                               for element in list_]))
    # Get the POIs not contained by any building
    df_osm_pois_not_contained = df_osm_pois[~df_osm_pois.index.isin(contained_pois)]

    ############
    # Calculate land use density estimations
    ############

    ####
    # Residential
    ####
    df_osm_built_indexed = df_osm_built[df_osm_built.classification.isin(["residential", "mixed"])]
    if weighted_kde:
        X_weights = df_osm_built_indexed.landuses_m2.apply(lambda x: x["residential"])

    df_indices["residential_pdf"] = calculate_kde(df_indices.geometry, df_osm_built_indexed,
                                                  None, bandwidth, X_weights,
                                                  kw_args["pois_weight"], kw_args["log_weighted"])
    log("Residential density estimation done")

    ####
    # Activities
    ####
    df_osm_built_indexed = df_osm_built[df_osm_built.classification.isin(["activity", "mixed"])]
    df_osm_pois_not_cont_indexed = df_osm_pois_not_contained[
        df_osm_pois_not_contained.classification.isin(["activity", "mixed"])]
    if weighted_kde:
        X_weights = df_osm_built_indexed.landuses_m2.apply(lambda x: x["activity"])

    df_indices["activity_pdf"] = calculate_kde(df_indices.geometry, df_osm_built_indexed,
                                               df_osm_pois_not_cont_indexed, bandwidth, X_weights,
                                               kw_args["pois_weight"], kw_args["log_weighted"])
    log("Activity density estimation done")

    ####
    # Compute activity types densities
    ####
    if kw_args["compute_activity_types_kde"]:
        assert 'activity_category' in df_osm_built.columns

        # Get unique category values
        unique_categories_built = [list(x) for x in
                                   set(tuple(x) for x in df_osm_built.activity_category.values
                                       if isinstance(x, list))]
        unique_categories_pois = [list(x) for x in
                                  set(tuple(x) for x in df_osm_pois_not_cont_indexed.activity_category.values
                                      if isinstance(x, list))]
        flat_list = [item for sublist in unique_categories_built + unique_categories_pois
                     for item in sublist]
        categories = list(set(flat_list))

        for cat in categories:
            # Get data frame selection of input category:
            # buildings and POIs within that category
            df_built_category = df_osm_built_indexed[
                df_osm_built_indexed.activity_category.apply(lambda x: isinstance(x, list) and (cat in x))]
            df_pois_category = df_osm_pois_not_cont_indexed[
                df_osm_pois_not_cont_indexed.activity_category.apply(lambda x: isinstance(x, list) and (cat in x))]

            if weighted_kde:
                X_weights = df_built_category.landuses_m2.apply(lambda x: x[cat])

            df_indices[cat + "_pdf"] = calculate_kde(df_indices.geometry, df_built_category,
                                                     df_pois_category, bandwidth, X_weights,
                                                     kw_args["pois_weight"], kw_args["log_weighted"])

        log("Activity grouped by types density estimation done")

    # Compute land use mix indices
    index_column = "landusemix"
    df_indices[index_column] = df_indices.apply(
        lambda x: _land_use_mix(x.activity_pdf, x.residential_pdf), axis=1)
    df_indices["landuse_intensity"] = df_indices.apply(
        lambda x: (x.activity_pdf + x.residential_pdf) / 2., axis=1)

    end = time.time()
    log("Land use mix calculation time: " + str(end - start))
def graph_to_gdfs_pix(G, nodes=True, edges=True, node_geometry=True, fill_edge_geometry=True):
    """
    Convert a graph into node and/or edge GeoDataFrames.

    Parameters
    ----------
    G : networkx multidigraph
    nodes : bool
        if True, convert graph nodes to a GeoDataFrame and return it
    edges : bool
        if True, convert graph edges to a GeoDataFrame and return it
    node_geometry : bool
        if True, create a geometry column from node x and y data
    fill_edge_geometry : bool
        if True, fill in missing edge geometry fields using origin and
        destination nodes

    Returns
    -------
    GeoDataFrame or tuple
        gdf_nodes or gdf_edges or both as a tuple
    """
    if not (nodes or edges):
        raise ValueError('You must request nodes or edges, or both.')

    to_return = []

    if nodes:
        start_time = time.time()

        nodes = {node: data for node, data in G.nodes(data=True)}
        gdf_nodes = gpd.GeoDataFrame(nodes).T
        if node_geometry:
            # pixel coordinates are used for the geometry instead of x/y lat-lng
            gdf_nodes['geometry_pix'] = gdf_nodes.apply(
                lambda row: Point(row['x_pix'], row['y_pix']), axis=1)

        gdf_nodes.crs = G.graph['crs']
        gdf_nodes.gdf_name = '{}_nodes'.format(G.graph['name'])
        gdf_nodes['osmid'] = gdf_nodes['osmid'].astype(np.int64).map(make_str)

        to_return.append(gdf_nodes)
        log('Created GeoDataFrame "{}" from graph in {:,.2f} seconds'.format(
            gdf_nodes.gdf_name, time.time() - start_time))

    if edges:
        start_time = time.time()

        # create a list to hold our edges, then loop through each edge in the graph
        edges = []
        for u, v, key, data in G.edges(keys=True, data=True):
            # for each edge, add key and all attributes in data dict to the
            # edge_details
            edge_details = {'u': u, 'v': v, 'key': key}
            for attr_key in data:
                edge_details[attr_key] = data[attr_key]

            # if edge doesn't already have a geometry attribute, create one now
            # if fill_edge_geometry==True
            if 'geometry_pix' not in data:
                if fill_edge_geometry:
                    point_u = Point((G.nodes[u]['x_pix'], G.nodes[u]['y_pix']))
                    point_v = Point((G.nodes[v]['x_pix'], G.nodes[v]['y_pix']))
                    edge_details['geometry_pix'] = LineString([point_u, point_v])
                else:
                    edge_details['geometry_pix'] = np.nan

            edges.append(edge_details)

        # create a GeoDataFrame from the list of edges and set the CRS
        gdf_edges = gpd.GeoDataFrame(edges)
        gdf_edges.crs = G.graph['crs']
        gdf_edges.gdf_name = '{}_edges'.format(G.graph['name'])

        to_return.append(gdf_edges)
        log('Created GeoDataFrame "{}" from graph in {:,.2f} seconds'.format(
            gdf_edges.gdf_name, time.time() - start_time))

    if len(to_return) > 1:
        return tuple(to_return)
    else:
        return to_return[0]
def plot_graph_pix(G, im=None, bbox=None, fig_height=6, fig_width=None,
                   margin=0.02, axis_off=True, equal_aspect=False, bgcolor='w',
                   show=True, save=False, close=True, file_format='png',
                   filename='temp', default_dpi=300, annotate=False,
                   node_color='#66ccff', node_size=15, node_alpha=1,
                   node_edgecolor='none', node_zorder=1, edge_color='#999999',
                   edge_linewidth=1, edge_alpha=1, edge_width_key='speed_mph',
                   edge_width_mult=1. / 25, use_geom=True):
    """
    Plot a networkx spatial graph.

    Parameters
    ----------
    G : networkx multidigraph
    im : numpy array, optional
        background image to plot the graph over
    bbox : tuple
        bounding box as north,south,east,west - if None will calculate from
        spatial extents of data. if passing a bbox, you probably also want to
        pass margin=0 to constrain it.
    fig_height : int
        matplotlib figure height in inches
    fig_width : int
        matplotlib figure width in inches
    margin : float
        relative margin around the figure
    axis_off : bool
        if True turn off the matplotlib axis
    equal_aspect : bool
        if True set the axis aspect ratio equal
    bgcolor : string
        the background color of the figure and axis
    show : bool
        if True, show the figure
    save : bool
        if True, save the figure as an image file to disk
    close : bool
        close the figure (only if show equals False) to prevent display
    file_format : string
        the format of the file to save (e.g., 'jpg', 'png', 'svg')
    filename : string
        the name of the file if saving
    default_dpi : int
        the resolution of the image file if saving (may get altered for
        large images)
    annotate : bool
        if True, annotate the nodes in the figure
    node_color : string
        the color of the nodes
    node_size : int
        the size of the nodes
    node_alpha : float
        the opacity of the nodes
    node_edgecolor : string
        the color of the node's marker's border
    node_zorder : int
        zorder to plot nodes, edges are always 2, so make node_zorder 1 to
        plot nodes beneath them or 3 to plot nodes atop them
    edge_color : string
        the color of the edges' lines
    edge_linewidth : float
        the width of the edges' lines
    edge_alpha : float
        the opacity of the edges' lines
    edge_width_key : str
        optional: key in edge properties to determine edge width, supersedes
        edge_linewidth, defaults to "speed_mph"
    edge_width_mult : float
        factor to rescale width for plotting, defaults to 1./25, which gives
        a line width of 1 for a 25 mph speed limit.
    use_geom : bool
        if True, use the spatial geometry attribute of the edges to draw
        geographically accurate edges, rather than just lines straight from
        node to node

    Returns
    -------
    fig, ax : tuple
    """
    log('Begin plotting the graph...')
    node_Xs = [float(x) for _, x in G.nodes(data='x_pix')]
    node_Ys = [float(y) for _, y in G.nodes(data='y_pix')]

    # get north, south, east, west values either from bbox parameter or from
    # the spatial extent of the edges' geometries
    if bbox is None:
        edges = graph_to_gdfs_pix(G, nodes=False, fill_edge_geometry=True)
        west, south, east, north = gpd.GeoSeries(edges['geometry_pix']).total_bounds
    else:
        north, south, east, west = bbox

    # if caller did not pass in a fig_width, calculate it proportionately from
    # the fig_height and bounding box aspect ratio
    bbox_aspect_ratio = (north - south) / (east - west)
    if fig_width is None:
        fig_width = fig_height / bbox_aspect_ratio

    # create the figure and axis, plotting over the image if one was given
    if im is not None:
        fig, ax = plt.subplots(figsize=(fig_width, fig_height))
        ax.imshow(im)
    else:
        fig, ax = plt.subplots(figsize=(fig_width, fig_height), facecolor=bgcolor)
        ax.set_facecolor(bgcolor)

    # draw the edges as lines from node to node
    start_time = time.time()
    lines = []
    widths = []
    for u, v, data in G.edges(keys=False, data=True):
        if 'geometry_pix' in data and use_geom:
            # if it has a geometry attribute (a list of line segments), add
            # them to the list of lines to plot
            xs, ys = data['geometry_pix'].xy
            lines.append(list(zip(xs, ys)))
        else:
            # if it doesn't have a geometry attribute, the edge is a straight
            # line from node to node
            x1 = G.nodes[u]['x_pix']
            y1 = G.nodes[u]['y_pix']
            x2 = G.nodes[v]['x_pix']
            y2 = G.nodes[v]['y_pix']
            line = [(x1, y1), (x2, y2)]
            lines.append(line)
        # get widths
        if edge_width_key in data:
            width = int(np.rint(data[edge_width_key] * edge_width_mult))
        else:
            width = edge_linewidth
        widths.append(width)

    # add the lines to the axis as a linecollection
    lc = LineCollection(lines, colors=edge_color, linewidths=widths,
                        alpha=edge_alpha, zorder=2)
    ax.add_collection(lc)
    log('Drew the graph edges in {:,.2f} seconds'.format(time.time() - start_time))

    # scatter plot the nodes
    ax.scatter(node_Xs, node_Ys, s=node_size, c=node_color, alpha=node_alpha,
               edgecolor=node_edgecolor, zorder=node_zorder)

    # set the extent of the figure
    margin_ns = (north - south) * margin
    margin_ew = (east - west) * margin
    ax.set_ylim((south - margin_ns, north + margin_ns))
    ax.set_xlim((west - margin_ew, east + margin_ew))

    # configure axis appearance
    xaxis = ax.get_xaxis()
    yaxis = ax.get_yaxis()
    xaxis.get_major_formatter().set_useOffset(False)
    yaxis.get_major_formatter().set_useOffset(False)

    # if axis_off, turn off the axis display, set the margins to zero and
    # point the ticks in so there's no space around the plot
    if axis_off:
        ax.axis('off')
        ax.margins(0)
        ax.tick_params(which='both', direction='in')
        xaxis.set_visible(False)
        yaxis.set_visible(False)
        fig.canvas.draw()

    if equal_aspect:
        # make everything square
        ax.set_aspect('equal')
        fig.canvas.draw()
    else:
        # if the graph is not projected, conform the aspect ratio to not stretch the plot
        if G.graph['crs'] == ox_settings.default_crs:
            coslat = np.cos((min(node_Ys) + max(node_Ys)) / 2. / 180. * np.pi)
            ax.set_aspect(1. / coslat)
            fig.canvas.draw()

    # annotate the axis with node IDs if annotate=True
    if annotate:
        for node, data in G.nodes(data=True):
            ax.annotate(node, xy=(data['x_pix'], data['y_pix']))

    # update dpi, if plotting over an image
    if im is not None:
        # mpl can handle a max of 2^29 pixels, or 23170 on a side; recompute max_dpi
        max_dpi = int(23000 / max(fig_height, fig_width))
        h, w = im.shape[:2]
        # try to set dpi to native resolution of imagery
        desired_dpi = max(default_dpi, 1.0 * h / fig_height)
        dpi = int(np.min([max_dpi, desired_dpi]))
    else:
        dpi = default_dpi

    # save and show the figure as specified
    fig, ax = save_and_show(fig, ax, save, show, close, filename,
                            file_format, dpi, axis_off)
    return fig, ax
def create_graph(mrt_response_json, name='unnamed', retain_all=True, bidirectional=False):
    """
    Create a networkx graph from Overpass API HTTP response objects.

    Parameters
    ----------
    mrt_response_json : dict
        JSON response from the Overpass API
    name : string
        the name of the graph
    retain_all : bool
        if True, return the entire graph even if it is not connected
    bidirectional : bool
        if True, create bidirectional edges for one-way streets

    Returns
    -------
    networkx multidigraph
    """
    log('Creating networkx graph from downloaded OSM data...')
    start_time = time.time()

    # make sure we got data back from the server request
    elements = []
    elements.extend(mrt_response_json['elements'])
    if len(elements) < 1:
        raise EmptyOverpassResponse('There are no data elements in the response JSON objects')

    # create the graph as a MultiDiGraph and set the original CRS to default_crs
    G = nx.MultiDiGraph(name=name, crs=settings.default_crs)

    # extract nodes and paths from the downloaded osm data
    nodes = {}
    paths = {}
    nodes_temp, paths_temp = parse_osm_nodes_paths(mrt_response_json)
    for key, value in nodes_temp.items():
        nodes[key] = value
    for key, value in paths_temp.items():
        paths[key] = value

    # add each osm node to the graph
    for node, data in nodes.items():
        G.add_node(node, **data)

    # add each osm way (aka, path) to the graph
    G = ox.add_paths(G, paths, bidirectional=bidirectional)

    # retain only the largest connected component, if caller did not
    # set retain_all=True
    if not retain_all:
        G = get_largest_component(G)

    log('Created graph with {:,} nodes and {:,} edges in {:,.2f} seconds'.format(
        len(list(G.nodes())), len(list(G.edges())), time.time() - start_time))

    # add length (great circle distance between nodes) attribute to each edge
    # to use as weight
    if len(G.edges) > 0:
        G = ox.add_edge_lengths(G)

    return G
def get_nearest_edges(G, X, Y, method=None, dist=0.0001):
    """
    Return the graph edges nearest to a list of points. Pass in points as
    separate vectors of X and Y coordinates. The 'kdtree' method is by far the
    fastest with large data sets, but only finds approximate nearest edges if
    working in unprojected coordinates like lat-lng (it precisely finds the
    nearest edge if working in projected coordinates). The 'balltree' method
    is second fastest with large data sets, but it is precise if working in
    unprojected coordinates like lat-lng. As a rule of thumb, if you have a
    small graph just use method=None. If you have a large graph with lat-lng
    coordinates, use method='balltree'. If you have a large graph with
    projected coordinates, use method='kdtree'.

    Note that if you are working in units of lat-lng, the X vector corresponds
    to longitude and the Y vector corresponds to latitude. The method creates
    equally distanced points along the edges of the network. Then, these
    points are used in a kdTree or BallTree search to identify which is
    nearest. Note that this method will not give the exact perpendicular point
    along the edge, but the smaller the *dist* parameter, the closer the
    solution will be.

    Parameters
    ----------
    G : networkx.MultiDiGraph
        input graph
    X : list-like
        The vector of longitudes or x's for which we will find the nearest
        edge in the graph. For projected graphs, use the projected
        coordinates, usually in meters.
    Y : list-like
        The vector of latitudes or y's for which we will find the nearest
        edge in the graph. For projected graphs, use the projected
        coordinates, usually in meters.
    method : string {None, 'kdtree', 'balltree'}
        Which method to use for finding nearest edge to each point. If None,
        we manually find each edge one at a time using get_nearest_edge. If
        'kdtree' we use scipy.spatial.cKDTree for very fast euclidean search.
        Recommended for projected graphs. If 'balltree', we use
        sklearn.neighbors.BallTree for fast haversine search. Recommended for
        unprojected graphs.
    dist : float
        spacing length along edges. Units are the same as the geom; Degrees
        for unprojected geometries and meters for projected geometries. The
        smaller the value, the more points are created.

    Returns
    -------
    ne : np.array
        array of nearest edges represented by u and v (the IDs of the nodes
        they link) and key
    """
    if method is None:
        # calculate nearest edge one at a time for each (y, x) point
        ne = [get_nearest_edge(G, (y, x)) for x, y in tqdm(zip(X, Y))]

    elif method == "kdtree":
        # check if we were able to import scipy.spatial.cKDTree successfully
        if not cKDTree:
            raise ImportError("The scipy package must be installed to use this optional feature.")

        # transform graph into DataFrame
        edges = utils_graph.graph_to_gdfs(G, nodes=False, fill_edge_geometry=True)

        # transform edges into evenly spaced points
        edges["points"] = edges.apply(
            lambda x: utils_geo.redistribute_vertices(x.geometry, dist), axis=1)

        # develop edges data for each created point
        extended = (edges["points"].apply([pd.Series]).stack().reset_index(
            level=1, drop=True).join(edges).reset_index())

        # Prepare btree arrays
        nbdata = np.array(list(zip(extended["Series"].apply(lambda x: x.x),
                                   extended["Series"].apply(lambda x: x.y))))

        # build a k-d tree for euclidean nearest node search
        btree = cKDTree(data=nbdata, compact_nodes=True, balanced_tree=True)

        # query the tree for nearest node to each point
        points = np.array([X, Y]).T
        dist, idx = btree.query(points, k=1)  # Returns ids of closest point
        eidx = extended.loc[idx, "index"]
        ne = edges.loc[eidx, ["u", "v", "key"]]

    elif method == "balltree":
        # check if we were able to import sklearn.neighbors.BallTree successfully
        if not BallTree:
            raise ImportError("The scikit-learn package must be installed to use this optional feature.")

        # transform graph into DataFrame
        edges = utils_graph.graph_to_gdfs(G, nodes=False, fill_edge_geometry=True)

        # transform edges into evenly spaced points
        edges["points"] = edges.apply(
            lambda x: utils_geo.redistribute_vertices(x.geometry, dist), axis=1)

        # develop edges data for each created point
        extended = (edges["points"].apply([pd.Series]).stack().reset_index(
            level=1, drop=True).join(edges).reset_index())

        # haversine requires data in form of [lat, lng] and inputs/outputs in
        # units of radians
        nodes = pd.DataFrame({"x": extended["Series"].apply(lambda x: x.x),
                              "y": extended["Series"].apply(lambda x: x.y)})
        nodes_rad = np.deg2rad(nodes[["y", "x"]].values.astype(float))
        points = np.array([Y, X]).T
        points_rad = np.deg2rad(points)

        # build a ball tree for haversine nearest node search
        tree = BallTree(nodes_rad, metric="haversine")

        # query the tree for the nearest node to each point (only the first
        # neighbor is used)
        idx = tree.query(points_rad, k=1, return_distance=False)
        eidx = extended.loc[idx[:, 0], "index"]
        ne = edges.loc[eidx, ["u", "v", "key"]]

    else:
        raise ValueError("You must pass a valid method name, or None.")

    utils.log(f"Found nearest edges to {len(X)} points")

    return np.array(ne)
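# Isolated sketch of the haversine BallTree query used above (hypothetical
# lat-lng points, converted to radians as the haversine metric requires).
import numpy as np
from sklearn.neighbors import BallTree

nodes_rad = np.deg2rad(np.array([[45.00, 9.00], [45.10, 9.10]]))  # [lat, lng]
tree = BallTree(nodes_rad, metric='haversine')
idx = tree.query(np.deg2rad(np.array([[45.04, 9.03]])), k=1, return_distance=False)
print(idx)  # index of the nearest vertex, [[0]]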