def test_logging():
    # test OSMnx's logger
    ox.log("test a fake default message")
    ox.log("test a fake debug", level=lg.DEBUG)
    ox.log("test a fake info", level=lg.INFO)
    ox.log("test a fake warning", level=lg.WARNING)
    ox.log("test a fake error", level=lg.ERROR)

    ox.citation()
    ox.ts(style="date")
    ox.ts(style="time")

def test_logging():
    # test OSMnx's logger
    import logging as lg
    ox.log('test a fake debug', level=lg.DEBUG)
    ox.log('test a fake info', level=lg.INFO)
    ox.log('test a fake warning', level=lg.WARNING)
    ox.log('test a fake error', level=lg.ERROR)

def test_logging():
    # test OSMnx's logger
    import logging as lg
    ox.log('test a fake debug', level=lg.DEBUG)
    ox.log('test a fake info', level=lg.INFO)
    ox.log('test a fake warning', level=lg.WARNING)
    ox.log('test a fake error', level=lg.ERROR)

    ox.citation()

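# A minimal usage sketch for the logger exercised by the test variants above.
# It assumes OSMnx's legacy ox.config() interface (used elsewhere in this
# document); newer releases expose the same switches on ox.settings instead.
import logging as lg

import osmnx as ox

ox.config(log_console=True, log_file=True)  # echo log messages to console and a log file
ox.log("pipeline started", level=lg.INFO)   # routed through OSMnx's configured logger
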
def get_graph(row):
    global count_failed
    global count_success
    global count_already
    global count_small
    global failed_list

    try:
        # graph name = country + country iso + uc + uc id
        graph_name = '{}-{}-{}-{}'.format(row['CTR_MN_NM'], row['CTR_MN_ISO'],
                                          row['UC_NM_MN'], row['ID_HDC_G0'])
        graphml_folder = '{}/{}-{}'.format(output_graphml_path,
                                           row['CTR_MN_NM'], row['CTR_MN_ISO'])
        graphml_file = '{}-{}.graphml'.format(row['UC_NM_MN'], row['ID_HDC_G0'])
        filepath = os.path.join(graphml_folder, graphml_file)

        if not os.path.exists(filepath):
            # get graph
            print(ox.ts(), graph_name)
            G = ox.graph_from_polygon(polygon=row['geometry'].buffer(0),
                                      network_type=network_type,
                                      retain_all=retain_all,
                                      simplify=simplify,
                                      truncate_by_edge=truncate_by_edge)

            # don't save graphs if they have fewer than 3 nodes
            if len(G) > 2:
                ox.save_graphml(G, filepath=filepath)
                count_success = count_success + 1
            else:
                count_small = count_small + 1
        else:
            count_already = count_already + 1

    except Exception as e:
        count_failed = count_failed + 1
        failed_list.append(graph_name)
        ox.log('"{}" failed: {}'.format(graph_name, e), level=lg.ERROR)
        print(e, graph_name)

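# A hedged driver sketch for get_graph() above. The module-level names it
# relies on (output_graphml_path, network_type, retain_all, simplify,
# truncate_by_edge, the counters, and a `ucs_to_get` GeoDataFrame of GHS
# urban centres) are assumptions here, not confirmed by the source.
count_failed = count_success = count_already = count_small = 0
failed_list = []

network_type = 'drive'
retain_all = True
simplify = True
truncate_by_edge = True
output_graphml_path = 'data/graphml'  # hypothetical output root

# ucs_to_get is assumed to carry the GHS urban-centre columns used above
for label, row in ucs_to_get.iterrows():
    get_graph(row)
print('success:', count_success, 'failed:', count_failed,
      'already done:', count_already, 'too small:', count_small)
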
def request_url(url, pause_duration=pause_duration):

    # check if this request is already in the cache (if ox.settings.use_cache=True)
    cached_response_json = ox.downloader._retrieve_from_cache(url)
    if cached_response_json is not None:
        response_json = cached_response_json
        ox.log('Got node elevations from cache')
    else:
        try:
            # request the elevations from the API
            ox.log('Requesting node elevations from API: {}'.format(url))
            time.sleep(pause_duration)

            # convert GET to POST to work around apache url length limits
            params = dict()
            endpoint, url_params = url.split('?')
            for chunk in url_params.split('&'):
                key, value = chunk.split('=')
                params[key] = value

            response = requests.post(endpoint, data=params, timeout=120)
            assert response.ok
            response_json = response.json()
            assert 'geonames' in response_json
            ox.downloader._save_to_cache(url, response_json, response.status_code)
        except Exception as e:
            ox.log(e)
            print(e)
            # `response` only exists if the POST itself succeeded
            if 'response' in locals():
                print('Error - server responded with {}: {}. {}'.format(
                    response.status_code, response.reason, response.text))

    return response_json

def get_Y_X_features_population_data(cities_selection=None, cities_skip=None):
    """
    Returns the Y and X arrays for training/testing population downscaling estimates.
    It gathers either a selection of cities, or all stored cities except a given list to skip.
    Y contains vectors with the corresponding population densities
    X contains vectors with normalized urban features
    X_columns contains the columns referring to the X values
    The numpy arrays have been previously stored

    Parameters
    ----------
    cities_selection : list of string
        list of cities to select
    cities_skip : list of string
        list of cities to skip (retrieve the rest)

    Returns
    ----------
    np.array, np.array, np.array
        Y vector, X vector, X column names vector
    """
    arr_X, arr_Y = [], []

    # Get the complete training-testing dataset
    for Y_X_data_city in os.listdir("data/training"):
        # Only if it contains a valid extension
        if ".npz" not in Y_X_data_city:
            continue
        # Get city's name
        city_ref = Y_X_data_city.replace("_X_Y.npz", "")

        # Only retrieve data from cities_selection (if ever given)
        if (cities_selection is not None) and (city_ref not in cities_selection):
            log("Skipping city: " + str(city_ref))
            continue
        # Skip cities data from cities_skip (if ever given)
        if (cities_skip is not None) and (city_ref in cities_skip):
            log("Skipping city: " + str(city_ref))
            continue

        log("Retrieving data for city: " + str(city_ref))

        # Get stored data
        city_Y, city_X, city_X_cols = get_training_testing_data(city_ref)

        # Append values
        arr_Y.append(city_Y)
        arr_X.append(city_X)

    # Assumption: All generated testing-training data contain the same X columns
    return np.concatenate(arr_Y), np.concatenate(arr_X), city_X_cols

def request_url(url, pause_duration=pause_duration):

    # check if this request is already in the cache (if ox.settings.use_cache=True)
    cached_response_json = ox.downloader._retrieve_from_cache(url)
    if cached_response_json is not None:
        response_json = cached_response_json
        ox.log('Got node elevations from cache')
    else:
        try:
            # request the elevations from the API
            ox.log('Requesting node elevations from API: {}'.format(url))
            time.sleep(pause_duration)
            response = requests.get(url)
            assert response.ok
            response_json = response.json()
            ox.downloader._save_to_cache(url, response_json, response.status_code)
        except Exception as e:
            ox.log(e)
            # `response` only exists if the GET itself succeeded
            if 'response' in locals():
                print('Error - server responded with {}: {}'.format(
                    response.status_code, response.reason))

    return response_json['results']

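# A hedged usage sketch for the request_url() variants above; the elevation
# API endpoint here is illustrative, and the GET variant is assumed. OSMnx's
# cache settings control whether the call hits the network at all.
import osmnx as ox

ox.settings.use_cache = True  # let request_url() reuse cached responses
url = 'https://example-elevation-api.org/v1/lookup?locations=37.78,-122.42'  # hypothetical endpoint
results = request_url(url, pause_duration=1)  # GET variant returns the parsed 'results' payload
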
def generateGraphPlot(self, graph):
    osmnx.log('Generating graph plot colored by elevation')
    # color each node by its elevation attribute
    nc = osmnx.plot.get_node_colors_by_attr(graph, 'elevation', cmap='plasma')
    osmnx.plot_graph(graph, node_color=nc, node_size=5, edge_color='#333333', bgcolor='k')

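# A minimal standalone sketch of what generateGraphPlot() does above. It
# assumes every node carries an 'elevation' attribute (normally added via
# OSMnx's elevation module and an API key); a constant dummy value is set
# here so the sketch runs offline.
import networkx as nx
import osmnx

G = osmnx.graph_from_place('Piedmont, California, USA', network_type='drive')
nx.set_node_attributes(G, 0.0, 'elevation')  # dummy elevations, an assumption
nc = osmnx.plot.get_node_colors_by_attr(G, 'elevation', cmap='plasma')
osmnx.plot_graph(G, node_color=nc, node_size=5, edge_color='#333333', bgcolor='k')
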
def get_training_testing_data(city_ref, df_insee_urban_features=None):
    """
    Returns the Y and X arrays for training/testing population downscaling estimates.
    Y contains vectors with the corresponding population densities
    X contains vectors with normalized urban features
    X_columns contains the columns referring to the X values
    The numpy arrays are stored locally

    Parameters
    ----------
    city_ref : string
        city reference name
    df_insee_urban_features : geopandas.GeoDataFrame
        grid-cells with population count data and calculated urban features

    Returns
    ----------
    np.array, np.array, np.array
        Y vector, X vector, X column names vector
    """
    # Population extract exists?
    if os.path.exists(get_population_training_validating_filename(city_ref)):
        log("Urban population training+validation data/features exist for input city: " + city_ref)
        # Read from stored numpy arrays
        data = np.load(get_population_training_validating_filename(city_ref))
        return data["Y"], data["X"], data["X_columns"]

    log("Calculating urban training+validation data/features for city: " + city_ref)
    start = time.time()

    # Select columns to normalize
    columns_to_normalise = [
        col for col in df_insee_urban_features.columns
        if "num_" in col or "m2_" in col or "dispersion" in col or "accessibility" in col
    ]
    # Normalize selected columns
    df_insee_urban_features.loc[:, columns_to_normalise] = \
        df_insee_urban_features.loc[:, columns_to_normalise].apply(lambda x: x / x.max(), axis=0)

    # By default, idINSPIRE for created squares (0 population count) is 0: change to the 'CRS' string,
    # coherent with the squares aggregation procedure (string matching)
    df_insee_urban_features.loc[df_insee_urban_features.idINSPIRE == 0, "idINSPIRE"] = "CRS"

    # Aggregate 5x5 squares: get all possible aggregations (step of 200 meters = length of individual square)
    aggregated_df_insee_urban_features = get_aggregated_squares(
        ox.project_gdf(df_insee_urban_features, to_crs="+init=epsg:3035"),
        step=200.,
        conserve_squares_info=True)

    # X values: Vector <x1, x2, ..., xn> with normalized urban features
    X_values = []
    # Y values: Vector <y1, y2, ..., ym> with normalized population densities. m=25
    Y_values = []

    # For each <Indices> combination, create an X and Y vector
    for idx in aggregated_df_insee_urban_features.indices:
        # Extract the urban features in the given 'indices' order (fill with 0 for non-existent squares)
        square_info = df_insee_urban_features.reindex(idx).fillna(0)

        # Y input (ground truth): population densities
        population_densities = (square_info["pop_count"] / square_info["pop_count"].sum()).values
        if all(pd.isna(population_densities)):
            # If the sum of population count is 0, remove (NaN values)
            continue

        # X input: normalized urban features
        urban_features = square_info[[
            col for col in square_info.columns
            if col not in ['idINSPIRE', 'geometry', 'pop_count']
        ]].values

        # Append X, Y
        X_values.append(urban_features)
        Y_values.append(population_densities)

    # Get the columns order referenced in each X vector
    X_values_columns = df_insee_urban_features[[
        col for col in square_info.columns
        if col not in ['idINSPIRE', 'geometry', 'pop_count']
    ]].columns
    X_values_columns = np.array(X_values_columns)

    # To numpy arrays
    X_values = np.array(X_values)
    Y_values = np.array(Y_values)

    # Save to file
    np.savez(get_population_training_validating_filename(city_ref),
             X=X_values, Y=Y_values, X_columns=X_values_columns)

    log("Done: urban training+validation data/features. Elapsed time (H:M:S): " +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start)))
    return Y_values, X_values, X_values_columns

def compute_full_urban_features(
        city_ref,
        df_osm_built=None,
        df_osm_pois=None,
        df_insee=None,
        data_source=None,
        landusemix_args={
            'walkable_distance': 600,
            'compute_activity_types_kde': True,
            'weighted_kde': True,
            'pois_weight': 9,
            'log_weighted': True
        },
        dispersion_args={
            "radius_search": 750,
            "use_median": True,
            "K_nearest": 50
        },
        kwargs={"max_dispersion": 15}):
    """
    Computes a set of urban features for each square where population count data exists

    Parameters
    ----------
    city_ref : string
        city reference name
    df_osm_built : geopandas.GeoDataFrame
        input buildings
    df_osm_pois : geopandas.GeoDataFrame
        input points of interest
    df_insee : geopandas.GeoDataFrame
        grid-cells with population count where urban features will be calculated
    data_source : str
        define the type of population data for its retrieval in case it was stored
    kwargs : dict
        keyword arguments to guide the process

    Returns
    ----------
    geopandas.GeoDataFrame
        geometry with updated urban features
    """
    # Population extract exists?
    if os.path.exists(get_population_urban_features_filename(city_ref, data_source)):
        log("Urban features from population gridded data exist for input city: " + city_ref)
        # Read from GeoJSON (default projection coordinates)
        df_insee_urban_features_4326 = gpd.read_file(
            get_population_urban_features_filename(city_ref, data_source))
        # Project to UTM coordinates
        return ox.project_gdf(df_insee_urban_features_4326)

    # Required arguments
    assert df_osm_built is not None
    assert df_osm_pois is not None
    assert df_insee is not None

    # Get population count data with filled empty squares (null population)
    df_insee_urban_features = get_population_df_filled_empty_squares(df_insee)
    # Set crs
    crs_proj = df_insee.crs
    df_insee_urban_features.crs = crs_proj

    ##################
    ### Urban features
    ##################
    # Compute the urban features for each square
    log("Calculating urban features")
    start = time.time()

    # Conserve building geometries
    df_osm_built['geom_building'] = df_osm_built['geometry']

    # Spatial join: grid-cell i - building j for all intersections
    df_insee_urban_features = gpd.sjoin(df_insee_urban_features, df_osm_built,
                                        op='intersects', how='left')

    # When a grid-cell i does not intersect any building: NaN values
    null_idx = df_insee_urban_features.loc[
        df_insee_urban_features['geom_building'].isnull()].index

    # Replace NaN for urban features calculation
    min_polygon = Polygon([(0, 0), (0, np.finfo(float).eps),
                           (np.finfo(float).eps, np.finfo(float).eps)])
    df_insee_urban_features.loc[null_idx, 'geom_building'] = \
        df_insee_urban_features.loc[null_idx, 'geom_building'].apply(lambda x: min_polygon)
    df_insee_urban_features.loc[null_idx, 'landuses_m2'] = \
        len(null_idx) * [{'residential': 0, 'activity': 0}]
    df_insee_urban_features.loc[null_idx, 'building_levels'] = len(null_idx) * [0]

    ### Pre-calculation of urban features
    # Apply percentage of building presence within square:
    # 1 if fully contained, 0.5 if half the building contained, ...
    df_insee_urban_features['building_ratio'] = df_insee_urban_features.apply(
        lambda x: x.geom_building.intersection(x.geometry).area / x.geom_building.area,
        axis=1)

    df_insee_urban_features['m2_total_residential'] = df_insee_urban_features.apply(
        lambda x: x.building_ratio * x.landuses_m2['residential'], axis=1)
    df_insee_urban_features['m2_total_activity'] = df_insee_urban_features.apply(
        lambda x: x.building_ratio * x.landuses_m2['activity'], axis=1)

    df_insee_urban_features['m2_footprint_residential'] = 0
    df_insee_urban_features.loc[
        df_insee_urban_features.classification.isin(['residential']),
        'm2_footprint_residential'] = df_insee_urban_features.loc[
            df_insee_urban_features.classification.isin(['residential'])].apply(
                lambda x: x.building_ratio * x.geom_building.area, axis=1)

    df_insee_urban_features['m2_footprint_activity'] = 0
    df_insee_urban_features.loc[
        df_insee_urban_features.classification.isin(['activity']),
        'm2_footprint_activity'] = df_insee_urban_features.loc[
            df_insee_urban_features.classification.isin(['activity'])].apply(
                lambda x: x.building_ratio * x.geom_building.area, axis=1)

    df_insee_urban_features['m2_footprint_mixed'] = 0
    df_insee_urban_features.loc[
        df_insee_urban_features.classification.isin(['mixed']),
        'm2_footprint_mixed'] = df_insee_urban_features.loc[
            df_insee_urban_features.classification.isin(['mixed'])].apply(
                lambda x: x.building_ratio * x.geom_building.area, axis=1)

    df_insee_urban_features['num_built_activity'] = 0
    df_insee_urban_features.loc[
        df_insee_urban_features.classification.isin(['activity']),
        'num_built_activity'] = df_insee_urban_features.loc[
            df_insee_urban_features.classification.isin(['activity'])].building_ratio

    df_insee_urban_features['num_built_residential'] = 0
    df_insee_urban_features.loc[
        df_insee_urban_features.classification.isin(['residential']),
        'num_built_residential'] = df_insee_urban_features.loc[
            df_insee_urban_features.classification.isin(['residential'])].building_ratio

    df_insee_urban_features['num_built_mixed'] = 0
    df_insee_urban_features.loc[
        df_insee_urban_features.classification.isin(['mixed']),
        'num_built_mixed'] = df_insee_urban_features.loc[
            df_insee_urban_features.classification.isin(['mixed'])].building_ratio

    df_insee_urban_features['num_levels'] = df_insee_urban_features.apply(
        lambda x: x.building_ratio * x.building_levels, axis=1)
    df_insee_urban_features['num_buildings'] = df_insee_urban_features['building_ratio']
    df_insee_urban_features['built_up_m2'] = df_insee_urban_features.apply(
        lambda x: x.geom_building.area * x.building_ratio, axis=1)

    ### Urban features aggregation functions
    urban_features_aggregation = {}
    urban_features_aggregation['idINSPIRE'] = lambda x: x.head(1)
    urban_features_aggregation['pop_count'] = lambda x: x.head(1)
    urban_features_aggregation['geometry'] = lambda x: x.head(1)

    urban_features_aggregation['m2_total_residential'] = 'sum'
    urban_features_aggregation['m2_total_activity'] = 'sum'

    urban_features_aggregation['m2_footprint_residential'] = 'sum'
    urban_features_aggregation['m2_footprint_activity'] = 'sum'
    urban_features_aggregation['m2_footprint_mixed'] = 'sum'

    urban_features_aggregation['num_built_activity'] = 'sum'
    urban_features_aggregation['num_built_residential'] = 'sum'
    urban_features_aggregation['num_built_mixed'] = 'sum'

    urban_features_aggregation['num_levels'] = 'sum'
    urban_features_aggregation['num_buildings'] = 'sum'
    urban_features_aggregation['built_up_m2'] = 'sum'

    # Apply aggregate functions
    df_insee_urban_features = df_insee_urban_features.groupby(
        df_insee_urban_features.index).agg(urban_features_aggregation)

    # Calculate built up relation (relative to the area of the grid-cell geometry)
    df_insee_urban_features['built_up_relation'] = df_insee_urban_features.apply(
        lambda x: x.built_up_m2 / x.geometry.area, axis=1)
    df_insee_urban_features.drop('built_up_m2', axis=1, inplace=True)

    # To geopandas.GeoDataFrame and set crs
    df_insee_urban_features = gpd.GeoDataFrame(df_insee_urban_features)
    df_insee_urban_features.crs = crs_proj

    # POIs
    df_osm_pois_selection = df_osm_pois[df_osm_pois.classification.isin(
        ["activity", "mixed"])]
    gpd_intersection_pois = gpd.sjoin(df_insee_urban_features,
                                      df_osm_pois_selection,
                                      op='intersects', how='left')
    # Number of activity/mixed POIs
    df_insee_urban_features['num_activity_pois'] = gpd_intersection_pois.groupby(
        gpd_intersection_pois.index).agg({'osm_id': 'count'})

    ##################
    ### Sprawling indices
    ##################
    df_insee_urban_features['geometry_squares'] = df_insee_urban_features.geometry
    df_insee_urban_features['geometry'] = df_insee_urban_features.geometry.centroid

    '''
    compute_grid_accessibility(df_insee_urban_features, graph, df_osm_built, df_osm_pois)
    '''
    # Compute land uses mix + densities estimation
    compute_grid_landusemix(df_insee_urban_features, df_osm_built, df_osm_pois,
                            landusemix_args)
    # Dispersion indices
    compute_grid_dispersion(df_insee_urban_features, df_osm_built, dispersion_args)

    if kwargs.get("max_dispersion"):
        # Set max bounds for dispersion values
        df_insee_urban_features.loc[
            df_insee_urban_features.dispersion > kwargs.get("max_dispersion"),
            "dispersion"] = kwargs.get("max_dispersion")

    # Set back original geometries
    df_insee_urban_features['geometry'] = df_insee_urban_features.geometry_squares
    df_insee_urban_features.drop('geometry_squares', axis=1, inplace=True)

    # Fill NaN sprawl indices with 0
    df_insee_urban_features.fillna(0, inplace=True)

    # Save to GeoJSON file (no projection conserved, then use EPSG 4326)
    ox.project_gdf(df_insee_urban_features, to_latlong=True).to_file(
        get_population_urban_features_filename(city_ref, data_source),
        driver='GeoJSON')

    elapsed_time = int(time.time() - start)
    log("Done: Urban features calculation. Elapsed time (H:M:S): " +
        '{:02d}:{:02d}:{:02d}'.format(elapsed_time // 3600,
                                      (elapsed_time % 3600 // 60),
                                      elapsed_time % 60))
    return df_insee_urban_features

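# A hedged call sketch for compute_full_urban_features() above. The input
# GeoDataFrames are assumed to come from the data-preparation helpers defined
# elsewhere in this document (get_processed_osm_data, get_extract_population_data);
# the city name and file paths are illustrative, not project data.
df_osm_built, df_osm_building_parts, df_osm_pois = get_processed_osm_data(city_ref='Grenoble')
df_insee = get_extract_population_data(city_ref='Grenoble', data_source='insee',
                                       pop_shapefile='data/insee_grid.shp',      # hypothetical path
                                       pop_data_file='data/insee_pop.xls',       # hypothetical path
                                       polygons_gdf=df_osm_built)
features = compute_full_urban_features('Grenoble', df_osm_built=df_osm_built,
                                       df_osm_pois=df_osm_pois, df_insee=df_insee,
                                       data_source='insee')
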
def process_spatial_indices(
        city_ref=None,
        region_args={
            "polygon": None,
            "place": None,
            "which_result": 1,
            "point": None,
            "address": None,
            "distance": None,
            "north": None,
            "south": None,
            "east": None,
            "west": None,
        },
        grid_step=100,
        process_osm_args={
            "retrieve_graph": True,
            "default_height": 3,
            "meters_per_level": 3,
            "associate_landuses_m2": True,
            "minimum_m2_building_area": 9,
            "date": None,
        },
        dispersion_args={
            "radius_search": 750,
            "use_median": False,
            "K_nearest": 50,
        },
        landusemix_args={
            "walkable_distance": 600,
            "compute_activity_types_kde": True,
            "weighted_kde": True,
            "pois_weight": 9,
            "log_weighted": True,
        },
        accessibility_args={
            "fixed_distance": True,
            "fixed_activities": False,
            "max_edge_length": 200,
            "max_node_distance": 250,
            "fixed_distance_max_travel_distance": 2000,
            "fixed_distance_max_num_activities": 250,
            "fixed_activities_min_number": 20,
        },
        indices_computation={
            "dispersion": True,
            "landusemix": True,
            "accessibility": True,
        },
):
    """
    Process sprawling indices for an input region of interest

    1) OSM data is retrieved and processed.
       If the city name has already been processed, locally stored data will be loaded
    2) A regular grid is created where indices will be calculated
    3) Sprawling indices are calculated and returned

    Parameters
    ----------
    city_ref : str
        Name of input city / region
    grid_step : int
        step to sample the regular grid in meters
    region_args : dict
        contains the information to retrieve the region of interest as the following:
            polygon : shapely Polygon or MultiPolygon
                geographic shape to fetch the land use footprints within
            place : string or dict
                query string or structured query dict to geocode/download
            which_result : int
                result number to retrieve from geocode/download when using query string
            point : tuple
                the (lat, lon) central point around which to construct the region
            address : string
                the address to geocode and use as the central point around which to construct the region
            distance : int
                retain only those nodes within this many meters of the center of the region
            north : float
                northern latitude of bounding box
            south : float
                southern latitude of bounding box
            east : float
                eastern longitude of bounding box
            west : float
                western longitude of bounding box
    process_osm_args : dict
        additional arguments to drive the OSM data extraction process:
            retrieve_graph : boolean
                determines if the street network for input city has to be retrieved and stored
            default_height : float
                height of buildings under missing data
            meters_per_level : float
                buildings number of levels assumed under missing data
            associate_landuses_m2 : boolean
                compute the total square meter for each land use
            minimum_m2_building_area : float
                minimum area to be considered a building (otherwise filtered)
            date : datetime.datetime
                query the database at a certain timestamp
    dispersion_args : dict
        arguments to drive the dispersion indices calculation:
            radius_search : int
                circle radius to consider the dispersion calculation at a local point
            use_median : bool
                denotes whether the median or mean should be used to calculate the indices
            K_nearest : int
                number of neighboring buildings to consider in evaluation
    landusemix_args : dict
        arguments to drive the land use mix indices calculation:
            walkable_distance : int
                the bandwidth assumption for Kernel Density Estimation calculations (meters)
            compute_activity_types_kde : bool
                determines if the densities for each activity type should be computed
            weighted_kde : bool
                use Weighted Kernel Density Estimation or classic version
            pois_weight : int
                Points of interest weight equivalence with buildings (squared meter)
            log_weighted : bool
                apply natural logarithmic function to surface weights
    accessibility_args : dict
        arguments to drive the accessibility indices calculation:
            fixed_distance : bool
                denotes the cumulative opportunities access to activity land uses given a fixed maximum distance to travel
            fixed_activities : bool
                represents the distance needed to travel in order to reach a certain number of activity land uses
            max_edge_length : int
                maximum length, in meters, to tolerate an edge in a graph (otherwise, divide edge)
            max_node_distance : int
                maximum distance tolerated from input point to closest graph node in order to calculate accessibility values
            fixed_distance_max_travel_distance : int
                (fixed distance) maximum distance tolerated (cut&branch) when searching for the activities
            fixed_distance_max_num_activities : int
                (fixed distance) cut iteration if the number of activities exceeds a threshold
            fixed_activities_min_number : int
                (fixed activities) minimum number of activities required
    indices_computation : dict
        determines what sprawling indices should be computed

    Returns
    ----------
    gpd.GeoDataFrame
        returns the regular grid with the indicated sprawling indices
    """
    try:
        # Process OSM data
        df_osm_built, df_osm_building_parts, df_osm_pois = get_processed_osm_data(
            city_ref=city_ref, region_args=region_args, kwargs=process_osm_args)

        # Get route graph
        G = get_route_graph(city_ref)

        if not (indices_computation.get("accessibility")
                or indices_computation.get("landusemix")
                or indices_computation.get("dispersion")):
            log("Not computing any spatial indices")
            return None

        # Get indices grid
        df_indices = get_indices_grid(df_osm_built, df_osm_building_parts,
                                      df_osm_pois, grid_step)

        # Compute sprawling indices
        if indices_computation.get("accessibility"):
            compute_grid_accessibility(df_indices, G, df_osm_built, df_osm_pois,
                                       accessibility_args)
        if indices_computation.get("landusemix"):
            compute_grid_landusemix(df_indices, df_osm_built, df_osm_pois,
                                    landusemix_args)
        if indices_computation.get("dispersion"):
            compute_grid_dispersion(df_indices, df_osm_built, dispersion_args)

        return df_indices
    except Exception as e:
        log("Could not compute the spatial indices. An exception occurred: " + str(e))
        return None

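# A hedged end-to-end sketch for process_spatial_indices() above; the place
# query is illustrative, and all unspecified arguments keep their documented
# defaults.
grid_indices = process_spatial_indices(
    city_ref='Lyon',
    region_args={"place": "Lyon, France", "which_result": 1},
    grid_step=200,
    indices_computation={"dispersion": True, "landusemix": True, "accessibility": False},
)
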
def get_extract_population_data(
        city_ref,
        data_source,
        pop_shapefile=None,
        pop_data_file=None,
        to_crs={"init": "epsg:4326"},
        polygons_gdf=None,
):
    """
    Get the population data extract of the desired data source for the input city,
    calculating the convex hull of the input buildings geodataframe.
    The population data frame is projected to the desired coordinate reference system.
    Stores the extracted shapefile.
    Returns the stored population data for input 'data source' and 'city reference'
    if it was previously stored.

    Parameters
    ----------
    city_ref : string
        name of input city
    data_source : string
        desired population data source
    pop_shapefile : string
        path of population count shapefile
    pop_data_file : string
        path of population data additional file (required for INSEE format)
    to_crs : dict
        desired coordinate reference system
    polygons_gdf : geopandas.GeoDataFrame
        polygons (e.g. buildings) for input region of interest which will determine the shape to extract

    Returns
    ----------
    geopandas.GeoDataFrame
        returns the extracted population data
    """
    # Input data source type given?
    assert data_source in DATA_SOURCES

    # Population extract exists?
    if os.path.exists(get_population_extract_filename(city_ref, data_source)):
        log("Population extract exists for input city: " + city_ref)
        return gpd.read_file(get_population_extract_filename(city_ref, data_source))

    # Input shape given?
    assert not np.all(polygons_gdf is None)
    # Input population shapefile given?
    assert pop_shapefile is not None
    # All input files given?
    assert not ((data_source == "insee") and (pop_data_file is None))

    # Get buildings convex hull
    polygon = GeometryCollection(polygons_gdf.geometry.values.tolist()).convex_hull
    # Convert to geo-dataframe with defined CRS
    poly_gdf = gpd.GeoDataFrame([polygon], columns=["geometry"], crs=polygons_gdf.crs)

    # Compute extract
    df_pop = get_population_df(pop_shapefile, pop_data_file, data_source, to_crs, poly_gdf)

    # Save to shapefile
    df_pop.to_file(
        get_population_extract_filename(city_ref, data_source),
        driver="ESRI Shapefile",
    )
    return df_pop

def compute_grid_accessibility(
        df_indices,
        G,
        df_osm_built,
        df_osm_pois,
        kw_args={
            "fixed_distance": True,
            "fixed_activities": False,
            "max_edge_length": 200,
            "max_node_distance": 250,
            "fixed_distance_max_travel_distance": 2000,
            "fixed_distance_max_num_activities": 250,
            "fixed_activities_min_number": 20,
            "fixed_activities_max_travel_distance": 5000
        },
):
    """
    Calculate accessibility values for each grid reference point

    Parameters
    ----------
    df_indices : geopandas.GeoDataFrame
        data frame containing the (x,y) reference points to calculate indices
    G : networkx multidigraph
        input graph to calculate accessibility
    df_osm_built : geopandas.GeoDataFrame
        data frame containing the building's geometries and corresponding land uses
    df_osm_pois : geopandas.GeoDataFrame
        data frame containing the points' of interest geometries
    kw_args : dict
        additional keyword arguments for the indices calculation:
            fixed_distance : bool
                denotes the cumulative opportunities access to activity land uses given a fixed maximum distance to travel
            fixed_activities : bool
                represents the distance needed to travel in order to reach a certain number of activity land uses
            max_edge_length : int
                maximum length, in meters, to tolerate an edge in a graph (otherwise, divide edge)
            max_node_distance : int
                maximum distance tolerated from input point to closest graph node in order to calculate accessibility values
            fixed_distance_max_travel_distance : int
                (fixed distance) maximum distance tolerated (cut&branch) when searching for the activities
            fixed_distance_max_num_activities : int
                (fixed distance) cut iteration if the number of activities exceeds a threshold
            fixed_activities_min_number : int
                (fixed activities) minimum number of activities required
            fixed_activities_max_travel_distance : int
                (fixed activities) maximum distance tolerated (cut&branch) when searching for the activities

    Returns
    ----------
    int
        number of activities found within a radius distance using the street network
    """
    log("Calculating accessibility indices")
    start = time.time()

    # Assert that only one option is set
    assert kw_args["fixed_distance"] ^ kw_args["fixed_activities"]

    # Arguments to pandas.Series
    kw_arguments = pd.Series(kw_args)

    ##############
    # Prepare input data for indices calculation in parallel call
    ##############
    # Temporary folder to pickle data
    if not os.path.exists("temp"):
        os.makedirs("temp")

    # Number of CPU cores on your system
    num_cores = cpu_count()

    # Prepare input data: as many chunks of data as cores
    prepare_data(G, df_osm_built, df_osm_pois, df_indices, num_cores, kw_arguments)

    # This command could have multiple commands separated by a new line \n
    parallel_code = os.path.realpath(__file__).replace(".py", "_parallel.py")
    command_call = (
        "python " + parallel_code
        + " temp/graph.gpickle temp/points_NUM_CHUNK.pkl temp/arguments.pkl"
    )

    ##############
    # Verify amount of memory used per subprocess
    ##############
    p = subprocess.Popen(
        command_call.replace("NUM_CHUNK", str(0)) + " memory_test",
        stdout=subprocess.PIPE,
        shell=True,
    )
    output, err = p.communicate()
    p.wait()

    # Max number of subprocess allocations given its memory consumption
    numbers = [
        numb for numb in str(output)
        if numb in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
    ]
    max_processes = int("".join(numbers))
    log("Maximum number of processes to allocate (considering memory availability): "
        + str(max_processes))
    log("Number of available cores: " + str(num_cores))

    ##############
    # Set chunks to run in parallel: if more cores than allowed processes,
    # divide chunks to run at most X processes
    ##############
    if num_cores > max_processes:
        # Run parallel-chunks at a split pace, to avoid memory swap
        chunks_run = np.array_split(list(range(num_cores)), max_processes)
    else:
        # Run all chunks in parallel
        chunks_run = [list(range(num_cores))]

    # Parallel implementation
    for chunk in chunks_run:
        # Run full chunk
        Ps_i = []
        for i in chunk:
            # Run each index
            p = subprocess.Popen(
                command_call.replace("NUM_CHUNK", str(i)),
                stdout=subprocess.PIPE,
                shell=True,
            )
            Ps_i.append(p)

        # Get the output
        Output_errs = [p.communicate() for p in Ps_i]
        # This makes the wait possible
        Ps_status = [p.wait() for p in Ps_i]

        # Output for chunk
        for output, err in Output_errs:
            log(str(output))

    # Associate data by getting the chunk results concatenated
    index_column = "accessibility"
    df_indices[index_column] = pd.concat(
        [
            pd.read_pickle("temp/indices_NUM_CHUNK.pkl".replace("NUM_CHUNK", str(i)))
            for i in range(num_cores)
        ],
        ignore_index=True,
    ).accessibility

    # Delete temporary folder
    shutil.rmtree("temp")

    log("Done: Accessibility indices. Elapsed time (H:M:S): "
        + time.strftime("%H:%M:%S", time.gmtime(time.time() - start)))

# use agg backend so you don't need a display on travis-ci
import matplotlib as mpl
mpl.use('Agg')

# remove the .temp folder from any previous test run, if it exists
import os, shutil
if os.path.exists('.temp'):
    shutil.rmtree('.temp')

import osmnx as ox, logging as lg
ox.config(log_console=True, log_file=True, use_cache=True,
          data_folder='.temp/data', logs_folder='.temp/logs',
          imgs_folder='.temp/imgs', cache_folder='.temp/cache')

ox.log('test debug', level=lg.DEBUG)
ox.log('test info', level=lg.INFO)
ox.log('test warning', level=lg.WARNING)
ox.log('test error', level=lg.ERROR)


def test_imports():
    import json, math, sys, os, io, ast, unicodedata, hashlib, re, random, time, warnings, datetime as dt, logging as lg
    from collections import OrderedDict, Counter
    from itertools import groupby, chain
    from dateutil import parser as date_parser
    import requests, numpy as np, pandas as pd, geopandas as gpd, networkx as nx, matplotlib.pyplot as plt, matplotlib.cm as cm
    from matplotlib.collections import LineCollection
    from shapely.geometry import Point, LineString, Polygon, MultiPolygon
    from shapely import wkt

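# A minimal sketch for running the smoke tests above directly, without a test
# runner (a pytest-based CI invocation such as `pytest test_osmnx.py -v` would
# be the usual entry point; that setup is an assumption):
if __name__ == '__main__':
    test_imports()
    test_logging()
    print('smoke tests passed')
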
def compute_full_urban_features(city_ref,
                                df_osm_built=None,
                                df_osm_pois=None,
                                graph=None,
                                df_insee=None,
                                data_source=None,
                                kwargs={"max_dispersion": 15}):
    """
    Computes a set of urban features for each square where population count data exists

    Parameters
    ----------
    city_ref : string
        city reference name
    df_osm_built : geopandas.GeoDataFrame
        input buildings
    df_osm_pois : geopandas.GeoDataFrame
        input points of interest
    graph : networkx multidigraph
        input street network graph
    df_insee : geopandas.GeoDataFrame
        grid-cells with population count where urban features will be calculated

    Returns
    ----------
    geopandas.GeoDataFrame
        geometry with updated urban features
    """
    # Population extract exists?
    if os.path.exists(get_population_urban_features_filename(city_ref, data_source)):
        log("Urban features from population gridded data exist for input city: " + city_ref)
        # Read from GeoJSON (default projection coordinates)
        df_insee_urban_features_4326 = gpd.read_file(
            get_population_urban_features_filename(city_ref, data_source))
        # Project to UTM coordinates
        return ox.project_gdf(df_insee_urban_features_4326)

    # Required arguments
    assert df_osm_built is not None
    assert df_osm_pois is not None
    assert graph is not None
    assert df_insee is not None

    # Copy data frame in order to modify it
    #df_insee_urban_features = df_insee.copy()
    # Data frame + creation of empty squares with 0 count population
    df_insee_urban_features = get_population_df_filled_empty_squares(df_insee)

    ##################
    ### Sprawling indices
    ##################
    df_insee_urban_features['geometry_squares'] = df_insee_urban_features.geometry
    df_insee_urban_features['geometry'] = df_insee_urban_features.geometry.centroid

    '''
    compute_grid_accessibility(df_insee_urban_features, graph, df_osm_built, df_osm_pois)
    '''
    # Compute land uses mix + densities estimation
    compute_grid_landusemix(df_insee_urban_features, df_osm_built, df_osm_pois)
    # Dispersion indices
    compute_grid_dispersion(df_insee_urban_features, df_osm_built)

    if kwargs.get("max_dispersion"):
        # Set max bounds for dispersion values
        df_insee_urban_features.loc[
            df_insee_urban_features.dispersion > kwargs.get("max_dispersion"),
            "dispersion"] = kwargs.get("max_dispersion")

    # Set back original geometries
    df_insee_urban_features['geometry'] = df_insee_urban_features.geometry_squares
    df_insee_urban_features.drop('geometry_squares', axis=1, inplace=True)

    ##################
    ### Additional urban features
    ##################
    # Compute the urban features for each square
    log("Calculating urban features")
    start = time.time()

    df_insee_urban_features = df_insee_urban_features.apply(
        lambda x: compute_urban_features(df_osm_built, df_osm_pois, x), axis=1)

    # FillNA, set CRS
    df_insee_urban_features.fillna(0, inplace=True)
    df_insee_urban_features.crs = df_insee.crs

    # Save to GeoJSON file (no projection conserved, then use EPSG 4326)
    ox.project_gdf(df_insee_urban_features, to_latlong=True).to_file(
        get_population_urban_features_filename(city_ref, data_source),
        driver='GeoJSON')

    log("Done: Urban features calculation. Elapsed time (H:M:S): " +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start)))
    return df_insee_urban_features

for label, row in ucs_to_get.iterrows():
    graph_name = '{}-{}-{}-{}'.format(row['CTR_MN_NM'], row['CTR_MN_ISO'],
                                      row['UC_NM_MN'], row['ID_HDC_G0'])
    print(ox.ts(), graph_name)
    try:
        G = ox.graph_from_polygon(polygon=row['geometry'].buffer(0),
                                  network_type=network_type,
                                  retain_all=retain_all,
                                  simplify=simplify,
                                  truncate_by_edge=truncate_by_edge)
    except ox._errors.CacheOnlyModeInterrupt:
        # this happens every time because ox.settings.cache_only_mode = True
        pass
    except Exception as e:
        ox.log('"{}" failed: {}'.format(graph_name, e), level=lg.ERROR)
        print(e, graph_name)


# In[ ]:


end_time = time.time() - start_time
print(ox.ts(),
      'Finished caching raw data for {:,.0f} graphs in {:,.1f} seconds'.format(
          len(ucs_to_get), end_time))


# In[ ]:

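# A hedged sketch of the settings this caching pass relies on: with
# cache_only_mode enabled, OSMnx caches each server response and then raises
# CacheOnlyModeInterrupt instead of building the graph, which is why the loop
# above swallows that exception on every iteration. The folder path is an
# assumption.
import osmnx as ox

ox.settings.use_cache = True
ox.settings.cache_only_mode = True
ox.settings.cache_folder = './cache'  # hypothetical cache location
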
def get_processed_osm_data(
        city_ref=None,
        region_args={
            "polygon": None,
            "place": None,
            "which_result": 1,
            "point": None,
            "address": None,
            "distance": None,
            "north": None,
            "south": None,
            "east": None,
            "west": None
        },
        kwargs={
            "retrieve_graph": True,
            "default_height": 3,
            "meters_per_level": 3,
            "associate_landuses_m2": True,
            "mixed_building_first_floor_activity": True,
            "minimum_m2_building_area": 9,
            "date": None
        }):
    """
    Retrieves buildings, building parts, and Points of Interest associated with a
    residential/activity land use from OpenStreetMap data for the input city.
    If a name for the input city is given, the data will be loaded (if it was previously stored).
    If no stored files exist, it will query and process the data and store it under the city name.
    Queries data for the input region (polygon, place, point/address and distance around,
    or bounding box coordinates). Additional arguments will drive the overall process.

    Parameters
    ----------
    city_ref : str
        Name of input city / region
    region_args : dict
        contains the information to retrieve the region of interest as the following:
            polygon : shapely Polygon or MultiPolygon
                geographic shape to fetch the landuse footprints within
            place : string or dict
                query string or structured query dict to geocode/download
            which_result : int
                result number to retrieve from geocode/download when using query string
            point : tuple
                the (lat, lon) central point around which to construct the graph
            address : string
                the address to geocode and use as the central point around which to construct the graph
            distance : int
                retain only those nodes within this many meters of the center of the graph
            north : float
                northern latitude of bounding box
            south : float
                southern latitude of bounding box
            east : float
                eastern longitude of bounding box
            west : float
                western longitude of bounding box
    kwargs : dict
        additional arguments to drive the process:
            retrieve_graph : boolean
                determines if the street network for input city has to be retrieved and stored
            default_height : float
                height of buildings under missing data
            meters_per_level : float
                buildings number of levels assumed under missing data
            associate_landuses_m2 : boolean
                compute the total square meter for each land use
            mixed_building_first_floor_activity : boolean
                if True: associates the building's first floor to activity uses and the rest to residential uses
                if False: associates half of the building's area to each land use (activity and residential)
            minimum_m2_building_area : float
                minimum area to be considered a building (otherwise filtered)
            date : datetime.datetime
                query the database at a certain time-stamp

    Returns
    ----------
    [gpd.GeoDataFrame, gpd.GeoDataFrame, gpd.GeoDataFrame]
        returns the output geo dataframes containing all buildings, building parts,
        and points associated to a residential or activity land usage
    """
    log("OSM data requested for city: " + str(city_ref))
    start_time = time.time()

    if city_ref:
        geo_poly_file, geo_poly_parts_file, geo_point_file = get_dataframes_filenames(city_ref)

        ##########################
        ### Stored file?
        ##########################
        if os.path.isfile(geo_poly_file):
            # File exists
            log("Found stored files for city " + city_ref)
            # Load local GeoDataFrames
            return load_geodataframe(geo_poly_file), load_geodataframe(
                geo_poly_parts_file), load_geodataframe(geo_point_file)

    # Get keyword arguments for input region of interest
    polygon, place, which_result, point, address, distance, north, south, east, west = (
        region_args.get("polygon"), region_args.get("place"),
        region_args.get("which_result"), region_args.get("point"),
        region_args.get("address"), region_args.get("distance"),
        region_args.get("north"), region_args.get("south"),
        region_args.get("east"), region_args.get("west"))

    ### Valid input?
    if not any([polygon is not None, place, point, address, north, south, east, west]):
        log("Error: Must provide at least one type of input")
        return None, None, None

    if kwargs.get("date"):
        # Non-null date
        date_ = kwargs.get("date").strftime("%Y-%m-%dT%H:%M:%SZ")
        log("Requesting OSM database at time-stamp: " + date_)
        # e.g.: [date:"2004-05-06T00:00:00Z"]
        date_query = '[date:"' + date_ + '"]'
    else:
        date_query = ""

    ##########################
    ### Overpass query: Buildings
    ##########################
    # Query and update bounding box / polygon
    df_osm_built, polygon, north, south, east, west = create_buildings_gdf_from_input(
        date=date_query,
        polygon=polygon,
        place=place,
        which_result=which_result,
        point=point,
        address=address,
        distance=distance,
        north=north, south=south, east=east, west=west)
    df_osm_built["osm_id"] = df_osm_built.index
    df_osm_built.reset_index(drop=True, inplace=True)
    df_osm_built.gdf_name = str(city_ref) + '_buildings' if city_ref is not None else 'buildings'

    ##########################
    ### Overpass query: Land use polygons. Aid to perform buildings land use inference
    ##########################
    df_osm_lu = create_landuse_gdf(date=date_query, polygon=polygon,
                                   north=north, south=south, east=east, west=west)
    df_osm_lu["osm_id"] = df_osm_lu.index
    # Drop useless columns
    columns_of_interest = ["osm_id", "geometry", "landuse"]
    df_osm_lu.drop([col for col in list(df_osm_lu.columns)
                    if col not in columns_of_interest], axis=1, inplace=True)
    df_osm_lu.reset_index(drop=True, inplace=True)
    df_osm_lu.gdf_name = str(city_ref) + '_landuse' if city_ref is not None else 'landuse'

    ##########################
    ### Overpass query: POIs
    ##########################
    df_osm_pois = create_pois_gdf(date=date_query, polygon=polygon,
                                  north=north, south=south, east=east, west=west)
    df_osm_pois["osm_id"] = df_osm_pois.index
    df_osm_pois.reset_index(drop=True, inplace=True)
    df_osm_pois.gdf_name = str(city_ref) + '_points' if city_ref is not None else 'points'

    ##########################
    ### Overpass query: Building parts. Allow to calculate the real amount of m^2 for each building
    ##########################
    df_osm_building_parts = create_building_parts_gdf(date=date_query, polygon=polygon,
                                                      north=north, south=south,
                                                      east=east, west=west)
    # Filter: 1) rows not needed (roof, etc.) and 2) buildings that already exist in the `buildings` extract
    if "building" in df_osm_building_parts.columns:
        df_osm_building_parts = df_osm_building_parts[
            (~df_osm_building_parts["building:part"].isin(building_parts_to_filter))
            & (~df_osm_building_parts["building:part"].isnull())
            & (df_osm_building_parts["building"].isnull())]
    else:
        df_osm_building_parts = df_osm_building_parts[
            (~df_osm_building_parts["building:part"].isin(building_parts_to_filter))
            & (~df_osm_building_parts["building:part"].isnull())]
    df_osm_building_parts["osm_id"] = df_osm_building_parts.index
    df_osm_building_parts.reset_index(drop=True, inplace=True)
    df_osm_building_parts.gdf_name = str(city_ref) + '_building_parts' if city_ref is not None else 'building_parts'

    log("Done: OSM data requests. Elapsed time (H:M:S): " +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ####################################################
    ### Sanity check of height tags
    ####################################################
    start_time = time.time()
    sanity_check_height_tags(df_osm_built)
    sanity_check_height_tags(df_osm_building_parts)

    def remove_nan_dict(x):
        # Remove entries with NaN values
        return {k: v for k, v in x.items() if pd.notnull(v)}

    df_osm_built['height_tags'] = df_osm_built[[
        c for c in height_tags if c in df_osm_built.columns
    ]].apply(lambda x: remove_nan_dict(x.to_dict()), axis=1)
    df_osm_building_parts['height_tags'] = df_osm_building_parts[[
        c for c in height_tags if c in df_osm_building_parts.columns
    ]].apply(lambda x: remove_nan_dict(x.to_dict()), axis=1)

    ###########
    ### Remove columns which do not provide valuable information
    ###########
    columns_of_interest = columns_osm_tag + ["osm_id", "geometry", "height_tags"]
    df_osm_built.drop([col for col in list(df_osm_built.columns)
                       if col not in columns_of_interest], axis=1, inplace=True)
    df_osm_building_parts.drop([col for col in list(df_osm_building_parts.columns)
                                if col not in columns_of_interest], axis=1, inplace=True)

    columns_of_interest = columns_osm_tag + ["osm_id", "geometry"]
    df_osm_pois.drop([col for col in list(df_osm_pois.columns)
                      if col not in columns_of_interest], axis=1, inplace=True)

    log('Done: Height tags sanity check and unnecessary columns have been dropped. '
        'Elapsed time (H:M:S): ' +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ###########
    ### Classification
    ###########
    start_time = time.time()

    df_osm_built['classification'], df_osm_built['key_value'] = list(
        zip(*df_osm_built.apply(classify_tag, axis=1)))
    df_osm_pois['classification'], df_osm_pois['key_value'] = list(
        zip(*df_osm_pois.apply(classify_tag, axis=1)))
    df_osm_building_parts['classification'], df_osm_building_parts['key_value'] = list(
        zip(*df_osm_building_parts.apply(classify_tag, axis=1)))

    # Remove unnecessary buildings
    df_osm_built.drop(df_osm_built[df_osm_built.classification.isnull()].index, inplace=True)
    df_osm_built.reset_index(inplace=True, drop=True)
    # Remove unnecessary POIs
    df_osm_pois.drop(
        df_osm_pois[df_osm_pois.classification.isin(["infer", "other"])
                    | df_osm_pois.classification.isnull()].index,
        inplace=True)
    df_osm_pois.reset_index(inplace=True, drop=True)

    # Building parts will acquire their containing building's land use if it is not available
    df_osm_building_parts.loc[
        df_osm_building_parts.classification.isin(["infer", "other"]),
        "classification"] = None

    log('Done: OSM tags classification. Elapsed time (H:M:S): ' +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ###########
    ### Remove already used tags
    ###########
    start_time = time.time()
    df_osm_built.drop([c for c in columns_osm_tag if c in df_osm_built.columns],
                      axis=1, inplace=True)
    df_osm_pois.drop([c for c in columns_osm_tag if c in df_osm_pois.columns],
                     axis=1, inplace=True)
    df_osm_building_parts.drop([c for c in columns_osm_tag if c in df_osm_building_parts.columns],
                               axis=1, inplace=True)

    ###########
    ### Project, drop small buildings and reset indices
    ###########
    ### Project to UTM coordinates within the same zone
    df_osm_built = ox.project_gdf(df_osm_built)
    df_osm_lu = ox.project_gdf(df_osm_lu, to_crs=df_osm_built.crs)
    df_osm_pois = ox.project_gdf(df_osm_pois, to_crs=df_osm_built.crs)
    df_osm_building_parts = ox.project_gdf(df_osm_building_parts, to_crs=df_osm_built.crs)

    # Drop buildings with an area lower than a threshold
    df_osm_built.drop(df_osm_built[
        df_osm_built.geometry.area < kwargs["minimum_m2_building_area"]].index,
        inplace=True)

    log('Done: Geometries re-projection. Elapsed time (H:M:S): ' +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ####################################################
    ### Infer buildings land use (under uncertainty)
    ####################################################
    start_time = time.time()
    compute_landuse_inference(df_osm_built, df_osm_lu)
    # Free space
    del df_osm_lu

    assert len(df_osm_built[df_osm_built.key_value == {"inferred": "other"}]) == 0
    assert len(df_osm_built[df_osm_built.classification.isnull()]) == 0
    assert len(df_osm_pois[df_osm_pois.classification.isnull()]) == 0

    log('Done: Land use deduction. Elapsed time (H:M:S): ' +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ####################################################
    ### Associate for each building its containing building parts and points of interest
    ####################################################
    start_time = time.time()
    associate_structures(df_osm_built, df_osm_building_parts,
                         operation='contains', column='containing_parts')
    associate_structures(df_osm_built, df_osm_pois,
                         operation='intersects', column='containing_poi')

    # Classify activity types
    df_osm_built['activity_category'] = df_osm_built.apply(
        lambda x: classify_activity_category(x.key_value), axis=1)
    df_osm_pois['activity_category'] = df_osm_pois.apply(
        lambda x: classify_activity_category(x.key_value), axis=1)
    df_osm_building_parts['activity_category'] = df_osm_building_parts.apply(
        lambda x: classify_activity_category(x.key_value), axis=1)

    log('Done: Building parts association and activity categorization. Elapsed time (H:M:S): ' +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ####################################################
    ### Associate effective number of levels, and measure the surface dedicated to each land use per building
    ####################################################
    if kwargs["associate_landuses_m2"]:
        start_time = time.time()
        default_height = kwargs["default_height"]
        meters_per_level = kwargs["meters_per_level"]
        mixed_building_first_floor_activity = kwargs["mixed_building_first_floor_activity"]
        compute_landuses_m2(df_osm_built,
                            df_osm_building_parts,
                            df_osm_pois,
                            default_height=default_height,
                            meters_per_level=meters_per_level,
                            mixed_building_first_floor_activity=mixed_building_first_floor_activity)

        # Set the composed classification given, for each building, its containing
        # points of interest and building parts classification
        df_osm_built.loc[df_osm_built.apply(
            lambda x: x.landuses_m2["activity"] > 0 and x.landuses_m2["residential"] > 0,
            axis=1), "classification"] = "mixed"

        log('Done: Land uses surface association. Elapsed time (H:M:S): ' +
            time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    df_osm_built.loc[df_osm_built.activity_category.apply(lambda x: len(x) == 0),
                     "activity_category"] = np.nan
    df_osm_pois.loc[df_osm_pois.activity_category.apply(lambda x: len(x) == 0),
                    "activity_category"] = np.nan
    df_osm_building_parts.loc[
        df_osm_building_parts.activity_category.apply(lambda x: len(x) == 0),
        "activity_category"] = np.nan

    ##########################
    ### Overpass query: Street network graph
    ##########################
    if kwargs["retrieve_graph"]:
        # Save graph for input city shape
        start_time = time.time()
        get_route_graph(city_ref,
                        date=date_query,
                        polygon=polygon,
                        north=north, south=south, east=east, west=west,
                        force_crs=df_osm_built.crs)
        log('Done: Street network graph retrieval. Elapsed time (H:M:S): ' +
            time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    ##########################
    ### Store file?
    ##########################
    if city_ref:
        # Save GeoDataFrames
        store_geodataframe(df_osm_built, geo_poly_file)
        store_geodataframe(df_osm_building_parts, geo_poly_parts_file)
        store_geodataframe(df_osm_pois, geo_point_file)
        log("Stored OSM data files for city: " + city_ref)

    return df_osm_built, df_osm_building_parts, df_osm_pois

def prepare_testing_data(city_ref, pop_features=None):
    """
    Returns the X array for population downscaling inference, containing normalized urban features.
    X contains vectors with normalized urban features
    X_columns contains the columns referring to the X values
    The numpy arrays are stored locally

    Parameters
    ----------
    city_ref : string
        city reference name
    pop_features : geopandas.GeoDataFrame
        grid-cells with population count data and calculated urban features

    Returns
    ----------
    np.array, np.array, np.array
        X vector, X column names vector, geometry vector
    """
    log("Calculating urban testing data/features for city: " + city_ref)
    start = time.time()

    # Select columns to normalize
    columns_to_normalise = [
        col for col in pop_features.columns
        if "num_" in col or "m2_" in col or "dispersion" in col or "accessibility" in col
    ]
    # Normalize selected columns
    pop_features.loc[:, columns_to_normalise] = \
        pop_features.loc[:, columns_to_normalise].apply(lambda x: x / x.max(), axis=0)

    # X values: Vector <x1, x2, ..., xn> with normalized urban features
    X_values = []
    geom_values = []
    for idx in pop_features.idx.unique():
        square_info = pop_features[pop_features["idx"] == idx]
        urban_features = square_info[[
            col for col in square_info.columns
            if col not in ["geometry", "pop_count", "idx"]
        ]].values
        X_values.append(urban_features)

        geom = square_info["geometry"]
        geom_values.append(geom)

    # Get the columns order referenced in each X vector
    X_values_columns = pop_features[[
        col for col in square_info.columns
        if col not in ["geometry", "pop_count", "idx"]
    ]].columns
    X_values_columns = np.array(X_values_columns)

    X_values = np.array(X_values)
    geom_values = np.array(geom_values)

    log("Done: urban testing data/features. Elapsed time (H:M:S): " +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start)))
    return X_values, X_values_columns, geom_values

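# A hedged inference sketch following prepare_testing_data() above: a trained
# scikit-learn style regressor maps each normalized feature vector to a
# population-density estimate. The estimator (`model`), the city name, and the
# flattening of each per-square feature matrix are all assumptions here.
X, X_cols, geoms = prepare_testing_data('SomeCity', pop_features=pop_features)
X_flat = X.reshape(X.shape[0], -1)  # one flattened feature vector per square (assumed shape)
y_pred = model.predict(X_flat)      # `model` assumed trained elsewhere on the Y/X training arrays
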
def calculate_graph_indicators(graphml_folder, country_folder, filename):
    # get filepath and country/city identifiers
    filepath = os.path.join(graphml_folder, country_folder, filename)
    country, country_iso = country_folder.split('-')
    core_city, uc_id = filename.replace('.graphml', '').split('-')
    uc_id = int(uc_id)

    start_time = time.time()
    print(ox.ts(), 'processing', filepath)
    G = ox.load_graphml(filepath=filepath)

    # clustering and pagerank: needs directed representation
    cc_avg_dir, cc_avg_undir, cc_wt_avg_dir, cc_wt_avg_undir, pagerank_max = get_clustering(G)

    # get an undirected representation of this network for everything else
    Gu = ox.get_undirected(G)
    G.clear()
    G = None

    # street lengths
    lengths = pd.Series(nx.get_edge_attributes(Gu, 'length'))
    length_total = lengths.sum()
    length_median = lengths.median()
    length_mean = lengths.mean()

    # nodes, edges, node degree, self loops
    n = len(Gu.nodes)
    m = len(Gu.edges)
    k_avg = 2 * m / n
    self_loop_proportion = sum(u == v for u, v, k in Gu.edges) / m

    # proportion of 4-way intersections, 3-ways, and dead-ends
    streets_per_node = nx.get_node_attributes(Gu, 'street_count')
    prop_4way = list(streets_per_node.values()).count(4) / n
    prop_3way = list(streets_per_node.values()).count(3) / n
    prop_deadend = list(streets_per_node.values()).count(1) / n

    # average circuity and straightness
    circuity = calculate_circuity(Gu, length_total)
    straightness = 1 / circuity

    # elevation and grade
    grade_mean, grade_median, elev_mean, elev_median, elev_std, elev_range, elev_iqr = elevation_grades(Gu)

    # bearing/orientation entropy/order
    orientation_entropy = calculate_orientation_entropy(Gu)
    orientation_order = calculate_orientation_order(orientation_entropy)

    # total and clean intersection counts
    intersect_count, intersect_count_clean, intersect_count_clean_topo = intersection_counts(
        ox.project_graph(Gu), streets_per_node)

    # assemble the results
    rslt = {'country': country,
            'country_iso': country_iso,
            'core_city': core_city,
            'uc_id': uc_id,
            'cc_avg_dir': cc_avg_dir,
            'cc_avg_undir': cc_avg_undir,
            'cc_wt_avg_dir': cc_wt_avg_dir,
            'cc_wt_avg_undir': cc_wt_avg_undir,
            'circuity': circuity,
            'elev_iqr': elev_iqr,
            'elev_mean': elev_mean,
            'elev_median': elev_median,
            'elev_range': elev_range,
            'elev_std': elev_std,
            'grade_mean': grade_mean,
            'grade_median': grade_median,
            'intersect_count': intersect_count,
            'intersect_count_clean': intersect_count_clean,
            'intersect_count_clean_topo': intersect_count_clean_topo,
            'k_avg': k_avg,
            'length_mean': length_mean,
            'length_median': length_median,
            'length_total': length_total,
            'street_segment_count': m,
            'node_count': n,
            'orientation_entropy': orientation_entropy,
            'orientation_order': orientation_order,
            'pagerank_max': pagerank_max,
            'prop_4way': prop_4way,
            'prop_3way': prop_3way,
            'prop_deadend': prop_deadend,
            'self_loop_proportion': self_loop_proportion,
            'straightness': straightness}

    elapsed = time.time() - start_time
    ox.log(f'finished {filepath} in {elapsed:.0f} seconds')
    return rslt

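# A hedged driver sketch for calculate_graph_indicators() above, walking the
# country/city folder layout produced by the graph-building snippets earlier
# in this document; the root path and output file are assumptions.
indicators = []
graphml_folder = 'data/graphml'  # hypothetical root folder
for country_folder in sorted(os.listdir(graphml_folder)):
    for filename in sorted(os.listdir(os.path.join(graphml_folder, country_folder))):
        if filename.endswith('.graphml'):
            indicators.append(calculate_graph_indicators(graphml_folder, country_folder, filename))

df = pd.DataFrame(indicators)
df.to_csv('data/indicators.csv', index=False)  # hypothetical output path
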
def compute_grid_dispersion(
        df_indices,
        df_osm_built,
        kwargs={"radius_search": 750, "use_median": True, "K_nearest": 50},
):
    """
    Calculates dispersion indices on an input grid.

    Parameters
    ----------
    df_indices : geopandas.GeoDataFrame
        data frame containing the (x,y) reference points to calculate indices
    df_osm_built : geopandas.GeoDataFrame
        data frame containing the building's geometries
    kwargs : dict
        additional keyword arguments for the indices calculation:
            radius_search : int
                circle radius to consider the dispersion calculation at a local point
            use_median : bool
                denotes whether the median or mean should be used to calculate the indices
            K_nearest : int
                number of neighboring buildings to consider in evaluation

    Returns
    ----------
    geopandas.GeoDataFrame
        data frame with the added column for dispersion indices
    """
    log("Calculating dispersion indices")
    start = time.time()

    # Get radius search: circle radius to consider the dispersion calculation at a local point
    radius_search = kwargs["radius_search"]

    # Assign dispersion calculation method
    if kwargs["use_median"]:
        _calculate_dispersion = closest_building_distance_median
    else:
        _calculate_dispersion = closest_building_distance_average

    # Calculate the closest distance for each building within K_nearest centroid buildings
    _apply_polygon_closest_distance_neighbor(df_osm_built, K_nearest=kwargs["K_nearest"])

    # For dispersion calculation approximation, create KDTree with buildings centroid
    coords_data = [
        point.coords[0]
        for point in df_osm_built.loc[df_osm_built.closest_d.notnull()]
        .geometry.apply(lambda x: x.centroid)
    ]
    # Create KDTree
    tree = spatial.KDTree(coords_data)

    # Compute dispersion indices
    index_column = "dispersion"
    df_indices[index_column] = df_indices.geometry.apply(
        lambda x: _calculate_dispersion(x, tree, df_osm_built.closest_d, radius_search))

    # Remove added column
    df_osm_built.drop("closest_d", axis=1, inplace=True)

    log("Done: Dispersion indices. Elapsed time (H:M:S): " +
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start)))

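# A minimal, self-contained sketch of the KDTree query pattern the dispersion
# index above relies on: for each grid point, find the building centroids
# within the search radius and average their stored nearest-neighbor
# distances. All data here are synthetic assumptions, not project data.
import numpy as np
from scipy import spatial

rng = np.random.default_rng(0)
centroids = rng.uniform(0, 1000, size=(500, 2))  # fake building centroids (meters)
closest_d = rng.uniform(5, 80, size=500)         # fake per-building nearest-neighbor distances
tree = spatial.KDTree(centroids)

point = (500.0, 500.0)
idx = tree.query_ball_point(point, r=750)        # buildings within the search radius
dispersion = closest_d[idx].mean() if idx else np.nan  # mean variant; the median variant is analogous
print(dispersion)
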
def compute_full_urban_features( city_ref, df_osm_built=None, df_osm_pois=None, pop_grid=None, data_source=None, landusemix_args={ "walkable_distance": 600, "compute_activity_types_kde": True, "weighted_kde": True, "pois_weight": 9, "log_weighted": True, }, dispersion_args={ "radius_search": 750, "use_median": True, "K_nearest": 50, }, kwargs={"max_dispersion": 15}, ): """ Computes a set of urban features for each square where population count data exists Parameters ---------- city_ref : string city reference name df_osm_built : geopandas.GeoDataFrame input buildings df_osm_pois : geopandas.GeoDataFrame input points of interest pop_grid : geopandas.GeoDataFrame grid-cells with population count where urban features will be calculated data_source : str define the type of population data for its retrieval in case it was stored kwargs : dict keyword arguments to guide the process Returns ---------- geopandas.GeoDataFrame geometry with updated urban features """ # Population extract exists? if os.path.exists( get_population_urban_features_filename(city_ref, data_source)): log("Urban features from population gridded data exist for city: " + city_ref) # Read from GeoJSON (default projection coordinates) pop_features_4326 = gpd.read_file( get_population_urban_features_filename(city_ref, data_source)) # Project to UTM coordinates return ox.project_gdf(pop_features_4326) # Required arguments assert df_osm_built is not None assert df_osm_pois is not None assert pop_grid is not None # Get population count data with filled empty squares (null population) if data_source == "insee": pop_features = get_population_df_filled_empty_squares(pop_grid) elif data_source == "gpw": pop_features = pop_grid else: raise ValueError("Unknown data source.") # Set crs crs_proj = pop_grid.crs pop_features.crs = crs_proj ################## # Urban features ################## # Compute the urban features for each square log("Calculating urban features") start = time.time() # Conserve building geometries df_osm_built["geom_building"] = df_osm_built["geometry"] # Spatial join: grid-cell i - building j for all intersections pop_features = gpd.sjoin(pop_features, df_osm_built, op="intersects", how="left") # When a grid-cell i does not intersect any building: NaN values null_idx = pop_features.loc[pop_features["geom_building"].isnull()].index # Replace NaN for urban features calculation min_polygon = Polygon([ (0, 0), (0, np.finfo(float).eps), (np.finfo(float).eps, np.finfo(float).eps), ]) pop_features.loc[null_idx, "geom_building"] = pop_features.loc[ null_idx, "geom_building"].apply(lambda x: min_polygon) pop_features.loc[null_idx, "landuses_m2"] = len(null_idx) * [{ "residential": 0, "activity": 0 }] pop_features.loc[null_idx, "building_levels"] = len(null_idx) * [0] # Pre-calculation of urban features # Apply percentage of building presence within square: # 1 if fully contained, 0.5 if half the building contained, ... pop_features["building_ratio"] = pop_features.apply( lambda x: x.geom_building.intersection(x.geometry).area / x. 
    pop_features["m2_total_residential"] = pop_features.apply(
        lambda x: x.building_ratio * x.landuses_m2["residential"], axis=1)
    pop_features["m2_total_activity"] = pop_features.apply(
        lambda x: x.building_ratio * x.landuses_m2["activity"], axis=1)

    # Footprint area (m2) per land use classification
    pop_features["m2_footprint_residential"] = 0
    pop_features.loc[
        pop_features.classification.isin(["residential"]),
        "m2_footprint_residential",
    ] = pop_features.loc[
        pop_features.classification.isin(["residential"])
    ].apply(lambda x: x.building_ratio * x.geom_building.area, axis=1)
    pop_features["m2_footprint_activity"] = 0
    pop_features.loc[
        pop_features.classification.isin(["activity"]),
        "m2_footprint_activity",
    ] = pop_features.loc[
        pop_features.classification.isin(["activity"])
    ].apply(lambda x: x.building_ratio * x.geom_building.area, axis=1)
    pop_features["m2_footprint_mixed"] = 0
    pop_features.loc[
        pop_features.classification.isin(["mixed"]),
        "m2_footprint_mixed",
    ] = pop_features.loc[
        pop_features.classification.isin(["mixed"])
    ].apply(lambda x: x.building_ratio * x.geom_building.area, axis=1)

    # (Fractional) number of buildings per land use classification
    pop_features["num_built_activity"] = 0
    pop_features.loc[
        pop_features.classification.isin(["activity"]),
        "num_built_activity",
    ] = pop_features.loc[
        pop_features.classification.isin(["activity"])
    ].building_ratio
    pop_features["num_built_residential"] = 0
    pop_features.loc[
        pop_features.classification.isin(["residential"]),
        "num_built_residential",
    ] = pop_features.loc[
        pop_features.classification.isin(["residential"])
    ].building_ratio
    pop_features["num_built_mixed"] = 0
    pop_features.loc[
        pop_features.classification.isin(["mixed"]),
        "num_built_mixed",
    ] = pop_features.loc[
        pop_features.classification.isin(["mixed"])
    ].building_ratio

    pop_features["num_levels"] = pop_features.apply(
        lambda x: x.building_ratio * x.building_levels, axis=1)
    pop_features["num_buildings"] = pop_features["building_ratio"]
    pop_features["built_up_m2"] = pop_features.apply(
        lambda x: x.geom_building.area * x.building_ratio, axis=1)

    # Urban features aggregation functions
    urban_features_aggregation = {}
    if data_source == "insee":
        urban_features_aggregation["idINSPIRE"] = lambda x: x.head(1)
        urban_features_aggregation["pop_count"] = lambda x: x.head(1)
    elif data_source == "gpw":
        urban_features_aggregation["idx"] = lambda x: x.head(1)
    urban_features_aggregation["geometry"] = lambda x: x.head(1)

    urban_features_aggregation["m2_total_residential"] = "sum"
    urban_features_aggregation["m2_total_activity"] = "sum"
    urban_features_aggregation["m2_footprint_residential"] = "sum"
    urban_features_aggregation["m2_footprint_activity"] = "sum"
    urban_features_aggregation["m2_footprint_mixed"] = "sum"
    urban_features_aggregation["num_built_activity"] = "sum"
    urban_features_aggregation["num_built_residential"] = "sum"
    urban_features_aggregation["num_built_mixed"] = "sum"
    urban_features_aggregation["num_levels"] = "sum"
    urban_features_aggregation["num_buildings"] = "sum"
    urban_features_aggregation["built_up_m2"] = "sum"

    # Apply the aggregation functions
    pop_features = pop_features.groupby(
        pop_features.index).agg(urban_features_aggregation)

    # Calculate the built-up ratio (relative to the area of the grid-cell geometry)
    pop_features["built_up_relation"] = pop_features.apply(
        lambda x: x.built_up_m2 / x.geometry.area, axis=1)
    pop_features.drop("built_up_m2", axis=1, inplace=True)

    # To geopandas.GeoDataFrame, and set crs
    pop_features = gpd.GeoDataFrame(pop_features)
    pop_features.crs = crs_proj

    # POIs
    df_osm_pois_selection = df_osm_pois[df_osm_pois.classification.isin(
        ["activity", "mixed"])]
    gpd_intersection_pois = gpd.sjoin(
        pop_features,
        df_osm_pois_selection,
        op="intersects",
        how="left",
    )
    # Number of activity/mixed POIs per grid-cell
    pop_features["num_activity_pois"] = gpd_intersection_pois.groupby(
        gpd_intersection_pois.index)["osm_id"].count()

    ##################
    # Sprawling indices
    ##################
    # Compute the indices at the grid-cell centroids, conserving the square geometries
    pop_features["geometry_squares"] = pop_features.geometry
    pop_features["geometry"] = pop_features.geometry.centroid

    # Compute land use mix + density estimations
    compute_grid_landusemix(pop_features, df_osm_built, df_osm_pois,
                            landusemix_args)
    # Dispersion indices
    compute_grid_dispersion(pop_features, df_osm_built, dispersion_args)

    # Set back the original geometries
    pop_features["geometry"] = pop_features.geometry_squares
    pop_features.drop("geometry_squares", axis=1, inplace=True)

    if kwargs.get("max_dispersion"):
        # Set an upper bound for the dispersion values
        pop_features.loc[
            pop_features.dispersion > kwargs.get("max_dispersion"),
            "dispersion",
        ] = kwargs.get("max_dispersion")

    # Fill NaN sprawl indices with 0
    pop_features.fillna(0, inplace=True)

    # Save to a GeoJSON file (the projection is not conserved, so use EPSG 4326)
    ox.project_gdf(pop_features, to_latlong=True).to_file(
        get_population_urban_features_filename(city_ref, data_source),
        driver="GeoJSON",
    )

    elapsed_time = int(time.time() - start)
    log("Done: Urban features calculation. Elapsed time (H:M:S): "
        + "{:02d}:{:02d}:{:02d}".format(
            elapsed_time // 3600,
            (elapsed_time % 3600) // 60,
            elapsed_time % 60,
        ))
    return pop_features
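# Tiny self-contained illustration of the building_ratio weighting used above:
# the share of a building's footprint that falls inside a grid-cell determines
# how much of its attributes (surface, levels, counts) that cell receives.
# The coordinates below are made up for the example.
from shapely.geometry import Polygon

cell = Polygon([(0, 0), (100, 0), (100, 100), (0, 100)])
building = Polygon([(90, 40), (110, 40), (110, 60), (90, 60)])  # straddles the cell edge
ratio = building.intersection(cell).area / building.area
# ratio == 0.5: half of this building's footprint (and thus half of its
# m2 and levels) is attributed to this cell, the rest to the neighboring cell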
def compute_grid_landusemix(
    df_indices,
    df_osm_built,
    df_osm_pois,
    kw_args={
        "walkable_distance": 600,
        "compute_activity_types_kde": True,
        "weighted_kde": True,
        "pois_weight": 9,
        "log_weighted": True
    },
):
    """
    Calculate land use mix indices on the input grid

    Parameters
    ----------
    df_indices : geopandas.GeoDataFrame
        data frame containing the (x,y) reference points to calculate indices
    df_osm_built : geopandas.GeoDataFrame
        data frame containing the buildings' geometries
    df_osm_pois : geopandas.GeoDataFrame
        data frame containing the points of interest geometries
    kw_args : dict
        additional keyword arguments for the indices calculation
        walkable_distance : int
            bandwidth assumption for the Kernel Density Estimation calculations (meters)
        compute_activity_types_kde : bool
            determines if the densities for each activity type should be computed
        weighted_kde : bool
            use Weighted Kernel Density Estimation or the classic version
        pois_weight : int
            points of interest weight equivalence with buildings (in square meters)
        log_weighted : bool
            apply the natural logarithm to the surface weights

    Returns
    ----------
    None
        the land use mix indices are added in place as columns of df_indices
    """
    log("Calculating land use mix indices")
    start = time.time()

    # Get the bandwidth, related to 'walkable distances'
    bandwidth = kw_args["walkable_distance"]

    # Compute a weighted KDE?
    weighted_kde = kw_args["weighted_kde"]
    X_weights = None

    # Get the full list of POIs contained within buildings
    contained_pois = list(
        set(
            element
            for list_ in df_osm_built.containing_poi[
                df_osm_built.containing_poi.notnull()
            ]
            for element in list_
        )
    )
    # Get the POIs not contained by any building
    df_osm_pois_not_contained = df_osm_pois[
        ~df_osm_pois.index.isin(contained_pois)
    ]

    ############
    # Calculate the land use density estimations
    ############

    ####
    # Residential
    ####
    df_osm_built_indexed = df_osm_built[
        df_osm_built.classification.isin(["residential", "mixed"])
    ]
    if weighted_kde:
        X_weights = df_osm_built_indexed.landuses_m2.apply(
            lambda x: x["residential"]
        )
    df_indices["residential_pdf"] = calculate_kde(
        df_indices.geometry,
        df_osm_built_indexed,
        None,
        bandwidth,
        X_weights,
        kw_args["pois_weight"],
        kw_args["log_weighted"],
    )
    log("Residential density estimation done")

    ####
    # Activities
    ####
    df_osm_built_indexed = df_osm_built[
        df_osm_built.classification.isin(["activity", "mixed"])
    ]
    df_osm_pois_not_cont_indexed = df_osm_pois_not_contained[
        df_osm_pois_not_contained.classification.isin(["activity", "mixed"])
    ]
    if weighted_kde:
        X_weights = df_osm_built_indexed.landuses_m2.apply(
            lambda x: x["activity"]
        )
    df_indices["activity_pdf"] = calculate_kde(
        df_indices.geometry,
        df_osm_built_indexed,
        df_osm_pois_not_cont_indexed,
        bandwidth,
        X_weights,
        kw_args["pois_weight"],
        kw_args["log_weighted"],
    )
    log("Activity density estimation done")

    ####
    # Compute the activity type densities
    ####
    if kw_args["compute_activity_types_kde"]:
        assert "activity_category" in df_osm_built.columns

        # Get the unique category values
        unique_categories_built = [
            list(x)
            for x in set(
                tuple(x)
                for x in df_osm_built.activity_category.values
                if isinstance(x, list)
            )
        ]
        unique_categories_pois = [
            list(x)
            for x in set(
                tuple(x)
                for x in df_osm_pois_not_cont_indexed.activity_category.values
                if isinstance(x, list)
            )
        ]
        flat_list = [
            item
            for sublist in unique_categories_built + unique_categories_pois
            for item in sublist
        ]
        categories = list(set(flat_list))

        for cat in categories:
            # Get the data frame selection for the input category:
            # buildings and POIs within that category
            df_built_category = df_osm_built_indexed[
                df_osm_built_indexed.activity_category.apply(
                    lambda x: isinstance(x, list) and (cat in x)
                )
            ]
            df_pois_category = df_osm_pois_not_cont_indexed[
                df_osm_pois_not_cont_indexed.activity_category.apply(
                    lambda x: isinstance(x, list) and (cat in x)
                )
            ]
            if weighted_kde:
                X_weights = df_built_category.landuses_m2.apply(
                    lambda x: x[cat]
                )
            df_indices[cat + "_pdf"] = calculate_kde(
                df_indices.geometry,
                df_built_category,
                df_pois_category,
                bandwidth,
                X_weights,
                kw_args["pois_weight"],
                kw_args["log_weighted"],
            )
        log("Activity density estimation grouped by type done")

    # Compute the land use mix indices
    index_column = "landusemix"
    df_indices[index_column] = df_indices.apply(
        lambda x: _land_use_mix(x.activity_pdf, x.residential_pdf), axis=1
    )
    df_indices["landuse_intensity"] = df_indices.apply(
        lambda x: (x.activity_pdf + x.residential_pdf) / 2.0, axis=1
    )

    log(
        "Done: Land use mix indices. Elapsed time (H:M:S): "
        + time.strftime("%H:%M:%S", time.gmtime(time.time() - start))
    )
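# calculate_kde() and _land_use_mix() are defined elsewhere in the module.
# As a rough orientation only, a weighted KDE step like the one parameterized
# above could be sketched with scikit-learn as follows; the function name,
# the Gaussian kernel, and the exact weighting scheme are assumptions for
# illustration, not the module's reference implementation.
import numpy as np
from sklearn.neighbors import KernelDensity


def weighted_kde_sketch(grid_xy, source_xy, bandwidth, weights=None,
                        log_weighted=True):
    # `bandwidth` encodes the walkable-distance assumption (meters)
    if weights is not None and log_weighted:
        # Dampen the influence of very large surfaces
        weights = np.log(1.0 + np.asarray(weights, dtype=float))
    kde = KernelDensity(bandwidth=bandwidth, kernel="gaussian")
    # scikit-learn's KernelDensity accepts per-sample weights
    kde.fit(np.asarray(source_xy), sample_weight=weights)
    # score_samples() returns the log-density; exponentiate to get the PDF
    return np.exp(kde.score_samples(np.asarray(grid_xy)))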
def prepare_data(
    G, df_osm_built, df_osm_pois, df_indices, num_processes, kw_arguments
):
    """
    Pickles the data to a temporary folder in order to perform the
    accessibility calculation in parallel
    New subprocesses will be created in order to minimize memory requirements

    Parameters
    ----------
    G : networkx multidigraph
        input graph to calculate accessibility
    df_osm_built : geopandas.GeoDataFrame
        buildings data
    df_osm_pois : geopandas.GeoDataFrame
        points of interest data
    df_indices : geopandas.GeoDataFrame
        data frame where indices will be calculated
    num_processes : int
        number of data chunks to create
    kw_arguments : pandas.Series
        additional keyword arguments

    Returns
    ----------
    None
        the graph, the points chunks, and the arguments are pickled to the temp folder
    """
    # Divide long edges
    divide_long_edges_graph(G, kw_arguments.max_edge_length)
    log("Graph long edges shortened")

    # Get the activities
    df_built_activ = df_osm_built[
        df_osm_built.classification.isin(["activity", "mixed"])
    ]
    df_pois_activ = df_osm_pois[
        df_osm_pois.classification.isin(["activity", "mixed"])
    ]

    # Associate each activity to its closest node in the graph
    associate_activities_closest_node(G, df_built_activ, df_pois_activ)
    log("Activities associated to graph nodes")

    # Nodes: remove the attributes not needed for the calculation
    for n, data in G.nodes.data(data=True):
        for k in list(data.keys()):
            if k not in ["x", "y", "num_activities"]:
                data.pop(k)

    # Edges: remove the attributes not needed for the calculation
    for u, v, data in G.edges.data(data=True, keys=False):
        for k in list(data.keys()):
            if k not in ["length", "key"]:
                data.pop(k)

    try:
        G.graph.pop("streets_per_node")
    except Exception:
        pass

    # Make sure the temporary folder exists before pickling into it
    os.makedirs("temp", exist_ok=True)

    # Pickle the graph
    nx.write_gpickle(G, "temp/graph.gpickle")

    # Prepare the input indices points: split them into num_processes chunks
    data_split = np.array_split(df_indices, num_processes)
    for i in range(num_processes):
        data_split[i].to_pickle("temp/points_" + str(i) + ".pkl")

    # Pickle the arguments
    kw_arguments.to_pickle("temp/arguments.pkl")
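# Hypothetical worker-side counterpart to prepare_data(): each subprocess
# would reload the pickled graph, its own chunk of points, and the shared
# arguments. The file names match those written above, but the function and
# the accessibility computation itself are illustrative assumptions, not
# part of the module.
import networkx as nx
import pandas as pd


def load_chunk_for_accessibility(chunk_id):
    # Reload the shared, pruned graph and this worker's slice of points
    G = nx.read_gpickle("temp/graph.gpickle")
    df_points = pd.read_pickle("temp/points_" + str(chunk_id) + ".pkl")
    kw_arguments = pd.read_pickle("temp/arguments.pkl")
    # ... accessibility would be computed here for df_points on G ...
    return G, df_points, kw_arguments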