def get_region_road_stats(x):
    """
    Summarise the road network of a single region per road type and save the
    result as a .csv file.

    Arguments:
        *x* : a row of the table with all regions. The region name is read
        from ``x[3]`` (and ``x.GID_2`` for the output file name; both are
        presumably the same value -- TODO confirm against the caller), the
        country from ``x[1]`` and the continent from ``x[10]``.

    Returns:
        *None* : the summed statistics are written to
        ``<data_path>/road_stats/<GID_2>_stats.csv``. Nothing is returned and
        failures are printed instead of raised.
    """
    region = x[3]

    try:
        data_path = load_config()['paths']['data']

        # Build the output path once, so the "already finished" check looks at
        # exactly the file that is written at the end of this function.
        out_path = os.path.join(data_path, 'road_stats',
                                '{}_stats.csv'.format(x.GID_2))

        if os.path.exists(out_path):
            print('{} already finished!'.format(region))
            return None

        print('{} started!'.format(region))

        road_dict = map_roads()
        road_gpd = roads(data_path, region, regional=True)

        road_gpd['length'] = road_gpd.geometry.apply(line_length)

        # translate the raw infrastructure type through the map_roads() lookup.
        road_gpd['road_type'] = road_gpd.infra_type.apply(
            lambda infra_type: road_dict[infra_type])

        # aggregate per road type; the frame is indexed by road_type afterwards.
        road_gpd = road_gpd.groupby('road_type').sum()

        road_gpd['continent'] = x[10]
        road_gpd['country'] = x[1]
        road_gpd['region'] = region

        road_gpd.to_csv(out_path)

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(region, e))
def region_bridges(n):
    """
    Extract all OpenStreetMap bridges for one region and store them as .csv.

    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.

    Returns:
        *GeoDataFrame* : the bridges of the region as returned by ``bridges()``,
        extended with the columns ``length``, ``road_type``, ``region`` and
        ``country``. The same table is written to
        ``<data_path>/bridges_osm/<region>.csv``.
    """
    # root folder of all data, taken from the configuration.
    base_dir = load_config()['paths']['data']

    # look up the requested row in the shapefile with all regions.
    regions_file = os.path.join(base_dir, 'input_data', 'global_regions_v2.shp')
    region_row = geopandas.read_file(regions_file).iloc[n]
    region = region_row.GID_2

    # pull the bridges of this region out of OpenStreetMap.
    bridges_osm = bridges(base_dir, region, regional=True)

    # line_length output times 1000; the original comment states the result is
    # in meters (so line_length presumably returns kilometers -- TODO confirm).
    bridges_osm['length'] = bridges_osm.geometry.apply(line_length) * 1000

    road_dict = map_roads()

    def _road_class(raw_type):
        # translate a raw road type through the map_roads() lookup table.
        return road_dict[raw_type]

    bridges_osm['road_type'] = bridges_osm.road_type.apply(_road_class)

    bridges_osm['region'] = region
    # the first three characters of the region name are used as country code.
    bridges_osm['country'] = region[:3]

    # write the result next to the other bridge files.
    csv_name = '{}.csv'.format(region)
    bridges_osm.to_csv(os.path.join(base_dir, 'bridges_osm', csv_name))

    print('{} finished!'.format(region))

    return bridges_osm
def get_liquefaction_region(n, rail=False):
    """
    Function to intersect the global liquefaction map with all road or railway
    assets in the specific region.

    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        intersect the railway assets in a region.

    Returns:
        *None* : the table with all infrastructure assets and their
        liquefaction value and intersection length is saved as .feather file
        in ``liquefaction_road`` or ``liquefaction_rail``. Nothing is returned.
        When the output file already exists the region is skipped. Failures
        are printed instead of raised.
    """
    # Fallback label, so the failure message below also works when something
    # goes wrong before the real region name has been looked up.
    region = 'region index {}'.format(n)

    try:
        # specify the file path where all data is located.
        data_path = load_config()['paths']['data']

        # load shapefile with unique information for each region
        global_regions = geopandas.read_file(
            os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

        # grab the row of the region from the global region shapefile
        x = global_regions.iloc[n]

        # get name of the region and the geometry
        region = x.GID_2
        reg_geom = x.geometry

        # roads and railways are written to separate folders.
        out_dir = 'liquefaction_rail' if rail else 'liquefaction_road'
        out_path = os.path.join(data_path, out_dir,
                                '{}_liq.ft'.format(region))

        # if intersection is already done for this region, stop and move on
        # to the next region.
        if os.path.exists(out_path):
            print('{} already finished!'.format(region))
            return None

        # load OpenStreetMap data.
        if not rail:
            road_gpd = roads(data_path, region, regional=True)
            road_dict = map_roads()
            # length is measured before the geometry is simplified.
            road_gpd['length'] = road_gpd.geometry.apply(line_length)
            road_gpd.geometry = road_gpd.geometry.simplify(tolerance=0.5)
            road_gpd['road_type'] = road_gpd.infra_type.apply(
                lambda y: road_dict[y])
            infra_gpd = road_gpd.copy()
        else:
            rail_gpd = railway(data_path, region, regional=True)
            rail_gpd['length'] = rail_gpd.geometry.apply(line_length)
            rail_gpd.geometry = rail_gpd.geometry.simplify(tolerance=0.5)
            infra_gpd = rail_gpd.copy()

        # create geojson geometry to do the rasterio masking: the bounding box
        # of the region, buffered by 1 (in the units of the region geometry).
        geoms = [mapping(reg_geom.envelope.buffer(1))]

        # extract the raster values within the polygon
        with rasterio.open(
                os.path.join(data_path, 'Hazards', 'Liquefaction', 'Global',
                             'liquefaction_v1_deg.tif')) as src:
            out_image, out_transform = mask(src, geoms, crop=True)

        # keep the first band only
        out_image = out_image[0, :, :]

        # change array to integers, to reduce the size of the polygonized
        # GeoDataFrame. All non-positive cells collapse into a single -1 class.
        out_image[out_image <= 0] = -1
        out_image = numpy.array(out_image, dtype='int32')

        # the actual polygonization of the raster map
        results = ({
            'properties': {
                'raster_val': v
            },
            'geometry': s
        } for s, v in shapes(out_image, mask=None, transform=out_transform))

        # and save to a geodataframe
        gdf = geopandas.GeoDataFrame.from_features(list(results),
                                                   crs='epsg:4326')
        gdf['geometry'] = gdf.buffer(0)

        # now lets intersect the liquefaction map with the infrastructure
        # assets. The spatial index is fetched once and shared by all rows.
        liq_sindex = gdf.sindex
        tqdm.pandas(desc=region)
        inb = infra_gpd.progress_apply(
            lambda asset: intersect_hazard(
                asset, liq_sindex, gdf, liquefaction=True),
            axis=1).copy()

        # split the per-asset results into separate columns.
        inb = inb.apply(pandas.Series)
        inb.columns = ['geometry', 'liquefaction']
        inb['length_liq'] = inb.geometry.apply(line_length)

        infra_gpd[['length_liq',
                   'liquefaction']] = inb[['length_liq', 'liquefaction']]

        output = infra_gpd.drop(['geometry'], axis=1)
        output['country'] = region[:3]
        output['continent'] = x.continent
        output['region'] = region

        # and save the output to the designated folder.
        output.to_feather(out_path)

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(region, e))
def get_tree_density(n, rail=False):
    """
    Function to look up the tree density for all road or railway assets in
    the specific region.

    The value is read from the tree density raster
    (``Crowther_Nature_Biome_Revision_01_WGS84_GeoTiff.tif``) with a point
    query at the centroid of each asset.

    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        use the railway assets in a region.

    Returns:
        *None* : the assets (without geometry) with the added columns
        ``Tree_Dens`` and ``region`` are saved as .feather file in
        ``tree_cover_road`` or ``tree_cover_rail``. Nothing is returned and
        failures are printed instead of raised.
    """
    # Fallback label, so the failure message below also works when something
    # goes wrong before the real region name has been looked up.
    region = 'region index {}'.format(n)

    try:
        # specify the file path where all data is located.
        data_path = load_config()['paths']['data']

        # load shapefile with unique information for each region
        global_regions = geopandas.read_file(
            os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

        # grab the row of the region from the global region shapefile
        x = global_regions.iloc[n]

        # get name of the region and the geometry
        region = x.GID_2
        reg_geom = x.geometry

        # load OpenStreetMap data.
        if not rail:
            road_gpd = roads(data_path, region, regional=True)
            road_dict = map_roads()
            road_gpd['road_type'] = road_gpd.infra_type.apply(
                lambda y: road_dict[y])
            infra_gpd = road_gpd.copy()
        else:
            rail_gpd = railway(data_path, region, regional=True)
            infra_gpd = rail_gpd.copy()

        # create geojson geometry to do the rasterio masking: the bounding box
        # of the region, buffered by 1 (in the units of the region geometry).
        geoms = [mapping(reg_geom.envelope.buffer(1))]

        # extract the raster values within the polygon
        with rasterio.open(
                os.path.join(
                    data_path, 'input_data',
                    'Crowther_Nature_Biome_Revision_01_WGS84_GeoTiff.tif')
        ) as src:
            out_image, out_transform = mask(src, geoms, crop=True)

        # keep the first band only
        out_image = out_image[0, :, :]

        # grab the tree density value for the asset by using a point query
        tqdm.pandas(desc='Tree Density' + region)
        infra_gpd['Tree_Dens'] = infra_gpd.centroid.progress_apply(
            lambda point: get_raster_value(point, out_image, out_transform))
        infra_gpd['Tree_Dens'] = infra_gpd['Tree_Dens'].astype(float)

        infra_gpd['region'] = region
        infra_gpd = infra_gpd.drop('geometry', axis=1)

        # and save the output to the designated folder; roads and railways
        # are written to separate folders.
        out_dir = 'tree_cover_rail' if rail else 'tree_cover_road'
        pandas.DataFrame(infra_gpd).to_feather(
            os.path.join(data_path, out_dir, '{}.ft'.format(region)))

        print('{} finished!'.format(region))

    except Exception as e:
        # Exception (not a bare except) so that Ctrl-C / SystemExit still stop
        # a batch run; the cause is printed like in the sibling functions.
        print('Failed to finish {} because of {}!'.format(region, e))
def region_intersection(n, hzd, rail=False):
    """
    Function to intersect all return periods of a particular hazard with all
    road or railway assets in the specific region.

    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.

        *hzd* : abbreviation of the hazard we want to intersect. **EQ** for
        earthquakes, **Cyc** for cyclones, **FU** for river flooding, **PU**
        for surface flooding and **CF** for coastal flooding.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        intersect the railway assets in a region.

    Returns:
        *output* : a DataFrame with all infrastructure assets (without
        geometry) and, per return period, the columns ``length_<name>`` and
        ``val_<name>``. Will also be saved as .feather file. If the output
        file already exists it is read and returned instead. **None** is
        returned when the OpenStreetMap data cannot be loaded or when the
        intersection fails; in both cases a message is printed.
    """
    # get path where all hazards and data are located
    config_paths = load_config()['paths']
    data_path = config_paths['data']
    hazard_root = config_paths['hazard_data']

    # load shapefile with unique information for each region
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # grab the row of the region from the global region shapefile
    x = global_regions.iloc[n]

    # get the name of the region
    region = x.GID_2

    try:
        # roads and railways are written to separate folders.
        if rail:
            out_dir = 'output_{}_rail_full'.format(hzd)
        else:
            out_dir = 'output_{}_full'.format(hzd)
        out_path = os.path.join(data_path, out_dir,
                                '{}_{}.ft'.format(region, hzd))

        # check if we already did the hazard intersection for this region.
        # If so, we dont do it again!
        if os.path.exists(out_path):
            print('{} already finished!'.format(region))
            return pandas.read_feather(out_path)

        # specifics for each hazard: (folder name, names of the return periods)
        hazard_specs = {
            'EQ': ('Earthquake', [
                'EQ_rp250', 'EQ_rp475', 'EQ_rp975', 'EQ_rp1500', 'EQ_rp2475'
            ]),
            'Cyc': ('Cyclones', [
                'Cyc_rp50', 'Cyc_rp100', 'Cyc_rp250', 'Cyc_rp500',
                'Cyc_rp1000'
            ]),
            'FU': ('FluvialFlooding', [
                'FU-5', 'FU-10', 'FU-20', 'FU-50', 'FU-75', 'FU-100',
                'FU-200', 'FU-250', 'FU-500', 'FU-1000'
            ]),
            'PU': ('PluvialFlooding', [
                'PU-5', 'PU-10', 'PU-20', 'PU-50', 'PU-75', 'PU-100',
                'PU-200', 'PU-250', 'PU-500', 'PU-1000'
            ]),
            'CF': ('CoastalFlooding', [
                'CF-10', 'CF-20', 'CF-50', 'CF-100', 'CF-200', 'CF-500',
                'CF-1000'
            ]),
        }

        # fail early (and with a clear message) on an unknown abbreviation,
        # before the expensive OpenStreetMap extraction starts.
        if hzd not in hazard_specs:
            raise ValueError('unknown hazard abbreviation {!r}'.format(hzd))
        hzd_name_dir, hzd_names = hazard_specs[hzd]

        # EQ, Cyc and CF come as global datasets; PU and FU as country level
        # datasets.
        is_global = hzd in ('EQ', 'Cyc', 'CF')

        # extract data from OpenStreetMap, either the roads or the railway data.
        try:
            if not rail:
                road_gpd = roads(data_path, region, regional=True)
                road_dict = map_roads()
                # length is measured before the geometry is simplified.
                road_gpd['length'] = road_gpd.geometry.apply(line_length)
                road_gpd.geometry = road_gpd.geometry.simplify(tolerance=0.5)
                road_gpd['road_type'] = road_gpd.infra_type.apply(
                    lambda infra_type: road_dict[infra_type])
                infra_gpd = road_gpd.copy()
            else:
                rail_gpd = railway(data_path, region, regional=True)
                rail_gpd['length'] = rail_gpd.geometry.apply(line_length)
                rail_gpd['geometry'] = rail_gpd.geometry.simplify(
                    tolerance=0.5)
                infra_gpd = rail_gpd.copy()

            print('{} osm data loaded!'.format(region))
        except Exception:
            print('{} osm data not properly loaded!'.format(region))
            return None

        # for the global datasets, we can just create a big dataframe with all
        # the hazard polygons (because the resolution is relatively coarse)
        if is_global:
            hazard_path = os.path.join(hazard_root, hzd_name_dir, 'Global')
            hzd_list = [
                os.path.join(hazard_path, file_name)
                for file_name in os.listdir(hazard_path)
            ]
            try:
                hzds_data = multiple_polygonized(region, x.geometry, hzd_list,
                                                 hzd_names)
            except Exception:
                # best effort: treat the region as having no hazard at all.
                hzds_data = pandas.DataFrame(columns=['hazard'])

        for hzd_name in hzd_names:
            length_col = 'length_{}'.format(hzd_name)
            val_col = 'val_{}'.format(hzd_name)

            try:
                # for the country level datasets, we need to load hazard files
                # in the loop, else we run into RAM problems (and time).
                if not is_global:
                    hzds_data = single_polygonized(hzd_name, region,
                                                   x.geometry, x.ISO_3digit,
                                                   hzd)

                # for the global datasets, we just extract the individual
                # hazard maps from the DataFrame we created before this loop.
                hzd_region = hzds_data.loc[
                    hzds_data.hazard == hzd_name].reset_index(drop=True)
            except Exception:
                # best effort: no usable hazard data for this return period.
                hzd_region = pandas.DataFrame(columns=['hazard'])

            # if there are no hazard values in the region for the specific
            # return period, just give everything zeros.
            if len(hzd_region) == 0:
                infra_gpd[length_col] = 0
                infra_gpd[val_col] = 0
                continue

            # now lets intersect the hazard with the infrastructure asset and
            # get the hazard values and intersection lengths for each asset.
            hzd_reg_sindex = hzd_region.sindex
            tqdm.pandas(desc=hzd_name + '_' + region)
            inb = infra_gpd.progress_apply(
                lambda asset: intersect_hazard(asset, hzd_reg_sindex,
                                               hzd_region),
                axis=1).copy()

            # split the per-asset results into separate columns.
            inb = inb.apply(pandas.Series)
            inb.columns = ['geometry', val_col]
            inb[length_col] = inb.geometry.apply(line_length)

            # and add the results to the dataframe with all the
            # infrastructure assets.
            infra_gpd[[length_col, val_col]] = inb[[length_col, val_col]]

        output = infra_gpd.drop(['geometry'], axis=1)

        # country and continent are taken from the first row in the regions
        # table that carries this region name.
        region_rows = global_regions.loc[global_regions['GID_2'] == region]
        output['country'] = region_rows['ISO_3digit'].values[0]
        output['continent'] = region_rows['continent'].values[0]
        output['region'] = region

        # and save output to the designated folder for the hazard.
        output.to_feather(out_path)

        print('Finished {}!'.format(region))
        return output

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(region, e))