def get_region_rail_stats(n):
    """
    Get summary statistics for all railway assets in a region.

    Arguments:
        *n* : the index ID of a region in the shapefile with all the regions.
    """
    try:
        data_path = load_config()['paths']['data']

        # load shapefile with all regions and only keep the regions for which we have OSM data
        global_data = gpd.read_file(
            os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))
        global_data = global_data.loc[global_data.GID_2.isin([
            x.split('.')[0]
            for x in os.listdir(os.path.join(data_path, 'region_osm'))
        ])]

        # grab the row of the region from the global region shapefile
        x = global_data.iloc[n]

        # if the statistics were already created for this region, move on to the next region
        if os.path.exists(
                os.path.join(data_path, 'railway_stats',
                             '{}_stats.csv'.format(x.GID_2))):
            print('{} already finished!'.format(x.GID_2))
            return None

        print('{} started!'.format(x.GID_2))

        # extract railway assets from OpenStreetMap and summarize their length per asset type
        rail_gpd = railway(data_path, x.GID_2, regional=True)
        rail_gpd['length'] = rail_gpd.geometry.apply(line_length)
        rail_gpd = rail_gpd.groupby('infra_type').sum()
        rail_gpd['continent'] = x.continent
        rail_gpd['country'] = x.ISO_3digit
        rail_gpd['region'] = x.GID_2

        rail_gpd.to_csv(
            os.path.join(data_path, 'railway_stats',
                         '{}_stats.csv'.format(x.GID_2)))

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(n, e))
def get_region_road_stats(x):
    """
    Get summary statistics for all road assets in a region.

    Arguments:
        *x* : a numpy record of a region, as produced by all_region_stats().
        Fields are addressed by position: x[1] is ISO_3digit, x[3] is GID_2
        and x[10] is continent.
    """
    try:
        data_path = load_config()['paths']['data']

        # if the statistics were already created for this region, move on to the next region
        if os.path.exists(
                os.path.join(data_path, 'road_stats',
                             '{}_stats.csv'.format(x[3]))):
            print('{} already finished!'.format(x[3]))
            return None

        print('{} started!'.format(x[3]))

        # extract roads from OpenStreetMap and map them to the main road types
        road_dict = map_roads()
        road_gpd = roads(data_path, x[3], regional=True)
        road_gpd['length'] = road_gpd.geometry.apply(line_length)
        road_gpd['road_type'] = road_gpd.infra_type.apply(
            lambda y: road_dict[y])

        # summarize the road length per road type
        road_gpd = road_gpd.groupby('road_type').sum()
        road_gpd['continent'] = x[10]
        road_gpd['country'] = x[1]
        road_gpd['region'] = x[3]

        road_gpd.to_csv(
            os.path.join(data_path, 'road_stats',
                         '{}_stats.csv'.format(x[3])))

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(x[3], e))
def bridge_extraction(save_all=False):
    """
    Function to extract all bridges from OpenStreetMap.

    Optional Arguments:
        *save_all* : Default is **False**. Set to **True** if you would like to
        save all bridges of the world in one csv file. Will become a big csv!
    """
    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with all regions and only keep the regions for which we have OSM data
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))
    global_regions = global_regions.loc[global_regions.GID_2.isin([
        x.split('.')[0]
        for x in os.listdir(os.path.join(data_path, 'region_osm'))
    ])]

    # run the bridge extraction for all regions
    with Pool(cpu_count() - 1) as pool:
        collect_bridges = pool.map(region_bridges,
                                   list(global_regions.index),
                                   chunksize=1)

    # save all bridges in one file
    if save_all:
        all_bridges = pandas.concat(collect_bridges)
        all_bridges.reset_index(inplace=True, drop=True)
        all_bridges.to_csv(
            os.path.join(data_path, 'output_data', 'osm_bridges.csv'))
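# Usage sketch (an illustrative addition, not part of the original module):
# the Pool-based entry points in this file should be invoked from a __main__
# guard, because multiprocessing re-imports the module in its worker
# processes on spawn-based platforms (e.g. Windows, macOS).
if __name__ == '__main__':
    bridge_extraction(save_all=True)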
def tree_values(rail=False):
    """
    Function to run intersection with global tree density map for all regions parallel.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        intersect the railway assets in a region.
    """
    from functools import partial

    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with all regions and only keep the regions for which we have statistics
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    if not rail:
        global_regions = global_regions.loc[global_regions.GID_2.isin([
            x[:-10]
            for x in os.listdir(os.path.join(data_path, 'road_stats'))
        ])]
    else:
        global_regions = global_regions.loc[global_regions.GID_2.isin([
            x[:-10]
            for x in os.listdir(os.path.join(data_path, 'railway_stats'))
        ])]

    # run tree value extraction for all regions parallel, forwarding the rail flag
    with Pool(cpu_count() - 1) as pool:
        pool.map(partial(get_tree_density, rail=rail),
                 list(global_regions.index),
                 chunksize=1)
def exposure_analysis(rail=False):
    """
    Get exposure statistics for all road or railway assets in all regions.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        intersect the railway assets in a region.
    """
    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with all regions
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # load csv with income group data and assign income group to regions
    incomegroups = pandas.read_csv(os.path.join(data_path, 'input_data',
                                                'incomegroups_2018.csv'),
                                   index_col=[0])
    income_dict = dict(zip(incomegroups.index, incomegroups.GroupCode))
    global_regions['wbincome'] = global_regions.GID_0.apply(
        lambda x: income_dict[x])

    # only keep regions for which we have data
    global_regions = global_regions.loc[global_regions.GID_2.isin([
        x.split('.')[0]
        for x in os.listdir(os.path.join(data_path, 'region_osm'))
    ])]

    # create dictionary with information on protection standards
    prot_lookup = dict(
        zip(global_regions['GID_2'], global_regions['prot_stand']))

    # create lists for the parallelization
    regions = list(global_regions.index)
    prot_lookups = [prot_lookup] * len(regions)
    data_paths = [data_path] * len(regions)

    # run exposure analysis parallel
    if not rail:
        with Pool(cpu_count() - 1) as pool:
            collect_output = pool.starmap(regional_roads,
                                          zip(regions, prot_lookups,
                                              data_paths),
                                          chunksize=1)
        pandas.concat(collect_output).to_csv(
            os.path.join(data_path, 'summarized', 'total_exposure_road.csv'))
    else:
        with Pool(cpu_count() - 1) as pool:
            collect_output = pool.starmap(regional_railway,
                                          zip(regions, prot_lookups,
                                              data_paths),
                                          chunksize=1)
        pandas.concat(collect_output).to_csv(
            os.path.join(data_path, 'summarized',
                         'total_exposure_railway.csv'))
def liquefaction_overlays(rail=False):
    """
    Function to run intersection with global liquefaction map for all regions parallel.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        intersect the railway assets in a region.
    """
    from functools import partial

    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with all regions and their information; the same
    # region_osm filter applies to both the road and the railway runs
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))
    global_regions = global_regions.loc[global_regions.GID_2.isin([
        x.split('.')[0]
        for x in os.listdir(os.path.join(data_path, 'region_osm'))
    ])]

    # run liquefaction intersection for all regions parallel, forwarding the rail flag
    with Pool(cpu_count() - 1) as pool:
        pool.map(partial(get_liquefaction_region, rail=rail),
                 list(global_regions.index),
                 chunksize=1)
def all_region_stats():
    # set data path and load the regions for which we have OSM data
    data_path = load_config()['paths']['data']
    global_data = gpd.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))
    global_data = global_data.loc[global_data.GID_2.isin([
        x.split('.')[0]
        for x in os.listdir(os.path.join(data_path, 'region_osm'))
    ])]

    # create road statistics for all regions parallel
    with Pool(cpu_count() - 1) as pool:
        pool.map(get_region_road_stats,
                 list(global_data.to_records()),
                 chunksize=1)
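# A minimal sketch (the field layout is an assumption about the column order
# of global_regions_v2.shp) of why get_region_road_stats() indexes its input
# positionally: to_records() yields numpy records, so x[1], x[3] and x[10]
# address ISO_3digit, GID_2 and continent by position; attribute access is
# equivalent wherever the field name is known.
def _example_region_record():
    import numpy
    rec = numpy.rec.fromrecords([(0, 'KEN', 'Kenya', 'KEN.1.1_1')],
                                names=['index', 'ISO_3digit', 'NAME_0',
                                       'GID_2'])[0]
    return rec[3], rec.GID_2  # positional and attribute access agree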
def all_country_stats():
    # set data path and load the country shapes
    data_path = load_config()['paths']['data']
    global_countries = gpd.read_file(
        os.path.join(data_path, 'input_data', 'global_countries.shp'))
    list_iso3 = [x.split('_')[0] for x in global_countries.ISO_3digit]

    # summarize the regional road statistics per country, in parallel
    with Pool(cpu_count() - 1) as pool:
        collect_countries = pool.map(get_country_road_stats,
                                     list_iso3,
                                     chunksize=1)

    pd.concat(collect_countries).to_csv(
        os.path.join(data_path, 'summarized', 'country_road_stats.csv'))
def region_bridges(n):
    """
    This function will extract all bridges from OpenStreetMap for the specified region.

    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.

    Returns:
        *GeoDataFrame* : A geopandas GeoDataFrame with all bridges in a region.
        Will also save this to a .csv file.
    """
    # specify the file path where all data is located.
    data_path = load_config()['paths']['data']

    # load shapefile with unique information for each region
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # grab the row of the region from the global region shapefile
    x = global_regions.iloc[n]

    # get name of the region
    region = x.GID_2

    # extract bridges from OpenStreetMap
    bridges_osm = bridges(data_path, region, regional=True)

    # estimate the length of each bridge in meters
    bridges_osm['length'] = bridges_osm.geometry.apply(line_length)
    bridges_osm['length'] = bridges_osm['length'] * 1000

    # map roads to primary, secondary, tertiary and other roads.
    road_dict = map_roads()
    bridges_osm['road_type'] = bridges_osm.road_type.apply(
        lambda y: road_dict[y])

    bridges_osm['region'] = region
    bridges_osm['country'] = region[:3]

    # save to .csv
    bridges_osm.to_csv(
        os.path.join(data_path, 'bridges_osm', '{}.csv'.format(region)))

    print('{} finished!'.format(region))

    return bridges_osm
def get_country_road_stats(iso3):
    try:
        print(iso3)
        data_path = load_config()['paths']['data']

        # collect all regional road statistics files for this country
        list_files = [
            os.path.join(data_path, 'road_stats', x)
            for x in os.listdir(os.path.join(data_path, 'road_stats'))
            if iso3 in x
        ]

        collect_regions = []
        for file in list_files:
            collect_regions.append(pd.read_csv(file))

        return pd.concat(collect_regions).groupby(
            ['road_type', 'continent', 'country']).sum()

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(iso3, e))
def planet_osm():
    """
    This function will download the planet file from the OSM servers.
    """
    data_path = load_config()['paths']['data']
    osm_path_in = os.path.join(data_path, 'planet_osm')

    # create directory to save planet osm file if that directory does not exist yet.
    if not os.path.exists(osm_path_in):
        os.makedirs(osm_path_in)

    # if planet file is not downloaded yet, download it.
    if 'planet-latest.osm.pbf' not in os.listdir(osm_path_in):
        url = 'https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf'
        urllib.request.urlretrieve(
            url, os.path.join(osm_path_in, 'planet-latest.osm.pbf'))
    else:
        print('Planet file is already downloaded')
def SSBN_merge(from_=0, to_=235):
    """
    Merge all countries parallel.

    Optional Arguments:
        *from_* : Default is **0**. Set to a different value if you would like
        to select a different subset.

        *to_* : Default is **235**. Set to a different value if you would like
        to select a different subset.
    """
    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with all countries
    global_data = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_countries.shp'))
    global_data = global_data[int(from_):int(to_)]

    # run SSBN merge for all countries parallel
    with Pool(cpu_count() - 1) as pool:
        pool.map(merge_SSBN_maps, global_data['ISO_3digit'], chunksize=1)
def cyclone_sensitivity(rail=False, region_count=1000):
    """
    Function to perform the calculations for a sensitivity analysis related to
    cyclone damage to road or railway assets for all regions.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        intersect the railway assets in a region.

        *region_count* : Default is **1000**. Change this number if you want
        to include a different amount of regions.
    """
    # set data path
    data_path = load_config()['paths']['data']

    # set list of events
    events = ['Cyc_rp50', 'Cyc_rp100', 'Cyc_rp250', 'Cyc_rp500', 'Cyc_rp1000']

    # get list of all files for which we have hazard intersection information
    # do this for roads
    if not rail:
        all_files = [
            os.path.join(data_path, 'output_Cyc_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_Cyc_full'))
        ][:region_count]

        # set parameter values
        problem = {
            'num_vars': 4,
            'names': ['x1', 'x2', 'x3', 'x4'],
            'bounds': [[5000, 50000], [1000, 10000], [500, 5000], [0, 500]]
        }

        # generate samples and save them, to be used in the uncertainty and sensitivity analysis of the results
        param_values = morris.sample(problem,
                                     10,
                                     num_levels=4,
                                     grid_jump=2,
                                     local_optimization=True)
        param_values.tofile(
            os.path.join(data_path, 'input_data',
                         'param_values_cyc_road.pkl'))

    # and for railways
    else:
        all_files = [
            os.path.join(data_path, 'output_Cyc_rail_full', x)
            for x in os.listdir(
                os.path.join(data_path, 'output_Cyc_rail_full'))
        ][:region_count]

        # set parameter values
        problem = {
            'num_vars': 3,
            'names': ['x1', 'x2', 'x3'],
            'bounds': [[5000, 50000], [1000, 10000], [0, 1]]
        }

        # generate samples and save them, to be used in the uncertainty and sensitivity analysis of the results
        param_values = morris.sample(problem,
                                     10,
                                     num_levels=4,
                                     grid_jump=2,
                                     local_optimization=True)
        param_values.tofile(
            os.path.join(data_path, 'input_data',
                         'param_values_cyc_rail.pkl'))

    # prepare multiprocessing
    data_paths = [data_path] * len(all_files)
    event_list = [events] * len(all_files)
    param_list = [param_values] * len(all_files)
    rail_list = [rail] * len(all_files)

    # run cyclone sensitivity analysis parallel and save outputs
    with Pool(cpu_count() - 1) as pool:
        collect_output = pool.starmap(sensitivity.regional_cyclone,
                                      zip(all_files, data_paths, event_list,
                                          param_list, rail_list),
                                      chunksize=1)

    if not rail:
        pandas.concat(collect_output).to_csv(
            os.path.join(data_path, 'summarized', 'sa_cyc_road.csv'))
    else:
        pandas.concat(collect_output).to_csv(
            os.path.join(data_path, 'summarized', 'sa_cyc_rail.csv'))
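# numpy.ndarray.tofile() writes a raw float64 buffer without shape or dtype
# metadata, so the Morris sample matrix written above has to be reshaped on
# the way back in. A minimal read-back sketch (the column count must equal
# the problem's num_vars):
def _example_load_morris_samples(path, num_vars=4):
    import numpy
    return numpy.fromfile(path).reshape(-1, num_vars)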
def flood_sensitivity(hazard, rail=False, region_count=1000):
    """
    Function to perform the calculations for a sensitivity analysis related to
    flood damage to road or railway assets for all regions.

    Arguments:
        *hazard* : abbreviation of the flood hazard to analyse. **FU** for
        river flooding, **PU** for surface flooding and **CF** for coastal flooding.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        intersect the railway assets in a region.

        *region_count* : Default is **1000**. Change this number if you want
        to include a different amount of regions.
    """
    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with country level information
    global_countries = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_countries.shp'))
    global_countries.wbregion = global_countries.wbregion.str.replace(
        'LCA', 'LAC')
    global_countries['wbregion'].loc[
        global_countries.wbregion.isnull()] = 'YHI'
    wbreg_lookup = dict(
        zip(global_countries['ISO_3digit'], global_countries['wbregion']))

    # import cost values for different World Bank regions
    global_costs = pandas.read_excel(os.path.join(data_path, 'input_data',
                                                  'Costs_curves.xlsx'),
                                     usecols=[0, 1, 2, 3, 4, 5, 6, 7],
                                     header=0,
                                     index_col=0,
                                     skipfooter=45)
    global_costs.columns = ['SAS', 'SSA', 'MNA', 'EAP', 'LAC', 'ECA', 'YHI']

    # read csv file with information on paved and unpaved roads
    paved_ratios = pandas.read_csv(os.path.join(data_path, 'input_data',
                                                'paved_ratios.csv'),
                                   index_col=[0, 1])
    paved_ratios.index.names = ['ISO3', 'road_type']
    paved_ratios = paved_ratios.reset_index()

    # import damage curves
    if not rail:
        flood_curve_paved = pandas.read_excel(
            os.path.join(data_path, 'input_data', 'Costs_curves.xlsx'),
            usecols=[1, 2, 3, 4, 5, 6, 7, 8],
            sheet_name='Flooding',
            index_col=[0],
            skiprows=1)
        flood_curve_unpaved = pandas.read_excel(
            os.path.join(data_path, 'input_data', 'Costs_curves.xlsx'),
            usecols=[11, 12, 13, 14, 15, 16, 17, 18],
            sheet_name='Flooding',
            index_col=[0],
            skiprows=1)
        flood_curve_unpaved.columns = flood_curve_paved.columns
    else:
        curve = pandas.read_excel(os.path.join(data_path, 'input_data',
                                               'Costs_curves.xlsx'),
                                  usecols=[1, 2, 3, 4, 5, 6, 7, 8],
                                  sheet_name='Flooding',
                                  index_col=[0],
                                  skipfooter=9,
                                  skiprows=1)
        curve.columns = curve.columns.droplevel(0)
        # the railway analysis uses the single railway damage curve for both curve lists
        flood_curve_paved = curve
        flood_curve_unpaved = curve

    # load all files for which we have intersection data
    if not rail:
        all_files = [
            os.path.join(data_path, 'output_{}_full'.format(hazard), x)
            for x in os.listdir(
                os.path.join(data_path, 'output_{}_full'.format(hazard)))
        ][:region_count]
    else:
        all_files = [
            os.path.join(data_path, 'output_{}_rail_full'.format(hazard), x)
            for x in os.listdir(
                os.path.join(data_path,
                             'output_{}_rail_full'.format(hazard)))
        ][:region_count]

    # create list with all hazard events
    if hazard == 'FU':
        events = [
            'FU-5', 'FU-10', 'FU-20', 'FU-50', 'FU-75', 'FU-100', 'FU-200',
            'FU-250', 'FU-500', 'FU-1000'
        ]
    elif hazard == 'PU':
        events = [
            'PU-5', 'PU-10', 'PU-20', 'PU-50', 'PU-75', 'PU-100', 'PU-200',
            'PU-250', 'PU-500', 'PU-1000'
        ]
    elif hazard == 'CF':
        events = [
            'CF-10', 'CF-20', 'CF-50', 'CF-100', 'CF-200', 'CF-500', 'CF-1000'
        ]

    # prepare multiprocessing
    data_paths = [data_path] * len(all_files)
    events_list = [events] * len(all_files)
    wbreg_list = [wbreg_lookup] * len(all_files)
    pav_cost_list = [global_costs] * len(all_files)
    pav_rat_list = [paved_ratios] * len(all_files)
    cur_pav_list = [flood_curve_paved] * len(all_files)
    cur_unpav_list = [flood_curve_unpaved] * len(all_files)
    hzd_list = [hazard] * len(all_files)

    # run flood damage sensitivity analysis parallel and save outputs
    with Pool(cpu_count() - 1) as pool:
        collect_output = pool.starmap(sensitivity.regional_flood,
                                      zip(all_files, hzd_list, data_paths,
                                          pav_cost_list, pav_rat_list,
                                          cur_pav_list, cur_unpav_list,
                                          events_list, wbreg_list),
                                      chunksize=1)

    if not rail:
        pandas.concat(collect_output).to_csv(
            os.path.join(data_path, 'summarized',
                         'sa_{}_road.csv'.format(hazard)))
    else:
        pandas.concat(collect_output).to_csv(
            os.path.join(data_path, 'summarized',
                         'sa_{}_rail.csv'.format(hazard)))
def earthquake_sensitivity(rail=False, region_count=1000):
    """
    Function to perform the calculations for a sensitivity analysis related to
    earthquake damage to road or railway assets for all regions.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        intersect the railway assets in a region.

        *region_count* : Default is **1000**. Change this number if you want
        to include a different amount of regions.
    """
    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with country level information
    global_countries = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_countries.shp'))
    global_countries.wbregion = global_countries.wbregion.str.replace(
        'LCA', 'LAC')
    global_countries['wbregion'].loc[
        global_countries.wbregion.isnull()] = 'YHI'
    wbreg_lookup = dict(
        zip(global_countries['ISO_3digit'], global_countries['wbregion']))

    # import cost values for different World Bank regions
    global_costs = pandas.read_excel(os.path.join(data_path, 'input_data',
                                                  'Costs_curves.xlsx'),
                                     usecols=[0, 1, 2, 3, 4, 5, 6, 7],
                                     header=0,
                                     index_col=0,
                                     skipfooter=45)
    global_costs.columns = ['SAS', 'SSA', 'MNA', 'EAP', 'LAC', 'ECA', 'YHI']

    # read csv file with information on paved and unpaved roads.
    paved_ratios = pandas.read_csv(os.path.join(data_path, 'input_data',
                                                'paved_ratios.csv'),
                                   index_col=[0, 1])
    paved_ratios.index.names = ['ISO3', 'road_type']
    paved_ratios = paved_ratios.reset_index()

    # load all files for which we have intersection data
    if not rail:
        all_files = [
            os.path.join(data_path, 'output_EQ_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_EQ_full'))
        ][:region_count]
    else:
        all_files = [
            os.path.join(data_path, 'output_EQ_rail_full', x)
            for x in os.listdir(
                os.path.join(data_path, 'output_EQ_rail_full'))
        ][:region_count]

    # set list of hazard events
    events = ['EQ_rp250', 'EQ_rp475', 'EQ_rp975', 'EQ_rp1500', 'EQ_rp2475']

    # prepare multiprocessing
    data_paths = [data_path] * len(all_files)
    pav_cost_list = [global_costs] * len(all_files)
    pav_rat_list = [paved_ratios] * len(all_files)
    events_list = [events] * len(all_files)
    wbreg_list = [wbreg_lookup] * len(all_files)
    rail_list = [rail] * len(all_files)

    # run earthquake sensitivity analysis parallel and save outputs
    with Pool(cpu_count() - 1) as pool:
        collect_output = pool.starmap(sensitivity.regional_earthquake,
                                      zip(all_files, data_paths,
                                          pav_cost_list, pav_rat_list,
                                          events_list, wbreg_list,
                                          rail_list),
                                      chunksize=1)

    if not rail:
        pandas.concat(collect_output).to_csv(
            os.path.join(data_path, 'summarized', 'sa_eq_road.csv'))
    else:
        pandas.concat(collect_output).to_csv(
            os.path.join(data_path, 'summarized', 'sa_eq_rail.csv'))
def region_intersection(n, hzd, rail=False):
    """
    Function to intersect all return periods of a particular hazard with all
    road or railway assets in the specific region.

    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.

        *hzd* : abbreviation of the hazard we want to intersect. **EQ** for
        earthquakes, **Cyc** for cyclones, **FU** for river flooding, **PU**
        for surface flooding and **CF** for coastal flooding.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        intersect the railway assets in a region.

    Returns:
        *output* : a GeoDataFrame with all intersections between the
        infrastructure assets and the specified hazard. Will also be saved as
        a .feather file.
    """
    # get path where all hazards and data are located
    data_path = load_config()['paths']['data']
    hazard_path = load_config()['paths']['hazard_data']

    # load shapefile with unique information for each region
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # grab the row of the region from the global region shapefile
    x = global_regions.iloc[n]

    # get the name of the region
    region = x.GID_2

    try:
        # check if we already did the hazard intersection for this region. If so, we don't do it again!
        if (not rail) & os.path.exists(
                os.path.join(data_path, 'output_{}_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd))):
            print('{} already finished!'.format(region))
            return pandas.read_feather(
                os.path.join(data_path, 'output_{}_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd)))

        elif (rail) & os.path.exists(
                os.path.join(data_path, 'output_{}_rail_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd))):
            print('{} already finished!'.format(region))
            return pandas.read_feather(
                os.path.join(data_path, 'output_{}_rail_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd)))

        # load specifics for the hazard we want to run.
        if hzd == 'EQ':
            hzd_name_dir = 'Earthquake'
            hzd_names = [
                'EQ_rp250', 'EQ_rp475', 'EQ_rp975', 'EQ_rp1500', 'EQ_rp2475'
            ]
        elif hzd == 'Cyc':
            hzd_name_dir = 'Cyclones'
            hzd_names = [
                'Cyc_rp50', 'Cyc_rp100', 'Cyc_rp250', 'Cyc_rp500',
                'Cyc_rp1000'
            ]
        elif hzd == 'FU':
            hzd_name_dir = 'FluvialFlooding'
            hzd_names = [
                'FU-5', 'FU-10', 'FU-20', 'FU-50', 'FU-75', 'FU-100',
                'FU-200', 'FU-250', 'FU-500', 'FU-1000'
            ]
        elif hzd == 'PU':
            hzd_name_dir = 'PluvialFlooding'
            hzd_names = [
                'PU-5', 'PU-10', 'PU-20', 'PU-50', 'PU-75', 'PU-100',
                'PU-200', 'PU-250', 'PU-500', 'PU-1000'
            ]
        elif hzd == 'CF':
            hzd_name_dir = 'CoastalFlooding'
            hzd_names = [
                'CF-10', 'CF-20', 'CF-50', 'CF-100', 'CF-200', 'CF-500',
                'CF-1000'
            ]

        # extract data from OpenStreetMap, either the roads or the railway data.
        try:
            if not rail:
                road_gpd = roads(data_path, region, regional=True)
                road_dict = map_roads()
                road_gpd['length'] = road_gpd.geometry.apply(line_length)
                road_gpd.geometry = road_gpd.geometry.simplify(tolerance=0.5)
                road_gpd['road_type'] = road_gpd.infra_type.apply(
                    lambda y: road_dict[y])
                infra_gpd = road_gpd.copy()
            elif rail:
                rail_gpd = railway(data_path, region, regional=True)
                rail_gpd['length'] = rail_gpd.geometry.apply(line_length)
                rail_gpd['geometry'] = rail_gpd.geometry.simplify(
                    tolerance=0.5)
                infra_gpd = rail_gpd.copy()
            print('{} osm data loaded!'.format(region))
        except Exception:
            print('{} osm data not properly loaded!'.format(region))
            return None

        # for the global datasets, we can just create a big dataframe with all the hazard polygons
        # (because the resolution is relatively coarse)
        if (hzd == 'EQ') | (hzd == 'Cyc') | (hzd == 'CF'):
            hazard_path = load_config()['paths']['hazard_data']
            hazard_path = os.path.join(hazard_path, hzd_name_dir, 'Global')
            hzd_list = [
                os.path.join(hazard_path, x) for x in os.listdir(hazard_path)
            ]
            try:
                hzds_data = multiple_polygonized(region, x.geometry,
                                                 hzd_list, hzd_names)
            except Exception:
                hzds_data = pandas.DataFrame(columns=['hazard'])

        for iter_, hzd_name in enumerate(hzd_names):
            # for the country level datasets, we need to load the hazard files inside
            # the loop, else we run into RAM problems (and lose time).
            if (hzd == 'PU') | (hzd == 'FU'):
                try:
                    hzds_data = single_polygonized(hzd_name, region,
                                                   x.geometry, x.ISO_3digit,
                                                   hzd)
                    hzd_region = hzds_data.loc[hzds_data.hazard == hzd_name]
                    hzd_region.reset_index(inplace=True, drop=True)
                except Exception:
                    hzd_region = pandas.DataFrame(columns=['hazard'])

            # for the global datasets, we just extract the individual hazard maps
            # from the DataFrame we created before this loop.
            elif (hzd == 'EQ') | (hzd == 'Cyc') | (hzd == 'CF'):
                try:
                    hzd_region = hzds_data.loc[hzds_data.hazard == hzd_name]
                    hzd_region.reset_index(inplace=True, drop=True)
                except Exception:
                    hzd_region = pandas.DataFrame(columns=['hazard'])

            # if there are no hazard values in the region for the specific return period,
            # just give everything zeros.
            if len(hzd_region) == 0:
                infra_gpd['length_{}'.format(hzd_name)] = 0
                infra_gpd['val_{}'.format(hzd_name)] = 0
                continue

            # now let's intersect the hazard with the infrastructure assets and
            # get the hazard values and intersection lengths for each asset.
            hzd_reg_sindex = hzd_region.sindex
            tqdm.pandas(desc=hzd_name + '_' + region)
            inb = infra_gpd.progress_apply(
                lambda asset: intersect_hazard(asset, hzd_reg_sindex,
                                               hzd_region),
                axis=1).copy()
            inb = inb.apply(pandas.Series)
            inb.columns = ['geometry', 'val_{}'.format(hzd_name)]
            inb['length_{}'.format(hzd_name)] = inb.geometry.apply(
                line_length)

            # and add the results to the dataframe with all the infrastructure assets.
            infra_gpd[[
                'length_{}'.format(hzd_name), 'val_{}'.format(hzd_name)
            ]] = inb[[
                'length_{}'.format(hzd_name), 'val_{}'.format(hzd_name)
            ]]

        output = infra_gpd.drop(['geometry'], axis=1)
        output['country'] = global_regions.loc[
            global_regions['GID_2'] == region]['ISO_3digit'].values[0]
        output['continent'] = global_regions.loc[
            global_regions['GID_2'] == region]['continent'].values[0]
        output['region'] = region

        # and save the output to the designated folder for the hazard.
        if not rail:
            output.to_feather(
                os.path.join(data_path, 'output_{}_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd)))
        else:
            output.to_feather(
                os.path.join(data_path, 'output_{}_rail_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd)))

        print('Finished {}!'.format(region))
        return output

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(region, e))
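# Usage sketch (an illustrative addition, not part of the original module):
# running the intersection for every region and one hazard in parallel, the
# same way the other entry points in this file drive their workers.
def _example_run_region_intersection(hzd='FU'):
    data_path = load_config()['paths']['data']
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))
    regions = list(global_regions.index)
    with Pool(cpu_count() - 1) as pool:
        pool.starmap(region_intersection,
                     zip(regions, [hzd] * len(regions)),
                     chunksize=1)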
def all_outputs():
    """
    Summarize all outputs into .csv files per hazard and asset type.
    """
    data_path = load_config()['paths']['data']

    # every summary follows the same pattern: read all regional files in a
    # folder with the matching loader, concatenate them and save one csv.
    summaries = [
        # hazard impacts, roads and railways
        ('FU_impacts', load_FU_csv, 'FU_road_losses.csv'),
        ('FU_impacts_rail', load_FU_csv_rail, 'FU_rail_losses.csv'),
        ('PU_impacts', load_PU_csv, 'PU_road_losses.csv'),
        ('PU_impacts_rail', load_PU_csv_rail, 'PU_rail_losses.csv'),
        ('EQ_impacts', load_EQ_csv, 'EQ_road_losses.csv'),
        ('EQ_impacts_rail', load_EQ_csv_rail, 'EQ_rail_losses.csv'),
        ('CF_impacts', load_CF_csv, 'CF_road_losses.csv'),
        ('CF_impacts_rail', load_CF_csv_rail, 'CF_rail_losses.csv'),
        ('Cyc_impacts', load_Cyc_csv, 'Cyc_road_losses.csv'),
        ('Cyc_impacts_rail', load_Cyc_csv_rail, 'Cyc_rail_losses.csv'),
        # sensitivity analyses, roads only
        ('FU_sensitivity', load_FU_csv_sens, 'sa_FU_road_losses.csv'),
        ('PU_sensitivity', load_PU_csv_sens, 'sa_PU_road_losses.csv'),
        ('CF_sensitivity', load_CF_csv_sens, 'sa_CF_road_losses.csv'),
        ('EQ_sensitivity', load_EQ_csv_sens, 'sa_EQ_road_losses.csv'),
        ('Cyc_sensitivity', load_Cyc_csv_sens, 'sa_Cyc_road_losses.csv'),
        # bridges
        ('bridge_rail_risk', load_bridge_rail_csv, 'bridge_rail_risk_.csv'),
        ('bridge_road_risk', load_bridge_road_csv, 'bridges_road_risk_.csv'),
    ]

    for folder, loader, outfile in summaries:
        get_files = os.listdir(os.path.join(data_path, folder))
        with Pool(40) as pool:
            collect_output = list(
                tqdm(pool.imap(loader, get_files), total=len(get_files)))
        pd.concat(collect_output, sort=True).reset_index(drop=True).to_csv(
            os.path.join(data_path, 'summarized', outfile))
def all_countries(subset=[], regionalized=False, reversed_order=False):
    """
    Clip all countries from the planet osm file and save them to individual
    osm.pbf files.

    Optional Arguments:
        *subset* : allow for a pre-defined subset of countries. Requires ISO3
        codes. Will run all countries if left empty.

        *regionalized* : Default is **False**. Set to **True** if you want to
        have the regions of a country as well.

        *reversed_order* : Default is **False**. Set to **True** to work
        backwards for a second process of the same country set to prevent
        overlapping calculations.

    Returns:
        clipped osm.pbf files for the defined set of countries (either the
        whole world by default or the specified subset).
    """
    # set data path
    data_path = load_config()['paths']['data']

    # path to planet file
    planet_path = os.path.join(data_path, 'planet_osm',
                               'planet-latest.osm.pbf')

    # global shapefile path
    if regionalized:
        world_path = os.path.join(data_path, 'input_data',
                                  'global_regions.shp')
    else:
        world_path = os.path.join(data_path, 'input_data',
                                  'global_countries.shp')

    # create poly files for all countries
    poly_files(data_path,
               world_path,
               save_shapefile=False,
               regionalized=regionalized)

    # prepare lists for multiprocessing
    if not os.path.exists(os.path.join(data_path, 'country_poly_files')):
        os.makedirs(os.path.join(data_path, 'country_poly_files'))

    if not os.path.exists(os.path.join(data_path, 'country_osm')):
        os.makedirs(os.path.join(data_path, 'country_osm'))

    if not regionalized:
        get_poly_files = os.listdir(
            os.path.join(data_path, 'country_poly_files'))
        if len(subset) > 0:
            polyPaths = [
                os.path.join(data_path, 'country_poly_files', x)
                for x in get_poly_files if x[:3] in subset
            ]
            area_pbfs = [
                os.path.join(data_path, 'country_osm',
                             x.split('.')[0] + '.osm.pbf')
                for x in get_poly_files if x[:3] in subset
            ]
        else:
            polyPaths = [
                os.path.join(data_path, 'country_poly_files', x)
                for x in get_poly_files
            ]
            area_pbfs = [
                os.path.join(data_path, 'country_osm',
                             x.split('.')[0] + '.osm.pbf')
                for x in get_poly_files
            ]

        big_osm_paths = [planet_path] * len(polyPaths)

    else:
        if not os.path.exists(os.path.join(data_path,
                                           'regional_poly_files')):
            os.makedirs(os.path.join(data_path, 'regional_poly_files'))

        if not os.path.exists(os.path.join(data_path, 'region_osm_admin1')):
            os.makedirs(os.path.join(data_path, 'region_osm_admin1'))

        get_poly_files = os.listdir(
            os.path.join(data_path, 'regional_poly_files'))
        if len(subset) > 0:
            polyPaths = [
                os.path.join(data_path, 'regional_poly_files', x)
                for x in get_poly_files if x[:3] in subset
            ]
            area_pbfs = [
                os.path.join(data_path, 'region_osm_admin1',
                             x.split('.')[0] + '.osm.pbf')
                for x in get_poly_files if x[:3] in subset
            ]
            big_osm_paths = [
                os.path.join(data_path, 'country_osm', x[:3] + '.osm.pbf')
                for x in get_poly_files if x[:3] in subset
            ]
        else:
            polyPaths = [
                os.path.join(data_path, 'regional_poly_files', x)
                for x in get_poly_files
            ]
            area_pbfs = [
                os.path.join(data_path, 'region_osm_admin1',
                             x.split('.')[0] + '.osm.pbf')
                for x in get_poly_files
            ]
            big_osm_paths = [
                os.path.join(data_path, 'country_osm', x[:3] + '.osm.pbf')
                for x in get_poly_files
            ]

    data_paths = [data_path] * len(polyPaths)

    # allow for reversed order if you want to run two processes at the same time
    # (convenient to work backwards for the second process, to prevent overlapping calculations)
    if reversed_order:
        polyPaths = polyPaths[::-1]
        area_pbfs = area_pbfs[::-1]
        big_osm_paths = big_osm_paths[::-1]

    # extract all country osm files through multiprocessing
    with Pool(cpu_count() - 1) as pool:
        pool.starmap(clip_osm,
                     zip(data_paths, big_osm_paths, polyPaths, area_pbfs))
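# Usage sketch (illustrative, not part of the original module): clip a small
# subset of countries, including their subnational regions. The ISO3 codes
# here are examples only.
def _example_clip_subset():
    all_countries(subset=['KEN', 'UGA'], regionalized=True)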
def bridge_intersection(file, rail=False):
    """
    Function to obtain all bridge intersection values from the regional
    intersection data. To be able to do this, we require all other hazard
    intersection files to be finished.

    Arguments:
        *file* : file with all unique road bridges in a region.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        intersect the railway bridges in a region.

    Returns:
        *.feather file* : a geopandas GeoDataFrame, saved as .feather file
        with all intersection values.
    """
    # specify the file path where all data is located.
    data_path = load_config()['paths']['data']

    # obtain the paths of the intersected data for all five hazards
    suffix = '_rail_full' if rail else '_full'
    hazard_files = {}
    for hzd in ['EQ', 'Cyc', 'FU', 'PU', 'CF']:
        folder = os.path.join(data_path, 'output_{}{}'.format(hzd, suffix))
        hazard_files[hzd] = [
            os.path.join(folder, x) for x in os.listdir(folder)
        ]

    # read the datafile with all bridges in the region we are interested in.
    df_bridge = pandas.read_csv(file, index_col=[0])
    df_bridge['osm_id'] = df_bridge.osm_id.astype(str)

    region_name = os.path.split(file)[1][:-6]

    # merge the hazard values of each hazard into the bridge dataframe
    for hzd in ['EQ', 'Cyc', 'FU', 'PU', 'CF']:
        # load the hazard intersection file for this region
        df_hzd = pandas.read_feather(
            [x for x in hazard_files[hzd] if region_name in x][0])
        df_hzd['osm_id'] = df_hzd.osm_id.astype(str)

        hzd_cols = [
            x for x in list(df_hzd.columns)
            if ('val' in x) | ('length_' in x)
        ]

        if len(df_bridge.loc[df_bridge.osm_id.isin(list(
                df_hzd.osm_id))]) == 0:
            # no bridges intersect this hazard: fill the hazard columns with zeros
            df_output = pandas.DataFrame(columns=hzd_cols,
                                         index=df_bridge.index).fillna(0)
            df_bridge = pandas.concat([df_bridge, df_output], axis=1)
        else:
            # grab the hazard values of the bridges in this region
            region_bridges = df_bridge.loc[df_bridge.osm_id.isin(
                list(df_hzd.osm_id))]
            df_reg_bridges = df_hzd.loc[df_hzd.osm_id.isin(
                [str(x) for x in list(region_bridges.osm_id)])]
            df_bridge = df_bridge.merge(df_reg_bridges[hzd_cols +
                                                       ['osm_id']],
                                        left_on='osm_id',
                                        right_on='osm_id',
                                        how='left')

    df_bridge.drop('geometry', inplace=True, axis=1)

    # save the intersected bridges to a new file with all hazard intersections.
    if not rail:
        df_bridge.to_feather(
            os.path.join(data_path, 'bridges_osm_roads',
                         '{}.ft'.format(list(df_bridge.region.unique())[0])))
    else:
        df_bridge.to_feather(
            os.path.join(data_path, 'bridges_osm_rail',
                         '{}.ft'.format(list(df_bridge.region.unique())[0])))
def single_polygonized(flood_scen, region, geometry, country_ISO3, hzd='FU'):
    """
    Function to polygonize a surface or river flood hazard map so it can be
    overlaid with the infrastructure assets.

    Arguments:
        *flood_scen* : Unique ID for the flood scenario to be used.

        *region* : Unique ID of the region that is intersected.

        *geometry* : Shapely geometry of the region that is being intersected.

        *country_ISO3* : ISO3 code of the country in which the region is
        situated. Required to get the FATHOM flood maps.

    Optional Arguments:
        *hzd* : Default is **FU**. Can be changed to **PU** for surface flooding.

    Returns:
        *gdf* : A GeoDataFrame where each row is a polygon with the same flood depth.
    """
    # get path where all hazards are located
    hazard_path = load_config()['paths']['hazard_data']

    # get dictionary in which we can look up the name of the country used in the FATHOM flood files.
    folder_dict = create_folder_lookup()

    # fix a few things that are still wrong in the data
    if (country_ISO3 == 'SDN') | (country_ISO3 == 'SSD'):
        country_full = 'sudan'
        country_ISO2 = 'SD'
    else:
        country_full = folder_dict[country_ISO3]
        country_ISO2 = coco.convert(names=[country_ISO3], to='ISO2')

    # create geojson geometry to do the rasterio masking
    geoms = [mapping(geometry)]

    # get the full path name of fluvial or pluvial flooding
    if hzd == 'FU':
        flood_type = 'fluvial_undefended'
    else:
        flood_type = 'pluvial_undefended'

    # specify path to the hazard map
    flood_path = os.path.join(
        hazard_path, 'InlandFlooding', country_full,
        '{}_{}_merged'.format(country_ISO2, flood_type),
        '{}-{}.tif'.format(country_ISO2, flood_scen))

    # load hazard map with rasterio and clip it to the area we are interested in.
    with rasterio.open(flood_path) as src:
        out_image, out_transform = mask(src, geoms, crop=True)

    # set points in waterbodies and zeros to -1, so we can easily remove them from the dataset
    out_image[out_image == 999] = -1
    out_image[out_image <= 0] = -1
    out_image = numpy.round(out_image, 1)

    # change to centimeters and integers; this substantially reduces the size.
    out_image = numpy.array(out_image * 100, dtype='int32')

    # the actual polygonization of the raster map
    results = ({
        'properties': {
            'raster_val': v
        },
        'geometry': s
    } for i, (s, v) in enumerate(
        shapes(out_image[0, :, :], mask=None, transform=out_transform)))

    # and save to a new geopandas GeoDataFrame
    gdf = geopandas.GeoDataFrame.from_features(list(results),
                                               crs='epsg:4326')
    gdf = gdf.loc[gdf.raster_val > 0]
    gdf = gdf.loc[gdf.raster_val < 5000]
    gdf['geometry'] = gdf.buffer(0)
    gdf['hazard'] = flood_scen

    return gdf
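# A self-contained sketch (illustrative, not part of the pipeline) of the
# raster-polygonization pattern used above: every contiguous patch of equal
# cell value becomes one polygon feature with that value attached.
def _example_polygonize():
    import numpy
    import geopandas
    from rasterio.features import shapes
    from rasterio.transform import from_origin

    # a tiny synthetic 'hazard' raster: two depth classes and a no-data area (-1)
    arr = numpy.array([[-1, -1, 120], [-1, 250, 250], [120, 120, -1]],
                      dtype='int32')
    transform = from_origin(0.0, 3.0, 1.0, 1.0)  # 1x1 degree cells
    results = ({'properties': {'raster_val': v}, 'geometry': s}
               for s, v in shapes(arr, mask=None, transform=transform))
    gdf = geopandas.GeoDataFrame.from_features(list(results),
                                               crs='epsg:4326')
    return gdf.loc[gdf.raster_val > 0]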
def single_country(country, regionalized=False, create_poly_files=False):
    """
    Clip a country from the planet osm file and save it to an individual
    osm.pbf file. This function has the option to extract individual regions.

    Arguments:
        *country* : The country for which we want to extract the data.

    Keyword Arguments:
        *regionalized* : Default is **False**. Set to **True** to parallelize
        the extraction over all regions within a country.

        *create_poly_files* : Default is **False**. Set to **True** to create
        new .poly files.
    """
    # set data path
    data_path = load_config()['paths']['data']

    # path to planet file
    planet_path = os.path.join(data_path, 'planet_osm',
                               'planet-latest.osm.pbf')

    # global shapefile path
    if regionalized:
        world_path = os.path.join(data_path, 'input_data',
                                  'global_regions.shp')
    else:
        world_path = os.path.join(data_path, 'input_data',
                                  'global_countries.shp')

    # create poly files for all countries
    if create_poly_files:
        poly_files(data_path,
                   world_path,
                   save_shapefile=False,
                   regionalized=regionalized)

    if not os.path.exists(os.path.join(data_path, 'country_poly_files')):
        os.makedirs(os.path.join(data_path, 'country_poly_files'))

    if not os.path.exists(os.path.join(data_path, 'country_osm')):
        os.makedirs(os.path.join(data_path, 'country_osm'))

    ctry_poly = os.path.join(data_path, 'country_poly_files',
                             '{}.poly'.format(country))
    ctry_pbf = os.path.join(data_path, 'country_osm',
                            '{}.osm.pbf'.format(country))

    if not regionalized:
        clip_osm(data_path, planet_path, ctry_poly, ctry_pbf)

    else:
        if not os.path.exists(ctry_pbf):
            clip_osm(data_path, planet_path, ctry_poly, ctry_pbf)

        if not os.path.exists(os.path.join(data_path,
                                           'regional_poly_files')):
            os.makedirs(os.path.join(data_path, 'regional_poly_files'))

        if not os.path.exists(os.path.join(data_path, 'region_osm_admin1')):
            os.makedirs(os.path.join(data_path, 'region_osm_admin1'))

        get_poly_files = [
            x for x in os.listdir(
                os.path.join(data_path, 'regional_poly_files'))
            if x.startswith(country)
        ]
        polyPaths = [
            os.path.join(data_path, 'regional_poly_files', x)
            for x in get_poly_files
        ]
        area_pbfs = [
            os.path.join(data_path, 'region_osm_admin1',
                         x.split('.')[0] + '.osm.pbf')
            for x in get_poly_files
        ]
        data_paths = [data_path] * len(polyPaths)
        planet_paths = [ctry_pbf] * len(polyPaths)

        # and run all regions parallel to each other
        with Pool(cpu_count() - 1) as pool:
            pool.starmap(
                clip_osm,
                zip(data_paths, planet_paths, polyPaths, area_pbfs))
def get_liquefaction_region(n, rail=False):
    """
    Function to intersect the global liquefaction susceptibility map with all
    road or railway assets in the specific region.

    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        intersect the railway assets in a region.

    Returns:
        *output* : a GeoDataFrame with all intersections between the
        infrastructure assets and the liquefaction map. Will be saved as a
        .feather file.
    """
    try:
        # specify the file path where all data is located.
        data_path = load_config()['paths']['data']

        # load shapefile with unique information for each region
        global_regions = geopandas.read_file(
            os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

        # grab the row of the region from the global region shapefile
        x = global_regions.iloc[n]

        # get name of the region and the geometry
        region = x.GID_2
        reg_geom = x.geometry

        # if the intersection is already done for this region, stop and move on to the next region.
        if (not rail) & os.path.exists(
                os.path.join(data_path, 'liquefaction_road',
                             '{}_liq.ft'.format(region))):
            print('{} already finished!'.format(region))
            return None

        if (rail) & os.path.exists(
                os.path.join(data_path, 'liquefaction_rail',
                             '{}_liq.ft'.format(region))):
            print('{} already finished!'.format(region))
            return None

        # load OpenStreetMap data.
        if not rail:
            road_gpd = roads(data_path, region, regional=True)
            road_dict = map_roads()
            road_gpd['length'] = road_gpd.geometry.apply(line_length)
            road_gpd.geometry = road_gpd.geometry.simplify(tolerance=0.5)
            road_gpd['road_type'] = road_gpd.infra_type.apply(
                lambda y: road_dict[y])
            infra_gpd = road_gpd.copy()
        else:
            rail_gpd = railway(data_path, region, regional=True)
            rail_gpd['length'] = rail_gpd.geometry.apply(line_length)
            rail_gpd.geometry = rail_gpd.geometry.simplify(tolerance=0.5)
            infra_gpd = rail_gpd.copy()

        # create geojson geometry to do the rasterio masking
        geoms = [mapping(reg_geom.envelope.buffer(1))]

        # extract the raster values within the polygon
        with rasterio.open(
                os.path.join(data_path, 'Hazards', 'Liquefaction', 'Global',
                             'liquefaction_v1_deg.tif')) as src:
            out_image, out_transform = mask(src, geoms, crop=True)
            out_image = out_image[0, :, :]

        # change the array to integers, to reduce the size of the polygonized GeoDataFrame.
        out_image[out_image <= 0] = -1
        out_image = numpy.array(out_image, dtype='int32')

        # the actual polygonization of the raster map
        results = ({
            'properties': {
                'raster_val': v
            },
            'geometry': s
        } for i, (s, v) in enumerate(
            shapes(out_image[:, :], mask=None, transform=out_transform)))

        # and save to a geodataframe
        gdf = geopandas.GeoDataFrame.from_features(list(results),
                                                   crs='epsg:4326')
        gdf['geometry'] = gdf.buffer(0)

        # now let's intersect the liquefaction map with the infrastructure assets.
        tqdm.pandas(desc=region)
        inb = infra_gpd.progress_apply(
            lambda asset: intersect_hazard(
                asset, gdf.sindex, gdf, liquefaction=True),
            axis=1).copy()
        inb = inb.apply(pandas.Series)
        inb.columns = ['geometry', 'liquefaction']
        inb['length_liq'] = inb.geometry.apply(line_length)
        infra_gpd[['length_liq', 'liquefaction']] = inb[['length_liq',
                                                         'liquefaction']]

        output = infra_gpd.drop(['geometry'], axis=1)
        output['country'] = region[:3]
        output['continent'] = x.continent
        output['region'] = region

        # and save the output to the designated folders.
        if not rail:
            output.to_feather(
                os.path.join(data_path, 'liquefaction_road',
                             '{}_liq.ft'.format(region)))
        else:
            output.to_feather(
                os.path.join(data_path, 'liquefaction_rail',
                             '{}_liq.ft'.format(region)))

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(n, e))
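# A rough sketch (an assumption; the real intersect_hazard() lives elsewhere
# in the repository) of the spatial-index pattern used above: query the
# R-tree for candidate hazard polygons, then compute the exact intersection.
def _example_intersect_hazard(asset, hzd_sindex, hzd_region):
    # coarse filter: polygons whose bounding boxes overlap the asset
    candidates = list(hzd_sindex.intersection(asset.geometry.bounds))
    matches = hzd_region.iloc[candidates]
    hits = matches.loc[matches.intersects(asset.geometry)]
    if len(hits) == 0:
        return asset.geometry, 0
    # fine filter: clip the asset to the hazard polygons and take the
    # maximum hazard value among the intersecting polygons
    clipped = asset.geometry.intersection(hits.unary_union)
    return clipped, hits.raster_val.max()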
def load_Cyc_csv_sens(x):
    data_path = load_config()['paths']['data']
    return pd.read_csv(os.path.join(data_path, 'Cyc_sensitivity', x))
def regional_railway(n, prot_lookup, data_path):
    """
    Function to get summarized exposure values for each region for all railway assets.

    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.

        *prot_lookup* : dictionary with dike design standards for a region.

        *data_path* : file path to location of all data.

    Returns:
        *dataframe* : a pandas DataFrame with exposure statistics.
    """
    # load shapefile with unique information for each region
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # grab the row of the region from the global region shapefile
    region = global_regions.iloc[n]

    print('{} started!'.format(region.GID_2))

    try:
        # load ID and income group for the region
        ID = region.GID_2
        wbincome = region.wbincome

        # specify all unique hazard abbreviations
        hazards = ['EQ', 'Cyc', 'PU', 'FU', 'CF']
        collect_risks = []

        # load regional statistics
        reg_stats = pandas.read_csv(
            os.path.join(data_path, 'railway_stats',
                         '{}_stats.csv'.format(ID)))

        # loop over all hazards
        for hazard in hazards:
            try:
                # read exposure data
                df = pandas.read_feather(
                    os.path.join(data_path,
                                 'output_{}_rail_full'.format(hazard),
                                 '{}_{}.ft'.format(ID, hazard)))
            except Exception:
                continue

            # correct for protection standards for fluvial and coastal flooding
            if (hazard == 'FU') | (hazard == 'CF'):
                prot_stand = prot_lookup[ID]
                no_floods = [
                    x for x in [x for x in df.columns if 'val' in x]
                    if prot_stand > int(x.split('-')[1])
                ]
                df[no_floods] = 0

            # correct for (assumed) design standards for surface flooding
            if hazard == 'PU':
                if wbincome == 'HIC':
                    df.loc[:, ['val_PU-5', 'val_PU-10', 'val_PU-20',
                               'val_PU-50']] = 0
                elif wbincome == 'UMC':
                    df.loc[:, ['val_PU-5', 'val_PU-10', 'val_PU-20']] = 0
                else:
                    df.loc[:, ['val_PU-5', 'val_PU-10']] = 0

            # correct for (assumed) design standards for river flooding
            if hazard == 'FU':
                if wbincome == 'HIC':
                    df.loc[:, ['val_FU-5', 'val_FU-10', 'val_FU-20',
                               'val_FU-50']] = 0
                elif wbincome == 'UMC':
                    df.loc[:, ['val_FU-5', 'val_FU-10', 'val_FU-20']] = 0
                else:
                    df.loc[:, ['val_FU-5', 'val_FU-10']] = 0

            # correct for (assumed) design standards for coastal flooding
            if hazard == 'CF':
                if wbincome == 'HIC':
                    df.loc[:, ['val_CF-10', 'val_CF-20', 'val_CF-50']] = 0
                elif wbincome == 'UMC':
                    df.loc[:, ['val_CF-10', 'val_CF-20']] = 0
                else:
                    df.loc[:, ['val_CF-10']] = 0

            if hazard == 'EQ':
                reg_df = df.copy()
            else:
                reg_df = reg_df.merge(df[[
                    x for x in df.columns
                    if ('val_' in x) | ('length_' in x)
                ] + ['osm_id']],
                                      left_on='osm_id',
                                      right_on='osm_id')

            # something went wrong in the order of the hazard maps, correct that here.
            if hazard == 'EQ':
                event_list = [
                    'EQ_rp250', 'EQ_rp475', 'EQ_rp975', 'EQ_rp1500',
                    'EQ_rp2475'
                ]
                RPS = [1 / 250, 1 / 475, 1 / 975, 1 / 1500, 1 / 2475]
                cat_list = [1, 2, 3, 4]
                bins = [-1, 92, 180, 340, 650, 2000]
                df = df.rename(
                    {
                        'val_EQ_rp250': 'val_EQ_rp475',
                        'val_EQ_rp475': 'val_EQ_rp1500',
                        'val_EQ_rp975': 'val_EQ_rp250',
                        'val_EQ_rp1500': 'val_EQ_rp2475',
                        'val_EQ_rp2475': 'val_EQ_rp975',
                        'length_EQ_rp250': 'length_EQ_rp475',
                        'length_EQ_rp475': 'length_EQ_rp1500',
                        'length_EQ_rp975': 'length_EQ_rp250',
                        'length_EQ_rp1500': 'length_EQ_rp2475',
                        'length_EQ_rp2475': 'length_EQ_rp975',
                    },
                    axis='columns')
            elif hazard == 'Cyc':
                event_list = [
                    'Cyc_rp50', 'Cyc_rp100', 'Cyc_rp250', 'Cyc_rp500',
                    'Cyc_rp1000'
                ]
                RPS = [1 / 50, 1 / 100, 1 / 250, 1 / 500, 1 / 1000]
                cat_list = [1, 2, 3, 4]
                bins = [-1, 154, 178, 209, 252, 1000]
                df = df.rename(
                    {
                        'val_Cyc_rp100': 'val_Cyc_rp1000',
                        'val_Cyc_rp500': 'val_Cyc_rp100',
                        'val_Cyc_rp1000': 'val_Cyc_rp500',
                        'length_Cyc_rp100': 'length_Cyc_rp1000',
                        'length_Cyc_rp500': 'length_Cyc_rp100',
                        'length_Cyc_rp1000': 'length_Cyc_rp500'
                    },
                    axis='columns')
            elif hazard == 'FU':
                event_list = [
                    'FU-5', 'FU-10', 'FU-20', 'FU-50', 'FU-75', 'FU-100',
                    'FU-200', 'FU-250', 'FU-500', 'FU-1000'
                ]
                RPS = [
                    1 / 5, 1 / 10, 1 / 20, 1 / 50, 1 / 75, 1 / 100, 1 / 200,
                    1 / 250, 1 / 500, 1 / 1000
                ]
                cat_list = [1, 2, 3, 4]
                bins = [-1, 25, 50, 100, 200, 2000]
            elif hazard == 'PU':
                event_list = [
                    'PU-5', 'PU-10', 'PU-20', 'PU-50', 'PU-75', 'PU-100',
                    'PU-200', 'PU-250', 'PU-500', 'PU-1000'
                ]
                RPS = [
                    1 / 5, 1 / 10, 1 / 20, 1 / 50, 1 / 75, 1 / 100, 1 / 200,
                    1 / 250, 1 / 500, 1 / 1000
                ]
                cat_list = [1, 2, 3, 4]
                bins = [-1, 25, 50, 100, 200, 2000]
            elif hazard == 'CF':
                event_list = [
                    'CF-10', 'CF-20', 'CF-50', 'CF-100', 'CF-200', 'CF-500',
                    'CF-1000'
                ]
                RPS = [
                    1 / 10, 1 / 20, 1 / 50, 1 / 100, 1 / 200, 1 / 500,
                    1 / 1000
                ]
                cat_list = [1, 2, 3, 4]
                bins = [-1, 25, 50, 100, 200, 2000]

            # calculate the annual kilometers of total possible exposed railway for each asset type
            reg_stats[hazard] = reg_stats.apply(
                lambda x: total_length_risk(x, RPS), axis=1)

            # bin the hazard values into the four risk categories, as specified
            # in the Supplementary Materials of Koks et al. (2019)
            for event in event_list:
                reg_df['binned_{}'.format(event)] = pandas.cut(
                    reg_df['val_{}'.format(event)],
                    bins=bins,
                    labels=[0] + cat_list)

            get_all_cats = []

            # calculate the annual exposed kilometers per risk category per asset type
            for cat in cat_list[:]:
                get_all_events = []
                for event in event_list:
                    event_sep = reg_df.loc[reg_df['binned_{}'.format(
                        event)] == cat][[
                            'length_{}'.format(event), 'country', 'region',
                            'continent', 'infra_type'
                        ]]
                    cont_out = pandas.DataFrame(
                        event_sep.groupby([
                            'continent', 'country', 'region', 'infra_type'
                        ])['length_{}'.format(event)].sum())
                    get_all_events.append(cont_out)

                cat_df = pandas.concat(get_all_events, axis=1)
                cat_df = cat_df.fillna(0)

                if len(cat_df) == 0:
                    cat_df = pandas.DataFrame(
                        columns=list(cat_df.columns) +
                        ['risk_{}_{}'.format(cat, hazard)],
                        index=df.groupby([
                            'continent', 'country', 'region', 'infra_type'
                        ]).sum().index).fillna(0)
                else:
                    cat_df['risk_{}_{}'.format(cat, hazard)] = cat_df.apply(
                        lambda x: exposed_length_risk(x, hazard, RPS),
                        axis=1)
                    cat_df.loc[
                        cat_df['risk_{}_{}'.format(cat, hazard)] < 0] = 0

                cat_df.reset_index(inplace=True)
                get_all_cats.append(
                    cat_df.groupby(
                        ['continent', 'country', 'region',
                         'infra_type']).sum()['risk_{}_{}'.format(
                             cat, hazard)])

            collect_risks.append(
                pandas.concat(get_all_cats, axis=1).fillna(0))

        # return results to be saved in one big file for all regions combined
        return pandas.concat(collect_risks, axis=1).fillna(0)

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(region.GID_2, e))
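# total_length_risk() and exposed_length_risk() are defined elsewhere in the
# repository. As a rough illustration of the kind of calculation they perform
# (an assumption, not the repo's implementation): an expected annual exposure
# can be obtained by integrating exposed length over the exceedance
# probabilities of the return periods with the trapezoidal rule.
def _example_expected_annual_exposure(exposed_km, rps):
    import numpy
    order = numpy.argsort(rps)  # sort by exceedance probability, ascending
    probs = numpy.array(rps)[order]
    kms = numpy.array(exposed_km)[order]
    return numpy.trapz(kms, probs)  # expected annual exposed kilometers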
def load_EQ_csv(x):
    """Load a single regional earthquake impact file from the EQ_impacts folder."""
    data_path = load_config()['paths']['data']
    return pd.read_csv(os.path.join(data_path, 'EQ_impacts', x))
def merge_SSBN_maps(country):
    """
    Function to merge the SSBN maps of a country into single country-level files.

    Arguments:
        *country* : ISO3 code of the country for which we want to merge the
        river and surface flood maps to country level.
    """
    try:
        print('{} started!'.format(country))

        # get path where all hazards are located
        hazard_path = load_config()['paths']['hazard_data']

        # get dictionary in which we can look up the name of the country
        # used in the FATHOM flood files.
        folder_lookup = create_folder_lookup()

        # get ISO2 and full country names for each country
        country_ISO2 = coco.convert(names=[country], to='ISO2')
        country_full = folder_lookup[country]

        rps = [
            '5', '10', '20', '50', '75', '100', '200', '250', '500', '1000'
        ]
        flood_types = ['fluvial_undefended', 'pluvial_undefended']
        flood_types_abb = ['FU', 'PU']
        flood_mapping = dict(zip(flood_types, flood_types_abb))

        # merge all subcountry files into one country file for each hazard.
        for flood_type in flood_types:
            new_folder = os.path.join(
                hazard_path, 'InlandFlooding', country_full,
                '{}_{}_merged'.format(country_ISO2, flood_type))
            os.makedirs(new_folder, exist_ok=True)

            path_to_all_files = os.path.join(
                hazard_path, 'InlandFlooding', country_full,
                '{}_{}'.format(country_ISO2, flood_type))
            full_paths = [
                os.path.join(path_to_all_files, x)
                for x in os.listdir(path_to_all_files) if x.endswith('.tif')
            ]

            for rp in tqdm(rps,
                           desc=flood_type + '_' + country,
                           leave=False,
                           total=len(rps),
                           unit='rp'):
                get_one_rp = [x for x in full_paths if '-{}-'.format(rp) in x]
                stringlist_rp = ' '.join(get_one_rp)
                rp_out = os.path.join(
                    new_folder,
                    '{}-{}-{}.tif'.format(country_ISO2,
                                          flood_mapping[flood_type], rp))
                os.system(
                    'gdal_merge.py -q -o {} {} -co COMPRESS=LZW '
                    '-co BIGTIFF=YES -co PREDICTOR=2 -co TILED=YES'.format(
                        rp_out, stringlist_rp))

        print('{} finished!'.format(country))

    except Exception:
        print('{} failed! It seems we do not have proper flood data '
              'for this country.'.format(country))
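
# merge_SSBN_maps is typically mapped over many countries at once. A minimal
# usage sketch, mirroring the multiprocessing pattern used elsewhere in this
# module (this wrapper is a hypothetical addition, not part of the original
# code):


def merge_SSBN_maps_all(countries):
    """Hypothetical convenience wrapper: merge the SSBN flood maps for a
    list of ISO3 country codes in parallel."""
    from multiprocessing import Pool, cpu_count
    with Pool(cpu_count() - 1) as pool:
        pool.map(merge_SSBN_maps, countries, chunksize=1)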
def load_Cyc_csv_rail(x):
    """Load a single regional cyclone impact file for railway assets."""
    data_path = load_config()['paths']['data']
    return pd.read_csv(os.path.join(data_path, 'Cyc_impacts_rail', x))
def get_tree_density(n, rail=False):
    """
    Function to intersect all road or railway assets in a region with the
    global tree density map.

    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to
        intersect the railway assets in a region.

    Returns:
        *output* : a GeoDataFrame with the tree density value for each
        infrastructure asset in the region. Will be saved as a .feather file.
    """
    try:
        # specify the file path where all data is located.
        data_path = load_config()['paths']['data']

        # load shapefile with unique information for each region
        global_regions = geopandas.read_file(
            os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

        # grab the row of the region from the global region shapefile
        x = global_regions.iloc[n]

        # get name of the region and the geometry
        region = x.GID_2
        reg_geom = x.geometry

        # load OpenStreetMap data.
        if not rail:
            road_gpd = roads(data_path, region, regional=True)
            road_dict = map_roads()
            road_gpd['road_type'] = road_gpd.infra_type.apply(
                lambda y: road_dict[y])
            infra_gpd = road_gpd.copy()
        else:
            rail_gpd = railway(data_path, region, regional=True)
            infra_gpd = rail_gpd.copy()

        # create geojson geometry to do the rasterio masking
        geoms = [mapping(reg_geom.envelope.buffer(1))]

        # extract the raster values within the polygon
        with rasterio.open(
                os.path.join(
                    data_path, 'input_data',
                    'Crowther_Nature_Biome_Revision_01_WGS84_GeoTiff.tif')
        ) as src:
            out_image, out_transform = mask(src, geoms, crop=True)
            out_image = out_image[0, :, :]

        # grab the tree density value for each asset by using a point query
        tqdm.pandas(desc='Tree Density ' + region)
        infra_gpd['Tree_Dens'] = infra_gpd.centroid.progress_apply(
            lambda x: get_raster_value(x, out_image, out_transform))
        infra_gpd['Tree_Dens'] = infra_gpd['Tree_Dens'].astype(float)

        infra_gpd['region'] = region
        infra_gpd = infra_gpd.drop('geometry', axis=1)

        # and save the output to the designated folders.
        if not rail:
            pandas.DataFrame(infra_gpd).to_feather(
                os.path.join(data_path, 'tree_cover_road',
                             '{}.ft'.format(region)))
        else:
            pandas.DataFrame(infra_gpd).to_feather(
                os.path.join(data_path, 'tree_cover_rail',
                             '{}.ft'.format(region)))
        print('{} finished!'.format(region))

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(n, e))
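
# get_raster_value is defined elsewhere in this codebase. A minimal sketch of
# what such a point query could look like (hypothetical, for illustration):
# invert the affine transform to map point coordinates to array indices and
# read the corresponding cell.


def get_raster_value_sketch(point, out_image, out_transform):
    """Hypothetical sketch of a raster point query on a masked array."""
    col, row = ~out_transform * (point.x, point.y)
    row, col = int(row), int(col)
    # guard against centroids that fall just outside the masked window
    if 0 <= row < out_image.shape[0] and 0 <= col < out_image.shape[1]:
        return out_image[row, col]
    return 0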
def load_bridge_road_csv(file):
    """Load a single regional bridge risk file for road assets."""
    data_path = load_config()['paths']['data']
    return pd.read_csv(os.path.join(data_path, 'bridge_road_risk', file))
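
# The load_*_csv helpers above are written to be mapped over a directory
# listing. A minimal usage sketch (a hypothetical wrapper, assuming the
# 'bridge_road_risk' folder has been filled by the risk calculations):


def load_all_bridge_road_csv():
    """Hypothetical convenience wrapper: read every regional bridge risk
    file in parallel and concatenate them into one DataFrame."""
    from multiprocessing import Pool, cpu_count
    data_path = load_config()['paths']['data']
    files = os.listdir(os.path.join(data_path, 'bridge_road_risk'))
    with Pool(cpu_count() - 1) as pool:
        collect = pool.map(load_bridge_road_csv, files, chunksize=1)
    return pd.concat(collect, ignore_index=True)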
def global_shapefiles(regionalized=False):
    """
    This function will simplify shapes and add the necessary columns to speed
    up further processing.

    For now, we make use of the latest GADM data:
    https://gadm.org/download_world.html

    Optional Arguments:
        *regionalized* : Default is **False**. Set to **True** to also create
        the global_regions.shp file.
    """
    data_path = load_config()['paths']['data']

    # path to country GADM file
    if not regionalized:
        # load country file
        country_gadm_path = os.path.join(data_path, 'GADM36', 'gadm36_0.shp')
        gadm_level0 = geopandas.read_file(country_gadm_path)

        # remove Antarctica, no roads there anyway
        gadm_level0 = gadm_level0.loc[~gadm_level0['NAME_0'].isin(
            ['Antarctica'])]

        # remove tiny shapes to reduce size substantially
        gadm_level0['geometry'] = gadm_level0.apply(remove_tiny_shapes,
                                                    axis=1)

        # simplify geometries
        gadm_level0['geometry'] = gadm_level0.simplify(
            tolerance=0.005, preserve_topology=True).buffer(0.01).simplify(
                tolerance=0.005, preserve_topology=True)

        # add additional info
        glob_info_path = os.path.join(data_path, 'input_data',
                                      'global_information.xlsx')
        load_glob_info = pandas.read_excel(glob_info_path)
        gadm_level0 = gadm_level0.merge(load_glob_info,
                                        left_on='GID_0',
                                        right_on='ISO_3digit')

        # save to new country file
        glob_ctry_path = os.path.join(data_path, 'input_data',
                                      'global_countries.shp')
        gadm_level0.to_file(glob_ctry_path)

    else:
        # this is dependent on the country file, so check whether that one
        # is already created:
        glob_ctry_path = os.path.join(data_path, 'input_data',
                                      'global_countries.shp')
        if os.path.exists(glob_ctry_path):
            gadm_level0 = geopandas.read_file(glob_ctry_path)
        else:
            print('ERROR: You need to create the country file first')
            return None

        # load region file
        region_gadm_path = os.path.join(data_path, 'GADM36', 'gadm36_2.shp')
        gadm_level1 = geopandas.read_file(region_gadm_path)

        # remove tiny shapes to reduce size substantially
        gadm_level1['geometry'] = gadm_level1.apply(remove_tiny_shapes,
                                                    axis=1)

        # simplify geometries
        gadm_level1['geometry'] = gadm_level1.simplify(
            tolerance=0.005, preserve_topology=True).buffer(0.01).simplify(
                tolerance=0.005, preserve_topology=True)

        # add additional info
        glob_info_path = os.path.join(data_path, 'input_data',
                                      'global_information.xlsx')
        load_glob_info = pandas.read_excel(glob_info_path)
        gadm_level1 = gadm_level1.merge(load_glob_info,
                                        left_on='GID_0',
                                        right_on='ISO_3digit')
        gadm_level1.rename(columns={'coordinates': 'coordinate'},
                           inplace=True)

        # add some missing geometries from countries with no subregions
        get_missing_countries = list(
            set(gadm_level0.GID_0.unique()).difference(
                set(gadm_level1.GID_0.unique())))
        mis_country = gadm_level0.loc[gadm_level0['GID_0'].isin(
            get_missing_countries)]
        # mis_country['GID_1'] = mis_country['GID_0'] + '_' + str(0) + '_' + str(1)

        gadm_level1 = geopandas.GeoDataFrame(
            pandas.concat([gadm_level1, mis_country], ignore_index=True))
        gadm_level1.reset_index(drop=True, inplace=True)

        # save to new regions file
        gadm_level1.to_file(
            os.path.join(data_path, 'input_data', 'global_regions.shp'))
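
# remove_tiny_shapes is defined elsewhere in this codebase. A minimal sketch
# of the idea (hypothetical; the real implementation and its thresholds may
# differ): for multipolygon geometries, drop parts whose area falls below a
# threshold, so countries with thousands of islets simplify well.

from shapely.geometry import MultiPolygon


def remove_tiny_shapes_sketch(row, min_area=0.01):
    """Hypothetical sketch: keep only the parts of a (multi)polygon larger
    than min_area (squared degrees, since the GADM data is unprojected)."""
    geom = row.geometry
    if geom.geom_type == 'Polygon':
        return geom
    parts = [p for p in geom.geoms if p.area > min_area]
    if not parts:  # never return an empty geometry
        parts = [max(geom.geoms, key=lambda p: p.area)]
    return MultiPolygon(parts)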