def hit_country_per_hazard(intensity_path, names_path, reg_id_path, date_path):
    """Create a list of hit countries from a hazard set.

    Parameters
    ----------
    intensity_path : str
        Path to file containing sparse matrix with hazards as rows and
        grid points as cols, values only at location with impacts
    names_path : str
        Path to file with identifier for each hazard (i.e. IBtracID)
        (rows of the matrix)
    reg_id_path : str
        Path to file with ISO country ID of each grid point
        (cols of the matrix)
    date_path : str
        Path to file with start date of each hazard (rows of the matrix)

    Returns
    -------
    pd.DataFrame
        One row per (hazard, hit country) pair with columns
        'hit_country', 'Date_start', 'ibtracsID'.
    """
    # NOTE(review): pickle.load must only be used on trusted files
    with open(intensity_path, 'rb') as filef:
        inten = pickle.load(filef)
    with open(names_path, 'rb') as filef:
        names = pickle.load(filef)
    with open(reg_id_path, 'rb') as filef:
        reg_id = pickle.load(filef)
    with open(date_path, 'rb') as filef:
        date = pickle.load(filef)

    # for each hazard (row of the intensity matrix) collect the set of
    # countries (region ids) with non-zero intensity
    all_hits = []
    for track in range(len(names)):
        # select only grid-point indices with non-zero intensity
        hits = inten[track, ].nonzero()[1]
        # map grid-point indices to country ids and remove duplicates
        all_hits.append(list(set(reg_id[hits])))

    # build one row per (hazard, hit country) pair; collecting dicts and
    # constructing the DataFrame once replaces the DataFrame.append calls
    # that were deprecated in pandas 1.4 and removed in pandas 2.0
    rows = []
    for track, name in enumerate(names):
        for hit in all_hits[track]:
            rows.append({
                'hit_country': u_coord.country_to_iso(hit, "alpha3"),
                'Date_start': date[track],
                'ibtracsID': name,
            })
    # return data frame with all hit countries per hazard
    return pd.DataFrame(rows, columns=['hit_country', 'Date_start', 'ibtracsID'])
def __init__(self, hazard_dict, exposure, impact_funcs, haz_model="NWP",
             exposure_name=None):
    """Initialization with hazard, exposure and vulnerability.

    Parameters
    ----------
    hazard_dict : dict
        Dictionary of the format {run_datetime: Hazard} with run_datetime
        being the initialization time of a weather forecast run and Hazard
        being a CLIMADA Hazard derived from that forecast for one event.
        A probabilistic representation of that one event is possible,
        as long as the attribute Hazard.date is the same for all events.
        Several run_datetime:Hazard combinations for the same event can be
        provided.
    exposure : Exposure
    impact_funcs : ImpactFuncSet
    haz_model : str, optional
        Short string specifying the model used to create the hazard,
        if possible three big letters. Default is 'NWP' for numerical
        weather prediction.
    exposure_name : str, optional
        string specifying the exposure (e.g. 'EU'), which is used to
        name output files.
    """
    self.run_datetime = list(hazard_dict.keys())
    self.hazard = list(hazard_dict.values())
    # all hazards must refer to one and the same event date, otherwise
    # the forecast cannot be interpreted as a single event
    hazard_date = np.unique(
        [date for hazard in self.hazard for date in hazard.date])
    if not len(hazard_date) == 1:
        raise ValueError(
            "Please provide hazards containing only one "
            "event_date. The current hazards contain several "
            "events with different event_dates and the Forecast "
            "class cannot function properly with such hazards.")
    self.event_date = dt.datetime.fromordinal(hazard_date[0])
    self.haz_model = haz_model
    self.exposure = exposure
    if exposure_name is None:
        try:
            # default exposure name: name of first country in the exposure
            self.exposure_name = u_coord.country_to_iso(
                exposure.gdf.region_id.unique()[0], "name")
        except (KeyError, AttributeError):
            self.exposure_name = "custom"
    else:
        self.exposure_name = exposure_name
    self.vulnerability = impact_funcs
    # one empty Impact placeholder per forecast run; the loop variable must
    # not be called "dt" — that would shadow the datetime module alias
    self._impact = [Impact() for _ in self.run_datetime]
def _map_exp_to_mriot(self, exp_regid, mriot_type): """ Map regions names in exposure into Input-output regions names. exp_regid must be according to ISO 3166 numeric country codes. """ if mriot_type == 'WIOD': mriot_reg_name = u_coord.country_to_iso(exp_regid, "alpha3") idx_country = np.where(self.mriot_reg_names == mriot_reg_name)[0] if not idx_country.size > 0.: mriot_reg_name = 'ROW' elif mriot_type == '': mriot_reg_name = exp_regid return mriot_reg_name
def emdat_countries_by_hazard(emdat_file_csv, hazard=None, year_range=None):
    """Return all countries exposed to a chosen hazard type according to
    EM-DAT data given as CSV.

    Parameters
    ----------
    emdat_file_csv : str, Path, or DataFrame
        Either string with full path to CSV-file or pandas.DataFrame
        loaded from EM-DAT CSV
    hazard : list or str
        List of Disaster (sub-)type according EMDAT terminology, i.e.:
        Animal accident, Drought, Earthquake, Epidemic, Extreme temperature,
        Flood, Fog, Impact, Insect infestation, Landslide,
        Mass movement (dry), Storm, Volcanic activity, Wildfire;
        Coastal Flooding, Convective Storm, Riverine Flood,
        Tropical cyclone, Tsunami, etc.;
        OR CLIMADA hazard type abbreviations, e.g. TC, BF, etc.
    year_range : list or tuple
        Year range to be extracted, e.g. (2000, 2015);
        (only min and max are considered)

    Returns
    -------
    countries_iso3a : list
        ISO3-codes of countries impacted by the disaster (sub-)types
    countries_names : list
        names of countries impacted by the disaster (sub-)types
    """
    filtered = clean_emdat_df(emdat_file_csv, hazard=hazard,
                              year_range=year_range)
    iso3a_codes = list(filtered.ISO.unique())
    names = []
    for code in iso3a_codes:
        try:
            names.append(u_coord.country_to_iso(code, "name"))
        except LookupError:
            # unresolvable ISO code: placeholder keeps both lists aligned
            names.append('NA')
    return iso3a_codes, names
def emdat_to_impact(emdat_file_csv, hazard_type_climada, year_range=None,
                    countries=None, hazard_type_emdat=None, reference_year=None,
                    imp_str="Total Damages"):
    """Load EM-DAT data and return an Impact instance with one impact per event.

    Parameters
    ----------
    emdat_file_csv : str or pd.DataFrame
        Either string with full path to CSV-file or pandas.DataFrame
        loaded from EM-DAT CSV
    countries : list of str
        country ISO3-codes or names, e.g. ['JAM', 'CUB'].
        default: countries=None for all countries
    hazard_type_climada : list or str
        List of Disaster (sub-)type according EMDAT terminology, i.e.:
        Animal accident, Drought, Earthquake, Epidemic, Extreme temperature,
        Flood, Fog, Impact, Insect infestation, Landslide,
        Mass movement (dry), Storm, Volcanic activity, Wildfire;
        Coastal Flooding, Convective Storm, Riverine Flood,
        Tropical cyclone, Tsunami, etc.;
        OR CLIMADA hazard type abbreviations, e.g. TC, BF, etc.
    year_range : list or tuple
        Year range to be extracted, e.g. (2000, 2015);
        (only min and max are considered)
    reference_year : int
        reference year of exposures. Impact is scaled proportional to GDP
        to the value of the reference year. Default: No scaling for 0
    imp_str : str
        Column name of impact metric in EMDAT CSV,
        default = "Total Damages ('000 US$)"

    Returns
    -------
    impact_instance : climada.engine.Impact
        impact object of same format as output from CLIMADA impact
        computation. Values scaled with GDP to reference_year if
        reference_year is given, i.e. current US$ for
        imp_str="Total Damages ('000 US$) scaled" (factor 1000 is applied).
        impact_instance.eai_exp holds expected annual impact for each country.
        impact_instance.coord_exp holds rough central coordinates for
        each country.
    countries : list
        ISO3-codes of countries in same order as in impact_instance.eai_exp
    """
    # normalize shorthand impact metric names to the full EM-DAT column name
    if "Total Damages" in imp_str:
        imp_str = "Total Damages ('000 US$)"
    elif "Insured Damages" in imp_str:
        imp_str = "Insured Damages ('000 US$)"
    elif "Reconstruction Costs" in imp_str:
        imp_str = "Reconstruction Costs ('000 US$)"
    # translate to the variable name of the latest supported EM-DAT version
    imp_str = VARNAMES_EMDAT[max(VARNAMES_EMDAT.keys())][imp_str]
    if not hazard_type_emdat:
        hazard_type_emdat = [hazard_type_climada]
    if reference_year == 0:
        # 0 means "no GDP scaling"
        reference_year = None
    # Initiate Impact-instance:
    impact_instance = Impact()
    impact_instance.tag = dict()
    impact_instance.tag['haz'] = TagHaz(
        haz_type=hazard_type_climada,
        file_name=emdat_file_csv,
        description='EM-DAT impact, direct import')
    impact_instance.tag['exp'] = Tag(
        file_name=emdat_file_csv,
        description='EM-DAT impact, direct import')
    impact_instance.tag['impf_set'] = Tag(file_name=None, description=None)
    # Load EM-DAT impact data by event:
    em_data = emdat_impact_event(emdat_file_csv, countries=countries,
                                 hazard=hazard_type_emdat,
                                 year_range=year_range,
                                 reference_year=reference_year,
                                 imp_str=imp_str,
                                 version=max(VARNAMES_EMDAT.keys()))

    if isinstance(countries, str):
        countries = [countries]
    elif not countries:
        # no countries given: take all countries affected by the hazard type
        countries = emdat_countries_by_hazard(
            emdat_file_csv, year_range=year_range,
            hazard=hazard_type_emdat)[0]

    if em_data.empty:
        # no matching events: return the (still empty) Impact instance
        return impact_instance, countries

    impact_instance.event_id = np.array(em_data.index, int)
    impact_instance.event_name = list(em_data[VARNAMES_EMDAT[max(
        VARNAMES_EMDAT.keys())]['Dis No']])

    # default event date: ordinal of January 1st of the event year
    date_list = list()
    for year in list(em_data['Year']):
        date_list.append(datetime.toordinal(datetime.strptime(str(year), '%Y')))
    # refine the date if start day/month/year columns are available
    if 'Start Year' in em_data.columns and 'Start Month' in em_data.columns \
            and 'Start Day' in em_data.columns:
        idx = 0
        for year, month, day in zip(em_data['Start Year'],
                                    em_data['Start Month'],
                                    em_data['Start Day']):
            if np.isnan(year):
                # keep the January-1st fallback for this event
                idx += 1
                continue
            # missing month/day default to 1 (start of year/month)
            if np.isnan(month):
                month = 1
            if np.isnan(day):
                day = 1
            date_list[idx] = datetime.toordinal(
                datetime.strptime('%02i/%02i/%04i' % (day, month, year),
                                  '%d/%m/%Y'))
            idx += 1

    impact_instance.date = np.array(date_list, int)
    impact_instance.crs = DEF_CRS

    # scaled impact only exists if a reference year was given
    if not reference_year:
        impact_instance.at_event = np.array(em_data["impact"])
    else:
        impact_instance.at_event = np.array(em_data["impact_scaled"])
    impact_instance.at_event[np.isnan(impact_instance.at_event)] = 0
    if not year_range:
        year_range = [em_data['Year'].min(), em_data['Year'].max()]
    # uniform event frequency: 1 / (number of years covered)
    impact_instance.frequency = np.ones(
        em_data.shape[0]) / (1 + np.diff(year_range))
    impact_instance.tot_value = 0
    impact_instance.aai_agg = np.nansum(impact_instance.at_event
                                        * impact_instance.frequency)
    impact_instance.unit = 'USD'
    impact_instance.imp_mat = []

    # init rough exposure with central point per country
    shp = shapereader.natural_earth(resolution='110m',
                                    category='cultural',
                                    name='admin_0_countries')
    shp = shapereader.Reader(shp)
    countries_reg_id = list()
    countries_lat = list()
    countries_lon = list()
    impact_instance.eai_exp = np.zeros(
        len(countries))  # empty: damage at exposure
    for idx, cntry in enumerate(countries):
        try:
            cntry = u_coord.country_to_iso(cntry, "alpha3")
        except LookupError:
            # NOTE(review): on failure cntry keeps its unconverted value
            # and is still used for the shape lookup below
            LOGGER.warning('Country not found in iso_country: %s', cntry)
        cntry_boolean = False
        # find the country's bounding box in the Natural Earth shapes
        for rec in shp.records():
            if rec.attributes['ADM0_A3'].casefold() == cntry.casefold():
                bbox = rec.geometry.bounds
                cntry_boolean = True
                break
        if cntry_boolean:
            # rough central point: midpoint of the bounding box
            countries_lat.append(np.mean([bbox[1], bbox[3]]))
            countries_lon.append(np.mean([bbox[0], bbox[2]]))
        else:
            countries_lat.append(np.nan)
            countries_lon.append(np.nan)
        try:
            countries_reg_id.append(u_coord.country_to_iso(cntry, "numeric"))
        except LookupError:
            countries_reg_id.append(0)
        # NOTE(review): str.contains matches substrings, so an ISO code that
        # is a substring of another could over-match — verify against data
        df_tmp = em_data[em_data[VARNAMES_EMDAT[max(
            VARNAMES_EMDAT.keys())]['ISO']].str.contains(cntry)]
        # expected annual impact per country = sum of impacts * frequency
        if not reference_year:
            impact_instance.eai_exp[idx] = sum(
                np.array(df_tmp["impact"]) * impact_instance.frequency[0])
        else:
            impact_instance.eai_exp[idx] = sum(
                np.array(df_tmp["impact_scaled"])
                * impact_instance.frequency[0])

    impact_instance.coord_exp = np.stack([countries_lat, countries_lon],
                                         axis=1)
    # return data frame with all hit countries per hazard
    return impact_instance, countries
def emdat_impact_event(emdat_file_csv, countries=None, hazard=None, year_range=None,
                       reference_year=None, imp_str="Total Damages ('000 US$)",
                       version=2020):
    """Load EM-DAT data and return the impact per event.

    Parameters
    ----------
    emdat_file_csv : str or DataFrame
        Either string with full path to CSV-file or pandas.DataFrame
        loaded from EM-DAT CSV
    countries : list of str
        country ISO3-codes or names, e.g. ['JAM', 'CUB'].
        default: countries=None for all countries
    hazard : list or str
        List of Disaster (sub-)type according EMDAT terminology, i.e.:
        Animal accident, Drought, Earthquake, Epidemic, Extreme temperature,
        Flood, Fog, Impact, Insect infestation, Landslide,
        Mass movement (dry), Storm, Volcanic activity, Wildfire;
        Coastal Flooding, Convective Storm, Riverine Flood,
        Tropical cyclone, Tsunami, etc.;
        OR CLIMADA hazard type abbreviations, e.g. TC, BF, etc.
    year_range : list or tuple
        Year range to be extracted, e.g. (2000, 2015);
        (only min and max are considered)
    reference_year : int
        reference year of exposures. Impact is scaled proportional to GDP
        to the value of the reference year. Default: No scaling for 0
    imp_str : str
        Column name of impact metric in EMDAT CSV,
        default = "Total Damages ('000 US$)"
    version : int
        EM-DAT version to take variable/column names from (default: 2020)

    Returns
    -------
    out : pd.DataFrame
        EMDAT DataFrame with new columns "year", "region_id", "impact", and
        "impact_scaled": total impact per event with same unit as chosen
        impact, but multiplied by 1000 if impact is given as 1000 US$
        (e.g. imp_str="Total Damages ('000 US$) scaled").
    """
    # translate the metric name to the column name of the requested version
    imp_str = VARNAMES_EMDAT[version][imp_str]
    data = clean_emdat_df(emdat_file_csv, hazard=hazard, year_range=year_range,
                          countries=countries, target_version=version)
    # bookkeeping columns expected by downstream consumers
    data['year'] = data['Year']
    data['reference_year'] = reference_year
    data['impact'] = data[imp_str]
    data['impact_scaled'] = scale_impact2refyear(data[imp_str].values,
                                                 data.Year.values,
                                                 data.ISO.values,
                                                 reference_year=reference_year)
    # resolve numeric region id per ISO3 code; unknown codes stay NaN
    data['region_id'] = np.nan
    for iso3 in data.ISO.unique():
        try:
            data.loc[data.ISO == iso3, 'region_id'] = \
                u_coord.country_to_iso(iso3, "numeric")
        except LookupError:
            LOGGER.warning('ISO3alpha code not found in iso_country: %s', iso3)
    # damages reported in thousands of US$ are converted to US$
    if '000 US' in imp_str:
        data['impact'] *= 1e3
        data['impact_scaled'] *= 1e3
    return data.reset_index(drop=True)
def emdat_impact_yearlysum(emdat_file_csv, countries=None, hazard=None, year_range=None,
                           reference_year=None, imp_str="Total Damages ('000 US$)",
                           version=2020):
    """Load EM-DAT data and sum impact per year and country.

    Parameters
    ----------
    emdat_file_csv : str or DataFrame
        Either string with full path to CSV-file or pandas.DataFrame
        loaded from EM-DAT CSV
    countries : list of str
        country ISO3-codes or names, e.g. ['JAM', 'CUB'].
        countries=None for all countries (default)
    hazard : list or str
        List of Disaster (sub-)type according EMDAT terminology, i.e.:
        Animal accident, Drought, Earthquake, Epidemic, Extreme temperature,
        Flood, Fog, Impact, Insect infestation, Landslide,
        Mass movement (dry), Storm, Volcanic activity, Wildfire;
        Coastal Flooding, Convective Storm, Riverine Flood,
        Tropical cyclone, Tsunami, etc.;
        OR CLIMADA hazard type abbreviations, e.g. TC, BF, etc.
    year_range : list or tuple
        Year range to be extracted, e.g. (2000, 2015);
        (only min and max are considered)
    reference_year : int
        reference year to scale impact to (via scale_impact2refyear)
    imp_str : str
        Column name of impact metric in EMDAT CSV,
        default = "Total Damages ('000 US$)"
    version : int
        required EM-DAT data format version (i.e. year of download),
        changes naming of columns/variables (default: 2020)

    Returns
    -------
    out : pd.DataFrame
        DataFrame with summed impact and scaled impact per year and country.
    """
    imp_str = VARNAMES_EMDAT[version][imp_str]
    df_data = clean_emdat_df(emdat_file_csv, countries=countries, hazard=hazard,
                             year_range=year_range, target_version=version)

    df_data[imp_str + " scaled"] = scale_impact2refyear(
        df_data[imp_str].values, df_data.Year.values, df_data.ISO.values,
        reference_year=reference_year)

    out = pd.DataFrame(columns=['ISO', 'region_id', 'year', 'impact',
                                'impact_scaled', 'reference_year'])
    # year span covered by the data is the same for every country, so it is
    # computed once outside the loop (was recomputed per country before)
    all_years = np.arange(min(df_data.Year), max(df_data.Year) + 1)
    for country in df_data.ISO.unique():
        # convert country to iso3 alpha code:
        country = u_coord.country_to_iso(country, "alpha3")
        if not df_data.loc[df_data.ISO == country].size:
            continue
        # numeric code is per country, not per year: resolve it once
        # (was recomputed inside the year loop before)
        region_id = u_coord.country_to_iso(country, "numeric")
        data_out = pd.DataFrame(index=np.arange(0, len(all_years)),
                                columns=out.columns)
        df_country = df_data.loc[df_data.ISO == country]
        for cnt, year in enumerate(all_years):
            data_out.loc[cnt, 'year'] = year
            data_out.loc[cnt, 'reference_year'] = reference_year
            data_out.loc[cnt, 'ISO'] = country
            data_out.loc[cnt, 'region_id'] = region_id
            # sum all events of this country in this year (NaN-safe)
            data_out.loc[cnt, 'impact'] = \
                np.nansum(df_country[df_country.Year.isin([year])][imp_str])
            data_out.loc[cnt, 'impact_scaled'] = \
                np.nansum(df_country[df_country.Year.isin([year])]
                          [imp_str + " scaled"])
            if '000 US' in imp_str:  # EM-DAT damages provided in '000 USD
                data_out.loc[cnt, 'impact'] = \
                    data_out.loc[cnt, 'impact'] * 1e3
                data_out.loc[cnt, 'impact_scaled'] = \
                    data_out.loc[cnt, 'impact_scaled'] * 1e3
        out = pd.concat([out, data_out])
    out = out.reset_index(drop=True)
    return out
def clean_emdat_df(emdat_file, countries=None, hazard=None, year_range=None,
                   target_version=2020):
    """Get a clean and standardized DataFrame from EM-DAT-CSV-file

    (1) load EM-DAT data from CSV to DataFrame and remove header/footer,
    (2) handle version, clean up, and add columns, and
    (3) filter by country, hazard type and year range (if any given)

    Parameters
    ----------
    emdat_file : str, Path, or DataFrame
        Either string with full path to CSV-file or pandas.DataFrame
        loaded from EM-DAT CSV
    countries : list of str
        country ISO3-codes or names, e.g. ['JAM', 'CUB'].
        countries=None for all countries (default)
    hazard : list or str
        List of Disaster (sub-)type according EMDAT terminology, i.e.:
        Animal accident, Drought, Earthquake, Epidemic, Extreme temperature,
        Flood, Fog, Impact, Insect infestation, Landslide,
        Mass movement (dry), Storm, Volcanic activity, Wildfire;
        Coastal Flooding, Convective Storm, Riverine Flood,
        Tropical cyclone, Tsunami, etc.;
        OR CLIMADA hazard type abbreviations, e.g. TC, BF, etc.
    year_range : list or tuple
        Year range to be extracted, e.g. (2000, 2015);
        (only min and max are considered)
    target_version : int
        required EM-DAT data format version (i.e. year of download),
        changes naming of columns/variables (default: 2020)

    Returns
    -------
    df_data : pd.DataFrame
        DataFrame containing cleaned and filtered EM-DAT impact data

    Raises
    ------
    TypeError
        If emdat_file is neither str, Path, nor DataFrame.
    """
    # (1) load EM-DAT data from CSV to DataFrame, skipping the header:
    if isinstance(emdat_file, (str, Path)):
        df_emdat = pd.read_csv(emdat_file, encoding="ISO-8859-1", header=0)
        # EM-DAT CSVs carry a variable-length preamble before the column
        # header; retry with a growing number of skipped rows (max. 10)
        counter = 0
        while not ('Country' in df_emdat.columns
                   and 'ISO' in df_emdat.columns):
            counter += 1
            df_emdat = pd.read_csv(emdat_file, encoding="ISO-8859-1",
                                   header=counter)
            if counter == 10:
                break
        del counter
    elif isinstance(emdat_file, pd.DataFrame):
        df_emdat = emdat_file
    else:
        raise TypeError('emdat_file needs to be str or DataFrame')
    # drop rows with 9 or more NaN values (e.g. footer):
    df_emdat = df_emdat.dropna(thresh=9)

    # (2) handle version, clean up, and add columns:
    # (2.1) identify underlying EMDAT version of csv:
    version = 2020
    for vers in list(VARNAMES_EMDAT.keys()):
        if len(df_emdat.columns) >= len(VARNAMES_EMDAT[vers]) and \
                all(item in list(df_emdat.columns)
                    for item in VARNAMES_EMDAT[vers].values()):
            version = vers
    # (2.2) create new DataFrame df_data with column names as target version
    df_data = pd.DataFrame(index=df_emdat.index.values,
                           columns=VARNAMES_EMDAT[target_version].values())
    if 'Year' not in df_data.columns:  # make sure column "Year" exists
        df_data['Year'] = np.nan
    for _, col in enumerate(df_data.columns):  # loop over columns
        if col in VARNAMES_EMDAT[version]:
            df_data[col] = df_emdat[VARNAMES_EMDAT[version][col]]
        elif col in df_emdat.columns:
            df_data[col] = df_emdat[col]
        elif col == 'Year' and version <= 2018:
            # old versions: the year is encoded in the first 4 characters
            # of the disaster number
            years_list = list()
            for _, disaster_no in enumerate(
                    df_emdat[VARNAMES_EMDAT[version]['Dis No']]):
                if isinstance(disaster_no, str):
                    years_list.append(int(disaster_no[0:4]))
                else:
                    years_list.append(np.nan)
            df_data[col] = years_list

    if version <= 2018 and target_version >= 2020:
        # derive 'Start Year'/'Start Month'/'Start Day' from 'Start date'
        date_list = list()
        year_list = list()
        month_list = list()
        day_list = list()
        # fallback: January 1st of the event year (or year 1 if unknown)
        for year in list(df_data['Year']):
            if not np.isnan(year):
                date_list.append(datetime.strptime(str(year), '%Y'))
            else:
                date_list.append(datetime.strptime(str('0001'), '%Y'))
        boolean_warning = True
        for idx, datestr in enumerate(list(df_emdat['Start date'])):
            try:
                # most entries only carry month/year in the last 7 chars
                date_list[idx] = datetime.strptime(datestr[-7:], '%m/%Y')
            except ValueError:
                if boolean_warning:
                    LOGGER.warning('EM_DAT CSV contains invalid time formats')
                    boolean_warning = False
                try:
                    # second chance: full day/month/year format
                    date_list[idx] = datetime.strptime(datestr, '%d/%m/%Y')
                except ValueError:
                    if boolean_warning:
                        LOGGER.warning(
                            'EM_DAT CSV contains invalid time formats')
                        boolean_warning = False
            day_list.append(date_list[idx].day)
            month_list.append(date_list[idx].month)
            year_list.append(date_list[idx].year)
        df_data['Start Month'] = np.array(month_list, dtype='int')
        df_data['Start Day'] = np.array(day_list, dtype='int')
        df_data['Start Year'] = np.array(year_list, dtype='int')
    for var in ['Disaster Subtype', 'Disaster Type', 'Country']:
        df_data[VARNAMES_EMDAT[target_version][var]].fillna('None',
                                                            inplace=True)

    # (3) Filter by countries, year range, and disaster type
    # (3.1) Countries:
    if countries and isinstance(countries, str):
        countries = [countries]
    if countries and isinstance(countries, list):
        for idx, country in enumerate(countries):
            # convert countries to iso3 alpha code:
            countries[idx] = u_coord.country_to_iso(country, "alpha3")
        df_data = df_data[df_data['ISO'].isin(countries)].reset_index(
            drop=True)
    # (3.2) Year range:
    if year_range:
        for idx in df_data.index:
            # back-fill missing 'Year' from 'Start Year' so the row is not
            # dropped by the range filter below.
            # BUGFIX: the original indexed row 0 instead of idx, so only
            # the first row was ever back-filled.
            if np.isnan(df_data.loc[idx, 'Year']):
                df_data.loc[idx, 'Year'] = \
                    df_data.loc[idx,
                                VARNAMES_EMDAT[target_version]['Start Year']]
        df_data = df_data[(df_data['Year'] >= min(year_range)) &
                          (df_data['Year'] <= max(year_range))]
    # (3.3) Disaster type:
    if hazard and isinstance(hazard, str):
        hazard = [hazard]
    if hazard and isinstance(hazard, list):
        disaster_types = list()
        disaster_subtypes = list()
        for idx, haz in enumerate(hazard):
            # accept exact EM-DAT type/subtype names ...
            if haz in df_data[VARNAMES_EMDAT[target_version]
                              ['Disaster Type']].unique():
                disaster_types.append(haz)
            if haz in df_data[VARNAMES_EMDAT[target_version]
                              ['Disaster Subtype']].unique():
                disaster_subtypes.append(haz)
            # ... as well as CLIMADA hazard type abbreviations
            if haz in PERIL_TYPE_MATCH_DICT.keys():
                disaster_types += PERIL_TYPE_MATCH_DICT[haz]
            if haz in PERIL_SUBTYPE_MATCH_DICT.keys():
                disaster_subtypes += PERIL_SUBTYPE_MATCH_DICT[haz]
        df_data = df_data[
            (df_data[VARNAMES_EMDAT[target_version]['Disaster Type']]
             .isin(disaster_types))
            | (df_data[VARNAMES_EMDAT[target_version]['Disaster Subtype']]
               .isin(disaster_subtypes))]
    return df_data.reset_index(drop=True)
def init_spam_agrar(self, **parameters):
    """Initiate agriculture exposure from SPAM data:

    https://dataverse.harvard.edu/
    dataset.xhtml?persistentId=doi:10.7910/DVN/DHXBJX

    Optional parameters:
        data_path (str): absolute path where files are stored.
            Default: SYSTEM_DIR
        country (str): Three letter country code of country to be cut out.
            No default (global)
        name_adm1 (str): Name of admin1 (e.g. Federal State) to be cut out.
            No default
        name_adm2 (str): Name of admin2 to be cut out. No default
        spam_variable (str): select one agricultural variable:
            'A'     physical area
            'H'     harvested area
            'P'     production
            'Y'     yield
            'V_agg' value of production, aggregated to all crops,
                    food and non-food (default)
            Warning: for A, H, P and Y, currently all crops are summed up
        spam_technology (str): select one agricultural technology type:
            'TA' all technologies together, ie complete crop (default)
            'TI' irrigated portion of crop
            'TH' rainfed high inputs portion of crop
            'TL' rainfed low inputs portion of crop
            'TS' rainfed subsistence portion of crop
            'TR' rainfed portion of crop (= TA - TI, or TH + TL + TS)
            ! different impact_ids are assigned to each technology (1-6)
        save_name_adm1 (Boolean): Determines how many aditional data are
            saved:
            False: only basics (lat, lon, total value), region_id per country
            True: like 1 + name of admin1
        haz_type (str): hazard type abbreviation, e.g. 'DR' for Drought or
            'CP' for CropPotential

    Returns:
    """
    # read optional parameters with their defaults
    data_p = parameters.get('data_path', SYSTEM_DIR)
    spam_t = parameters.get('spam_technology', 'TA')
    spam_v = parameters.get('spam_variable', 'V_agg')
    adm0 = parameters.get('country')
    adm1 = parameters.get('name_adm1')
    adm2 = parameters.get('name_adm2')
    save_adm1 = parameters.get('save_name_adm1', False)
    haz_type = parameters.get('haz_type', DEF_HAZ_TYPE)

    # Test if parameters make sense:
    if spam_v not in ['A', 'H', 'P', 'Y', 'V_agg'] or \
            spam_t not in ['TA', 'TI', 'TH', 'TL', 'TS', 'TR']:
        raise ValueError('Invalid input parameter(s).')

    # read data from CSV:
    data = self._read_spam_file(data_path=data_p, spam_technology=spam_t,
                                spam_variable=spam_v, result_mode=1)

    # extract country or admin level (if provided)
    data, region = self._spam_set_country(data, country=adm0,
                                          name_adm1=adm1, name_adm2=adm2)

    # sort by alloc_key to make extraction of lat / lon easier:
    data = data.sort_values(by=['alloc_key'])

    lat, lon = self._spam_get_coordinates(data.loc[:, 'alloc_key'],
                                          data_path=data_p)
    if save_adm1:
        self.name_adm1 = data.loc[:, 'name_adm1'].values

    # select value columns: V_agg has one aggregated column, all other
    # variables are summed over the per-crop columns
    if spam_v == 'V_agg':  # total only (column 7)
        i_1 = 7
        i_2 = 8
    else:
        i_1 = 7  # get sum over all crops (columns 7 to 48)
        i_2 = 49
    self.gdf['value'] = data.iloc[:, i_1:i_2].sum(axis=1).values
    self.gdf['latitude'] = lat.values
    self.gdf['longitude'] = lon.values
    LOGGER.info('Lat. range: {:+.3f} to {:+.3f}.'.format(
        np.min(self.gdf.latitude), np.max(self.gdf.latitude)))
    LOGGER.info('Lon. range: {:+.3f} to {:+.3f}.'.format(
        np.min(self.gdf.longitude), np.max(self.gdf.longitude)))

    # set region_id (numeric ISO3):
    country_id = data.loc[:, 'iso3']
    if country_id.unique().size == 1:
        # single country: one lookup is sufficient
        region_id = np.ones(self.gdf.value.size, int)\
            * u_coord.country_to_iso(country_id.iloc[0], "numeric")
    else:
        # multiple countries: look up the numeric code per grid point
        region_id = np.zeros(self.gdf.value.size, int)
        for i in range(0, self.gdf.value.size):
            region_id[i] = u_coord.country_to_iso(country_id.iloc[i],
                                                  "numeric")
    self.gdf['region_id'] = region_id
    self.ref_year = 2005
    self.tag = Tag()
    self.tag.description = ("SPAM agrar exposure for variable "
                            + spam_v + " and technology " + spam_t)

    # if impact id variation iiv = 1, assign different damage function ID
    # per technology type.
    self._set_impf(spam_t, haz_type)

    self.tag.file_name = (FILENAME_SPAM + '_' + spam_v + '_' + spam_t + '.csv')
    # self.tag.shape = cntry_info[2]
    # self.tag.country = cntry_info[1]

    # unit of the value column depends on the selected variable
    if spam_v in ('A', 'H'):
        self.value_unit = 'Ha'
    elif spam_v == 'Y':
        self.value_unit = 'kg/Ha'
    elif spam_v == 'P':
        self.value_unit = 'mt'
    else:
        self.value_unit = 'USD'

    LOGGER.info('Total {} {} {}: {:.1f} {}.'.format(
        spam_v, spam_t, region, self.gdf.value.sum(), self.value_unit))
    self.check()
def country_iso_geom(countries, shp_file, admin_key=('ADMIN', 'ADM0_A3')):
    """Get country ISO alpha_3, country id (defined as the United Nations
    Statistics Division (UNSD) 3-digit equivalent numeric codes and 0 if
    country not found) and country's geometry shape.

    Parameters
    ----------
    countries : list or dict
        list of country names (admin0) or dict with key = admin0 name
        and value = [admin1 names]
    shp_file : cartopy.io.shapereader.Reader
        shape file
    admin_key : tuple or list of str
        keys to find admin0 name and ISO alpha_3 code in the shape
        records. Default: ('ADMIN', 'ADM0_A3'). (A tuple default avoids
        the shared-mutable-default pitfall of the previous list default.)

    Returns
    -------
    cntry_info : dict
        key = ISO alpha_3 country, value = [country id, country name,
        country geometry]
    cntry_admin1 : dict
        key = ISO alpha_3 country, value = [admin1 geometries]

    Raises
    ------
    ValueError
        If a country name is not found in the shape file.
    """
    # map shape-record admin0 names (title case) to their record index
    countries_shp = {}
    list_records = list(shp_file.records())
    for info_idx, info in enumerate(list_records):
        countries_shp[info.attributes[admin_key[0]].title()] = info_idx

    cntry_info = dict()
    cntry_admin1 = dict()
    if isinstance(countries, list):
        # plain country list: no admin1 subdivision requested
        countries = {cntry: [] for cntry in countries}
        admin1_rec = list()
    else:
        # dict input: load admin1 shapes to resolve the requested provinces
        admin1_rec = shapereader.natural_earth(resolution='10m',
                                               category='cultural',
                                               name='admin_1_states_provinces')
        admin1_rec = shapereader.Reader(admin1_rec)
        admin1_rec = list(admin1_rec.records())

    for country_name, prov_list in countries.items():
        country_idx = countries_shp.get(country_name.title())
        if country_idx is None:
            # suggest close (substring) matches, or the full list if none
            options = [
                country_opt for country_opt in countries_shp
                if country_name.title() in country_opt
            ]
            if not options:
                options = list(countries_shp.keys())
            raise ValueError('Country %s not found. Possible options: %s'
                             % (country_name, options))
        iso3 = list_records[country_idx].attributes[admin_key[1]]
        try:
            cntry_id = u_coord.country_to_iso(iso3, "numeric")
        except LookupError:
            # no numeric code known for this ISO3: fall back to 0
            cntry_id = 0
        cntry_info[iso3] = [
            cntry_id, country_name.title(), list_records[country_idx].geometry
        ]
        cntry_admin1[iso3] = _fill_admin1_geom(iso3, admin1_rec, prov_list)

    return cntry_info, cntry_admin1