def test_str_to_date_pass(self):
    """Check that str_to_date applied to the output of date_to_str gives
    back the input, for a single ordinal and for a list of ordinals."""
    single_ordinal = 730000
    round_trip = u_dt.str_to_date(u_dt.date_to_str(single_ordinal))
    self.assertEqual(round_trip, single_ordinal)

    several_ordinals = [640000, 730000]
    round_trip = u_dt.str_to_date(u_dt.date_to_str(several_ordinals))
    self.assertEqual(round_trip, several_ordinals)
def impact_yearset_from_sampling_vect(imp, sampled_years, sampling_vect, correction_fac=True):
    """Build a yearset of impacts (yimp) from an impact object and a given
    sampling vector.

    One impact value is produced per entry of sampled_years by combining the
    events listed in the corresponding row of sampling_vect. Unlike the
    expected annual impact (eai), the values of yimp differ among years. With
    correction_fac set, the annual impacts are divided by the factor returned
    by calculate_correction_fac so that their average matches the eai of imp.

    Parameters
    -----------
        imp : climada.engine.Impact()
            impact object containing impacts per event
        sampled_years : list
            years that shall be covered by the resulting yimp
        sampling_vect : 2D array
            one sub-array per sampled year holding the event_ids of the events
            used for that year. It is obtained from a first call, i.e.
            [yimp, sampling_vect] = climada_yearsets.impact_yearset(...)
            and can be passed here to reproduce the exact same sampling
            (also for a different imp object)

    Optional parameter
        correction_fac : boolean
            If True, the annual impacts are rescaled with the correction factor
            before being stored in yimp.

    Returns
    -------
        yimp : climada.engine.Impact()
             yearset of impacts containing annual impacts for all sampled_years
    """
    # annual impact for every row of the sampling vector
    annual_impact = compute_imp_per_year(imp, sampling_vect)

    # the input impact object serves as template for the yearset
    yimp = copy.deepcopy(imp)

    # adjust for sampling error
    if correction_fac:
        annual_impact = annual_impact / calculate_correction_fac(annual_impact, imp)

    # store the results in the yearset object
    yimp.at_event = annual_impact
    n_years = len(sampled_years)
    yimp.event_id = np.arange(1, n_years + 1)
    yimp.tag['yimp object'] = True
    first_days = [str(year) + '-01-01' for year in sampled_years]
    yimp.date = u_dt.str_to_date(first_days)
    n_sampled_events = sum(len(year_events) for year_events in sampling_vect)
    yimp.frequency = np.ones(n_years) * n_sampled_events / n_years

    return yimp
def set_from_single_run(self, file_path=None, lonmin=-85, latmin=-180, lonmax=85,
                        latmax=180, years_user=None):
    """Read a netcdf file and initialise the hazard from it.

    The period covered by the file is parsed from its name, which is expected
    to contain 'annual_<startyear>_<endyear>.nc'. One event per year is created.

    Parameters:
        file_path (str): path to netcdf file
        lonmin, latmin, lonmax, latmax (int, optional): bounding box to extract
        years_user (array, optional): start and end year specified by the user

    Returns:
        hazard

    Raises:
        NameError: if no file_path is given
    """
    # NOTE(review): the default bounding box looks like it has longitude and
    # latitude limits swapped (|lon| <= 85, |lat| <= 180). The defaults are
    # kept unchanged for backward compatibility - confirm with the callers.
    if file_path is None:
        LOGGER.error('No drough-file-path set')
        raise NameError

    # determine time period that is covered by the input data
    # (plain ints, so that the band indices below are ints as well)
    years_file = [0, 0]
    found = re.search('annual_(.+?)_', file_path)
    if found:
        years_file[0] = int(found.group(1))
    found = re.search(str(years_file[0]) + '_(.+?).nc', file_path)
    if found:
        years_file[1] = int(found.group(1))

    if years_user is None:
        first_year, last_year = years_file
    else:
        first_year, last_year = int(years_user[0]), int(years_user[1])

    # bands are 1-indexed: band 1 holds the first year contained in the file,
    # hence the band of a given year is (year - first year of file + 1)
    id_bands = list(range(first_year - years_file[0] + 1,
                          last_year - years_file[0] + 2))
    event_list = [str(year) for year in range(first_year, last_year + 1)]
    date = [year + '-01-01' for year in event_list]

    # extract additional information of original file
    # NOTE(review): the dataset is never closed explicitly; if `crop` is a
    # plain file attribute a `with` block would be safer - confirm.
    data = xr.open_dataset(file_path, decode_times=False)
    self.set_raster([file_path], band=id_bands,
                    geometry=[shapely.geometry.box(lonmin, latmin, lonmax, latmax)])
    self.check()
    self.crop = data.crop
    self.event_name = event_list
    # every year gets the same frequency
    self.frequency = np.ones(len(self.event_name)) * (1 / len(self.event_name))
    self.fraction = self.intensity.copy()
    self.fraction.data.fill(1.0)
    self.units = 't / y'
    self.date = np.array(dt.str_to_date(date))
    return self
def plot_start_end_date(self, event=None):
    """Plot the start date and the end date of the chosen event.

    Two plots are produced with plot_intensity, the first one from
    self.date_start and the second one from self.date_end. Note that
    self.intensity is overwritten with the respective date matrix.

    Parameters:
        event: event to plot; it is converted with int() to get the year
    """
    event_year = int(event)
    startyear = str_to_date(str(event_year - 1) + '-09-15')
    startdate = str_to_date(str(event_year - 1) + '-10-01')
    enddate = str_to_date(str(event_year + 1) + '-01-01')

    # tick positions every 100 days, starting at the next multiple of 100
    first_tick = np.ceil(startdate / 100) * 100
    dates = np.arange(first_tick, first_tick + 400, 100)
    list_dates = [date_to_str(tick) for tick in dates.astype(np.int64)]

    colourmap = 'plasma'
    boundaries = np.arange(startyear, enddate, 15)

    # colour list: white for every boundary before the start date, followed
    # by the colours of the colourmap
    cmap_reds = mpl.cm.get_cmap(colourmap, len(boundaries))
    n_before_start = len(np.where(boundaries < startdate)[0])
    colors = ["white"] * n_before_start
    colors.extend(list(cmap_reds(np.arange(len(boundaries) - n_before_start))))

    # define colourmap; values above vmax get the last colour of the list
    cmap = mpl.colors.ListedColormap(colors[1:], "")
    cmap.set_over(colors[-1])

    # first the start dates, then the end dates
    for attr_name in ('date_start', 'date_end'):
        self.intensity = sparse.csr_matrix(getattr(self, attr_name))
        self.plot_intensity(event=event, cmap=cmap, vmin=startdate,
                            vmax=enddate, snap="true")
        plt.ylabel('Date')
        plt.yticks(dates, list_dates)
def post_processing(self, date):
    """Set to zero the intensity and the start date of the event of the given
    year wherever the start date is at or after the given date.

    Parameters:
        date (str): date in format '2003-08-01'; its first four characters
            are looked up in self.event_name to find the event
    """
    event_row = self.event_name.index(date[:4])
    n_columns = self.intensity.shape[1]
    threshold = str_to_date(date)

    for column in range(n_columns):
        starts_late = self.date_start[event_row, column] >= threshold
        if starts_late:
            self.intensity[event_row, column] = 0
            self.date_start[event_row, column] = 0

    # both matrices are stored as csr matrices afterwards
    self.intensity = sparse.csr_matrix(self.intensity)
    self.date_start = sparse.csr_matrix(self.date_start)
def _clean_firms_csv(csv_firms):
    """Read FIRMS data and remove low confidence entries:
        - MODIS: remove data where confidence values are lower than CLEAN_THRESH
        - VIIRS: remove data where confidence values are set to low
        (keep nominal and high values)

    If the file has no 'instrument' column, no entries are removed.

    Parameters:
        csv_firms: csv file of the FIRMS data

    Returns:
        pd.DataFrame with the additional columns 'iter_ev', 'cons_id',
        'event_id', 'clus_id' and 'datenum'
    """
    firms = pd.read_csv(csv_firms)

    # Check for the type of instrument (MODIS vs VIIRS)
    # Remove data with low confidence interval
    # Uniformize the name of the brightness columns between VIIRS and MODIS
    if 'instrument' in firms.columns:
        cleaned = pd.DataFrame()
        # explicit membership test: `Series.any() == 'MODIS'` compares a
        # reduction result with a string and is not reliable
        if firms.instrument.isin(['MODIS', 'VIIRS']).any():
            # everything that is not VIIRS is filtered with the numeric threshold
            firms_modis = firms.drop(firms[firms.instrument == 'VIIRS'].index)
            firms_modis.confidence = np.array(
                [int(conf) for conf in firms_modis.confidence.values.tolist()])
            firms_modis = firms_modis.drop(
                firms_modis[firms_modis.confidence < CLEAN_THRESH].index)
            cleaned = firms_modis

            # everything that is not MODIS is filtered with the 'l' (low) flag
            firms_viirs = firms.drop(firms[firms.instrument == 'MODIS'].index)
            if firms_viirs.size:
                firms_viirs = firms_viirs.drop(
                    firms_viirs[firms_viirs.confidence == 'l'].index)
                firms_viirs = firms_viirs.rename(columns={'bright_ti4': 'brightness'})
                # pd.concat replaces DataFrame.append (removed in pandas 2.0)
                cleaned = pd.concat([cleaned, firms_viirs], sort=True)
                # column left over from the non-VIIRS part of the table
                cleaned = cleaned.drop(columns=['bright_ti4'])

        # continuous index without keeping the old one as a column
        firms = cleaned.reset_index(drop=True)

    n_firms = len(firms)
    firms['iter_ev'] = np.ones(n_firms, bool)
    firms['cons_id'] = np.zeros(n_firms, int) - 1
    firms['event_id'] = np.zeros(n_firms, int)
    firms['clus_id'] = np.zeros(n_firms, int) - 1
    firms['datenum'] = np.array(str_to_date(firms['acq_date'].values))
    return firms
def impact_yearset(imp, sampled_years, lam=None, correction_fac=True):
    """Create a yearset of impacts (yimp) containing a probabilistic impact for each year
    in the sampled_years list by sampling events from the impact received as input with a
    Poisson distribution centered around lam per year (lam = sum(imp.frequency)).
    In contrast to the expected annual impact (eai) yimp contains impact values that
    differ among years. When correction factor is true, the yimp are scaled such
    that the average over all years is equal to the eai.

    Parameters
    -----------
        imp : climada.engine.Impact()
            impact object containing impacts per event
        sampled_years : list
            A list of years that shall be covered by the resulting yimp.

    Optional parameters
        lam: int
            The applied Poisson distribution is centered around lam events per year.
            If no lambda value is given (None or any other falsy value), the default
            lam = sum(imp.frequency) is used.
        correction_fac : boolean
            If True a correction factor is applied to the resulting yimp. It is
            scaled in such a way that the expected annual impact (eai) of the yimp
            equals the eai of the input impact

    Returns
    -------
        yimp : climada.engine.Impact()
             yearset of impacts containing annual impacts for all sampled_years
        sampling_vect : 2D array
            The sampling vector specifies how to sample the yimp, it consists of one
            sub-array per sampled_year, which contains the event_ids of the events used to
            calculate the annual impacts.
            Can be used to re-create the exact same yimp.
    """
    n_sampled_years = len(sampled_years)

    # create sampling vector
    if not lam:
        lam = np.sum(imp.frequency)
    events_per_year = sample_from_poisson(n_sampled_years, lam)
    sampling_vect = sample_events(events_per_year, imp.frequency)

    # compute impact per sampled_year
    imp_per_year = compute_imp_per_year(imp, sampling_vect)

    # copy imp object as basis for the yimp object
    yimp = copy.deepcopy(imp)

    # adjust for sampling error
    if correction_fac:
        imp_per_year = imp_per_year / calculate_correction_fac(imp_per_year, imp)

    # save calculations in yimp; at_event has to be set in both cases,
    # otherwise the copy keeps the per-event impacts of the input object
    yimp.at_event = imp_per_year
    yimp.event_id = np.arange(1, n_sampled_years + 1)
    yimp.tag['yimp object'] = True
    yimp.date = u_dt.str_to_date([str(date) + '-01-01' for date in sampled_years])
    yimp.frequency = np.ones(n_sampled_years) * sum(
        len(row) for row in sampling_vect) / n_sampled_years

    return yimp, sampling_vect
def set_from_isimip_netcdf(self, input_dir=None, filename=None, bbox=None,
                           yearrange=None, ag_model=None, cl_model=None, bias_corr=None,
                           scenario=None, soc=None, co2=None, crop=None, irr=None,
                           fn_str_var=None):
    """Wrapper to fill hazard from crop yield NetCDF file.
    Build and tested for output from ISIMIP2 and ISIMIP3, but might also work
    for other NetCDF containing gridded crop model output from other sources.

    Parameters:
        input_dir (Path or str): path to input data directory,
            default: INPUT_DIR
        filename (string): name of netcdf file in input_dir. If filename is given,
            the other parameters specifying the model run are not required!
        bbox (list of four floats): bounding box:
            [lon min, lat min, lon max, lat max], default: BBOX
        yearrange (int tuple): year range for hazard set, f.i. (1976, 2005);
            if None or empty, the full range of the input file is used
        ag_model (str): abbrev. agricultural model (only when input_dir is selected)
            f.i. 'clm-crop', 'gepic','lpjml','pepic'
        cl_model (str): abbrev. climate model (only when input_dir is selected)
            f.i. ['gfdl-esm2m', 'hadgem2-es','ipsl-cm5a-lr','miroc5'
        bias_corr (str): bias correction of climate forcing,
            f.i. 'ewembi' (ISIMIP2b, default) or 'w5e5' (ISIMIP3b)
        scenario (str): climate change scenario (only when input_dir is selected)
            f.i. 'historical' (default) or 'rcp60' or 'ISIMIP2a'
        soc (str): socio-economic trajectory (only when input_dir is selected)
            f.i. '2005soc' or 'histsoc'
        co2 (str): CO2 forcing scenario (only when input_dir is selected)
            f.i. 'co2' or '2005co2'
        crop (str): crop type (only when input_dir is selected)
            f.i. 'whe', 'mai', 'soy' or 'ric'
        irr (str): irrigation type (only when input_dir is selected)
            f.i 'noirr' or 'irr'
        fn_str_var (str): FileName STRing depending on VARiable and
            ISIMIP simuation round, default: FN_STR_VAR

    Returns:
        hazard

    raises:
        NameError: if the input directory does not exist
    """
    if not fn_str_var:
        fn_str_var = FN_STR_VAR
    if scenario is None:
        scenario = 'historical'
    if bias_corr is None:
        bias_corr = 'ewembi'
    if bbox is None:
        bbox = BBOX
    if input_dir is None:
        input_dir = INPUT_DIR
    input_dir = Path(input_dir)
    if not input_dir.is_dir():
        LOGGER.error('Input directory %s does not exist', input_dir)
        raise NameError

    # The filename is set or other variables (cl_model, scenario) are extracted of the
    # specified filename
    if filename is None:
        yearchunk = YEARCHUNKS[scenario]
        filename = '{}_{}_{}_{}_{}_{}_yield-{}-{}_{}_{}_{}.nc'.format(
            ag_model, cl_model, bias_corr, scenario, soc, co2, crop, irr,
            fn_str_var, yearchunk['startyear'], yearchunk['endyear'])
    elif scenario == 'ISIMIP2a':
        (_, _, _, _, _, _, _, crop, _, _, startyear, endyearnc) = filename.split('_')
        endyear, _ = endyearnc.split('.')
        yearchunk = {
            'yearrange': (int(startyear), int(endyear)),
            'startyear': int(startyear),
            'endyear': int(endyear),
        }
    elif scenario == 'test_file':
        yearchunk = {
            'yearrange': (1976, 2005),
            'startyear': 1861,
            'endyear': 2005,
            'yearrange_mean': (1976, 2005),
        }
        # only the crop type is needed from the file name
        _, _, _, _, _, _, crop_prop, *_ = filename.split('_')
        _, crop, _ = crop_prop.split('-')
    else:
        # get yearchunk from filename, e.g., for rcp2.6 extended and ISIMIP3
        (_, _, _, _, _, _, crop_irr, _, _, year1, year2) = filename.split('_')
        last_year = int(year2.split('.')[0])
        yearchunk = {
            'yearrange': (int(year1), last_year),
            'startyear': int(year1),
            'endyear': last_year,
        }
        _, crop, _ = crop_irr.split('-')

    # if no yearrange is given, load full range from input file:
    if yearrange is None or len(yearrange) == 0:
        yearrange = yearchunk['yearrange']

    # define indexes of the netcdf-bands to be extracted, and the
    # corresponding event names and dates
    # corrected indexes due to the bands in input starting with the index=1
    id_bands = np.arange(yearrange[0] - yearchunk['startyear'] + 1,
                         yearrange[1] - yearchunk['startyear'] + 2).tolist()

    # hazard setup: set attributes
    [lonmin, latmin, lonmax, latmax] = bbox
    self.set_raster([str(Path(input_dir, filename))], band=id_bands,
                    geometry=[shapely.geometry.box(lonmin, latmin, lonmax, latmax)])

    # missing values are treated as zero intensity
    # (the former call to self.intensity.todense() was dropped: its result
    # was discarded, it only allocated a full dense matrix)
    self.intensity.data[np.isnan(self.intensity.data)] = 0.0
    self.crop = crop
    self.event_name = [str(n) for n in range(int(yearrange[0]), int(yearrange[-1] + 1))]
    # every year gets the same frequency
    self.frequency = np.ones(len(self.event_name)) * (1 / len(self.event_name))
    self.fraction = self.intensity.copy()
    self.fraction.data.fill(1.0)
    self.units = 't / y / ha'
    self.date = np.array(
        dt.str_to_date([event_ + '-01-01' for event_ in self.event_name]))
    self.centroids.set_meta_to_lat_lon()
    self.centroids.region_id = (coord.coord_on_land(
        self.centroids.lat, self.centroids.lon)).astype(dtype=int)
    self.check()
    return self
def impact_yearset(event_impacts, sampled_years=None, sampling_dict=None, correction_fac=True):
    """Create an annual_impacts object containing a probabilistic impact for each year
    in the sampled_years list (or for a list of sampled_years generated with the length
    of given sampled_years) by sampling events from the existing input event_impacts with
    a Poisson distribution centered around n_events per year
    (n_events = sum(event_impacts.frequency)).
    In contrast to the expected annual impact (eai) annual_impacts contains impact values
    that differ among years (the correction factor can however be used to scale the
    annual_impacts to fit the eai of the events_impacts object that is used to
    generated it)

    Parameters:
        event_impacts : impact object
            impact object containing impacts per event
    Optional parameters:
        sampled_years : int or list
            Either an integer specifying the number of years to
            be sampled (labelled [0001,...,sampled_years]) or a list
            of years that shall be covered by the resulting annual_impacts.
            The default is a 1000 year-long list starting in the year 0001.
            If a sampling_dict is given and the number of years does not match
            the length of its events_per_year, the years are replaced by
            [1,...,len(events_per_year)].
        sampling_dict : dict
            The sampling dictionary specifying how to sample the annual_impacts
            It consists of two arrays:
                selected_events: array
                    indices of sampled events in event_impacts.at_event()
                events_per_year: array
                    number of events per sampled year
            The sampling_dict needs to be obtained in a first call,
            i.e. [annual_impacts, sampling_dict] = climada_yearsets.impact_yearset(...)
            and can then be provided in subsequent calls(s) to obtain the exact same sampling
            (also for a different event_impacts object)
        correction_fac : boolean
            If True a correction factor is applied to the resulting annual_impacts.
            They are scaled in such a way that the expected annual impact (eai)
            of the annual_impacts equals the eai of the events_impacts

    Returns:
        annual_impacts : impact object
             annual impacts for all sampled_years
        sampling_dict : dict
            the sampling dictionary containing two arrays:
                selected_events (array) : sampled events (len: total amount of sampled events)
                events_per_year (array) : events per sampled year
            Can be used to re-create the exact same annual_impacts yearset

    Raises:
        ValueError: if the entries of the given sampling_dict are inconsistent
    """
    if sampling_dict and (np.sum(sampling_dict['events_per_year']) != len(
            sampling_dict['selected_events'])):
        raise ValueError(
            "The sampling dictionary is faulty: the sum of events per year "
            "does not correspond to the number of selected events.")

    # an integer stands for the number of years to be sampled
    if isinstance(sampled_years, int):
        sampled_years = list(range(1, sampled_years + 1))

    if sampling_dict:
        # the number of years is given by events_per_year (one entry per year),
        # not by selected_events (one entry per sampled event)
        n_dict_years = len(sampling_dict['events_per_year'])
        if not sampled_years:
            sampled_years = list(range(1, n_dict_years + 1))
        elif len(sampled_years) != n_dict_years:
            LOGGER.info(
                "The number of sampled_years and the length of the list of events_per_year "
                "in the sampling_dict differ. The number of years contained in the "
                "sampling_dict are used as number of sampled_years.")
            sampled_years = list(range(1, n_dict_years + 1))
    elif not sampled_years:
        sampled_years = list(range(1, 1001))

    n_sampled_years = len(sampled_years)

    if len(np.unique(event_impacts.frequency)) > 1:
        LOGGER.warning(
            "The frequencies of the single events in the given event_impacts "
            "differ among each other. Please beware that this will influence "
            "the resulting annual_impacts as the events are sampled uniformaly "
            "and different frequencies are (not yet) taken into account.")

    # create sampling dictionary if not given as input
    if not sampling_dict:
        n_annual_events = np.sum(event_impacts.frequency)
        n_input_events = len(event_impacts.event_id)
        sampling_dict = create_sampling_dict(n_sampled_years, n_annual_events,
                                             n_input_events)

    # compute annual_impacts
    impact_per_year = compute_annual_impacts(event_impacts, sampling_dict)

    # copy event_impacts object as basis for the annual_impacts object
    annual_impacts = copy.deepcopy(event_impacts)

    # save impact_per_year in annual_impacts
    if correction_fac:
        # adjust for sampling error
        correction_factor = calculate_correction_fac(impact_per_year, event_impacts)
        annual_impacts.at_event = impact_per_year / correction_factor
    else:
        annual_impacts.at_event = impact_per_year

    annual_impacts.event_id = np.arange(1, n_sampled_years + 1)
    annual_impacts.tag['annual_impacts object'] = True
    annual_impacts.date = u_dt.str_to_date(
        [str(date) + '-01-01' for date in sampled_years])
    annual_impacts.frequency = np.ones(n_sampled_years) * np.sum(
        sampling_dict['events_per_year']) / n_sampled_years

    return annual_impacts, sampling_dict
def set_from_single_run(self, input_dir=None, filename=None, bbox=BBOX,
                        yearrange=(YEARCHUNKS['historical'])['yearrange'],
                        ag_model=None, cl_model=None, scenario='historical',
                        soc=None, co2=None, crop=None, irr=None, fn_str_var=FN_STR_VAR):
    """Wrapper to fill hazard from nc_dis file from ISIMIP

    Parameters:
        input_dir (string): path to input data directory
        filename (string): name of netcdf file in input_dir; if None, the
            name is built from the other parameters
        bbox (list of four floats): bounding box:
            [lon min, lat min, lon max, lat max]
        yearrange (int tuple): year range for hazard set, f.i. (1976, 2005)
        ag_model (str): abbrev. agricultural model (only when input_dir is selected)
            f.i. 'clm-crop', 'gepic','lpjml','pepic'
        cl_model (str): abbrev. climate model (only when input_dir is selected)
            f.i. ['gfdl-esm2m', 'hadgem2-es','ipsl-cm5a-lr','miroc5'
        scenario (str): climate change scenario (only when input_dir is selected)
            f.i. 'historical' or 'rcp60' or 'ISIMIP2a'
        soc (str): socio-economic trajectory (only when input_dir is selected)
            f.i. '2005soc' or 'histsoc'
        co2 (str): CO2 forcing scenario (only when input_dir is selected)
            f.i. 'co2' or '2005co2'
        crop (str): crop type (only when input_dir is selected)
            f.i. 'whe', 'mai', 'soy' or 'ric'
        irr (str): irrigation type (only when input_dir is selected)
            f.i 'noirr' or 'irr'
        fn_str_var (str): FileName STRing depending on VARiable and
            ISIMIP simuation round

    Returns:
        hazard

    raises:
        NameError: if input_dir is not set or does not exist
    """
    if input_dir is None:
        LOGGER.error('Input directory %s not set', input_dir)
        raise NameError
    if not os.path.exists(input_dir):
        LOGGER.error('Input directory %s does not exist', input_dir)
        raise NameError

    # The filename is set or other variables (cl_model, scenario) are extracted of the
    # specified filename
    if filename is None:
        yearchunk = YEARCHUNKS[scenario]
        filename = os.path.join(input_dir, '%s_%s_ewembi_%s_%s_%s_yield-%s-%s_%s_%s_%s.nc' \
                                % (ag_model, cl_model, scenario, soc, co2,
                                   crop, irr, fn_str_var, str(yearchunk['startyear']),
                                   str(yearchunk['endyear'])))
    elif scenario == 'ISIMIP2a':
        (_, _, _, _, _, _, _, crop, _, _, startyear, endyearnc) = filename.split('_')
        endyear, _ = endyearnc.split('.')
        yearchunk = {'yearrange': np.array([int(startyear), int(endyear)]),
                     'startyear': int(startyear), 'endyear': int(endyear)}
        filename = os.path.join(input_dir, filename)
    elif scenario == 'test_file':
        yearchunk = {'yearrange': np.array([1976, 2005]), 'startyear': 1861,
                     'endyear': 2005, 'yearrange_mean': np.array([1976, 2005])}
        # only the crop type is needed from the file name
        _, _, _, _, _, _, crop_prop, *_ = filename.split('_')
        _, crop, _ = crop_prop.split('-')
        filename = os.path.join(input_dir, filename)
    else:
        yearchunk = YEARCHUNKS[scenario]
        (_, _, _, _, _, _, crop_irr, *_) = filename.split('_')
        _, crop, _ = crop_irr.split('-')
        filename = os.path.join(input_dir, filename)

    # define indexes of the netcdf-bands to be extracted, and the
    # corresponding event names and dates
    # corrected indexes due to the bands in input starting with the index=1
    id_bands = np.arange(yearrange[0] - yearchunk['startyear'] + 1,
                         yearrange[1] - yearchunk['startyear'] + 2).tolist()

    # hazard setup: set attributes
    [lonmin, latmin, lonmax, latmax] = bbox
    self.set_raster([filename], band=id_bands,
                    geometry=[shapely.geometry.box(lonmin, latmin, lonmax, latmax)])

    # missing values are treated as zero intensity
    # (the former call to self.intensity.todense() was dropped: its result
    # was discarded, it only allocated a full dense matrix)
    self.intensity.data[np.isnan(self.intensity.data)] = 0.0
    self.crop = crop
    self.event_name = [str(n) for n in range(int(yearrange[0]), int(yearrange[-1] + 1))]
    # every year gets the same frequency
    self.frequency = np.ones(len(self.event_name)) * (1 / len(self.event_name))
    self.fraction = self.intensity.copy()
    self.fraction.data.fill(1.0)
    self.units = 't / y / ha'
    self.date = np.array(dt.str_to_date(
        [event_ + '-01-01' for event_ in self.event_name]))
    self.centroids.set_meta_to_lat_lon()
    self.centroids.region_id = (
        coord.coord_on_land(self.centroids.lat, self.centroids.lon)).astype(dtype=int)
    self.check()
    return self
def select(self, date=None, orig=None, reg_id=None):
    """Select events within provided date and/or historical or synthetical,
    and/or centroids of a given region.
    Frequency of the events may need to be recomputed!

    Parameters:
        date (tuple(str or int), optional): (initial date, final date) in
            string ISO format or datetime ordinal integer
        orig (bool, optional): select only historical (True) or only
            synthetic (False)
        reg_id (optional): region id; only centroids whose region_id equals
            this value are kept

    Returns:
        Hazard or children, None if nothing matches the selection
    """
    try:
        haz = self.__class__()
    except TypeError:
        # the class can not be built without arguments
        haz = Hazard(self.tag.haz_type)

    sel_ev = np.ones(self.event_id.size, bool)
    sel_cen = np.ones(self.centroids.size, bool)

    # filter events with date
    if isinstance(date, tuple):
        date_ini, date_end = date[0], date[1]
        if isinstance(date_ini, str):
            date_ini = u_dt.str_to_date(date[0])
            date_end = u_dt.str_to_date(date[1])

        sel_ev = np.logical_and(date_ini <= self.date, self.date <= date_end)
        if not np.any(sel_ev):
            LOGGER.info('No hazard in date range %s.', date)
            return None

    # filter events hist/synthetic
    if isinstance(orig, bool):
        sel_ev = np.logical_and(sel_ev, self.orig.astype(bool) == orig)
        if not np.any(sel_ev):
            LOGGER.info('No hazard with %s tracks.', str(orig))
            return None

    # filter centroids
    if reg_id is not None:
        sel_cen = np.argwhere(self.centroids.region_id == reg_id).reshape(-1)
        if not sel_cen.size:
            LOGGER.info('No hazard centroids with region %s.', str(reg_id))
            return None

    # reshape(-1) keeps a 1-D index array even if a single event is selected;
    # squeeze() would return a 0-d array, which can not be iterated over and
    # turns 1-D attributes into scalars
    sel_ev = np.argwhere(sel_ev).reshape(-1)
    for (var_name, var_val) in self.__dict__.items():
        if isinstance(var_val, np.ndarray) and var_val.ndim == 1 and var_val.size:
            setattr(haz, var_name, var_val[sel_ev])
        elif isinstance(var_val, sparse.csr_matrix):
            # rows are events, columns are centroids
            setattr(haz, var_name, var_val[sel_ev, :][:, sel_cen])
        elif isinstance(var_val, list) and var_val:
            setattr(haz, var_name, [var_val[idx] for idx in sel_ev])
        elif var_name == 'centroids' and reg_id is not None:
            setattr(haz, var_name, var_val.select(reg_id))
        else:
            # everything else (including unfiltered centroids) is passed on as is
            setattr(haz, var_name, var_val)
    return haz