def get_ecmwf_file(dataset='ERA5', var=None):
    """Returns a path to the desired ECMWF baseline climate file.

    If the file is not present, download it.

    Parameters
    ----------
    dataset : str
        'ERA5', 'ERA5L', 'CERA'
    var : str
        'inv' for invariant
        'tmp' for temperature
        'pre' for precipitation

    Returns
    -------
    str
        path to the file
    """

    # Be sure input makes sense
    if dataset not in BASENAMES.keys():
        raise InvalidParamsError('ECMWF dataset {} not '
                                 'in {}'.format(dataset, BASENAMES.keys()))
    if var not in BASENAMES[dataset].keys():
        raise InvalidParamsError('ECMWF variable {} not '
                                 'in {}'.format(var, BASENAMES[dataset].keys()))

    # File to look for
    return utils.file_downloader(ECMWF_SERVER + BASENAMES[dataset][var])
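
# Hedged usage sketch (added, not part of the original source): fetch the
# ERA5 invariant file. Assumes oggm.cfg.initialize() has been called so the
# download cache directories exist and an internet connection is available.
def _example_get_ecmwf_file():  # pragma: no cover
    from oggm import cfg
    cfg.initialize()
    path = get_ecmwf_file(dataset='ERA5', var='inv')
    print(path)  # cached on disk; subsequent calls do not re-download
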
def calendardate_to_hydrodate(y, m, start_month=None):
    """Converts a calendar (year, month) pair to a hydrological date.

    Parameters
    ----------
    y : int
        the year
    m : int
        the month
    start_month : int
        the first month of the hydrological year
    """

    if start_month is None:
        raise InvalidParamsError('In order to avoid confusion, we now force '
                                 'callers of this function to specify the '
                                 'hydrological convention they are using.')

    try:
        if m >= start_month:
            out_y = y + 1
            out_m = m - start_month + 1
        else:
            out_y = y
            out_m = m + 13 - start_month
    except (TypeError, ValueError):
        # TODO: inefficient but no time right now
        out_y = np.zeros(len(y), np.int64)
        out_m = np.zeros(len(y), np.int64)
        for i, (_y, _m) in enumerate(zip(y, m)):
            _y, _m = calendardate_to_hydrodate(_y, _m,
                                               start_month=start_month)
            out_y[i] = _y
            out_m[i] = _m
    return out_y, out_m
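
# Worked example (added for illustration): with the northern-hemisphere
# convention start_month=10, October 1999 is the first month of hydrological
# year 2000, and September 2000 is its last month:
#
#     calendardate_to_hydrodate(1999, 10, start_month=10)  # -> (2000, 1)
#     calendardate_to_hydrodate(2000, 9, start_month=10)   # -> (2000, 12)
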
def prcp_fac(self, new_prcp_fac):
    # just to check that no invalid prcp_factors are used
    if new_prcp_fac <= 0:
        raise InvalidParamsError('prcp_fac has to be above zero!')
    self.prcp *= new_prcp_fac / self._prcp_fac
    # update old prcp_fac in order that it can be updated again ...
    self._prcp_fac = new_prcp_fac
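
# A minimal sketch of the pattern above (hypothetical class, added for
# illustration): because the stored precipitation is rescaled by
# new_prcp_fac / self._prcp_fac, repeated updates compose correctly.
def _example_prcp_fac_rescaling():  # pragma: no cover
    import numpy as np

    class _Demo:
        def __init__(self, prcp, prcp_fac):
            self.prcp = prcp * prcp_fac
            self._prcp_fac = prcp_fac

        def set_prcp_fac(self, new_prcp_fac):
            self.prcp *= new_prcp_fac / self._prcp_fac
            self._prcp_fac = new_prcp_fac

    d = _Demo(np.array([1.0, 2.0]), 2.5)
    d.set_prcp_fac(3.0)
    # The result equals the raw series scaled by 3.0, not 2.5 * 3.0:
    assert np.allclose(d.prcp, np.array([3.0, 6.0]))
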
def get_histalp_file(var=None):
    """Returns a path to the desired HISTALP baseline climate file.

    If the file is not present, download it.

    Parameters
    ----------
    var : str
        'tmp' for temperature
        'pre' for precipitation

    Returns
    -------
    str
        path to the HISTALP file
    """

    # Be sure input makes sense
    if var not in ['tmp', 'pre']:
        raise InvalidParamsError('HISTALP variable {} '
                                 'does not exist!'.format(var))

    # File to look for
    if var == 'tmp':
        bname = 'HISTALP_temperature_1780-2014.nc'
    else:
        bname = 'HISTALP_precipitation_all_abs_1801-2014.nc'

    h_url = HISTALP_SERVER + bname + '.bz2'
    return utils.file_extractor(utils.file_downloader(h_url))
def merge_glacier_tasks(gdirs, main_rgi_id=None, return_all=False,
                        buffer=None, **kwargs):
    """Shortcut function: run all tasks to merge tributaries to a main glacier

    Parameters
    ----------
    gdirs : list of :py:class:`oggm.GlacierDirectory`
        all glaciers, main and tributary. Preprocessed and initialised
    main_rgi_id : str
        RGI ID of the main glacier of interest. If None is provided merging
        will start based upon the largest glacier
    return_all : bool
        if main_rgi_id is given and return_all = False: only the main
        glacier is returned
        if main_rgi_id is given and return_all = True: the main glacier and
        every remaining glacier from the initial gdirs list is returned,
        possibly merged as well.
    buffer : float
        buffer around a flowline to first better find an overlap with
        another flowline. And second assure some distance between the lines
        at a junction. Will default to `cfg.PARAMS['kbuffer']`.
    kwargs : keyword argument for the recursive merging

    Returns
    -------
    merged_gdirs : list of all merged :py:class:`oggm.GlacierDirectory`
    """

    if len(gdirs) > 100:
        raise InvalidParamsError('this could take time! I should include an '
                                 'optional parameter to ignore this.')

    # sort all glaciers descending by area
    gdirs.sort(key=lambda x: x.rgi_area_m2, reverse=True)

    # if main glacier is asked, put it in first position
    if main_rgi_id is not None:
        gdir_main = [gd for gd in gdirs if gd.rgi_id == main_rgi_id][0]
        gdirs.remove(gdir_main)
        gdirs = [gdir_main] + gdirs

    merged_gdirs = []
    while len(gdirs) > 1:
        # main glacier is always the first: either given or the largest one
        gdir_main = gdirs.pop(0)
        gdir_merged, gdirs = _recursive_merging(gdirs, gdir_main, **kwargs)
        merged_gdirs.append(gdir_merged)

    # now we have gdirs which contain all the necessary flowlines,
    # time to clean them up
    for gdir in merged_gdirs:
        flowline.clean_merged_flowlines(gdir, buffer=buffer)

    if main_rgi_id is not None and return_all is False:
        return [gd for gd in merged_gdirs if main_rgi_id in gd.rgi_id][0]

    # add the remaining glacier to the final list
    merged_gdirs = merged_gdirs + gdirs

    return merged_gdirs
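
# Hedged usage sketch (added; the RGI id is only an example): merge
# tributaries into a named main glacier. The gdirs must already be
# preprocessed up to the flowline level.
def _example_merge_glacier_tasks(gdirs):  # pragma: no cover
    merged = merge_glacier_tasks(gdirs, main_rgi_id='RGI60-11.00897',
                                 return_all=False)
    return merged
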
def get_cru_file(var=None):
    """Returns a path to the desired CRU baseline climate file.

    If the file is not present, download it.

    Parameters
    ----------
    var : str
        'tmp' for temperature
        'pre' for precipitation

    Returns
    -------
    str
        path to the CRU file
    """

    # Be sure input makes sense
    if var not in ['tmp', 'pre']:
        raise InvalidParamsError('CRU variable {} does not exist!'.format(var))

    # Download
    cru_filename = CRU_BASE.format(var)
    cru_url = CRU_SERVER + '{}/'.format(var) + cru_filename + '.gz'
    return utils.file_extractor(utils.file_downloader(cru_url))
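
# Hedged usage sketch (added for illustration): both CRU variables are
# usually needed before climate processing. The first call downloads and
# extracts the file; subsequent calls hit the local cache.
def _example_get_cru_file():  # pragma: no cover
    for var in ['tmp', 'pre']:
        print(var, '->', get_cru_file(var=var))
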
def _get_2d_monthly_climate(self, heights, year=None):
    # first get the climate data
    warnings.warn('Attention: this has not been tested enough to be sure '
                  'that it works')
    if self.mb_type == 'mb_real_daily':
        return self._get_climate(heights, 'monthly', year=year)
    else:
        raise InvalidParamsError('_get_2d_monthly_climate works only '
                                 'with mb_real_daily as mb_type!')
def __setitem__(self, key, value):
    # Overrides the original dict to expand the path
    try:
        value = os.path.expanduser(value)
    except AttributeError:
        raise InvalidParamsError('The value you are trying to set does '
                                 'not seem to be a valid path: '
                                 '{}'.format(value))

    ResettingOrderedDict.__setitem__(self, key, value)
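
# Illustration (added; values hypothetical): the override makes '~' usable
# in configuration paths.
def _example_path_expansion():  # pragma: no cover
    import os
    # '~/OGGM/wd' is stored expanded, e.g. '/home/user/OGGM/wd'
    print(os.path.expanduser('~/OGGM/wd'))
    # A non-string value (e.g. an int) has no expanduser-compatible
    # behaviour and would trigger the InvalidParamsError above.
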
def compute_ref_t_stars(gdirs):
    """Detects the best t* for the reference glaciers and writes them to disk

    This task will be needed for mass balance calibration of custom climate
    data. For CRU and HISTALP baseline climate a precalibrated list is
    available and should be used instead.

    Parameters
    ----------
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
        will be filtered for reference glaciers
    """

    if not cfg.PARAMS['run_mb_calibration']:
        raise InvalidParamsError('Are you sure you want to calibrate the '
                                 'reference t*? There is a pre-calibrated '
                                 'version available. If you know what you '
                                 'are doing and still want to calibrate, '
                                 'set the `run_mb_calibration` parameter '
                                 'to `True`.')

    log.info('Compute the reference t* and mu* for WGMS glaciers')

    # Reference glaciers only if in the list and period is good
    ref_gdirs = utils.get_ref_mb_glaciers(gdirs)

    # Run
    from oggm.workflow import execute_entity_task
    out = execute_entity_task(t_star_from_refmb, ref_gdirs)

    # Loop write
    df = pd.DataFrame()
    for gdir, res in zip(ref_gdirs, out):
        # list of mus compatibles with refmb
        rid = gdir.rgi_id
        df.loc[rid, 'lon'] = gdir.cenlon
        df.loc[rid, 'lat'] = gdir.cenlat
        df.loc[rid, 'n_mb_years'] = len(gdir.get_ref_mb_data())
        df.loc[rid, 'tstar'] = res['t_star']
        df.loc[rid, 'bias'] = res['bias']

    # Write out
    df['tstar'] = df['tstar'].astype(int)
    df['n_mb_years'] = df['n_mb_years'].astype(int)
    file = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(file)
def _log_param_change(self, key, value):

    prev = self.get(key)
    if prev is None:
        if key in ['baseline_y0', 'baseline_y1']:
            raise InvalidParamsError('The `baseline_y0` and `baseline_y1` '
                                     'parameters have been removed. '
                                     'You now have to set them explicitly '
                                     'in your call to '
                                     '`process_climate_data`.')
        log.workflow('WARNING: adding an unknown parameter '
                     '`{}`:`{}` to PARAMS.'.format(key, value))
        return

    if prev == value:
        return

    if key == 'use_multiprocessing':
        msg = 'ON' if value else 'OFF'
        log.workflow('Multiprocessing switched {} '.format(msg) +
                     'after user settings.')
        return

    if key == 'mp_processes':
        if value == -1:
            import multiprocessing
            value = multiprocessing.cpu_count()
            if PARAMS.get('use_multiprocessing', False):
                log.workflow('Multiprocessing: using all available '
                             'processors (N={})'.format(value))
        else:
            if PARAMS.get('use_multiprocessing', False):
                log.workflow('Multiprocessing: using the requested number '
                             'of processors (N={})'.format(value))
        return

    log.workflow("PARAMS['{}'] changed from `{}` to `{}`.".format(key, prev,
                                                                  value))
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None,
                      output_folder='', working_dir='', dem_source='',
                      is_test=False, demo=False, test_rgidf=None,
                      test_intersects_file=None, test_topofile=None,
                      test_crudir=None, disable_mp=False, timeout=0,
                      max_level=4, logging_level='WORKFLOW'):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    dem_source : str
        which DEM source to use: default, SOURCE_NAME or ALL
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    disable_mp : bool
        disable multiprocessing
    max_level : int
        the maximum pre-processing level before stopping
    logging_level : str
        the logging level to use (DEBUG, INFO, WARNING, WORKFLOW)
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Input check
    if max_level not in [1, 2, 3, 4]:
        raise InvalidParamsError('max_level should be one of [1, 2, 3, 4]')

    # Time
    start = time.time()

    def _time_log():
        # Log util
        m, s = divmod(time.time() - start, 60)
        h, m = divmod(m, 60)
        log.workflow('OGGM prepro_levels is done! Time needed: '
                     '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level=logging_level)

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = not disable_mp

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # Timeout
    cfg.PARAMS['task_timeout'] = timeout

    # For statistics
    climate_periods = [1920, 1960, 2000]

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    rgi_dir_name = 'RGI{}'.format(rgi_version)
    border_dir_name = 'b_{:03d}'.format(border)
    base_dir = os.path.join(output_folder, rgi_dir_name, border_dir_name)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(4)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # L1 - initialize working directories
    # Which DEM source?
    if dem_source.upper() == 'ALL':
        # This is the complex one, just do the job and leave
        log.workflow('Running prepro on ALL sources')
        for i, s in enumerate(utils.DEM_SOURCES):
            rs = i == 0
            rgidf['DEM_SOURCE'] = s
            log.workflow('Running prepro on sources: {}'.format(s))
            gdirs = workflow.init_glacier_regions(rgidf, reset=rs, force=rs)
            workflow.execute_entity_task(_rename_dem_folder, gdirs, source=s)

        # Compress all in output directory
        l_base_dir = os.path.join(base_dir, 'L1')
        workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                     base_dir=l_base_dir)
        utils.base_dir_to_tar(l_base_dir)

        _time_log()
        return

    if dem_source:
        # Force a given source
        rgidf['DEM_SOURCE'] = dem_source.upper()

    # L1 - go
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L1', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L1 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L1')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)
    if max_level == 1:
        _time_log()
        return

    # L2 - Tasks
    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    workflow.execute_entity_task(tasks.process_cru_data, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L2', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L2 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L2')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)
    if max_level == 2:
        _time_log()
        return

    # L3 - Tasks
    task_list = [
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
        tasks.init_present_time_glacier
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L3', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs,
                                     add_climate_period=climate_periods,
                                     path=opath)

    # L3 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L3')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)
    if max_level == 3:
        _time_log()
        return

    # L4 - No tasks: add some stats for consistency and make the dirs small
    sum_dir = os.path.join(base_dir, 'L4', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # Copy mini data to new dir
    base_dir = os.path.join(base_dir, 'L4')
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs,
                                              base_dir=base_dir)

    # L4 OK - compress all in output directory
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=True)
    utils.base_dir_to_tar(base_dir)

    _time_log()
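
# Hedged usage sketch (added; paths are hypothetical): run the first two
# pre-processing levels for RGI region 11 with a map border of 160 pixels.
def _example_run_prepro_levels():  # pragma: no cover
    run_prepro_levels(rgi_reg='11', border=160,
                      output_folder='/tmp/oggm_out',
                      working_dir='/tmp/oggm_wd',
                      max_level=2)
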
def write_climate_file(gdir, time, prcp, temp,
                       ref_pix_hgt, ref_pix_lon, ref_pix_lat,
                       gradient=None, temp_std=None,
                       time_unit=None, calendar=None, source=None,
                       file_name='climate_historical', filesuffix=''):
    """Creates a netCDF4 file with climate data timeseries.

    Parameters
    ----------
    gdir : glacier directory
    time : ndarray
        the time array, in a format understood by netCDF4
    prcp : ndarray
        the precipitation array (unit: 'kg m-2 month-1')
    temp : ndarray
        the temperature array (unit: 'degC')
    ref_pix_hgt : float
        the elevation of the dataset's reference altitude (for correction).
        In practice it is the same altitude as the baseline climate.
    ref_pix_lon : float
        the location of the gridded data's grid point
    ref_pix_lat : float
        the location of the gridded data's grid point
    gradient : ndarray, optional
        whether to use a time varying gradient
    temp_std : ndarray, optional
        the daily standard deviation of temperature (useful for PyGEM)
    time_unit : str
        the reference time unit for your time array. This should be chosen
        depending on the length of your data. The default is to choose
        it ourselves based on the starting year.
    calendar : str
        If you use an exotic calendar (e.g. 'noleap')
    source : str
        the climate data source (required)
    file_name : str
        How to name the file
    filesuffix : str
        Apply a suffix to the file
    """

    if source == 'ERA5_daily' and filesuffix == '':
        raise InvalidParamsError('filesuffix should be "_daily" as only '
                                 'file_name climate_historical is normally '
                                 'monthly data')

    # overwrite is default
    fpath = gdir.get_filepath(file_name, filesuffix=filesuffix)
    if os.path.exists(fpath):
        os.remove(fpath)

    if source is None:
        raise InvalidParamsError('`source` kwarg is required')

    zlib = cfg.PARAMS['compress_climate_netcdf']

    try:
        y0 = time[0].year
        y1 = time[-1].year
    except AttributeError:
        time = pd.DatetimeIndex(time)
        y0 = time[0].year
        y1 = time[-1].year

    if time_unit is None:
        # http://pandas.pydata.org/pandas-docs/stable/timeseries.html
        # #timestamp-limitations
        if y0 > 1800:
            time_unit = 'days since 1801-01-01 00:00:00'
        elif y0 >= 0:
            time_unit = ('days since {:04d}-01-01 '
                         '00:00:00'.format(time[0].year))
        else:
            raise InvalidParamsError('Time format not supported')

    with ncDataset(fpath, 'w', format='NETCDF4') as nc:
        nc.ref_hgt = ref_pix_hgt
        nc.ref_pix_lon = ref_pix_lon
        nc.ref_pix_lat = ref_pix_lat
        nc.ref_pix_dis = haversine(gdir.cenlon, gdir.cenlat,
                                   ref_pix_lon, ref_pix_lat)
        nc.climate_source = source
        if time[0].month == 1:
            nc.hydro_yr_0 = y0
        else:
            nc.hydro_yr_0 = y0 + 1
        nc.hydro_yr_1 = y1

        nc.createDimension('time', None)

        nc.author = 'OGGM'
        nc.author_info = 'Open Global Glacier Model'

        timev = nc.createVariable('time', 'i4', ('time',))

        tatts = {'units': time_unit}
        if calendar is None:
            calendar = 'standard'

        tatts['calendar'] = calendar
        try:
            numdate = netCDF4.date2num([t for t in time], time_unit,
                                       calendar=calendar)
        except TypeError:
            # numpy's broken datetime only works for us precision
            time = time.astype('M8[us]').astype(datetime.datetime)
            numdate = netCDF4.date2num(time, time_unit, calendar=calendar)

        timev.setncatts(tatts)
        timev[:] = numdate

        v = nc.createVariable('prcp', 'f4', ('time',), zlib=zlib)
        v.units = 'kg m-2'
        # this could be made more beautiful
        # just rough estimate
        if len(prcp) > (nc.hydro_yr_1 - nc.hydro_yr_0 + 1) * 30 * 12:
            v.long_name = ("total daily precipitation amount, "
                           "assumed same for each day of month")
        elif len(prcp) == (nc.hydro_yr_1 - nc.hydro_yr_0 + 1) * 12:
            v.long_name = 'total monthly precipitation amount'
        else:
            v.long_name = 'total monthly precipitation amount'
            warnings.warn("there might be a conflict in the prcp "
                          "timeseries, please check!")

        v[:] = prcp

        v = nc.createVariable('temp', 'f4', ('time',), zlib=zlib)
        v.units = 'degC'
        if source == 'ERA5_daily' and len(temp) > (y1 - y0) * 30 * 12:
            v.long_name = '2m daily temperature at height ref_hgt'
        elif source == 'ERA5_daily' and len(temp) <= (y1 - y0) * 30 * 12:
            raise InvalidParamsError('if the climate dataset (here source) '
                                     'is ERA5_daily, temperatures should be '
                                     'in daily resolution, please check or '
                                     'set source to another climate dataset')
        else:
            v.long_name = '2m monthly temperature at height ref_hgt'

        v[:] = temp

        if gradient is not None:
            v = nc.createVariable('gradient', 'f4', ('time',), zlib=zlib)
            v.units = 'degC m-1'
            v.long_name = ('temperature gradient from local regression '
                           'or lapserates')
            v[:] = gradient

        if temp_std is not None:
            v = nc.createVariable('temp_std', 'f4', ('time',), zlib=zlib)
            v.units = 'degC'
            v.long_name = 'standard deviation of daily temperatures'
            v[:] = temp_std
def parse_args(args):
    """Check input arguments and env variables"""

    # CLI args
    description = ('Generate the preprocessed OGGM glacier directories for '
                   'this OGGM version.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--map-border', type=int,
                        help='the size of the map border. Is required if '
                             '$OGGM_MAP_BORDER is not set.')
    parser.add_argument('--rgi-reg', type=str,
                        help='the rgi region to process. Is required if '
                             '$OGGM_RGI_REG is not set.')
    parser.add_argument('--rgi-version', type=str,
                        help='the RGI version to use. Defaults to the OGGM '
                             'default.')
    parser.add_argument('--max-level', type=int, default=4,
                        help='the maximum level you want to run the '
                             'pre-processing for (1, 2, 3 or 4).')
    parser.add_argument('--working-dir', type=str,
                        help='path to the directory where to write the '
                             'output. Defaults to current directory or '
                             '$OGGM_WORKDIR.')
    parser.add_argument('--output', type=str,
                        help='path to the directory where to write the '
                             'output. Defaults to current directory or '
                             '$OGGM_OUTDIR.')
    parser.add_argument('--dem-source', type=str, default='',
                        help='which DEM source to use. Possible options are '
                             'the name of a specific DEM (e.g. RAMP, '
                             'SRTM...) or ALL, in which case all available '
                             'DEMs will be processed and adjoined with a '
                             'suffix at the end of the file name. The ALL '
                             'option is only compatible with level 1 '
                             'folders, after which the processing will '
                             'stop. The default is to use the default '
                             'OGGM DEM.')
    parser.add_argument('--disable-mp', nargs='?', const=True, default=False,
                        help='if you want to disable multiprocessing.')
    parser.add_argument('--timeout', type=int, default=0,
                        help='apply a timeout to the entity tasks '
                             '(in seconds).')
    parser.add_argument('--demo', nargs='?', const=True, default=False,
                        help='if you want to run the prepro for the '
                             'list of demo glaciers.')
    parser.add_argument('--test', nargs='?', const=True, default=False,
                        help='if you want to do a test on a couple of '
                             'glaciers first.')
    parser.add_argument('--logging-level', type=str, default='WORKFLOW',
                        help='the logging level to use (DEBUG, INFO, '
                             'WARNING, WORKFLOW).')
    args = parser.parse_args(args)

    # Check input
    rgi_reg = args.rgi_reg
    if args.demo:
        rgi_reg = 0
    if not rgi_reg and not args.demo:
        rgi_reg = os.environ.get('OGGM_RGI_REG', None)
        if rgi_reg is None:
            raise InvalidParamsError('--rgi-reg is required!')
    rgi_reg = '{:02}'.format(int(rgi_reg))

    rgi_version = args.rgi_version

    border = args.map_border
    if not border:
        border = os.environ.get('OGGM_MAP_BORDER', None)
        if border is None:
            raise InvalidParamsError('--map-border is required!')

    working_dir = args.working_dir
    if not working_dir:
        working_dir = os.environ.get('OGGM_WORKDIR', '')

    output_folder = args.output
    if not output_folder:
        output_folder = os.environ.get('OGGM_OUTDIR', '')

    border = int(border)
    output_folder = os.path.abspath(output_folder)
    working_dir = os.path.abspath(working_dir)

    # All good
    return dict(rgi_version=rgi_version, rgi_reg=rgi_reg,
                border=border, output_folder=output_folder,
                working_dir=working_dir, is_test=args.test,
                demo=args.demo, dem_source=args.dem_source,
                max_level=args.max_level, timeout=args.timeout,
                disable_mp=args.disable_mp,
                logging_level=args.logging_level)
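
# Hedged example (added for illustration): parsing a typical command line.
def _example_parse_args():  # pragma: no cover
    kwargs = parse_args(['--rgi-reg', '11', '--map-border', '160'])
    assert kwargs['rgi_reg'] == '11'
    assert kwargs['border'] == 160
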
def step(self, dt):
    """Advance one step."""

    # Just a check to avoid useless computations
    if dt <= 0:
        raise InvalidParamsError('dt needs to be strictly positive')

    # Guarantee a precise arrival on a specific date if asked
    min_dt = dt if dt < self.min_dt else self.min_dt
    dt = utils.clip_scalar(dt, min_dt, self.max_dt)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)

        fl = self.fls[0]
        dx = fl.dx_meter

        # Switch to the notation from the MUSCL_1D example
        # This is useful to ensure that the MUSCL-SuperBee code
        # is working as it has been benchmarked many times

        # mass balance
        m_dot = self.get_mb(fl.surface_h, self.yr, fl_id=id(fl))
        # get in the surface elevation
        S = fl.surface_h
        # get the bed
        B = fl.bed_h
        # define Glen's law here
        N = self.glen_n
        # this is the correct Gamma !!
        Gamma = 2. * self.glen_a * (self.rho * G)**N / (N + 2.)
        # time stepping
        c_stab = 0.165

        # define the finite difference indices
        k = np.arange(0, fl.nx)
        kp = np.hstack([np.arange(1, fl.nx), fl.nx - 1])
        kpp = np.hstack([np.arange(2, fl.nx), fl.nx - 1, fl.nx - 1])
        km = np.hstack([0, np.arange(0, fl.nx - 1)])
        kmm = np.hstack([0, 0, np.arange(0, fl.nx - 2)])

        # I'm gonna introduce another level of adaptive time stepping here,
        # which is probably not necessary. However I keep it to be
        # consistent with my benchmarked and tested code.
        # If the OGGM time stepping is correctly working, this loop
        # should never run more than once
        # Fabi: actually no, it is your job to choose the right time step
        # but you can let OGGM decide whether a new time step is needed
        # or not -> to be ameliorated one day
        stab_t = 0.
        while stab_t < dt:
            H = S - B

            # MUSCL scheme up. "up" denotes here the k+1/2 flux boundary
            r_up_m = (H[k] - H[km]) / (H[kp] - H[k])  # Eq. 27
            H_up_m = H[k] + 0.5 * self.phi(r_up_m) * (H[kp] - H[k])  # Eq. 23
            # Eq. 27, k+1 is used instead of k
            r_up_p = (H[kp] - H[k]) / (H[kpp] - H[kp])
            # Eq. 24
            H_up_p = H[kp] - 0.5 * self.phi(r_up_p) * (H[kpp] - H[kp])

            # surface slope gradient
            s_grad_up = ((S[kp] - S[k])**2. / dx**2.)**((N - 1.) / 2.)
            # like Eq. 30, now using Eq. 23 instead of Eq. 24
            D_up_m = Gamma * H_up_m**(N + 2.) * s_grad_up
            D_up_p = Gamma * H_up_p**(N + 2.) * s_grad_up  # Eq. 30

            # Eq. 31
            D_up_min = np.minimum(D_up_m, D_up_p)
            # Eq. 32
            D_up_max = np.maximum(D_up_m, D_up_p)
            D_up = np.zeros(fl.nx)

            # Eq. 33
            cond = (S[kp] <= S[k]) & (H_up_m <= H_up_p)
            D_up[cond] = D_up_min[cond]
            cond = (S[kp] <= S[k]) & (H_up_m > H_up_p)
            D_up[cond] = D_up_max[cond]
            cond = (S[kp] > S[k]) & (H_up_m <= H_up_p)
            D_up[cond] = D_up_max[cond]
            cond = (S[kp] > S[k]) & (H_up_m > H_up_p)
            D_up[cond] = D_up_min[cond]

            # MUSCL scheme down. "down" denotes the k-1/2 flux boundary
            r_dn_m = (H[km] - H[kmm]) / (H[k] - H[km])
            H_dn_m = H[km] + 0.5 * self.phi(r_dn_m) * (H[k] - H[km])
            r_dn_p = (H[k] - H[km]) / (H[kp] - H[k])
            H_dn_p = H[k] - 0.5 * self.phi(r_dn_p) * (H[kp] - H[k])

            # calculate the slope gradient
            s_grad_dn = ((S[k] - S[km])**2. / dx**2.)**((N - 1.) / 2.)
            D_dn_m = Gamma * H_dn_m**(N + 2.) * s_grad_dn
            D_dn_p = Gamma * H_dn_p**(N + 2.) * s_grad_dn

            D_dn_min = np.minimum(D_dn_m, D_dn_p)
            D_dn_max = np.maximum(D_dn_m, D_dn_p)
            D_dn = np.zeros(fl.nx)

            cond = (S[k] <= S[km]) & (H_dn_m <= H_dn_p)
            D_dn[cond] = D_dn_min[cond]
            cond = (S[k] <= S[km]) & (H_dn_m > H_dn_p)
            D_dn[cond] = D_dn_max[cond]
            cond = (S[k] > S[km]) & (H_dn_m <= H_dn_p)
            D_dn[cond] = D_dn_max[cond]
            cond = (S[k] > S[km]) & (H_dn_m > H_dn_p)
            D_dn[cond] = D_dn_min[cond]

            # Eq. 37
            dt_stab = c_stab * dx**2. / max(max(abs(D_up)), max(abs(D_dn)))
            dt_use = min(dt_stab, dt - stab_t)
            stab_t = stab_t + dt_use

            # explicit time stepping scheme, Eq. 36
            div_q = (D_up * (S[kp] - S[k]) / dx -
                     D_dn * (S[k] - S[km]) / dx) / dx
            # Eq. 35
            S = S[k] + (m_dot + div_q) * dt_use

            # Eq. 7
            S = np.maximum(S, B)

    # Done with the loop, prepare output
    fl.thick = S - B

    # Next step
    self.t += dt
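
# Note added for context (standard numerics, not from the original source):
# the "Eq. 37" time step above is the usual explicit-diffusion stability
# limit. For dH/dt = d/dx(D dH/dx) discretised explicitly, stability
# requires dt <= c * dx**2 / max|D| with c < 0.5; the hard-coded
# c_stab = 0.165 keeps a comfortable safety margin below that bound.
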
def oggm_static_paths():
    """Initialise the OGGM paths from the config file."""

    global PATHS, PARAMS

    # See if the file is there, if not create it
    if not os.path.exists(CONFIG_FILE):
        dldir = os.path.join(os.path.expanduser('~'), 'OGGM')
        config = ConfigObj()
        config['dl_cache_dir'] = os.path.join(dldir, 'download_cache')
        config['dl_cache_readonly'] = False
        config['tmp_dir'] = os.path.join(dldir, 'tmp')
        config['rgi_dir'] = os.path.join(dldir, 'rgi')
        config['test_dir'] = os.path.join(dldir, 'tests')
        config['has_internet'] = True
        config.filename = CONFIG_FILE
        config.write()

    # OK, read in the file
    try:
        config = ConfigObj(CONFIG_FILE, file_error=True)
    except (ConfigObjError, IOError) as e:
        log.critical('Config file could not be parsed (%s): %s',
                     CONFIG_FILE, e)
        sys.exit()

    # Check that all keys are here
    for k in ['dl_cache_dir', 'dl_cache_readonly', 'tmp_dir',
              'rgi_dir', 'test_dir', 'has_internet']:
        if k not in config:
            raise InvalidParamsError('The oggm config file ({}) should have '
                                     'an entry for {}.'.format(CONFIG_FILE,
                                                               k))

    # Override defaults with env variables if available
    if os.environ.get('OGGM_DOWNLOAD_CACHE_RO') is not None:
        ro = bool(strtobool(os.environ.get('OGGM_DOWNLOAD_CACHE_RO')))
        config['dl_cache_readonly'] = ro
    if os.environ.get('OGGM_DOWNLOAD_CACHE') is not None:
        config['dl_cache_dir'] = os.environ.get('OGGM_DOWNLOAD_CACHE')
    if os.environ.get('OGGM_EXTRACT_DIR') is not None:
        # This is for the directories where OGGM needs to extract things
        # On the cluster it might be useful to do it on a fast disc
        edir = os.path.abspath(os.environ.get('OGGM_EXTRACT_DIR'))
        config['tmp_dir'] = os.path.join(edir, 'tmp')
        config['rgi_dir'] = os.path.join(edir, 'rgi')

    # Fill the PATH dict
    for k, v in config.iteritems():
        if not k.endswith('_dir'):
            continue
        PATHS[k] = os.path.abspath(os.path.expanduser(v))

    # Other
    PARAMS.do_log = False
    PARAMS['has_internet'] = config.as_bool('has_internet')
    PARAMS['dl_cache_readonly'] = config.as_bool('dl_cache_readonly')
    PARAMS.do_log = True

    # Create cache dir if possible
    if not os.path.exists(PATHS['dl_cache_dir']):
        if not PARAMS['dl_cache_readonly']:
            os.makedirs(PATHS['dl_cache_dir'])
def sia_thickness(slope, width, flux, shape='rectangular',
                  glen_a=None, fs=None, shape_factor=None):
    """Computes the ice thickness from mass-conservation.

    This is a utility function tested against the true OGGM inversion
    function. Useful for teaching and inversion with calving.

    Parameters
    ----------
    slope : -np.gradient(hgt, dx)
    width : section width in m
    flux : mass flux in m3 s-1
    shape : 'rectangular' or 'parabolic'
    glen_a : Glen A, defaults to PARAMS
    fs : sliding, defaults to PARAMS
    shape_factor : for lateral drag

    Returns
    -------
    the ice thickness (in m)
    """

    if glen_a is None:
        glen_a = cfg.PARAMS['inversion_glen_a']
    if fs is None:
        fs = cfg.PARAMS['inversion_fs']
    if shape not in ['parabolic', 'rectangular']:
        raise InvalidParamsError('shape must be `parabolic` or '
                                 '`rectangular`, not: {}'.format(shape))

    _inv_function = _inversion_simple if fs == 0 else _inversion_poly

    # Ice flow params
    fd = 2. / (cfg.PARAMS['glen_n'] + 2) * glen_a
    rho = cfg.PARAMS['ice_density']

    # Clip the slope, in degrees
    clip_angle = cfg.PARAMS['min_slope']

    # Clip slope to avoid negative and small slopes
    slope = utils.clip_array(slope, np.deg2rad(clip_angle), np.pi / 2.)

    # Convert the flux to m2 s-1 (averaged to represent the sections center)
    flux_a0 = 1 if shape == 'rectangular' else 1.5
    flux_a0 *= flux / width

    # Polynomial factors (a5 = 1)
    a0 = -flux_a0 / ((rho * cfg.G * slope)**3 * fd)
    a3 = fs / fd

    # Inversion with shape factors?
    sf_func = None
    if shape_factor == 'Adhikari' or shape_factor == 'Nye':
        sf_func = utils.shape_factor_adhikari
    elif shape_factor == 'Huss':
        sf_func = utils.shape_factor_huss

    sf = np.ones(slope.shape)  # Default shape factor is 1
    if sf_func is not None:
        # Start iteration for shape factor with first guess of 1
        i = 0
        sf_diff = np.ones(slope.shape)

        # Some hard-coded factors here
        sf_tol = 1e-2
        max_sf_iter = 20

        while i < max_sf_iter and np.any(sf_diff > sf_tol):
            out_thick = _compute_thick(a0, a3, flux_a0, sf, _inv_function)
            is_rectangular = np.repeat(shape == 'rectangular', len(width))
            sf_diff[:] = sf[:]
            sf = sf_func(width, out_thick, is_rectangular)
            sf_diff = sf_diff - sf
            i += 1

        log.info('Shape factor {:s} used, took {:d} iterations for '
                 'convergence.'.format(shape_factor, i))

    return _compute_thick(a0, a3, flux_a0, sf, _inv_function)
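
# Hedged usage sketch (added; values are synthetic): invert the thickness of
# a single cross-section. Requires cfg.initialize() so that the inversion
# defaults (glen_a, fs, min_slope, ice density) are set.
def _example_sia_thickness():  # pragma: no cover
    import numpy as np
    from oggm import cfg
    cfg.initialize()
    slope = np.array([0.1])    # local surface slope [-]
    width = np.array([300.])   # section width [m]
    flux = np.array([0.1])     # mass flux [m3 s-1]
    print(sia_thickness(slope, width, flux, shape='rectangular'))
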
def process_gcm_data(gdir, filesuffix='', prcp=None, temp=None,
                     year_range=('1961', '1990'), scale_stddev=True,
                     time_unit=None, calendar=None, source=''):
    """ Applies the anomaly method to GCM climate data

    This function can be applied to any GCM data, if it is provided in a
    suitable :py:class:`xarray.DataArray`. See Parameter description for
    format details.

    For CESM-LME a specific function :py:func:`tasks.process_cesm_data` is
    available which does the preprocessing of the data and subsequently calls
    this function.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        where to write the data
    filesuffix : str
        append a suffix to the filename (useful for ensemble experiments).
    prcp : :py:class:`xarray.DataArray`
        | monthly total precipitation [mm month-1]
        | Coordinates:
        | lat float64
        | lon float64
        | time: cftime object
    temp : :py:class:`xarray.DataArray`
        | monthly temperature [K]
        | Coordinates:
        | lat float64
        | lon float64
        | time cftime object
    year_range : tuple of str
        the year range for which you want to compute the anomalies. Default
        is `('1961', '1990')`
    scale_stddev : bool
        whether or not to scale the temperature standard deviation as well
    time_unit : str
        The unit conversion for NetCDF files. It must be adapted to the
        length of the time series. The default is to choose it ourselves
        based on the starting year.
        For example: 'days since 0850-01-01 00:00:00'
    calendar : str
        If you use an exotic calendar (e.g. 'noleap')
    source : str
        For metadata: the source of the climate data
    """

    # Standard sanity checks
    months = temp['time.month']
    if months[0] != 1:
        raise ValueError('We expect the files to start in January!')
    if months[-1] < 10:
        raise ValueError('We expect the files to end in December!')

    if (np.abs(temp['lon']) > 180) or (np.abs(prcp['lon']) > 180):
        raise ValueError('We expect the longitude coordinates to be within '
                         '[-180, 180].')

    # from normal years to hydrological years
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    if sm != 1:
        prcp = prcp[sm - 1:sm - 13].load()
        temp = temp[sm - 1:sm - 13].load()

    assert len(prcp) // 12 == len(prcp) / 12, \
        "Somehow we didn't get full years"
    assert len(temp) // 12 == len(temp) / 12, \
        "Somehow we didn't get full years"

    # Get the reference data to apply the anomaly to
    fpath = gdir.get_filepath('climate_historical')
    with xr.open_dataset(fpath) as ds_ref:
        ds_ref = ds_ref.sel(time=slice(*year_range))

        # compute monthly anomalies
        # of temp
        if scale_stddev:
            # This is a bit more arithmetic
            ts_tmp_sel = temp.sel(time=slice(*year_range))
            if len(ts_tmp_sel) // 12 != len(ts_tmp_sel) / 12:
                raise InvalidParamsError('year_range cannot contain the '
                                         'first or last calendar year in '
                                         'the series')
            if ((len(ts_tmp_sel) // 12) % 2) == 1:
                raise InvalidParamsError('We need an even number of years '
                                         'for this to work')
            ts_tmp_std = ts_tmp_sel.groupby('time.month').std(dim='time')
            std_fac = (ds_ref.temp.groupby('time.month').std(dim='time') /
                       ts_tmp_std)
            if sm != 1:
                # Just to avoid useless roll
                std_fac = std_fac.roll(month=13 - sm, roll_coords=True)
            std_fac = np.tile(std_fac.data, len(temp) // 12)
            # We need an even number of years for this to work
            win_size = len(ts_tmp_sel) + 1

            def roll_func(x, axis=None):
                x = x[:, ::12]
                n = len(x[0, :]) // 2
                xm = np.nanmean(x, axis=axis)
                return xm + (x[:, n] - xm) * std_fac

            temp = temp.rolling(time=win_size, center=True,
                                min_periods=1).reduce(roll_func)

        ts_tmp_sel = temp.sel(time=slice(*year_range))
        if len(ts_tmp_sel.time) != len(ds_ref.time):
            raise InvalidParamsError('The reference climate period and the '
                                     'GCM period after window selection do '
                                     'not match.')
        ts_tmp_avg = ts_tmp_sel.groupby('time.month').mean(dim='time')
        ts_tmp = temp.groupby('time.month') - ts_tmp_avg
        # of precip -- scaled anomalies
        ts_pre_avg = prcp.sel(time=slice(*year_range))
        ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
        ts_pre_ano = prcp.groupby('time.month') - ts_pre_avg
        # scaled anomalies is the default. Standard anomalies above
        # are used later for where ts_pre_avg == 0
        ts_pre = prcp.groupby('time.month') / ts_pre_avg

        # for temp
        loc_tmp = ds_ref.temp.groupby('time.month').mean()
        ts_tmp = ts_tmp.groupby('time.month') + loc_tmp

        # for prcp
        loc_pre = ds_ref.prcp.groupby('time.month').mean()
        # scaled anomalies
        ts_pre = ts_pre.groupby('time.month') * loc_pre
        # standard anomalies
        ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
        # Correct infinite values with standard anomalies
        ts_pre.values = np.where(np.isfinite(ts_pre.values),
                                 ts_pre.values,
                                 ts_pre_ano.values)
        # The previous step might create negative values (unlikely).
        # Clip them
        ts_pre.values = utils.clip_min(ts_pre.values, 0)

        assert np.all(np.isfinite(ts_pre.values))
        assert np.all(np.isfinite(ts_tmp.values))

        gdir.write_monthly_climate_file(temp.time.values,
                                        ts_pre.values, ts_tmp.values,
                                        float(ds_ref.ref_hgt),
                                        prcp.lon.values, prcp.lat.values,
                                        time_unit=time_unit,
                                        calendar=calendar,
                                        file_name='gcm_data',
                                        source=source,
                                        filesuffix=filesuffix)
def parse_args(args):
    """Check input arguments and env variables"""

    # CLI args
    description = ('Generate the preprocessed OGGM glacier directories for '
                   'this OGGM version.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--map-border', type=int,
                        help='the size of the map border. Is required if '
                             '$OGGM_MAP_BORDER is not set.')
    parser.add_argument('--rgi-reg', type=str,
                        help='the rgi region to process. Is required if '
                             '$OGGM_RGI_REG is not set.')
    parser.add_argument('--rgi-version', type=str,
                        help='the RGI version to use. Defaults to the OGGM '
                             'default.')
    parser.add_argument('--start-level', type=int, default=0,
                        help='the pre-processed level to start from '
                             '(default is to start from 0). If set, you '
                             'will need to indicate --start-base-url '
                             'as well.')
    parser.add_argument('--start-base-url', type=str,
                        help='the pre-processed base-url to fetch the data '
                             'from when starting from level > 0.')
    parser.add_argument('--max-level', type=int, default=5,
                        help='the maximum level you want to run the '
                             'pre-processing for (1, 2, 3, 4 or 5).')
    parser.add_argument('--working-dir', type=str,
                        help='path to the directory where to write the '
                             'output. Defaults to current directory or '
                             '$OGGM_WORKDIR.')
    parser.add_argument('--params-file', type=str,
                        help='path to the OGGM parameter file to use in '
                             'place of the default one.')
    parser.add_argument('--ref-tstars-base-url', type=str,
                        help='the url where to find the pre-calibrated '
                             'reference tstar list. Required as of v1.4.')
    parser.add_argument('--output', type=str,
                        help='path to the directory where to write the '
                             'output. Defaults to current directory or '
                             '$OGGM_OUTDIR.')
    parser.add_argument('--logging-level', type=str, default='WORKFLOW',
                        help='the logging level to use (DEBUG, INFO, '
                             'WARNING, WORKFLOW).')
    parser.add_argument('--elev-bands', nargs='?', const=True, default=False,
                        help='compute the flowlines based on the Huss&Hock '
                             '2015 method instead of the OGGM default, '
                             'which is a mix of elev_bands and centerlines.')
    parser.add_argument('--centerlines-only', nargs='?', const=True,
                        default=False,
                        help='compute the flowlines based on the OGGM '
                             'centerline(s) method instead of the OGGM '
                             'default, which is a mix of elev_bands and '
                             'centerlines.')
    parser.add_argument('--match-regional-geodetic-mb', type=str, default='',
                        help='match regional SMB values to geodetic '
                             'estimates (currently hugonnet: Hugonnet et '
                             'al., 2020, or zemp: Zemp et al., 2019) by '
                             'shifting the SMB residual.')
    parser.add_argument('--match-geodetic-mb-per-glacier', type=str,
                        default='',
                        help='match SMB values to geodetic estimates '
                             '(currently hugonnet: Hugonnet et al., '
                             '2020 only).')
    parser.add_argument('--evolution-model', type=str, default='fl_sia',
                        help='which geometry evolution model to use: '
                             '`fl_sia` (default), or `massredis` (mass '
                             'redistribution curve).')
    parser.add_argument('--dem-source', type=str, default='',
                        help='which DEM source to use. Possible options are '
                             'the name of a specific DEM (e.g. RAMP, '
                             'SRTM...) or ALL, in which case all available '
                             'DEMs will be processed and adjoined with a '
                             'suffix at the end of the file name. The ALL '
                             'option is only compatible with level 1 '
                             'folders, after which the processing will '
                             'stop. The default is to use the default '
                             'OGGM DEM.')
    parser.add_argument('--add-consensus', nargs='?', const=True,
                        default=False,
                        help='adds (reprojects) the consensus estimates '
                             'thickness to the glacier directories. '
                             'With --elev-bands, the data will also be '
                             'binned.')
    parser.add_argument('--demo', nargs='?', const=True, default=False,
                        help='if you want to run the prepro for the '
                             'list of demo glaciers.')
    parser.add_argument('--test', nargs='?', const=True, default=False,
                        help='if you want to do a test on a couple of '
                             'glaciers first.')
    parser.add_argument('--test-ids', nargs='+',
                        help='if --test, specify the RGI ids to run '
                             'separated by a space (default: 4 randomly '
                             'selected).')
    parser.add_argument('--disable-dl-verify', nargs='?', const=True,
                        default=False,
                        help='if used OGGM downloads will not be verified '
                             'against a hash sum.')
    parser.add_argument('--disable-mp', nargs='?', const=True, default=False,
                        help='if you want to disable multiprocessing.')
    parser.add_argument('--dynamic_spinup', type=str, default='',
                        help="include a dynamic spinup for matching 'area' "
                             "OR 'volume' at the RGI-date")
    args = parser.parse_args(args)

    # Check input
    rgi_reg = args.rgi_reg
    if args.demo:
        rgi_reg = 0
    if not rgi_reg and not args.demo:
        rgi_reg = os.environ.get('OGGM_RGI_REG', None)
        if rgi_reg is None:
            raise InvalidParamsError('--rgi-reg is required!')
    rgi_reg = '{:02}'.format(int(rgi_reg))

    ok_regs = ['{:02}'.format(int(r)) for r in range(1, 20)]
    if not args.demo and rgi_reg not in ok_regs:
        raise InvalidParamsError('--rgi-reg should range from 01 to 19!')

    rgi_version = args.rgi_version

    border = args.map_border
    if not border:
        border = os.environ.get('OGGM_MAP_BORDER', None)
        if border is None:
            raise InvalidParamsError('--map-border is required!')

    working_dir = args.working_dir
    if not working_dir:
        working_dir = os.environ.get('OGGM_WORKDIR', '')

    output_folder = args.output
    if not output_folder:
        output_folder = os.environ.get('OGGM_OUTDIR', '')

    border = int(border)
    output_folder = os.path.abspath(output_folder)
    working_dir = os.path.abspath(working_dir)

    # All good
    return dict(rgi_version=rgi_version, rgi_reg=rgi_reg, border=border,
                output_folder=output_folder, working_dir=working_dir,
                params_file=args.params_file,
                is_test=args.test, test_ids=args.test_ids,
                demo=args.demo, dem_source=args.dem_source,
                start_level=args.start_level,
                start_base_url=args.start_base_url,
                max_level=args.max_level,
                disable_mp=args.disable_mp,
                logging_level=args.logging_level,
                elev_bands=args.elev_bands,
                centerlines_only=args.centerlines_only,
                match_regional_geodetic_mb=args.match_regional_geodetic_mb,
                match_geodetic_mb_per_glacier=args.match_geodetic_mb_per_glacier,
                add_consensus=args.add_consensus,
                disable_dl_verify=args.disable_dl_verify,
                ref_tstars_base_url=args.ref_tstars_base_url,
                evolution_model=args.evolution_model,
                dynamic_spinup=(False if args.dynamic_spinup == ''
                                else args.dynamic_spinup))
def parse_args(args):
    """Check input arguments and env variables"""

    # CLI args
    description = ('Run an OGGM benchmark on a selected RGI Region. '
                   'This writes a benchmark_{border}.txt file where '
                   'the results are summarized')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--map-border', type=int,
                        help='the size of the map border. Is required if '
                             '$OGGM_MAP_BORDER is not set.')
    parser.add_argument('--rgi-reg', type=str,
                        help='the rgi region to process. Is required if '
                             '$OGGM_RGI_REG is not set.')
    parser.add_argument('--rgi-version', type=str,
                        help='the RGI version to use. Defaults to the OGGM '
                             'default.')
    parser.add_argument('--working-dir', type=str,
                        help='path to the directory where to write the '
                             'output. Defaults to current directory or '
                             '$OGGM_WORKDIR.')
    parser.add_argument('--output', type=str,
                        help='path to the directory where to write the '
                             'output. Defaults to current directory or '
                             '$OGGM_OUTDIR.')
    parser.add_argument('--test', nargs='?', const=True, default=False,
                        help='if you want to do a test on a couple of '
                             'glaciers first.')
    args = parser.parse_args(args)

    # Check input
    rgi_reg = args.rgi_reg
    if not rgi_reg:
        rgi_reg = os.environ.get('OGGM_RGI_REG', None)
        if rgi_reg is None:
            raise InvalidParamsError('--rgi-reg is required!')
    rgi_reg = '{:02}'.format(int(rgi_reg))

    rgi_version = args.rgi_version

    border = args.map_border
    if not border:
        border = os.environ.get('OGGM_MAP_BORDER', None)
        if border is None:
            raise InvalidParamsError('--map-border is required!')

    working_dir = args.working_dir
    if not working_dir:
        working_dir = os.environ.get('OGGM_WORKDIR', '')

    output_folder = args.output
    if not output_folder:
        output_folder = os.environ.get('OGGM_OUTDIR', '')

    border = int(border)
    output_folder = os.path.abspath(output_folder)
    working_dir = os.path.abspath(working_dir)

    # All good
    return dict(rgi_version=rgi_version, rgi_reg=rgi_reg,
                border=border, output_folder=output_folder,
                working_dir=working_dir, is_test=args.test)
def _get_tempformelt(self, temp, pok):
    """ Helper function to compute tempformelt to avoid code duplication
    in get_monthly_climate() and _get2d_annual_climate()

    If using this again outside of this class, need to remove the "self",
    such as for 'mb_climate_on_height' in climate.py, that has no self...
    (would need to change temp, t_melt, temp_std, mb_type, N, loop)

    Input: stuff that is different for the different methods
        temp: temperature time series
        pok: indices of time series

    Returns
    -------
    (tempformelt)
    """

    tempformelt_without_std = temp - self.t_melt

    # computations change only if 'mb_daily' as mb_type!
    if self.mb_type == 'mb_monthly' or self.mb_type == 'mb_real_daily':
        tempformelt = tempformelt_without_std
    elif self.mb_type == 'mb_daily':

        itemp_std = self.temp_std[pok]

        tempformelt_with_std = np.full(np.shape(tempformelt_without_std),
                                       np.NaN)

        # matrix with N values that are distributed around 0
        # showing how much fake 'daily' values vary from the mean
        z_scores_mean = stats.norm.ppf(
            np.arange(1 / self.N - 1 / (2 * self.N), 1, 1 / self.N))

        z_std = np.matmul(np.atleast_2d(z_scores_mean).T,
                          np.atleast_2d(itemp_std))

        # there are two possibilities,
        # not using the loop is most of the times faster
        if self.loop is False:
            # without the loop: but not much faster ..
            tempformelt_daily = np.atleast_3d(tempformelt_without_std).T + \
                np.atleast_3d(z_std)
            clip_min(tempformelt_daily, 0, out=tempformelt_daily)
            tempformelt_with_std = tempformelt_daily.mean(axis=0).T
        else:
            shape_tfm = np.shape(tempformelt_without_std)
            tempformelt_with_std = np.full(shape_tfm, np.NaN)
            z_std = np.matmul(np.atleast_2d(z_scores_mean).T,
                              np.atleast_2d(itemp_std))
            for h in np.arange(0, np.shape(tempformelt_without_std)[0]):
                h_tfm_daily_ = np.atleast_2d(tempformelt_without_std[h, :])
                h_tempformelt_daily = h_tfm_daily_ + z_std
                clip_min(h_tempformelt_daily, 0, out=h_tempformelt_daily)
                h_tempformelt_monthly = h_tempformelt_daily.mean(axis=0)
                tempformelt_with_std[h, :] = h_tempformelt_monthly
        tempformelt = tempformelt_with_std

    else:
        raise InvalidParamsError('mb_type can only be "mb_monthly", '
                                 '"mb_daily" or "mb_real_daily"')

    # replace all values below zero to zero
    clip_min(tempformelt, 0, out=tempformelt)
    return tempformelt
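
# Illustration (added, with approximate values): for N=4 the percentile
# midpoints in _get_tempformelt above are
# np.arange(1/4 - 1/8, 1, 1/4) = [0.125, 0.375, 0.625, 0.875], which
# stats.norm.ppf maps to z-scores of roughly [-1.15, -0.32, 0.32, 1.15].
# Scaling these by the monthly temperature standard deviation yields N
# synthetic 'daily' anomalies per month whose mean is zero, which is
# exactly what the z_std matrix provides.
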
def __init__(self, gdir, mu_star, bias=0, mb_type='mb_daily', N=10000,
             loop=False, grad_type='cte', filename='climate_historical',
             input_filesuffix='', repeat=False, ys=None, ye=None,
             t_solid=0, t_liq=2, t_melt=0, prcp_fac=2.5,
             default_grad=-0.0065,
             temp_local_gradient_bounds=[-0.009, -0.003],
             SEC_IN_YEAR=SEC_IN_YEAR, SEC_IN_MONTH=SEC_IN_MONTH):
    """ Initialize.

    Parameters
    ----------
    gdir : GlacierDirectory
        the glacier directory
    mu_star : float
        monthly temperature sensitivity (kg /m² /mth /K), need to be
        prescribed, e.g. such that |mean(MODEL_MB)-mean(REF_MB)|--> 0
    bias : float, optional
        the mass balance bias [mm we yr-1] you want to use
        (the default is to use zero bias)
        Note that this bias is *subtracted* from the computed MB.
        Indeed: BIAS = MODEL_MB - REFERENCE_MB.
    mb_type : str
        three types: 'mb_daily' (default: use temp_std and N percentiles),
        'mb_monthly' (same as default OGGM mass balance),
        'mb_real_daily' (use daily temperature values).
        the mb_type only works if the baseline_climate of gdir is right
    N : int
        number of percentiles used to generate gaussian-like daily
        temperatures from daily std and mean monthly temp
    loop : bool
        the way how the matrix multiplication is done,
        using np.matmul or a loop (default: False)
        only applied if mb_type is 'mb_daily'
        which one is faster?
    grad_type : str
        three types of applying the temperature gradient:
        'cte' (default, constant lapse rate, set to default_grad,
        same as in default OGGM)
        'var_an_cycle' (varies spatially and over annual cycle,
        but constant over the years)
        'var' (varies spatially & temporally as in the climate files)
    filename : str, optional
        set to a different BASENAME if you want to use alternative climate
        data, default is climate_historical
    input_filesuffix : str
        the file suffix of the input climate file, default is '',
        if ERA5_daily with daily temperatures, it is set to _daily
    repeat : bool
        Whether the climate period given by [ys, ye] should be repeated
        indefinitely in a circular way
    ys : int
        The start of the climate period where the MB model is valid
        (default: the period with available data)
    ye : int
        The end of the climate period where the MB model is valid
        (default: the period with available data)
    t_solid : float
        temperature threshold for solid precipitation
        (degree Celsius, default 0)
    t_liq : float
        temperature threshold for liquid precipitation
        (degree Celsius, default 2)
    t_melt : float
        temperature threshold where snow/ice melts
        (degree Celsius, default 0)
    prcp_fac : float, >0
        multiplicative precipitation correction factor (default 2.5)
    default_grad : float
        constant lapse rate (temperature gradient, default: -0.0065 m/K)
        if grad_type != cte, then this value is not used
        but instead the changing lapse rate from the climate datasets
    temp_local_gradient_bounds : [float, float]
        if grad_type != cte and the lapse rate does not lie in this range,
        set it instead to these minimum, maximum gradients
        (default: [-0.009, -0.003] m/K)
    SEC_IN_YEAR : float
        seconds in a year (default: 31536000s),
        maybe this could be changed
    SEC_IN_MONTH : float
        seconds in a month (default: 2628000s),
        maybe this could be changed as not each month has the same amount
        of seconds, in February can be a difference of 8%

    Attributes
    ----------
    temp_bias : float, default 0
        Add a temperature bias to the time series
    prcp_bias : float, default 1
        Precipitation factor to the time series (called bias for
        consistency with `temp_bias`)
    """

    self.mu_star = mu_star
    self.bias = bias

    # Parameters (from cfg.PARAMS in OGGM default)
    self.t_solid = t_solid
    self.t_liq = t_liq
    self.t_melt = t_melt
    self.N = N
    self.mb_type = mb_type
    self.loop = loop
    self.grad_type = grad_type
    # default rho is 900 kg/m3
    self.rho = cfg.PARAMS['ice_density']

    # Public attrs
    self.hemisphere = gdir.hemisphere
    self.temp_bias = 0.
    self.prcp_bias = 1.
    self.repeat = repeat

    self.SEC_IN_YEAR = SEC_IN_YEAR
    self.SEC_IN_MONTH = SEC_IN_MONTH

    # check if the right climate is used for the right mb_type
    # these checks might be changed if there are more climate datasets
    # available!!!
    # only have daily temperatures for 'ERA5_daily'
    baseline_climate = gdir.get_climate_info()['baseline_climate_source']
    if (self.mb_type == 'mb_real_daily' and
            baseline_climate != 'ERA5_daily'):
        text = ('wrong climate for mb_real_daily, need to do e.g. '
                'process_era5_daily_data(gd) to enable ERA5_daily')
        raise InvalidParamsError(text)
    # mb_monthly does not work when daily temperatures are used
    if self.mb_type == 'mb_monthly' and baseline_climate == 'ERA5_daily':
        text = ('wrong climate for mb_monthly, need to do e.g. '
                'oggm.shop.ecmwf.process_ecmwf_data(gd, dataset="ERA5dr")')
        raise InvalidParamsError(text)
    # mb_daily needs temp_std
    if self.mb_type == 'mb_daily' and baseline_climate == 'ERA5_daily':
        text = ('wrong climate for mb_daily, need to do e.g. '
                'oggm.shop.ecmwf.process_ecmwf_data(gd, dataset="ERA5dr")')
        raise InvalidParamsError(text)

    if baseline_climate == 'ERA5_daily':
        input_filesuffix = '_daily'

    # Read climate file
    fpath = gdir.get_filepath(filename, filesuffix=input_filesuffix)

    # used xarray instead of netCDF4, is this slower?
    with xr.open_dataset(fpath) as xr_nc:
        if self.mb_type == 'mb_real_daily' or self.mb_type == 'mb_monthly':
            # even if there is temp_std inside the dataset, we won't use
            # it for these mb_types
            self.temp_std = np.NaN
        else:
            try:
                self.temp_std = xr_nc['temp_std'].values
            except KeyError:
                text = ('The applied climate has no temp std, do e.g. '
                        'oggm.shop.ecmwf.process_ecmwf_data'
                        '(gd, dataset="ERA5dr")')
                raise InvalidParamsError(text)

        # goal is to get self.years/self.months in hydro_years
        if self.mb_type != 'mb_real_daily':
            time = xr_nc.time
            ny, r = divmod(len(time), 12)
            if r != 0:
                raise ValueError('Climate data should be N full years')
            # This is where we switch to hydro float year format
            # Last year gives the tone of the hydro year
            self.years = np.repeat(
                np.arange(xr_nc.time[-1].dt.year - ny + 1,
                          xr_nc.time[-1].dt.year + 1), 12)
            self.months = np.tile(np.arange(1, 13), ny)

        elif self.mb_type == 'mb_real_daily':
            # use pandas to convert month/year to hydro_years
            # this has to be done differently than above because not
            # every month, year has the same amount of days
            pd_test = pd.DataFrame(xr_nc.time.to_series().dt.year.values,
                                   columns=['year'])
            pd_test.index = xr_nc.time.to_series().values
            pd_test['month'] = xr_nc.time.to_series().dt.month.values
            pd_test['hydro_year'] = np.NaN
            # get the month where the hydrological month starts
            # as chosen from the gdir climate file
            # default 10 for 'nh', 4 for 'sh'
            hydro_month_start = int(xr_nc.time[0].dt.month.values)
            if hydro_month_start == 1:
                # hydro_year corresponds to normal year
                pd_test.loc[pd_test.index.month >= hydro_month_start,
                            'hydro_year'] = pd_test['year']
            else:
                pd_test.loc[pd_test.index.month < hydro_month_start,
                            'hydro_year'] = pd_test['year']
                # otherwise, those days with a month>=hydro_month_start
                # belong to the next hydro_year
                pd_test.loc[pd_test.index.month >= hydro_month_start,
                            'hydro_year'] = pd_test['year'] + 1
            # month_hydro is 1 if it is hydro_month_start
            month_hydro = pd_test['month'].values + (12 - hydro_month_start
                                                     + 1)
            month_hydro[month_hydro > 12] += -12
            pd_test['hydro_month'] = month_hydro
            pd_test = pd_test.astype('int')
            self.years = pd_test['hydro_year'].values
            ny = self.years[-1] - self.years[0] + 1
            self.months = pd_test['hydro_month'].values

        # Read timeseries
        self.temp = xr_nc['temp'].values
        self.prcp = xr_nc['prcp'].values * prcp_fac

        # lapse rate (temperature gradient)
        if self.grad_type == 'var' or self.grad_type == 'var_an_cycle':
            try:
                grad = xr_nc['gradient'].values
                # Security for stuff that can happen with local gradients
                g_minmax = temp_local_gradient_bounds
                # if gradient is not a number, or positive/negative
                # infinity, use the default gradient
                grad = np.where(~np.isfinite(grad), default_grad, grad)
                # if outside boundaries of default -0.009 and above
                # -0.003 -> use the boundaries instead
                grad = clip_array(grad, g_minmax[0], g_minmax[1])

                if self.grad_type == 'var_an_cycle':
                    # if we want constant lapse rates over the years
                    # that change over the annual cycle, but not over time
                    if self.mb_type == 'mb_real_daily':
                        grad_gb = xr_nc['gradient'].groupby('time.month')
                        grad = grad_gb.mean().values
                        g_minmax = temp_local_gradient_bounds
                        # if gradient is not a number, or positive/negative
                        # infinity, use the default gradient
                        grad = np.where(~np.isfinite(grad), default_grad,
                                        grad)
                        # if outside boundaries of default -0.009 and above
                        # -0.003 -> use the boundaries instead
                        grad = clip_array(grad, g_minmax[0], g_minmax[1])

                        stack_grad = grad.reshape(-1, 12)
                        grad = np.tile(stack_grad.mean(axis=0), ny)
                        reps_day1 = xr_nc.time[xr_nc.time.dt.day == 1]
                        reps = reps_day1.dt.daysinmonth
                        grad = np.repeat(grad, reps)
                    else:
                        stack_grad = grad.reshape(-1, 12)
                        grad = np.tile(stack_grad.mean(axis=0), ny)
            except KeyError:
                text = ('there is no gradient available in chosen climate '
                        'file, try instead e.g. ERA5_daily or ERA5dr e.g. '
                        'oggm.shop.ecmwf.process_ecmwf_data'
                        '(gd, dataset="ERA5dr")')
                raise InvalidParamsError(text)

        elif self.grad_type == 'cte':
            # if grad_type is chosen cte, we use the default_grad!
            grad = self.prcp * 0 + default_grad
        else:
            raise InvalidParamsError('grad_type can be either cte, '
                                     'var or var_an_cycle')
        self.grad = grad
        self.ref_hgt = xr_nc.ref_hgt
        self.ys = self.years[0] if ys is None else ys
        self.ye = self.years[-1] if ye is None else ye
def init_glacier_regions(rgidf=None, *, reset=False, force=False,
                         from_prepro_level=None, prepro_border=None,
                         prepro_rgi_version=None, prepro_base_url=None,
                         from_tar=False, delete_tar=False,
                         use_demo_glaciers=None):
    """DEPRECATED: Initializes the list of Glacier Directories for this run.

    This is the very first task to do (always). If the directories are
    already available in the working directory, use them. If not, create
    new ones.

    Parameters
    ----------
    rgidf : GeoDataFrame or list of ids, optional for pre-computed runs
        the RGI glacier outlines. If unavailable, OGGM will parse the
        information from the glacier directories found in the working
        directory. It is required for new runs.
    reset : bool
        delete the existing glacier directories if found.
    force : bool
        setting `reset=True` will trigger a yes/no question to the user.
        Set `force=True` to avoid this.
    from_prepro_level : int
        get the gdir data from the official pre-processed pool. See the
        documentation for more information
    prepro_border : int
        for `from_prepro_level` only: if you want to override the default
        behavior which is to use `cfg.PARAMS['border']`
    prepro_rgi_version : str
        for `from_prepro_level` only: if you want to override the default
        behavior which is to use `cfg.PARAMS['rgi_version']`
    prepro_base_url : str
        for `from_prepro_level` only: if you want to override the default
        URL from which to download the gdirs. Default currently is
        https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.1/
    use_demo_glaciers : bool
        whether to check the demo glaciers for download (faster than the
        standard prepro downloads). The default is to decide whether or
        not to check based on simple criteria such as glacier list size.
    from_tar : bool, default=False
        extract the gdir data from a tar file. If set to `True`,
        will check for a tar file at the expected location in `base_dir`.
    delete_tar : bool, default=False
        delete the original tar file after extraction.

    Returns
    -------
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
        the initialised glacier directories

    Notes
    -----
    This task is deprecated in favor of the more explicit
    init_glacier_directories. Indeed, init_glacier_directories is very
    similar to init_glacier_regions, but it does not process the DEMs:
    a glacier directory is valid also without DEM.
    """

    _check_duplicates(rgidf)

    if reset and not force:
        reset = utils.query_yes_no('Delete all glacier directories?')

    if prepro_border is None:
        prepro_border = int(cfg.PARAMS['border'])

    if from_prepro_level and prepro_border not in [10, 80, 160, 250]:
        if 'test' not in utils._downloads.GDIR_URL:
            raise InvalidParamsError("prepro_border or cfg.PARAMS['border'] "
                                     "should be one of: 10, 80, 160, 250.")

    # if reset delete also the log directory
    if reset:
        fpath = os.path.join(cfg.PATHS['working_dir'], 'log')
        if os.path.exists(fpath):
            rmtree(fpath)

    gdirs = []
    new_gdirs = []
    if rgidf is None:
        if reset:
            raise ValueError('Cannot use reset without setting rgidf')
        log.workflow('init_glacier_regions by parsing available folders '
                     '(can be slow).')
        # The dirs should be there already
        gl_dir = os.path.join(cfg.PATHS['working_dir'], 'per_glacier')
        for root, _, files in os.walk(gl_dir):
            if files and ('dem.tif' in files):
                gdirs.append(oggm.GlacierDirectory(os.path.basename(root)))
    else:
        # Check if dataframe or list of strs
        try:
            entities = []
            for _, entity in rgidf.iterrows():
                entities.append(entity)
        except AttributeError:
            entities = utils.tolist(rgidf)

        # Check demo
        if use_demo_glaciers is None:
            use_demo_glaciers = len(entities) < 100

        if from_prepro_level is not None:
            log.workflow('init_glacier_regions from prepro level {} on '
                         '{} glaciers.'.format(from_prepro_level,
                                               len(entities)))
            # Read the hash dictionary before we use multiproc
            if cfg.PARAMS['dl_verify']:
                utils.get_dl_verify_data('cluster.klima.uni-bremen.de')
            gdirs = execute_entity_task(gdir_from_prepro, entities,
                                        from_prepro_level=from_prepro_level,
                                        prepro_border=prepro_border,
                                        prepro_rgi_version=prepro_rgi_version,
                                        check_demo_glacier=use_demo_glaciers,
                                        base_url=prepro_base_url)
        else:
            # We can set the intersects file automatically here
            if (cfg.PARAMS['use_intersects'] and
                    len(cfg.PARAMS['intersects_gdf']) == 0):
                rgi_ids = np.unique(np.sort([entity.RGIId for entity in
                                             entities]))
                rgi_version = rgi_ids[0].split('-')[0][-2:]
                fp = utils.get_rgi_intersects_entities(rgi_ids,
                                                       version=rgi_version)
                cfg.set_intersects_db(fp)

            gdirs = execute_entity_task(utils.GlacierDirectory, entities,
                                        reset=reset,
                                        from_tar=from_tar,
                                        delete_tar=delete_tar)

        for gdir in gdirs:
            if not os.path.exists(gdir.get_filepath('dem')):
                new_gdirs.append(gdir)

    if len(new_gdirs) > 0:
        # If not initialized, run the task in parallel
        execute_entity_task(tasks.define_glacier_region, new_gdirs)

    return gdirs
def sia_thickness_via_optim(slope, width, flux, shape='rectangular',
                            glen_a=None, fs=None, t_lambda=None):
    """Compute the thickness numerically instead of analytically.

    It's the only way that works for trapezoid shapes.

    Parameters
    ----------
    slope : float or array
        the local slope along the flowline, i.e. -np.gradient(hgt, dx)
    width : float or array
        the section width in m
    flux : float or array
        the mass flux in m3 s-1
    shape : str
        'rectangular', 'trapezoid' or 'parabolic'
    glen_a : float
        Glen A, defaults to PARAMS
    fs : float
        the sliding parameter, defaults to PARAMS
    t_lambda : float
        the trapezoid lambda, defaults to PARAMS

    Returns
    -------
    the ice thickness (in m)
    """

    if len(np.atleast_1d(slope)) > 1:
        shape = utils.tolist(shape, len(slope))
        t_lambda = utils.tolist(t_lambda, len(slope))
        out = []
        for sl, w, f, s, t in zip(slope, width, flux, shape, t_lambda):
            out.append(sia_thickness_via_optim(sl, w, f, shape=s,
                                               glen_a=glen_a, fs=fs,
                                               t_lambda=t))
        return np.asarray(out)

    # Sanity
    if flux <= 0:
        return 0
    if width <= MIN_WIDTH_FOR_INV:
        return 0

    if glen_a is None:
        glen_a = cfg.PARAMS['inversion_glen_a']
    if fs is None:
        fs = cfg.PARAMS['inversion_fs']
    if t_lambda is None:
        t_lambda = cfg.PARAMS['trapezoid_lambdas']
    if shape not in ['parabolic', 'rectangular', 'trapezoid']:
        raise InvalidParamsError('shape must be `parabolic`, `trapezoid` '
                                 'or `rectangular`, not: {}'.format(shape))

    # Ice flow params
    n = cfg.PARAMS['glen_n']
    fd = 2 / (n + 2) * glen_a
    rho = cfg.PARAMS['ice_density']
    rhogh = (rho * cfg.G * slope)**n

    # To avoid geometrical inconsistencies
    max_h = width / t_lambda if shape == 'trapezoid' else 1e4

    def to_minimize(h):
        u = (h**(n + 1)) * fd * rhogh + (h**(n - 1)) * fs * rhogh
        if shape == 'parabolic':
            sect = 2. / 3. * width * h
        elif shape == 'trapezoid':
            w0m = width - t_lambda * h
            sect = (width + w0m) / 2 * h
        else:
            sect = width * h
        return sect * u - flux

    out_h, r = optimize.brentq(to_minimize, 0, max_h, full_output=True)
    return out_h
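# A self-contained sketch (hypothetical values; density, gravity and Glen A
# below are only indicative defaults, not the PARAMS used above) of the
# root-finding problem solved by `sia_thickness_via_optim` for a rectangular
# section without sliding: find h such that section(h) * u(h) == flux.
#
#     import numpy as np
#     from scipy import optimize
#
#     n = 3.                                 # Glen's n
#     fd = 2. / (n + 2) * 2.4e-24            # deformation parameter
#     rho, g = 900., 9.81                    # kg m-3, m s-2
#     slope, width, flux = 0.1, 300., 0.05   # (-), m, m3 s-1
#     rhogh = (rho * g * slope) ** n
#
#     def to_minimize(h):
#         u = h ** (n + 1) * fd * rhogh      # deformation velocity only
#         return width * h * u - flux        # rectangular section area * u
#
#     h = optimize.brentq(to_minimize, 0, 1e4)  # roughly 190 m here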
def process_histalp_data(gdir): """Processes and writes the HISTALP baseline climate data for this glacier. Extracts the nearest timeseries and writes everything to a NetCDF file. Parameters ---------- gdir : :py:class:`oggm.GlacierDirectory` the glacier directory to process """ if cfg.PATHS.get('climate_file', None): warnings.warn("You seem to have set a custom climate file for this " "run, but are using the default HISTALP climate file " "instead.") if cfg.PARAMS['baseline_climate'] != 'HISTALP': raise InvalidParamsError("cfg.PARAMS['baseline_climate'] should be " "set to HISTALP.") # read the time out of the pure netcdf file ft = get_histalp_file('tmp') fp = get_histalp_file('pre') with utils.ncDataset(ft) as nc: vt = nc.variables['time'] assert vt[0] == 0 assert vt[-1] == vt.shape[0] - 1 t0 = vt.units.split(' since ')[1][:7] time_t = pd.date_range(start=t0, periods=vt.shape[0], freq='MS') with utils.ncDataset(fp) as nc: vt = nc.variables['time'] assert vt[0] == 0.5 assert vt[-1] == vt.shape[0] - .5 t0 = vt.units.split(' since ')[1][:7] time_p = pd.date_range(start=t0, periods=vt.shape[0], freq='MS') # Now open with salem nc_ts_tmp = salem.GeoNetcdf(ft, time=time_t) nc_ts_pre = salem.GeoNetcdf(fp, time=time_p) # set temporal subset for the ts data (hydro years) # the reference time is given by precip, which is shorter sm = cfg.PARAMS['hydro_month_nh'] em = sm - 1 if (sm > 1) else 12 yrs = nc_ts_pre.time.year y0, y1 = yrs[0], yrs[-1] if cfg.PARAMS['baseline_y0'] != 0: y0 = cfg.PARAMS['baseline_y0'] if cfg.PARAMS['baseline_y1'] != 0: y1 = cfg.PARAMS['baseline_y1'] nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm), t1='{}-{:02d}-01'.format(y1, em)) nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm), t1='{}-{:02d}-01'.format(y1, em)) time = nc_ts_pre.time ny, r = divmod(len(time), 12) assert r == 0 # Units assert nc_ts_tmp._nc.variables['HSURF'].units.lower() in [ 'm', 'meters', 'meter', 'metres', 'metre' ] assert nc_ts_tmp._nc.variables['T_2M'].units.lower() in [ 'degc', 'degrees', 'degrees celcius', 'degree', 'c' ] assert nc_ts_pre._nc.variables['TOT_PREC'].units.lower() in [ 'kg m-2', 'l m-2', 'mm', 'millimeters', 'millimeter' ] # geoloc lon = gdir.cenlon lat = gdir.cenlat nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1) nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1) # read the data temp = nc_ts_tmp.get_vardata('T_2M') prcp = nc_ts_pre.get_vardata('TOT_PREC') hgt = nc_ts_tmp.get_vardata('HSURF') ref_lon = nc_ts_tmp.get_vardata('lon') ref_lat = nc_ts_tmp.get_vardata('lat') source = nc_ts_tmp._nc.title[:7] nc_ts_tmp._nc.close() nc_ts_pre._nc.close() # Should we compute the gradient? use_grad = cfg.PARAMS['temp_use_local_gradient'] igrad = None if use_grad: igrad = np.zeros(len(time)) * np.NaN for t, loct in enumerate(temp): slope, _, _, p_val, _ = stats.linregress(hgt.flatten(), loct.flatten()) igrad[t] = slope if (p_val < 0.01) else np.NaN gdir.write_monthly_climate_file(time, prcp[:, 1, 1], temp[:, 1, 1], hgt[1, 1], ref_lon[1], ref_lat[1], gradient=igrad) # metadata out = { 'baseline_climate_source': source, 'baseline_hydro_yr_0': y0 + 1, 'baseline_hydro_yr_1': y1 } gdir.write_json(out, 'climate_info')
def process_histalp_data(gdir, y0=None, y1=None, output_filesuffix=None):
    """Processes and writes the HISTALP baseline climate data for this glacier.

    Extracts the nearest timeseries and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    y0 : int
        the starting year of the timeseries to write. The default is to take
        1850 (because the data is quite bad before that)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)
    """

    if cfg.PATHS.get('climate_file', None):
        warnings.warn("You seem to have set a custom climate file for this "
                      "run, but are using the default HISTALP climate file "
                      "instead.")

    if cfg.PARAMS['baseline_climate'] != 'HISTALP':
        raise InvalidParamsError("cfg.PARAMS['baseline_climate'] should be "
                                 "set to HISTALP.")

    # read the time out of the pure netcdf file
    ft = get_histalp_file('tmp')
    fp = get_histalp_file('pre')
    with utils.ncDataset(ft) as nc:
        vt = nc.variables['time']
        assert vt[0] == 0
        assert vt[-1] == vt.shape[0] - 1
        t0 = vt.units.split(' since ')[1][:7]
        time_t = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')
    with utils.ncDataset(fp) as nc:
        vt = nc.variables['time']
        assert vt[0] == 0.5
        assert vt[-1] == vt.shape[0] - .5
        t0 = vt.units.split(' since ')[1][:7]
        time_p = pd.date_range(start=t0, periods=vt.shape[0], freq='MS')

    # Now open with salem
    nc_ts_tmp = salem.GeoNetcdf(ft, time=time_t)
    nc_ts_pre = salem.GeoNetcdf(fp, time=time_p)

    # Some default
    if y0 is None:
        y0 = 1850

    # set temporal subset for the ts data (hydro years)
    # the reference time is given by precip, which is shorter
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0 = yrs[0] if y0 is None else y0
    y1 = yrs[-1] if y1 is None else y1

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # Units
    assert nc_ts_tmp._nc.variables['HSURF'].units.lower() in [
        'm', 'meters', 'meter', 'metres', 'metre']
    assert nc_ts_tmp._nc.variables['T_2M'].units.lower() in [
        'degc', 'degrees', 'degrees celcius', 'degree', 'c']
    assert nc_ts_pre._nc.variables['TOT_PREC'].units.lower() in [
        'kg m-2', 'l m-2', 'mm', 'millimeters', 'millimeter']

    # geoloc
    lon = gdir.cenlon
    lat = gdir.cenlat
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # read the data
    temp = nc_ts_tmp.get_vardata('T_2M')
    prcp = nc_ts_pre.get_vardata('TOT_PREC')
    hgt = nc_ts_tmp.get_vardata('HSURF')
    ref_lon = nc_ts_tmp.get_vardata('lon')
    ref_lat = nc_ts_tmp.get_vardata('lat')
    source = nc_ts_tmp._nc.title[:7]
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    igrad = None
    if use_grad:
        igrad = np.zeros(len(time)) * np.NaN
        for t, loct in enumerate(temp):
            slope, _, _, p_val, _ = stats.linregress(hgt.flatten(),
                                                     loct.flatten())
            igrad[t] = slope if (p_val < 0.01) else np.NaN

    gdir.write_monthly_climate_file(time, prcp[:, 1, 1], temp[:, 1, 1],
                                    hgt[1, 1], ref_lon[1], ref_lat[1],
                                    gradient=igrad,
                                    filesuffix=output_filesuffix,
                                    source=source)
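# Minimal usage sketch for the function above (the years and filesuffix are
# only examples; assumes cfg.initialize() was called and that an Alpine
# glacier directory `gdir` was initialised, e.g. with
# workflow.init_glacier_directories, with download access to HISTALP):
#
#     from oggm import cfg
#     cfg.PARAMS['baseline_climate'] = 'HISTALP'
#     process_histalp_data(gdir, y0=1870, y1=2014,
#                          output_filesuffix='_histalp')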
def process_cru_data(gdir, tmp_file=None, pre_file=None, y0=None, y1=None,
                     output_filesuffix=None):
    """Processes and writes the CRU baseline climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    tmp_file : str
        path to the CRU temperature file (defaults to the current OGGM
        chosen CRU version)
    pre_file : str
        path to the CRU precip file (defaults to the current OGGM chosen
        CRU version)
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)
    """

    if cfg.PATHS.get('climate_file', None):
        warnings.warn("You seem to have set a custom climate file for this "
                      "run, but are using the default CRU climate "
                      "file instead.")

    if cfg.PARAMS['baseline_climate'] != 'CRU':
        raise InvalidParamsError("cfg.PARAMS['baseline_climate'] should be "
                                 "set to CRU")

    # read the climatology
    ncclim = salem.GeoNetcdf(get_cru_cl_file())

    # and the TS data
    if tmp_file is None:
        tmp_file = get_cru_file('tmp')
    if pre_file is None:
        pre_file = get_cru_file('pre')
    nc_ts_tmp = salem.GeoNetcdf(tmp_file, monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(pre_file, monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0 = yrs[0] if y0 is None else y0
    y1 = yrs[-1] if y1 is None else y1

    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)),
                              margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('(%s) I had to look far for climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0

    # Should we compute the gradient?
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    ts_grad = None
    if use_grad and len(hgt_f) >= 5:
        ts_grad = np.zeros(12) * np.NaN
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else np.NaN
        # convert to a timeseries and hydrological years
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[em:] + ts_grad[0:em]
        ts_grad = np.asarray(ts_grad * ny)

    # maybe this will throw out of bounds warnings
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # compute monthly anomalies
    # of temp
    ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
    ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
    ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre_ano = ts_pre.groupby('time.month') - ts_pre_avg
    # scaled anomalies are the default. The standard anomalies above
    # are used later where ts_pre_avg == 0
    ts_pre = ts_pre.groupby('time.month') / ts_pre_avg

    # interpolate to HR grid
    if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
        # Extreme case, middle pix is not valid
        # take any valid pix from the 3*3 (and hope there's one)
        found_it = False
        for idi in range(2):
            for idj in range(2):
                if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                    ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                    ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                    ts_pre_ano[:, 1, 1] = ts_pre_ano[:, idj, idi]
                    found_it = True
        if not found_it:
            msg = '({}) there is no climate data'.format(gdir.rgi_id)
            raise MassBalanceCalibrationError(msg)
    elif np.any(~np.isfinite(ts_tmp)):
        # maybe the side is nan, but we can do nearest
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='nearest')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='nearest')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
    else:
        # We can do bilinear
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='linear')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='linear')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

    # take the center pixel and add it to the CRU CL clim
    # for temp
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': ts_tmp_avg.month})
    ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_pre_ano = xr.DataArray(ts_pre_ano[:, 1, 1], dims=['time'],
                              coords={'time': time})
    # scaled anomalies
    ts_pre = ts_pre.groupby('time.month') * loc_pre
    # standard anomalies
    ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
    # Correct infinite values with standard anomalies
    ts_pre.values = np.where(np.isfinite(ts_pre.values),
                             ts_pre.values,
                             ts_pre_ano.values)
    # The last step might create negative values (unlikely). Clip them
    ts_pre.values = utils.clip_min(ts_pre.values, 0)

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)
    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))

    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    loc_hgt, loc_lon, loc_lat,
                                    filesuffix=output_filesuffix,
                                    gradient=ts_grad,
                                    source=nc_ts_tmp._nc.title[:10])

    ncclim._nc.close()
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()
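# A toy sketch of the anomaly mapping used above (all values made up):
# temperature anomalies are added to the CRU CL climatology, precipitation
# anomalies are applied multiplicatively, and the additive (standard)
# precipitation anomaly is the fallback where the monthly mean is zero or
# the ratio is not finite.
#
#     import numpy as np
#     clim_pre = 100.                      # CRU CL monthly climatology (mm)
#     ts_pre, ts_pre_avg = 80., 50.        # CRU TS value and 1961-90 mean
#     scaled = ts_pre / ts_pre_avg * clim_pre      # -> 160. mm
#     standard = (ts_pre - ts_pre_avg) + clim_pre  # -> 130. mm (fallback)
#     out = scaled if np.isfinite(scaled) else standard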
def init_glacier_directories(rgidf=None, *, reset=False, force=False, from_prepro_level=None, prepro_border=None, prepro_rgi_version=None, prepro_base_url=None, from_tar=False, delete_tar=False, use_demo_glaciers=None): """Initializes the list of Glacier Directories for this run. This is the very first task to do (always). If the directories are already available in the working directory, use them. If not, create new ones. Parameters ---------- rgidf : GeoDataFrame or list of ids, optional for pre-computed runs the RGI glacier outlines. If unavailable, OGGM will parse the information from the glacier directories found in the working directory. It is required for new runs. reset : bool delete the existing glacier directories if found. force : bool setting `reset=True` will trigger a yes/no question to the user. Set `force=True` to avoid this. from_prepro_level : int get the gdir data from the official pre-processed pool. See the documentation for more information prepro_border : int for `from_prepro_level` only: if you want to override the default behavior which is to use `cfg.PARAMS['border']` prepro_rgi_version : str for `from_prepro_level` only: if you want to override the default behavior which is to use `cfg.PARAMS['rgi_version']` prepro_base_url : str for `from_prepro_level` only: if you want to override the default URL from which to download the gdirs. Default currently is https://cluster.klima.uni-bremen.de/~fmaussion/gdirs/oggm_v1.1/ use_demo_glaciers : bool whether to check the demo glaciers for download (faster than the standard prepro downloads). The default is to decide whether or not to check based on simple criteria such as glacier list size. from_tar : bool, default=False extract the gdir data from a tar file. If set to `True`, will check for a tar file at the expected location in `base_dir`. delete_tar : bool, default=False delete the original tar file after extraction. Returns ------- gdirs : list of :py:class:`oggm.GlacierDirectory` objects the initialised glacier directories Notes ----- This task is very similar to init_glacier_regions, with one main difference: it does not process the DEMs for this glacier. Eventually, init_glacier_regions will be deprecated and removed from the codebase. 
""" _check_duplicates(rgidf) if reset and not force: reset = utils.query_yes_no('Delete all glacier directories?') if from_prepro_level: url = utils.get_prepro_base_url(base_url=prepro_base_url, border=prepro_border, prepro_level=from_prepro_level, rgi_version=prepro_rgi_version) if cfg.PARAMS['has_internet'] and not utils.url_exists(url): raise InvalidParamsError("base url seems unreachable with these " "parameters: {}".format(url)) # if reset delete also the log directory if reset: fpath = os.path.join(cfg.PATHS['working_dir'], 'log') if os.path.exists(fpath): rmtree(fpath) if rgidf is None: # Infer the glacier directories from folders available in working dir if reset: raise ValueError('Cannot use reset without setting rgidf') log.workflow('init_glacier_directories by parsing all available ' 'folders (this takes time: if possible, provide rgidf ' 'instead).') # The dirs should be there already gl_dir = os.path.join(cfg.PATHS['working_dir'], 'per_glacier') gdirs = [] for root, _, files in os.walk(gl_dir): if files and ('outlines.shp' in files or 'outlines.tar.gz' in files): gdirs.append(oggm.GlacierDirectory(os.path.basename(root))) else: # Create glacier directories from input # Check if dataframe or list of str try: entities = [] for _, entity in rgidf.iterrows(): entities.append(entity) except AttributeError: entities = utils.tolist(rgidf) # Check demo if use_demo_glaciers is None: use_demo_glaciers = len(entities) < 100 if from_prepro_level is not None: log.workflow('init_glacier_directories from prepro level {} on ' '{} glaciers.'.format(from_prepro_level, len(entities))) # Read the hash dictionary before we use multiproc if cfg.PARAMS['dl_verify']: utils.get_dl_verify_data('cluster.klima.uni-bremen.de') gdirs = execute_entity_task(gdir_from_prepro, entities, from_prepro_level=from_prepro_level, prepro_border=prepro_border, prepro_rgi_version=prepro_rgi_version, check_demo_glacier=use_demo_glaciers, base_url=prepro_base_url) else: # We can set the intersects file automatically here if (cfg.PARAMS['use_intersects'] and len(cfg.PARAMS['intersects_gdf']) == 0): try: rgi_ids = np.unique(np.sort([entity.RGIId for entity in entities])) rgi_version = rgi_ids[0].split('-')[0][-2:] fp = utils.get_rgi_intersects_entities(rgi_ids, version=rgi_version) cfg.set_intersects_db(fp) except AttributeError: # List of str pass gdirs = execute_entity_task(utils.GlacierDirectory, entities, reset=reset, from_tar=from_tar, delete_tar=delete_tar) return gdirs
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None,
                      output_folder='', working_dir='', dem_source='',
                      is_test=False, test_ids=None, demo=False,
                      test_rgidf=None, test_intersects_file=None,
                      test_topofile=None, disable_mp=False,
                      params_file=None, elev_bands=False,
                      match_regional_geodetic_mb=False,
                      match_geodetic_mb_per_glacier=False,
                      evolution_model='fl_sia', centerlines_only=False,
                      override_params=None, add_consensus=False,
                      start_level=None, start_base_url=None, max_level=5,
                      ref_tstars_base_url='', logging_level='WORKFLOW',
                      disable_dl_verify=False, dynamic_spinup=False,
                      continue_on_error=True):
    """Generate the preprocessed OGGM glacier directories for this OGGM version

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    dem_source : str
        which DEM source to use: default, SOURCE_NAME or ALL
    working_dir : str
        path to the OGGM working directory
    ref_tstars_base_url : str
        url where to find the pre-calibrated reference tstar list.
        Required as of v1.4.
    params_file : str
        path to the OGGM parameter file (to override defaults)
    is_test : bool
        to test on a couple of glaciers only!
    test_ids : list
        if is_test: list of ids to process
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    disable_mp : bool
        disable multiprocessing
    elev_bands : bool
        compute all flowlines based on the Huss & Hock (2015) method instead
        of the OGGM default, which is a mix of elev_bands and centerlines.
    centerlines_only : bool
        compute all flowlines based on the OGGM centerline(s) method instead
        of the OGGM default, which is a mix of elev_bands and centerlines.
    match_regional_geodetic_mb : str
        match the regional mass-balance estimates at the regional level
        ('hugonnet': Hugonnet et al., 2020 or 'zemp': Zemp et al., 2019).
    match_geodetic_mb_per_glacier : str
        match the mass-balance estimates at the glacier level
        (currently only 'hugonnet': Hugonnet et al., 2020).
    evolution_model : str
        which geometry evolution model to use: `fl_sia` (default),
        or `massredis` (mass redistribution curve).
    add_consensus : bool
        adds (reprojects) the consensus estimates thickness to the glacier
        directories. With elev_bands=True, the data will also be binned.
    start_level : int
        the pre-processed level to start from (default is to start from
        scratch). If set, you'll need to indicate start_base_url as well.
    start_base_url : str
        the pre-processed base-url to fetch the data from.
    max_level : int
        the maximum pre-processing level before stopping
    logging_level : str
        the logging level to use (DEBUG, INFO, WARNING, WORKFLOW)
    override_params : dict
        a dict of parameters to override.
    disable_dl_verify : bool
        disable the hash verification of OGGM downloads
    dynamic_spinup : str
        include a dynamic spinup matching 'area' OR 'volume' at the RGI-date
    continue_on_error : bool
        continue when a task fails (sets cfg.PARAMS['continue_on_error'];
        the default is True, as for operational runs)
    """

    # Input check
    if max_level not in [1, 2, 3, 4, 5]:
        raise InvalidParamsError('max_level should be one of [1, 2, 3, 4, 5]')

    if start_level is not None:
        if start_level not in [0, 1, 2]:
            raise InvalidParamsError('start_level should be one of [0, 1, 2]')
        if start_level > 0 and start_base_url is None:
            raise InvalidParamsError('With start_level, please also indicate '
                                     'start_base_url')
    else:
        start_level = 0

    if match_regional_geodetic_mb and match_geodetic_mb_per_glacier:
        raise InvalidParamsError(
            'match_regional_geodetic_mb incompatible with '
            'match_geodetic_mb_per_glacier!')

    if match_geodetic_mb_per_glacier and \
            match_geodetic_mb_per_glacier != 'hugonnet':
        raise InvalidParamsError('Currently only `hugonnet` is available for '
                                 'match_geodetic_mb_per_glacier.')

    if evolution_model not in ['fl_sia', 'massredis']:
        raise InvalidParamsError('evolution_model should be one of '
                                 "['fl_sia', 'massredis'].")

    if dynamic_spinup and dynamic_spinup not in ['area', 'volume']:
        raise InvalidParamsError(f"Dynamic spinup option '{dynamic_spinup}' "
                                 "not supported")

    if dynamic_spinup and evolution_model == 'massredis':
        raise InvalidParamsError("Dynamic spinup is not working/tested "
                                 "with massredis!")

    # Time
    start = time.time()

    def _time_log():
        # Log util
        m, s = divmod(time.time() - start, 60)
        h, m = divmod(m, 60)
        log.workflow('OGGM prepro_levels is done! Time needed: '
                     '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))

    # Local paths
    if override_params is None:
        override_params = {}

    utils.mkdir(working_dir)
    override_params['working_dir'] = working_dir

    # Initialize OGGM and set up the run parameters
    cfg.initialize(file=params_file, params=override_params,
                   logging_level=logging_level, future=True)

    if match_geodetic_mb_per_glacier and (cfg.PARAMS['hydro_month_nh'] != 1 or
                                          cfg.PARAMS['hydro_month_sh'] != 1):
        raise InvalidParamsError('We recommend setting hydro_month_nh and sh '
                                 'to 1 for the geodetic MB calibration per '
                                 'glacier.')

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = not disable_mp

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = continue_on_error

    # Check for the integrity of the files OGGM downloads at run time
    # For large files (e.g. using a one-tile DEM like ALASKA) calculating
    # the hash takes a long time, so deactivating this can make sense
    cfg.PARAMS['dl_verify'] = not disable_dl_verify

    # Other things that make sense
    cfg.PARAMS['store_model_geometry'] = True

    # Log the parameters
    msg = '# OGGM Run parameters:'
    for k, v in cfg.PARAMS.items():
        if type(v) in [pd.DataFrame, dict]:
            continue
        msg += '\n    {}: {}'.format(k, v)
    log.workflow(msg)

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']

    output_base_dir = os.path.join(output_folder,
                                   'RGI{}'.format(rgi_version),
                                   'b_{:03d}'.format(border))

    # Add a package version file
    utils.mkdir(output_base_dir)
    opath = os.path.join(output_base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)

        # Some RGI input quality checks - this is based on visual checks
        # of large glaciers in the RGI
        ids_to_ice_cap = [
            'RGI60-05.10315',  # huge Greenland ice cap
            'RGI60-03.01466',  # strange thing next to Devon
            'RGI60-09.00918',  # Academy of sciences Ice cap
            'RGI60-09.00969',
            'RGI60-09.00958',
            'RGI60-09.00957',
        ]
        rgidf.loc[rgidf.RGIId.isin(ids_to_ice_cap), 'Form'] = '1'

        # In AA almost all large ice bodies are actually ice caps
        if rgi_reg == '19':
            rgidf.loc[rgidf.Area > 100, 'Form'] = '1'

        # For Greenland we omit connectivity level 2
        if rgi_reg == '05':
            rgidf = rgidf.loc[rgidf['Connect'] != 2]
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        if test_ids is not None:
            rgidf = rgidf.loc[rgidf.RGIId.isin(test_ids)]
        else:
            rgidf = rgidf.sample(4)

        if max_level > 2:
            # Also use ref tstars
            utils.apply_test_ref_tstars()

    if max_level > 2 and ref_tstars_base_url:
        workflow.download_ref_tstars(base_url=ref_tstars_base_url)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # L0 - go
    if start_level == 0:
        gdirs = workflow.init_glacier_directories(rgidf, reset=True,
                                                  force=True)

        # Glacier stats
        sum_dir = os.path.join(output_base_dir, 'L0', 'summary')
        utils.mkdir(sum_dir)
        opath = os.path.join(sum_dir,
                             'glacier_statistics_{}.csv'.format(rgi_reg))
        utils.compile_glacier_statistics(gdirs, path=opath)

        # L0 OK - compress all in output directory
        log.workflow('L0 done. Writing to tar...')
        level_base_dir = os.path.join(output_base_dir, 'L0')
        workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                     base_dir=level_base_dir)
        utils.base_dir_to_tar(level_base_dir)

        if max_level == 0:
            _time_log()
            return
    else:
        gdirs = workflow.init_glacier_directories(
            rgidf, reset=True, force=True,
            from_prepro_level=start_level, prepro_border=border,
            prepro_rgi_version=rgi_version, prepro_base_url=start_base_url)

    # L1 - Add dem files
    if start_level == 0:
        if test_topofile:
            cfg.PATHS['dem_file'] = test_topofile

        # Which DEM source?
if dem_source.upper() == 'ALL': # This is the complex one, just do the job and leave log.workflow('Running prepro on ALL sources') for i, s in enumerate(utils.DEM_SOURCES): rs = i == 0 log.workflow('Running prepro on sources: {}'.format(s)) gdirs = workflow.init_glacier_directories(rgidf, reset=rs, force=rs) workflow.execute_entity_task(tasks.define_glacier_region, gdirs, source=s) workflow.execute_entity_task(_rename_dem_folder, gdirs, source=s) # make a GeoTiff mask of the glacier, choose any source workflow.execute_entity_task(gis.rasterio_glacier_mask, gdirs, source='ALL') # Compress all in output directory level_base_dir = os.path.join(output_base_dir, 'L1') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) _time_log() return # Force a given source source = dem_source.upper() if dem_source else None # L1 - go workflow.execute_entity_task(tasks.define_glacier_region, gdirs, source=source) # Glacier stats sum_dir = os.path.join(output_base_dir, 'L1', 'summary') utils.mkdir(sum_dir) opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) utils.compile_glacier_statistics(gdirs, path=opath) # L1 OK - compress all in output directory log.workflow('L1 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L1') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) if max_level == 1: _time_log() return # L2 - Tasks if start_level <= 1: # Check which glaciers will be processed as what if elev_bands: gdirs_band = gdirs gdirs_cent = [] elif centerlines_only: gdirs_band = [] gdirs_cent = gdirs else: # Default is to centerlines_only, but it used to be a mix # (e.g. bands for ice caps, etc) # I still keep this logic here in case we want to mix again gdirs_band = [] gdirs_cent = gdirs log.workflow('Start flowline processing with: ' 'N centerline type: {}, ' 'N elev bands type: {}.' 
                     ''.format(len(gdirs_cent), len(gdirs_band)))

        # HH2015 method
        workflow.execute_entity_task(tasks.simple_glacier_masks, gdirs_band)

        # Centerlines OGGM
        workflow.execute_entity_task(tasks.glacier_masks, gdirs_cent)

        if add_consensus:
            from oggm.shop.bedtopo import add_consensus_thickness
            workflow.execute_entity_task(add_consensus_thickness, gdirs_band)
            workflow.execute_entity_task(add_consensus_thickness, gdirs_cent)

            # Elev bands with var data
            vn = 'consensus_ice_thickness'
            workflow.execute_entity_task(tasks.elevation_band_flowline,
                                         gdirs_band, bin_variables=vn)
            workflow.execute_entity_task(
                tasks.fixed_dx_elevation_band_flowline, gdirs_band,
                bin_variables=vn)
        else:
            # HH2015 method without it
            task_list = [
                tasks.elevation_band_flowline,
                tasks.fixed_dx_elevation_band_flowline,
            ]
            for task in task_list:
                workflow.execute_entity_task(task, gdirs_band)

        # Centerlines OGGM
        task_list = [
            tasks.compute_centerlines,
            tasks.initialize_flowlines,
            tasks.catchment_area,
            tasks.catchment_intersections,
            tasks.catchment_width_geom,
            tasks.catchment_width_correction,
        ]
        for task in task_list:
            workflow.execute_entity_task(task, gdirs_cent)

        # Same for all glaciers
        if border >= 20:
            task_list = [
                tasks.compute_downstream_line,
                tasks.compute_downstream_bedshape,
            ]
            for task in task_list:
                workflow.execute_entity_task(task, gdirs)
        else:
            log.workflow("L2: for map border values < 20, won't compute "
                         "downstream lines.")

        # Glacier stats
        sum_dir = os.path.join(output_base_dir, 'L2', 'summary')
        utils.mkdir(sum_dir)
        opath = os.path.join(sum_dir,
                             'glacier_statistics_{}.csv'.format(rgi_reg))
        utils.compile_glacier_statistics(gdirs, path=opath)

        # And for level 2: shapes
        if len(gdirs_cent) > 0:
            opath = os.path.join(sum_dir,
                                 'centerlines_{}.shp'.format(rgi_reg))
            utils.write_centerlines_to_shape(gdirs_cent, to_tar=True,
                                             path=opath)

        # L2 OK - compress all in output directory
        log.workflow('L2 done. Writing to tar...')
        level_base_dir = os.path.join(output_base_dir, 'L2')
        workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                     base_dir=level_base_dir)
        utils.base_dir_to_tar(level_base_dir)

        if max_level == 2:
            _time_log()
            return

    # L3 - Tasks
    sum_dir = os.path.join(output_base_dir, 'L3', 'summary')
    utils.mkdir(sum_dir)

    # Climate
    workflow.execute_entity_task(tasks.process_climate_data, gdirs)
    if cfg.PARAMS['climate_qc_months'] > 0:
        workflow.execute_entity_task(tasks.historical_climate_qc, gdirs)

    if match_geodetic_mb_per_glacier:
        utils.get_geodetic_mb_dataframe()  # Small optim to avoid concurrency
        workflow.execute_entity_task(
            tasks.mu_star_calibration_from_geodetic_mb, gdirs)
        workflow.execute_entity_task(tasks.apparent_mb_from_any_mb, gdirs)
    else:
        workflow.execute_entity_task(tasks.local_t_star, gdirs)
        workflow.execute_entity_task(tasks.mu_star_calibration, gdirs)

    # Inversion: we match the consensus
    # (renamed from `filter` to avoid shadowing the builtin)
    filter_inv = border >= 20
    workflow.calibrate_inversion_from_consensus(
        gdirs, apply_fs_on_mismatch=True, error_on_mismatch=False,
        filter_inversion_output=filter_inv)

    # Do we want to match geodetic estimates?
    # This affects only the bias so we can actually do this *after*
    # the inversion, but we really want to take calving into account here
    if match_regional_geodetic_mb:
        opath = os.path.join(sum_dir, 'fixed_geometry_mass_balance_'
                                      'before_match_{}.csv'.format(rgi_reg))
        utils.compile_fixed_geometry_mass_balance(gdirs, path=opath)
        workflow.match_regional_geodetic_mb(
            gdirs, rgi_reg=rgi_reg, dataset=match_regional_geodetic_mb)

    # We get ready for modelling
    if border >= 20:
        workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs)
    else:
        log.workflow("L3: for map border values < 20, won't initialize "
                     "glaciers for the run.")

    # Glacier stats
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir,
                         'fixed_geometry_mass_balance_{}.csv'.format(rgi_reg))
    utils.compile_fixed_geometry_mass_balance(gdirs, path=opath)

    # L3 OK - compress all in output directory
    log.workflow('L3 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L3')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)

    if max_level == 3:
        _time_log()
        return

    if border < 20:
        log.workflow("L3: for map border values < 20, won't compute "
                     "L4 and L5.")
        _time_log()
        return

    # L4 - No tasks: add some stats for consistency and make the dirs small
    sum_dir_L3 = sum_dir
    sum_dir = os.path.join(output_base_dir, 'L4', 'summary')
    utils.mkdir(sum_dir)

    # Copy L3 files for consistency
    for bn in ['glacier_statistics', 'climate_statistics',
               'fixed_geometry_mass_balance']:
        ipath = os.path.join(sum_dir_L3, bn + '_{}.csv'.format(rgi_reg))
        opath = os.path.join(sum_dir, bn + '_{}.csv'.format(rgi_reg))
        shutil.copyfile(ipath, opath)

    # Copy mini data to new dir
    mini_base_dir = os.path.join(working_dir, 'mini_perglacier',
                                 'RGI{}'.format(rgi_version),
                                 'b_{:03d}'.format(border))
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs,
                                              base_dir=mini_base_dir)

    # L4 OK - compress all in output directory
    log.workflow('L4 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L4')
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)

    if max_level == 4:
        _time_log()
        return

    # L5 - spinup run in mini gdirs
    gdirs = mini_gdirs

    # Get end date. The first gdir might have blown up, try some others
    i = 0
    while True:
        if i >= len(gdirs):
            raise RuntimeError('Found no valid glaciers!')
        try:
            y0 = gdirs[i].get_climate_info()['baseline_hydro_yr_0']
            # One adds 1 because the run ends at the end of the year
            ye = gdirs[i].get_climate_info()['baseline_hydro_yr_1'] + 1
            break
        except BaseException:
            i += 1

    # Which model?
if evolution_model == 'massredis': from oggm.core.flowline import MassRedistributionCurveModel evolution_model = MassRedistributionCurveModel else: from oggm.core.flowline import FluxBasedModel evolution_model = FluxBasedModel # OK - run if dynamic_spinup: workflow.execute_entity_task( tasks.run_dynamic_spinup, gdirs, evolution_model=evolution_model, minimise_for=dynamic_spinup, precision_percent=1, output_filesuffix='_dynamic_spinup', ) workflow.execute_entity_task(tasks.run_from_climate_data, gdirs, min_ys=y0, ye=ye, evolution_model=evolution_model, init_model_filesuffix='_dynamic_spinup', output_filesuffix='_hist_spin') workflow.execute_entity_task(tasks.merge_consecutive_run_outputs, gdirs, input_filesuffix_1='_dynamic_spinup', input_filesuffix_2='_hist_spin', output_filesuffix='_historical_spinup', delete_input=True) workflow.execute_entity_task(tasks.run_from_climate_data, gdirs, min_ys=y0, ye=ye, evolution_model=evolution_model, output_filesuffix='_historical') # Now compile the output sum_dir = os.path.join(output_base_dir, 'L5', 'summary') utils.mkdir(sum_dir) opath = os.path.join(sum_dir, f'historical_run_output_{rgi_reg}.nc') utils.compile_run_output(gdirs, path=opath, input_filesuffix='_historical') if dynamic_spinup: opath = os.path.join(sum_dir, f'historical_spinup_run_output_{rgi_reg}.nc') utils.compile_run_output(gdirs, path=opath, input_filesuffix='_historical_spinup') # Glacier statistics we recompute here for error analysis opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) utils.compile_glacier_statistics(gdirs, path=opath) # Other stats for consistency for bn in ['climate_statistics', 'fixed_geometry_mass_balance']: ipath = os.path.join(sum_dir_L3, bn + '_{}.csv'.format(rgi_reg)) opath = os.path.join(sum_dir, bn + '_{}.csv'.format(rgi_reg)) shutil.copyfile(ipath, opath) # Add the extended files pf = os.path.join(sum_dir, 'historical_run_output_{}.nc'.format(rgi_reg)) mf = os.path.join(sum_dir, 'fixed_geometry_mass_balance_{}.csv'.format(rgi_reg)) # This is crucial - extending calving only possible with L3 data!!! sf = os.path.join(sum_dir_L3, 'glacier_statistics_{}.csv'.format(rgi_reg)) opath = os.path.join( sum_dir, 'historical_run_output_extended_{}.nc'.format(rgi_reg)) utils.extend_past_climate_run(past_run_file=pf, fixed_geometry_mb_file=mf, glacier_statistics_file=sf, path=opath) # L5 OK - compress all in output directory log.workflow('L5 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L5') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) _time_log()
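# Minimal invocation sketch for the prepro script above (all values are
# illustrative; a real run downloads a lot of data and takes long, which is
# why is_test and a low max_level are used here):
#
#     run_prepro_levels(rgi_version='62', rgi_reg='11', border=80,
#                       output_folder='prepro_out', working_dir='wd',
#                       is_test=True, max_level=2)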
def _reproject_and_write_shapefile(self, entity, filesuffix=''):

    # Make a local glacier map
    params = dict(name='tmerc', lat_0=0., lon_0=self.cenlon,
                  k=0.9996, x_0=0, y_0=0, datum='WGS84')
    proj4_str = "+proj={name} +lat_0={lat_0} +lon_0={lon_0} +k={k} " \
                "+x_0={x_0} +y_0={y_0} +datum={datum}".format(**params)

    # Reproject
    proj_in = pyproj.Proj("epsg:4326", preserve_units=True)
    proj_out = pyproj.Proj(proj4_str, preserve_units=True)

    # transform geometry to map
    project = partial(transform_proj, proj_in, proj_out)
    geometry = shp_trafo(project, entity['geometry'])
    geometry = multipolygon_to_polygon(geometry, gdir=self)

    # Save transformed geometry to disk
    entity = entity.copy()
    entity['geometry'] = geometry

    # Do we want to use the RGI area or ours?
    if not cfg.PARAMS['use_rgi_area']:
        # Update Area
        area = geometry.area * 1e-6
        entity['Area'] = area

    # Avoid fiona bug: https://github.com/Toblerity/Fiona/issues/365
    # (.items() instead of the deprecated .iteritems())
    for k, s in entity.items():
        if type(s) in [np.int32, np.int64]:
            entity[k] = int(s)
    towrite = gpd.GeoDataFrame(entity).T
    towrite.crs = proj4_str

    # Write shapefile
    self.write_shapefile(towrite, 'outlines', filesuffix=filesuffix)

    # Also transform the intersects if necessary
    gdf = cfg.PARAMS['intersects_gdf']
    if len(gdf) > 0:
        gdf = gdf.loc[((gdf.RGIId_1 == self.rgi_id) |
                       (gdf.RGIId_2 == self.rgi_id))]
        if len(gdf) > 0:
            gdf = salem.transform_geopandas(gdf, to_crs=proj_out)
            if hasattr(gdf.crs, 'srs'):
                # salem uses pyproj
                gdf.crs = gdf.crs.srs
            self.write_shapefile(gdf, 'intersects')
    else:
        # Sanity check
        if cfg.PARAMS['use_intersects']:
            raise InvalidParamsError(
                'You seem to have forgotten to set the '
                'intersects file for this run. OGGM '
                'works better with such a file. If you '
                'know what you are doing, set '
                "cfg.PARAMS['use_intersects'] = False to "
                "suppress this error.")
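# A standalone sketch of the local transverse Mercator projection built
# above (the coordinates are arbitrary): every glacier gets its own map
# projection centred on its longitude.
#
#     import pyproj
#     cenlon, cenlat = 10.7584, 46.8003
#     proj4_str = ('+proj=tmerc +lat_0=0 +lon_0={} +k=0.9996 '
#                  '+x_0=0 +y_0=0 +datum=WGS84'.format(cenlon))
#     proj_out = pyproj.Proj(proj4_str, preserve_units=True)
#     x, y = proj_out(cenlon, cenlat)  # lon/lat -> local metric coordinates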
def process_era5_daily_data(gdir, y0=None, y1=None,
                            output_filesuffix='_daily', cluster=False):
    """Processes and writes the ERA5 daily baseline climate data for a
    glacier into climate_historical_daily.nc

    Extracts the nearest timeseries and writes everything to a NetCDF file.
    This uses only the ERA5 daily temperatures. The precipitation, lapse
    rate and standard deviations are used from ERA5dr.

    TODO: see _verified_download_helper no known hash for
    era5_daily_t2m_1979-2018_flat.nc and era5_glacier_invariant_flat

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)
    cluster : bool
        default is False, if this is run on the cluster, set it to True,
        because we do not need to download the files
    """

    # era5daily only for temperature
    dataset = 'ERA5_daily'
    # for the other variables use the data of ERA5dr
    dataset_othervars = 'ERA5dr'

    # get the central longitude/latitudes of the glacier
    lon = gdir.cenlon + 360 if gdir.cenlon < 0 else gdir.cenlon
    lat = gdir.cenlat

    cluster_path = '/home/www/oggm/climate/'
    if cluster:
        path = cluster_path + BASENAMES[dataset]['tmp']
    else:
        path = get_ecmwf_file(dataset, 'tmp')

    # Use xarray to read the data
    # would go faster with netCDF -.-
    with xr.open_dataset(path) as ds:
        assert ds.longitude.min() >= 0

        # set temporal subset for the ts data (hydro years)
        if gdir.hemisphere == 'nh':
            sm = cfg.PARAMS['hydro_month_nh']
        elif gdir.hemisphere == 'sh':
            sm = cfg.PARAMS['hydro_month_sh']

        em = sm - 1 if (sm > 1) else 12
        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1

        if y1 > 2018 or y0 < 1979:
            text = ('The climate files only go from 1979--2018, '
                    'choose another y0 and y1')
            raise InvalidParamsError(text)

        # if default settings: this is the last day in March or September
        time_f = '{}-{:02d}'.format(y1, em)
        end_day = int(ds.sel(time=time_f).time.dt.daysinmonth[-1].values)

        # this was tested also for hydro_month = 1
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-{}'.format(y1, em, end_day)))

        try:
            # computing all the distances and choose the nearest gridpoint
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
            # I turned this around
        except ValueError:
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
            # normally if I do the flattening, this here should not occur

        # temperature should be in degree Celsius for the glacier climate
        # files
        temp = ds['t2m'].data - 273.15
        time = ds.time.data

        ref_lon = float(ds['longitude'])
        ref_lat = float(ds['latitude'])

        ref_lon = ref_lon - 360 if ref_lon > 180 else ref_lon

    # pre should be done as in ERA5dr datasets
    with xr.open_dataset(get_ecmwf_file(dataset_othervars, 'pre')) as ds:
        assert ds.longitude.min() >= 0

        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1
        # Attention here we take the same y0 and y1 as given from the
        # daily tmp dataset (goes till end of 2018)
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-01'.format(y1, em)))
        try:
            # prcp is not flattened, so this here should work normally
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
        except ValueError:
            # if flattened ERA5 precipitation
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())

        # the prcp dataset needs to be restructured to have values for
        # each day
        prcp = ds['tp'].data * 1000
        # just assume that precipitation is the same every day:
        prcp = np.repeat(prcp, ds['time.daysinmonth'])
        # Attention: the unit is now prcp per day
        # (not per month as in the OGGM default:
        # prcp = ds['tp'].data * 1000 * ds['time.daysinmonth'])

    if cluster:
        path_inv = cluster_path + BASENAMES[dataset]['inv']
    else:
        path_inv = get_ecmwf_file(dataset, 'inv')
    with xr.open_dataset(path_inv) as ds:
        assert ds.longitude.min() >= 0
        ds = ds.isel(time=0)
        try:
            # Flattened ERA5_invariant (only possibility at the moment)
            c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
            ds = ds.isel(points=c.argmin())
        except ValueError:
            # this should not occur
            ds = ds.sel(longitude=lon, latitude=lat, method='nearest')

        G = cfg.G  # 9.80665
        hgt = ds['z'].data / G

    gradient = None
    temp_std = None
    path_lapserates = get_ecmwf_file(dataset_othervars, 'lapserates')
    with xr.open_dataset(path_lapserates) as ds:
        assert ds.longitude.min() >= 0

        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1
        # Attention here we take the same y0 and y1 as given from the
        # daily tmp dataset (goes till end of 2018)
        ds = ds.sel(time=slice('{}-{:02d}-01'.format(y0, sm),
                               '{}-{:02d}-01'.format(y1, em)))

        # no flattening done for the ERA5dr gradient dataset
        ds = ds.sel(longitude=lon, latitude=lat, method='nearest')

        # get the monthly gradient values
        gradient = ds['lapserate'].data
        # gradient needs to be restructured to have values for each day
        gradient = np.repeat(gradient, ds['time.daysinmonth'])
        # assume the same gradient for each day of a month

    # OK, ready to write
    write_climate_file(gdir, time, prcp, temp, hgt, ref_lon, ref_lat,
                       filesuffix=output_filesuffix,
                       gradient=gradient,
                       temp_std=temp_std,
                       source=dataset,
                       file_name='climate_historical')
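# Minimal usage sketch for the function above (assumes an initialised
# GlacierDirectory `gdir` and download access to the flattened ERA5 files;
# the years and filesuffix are only examples):
#
#     process_era5_daily_data(gdir, y0=1979, y1=2018,
#                             output_filesuffix='_daily')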