def test_download_cru(self):
    """Check that the CRU temperature file can be obtained.

    ``cfg.PATHS['cru_dir']`` is temporarily pointed at
    ``<TEST_DIR>/cru_extract`` and restored afterwards.
    """
    previous_dir = cfg.PATHS['cru_dir']
    cfg.PATHS['cru_dir'] = os.path.join(TEST_DIR, 'cru_extract')
    try:
        of = utils.get_cru_file('tmp')
        self.assertTrue(os.path.exists(of))
    finally:
        # Restore the global path even if the download or the assertion
        # fails, otherwise the modified path leaks into the other tests.
        cfg.PATHS['cru_dir'] = previous_dir
def test_download_cru(self):
    """Check that the CRU temperature file can be obtained.

    The configuration is (re-)initialized first, then
    ``cfg.PATHS['cru_dir']`` is temporarily pointed at ``TEST_DIR`` and
    restored afterwards.
    """
    cfg.initialize()

    previous_dir = cfg.PATHS['cru_dir']
    cfg.PATHS['cru_dir'] = TEST_DIR
    try:
        of = utils.get_cru_file('tmp')
        self.assertTrue(os.path.exists(of))
    finally:
        # Restore the global path even if the download or the assertion
        # fails, otherwise the modified path leaks into the other tests.
        cfg.PATHS['cru_dir'] = previous_dir
# Set to True for operational runs cfg.CONTINUE_ON_ERROR = False cfg.PARAMS['auto_skip_task'] = False # Test cfg.PARAMS['mixed_min_shape'] = 0.003 cfg.PARAMS['default_parabolic_bedshape'] = 0.003 cfg.PARAMS['trapezoid_lambdas'] = 0. # Don't use divides for now cfg.set_divides_db() cfg.set_intersects_db() # Pre-download other files which will be needed later _ = utils.get_cru_file(var='tmp') _ = utils.get_cru_file(var='pre') # Copy the precalibrated tstar file # --------------------------------- # Note that to be exact, this procedure can only be applied if the run # parameters don't change between the calibration and the run. # After testing, it appears that changing the 'border' parameter won't affect # the results much (expectedly), so that it's ok to change it. All the rest # (e.g. smoothing, dx, prcp factor...) should imply a re-calibration mbf = '' mbf = utils.file_downloader(mbf) shutil.copyfile(mbf, os.path.join(WORKING_DIR, 'ref_tstars.csv'))
# Run parameters
cfg.PARAMS['min_mu_star'] = 0.0
cfg.PARAMS['inversion_fs'] = 5.7e-20
cfg.PARAMS['use_tar_shapefiles'] = False
cfg.PARAMS['use_intersects'] = True
cfg.PARAMS['use_compression'] = False
cfg.PARAMS['compress_climate_netcdf'] = False

# We use intersects
path = utils.get_rgi_intersects_region_file(rgi_region, version=rgi_version)
cfg.set_intersects_db(path)

# RGI file
rgidf = gpd.read_file(RGI_FILE)

# Pre-download other files which will be needed later
_ = utils.get_cru_file(var='tmp')
p = utils.get_cru_file(var='pre')
print('CRU file: ' + p)

# Run only for Lake Terminating and Marine Terminating:
# every glacier whose TermType is listed in glac_type is dropped
# NOTE(review): the comparison is against the integer 0 -- confirm that
# the TermType column is numeric and not stored as strings.
glac_type = [0]
keep_glactype = [(i not in glac_type) for i in rgidf.TermType]
rgidf = rgidf.iloc[keep_glactype]

# Run only glaciers that have a weak connection or are
# not connected to the ice-sheet: every glacier whose Connect value is
# listed in connection is dropped
connection = [2]
keep_connection = [(i not in connection) for i in rgidf.Connect]
rgidf = rgidf.iloc[keep_connection]

# Run glaciers without errors
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None,
                      output_folder='', working_dir='', dem_source='',
                      is_test=False, demo=False, test_rgidf=None,
                      test_intersects_file=None, test_topofile=None,
                      test_crudir=None, disable_mp=False, timeout=0,
                      max_level=4, logging_level='WORKFLOW'):
    """Does the actual job.

    Runs the pre-processing tasks level by level (L1 to ``max_level``).
    After each level the glacier statistics are written to
    ``<output_folder>/RGI<version>/b_<border>/L<n>/summary`` and the glacier
    directories are compressed into ``.../L<n>``.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    dem_source : str
        which DEM source to use: default, SOURCE_NAME or ALL
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    disable_mp : bool
        disable multiprocessing
    timeout : int
        value written to ``cfg.PARAMS['task_timeout']``
    max_level : int
        the maximum pre-processing level before stopping
    logging_level : str
        the logging level to use (DEBUG, INFO, WARNING, WORKFLOW)

    Raises
    ------
    InvalidParamsError
        if ``max_level`` is not one of 1, 2, 3, 4
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Input check
    if max_level not in [1, 2, 3, 4]:
        raise InvalidParamsError('max_level should be one of [1, 2, 3, 4]')

    # Time
    start = time.time()

    def _time_log():
        # Log util
        m, s = divmod(time.time() - start, 60)
        h, m = divmod(m, 60)
        log.workflow('OGGM prepro_levels is done! Time needed: '
                     '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level=logging_level)

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = not disable_mp

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # Timeout
    cfg.PARAMS['task_timeout'] = timeout

    # For statistics
    climate_periods = [1920, 1960, 2000]

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    rgi_dir_name = 'RGI{}'.format(rgi_version)
    border_dir_name = 'b_{:03d}'.format(border)
    base_dir = os.path.join(output_folder, rgi_dir_name, border_dir_name)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(4)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # L1 - initialize working directories
    # Which DEM source?
    if dem_source.upper() == 'ALL':
        # This is the complex one, just do the job and leave
        log.workflow('Running prepro on ALL sources')
        for i, s in enumerate(utils.DEM_SOURCES):
            # Only the first source resets the working directories
            rs = i == 0
            rgidf['DEM_SOURCE'] = s
            log.workflow('Running prepro on sources: {}'.format(s))
            gdirs = workflow.init_glacier_regions(rgidf, reset=rs, force=rs)
            workflow.execute_entity_task(_rename_dem_folder, gdirs, source=s)

        # Compress all in output directory
        _compress_level_dirs(gdirs, base_dir, 'L1')

        _time_log()
        return

    if dem_source:
        # Force a given source
        rgidf['DEM_SOURCE'] = dem_source.upper()

    # L1 - go
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    # L1 OK - glacier stats, then compress all in output directory
    _write_glacier_statistics(gdirs, base_dir, 'L1', rgi_reg)
    _compress_level_dirs(gdirs, base_dir, 'L1')
    if max_level == 1:
        _time_log()
        return

    # L2 - Tasks
    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    workflow.execute_entity_task(tasks.process_cru_data, gdirs)

    # L2 OK - glacier stats, then compress all in output directory
    _write_glacier_statistics(gdirs, base_dir, 'L2', rgi_reg)
    _compress_level_dirs(gdirs, base_dir, 'L2')
    if max_level == 2:
        _time_log()
        return

    # L3 - Tasks
    task_list = [
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
        tasks.init_present_time_glacier
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs)

    # Glacier and climate stats
    sum_dir = _write_glacier_statistics(gdirs, base_dir, 'L3', rgi_reg)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs, add_climate_period=climate_periods,
                                     path=opath)

    # L3 OK - compress all in output directory
    _compress_level_dirs(gdirs, base_dir, 'L3')
    if max_level == 3:
        _time_log()
        return

    # L4 - No tasks: add some stats for consistency and make the dirs small
    _write_glacier_statistics(gdirs, base_dir, 'L4', rgi_reg)

    # Copy mini data to new dir
    base_dir = os.path.join(base_dir, 'L4')
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs,
                                              base_dir=base_dir)

    # L4 OK - compress all in output directory (the copies are deleted)
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=True)
    utils.base_dir_to_tar(base_dir)

    _time_log()


def _write_glacier_statistics(gdirs, base_dir, level, rgi_reg):
    """Write the glacier statistics of one level; return its summary dir."""
    sum_dir = os.path.join(base_dir, level, 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    return sum_dir


def _compress_level_dirs(gdirs, base_dir, level):
    """Tar the glacier directories into ``<base_dir>/<level>`` (kept on disk)."""
    l_base_dir = os.path.join(base_dir, level)
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)
def process_cru_data(gdir):
    """Processes and writes the climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.

    Parameters
    ----------
    gdir : glacier directory
        the object providing ``cenlon``, ``cenlat``, ``hemisphere``,
        ``rgi_id`` and the ``write_monthly_climate_file`` / ``write_pickle``
        methods used here

    Raises
    ------
    RuntimeError
        if no pixel of the CRU TS subset has a fully valid temperature series
    """

    opened = []
    try:
        # read the climatology
        clfile = utils.get_cru_cl_file()
        ncclim = salem.GeoNetcdf(clfile)
        opened.append(ncclim)
        # and the TS data
        nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
        opened.append(nc_ts_tmp)
        nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)
        opened.append(nc_ts_pre)

        y0, y1 = _write_cru_climate_file(gdir, ncclim, nc_ts_tmp, nc_ts_pre)
    finally:
        # Close whatever was opened, also when the processing failed
        for nc in opened:
            nc._nc.close()

    # metadata
    out = {'climate_source': 'CRU data', 'hydro_yr_0': y0 + 1,
           'hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')


def _write_cru_climate_file(gdir, ncclim, nc_ts_tmp, nc_ts_pre):
    """Compute the monthly series from the open datasets and write them.

    Returns the first and last year ``(y0, y1)`` of the CRU TS time axis.
    """

    # set temporal subset for the ts data (hydro years)
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0, y1 = yrs[0], yrs[-1]
    t0 = '{}-{:02d}-01'.format(y0, sm)
    t1 = '{}-{:02d}-01'.format(y1, em)
    nc_ts_tmp.set_period(t0=t0, t1=t1)
    nc_ts_pre.set_period(t0=t0, t1=t1)
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # gradient default params
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)),
                              margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('(%s) I had to look up for far climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0.

    # Monthly gradients: local regression where significant, else default
    ts_grad = np.zeros(12) + def_grad
    if use_grad and len(hgt_f) >= 5:
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else def_grad
    # ... but dont exaggerate too much
    ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
    # convert to timeserie and hydroyears
    ts_grad = ts_grad.tolist()
    ts_grad = ts_grad[em:] + ts_grad[0:em]
    ts_grad = np.asarray(ts_grad * ny)

    # maybe this will throw out of bounds warnings
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # compute monthly anomalies
    # of temp
    ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
    ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
    ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre_ano = ts_pre.groupby('time.month') - ts_pre_avg
    # scaled anomalies is the default. Standard anomalies above
    # are used later for where ts_pre_avg == 0
    ts_pre = ts_pre.groupby('time.month') / ts_pre_avg

    # interpolate to HR grid
    if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
        # Extreme case, middle pix is not valid
        # take any valid pix from the subset (and hope there's one).
        # The three pixels that used to be the only ones inspected come
        # first, in the order that reproduces the former choice; the rest
        # of the subset (previously never looked at) is the fallback.
        nj, ni = ts_tmp.shape[1:]
        legacy = [(0, 1), (1, 0), (0, 0)]
        others = [(j, i) for i in range(ni) for j in range(nj)
                  if (j, i) != (1, 1) and (j, i) not in legacy]
        for idj, idi in legacy + others:
            if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                ts_pre_ano[:, 1, 1] = ts_pre_ano[:, idj, idi]
                break
        else:
            msg = '({}) there is no climate data'.format(gdir.rgi_id)
            raise RuntimeError(msg)
    elif np.any(~np.isfinite(ts_tmp)):
        # maybe the side is nan, but we can do nearest
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='nearest')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='nearest')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
    else:
        # We can do bilinear
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='linear')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='linear')
        ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

    # take the center pixel and add it to the CRU CL clim
    # for temp
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': ts_tmp_avg.month})
    ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_pre_ano = xr.DataArray(ts_pre_ano[:, 1, 1], dims=['time'],
                              coords={'time': time})
    # scaled anomalies
    ts_pre = ts_pre.groupby('time.month') * loc_pre
    # standard anomalies
    ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
    # Correct infinite values with standard anomalies
    ts_pre.values = np.where(np.isfinite(ts_pre.values),
                             ts_pre.values,
                             ts_pre_ano.values)
    # The last step might create negative values (unlikely). Clip them
    ts_pre.values = ts_pre.values.clip(0)

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)
    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))
    assert np.all(np.isfinite(ts_grad))

    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    ts_grad, loc_hgt, loc_lon, loc_lat)
    return y0, y1
def _distribute_cru_style_nonparallel(gdirs):
    """More general solution for OGGM globally.

    It uses the CRU CL2 ten-minutes climatology as baseline
    (provided with OGGM)

    Parameters
    ----------
    gdirs : iterable of glacier directories
        for each of them the monthly climate file is written

    Raises
    ------
    RuntimeError
        if no pixel of the CRU TS subset has a fully valid temperature series
    """

    opened = []
    try:
        # read the climatology
        clfile = utils.get_cru_cl_file()
        ncclim = salem.GeoNetcdf(clfile)
        opened.append(ncclim)
        # and the TS data
        nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
        opened.append(nc_ts_tmp)
        nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)
        opened.append(nc_ts_pre)

        # set temporal subset for the ts data (hydro years)
        nc_ts_tmp.set_period(t0='1901-10-01', t1='2014-09-01')
        nc_ts_pre.set_period(t0='1901-10-01', t1='2014-09-01')
        time = nc_ts_pre.time
        ny, r = divmod(len(time), 12)
        assert r == 0

        # gradient default params
        use_grad = cfg.PARAMS['temp_use_local_gradient']
        def_grad = cfg.PARAMS['temp_default_gradient']
        g_minmax = cfg.PARAMS['temp_local_gradient_bounds']
        prcp_scaling_factor = cfg.PARAMS['prcp_scaling_factor']

        for gdir in gdirs:
            log.info('%s: %s', gdir.rgi_id, 'distribute_cru_style')
            lon = gdir.cenlon
            lat = gdir.cenlat

            # This is guaranteed to work because I prepared the file (I hope)
            ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

            # get monthly gradient ...
            loc_hgt = ncclim.get_vardata('elev')
            loc_tmp = ncclim.get_vardata('temp')
            loc_pre = ncclim.get_vardata('prcp')
            isok = np.isfinite(loc_hgt)
            hgt_f = loc_hgt[isok].flatten()
            ts_grad = np.zeros(12) + def_grad
            if use_grad and len(hgt_f) >= 5:
                for i in range(12):
                    loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
                    slope, _, _, p_val, _ = stats.linregress(hgt_f,
                                                             loc_tmp_mth)
                    ts_grad[i] = slope if (p_val < 0.01) else def_grad
            # ... but dont exaggerate too much
            ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
            # convert to timeserie and hydroyears
            ts_grad = ts_grad.tolist()
            ts_grad = ts_grad[9:] + ts_grad[0:9]
            ts_grad = np.asarray(ts_grad * ny)

            # maybe this will throw out of bounds warnings
            nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
            nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

            # compute monthly anomalies
            # of temp
            ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
            ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
            ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
            ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
            # of precip
            ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
            ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
            ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
            ts_pre = ts_pre.groupby('time.month') - ts_pre_avg

            # interpolate to HR grid
            if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
                # Extreme case, middle pix is not valid
                # take any valid pix from the subset (and hope theres one).
                # The three pixels that used to be the only ones inspected
                # come first, in the order that reproduces the former
                # choice; the rest of the subset (previously never looked
                # at) is the fallback.
                nj, ni = ts_tmp.shape[1:]
                legacy = [(0, 1), (1, 0), (0, 0)]
                others = [(j, i) for i in range(ni) for j in range(nj)
                          if (j, i) != (1, 1) and (j, i) not in legacy]
                for idj, idi in legacy + others:
                    if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                        ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                        ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                        break
                else:
                    msg = '{}: OMG there is no climate data'.format(
                        gdir.rgi_id)
                    raise RuntimeError(msg)
            elif np.any(~np.isfinite(ts_tmp)):
                # maybe the side is nan, but we can do nearest
                ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                      nc_ts_tmp.grid,
                                                      interp='nearest')
                ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                      nc_ts_pre.grid,
                                                      interp='nearest')
            else:
                # We can do bilinear
                ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                      nc_ts_tmp.grid,
                                                      interp='linear')
                ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                      nc_ts_pre.grid,
                                                      interp='linear')

            # take the center pixel and add it to the CRU CL clim
            # for temp
            loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                                   coords={'month': ts_pre_avg.month})
            ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                                  coords={'time': time})
            ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
            # for prcp
            loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                                   coords={'month': ts_pre_avg.month})
            ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                                  coords={'time': time})
            ts_pre = (ts_pre.groupby('time.month') +
                      loc_pre * prcp_scaling_factor)

            # done
            loc_hgt = loc_hgt[1, 1]
            assert np.isfinite(loc_hgt)
            assert np.all(np.isfinite(ts_pre.values))
            assert np.all(np.isfinite(ts_tmp.values))
            assert np.all(np.isfinite(ts_grad))
            gdir.write_monthly_climate_file(time, ts_pre.values,
                                            ts_tmp.values, ts_grad, loc_hgt)
    finally:
        # The datasets are shared by all glaciers: close them once, also
        # when the processing of one glacier failed
        for nc in opened:
            nc._nc.close()
def setup_cache(self):
    """Run the complete workflow once on three glaciers.

    Returns the list of glacier directories produced by the run.
    """

    full_workflow.setup_cache.timeout = 360

    utils.mkdir(self.testdir, reset=True)
    self.cfg_init()

    # Fetch the climate files up-front, they are needed further down
    utils.get_cru_cl_file()
    for cru_var in ('tmp', 'pre'):
        utils.get_cru_file(var=cru_var)

    # The glaciers of this run
    rgi_list = ['RGI60-01.10299', 'RGI60-11.00897', 'RGI60-18.02342']
    rgidf = utils.get_rgi_glacier_entities(rgi_list)

    # Intersects database
    cfg.set_intersects_db(
        utils.get_rgi_intersects_region_file(version='61', rgi_ids=rgi_list))

    # Largest glaciers first: more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # Working directories
    gdirs = workflow.init_glacier_regions(rgidf)

    # All tasks which take no extra argument, in execution order
    plain_tasks = (
        # Preprocessing
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        # Climate -- only data IO and tstar interpolation!
        tasks.process_cru_data,
        tasks.local_mustar,
        tasks.apparent_mb,
        # Inversion, with the default parameters
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
        # Final preparation for the run
        tasks.init_present_time_glacier,
    )
    for entity_task in plain_tasks:
        execute_entity_task(entity_task, gdirs)

    # Constant climate representative for the tstar climate, without bias.
    # In an ideal world the glaciers would then remain stable, but it
    # doesn't have to be so
    execute_entity_task(tasks.run_constant_climate, gdirs,
                        bias=0, nyears=100,
                        output_filesuffix='_tstar')

    execute_entity_task(tasks.run_constant_climate, gdirs,
                        y0=1990, nyears=100,
                        output_filesuffix='_pd')

    # Compile output
    utils.glacier_characteristics(gdirs)
    for suffix in ('_tstar', '_pd'):
        utils.compile_run_output(gdirs, filesuffix=suffix)
    utils.compile_climate_input(gdirs)

    return gdirs
def run_benchmark(rgi_version=None, rgi_reg=None, border=None,
                  output_folder='', working_dir='', is_test=False,
                  test_rgidf=None, test_intersects_file=None,
                  test_topofile=None, test_crudir=None):
    """Does the actual job.

    Runs the workflow on one RGI region, records the wall-clock time of
    each step and writes the timings to
    ``<output_folder>/benchmarks_b<border>.csv``.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Module logger
    log = logging.getLogger(__name__)

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level='WORKFLOW')

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # For statistics
    odf = pd.DataFrame()

    def _timed(label, func, *args, **kwargs):
        # Call func, store its duration under label and hand back its result
        t0 = time.time()
        out = func(*args, **kwargs)
        _add_time_to_df(odf, label, time.time() - t0)
        return out

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    base_dir = os.path.join(output_folder)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    # Read RGI
    start = time.time()
    if test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(utils.get_rgi_region_file(rgi_reg,
                                                        version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(2)
    _add_time_to_df(odf, 'Read RGI', time.time() - start)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # Initialize working directories
    gdirs = _timed('init_glacier_regions', workflow.init_glacier_regions,
                   rgidf, reset=True, force=True)

    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    # Tasks
    task_list = [
        tasks.process_cru_data,
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
        tasks.init_present_time_glacier,
    ]
    for task in task_list:
        _timed(task.__name__, workflow.execute_entity_task, task, gdirs)

    # Runs
    _timed('run_random_climate_tstar_250', workflow.execute_entity_task,
           tasks.run_random_climate, gdirs,
           nyears=250, bias=0, seed=0, output_filesuffix='_tstar')

    _timed('run_random_climate_commit_250', workflow.execute_entity_task,
           tasks.run_random_climate, gdirs,
           nyears=250, y0=1995, seed=0, output_filesuffix='_commit')

    # Compile results
    _timed('compile_glacier_statistics', utils.compile_glacier_statistics,
           gdirs)

    _timed('compile_climate_statistics', utils.compile_climate_statistics,
           gdirs, add_climate_period=[1920, 1960, 2000])

    _timed('compile_run_output_tstar', utils.compile_run_output,
           gdirs, filesuffix='_tstar')

    _timed('compile_run_output_commit', utils.compile_run_output,
           gdirs, filesuffix='_commit')

    # Log
    opath = os.path.join(base_dir, 'benchmarks_b{:03d}.csv'.format(border))
    # Name the index so that the first csv column gets a header
    odf.index.name = 'Task'
    odf.to_csv(opath)
    log.workflow('OGGM benchmarks is done!')
ids_with_mb = pd.read_csv(flink)['RGI_ID'].values

# get some tw-glaciers that we want to test inside alaska region, also that are
# inside GlathiDa
keep_ids = ['RGI50-01.10689', 'RGI50-01.20791', 'RGI50-01.00037',
            'RGI50-01.10402', 'RGI50-01.22193', 'RGI50-01.22699']

# Keep the hand-picked glaciers and the ones listed in the csv file.
# The set is built once so that each membership test is a hash lookup.
wanted_ids = set(keep_ids).union(ids_with_mb)
keep_indexes = [(i in wanted_ids) for i in rgidf.RGIID]
rgidf = rgidf.iloc[keep_indexes]

log.info('Number of glaciers: {}'.format(len(rgidf)))

# Download other files if needed
_ = utils.get_cru_file(var='tmp')

# Go - initialize working directories
# gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)
gdirs = workflow.init_glacier_regions(rgidf)

# Pre-pro tasks
task_list = [
    tasks.glacier_masks,
    tasks.compute_centerlines,
    tasks.compute_downstream_lines,
    tasks.catchment_area,
    tasks.initialize_flowlines,
    tasks.catchment_width_geom,
    tasks.catchment_width_correction
]

# The GIS pre-processing only runs when the RUN_GIS_PREPRO switch is set
if RUN_GIS_PREPRO:
    for task in task_list:
        execute_entity_task(task, gdirs)
def run_benchmark(rgi_version=None, rgi_reg=None, border=None,
                  output_folder='', working_dir='', is_test=False,
                  test_rgidf=None, test_intersects_file=None,
                  test_topofile=None, test_crudir=None):
    """Does the actual job.

    Runs the workflow on one RGI region, records the wall-clock time of
    each step and writes the timings to
    ``<output_folder>/benchmarks_b<border>.csv``.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Module logger
    log = logging.getLogger(__name__)

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level='WORKFLOW')

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # For statistics
    odf = pd.DataFrame()

    def _timed(label, func, *args, **kwargs):
        # Call func, store its duration under label and hand back its result
        t0 = time.time()
        out = func(*args, **kwargs)
        _add_time_to_df(odf, label, time.time() - t0)
        return out

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    base_dir = os.path.join(output_folder)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    # Read RGI
    start = time.time()
    if test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(2)
    _add_time_to_df(odf, 'Read RGI', time.time() - start)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # Initialize working directories
    gdirs = _timed('init_glacier_regions', workflow.init_glacier_regions,
                   rgidf, reset=True, force=True)

    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    # Tasks
    task_list = [
        tasks.process_cru_data,
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
        tasks.init_present_time_glacier,
    ]
    for task in task_list:
        _timed(task.__name__, workflow.execute_entity_task, task, gdirs)

    # Runs
    _timed('run_random_climate_tstar_250', workflow.execute_entity_task,
           tasks.run_random_climate, gdirs,
           nyears=250, bias=0, seed=0, output_filesuffix='_tstar')

    _timed('run_random_climate_commit_250', workflow.execute_entity_task,
           tasks.run_random_climate, gdirs,
           nyears=250, y0=1995, seed=0, output_filesuffix='_commit')

    # Compile results
    _timed('compile_glacier_statistics', utils.compile_glacier_statistics,
           gdirs)

    _timed('compile_climate_statistics', utils.compile_climate_statistics,
           gdirs, add_climate_period=[1920, 1960, 2000])

    _timed('compile_run_output_tstar', utils.compile_run_output,
           gdirs, filesuffix='_tstar')

    _timed('compile_run_output_commit', utils.compile_run_output,
           gdirs, filesuffix='_commit')

    # Log
    opath = os.path.join(base_dir, 'benchmarks_b{:03d}.csv'.format(border))
    # Name the index so that the first csv column gets a header
    odf.index.name = 'Task'
    odf.to_csv(opath)
    log.workflow('OGGM benchmarks is done!')
def _distribute_cru_style_nonparallel(gdirs):
    """Write the monthly CRU-based climate file of several glaciers.

    More general solution for OGGM globally. It uses the CRU CL2 ten-minutes
    climatology as baseline (provided with OGGM): the monthly anomalies of
    the CRU time series with respect to 1961-1990 are added to the
    climatology of the pixel at the glacier centre.

    Parameters
    ----------
    gdirs : iterable
        the glacier directories to process. Each one must provide
        ``rgi_id``, ``cenlon``, ``cenlat`` and
        ``write_monthly_climate_file``.

    Raises
    ------
    RuntimeError
        if the centre pixel of the time series is not valid and no
        neighbouring pixel has a complete temperature series either.
    """

    # read the climatology
    ncclim = salem.GeoNetcdf(utils.get_cru_cl_file())
    # Everything opened here is closed in the ``finally`` clause, also when
    # one of the glaciers raises.
    opened = [ncclim]
    try:
        # and the TS data
        nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'),
                                    monthbegin=True)
        opened.append(nc_ts_tmp)
        nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'),
                                    monthbegin=True)
        opened.append(nc_ts_pre)

        # set temporal subset for the ts data (hydro years)
        nc_ts_tmp.set_period(t0='1901-10-01', t1='2014-09-01')
        nc_ts_pre.set_period(t0='1901-10-01', t1='2014-09-01')
        time = nc_ts_pre.time
        ny, r = divmod(len(time), 12)
        assert r == 0

        # gradient default params
        use_grad = cfg.PARAMS['temp_use_local_gradient']
        def_grad = cfg.PARAMS['temp_default_gradient']
        g_minmax = cfg.PARAMS['temp_local_gradient_bounds']
        prcp_scaling_factor = cfg.PARAMS['prcp_scaling_factor']

        for gdir in gdirs:
            # NOTE(review): the callee name was lost in the source text;
            # ``log.info`` on the module logger is assumed - confirm.
            log.info('%s: %s', gdir.rgi_id, 'distribute_cru_style')
            lon = gdir.cenlon
            lat = gdir.cenlat

            # This is guaranteed to work because I prepared the file (I hope)
            ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

            # get monthly gradient ...
            loc_hgt = ncclim.get_vardata('elev')
            loc_tmp = ncclim.get_vardata('temp')
            loc_pre = ncclim.get_vardata('prcp')
            isok = np.isfinite(loc_hgt)
            hgt_f = loc_hgt[isok].flatten()
            ts_grad = np.zeros(12) + def_grad
            if use_grad and len(hgt_f) >= 5:
                for i in range(12):
                    loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
                    slope, _, _, p_val, _ = stats.linregress(hgt_f,
                                                             loc_tmp_mth)
                    ts_grad[i] = slope if (p_val < 0.01) else def_grad
            # ... but dont exaggerate too much
            ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
            # convert to timeserie and hydroyears (october first)
            ts_grad = ts_grad.tolist()
            ts_grad = ts_grad[9:] + ts_grad[0:9]
            ts_grad = np.asarray(ts_grad * ny)

            # maybe this will throw out of bounds warnings
            nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
            nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

            # compute monthly anomalies
            # of temp
            ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
            ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
            ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
            ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
            # of precip
            ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
            ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
            ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
            ts_pre = ts_pre.groupby('time.month') - ts_pre_avg

            # interpolate to HR grid
            if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
                # Extreme case, middle pix is not valid: borrow the series
                # of a valid pixel of the 3*3 window. The 2*2 corner that
                # used to be the only one searched is tried first, so that
                # glaciers which already worked keep the same pixel; the
                # remaining row and column are only a fallback.
                n_j, n_i = ts_tmp.shape[1:]
                legacy = [(j, i) for i in range(2) for j in range(2)]
                others = [(j, i) for i in range(n_i) for j in range(n_j)
                          if (j, i) not in legacy]
                found_it = False
                for candidates in (legacy, others):
                    for j, i in candidates:
                        if np.all(np.isfinite(ts_tmp[:, j, i])):
                            ts_tmp[:, 1, 1] = ts_tmp[:, j, i]
                            ts_pre[:, 1, 1] = ts_pre[:, j, i]
                            found_it = True
                    if found_it:
                        break
                if not found_it:
                    msg = '{}: OMG there is no climate data'.format(
                        gdir.rgi_id)
                    raise RuntimeError(msg)
            elif np.any(~np.isfinite(ts_tmp)):
                # maybe the side is nan, but we can do nearest
                ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                      nc_ts_tmp.grid,
                                                      interp='nearest')
                ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                      nc_ts_pre.grid,
                                                      interp='nearest')
            else:
                # We can do bilinear
                ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                      nc_ts_tmp.grid,
                                                      interp='linear')
                ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                      nc_ts_pre.grid,
                                                      interp='linear')

            # take the center pixel and add it to the CRU CL clim
            # for temp
            loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                                   coords={'month': ts_pre_avg.month})
            ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                                  coords={'time': time})
            ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
            # for prcp
            loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                                   coords={'month': ts_pre_avg.month})
            ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                                  coords={'time': time})
            ts_pre = (ts_pre.groupby('time.month') +
                      loc_pre * prcp_scaling_factor)

            # done
            loc_hgt = loc_hgt[1, 1]
            assert np.isfinite(loc_hgt)
            assert np.all(np.isfinite(ts_pre.values))
            assert np.all(np.isfinite(ts_tmp.values))
            assert np.all(np.isfinite(ts_grad))
            gdir.write_monthly_climate_file(time, ts_pre.values,
                                            ts_tmp.values, ts_grad, loc_hgt)
    finally:
        for nc in opened:
            nc._nc.close()
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None,
                      output_folder='', working_dir='', is_test=False,
                      demo=False, test_rgidf=None, test_intersects_file=None,
                      test_topofile=None, test_crudir=None):
    """Run the preprocessing chain and archive each level (L1 to L4).

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Module logger and wall-clock reference for the final log message
    log = logging.getLogger(__name__)
    t_begin = time.time()

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level='WORKFLOW')
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir
    cfg.PARAMS['use_multiprocessing'] = True
    # Grid points around the glacier: make it large if you expect your
    # glaciers to grow large
    cfg.PARAMS['border'] = border
    # Operational run: do not stop on single glacier errors
    cfg.PARAMS['continue_on_error'] = True

    # Periods passed to the climate statistics
    periods = [1920, 1960, 2000]

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    base_dir = os.path.join(output_folder,
                            'RGI{}'.format(rgi_version),
                            'b_{:03d}'.format(border))

    # Add a package version file
    utils.mkdir(base_dir)
    with open(os.path.join(base_dir, 'package_versions.txt'), 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    # Select the glacier outlines
    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DEMO_GLACIERS.index)
    elif test_rgidf is None:
        # Regional RGI file, together with its intersects
        rgi_file = utils.get_rgi_region_file(rgi_reg, version=rgi_version)
        rgidf = gpd.read_file(rgi_file)
        cfg.set_intersects_db(
            utils.get_rgi_intersects_region_file(rgi_reg,
                                                 version=rgi_version))
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(4)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    def _glacier_stats(level):
        # Compile the glacier statistics of ``level``; return its summary dir
        summary = os.path.join(base_dir, level, 'summary')
        utils.mkdir(summary)
        target = os.path.join(summary,
                              'glacier_statistics_{}.csv'.format(rgi_reg))
        utils.compile_glacier_statistics(gdirs, path=target)
        return summary

    def _archive(level):
        # Tar every glacier directory into the level directory, then tar it
        level_dir = os.path.join(base_dir, level)
        workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                     base_dir=level_dir)
        utils.base_dir_to_tar(level_dir)

    # L1 - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)
    _glacier_stats('L1')
    _archive('L1')

    # L2 - climate data (pre-download the CRU files just in case)
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir
    workflow.execute_entity_task(tasks.process_cru_data, gdirs)
    _glacier_stats('L2')
    _archive('L2')

    # L3 - geometry, calibration and inversion tasks, in this order
    l3_tasks = [
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
    ]
    for entity_task in l3_tasks:
        workflow.execute_entity_task(entity_task, gdirs)
    l3_summary = _glacier_stats('L3')
    climate_csv = os.path.join(l3_summary,
                               'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs, add_climate_period=periods,
                                     path=climate_csv)
    _archive('L3')

    # L4 - present time glaciers
    workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs)
    _glacier_stats('L4')

    # Copy mini data to the L4 directory and compress it there
    l4_dir = os.path.join(base_dir, 'L4')
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs,
                                              base_dir=l4_dir)
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=True)
    utils.base_dir_to_tar(l4_dir)

    # Log the elapsed time as hh:mm:ss
    minutes, seconds = divmod(time.time() - t_begin, 60)
    hours, minutes = divmod(minutes, 60)
    log.workflow('OGGM prepro_levels is done! Time needed: '
                 '{:02d}:{:02d}:{:02d}'.format(int(hours), int(minutes),
                                               int(seconds)))
def initialization_selection():
    """Initialise OGGM and select the reference mass-balance glaciers.

    The run is configured from ``mbcfg``. The RGI outlines of the glaciers
    listed in the WGMS files are read region by region (region 19 is
    skipped, as is any region other than ``mbcfg.PARAMS['region']`` when
    that is set), climate data is processed for them, and only the glaciers
    kept by ``utils.get_ref_mb_glaciers`` are retained. Their outlines are
    saved as ``mb_ref_glaciers.shp`` in the working directory.

    Returns
    -------
    list
        the glacier directories of the selected glaciers, as returned by
        ``workflow.init_glacier_regions`` (freshly reset).
    """
    # -------------
    # Initialization
    # -------------
    cfg.initialize()

    # working directories
    cfg.PATHS['working_dir'] = mbcfg.PATHS['working_dir']

    cfg.PATHS['rgi_version'] = mbcfg.PARAMS['rgi_version']

    # We are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True

    # No need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # Set to True for operational runs
    # maybe also here?
    cfg.PARAMS['continue_on_error'] = False

    # set negative flux filtering to false. should be standard soon
    cfg.PARAMS['filter_for_neg_flux'] = False

    # Pre-download other files which will be needed later
    _ = utils.get_cru_file(var='tmp')
    _ = utils.get_cru_file(var='pre')
    rgi_dir = utils.get_rgi_dir(version=cfg.PATHS['rgi_version'])

    # Get the reference glacier ids (they are different for each RGI version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(cfg.PATHS['rgi_version'])]

    # Make a new dataframe with those (this takes a while)
    rgidf = []
    for reg in df['RGI_REG'].unique():
        if reg == '19':
            continue  # we have no climate data in Antarctica
        if mbcfg.PARAMS['region'] is not None \
                and reg != mbcfg.PARAMS['region']:
            continue

        fn = '*' + reg + '_rgi{}0_*.shp'.format(cfg.PATHS['rgi_version'])
        # First match in sorted order (IndexError if nothing matches)
        fs = sorted(glob(os.path.join(rgi_dir, '*', fn)))[0]
        sh = gpd.read_file(fs)
        rgidf.append(sh.loc[sh.RGIId.isin(rids)])
    rgidf = pd.concat(rgidf)
    # NOTE(review): this statement was garbled in the source text; it is
    # restored as the CRS of the last shapefile read - confirm.
    rgidf.crs = sh.crs  # for geolocalisation

    # reduce Europe to Histalp area (exclude Pyrenees, etc...)
    if mbcfg.PARAMS['histalp']:
        rgidf = rgidf.loc[(rgidf.CenLon >= 4) &
                          (rgidf.CenLon < 20) &
                          (rgidf.CenLat >= 43) &
                          (rgidf.CenLat < 47)]

    # We have to check which of them actually have enough mb data.
    # Let OGGM do it:
    gdirs = workflow.init_glacier_regions(rgidf)
    # We need to know which period we have data for
    if mbcfg.PARAMS['histalp']:
        cfg.PATHS['climate_file'] = mbcfg.PATHS['histalpfile']
        execute_entity_task(tasks.process_custom_climate_data, gdirs)
    else:
        execute_entity_task(tasks.process_cru_data, gdirs,
                            print_log=False)

    gdirs = utils.get_ref_mb_glaciers(gdirs)
    # Keep only these
    rgidf = rgidf.loc[rgidf.RGIId.isin([g.rgi_id for g in gdirs])]

    # Save
    rgidf.to_file(os.path.join(cfg.PATHS['working_dir'],
                               'mb_ref_glaciers.shp'))

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # Go - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    return gdirs
def process_cru_data(gdir):
    """Processes and writes the climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file. The period
    covered by the data is stored in the ``climate_info`` pickle.

    Parameters
    ----------
    gdir : glacier directory
        the glacier to process. It must provide ``rgi_id``, ``cenlon``,
        ``cenlat``, ``write_monthly_climate_file`` and ``write_pickle``.

    Raises
    ------
    RuntimeError
        if the centre pixel of the time series is not valid and no
        neighbouring pixel has a complete temperature series either.
    """

    # read the climatology
    ncclim = salem.GeoNetcdf(utils.get_cru_cl_file())
    # Everything opened here is closed in the ``finally`` clause, so that
    # no file handle is left open when this function raises.
    opened = [ncclim]
    try:
        # and the TS data
        nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'),
                                    monthbegin=True)
        opened.append(nc_ts_tmp)
        nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'),
                                    monthbegin=True)
        opened.append(nc_ts_pre)

        # set temporal subset for the ts data (hydro years)
        yrs = nc_ts_pre.time.year
        y0, y1 = yrs[0], yrs[-1]
        t0 = '{}-10-01'.format(y0)
        t1 = '{}-09-01'.format(y1)
        nc_ts_tmp.set_period(t0=t0, t1=t1)
        nc_ts_pre.set_period(t0=t0, t1=t1)
        time = nc_ts_pre.time
        ny, r = divmod(len(time), 12)
        assert r == 0

        # gradient default params
        use_grad = cfg.PARAMS['temp_use_local_gradient']
        def_grad = cfg.PARAMS['temp_default_gradient']
        g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

        lon = gdir.cenlon
        lat = gdir.cenlat

        # This is guaranteed to work because I prepared the file (I hope)
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

        # get climatology data
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

        # see if the center is ok
        if not np.isfinite(loc_hgt[1, 1]):
            # take another candidate where finite
            isok = np.isfinite(loc_hgt)

            # wait: some areas are entirely NaNs, make the subset larger
            _margin = 1
            while not np.any(isok):
                _margin += 1
                ncclim.set_subset(corners=((lon, lat), (lon, lat)),
                                  margin=_margin)
                loc_hgt = ncclim.get_vardata('elev')
                isok = np.isfinite(loc_hgt)
            if _margin > 1:
                log.debug('%s: I had to look up for far climate pixels: %s',
                          gdir.rgi_id, _margin)

            # Take the first candidate (doesn't matter which)
            lon, lat = ncclim.grid.ll_coordinates
            lon = lon[isok][0]
            lat = lat[isok][0]
            # Resubset, now centred on the candidate pixel
            ncclim.set_subset()
            ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
            loc_hgt = ncclim.get_vardata('elev')
            loc_tmp = ncclim.get_vardata('temp')
            loc_pre = ncclim.get_vardata('prcp')
            loc_lon = ncclim.get_vardata('lon')
            loc_lat = ncclim.get_vardata('lat')

        assert np.isfinite(loc_hgt[1, 1])
        isok = np.isfinite(loc_hgt)
        hgt_f = loc_hgt[isok].flatten()
        assert len(hgt_f) > 0.

        # get monthly gradient ...
        ts_grad = np.zeros(12) + def_grad
        if use_grad and len(hgt_f) >= 5:
            for i in range(12):
                loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
                slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
                ts_grad[i] = slope if (p_val < 0.01) else def_grad
        # ... but dont exaggerate too much
        ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
        # convert to timeserie and hydroyears (october first)
        ts_grad = ts_grad.tolist()
        ts_grad = ts_grad[9:] + ts_grad[0:9]
        ts_grad = np.asarray(ts_grad * ny)

        # maybe this will throw out of bounds warnings
        nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

        # compute monthly anomalies
        # of temp
        ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
        ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
        ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
        ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
        # of precip
        ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
        ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
        ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
        ts_pre = ts_pre.groupby('time.month') - ts_pre_avg

        # interpolate to HR grid
        if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
            # Extreme case, middle pix is not valid: borrow the series of a
            # valid pixel of the 3*3 window. The 2*2 corner that used to be
            # the only one searched is tried first, so that glaciers which
            # already worked keep the same pixel; the remaining row and
            # column are only a fallback.
            n_j, n_i = ts_tmp.shape[1:]
            legacy = [(j, i) for i in range(2) for j in range(2)]
            others = [(j, i) for i in range(n_i) for j in range(n_j)
                      if (j, i) not in legacy]
            found_it = False
            for candidates in (legacy, others):
                for j, i in candidates:
                    if np.all(np.isfinite(ts_tmp[:, j, i])):
                        ts_tmp[:, 1, 1] = ts_tmp[:, j, i]
                        ts_pre[:, 1, 1] = ts_pre[:, j, i]
                        found_it = True
                if found_it:
                    break
            if not found_it:
                msg = '{}: OMG there is no climate data'.format(gdir.rgi_id)
                raise RuntimeError(msg)
        elif np.any(~np.isfinite(ts_tmp)):
            # maybe the side is nan, but we can do nearest
            ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                  nc_ts_tmp.grid,
                                                  interp='nearest')
            ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                  nc_ts_pre.grid,
                                                  interp='nearest')
        else:
            # We can do bilinear
            ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values,
                                                  nc_ts_tmp.grid,
                                                  interp='linear')
            ts_pre = ncclim.grid.map_gridded_data(ts_pre.values,
                                                  nc_ts_pre.grid,
                                                  interp='linear')

        # take the center pixel and add it to the CRU CL clim
        # for temp
        loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                               coords={'month': ts_pre_avg.month})
        ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                              coords={'time': time})
        ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
        # for prcp
        loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                               coords={'month': ts_pre_avg.month})
        ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                              coords={'time': time})
        ts_pre = ts_pre.groupby('time.month') + loc_pre

        # done
        loc_hgt = loc_hgt[1, 1]
        loc_lon = loc_lon[1]
        loc_lat = loc_lat[1]
        assert np.isfinite(loc_hgt)
        assert np.all(np.isfinite(ts_pre.values))
        assert np.all(np.isfinite(ts_tmp.values))
        assert np.all(np.isfinite(ts_grad))
        gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                        ts_grad, loc_hgt, loc_lon, loc_lat)
    finally:
        for nc in opened:
            nc._nc.close()

    # metadata
    out = {'climate_source': 'CRU data', 'hydro_yr_0': y0+1, 'hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None,
                      output_folder='', working_dir='', is_test=False,
                      demo=False, test_rgidf=None, test_intersects_file=None,
                      test_topofile=None, test_crudir=None):
    """Run the preprocessing chain and archive each level (L1 to L4).

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Module logger and wall-clock reference for the final log message
    log = logging.getLogger(__name__)
    t_begin = time.time()

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level='WORKFLOW')
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir
    cfg.PARAMS['use_multiprocessing'] = True
    # Grid points around the glacier: make it large if you expect your
    # glaciers to grow large
    cfg.PARAMS['border'] = border
    # Operational run: do not stop on single glacier errors
    cfg.PARAMS['continue_on_error'] = True

    # Periods passed to the climate statistics
    periods = [1920, 1960, 2000]

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    base_dir = os.path.join(output_folder,
                            'RGI{}'.format(rgi_version),
                            'b_{:03d}'.format(border))

    # Add a package version file
    utils.mkdir(base_dir)
    with open(os.path.join(base_dir, 'package_versions.txt'), 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    # Select the glacier outlines
    if demo:
        demo_ids = cfg.DATA['demo_glaciers'].index
        rgidf = utils.get_rgi_glacier_entities(demo_ids)
    elif test_rgidf is None:
        # Regional RGI file, together with its intersects
        rgi_file = utils.get_rgi_region_file(rgi_reg, version=rgi_version)
        rgidf = gpd.read_file(rgi_file)
        cfg.set_intersects_db(
            utils.get_rgi_intersects_region_file(rgi_reg,
                                                 version=rgi_version))
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(4)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    def _glacier_stats(level):
        # Compile the glacier statistics of ``level``; return its summary dir
        summary = os.path.join(base_dir, level, 'summary')
        utils.mkdir(summary)
        target = os.path.join(summary,
                              'glacier_statistics_{}.csv'.format(rgi_reg))
        utils.compile_glacier_statistics(gdirs, path=target)
        return summary

    def _archive(level):
        # Tar every glacier directory into the level directory, then tar it
        level_dir = os.path.join(base_dir, level)
        workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                     base_dir=level_dir)
        utils.base_dir_to_tar(level_dir)

    # L1 - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)
    _glacier_stats('L1')
    _archive('L1')

    # L2 - climate data (pre-download the CRU files just in case)
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir
    workflow.execute_entity_task(tasks.process_cru_data, gdirs)
    _glacier_stats('L2')
    _archive('L2')

    # L3 - geometry, calibration and inversion tasks, in this order
    l3_tasks = [
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
    ]
    for entity_task in l3_tasks:
        workflow.execute_entity_task(entity_task, gdirs)
    l3_summary = _glacier_stats('L3')
    climate_csv = os.path.join(l3_summary,
                               'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs, add_climate_period=periods,
                                     path=climate_csv)
    _archive('L3')

    # L4 - present time glaciers
    workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs)
    _glacier_stats('L4')

    # Copy mini data to the L4 directory and compress it there
    l4_dir = os.path.join(base_dir, 'L4')
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs,
                                              base_dir=l4_dir)
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=True)
    utils.base_dir_to_tar(l4_dir)

    # Log the elapsed time as hh:mm:ss
    minutes, seconds = divmod(time.time() - t_begin, 60)
    hours, minutes = divmod(minutes, 60)
    log.workflow('OGGM prepro_levels is done! Time needed: '
                 '{:02d}:{:02d}:{:02d}'.format(int(hours), int(minutes),
                                               int(seconds)))