def test_workflow(self): # This is a check that the inversion workflow works fine # Download the RGI file for the run # Make a new dataframe of those rgidf = gpd.read_file(get_demo_file('SouthGlacier.shp')) # Go - initialize working directories gdirs = workflow.init_glacier_directories(rgidf) # Preprocessing tasks task_list = [ tasks.define_glacier_region, tasks.glacier_masks, tasks.compute_centerlines, tasks.initialize_flowlines, tasks.catchment_area, tasks.catchment_intersections, tasks.catchment_width_geom, tasks.catchment_width_correction, tasks.compute_downstream_line, tasks.compute_downstream_bedshape, ] for task in task_list: execute_entity_task(task, gdirs) execute_entity_task(tasks.process_cru_data, gdirs, tmp_file=self.tf, pre_file=self.pf) execute_entity_task(tasks.local_t_star, gdirs) execute_entity_task(tasks.mu_star_calibration, gdirs) # Inversion tasks execute_entity_task(tasks.prepare_for_inversion, gdirs) # We use the default parameters for this run execute_entity_task(tasks.mass_conservation_inversion, gdirs) execute_entity_task(tasks.filter_inversion_output, gdirs) df = utils.compile_glacier_statistics(gdirs) df['inv_thickness_m'] = df['inv_volume_km3'] / df['rgi_area_km2'] * 1e3 assert df.inv_thickness_m[0] < 100 df = utils.compile_fixed_geometry_mass_balance(gdirs) assert len(df) > 100 if do_plot: import matplotlib.pyplot as plt from oggm.graphics import plot_inversion plot_inversion(gdirs) plt.show()
def match_regional_geodetic_mb(gdirs, rgi_reg): """Regional shift of the mass-balance residual to match observations. This is useful for operational runs, but also quite hacky. Let's hope we won't need this for too long. Parameters ---------- gdirs : the list of gdirs (ideally the entire region_ rgi_reg : str the rgi region to match """ # Get the mass-balance OGGM would give out of the box df = utils.compile_fixed_geometry_mass_balance(gdirs, path=False) df = df.dropna(axis=0, how='all').dropna(axis=1, how='all') # And also the Area and calving fluxes dfs = utils.compile_glacier_statistics(gdirs, path=False) odf = pd.DataFrame(df.loc[2006:2018].mean(), columns=['SMB']) odf['AREA'] = dfs.rgi_area_km2 * 1e6 # Just take the calving rate and change its units # Original units: km3 a-1, to change to mm a-1 (units of specific MB) rho = cfg.PARAMS['ice_density'] if 'calving_flux' in dfs: odf['CALVING'] = dfs['calving_flux'].fillna(0) * 1e9 * rho / odf['AREA'] else: odf['CALVING'] = 0 # We have to drop nans here, which occur when calving glaciers fail to run odf = odf.dropna() # Compare area with total RGI area rdf = 'rgi62_areas.csv' rdf = pd.read_csv(utils.get_demo_file(rdf), dtype={'O1Region': str}) ref_area = rdf.loc[rdf['O1Region'] == rgi_reg].iloc[0]['AreaNoC2NoNominal'] diff = (1 - odf['AREA'].sum() * 1e-6 / ref_area) * 100 msg = 'Applying geodetic MB correction on RGI reg {}. Diff area: {:.2f}%' log.workflow(msg.format(rgi_reg, diff)) # Total MB OGGM out_smb = np.average(odf['SMB'], weights=odf['AREA']) # for logging out_cal = np.average(odf['CALVING'], weights=odf['AREA']) # for logging smb_oggm = np.average(odf['SMB'] - odf['CALVING'], weights=odf['AREA']) # Total MB Reference df = 'table_hugonnet_regions_10yr_20yr_ar6period.csv' df = pd.read_csv(utils.get_demo_file(df)) df = df.loc[df.period == '2006-01-01_2019-01-01'].set_index('reg') smb_ref = df.loc[int(rgi_reg), 'dmdtda'] # Diff between the two residual = smb_ref - smb_oggm # Let's just shift log.workflow('Shifting regional MB bias by {}'.format(residual)) log.workflow('Observations give {}'.format(smb_ref)) log.workflow('OGGM SMB gives {}'.format(out_smb)) log.workflow('OGGM frontal ablation gives {}'.format(out_cal)) for gdir in gdirs: try: df = gdir.read_json('local_mustar') gdir.add_to_diagnostics('mb_bias_before_geodetic_corr', df['bias']) df['bias'] = df['bias'] - residual gdir.write_json(df, 'local_mustar') except FileNotFoundError: pass
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None, output_folder='', working_dir='', dem_source='', is_test=False, test_ids=None, demo=False, test_rgidf=None, test_intersects_file=None, test_topofile=None, disable_mp=False, params_file=None, elev_bands=False, match_regional_geodetic_mb=False, match_geodetic_mb_per_glacier=False, evolution_model='fl_sia', centerlines_only=False, override_params=None, add_consensus=False, start_level=None, start_base_url=None, max_level=5, ref_tstars_base_url='', logging_level='WORKFLOW', disable_dl_verify=False, dynamic_spinup=False, continue_on_error=True): """Generate the preprocessed OGGM glacier directories for this OGGM version Parameters ---------- rgi_version : str the RGI version to use (defaults to cfg.PARAMS) rgi_reg : str the RGI region to process border : int the number of pixels at the maps border output_folder : str path to the output folder (where to put the preprocessed tar files) dem_source : str which DEM source to use: default, SOURCE_NAME or ALL working_dir : str path to the OGGM working directory ref_tstars_base_url : str url where to find the pre-calibrated reference tstar list. Required as of v1.4. params_file : str path to the OGGM parameter file (to override defaults) is_test : bool to test on a couple of glaciers only! test_ids : list if is_test: list of ids to process demo : bool to run the prepro for the list of demo glaciers test_rgidf : shapefile for testing purposes only test_intersects_file : shapefile for testing purposes only test_topofile : str for testing purposes only test_crudir : str for testing purposes only disable_mp : bool disable multiprocessing elev_bands : bool compute all flowlines based on the Huss&Hock 2015 method instead of the OGGM default, which is a mix of elev_bands and centerlines. centerlines_only : bool compute all flowlines based on the OGGM centerline(s) method instead of the OGGM default, which is a mix of elev_bands and centerlines. match_regional_geodetic_mb : str match the regional mass-balance estimates at the regional level ('hugonnet': Hugonnet et al., 2020 or 'zemp': Zemp et al., 2019). match_geodetic_mb_per_glacier : str match the mass-balance estimates at the glacier level (currently only 'hugonnet': Hugonnet et al., 2020). evolution_model : str which geometry evolution model to use: `fl_sia` (default), or `massredis` (mass redistribution curve). add_consensus : bool adds (reprojects) the consensus estimates thickness to the glacier directories. With elev_bands=True, the data will also be binned. start_level : int the pre-processed level to start from (default is to start from scratch). If set, you'll need to indicate start_base_url as well. start_base_url : str the pre-processed base-url to fetch the data from. max_level : int the maximum pre-processing level before stopping logging_level : str the logging level to use (DEBUG, INFO, WARNING, WORKFLOW) override_params : dict a dict of parameters to override. disable_dl_verify : bool disable the hash verification of OGGM downloads dynamic_spinup: str include a dynamic spinup matching 'area' OR 'volume' at the RGI-date """ # Input check if max_level not in [1, 2, 3, 4, 5]: raise InvalidParamsError('max_level should be one of [1, 2, 3, 4, 5]') if start_level is not None: if start_level not in [0, 1, 2]: raise InvalidParamsError('start_level should be one of [0, 1, 2]') if start_level > 0 and start_base_url is None: raise InvalidParamsError('With start_level, please also indicate ' 'start_base_url') else: start_level = 0 if match_regional_geodetic_mb and match_geodetic_mb_per_glacier: raise InvalidParamsError( 'match_regional_geodetic_mb incompatible with ' 'match_geodetic_mb_per_glacier!') if match_geodetic_mb_per_glacier and match_geodetic_mb_per_glacier != 'hugonnet': raise InvalidParamsError('Currently only `hugonnet` is available for ' 'match_geodetic_mb_per_glacier.') if evolution_model not in ['fl_sia', 'massredis']: raise InvalidParamsError('evolution_model should be one of ' "['fl_sia', 'massredis'].") if dynamic_spinup and dynamic_spinup not in ['area', 'volume']: raise InvalidParamsError(f"Dynamic spinup option '{dynamic_spinup}' " "not supported") if dynamic_spinup and evolution_model == 'massredis': raise InvalidParamsError("Dynamic spinup is not working/tested" "with massredis!") # Time start = time.time() def _time_log(): # Log util m, s = divmod(time.time() - start, 60) h, m = divmod(m, 60) log.workflow('OGGM prepro_levels is done! Time needed: ' '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s))) # Local paths if override_params is None: override_params = {} utils.mkdir(working_dir) override_params['working_dir'] = working_dir # Initialize OGGM and set up the run parameters cfg.initialize(file=params_file, params=override_params, logging_level=logging_level, future=True) if match_geodetic_mb_per_glacier and (cfg.PARAMS['hydro_month_nh'] != 1 or cfg.PARAMS['hydro_month_sh'] != 1): raise InvalidParamsError('We recommend to set hydro_month_nh and sh ' 'to 1 for the geodetic MB calibration per ' 'glacier.') # Use multiprocessing? cfg.PARAMS['use_multiprocessing'] = not disable_mp # How many grid points around the glacier? # Make it large if you expect your glaciers to grow large cfg.PARAMS['border'] = border # Set to True for operational runs cfg.PARAMS['continue_on_error'] = continue_on_error # Check for the integrity of the files OGGM downloads at run time # For large files (e.g. using a 1 tif DEM like ALASKA) calculating the hash # takes a long time, so deactivating this can make sense cfg.PARAMS['dl_verify'] = not disable_dl_verify # Other things that make sense cfg.PARAMS['store_model_geometry'] = True # Log the parameters msg = '# OGGM Run parameters:' for k, v in cfg.PARAMS.items(): if type(v) in [pd.DataFrame, dict]: continue msg += '\n {}: {}'.format(k, v) log.workflow(msg) if rgi_version is None: rgi_version = cfg.PARAMS['rgi_version'] output_base_dir = os.path.join(output_folder, 'RGI{}'.format(rgi_version), 'b_{:03d}'.format(border)) # Add a package version file utils.mkdir(output_base_dir) opath = os.path.join(output_base_dir, 'package_versions.txt') with open(opath, 'w') as vfile: vfile.write(utils.show_versions(logger=log)) if demo: rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index) elif test_rgidf is None: # Get the RGI file rgidf = gpd.read_file( utils.get_rgi_region_file(rgi_reg, version=rgi_version)) # We use intersects rgif = utils.get_rgi_intersects_region_file(rgi_reg, version=rgi_version) cfg.set_intersects_db(rgif) # Some RGI input quality checks - this is based on visual checks # of large glaciers in the RGI ids_to_ice_cap = [ 'RGI60-05.10315', # huge Greenland ice cap 'RGI60-03.01466', # strange thing next to Devon 'RGI60-09.00918', # Academy of sciences Ice cap 'RGI60-09.00969', 'RGI60-09.00958', 'RGI60-09.00957', ] rgidf.loc[rgidf.RGIId.isin(ids_to_ice_cap), 'Form'] = '1' # In AA almost all large ice bodies are actually ice caps if rgi_reg == '19': rgidf.loc[rgidf.Area > 100, 'Form'] = '1' # For greenland we omit connectivity level 2 if rgi_reg == '05': rgidf = rgidf.loc[rgidf['Connect'] != 2] else: rgidf = test_rgidf cfg.set_intersects_db(test_intersects_file) if is_test: if test_ids is not None: rgidf = rgidf.loc[rgidf.RGIId.isin(test_ids)] else: rgidf = rgidf.sample(4) if max_level > 2: # Also use ref tstars utils.apply_test_ref_tstars() if max_level > 2 and ref_tstars_base_url: workflow.download_ref_tstars(base_url=ref_tstars_base_url) log.workflow('Starting prepro run for RGI reg: {} ' 'and border: {}'.format(rgi_reg, border)) log.workflow('Number of glaciers: {}'.format(len(rgidf))) # L0 - go if start_level == 0: gdirs = workflow.init_glacier_directories(rgidf, reset=True, force=True) # Glacier stats sum_dir = os.path.join(output_base_dir, 'L0', 'summary') utils.mkdir(sum_dir) opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) utils.compile_glacier_statistics(gdirs, path=opath) # L0 OK - compress all in output directory log.workflow('L0 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L0') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) if max_level == 0: _time_log() return else: gdirs = workflow.init_glacier_directories( rgidf, reset=True, force=True, from_prepro_level=start_level, prepro_border=border, prepro_rgi_version=rgi_version, prepro_base_url=start_base_url) # L1 - Add dem files if start_level == 0: if test_topofile: cfg.PATHS['dem_file'] = test_topofile # Which DEM source? if dem_source.upper() == 'ALL': # This is the complex one, just do the job and leave log.workflow('Running prepro on ALL sources') for i, s in enumerate(utils.DEM_SOURCES): rs = i == 0 log.workflow('Running prepro on sources: {}'.format(s)) gdirs = workflow.init_glacier_directories(rgidf, reset=rs, force=rs) workflow.execute_entity_task(tasks.define_glacier_region, gdirs, source=s) workflow.execute_entity_task(_rename_dem_folder, gdirs, source=s) # make a GeoTiff mask of the glacier, choose any source workflow.execute_entity_task(gis.rasterio_glacier_mask, gdirs, source='ALL') # Compress all in output directory level_base_dir = os.path.join(output_base_dir, 'L1') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) _time_log() return # Force a given source source = dem_source.upper() if dem_source else None # L1 - go workflow.execute_entity_task(tasks.define_glacier_region, gdirs, source=source) # Glacier stats sum_dir = os.path.join(output_base_dir, 'L1', 'summary') utils.mkdir(sum_dir) opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) utils.compile_glacier_statistics(gdirs, path=opath) # L1 OK - compress all in output directory log.workflow('L1 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L1') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) if max_level == 1: _time_log() return # L2 - Tasks if start_level <= 1: # Check which glaciers will be processed as what if elev_bands: gdirs_band = gdirs gdirs_cent = [] elif centerlines_only: gdirs_band = [] gdirs_cent = gdirs else: # Default is to centerlines_only, but it used to be a mix # (e.g. bands for ice caps, etc) # I still keep this logic here in case we want to mix again gdirs_band = [] gdirs_cent = gdirs log.workflow('Start flowline processing with: ' 'N centerline type: {}, ' 'N elev bands type: {}.' ''.format(len(gdirs_cent), len(gdirs_band))) # HH2015 method workflow.execute_entity_task(tasks.simple_glacier_masks, gdirs_band) # Centerlines OGGM workflow.execute_entity_task(tasks.glacier_masks, gdirs_cent) if add_consensus: from oggm.shop.bedtopo import add_consensus_thickness workflow.execute_entity_task(add_consensus_thickness, gdirs_band) workflow.execute_entity_task(add_consensus_thickness, gdirs_cent) # Elev bands with var data vn = 'consensus_ice_thickness' workflow.execute_entity_task(tasks.elevation_band_flowline, gdirs_band, bin_variables=vn) workflow.execute_entity_task( tasks.fixed_dx_elevation_band_flowline, gdirs_band, bin_variables=vn) else: # HH2015 method without it task_list = [ tasks.elevation_band_flowline, tasks.fixed_dx_elevation_band_flowline, ] for task in task_list: workflow.execute_entity_task(task, gdirs_band) # Centerlines OGGM task_list = [ tasks.compute_centerlines, tasks.initialize_flowlines, tasks.catchment_area, tasks.catchment_intersections, tasks.catchment_width_geom, tasks.catchment_width_correction, ] for task in task_list: workflow.execute_entity_task(task, gdirs_cent) # Same for all glaciers if border >= 20: task_list = [ tasks.compute_downstream_line, tasks.compute_downstream_bedshape, ] for task in task_list: workflow.execute_entity_task(task, gdirs) else: log.workflow('L2: for map border values < 20, wont compute ' 'downstream lines.') # Glacier stats sum_dir = os.path.join(output_base_dir, 'L2', 'summary') utils.mkdir(sum_dir) opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) utils.compile_glacier_statistics(gdirs, path=opath) # And for level 2: shapes if len(gdirs_cent) > 0: opath = os.path.join(sum_dir, 'centerlines_{}.shp'.format(rgi_reg)) utils.write_centerlines_to_shape(gdirs_cent, to_tar=True, path=opath) # L2 OK - compress all in output directory log.workflow('L2 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L2') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) if max_level == 2: _time_log() return # L3 - Tasks sum_dir = os.path.join(output_base_dir, 'L3', 'summary') utils.mkdir(sum_dir) # Climate workflow.execute_entity_task(tasks.process_climate_data, gdirs) if cfg.PARAMS['climate_qc_months'] > 0: workflow.execute_entity_task(tasks.historical_climate_qc, gdirs) if match_geodetic_mb_per_glacier: utils.get_geodetic_mb_dataframe() # Small optim to avoid concurrency workflow.execute_entity_task( tasks.mu_star_calibration_from_geodetic_mb, gdirs) workflow.execute_entity_task(tasks.apparent_mb_from_any_mb, gdirs) else: workflow.execute_entity_task(tasks.local_t_star, gdirs) workflow.execute_entity_task(tasks.mu_star_calibration, gdirs) # Inversion: we match the consensus filter = border >= 20 workflow.calibrate_inversion_from_consensus(gdirs, apply_fs_on_mismatch=True, error_on_mismatch=False, filter_inversion_output=filter) # Do we want to match geodetic estimates? # This affects only the bias so we can actually do this *after* # the inversion, but we really want to take calving into account here if match_regional_geodetic_mb: opath = os.path.join( sum_dir, 'fixed_geometry_mass_balance_' 'before_match_{}.csv'.format(rgi_reg)) utils.compile_fixed_geometry_mass_balance(gdirs, path=opath) workflow.match_regional_geodetic_mb(gdirs, rgi_reg=rgi_reg, dataset=match_regional_geodetic_mb) # We get ready for modelling if border >= 20: workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs) else: log.workflow( 'L3: for map border values < 20, wont initialize glaciers ' 'for the run.') # Glacier stats opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) utils.compile_glacier_statistics(gdirs, path=opath) opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg)) utils.compile_climate_statistics(gdirs, path=opath) opath = os.path.join(sum_dir, 'fixed_geometry_mass_balance_{}.csv'.format(rgi_reg)) utils.compile_fixed_geometry_mass_balance(gdirs, path=opath) # L3 OK - compress all in output directory log.workflow('L3 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L3') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) if max_level == 3: _time_log() return if border < 20: log.workflow('L3: for map border values < 20, wont compute L4 and L5.') _time_log() return # L4 - No tasks: add some stats for consistency and make the dirs small sum_dir_L3 = sum_dir sum_dir = os.path.join(output_base_dir, 'L4', 'summary') utils.mkdir(sum_dir) # Copy L3 files for consistency for bn in [ 'glacier_statistics', 'climate_statistics', 'fixed_geometry_mass_balance' ]: ipath = os.path.join(sum_dir_L3, bn + '_{}.csv'.format(rgi_reg)) opath = os.path.join(sum_dir, bn + '_{}.csv'.format(rgi_reg)) shutil.copyfile(ipath, opath) # Copy mini data to new dir mini_base_dir = os.path.join(working_dir, 'mini_perglacier', 'RGI{}'.format(rgi_version), 'b_{:03d}'.format(border)) mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs, base_dir=mini_base_dir) # L4 OK - compress all in output directory log.workflow('L4 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L4') workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) if max_level == 4: _time_log() return # L5 - spinup run in mini gdirs gdirs = mini_gdirs # Get end date. The first gdir might have blown up, try some others i = 0 while True: if i >= len(gdirs): raise RuntimeError('Found no valid glaciers!') try: y0 = gdirs[i].get_climate_info()['baseline_hydro_yr_0'] # One adds 1 because the run ends at the end of the year ye = gdirs[i].get_climate_info()['baseline_hydro_yr_1'] + 1 break except BaseException: i += 1 # Which model? if evolution_model == 'massredis': from oggm.core.flowline import MassRedistributionCurveModel evolution_model = MassRedistributionCurveModel else: from oggm.core.flowline import FluxBasedModel evolution_model = FluxBasedModel # OK - run if dynamic_spinup: workflow.execute_entity_task( tasks.run_dynamic_spinup, gdirs, evolution_model=evolution_model, minimise_for=dynamic_spinup, precision_percent=1, output_filesuffix='_dynamic_spinup', ) workflow.execute_entity_task(tasks.run_from_climate_data, gdirs, min_ys=y0, ye=ye, evolution_model=evolution_model, init_model_filesuffix='_dynamic_spinup', output_filesuffix='_hist_spin') workflow.execute_entity_task(tasks.merge_consecutive_run_outputs, gdirs, input_filesuffix_1='_dynamic_spinup', input_filesuffix_2='_hist_spin', output_filesuffix='_historical_spinup', delete_input=True) workflow.execute_entity_task(tasks.run_from_climate_data, gdirs, min_ys=y0, ye=ye, evolution_model=evolution_model, output_filesuffix='_historical') # Now compile the output sum_dir = os.path.join(output_base_dir, 'L5', 'summary') utils.mkdir(sum_dir) opath = os.path.join(sum_dir, f'historical_run_output_{rgi_reg}.nc') utils.compile_run_output(gdirs, path=opath, input_filesuffix='_historical') if dynamic_spinup: opath = os.path.join(sum_dir, f'historical_spinup_run_output_{rgi_reg}.nc') utils.compile_run_output(gdirs, path=opath, input_filesuffix='_historical_spinup') # Glacier statistics we recompute here for error analysis opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) utils.compile_glacier_statistics(gdirs, path=opath) # Other stats for consistency for bn in ['climate_statistics', 'fixed_geometry_mass_balance']: ipath = os.path.join(sum_dir_L3, bn + '_{}.csv'.format(rgi_reg)) opath = os.path.join(sum_dir, bn + '_{}.csv'.format(rgi_reg)) shutil.copyfile(ipath, opath) # Add the extended files pf = os.path.join(sum_dir, 'historical_run_output_{}.nc'.format(rgi_reg)) mf = os.path.join(sum_dir, 'fixed_geometry_mass_balance_{}.csv'.format(rgi_reg)) # This is crucial - extending calving only possible with L3 data!!! sf = os.path.join(sum_dir_L3, 'glacier_statistics_{}.csv'.format(rgi_reg)) opath = os.path.join( sum_dir, 'historical_run_output_extended_{}.nc'.format(rgi_reg)) utils.extend_past_climate_run(past_run_file=pf, fixed_geometry_mb_file=mf, glacier_statistics_file=sf, path=opath) # L5 OK - compress all in output directory log.workflow('L5 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L5') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) _time_log()
def match_geodetic_mb_for_selection(gdirs, period='2000-01-01_2020-01-01', file_path=None, fail_safe=False): """Shift the mass-balance residual to match geodetic mb observations. It is similar to match_regional_geodetic_mb but uses the raw, glacier per glacier tabular data. This method finds the "best mass-balance residual" to match all glaciers in gdirs with available OGGM mass balance and available geodetic mass-balance measurements from Hugonnet 2021 or any other file with the same format. The default is to use hugonnet_2021_ds_rgi60_pergla_rates_10_20_worldwide_filled.hdf in https://cluster.klima.uni-bremen.de/~oggm/geodetic_ref_mb/ Parameters ---------- gdirs : the list of gdirs period : str One of '2000-01-01_2020-01-01', '2000-01-01_2010-01-01', '2010-01-01_2020-01-01'. file_path: str local file path to tabular file containing geodetic measurements, file must contain the columns: - 'rgiid': is the RGIId as in the RGI 6.0 - 'period': time intervall of the measurements in the format shown above - 'dmdtda': the specific-mass change rate in meters water-equivalent per year, - 'area': is the glacier area (same as in RGI 6.0) in meters square fail_safe : bool some glaciers in the obs data have been corrected with the regional average. We don't use these values, unless there is no other choice and in which case you can set fail_safe to True """ # Get the mass-balance OGGM would give out of the box df = utils.compile_fixed_geometry_mass_balance(gdirs, path=False) df = df.dropna(axis=0, how='all').dropna(axis=1, how='all') # And also the Area and calving fluxes dfs = utils.compile_glacier_statistics(gdirs, path=False) y0 = int(period.split('_')[0].split('-')[0]) y1 = int(period.split('_')[1].split('-')[0]) - 1 odf = pd.DataFrame(df.loc[y0:y1].mean(), columns=['SMB']) odf['AREA'] = dfs.rgi_area_km2 * 1e6 # Just take the calving rate and change its units # Original units: km3 a-1, to change to mm a-1 (units of specific MB) rho = cfg.PARAMS['ice_density'] if 'calving_flux' in dfs: odf['CALVING'] = dfs['calving_flux'].fillna( 0) * 1e9 * rho / odf['AREA'] else: odf['CALVING'] = 0 # We have to drop nans here, which occur when calving glaciers fail to run odf = odf.dropna() # save all rgi_ids for which a valid OGGM mb is available rgi_ids_oggm = odf.index.values # Fetch the reference data df = utils.get_geodetic_mb_dataframe(file_path=file_path) # get the correct period from the whole dataset df = df.loc[df['period'] == period] # get only geodetic measurements for which a valid OGGM mb is available rdf_all = df.loc[rgi_ids_oggm] if rdf_all.empty: raise InvalidWorkflowError('No geodetic MB measurements available for ' 'this glacier selection!') # drop glaciers with no valid geodetic measurements rdf = rdf_all.loc[~rdf_all['is_cor']] if rdf.empty: if not fail_safe: raise InvalidWorkflowError( 'No gedoetic MB measurements available for ' 'this glacier selection! Set ' 'fail_safe=True to use the ' 'corrected values.') rdf = rdf_all # the remaining glaciers now have a OGGM mb and geodetic measurements rgi_ids = rdf.index.values msg = ('Applying geodetic MB correction using {} of {} glaciers, with ' 'available OGGM MB and available geodetic measurements.') log.workflow(msg.format(len(rgi_ids), len(gdirs))) # Total MB OGGM, only using glaciers with OGGM mb and geodetic measurements odf = odf.loc[rgi_ids] out_smb = np.average(odf['SMB'], weights=odf['AREA']) # for logging out_cal = np.average(odf['CALVING'], weights=odf['AREA']) # for logging smb_oggm = np.average(odf['SMB'] - odf['CALVING'], weights=odf['AREA']) # Total geodetic MB, no need for indexing smb_ref = rdf.dmdtda.values * 1000 # m to mm conversion area_ref = rdf.area.values smb_ref = np.average(smb_ref, weights=area_ref) # Diff between the two residual = smb_ref - smb_oggm # Let's just shift log.workflow('Shifting regional MB bias by {}'.format(residual)) log.workflow('Observations give {}'.format(smb_ref)) log.workflow('OGGM SMB gives {}'.format(out_smb)) log.workflow('OGGM frontal ablation gives {}'.format(out_cal)) # This time we shift over all glaciers for gdir in gdirs: try: df = gdir.read_json('local_mustar') gdir.add_to_diagnostics('mb_bias_before_geodetic_corr', df['bias']) df['bias'] = df['bias'] - residual gdir.write_json(df, 'local_mustar') except FileNotFoundError: pass
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None, output_folder='', working_dir='', dem_source='', is_test=False, test_ids=None, demo=False, test_rgidf=None, test_intersects_file=None, test_topofile=None, disable_mp=False, params_file=None, elev_bands=False, match_geodetic_mb=False, centerlines_only=False, add_consensus=False, max_level=5, logging_level='WORKFLOW', disable_dl_verify=False): """Does the actual job. Parameters ---------- rgi_version : str the RGI version to use (defaults to cfg.PARAMS) rgi_reg : str the RGI region to process border : int the number of pixels at the maps border output_folder : str path to the output folder (where to put the preprocessed tar files) dem_source : str which DEM source to use: default, SOURCE_NAME or ALL working_dir : str path to the OGGM working directory params_file : str path to the OGGM parameter file (to override defaults) is_test : bool to test on a couple of glaciers only! test_ids : list if is_test: list of ids to process demo : bool to run the prepro for the list of demo glaciers test_rgidf : shapefile for testing purposes only test_intersects_file : shapefile for testing purposes only test_topofile : str for testing purposes only test_crudir : str for testing purposes only disable_mp : bool disable multiprocessing elev_bands : bool compute all flowlines based on the Huss&Hock 2015 method instead of the OGGM default, which is a mix of elev_bands and centerlines. centerlines_only : bool compute all flowlines based on the OGGM centerline(s) method instead of the OGGM default, which is a mix of elev_bands and centerlines. match_geodetic_mb : bool match the regional mass-balance estimates at the regional level (currently Hugonnet et al., 2020). add_consensus : bool adds (reprojects) the consensus estimates thickness to the glacier directories. With elev_bands=True, the data will also be binned. max_level : int the maximum pre-processing level before stopping logging_level : str the logging level to use (DEBUG, INFO, WARNING, WORKFLOW) disable_dl_verify : bool disable the hash verification of OGGM downloads """ # TODO: temporarily silence Fiona and other deprecation warnings import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) # Input check if max_level not in [1, 2, 3, 4, 5]: raise InvalidParamsError('max_level should be one of [1, 2, 3, 4, 5]') # Time start = time.time() def _time_log(): # Log util m, s = divmod(time.time() - start, 60) h, m = divmod(m, 60) log.workflow('OGGM prepro_levels is done! Time needed: ' '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s))) # Config Override Params params = {} # Local paths utils.mkdir(working_dir) params['working_dir'] = working_dir # Initialize OGGM and set up the run parameters cfg.initialize(file=params_file, params=params, logging_level=logging_level, future=True) # Use multiprocessing? cfg.PARAMS['use_multiprocessing'] = not disable_mp # How many grid points around the glacier? # Make it large if you expect your glaciers to grow large cfg.PARAMS['border'] = border # Set to True for operational runs cfg.PARAMS['continue_on_error'] = True # Check for the integrity of the files OGGM downloads at run time # For large files (e.g. using a 1 tif DEM like ALASKA) calculating the hash # takes a long time, so deactivating this can make sense cfg.PARAMS['dl_verify'] = not disable_dl_verify # Log the parameters msg = '# OGGM Run parameters:' for k, v in cfg.PARAMS.items(): if type(v) in [pd.DataFrame, dict]: continue msg += '\n {}: {}'.format(k, v) log.workflow(msg) if rgi_version is None: rgi_version = cfg.PARAMS['rgi_version'] output_base_dir = os.path.join(output_folder, 'RGI{}'.format(rgi_version), 'b_{:03d}'.format(border)) # Add a package version file utils.mkdir(output_base_dir) opath = os.path.join(output_base_dir, 'package_versions.txt') with open(opath, 'w') as vfile: vfile.write(utils.show_versions(logger=log)) if demo: rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index) elif test_rgidf is None: # Get the RGI file rgidf = gpd.read_file( utils.get_rgi_region_file(rgi_reg, version=rgi_version)) # We use intersects rgif = utils.get_rgi_intersects_region_file(rgi_reg, version=rgi_version) cfg.set_intersects_db(rgif) # Some RGI input quality checks - this is based on visual checks # of large glaciers in the RGI ids_to_ice_cap = [ 'RGI60-05.10315', # huge Greenland ice cap 'RGI60-03.01466', # strange thing next to Devon 'RGI60-09.00918', # Academy of sciences Ice cap 'RGI60-09.00969', 'RGI60-09.00958', 'RGI60-09.00957', ] rgidf.loc[rgidf.RGIId.isin(ids_to_ice_cap), 'Form'] = '1' # In AA almost all large ice bodies are actually ice caps if rgi_reg == '19': rgidf.loc[rgidf.Area > 100, 'Form'] = '1' # For greenland we omit connectivity level 2 if rgi_reg == '05': rgidf = rgidf.loc[rgidf['Connect'] != 2] else: rgidf = test_rgidf cfg.set_intersects_db(test_intersects_file) if is_test: if test_ids is not None: rgidf = rgidf.loc[rgidf.RGIId.isin(test_ids)] else: rgidf = rgidf.sample(4) log.workflow('Starting prepro run for RGI reg: {} ' 'and border: {}'.format(rgi_reg, border)) log.workflow('Number of glaciers: {}'.format(len(rgidf))) # L0 - go gdirs = workflow.init_glacier_directories(rgidf, reset=True, force=True) # Glacier stats sum_dir = os.path.join(output_base_dir, 'L0', 'summary') utils.mkdir(sum_dir) opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) utils.compile_glacier_statistics(gdirs, path=opath) # L0 OK - compress all in output directory log.workflow('L0 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L0') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) if max_level == 0: _time_log() return # L1 - Add dem files if test_topofile: cfg.PATHS['dem_file'] = test_topofile # Which DEM source? if dem_source.upper() == 'ALL': # This is the complex one, just do the job and leave log.workflow('Running prepro on ALL sources') for i, s in enumerate(utils.DEM_SOURCES): rs = i == 0 log.workflow('Running prepro on sources: {}'.format(s)) gdirs = workflow.init_glacier_directories(rgidf, reset=rs, force=rs) workflow.execute_entity_task(tasks.define_glacier_region, gdirs, source=s) workflow.execute_entity_task(_rename_dem_folder, gdirs, source=s) # make a GeoTiff mask of the glacier, choose any source workflow.execute_entity_task(gis.rasterio_glacier_mask, gdirs, source='ALL') # Compress all in output directory level_base_dir = os.path.join(output_base_dir, 'L1') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) _time_log() return # Force a given source source = dem_source.upper() if dem_source else None # L1 - go workflow.execute_entity_task(tasks.define_glacier_region, gdirs, source=source) # Glacier stats sum_dir = os.path.join(output_base_dir, 'L1', 'summary') utils.mkdir(sum_dir) opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) utils.compile_glacier_statistics(gdirs, path=opath) # L1 OK - compress all in output directory log.workflow('L1 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L1') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) if max_level == 1: _time_log() return # L2 - Tasks # Check which glaciers will be processed as what if elev_bands: gdirs_band = gdirs gdirs_cent = [] elif centerlines_only: gdirs_band = [] gdirs_cent = gdirs else: # Default is to mix # Curated list of large (> 50 km2) glaciers that don't run # (CFL error) mostly because the centerlines are crap # This is a really temporary fix until we have some better # solution here ids_to_bands = [ 'RGI60-01.13696', 'RGI60-03.01710', 'RGI60-01.13635', 'RGI60-01.14443', 'RGI60-03.01678', 'RGI60-03.03274', 'RGI60-01.17566', 'RGI60-03.02849', 'RGI60-01.16201', 'RGI60-01.14683', 'RGI60-07.01506', 'RGI60-07.01559', 'RGI60-03.02687', 'RGI60-17.00172', 'RGI60-01.23649', 'RGI60-09.00077', 'RGI60-03.00994', 'RGI60-01.26738', 'RGI60-03.00283', 'RGI60-01.16121', 'RGI60-01.27108', 'RGI60-09.00132', 'RGI60-13.43483', 'RGI60-09.00069', 'RGI60-14.04404', 'RGI60-17.01218', 'RGI60-17.15877', 'RGI60-13.30888', 'RGI60-17.13796', 'RGI60-17.15825', 'RGI60-01.09783' ] if rgi_reg == '19': gdirs_band = gdirs gdirs_cent = [] else: gdirs_band = [] gdirs_cent = [] for gdir in gdirs: if gdir.is_icecap or gdir.rgi_id in ids_to_bands: gdirs_band.append(gdir) else: gdirs_cent.append(gdir) log.workflow('Start flowline processing with: ' 'N centerline type: {}, ' 'N elev bands type: {}.' ''.format(len(gdirs_cent), len(gdirs_band))) # HH2015 method workflow.execute_entity_task(tasks.simple_glacier_masks, gdirs_band) # Centerlines OGGM workflow.execute_entity_task(tasks.glacier_masks, gdirs_cent) if add_consensus: from oggm.shop.bedtopo import add_consensus_thickness workflow.execute_entity_task(add_consensus_thickness, gdirs_band) workflow.execute_entity_task(add_consensus_thickness, gdirs_cent) # Elev bands with var data vn = 'consensus_ice_thickness' workflow.execute_entity_task(tasks.elevation_band_flowline, gdirs_band, bin_variables=vn) workflow.execute_entity_task(tasks.fixed_dx_elevation_band_flowline, gdirs_band, bin_variables=vn) else: # HH2015 method without it task_list = [ tasks.elevation_band_flowline, tasks.fixed_dx_elevation_band_flowline, ] for task in task_list: workflow.execute_entity_task(task, gdirs_band) # HH2015 method task_list = [ tasks.compute_downstream_line, tasks.compute_downstream_bedshape, ] for task in task_list: workflow.execute_entity_task(task, gdirs_band) # Centerlines OGGM task_list = [ tasks.compute_centerlines, tasks.initialize_flowlines, tasks.compute_downstream_line, tasks.compute_downstream_bedshape, tasks.catchment_area, tasks.catchment_intersections, tasks.catchment_width_geom, tasks.catchment_width_correction, ] for task in task_list: workflow.execute_entity_task(task, gdirs_cent) # Glacier stats sum_dir = os.path.join(output_base_dir, 'L2', 'summary') utils.mkdir(sum_dir) opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) utils.compile_glacier_statistics(gdirs, path=opath) # L2 OK - compress all in output directory log.workflow('L2 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L2') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) if max_level == 2: _time_log() return # L3 - Tasks task_list = [ tasks.process_climate_data, tasks.historical_climate_qc, tasks.local_t_star, tasks.mu_star_calibration, ] for task in task_list: workflow.execute_entity_task(task, gdirs) # Inversion: we match the consensus workflow.calibrate_inversion_from_consensus(gdirs, apply_fs_on_mismatch=True, error_on_mismatch=False) # Do we want to match geodetic estimates? # This affects only the bias so we can actually do this *after* # the inversion, but we really want to take calving into account here if match_geodetic_mb: workflow.match_regional_geodetic_mb(gdirs, rgi_reg) # We get ready for modelling workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs) # Glacier stats sum_dir = os.path.join(output_base_dir, 'L3', 'summary') utils.mkdir(sum_dir) opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) utils.compile_glacier_statistics(gdirs, path=opath) opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg)) utils.compile_climate_statistics(gdirs, path=opath) opath = os.path.join(sum_dir, 'fixed_geometry_mass_balance_{}.csv'.format(rgi_reg)) utils.compile_fixed_geometry_mass_balance(gdirs, path=opath) # L3 OK - compress all in output directory log.workflow('L3 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L3') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) if max_level == 3: _time_log() return # L4 - No tasks: add some stats for consistency and make the dirs small sum_dir_L3 = sum_dir sum_dir = os.path.join(output_base_dir, 'L4', 'summary') utils.mkdir(sum_dir) # Copy L3 files for consistency for bn in [ 'glacier_statistics', 'climate_statistics', 'fixed_geometry_mass_balance' ]: ipath = os.path.join(sum_dir_L3, bn + '_{}.csv'.format(rgi_reg)) opath = os.path.join(sum_dir, bn + '_{}.csv'.format(rgi_reg)) shutil.copyfile(ipath, opath) # Copy mini data to new dir mini_base_dir = os.path.join(working_dir, 'mini_perglacier') mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs, base_dir=mini_base_dir) # L4 OK - compress all in output directory log.workflow('L4 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L4') workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) if max_level == 4: _time_log() return # L5 - spinup run in mini gdirs gdirs = mini_gdirs # Get end date. The first gdir might have blown up, try some others i = 0 while True: if i >= len(gdirs): raise RuntimeError('Found no valid glaciers!') try: y0 = gdirs[i].get_climate_info()['baseline_hydro_yr_0'] # One adds 1 because the run ends at the end of the year ye = gdirs[i].get_climate_info()['baseline_hydro_yr_1'] + 1 break except BaseException: i += 1 # OK - run workflow.execute_entity_task(tasks.run_from_climate_data, gdirs, min_ys=y0, ye=ye, output_filesuffix='_historical') # Now compile the output sum_dir = os.path.join(output_base_dir, 'L5', 'summary') utils.mkdir(sum_dir) opath = os.path.join(sum_dir, 'historical_run_output_{}.nc'.format(rgi_reg)) utils.compile_run_output(gdirs, path=opath, input_filesuffix='_historical') # Glacier statistics we recompute here for error analysis opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg)) utils.compile_glacier_statistics(gdirs, path=opath) # Other stats for consistency for bn in ['climate_statistics', 'fixed_geometry_mass_balance']: ipath = os.path.join(sum_dir_L3, bn + '_{}.csv'.format(rgi_reg)) opath = os.path.join(sum_dir, bn + '_{}.csv'.format(rgi_reg)) shutil.copyfile(ipath, opath) # Add the extended files pf = os.path.join(sum_dir, 'historical_run_output_{}.nc'.format(rgi_reg)) mf = os.path.join(sum_dir, 'fixed_geometry_mass_balance_{}.csv'.format(rgi_reg)) # This is crucial - extending calving only with L3 data!!! sf = os.path.join(sum_dir_L3, 'glacier_statistics_{}.csv'.format(rgi_reg)) opath = os.path.join( sum_dir, 'historical_run_output_extended_{}.nc'.format(rgi_reg)) utils.extend_past_climate_run(past_run_file=pf, fixed_geometry_mb_file=mf, glacier_statistics_file=sf, path=opath) # L5 OK - compress all in output directory log.workflow('L5 done. Writing to tar...') level_base_dir = os.path.join(output_base_dir, 'L5') workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False, base_dir=level_base_dir) utils.base_dir_to_tar(level_base_dir) _time_log()