def pre_process_tasks(run_for_test=False):
    path10 = utils.get_rgi_region_file('10', '61')
    path13 = utils.get_rgi_region_file('13', '61')
    path14 = utils.get_rgi_region_file('14', '61')
    path15 = utils.get_rgi_region_file('15', '61')
    rgidf10 = gpd.read_file(path10)
    rgidf10 = rgidf10[rgidf10.O2Region == '4']
    rgidf13 = gpd.read_file(path13)
    rgidf14 = gpd.read_file(path14)
    rgidf15 = gpd.read_file(path15)
    rgidf = pd.concat([rgidf10, rgidf13, rgidf14, rgidf15])
    if (not run_in_cluster) or run_for_test:
        rgidf = rgidf10.iloc[0:5, :]
    cfg.initialize()
    cfg.PARAMS['border'] = 160
    cfg.PATHS['working_dir'] = utils.mkdir(working_dir)
    cfg.PARAMS['continue_on_error'] = True
    cfg.PARAMS['use_multiprocessing'] = True
    gdirs = workflow.init_glacier_directories(rgidf, from_prepro_level=1,
                                              reset=True, force=True)
    task_list = [
        tasks.define_glacier_region,
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.process_cru_data,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
        tasks.init_present_time_glacier
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs)
    return gdirs
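# A minimal usage sketch (an assumption, not part of the original script):
# it presumes the module-level imports and globals used above (cfg, utils,
# workflow, tasks, gpd, pd, run_in_cluster, working_dir) are already defined.
if __name__ == '__main__':
    # quick test run on the first five glaciers of region 10
    gdirs = pre_process_tasks(run_for_test=True)
    print('Pre-processed {} glacier directories'.format(len(gdirs)))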
def test_rgi(self):

    # Make a fake RGI file
    rgi_dir = os.path.join(self.dldir, 'rgi50')
    utils.mkdir(rgi_dir)
    make_fake_zipdir(os.path.join(rgi_dir, '01_rgi50_Region'),
                     fakefile='test.txt')
    rgi_f = make_fake_zipdir(rgi_dir, fakefile='000_rgi50_manifest.txt')

    def down_check(url, cache_name=None, reset=False):
        expected = 'http://www.glims.org/RGI/rgi50_files/rgi50.zip'
        self.assertEqual(url, expected)
        return rgi_f

    with FakeDownloadManager('_progress_urlretrieve', down_check):
        rgi = utils.get_rgi_dir()

    assert os.path.isdir(rgi)
    assert os.path.exists(os.path.join(rgi, '000_rgi50_manifest.txt'))
    assert os.path.exists(os.path.join(rgi, '01_rgi50_Region', 'test.txt'))

    # Make a fake RGI file (version 6)
    rgi_dir = os.path.join(self.dldir, 'rgi60')
    utils.mkdir(rgi_dir)
    make_fake_zipdir(os.path.join(rgi_dir, '01_rgi60_Region'),
                     fakefile='01_rgi60_Region.shp')
    rgi_f = make_fake_zipdir(rgi_dir, fakefile='000_rgi60_manifest.txt')

    def down_check(url, cache_name=None, reset=False):
        expected = 'http://www.glims.org/RGI/rgi60_files/00_rgi60.zip'
        self.assertEqual(url, expected)
        return rgi_f

    with FakeDownloadManager('_progress_urlretrieve', down_check):
        rgi = utils.get_rgi_dir(version='6')

    assert os.path.isdir(rgi)
    assert os.path.exists(os.path.join(rgi, '000_rgi60_manifest.txt'))
    assert os.path.exists(os.path.join(rgi, '01_rgi60_Region',
                                       '01_rgi60_Region.shp'))

    with FakeDownloadManager('_progress_urlretrieve', down_check):
        rgi_f = utils.get_rgi_region_file('01', version='6')

    assert os.path.exists(rgi_f)
    assert '01_rgi60_Region.shp' in rgi_f
def merge_wgms_lec(wgms, lec, repeat=False):
    if repeat:
        lec.loc[:, 'region'] = lec.rgi_id.apply(
            lambda x: x.split('RGI60-')[-1].split('.')[0])

        # sort values by RGI Id and set index
        lec = lec.sort_values(by='rgi_id')
        lec = lec.set_index('rgi_id').drop('Unnamed: 0', axis=1)
        # keep only glaciers that are not already in the WGMS set
        lec = lec[~lec.index.isin(wgms.index)]
        lec.loc[:, 'rgi_area'] = np.nan
        lec.loc[:, 'rgi_date'] = np.nan

        for region in lec.region.unique():
            ids = lec[lec.region == region].drop('region', axis=1)
            # RGI file
            path = utils.get_rgi_region_file(str(region).zfill(2),
                                             version='61')
            rgidf = gpd.read_file(path).set_index('RGIId')
            rgidf.index.name = 'rgi_id'
            df2 = pd.DataFrame()
            df2.loc[:, 'rgi_area'] = rgidf[rgidf.index.isin(ids.index)].Area
            date = rgidf[rgidf.index.isin(ids.index)].BgnDate.apply(
                lambda x: int(str(x)[:4]))
            df2.loc[:, 'rgi_date'] = date
            lec.update(df2)

        lec.loc[:, 'relative_diff'] = lec.area_diff / lec.rgi_area
        lec = lec.rename(columns={"bias": "mb_bias"})
        all_df = wgms.append(lec)
        all_df = all_df[['region', 'rgi_date', 'rgi_area', 'temp_bias',
                         'iterations', 'mb_bias', 'area_diff',
                         'relative_diff', 'error']]
        all_df.to_csv(os.path.join(cfg.PATHS['working_dir'],
                                   'all_experiment_df.csv'))
    else:
        all_df = pd.read_csv(os.path.join(cfg.PATHS['working_dir'],
                                          'all_experiment_df.csv'))
    return all_df
def create_model_lec_df(home, repeat=False):
    model_df = pd.DataFrame()
    p = os.path.join(home, 'lec_model_df.pkl')
    if repeat or not os.path.isfile(p):
        for d in [d for d in os.listdir(home) if d.startswith('temp')]:
            temp_bias = float(d.split('_')[-1])
            tdir = os.path.join(home, d)
            if temp_bias <= 0:
                for file in [os.path.join(tdir, d1)
                             for d1 in os.listdir(tdir)
                             if d1.endswith('.pkl')]:
                    df = pd.read_pickle(file, compression='gzip')
                    df = df.assign(temp_bias=temp_bias)
                    model_df = model_df.append(df, ignore_index=False,
                                               sort=True)
        model_df.index.name = 'rgi_id'
        model_df.loc[:, 'rgi_date'] = model_df.ex_mod.apply(
            lambda x: int(x.area_km2_ts().index[-1]))
        model_df.loc[:, 'region'] = model_df.index.map(
            lambda x: int(x.split('RGI60-')[-1].split('.')[0]))

        for region in model_df.region.unique():
            ids = model_df[model_df.region == region].index.get_level_values(0)
            # RGI file
            path = utils.get_rgi_region_file(str(region).zfill(2),
                                             version='61')
            rgidf = gpd.read_file(path).set_index('RGIId')
            rgidf = rgidf[rgidf.index.isin(ids)]
            model_df.loc[rgidf.index, 'rgi_area'] = rgidf.Area

        model_df = model_df.reset_index().set_index(
            ['rgi_id', 'temp_bias']).sort_index()
        model_df.loc[:, 'ex_area'] = model_df.ex_mod.apply(
            lambda x: x.area_km2_ts().values[-1])
        model_df.to_pickle(p, compression='gzip')
        return model_df
    else:
        return pd.read_pickle(p, compression='gzip')
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None,
                      output_folder='', working_dir='', dem_source='',
                      is_test=False, demo=False, test_rgidf=None,
                      test_intersects_file=None, test_topofile=None,
                      test_crudir=None, disable_mp=False, timeout=0,
                      max_level=4, logging_level='WORKFLOW'):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    dem_source : str
        which DEM source to use: default, SOURCE_NAME or ALL
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    disable_mp : bool
        disable multiprocessing
    timeout : int
        apply a timeout to the entity tasks (in seconds)
    max_level : int
        the maximum pre-processing level before stopping
    logging_level : str
        the logging level to use (DEBUG, INFO, WARNING, WORKFLOW)
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Input check
    if max_level not in [1, 2, 3, 4]:
        raise InvalidParamsError('max_level should be one of [1, 2, 3, 4]')

    # Time
    start = time.time()

    def _time_log():
        # Log util
        m, s = divmod(time.time() - start, 60)
        h, m = divmod(m, 60)
        log.workflow('OGGM prepro_levels is done! Time needed: '
                     '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level=logging_level)

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = not disable_mp

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # Timeout
    cfg.PARAMS['task_timeout'] = timeout

    # For statistics
    climate_periods = [1920, 1960, 2000]

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']

    rgi_dir_name = 'RGI{}'.format(rgi_version)
    border_dir_name = 'b_{:03d}'.format(border)
    base_dir = os.path.join(output_folder, rgi_dir_name, border_dir_name)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(4)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # L1 - initialize working directories
    # Which DEM source?
    if dem_source.upper() == 'ALL':
        # This is the complex one, just do the job and leave
        log.workflow('Running prepro on ALL sources')
        for i, s in enumerate(utils.DEM_SOURCES):
            rs = i == 0
            rgidf['DEM_SOURCE'] = s
            log.workflow('Running prepro on sources: {}'.format(s))
            gdirs = workflow.init_glacier_regions(rgidf, reset=rs, force=rs)
            workflow.execute_entity_task(_rename_dem_folder, gdirs, source=s)

        # Compress all in output directory
        l_base_dir = os.path.join(base_dir, 'L1')
        workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                     base_dir=l_base_dir)
        utils.base_dir_to_tar(l_base_dir)

        _time_log()
        return

    if dem_source:
        # Force a given source
        rgidf['DEM_SOURCE'] = dem_source.upper()

    # L1 - go
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L1', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L1 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L1')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    if max_level == 1:
        _time_log()
        return

    # L2 - Tasks
    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    workflow.execute_entity_task(tasks.process_cru_data, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L2', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L2 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L2')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    if max_level == 2:
        _time_log()
        return

    # L3 - Tasks
    task_list = [
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
        tasks.init_present_time_glacier
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L3', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs,
                                     add_climate_period=climate_periods,
                                     path=opath)

    # L3 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L3')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    if max_level == 3:
        _time_log()
        return

    # L4 - No tasks: add some stats for consistency and make the dirs small
    sum_dir = os.path.join(base_dir, 'L4', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # Copy mini data to new dir
    base_dir = os.path.join(base_dir, 'L4')
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs,
                                              base_dir=base_dir)

    # L4 OK - compress all in output directory
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=True)
    utils.base_dir_to_tar(base_dir)

    _time_log()
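# A hypothetical invocation sketch (the command-line wrapper that normally
# drives this function is not shown here; all argument values below are
# illustrative only).
if __name__ == '__main__':
    run_prepro_levels(rgi_version='61', rgi_reg='11', border=80,
                      output_folder='./prepro_output',
                      working_dir='./wdir', max_level=4)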
if ON_CLUSTER:
    OUT_DIR = os.environ.get("OUTDIR")
    cfg.PATHS['working_dir'] = OUT_DIR
    REG = os.environ.get("I")
else:
    WORKING_DIR = '/home/juliaeis/Dokumente/OGGM/work_dir/reconstruction/global'
    OUT_DIR = WORKING_DIR
    cfg.PATHS['working_dir'] = WORKING_DIR
    REG = None  # no region selected when running locally
    utils.mkdir(WORKING_DIR, reset=False)

cfg.PATHS['plot_dir'] = os.path.join(cfg.PATHS['working_dir'], 'plots')
utils.mkdir(cfg.PATHS['plot_dir'], reset=False)

for d in os.listdir(OUT_DIR):
    if d.startswith('reg1'):
        cfg.PATHS['working_dir'] = os.path.join(OUT_DIR, d)
        REGION = d.split('reg')[-1].split('-')[0].zfill(2)
        if REGION == REG:
            # RGI file
            path = utils.get_rgi_region_file(REGION, version='61')
            rgidf = gpd.read_file(path)
            # rgidf = rgidf.sort_values('Area', ascending=False)

            # exclude non-land-terminating and strongly connected glaciers
            rgidf = rgidf[rgidf.TermType == 0]
            rgidf = rgidf[rgidf.Connect != 2]

            gdirs = workflow.init_glacier_regions(rgidf.head(9))
            df = read_results(gdirs)
            print(df)
# Which baseline data are we using?
cfg.PARAMS['baseline_climate'] = 'CUSTOM'
# Point to the custom climate data
cfg.PATHS['climate_file'] = '/exports/csce/datastore/geos/groups/geos_iceocean/kinnear/SWARM_files/oggm_SWARM_input_cru_ref_hgts/oggm_cru_hgt_input.nc'

# Set to True for operational runs - here we want all glaciers to run
cfg.PARAMS['continue_on_error'] = True

# Change the minimum timestep
cfg.PARAMS['cfl_min_dt'] = 10.0

# Local working directory (where OGGM will write its output).
# This needs to be created beforehand, with the mass balance data
# (ref_t_stars.csv) placed inside.
WORKING_DIR = '/exports/csce/datastore/geos/groups/geos_iceocean/kinnear/oggm_runs/oggm_mswep_era_reference_run_90_geodetic'
cfg.PATHS['working_dir'] = WORKING_DIR

# RGI file setup: the easiest (but inelegant) way at the moment is to build
# a list of all RGI glaciers in regions 13, 14 and 15 and filter afterwards
path = utils.get_rgi_region_file('13', version=rgi_version)
rgidf_13 = gpd.read_file(path)
path = utils.get_rgi_region_file('14', version=rgi_version)
rgidf_14 = gpd.read_file(path)
path = utils.get_rgi_region_file('15', version=rgi_version)
rgidf_15 = gpd.read_file(path)

# Now combine the files
rgidf = gpd.GeoDataFrame(pd.concat([rgidf_13, rgidf_14, rgidf_15]))

# Get the shapefile
basin = gpd.read_file('/exports/csce/datastore/geos/groups/geos_iceocean/kinnear/SWARM-OGGM-Model/shape_files/swarm_grid.shp')
print('got glacier shp')
# Initialize OGGM and set up the default run parameters
cfg.initialize()

# How many grid points around the glacier?
cfg.PARAMS['border'] = 10

# Make it robust
cfg.PARAMS['use_intersects'] = False
cfg.PARAMS['continue_on_error'] = True

# Local working directory (where OGGM will write its output)
cfg.PATHS['working_dir'] = utils.get_temp_dir('some_wd')

# RGI file
path = utils.get_rgi_region_file('11')
rgidf = gpd.read_file(path)

# Select only 2 glaciers
rgidf = rgidf.iloc[:2]

# Sort for more efficient parallel computing
rgidf = rgidf.sort_values('Area', ascending=False)

# Go - create the pre-processed glacier directories
gdirs = workflow.init_glacier_directories(rgidf)

# Our task now
from dummy_task_module import dummy_task
workflow.execute_entity_task(dummy_task, gdirs)
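# A possible sketch of `dummy_task_module` (an assumption - the module is not
# shown in the original). OGGM entity tasks are plain functions acting on a
# single glacier directory, registered with the `entity_task` decorator so
# that `workflow.execute_entity_task` can run them with multiprocessing:
#
# dummy_task_module.py
import logging
from oggm.utils import entity_task

log = logging.getLogger(__name__)

@entity_task(log)
def dummy_task(gdir):
    # A no-op task: just report which glacier we were called on
    log.info('dummy_task called on %s', gdir.rgi_id)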
def run_benchmark(rgi_version=None, rgi_reg=None, border=None,
                  output_folder='', working_dir='', is_test=False,
                  test_rgidf=None, test_intersects_file=None,
                  test_topofile=None, test_crudir=None):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Module logger
    log = logging.getLogger(__name__)

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level='WORKFLOW')

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # For statistics
    odf = pd.DataFrame()

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    base_dir = os.path.join(output_folder)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    # Read RGI
    start = time.time()
    if test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(2)
    _add_time_to_df(odf, 'Read RGI', time.time() - start)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # Initialize working directories
    start = time.time()
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)
    _add_time_to_df(odf, 'init_glacier_regions', time.time() - start)

    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    # Tasks
    task_list = [
        tasks.process_cru_data,
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
        tasks.init_present_time_glacier,
    ]
    for task in task_list:
        start = time.time()
        workflow.execute_entity_task(task, gdirs)
        _add_time_to_df(odf, task.__name__, time.time() - start)

    # Runs
    start = time.time()
    workflow.execute_entity_task(tasks.run_random_climate, gdirs,
                                 nyears=250, bias=0, seed=0,
                                 output_filesuffix='_tstar')
    _add_time_to_df(odf, 'run_random_climate_tstar_250', time.time() - start)

    start = time.time()
    workflow.execute_entity_task(tasks.run_random_climate, gdirs,
                                 nyears=250, y0=1995, seed=0,
                                 output_filesuffix='_commit')
    _add_time_to_df(odf, 'run_random_climate_commit_250', time.time() - start)

    # Compile results
    start = time.time()
    utils.compile_glacier_statistics(gdirs)
    _add_time_to_df(odf, 'compile_glacier_statistics', time.time() - start)

    start = time.time()
    utils.compile_climate_statistics(gdirs,
                                     add_climate_period=[1920, 1960, 2000])
    _add_time_to_df(odf, 'compile_climate_statistics', time.time() - start)

    start = time.time()
    utils.compile_run_output(gdirs, filesuffix='_tstar')
    _add_time_to_df(odf, 'compile_run_output_tstar', time.time() - start)

    start = time.time()
    utils.compile_run_output(gdirs, filesuffix='_commit')
    _add_time_to_df(odf, 'compile_run_output_commit', time.time() - start)

    # Log
    opath = os.path.join(base_dir, 'benchmarks_b{:03d}.csv'.format(border))
    odf.index.name = 'Task'
    odf.to_csv(opath)
    log.workflow('OGGM benchmarks is done!')
def run_benchmark(rgi_version=None, rgi_reg=None, border=None,
                  output_folder='', working_dir='', is_test=False,
                  test_rgidf=None, test_intersects_file=None,
                  test_topofile=None):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Module logger
    log = logging.getLogger(__name__)

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level='WORKFLOW')

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # For statistics
    odf = pd.DataFrame()

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    base_dir = os.path.join(output_folder)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    # Read RGI
    start = time.time()
    if test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(2)
    _add_time_to_df(odf, 'Read RGI', time.time() - start)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # Initialize working directories
    start = time.time()
    gdirs = workflow.init_glacier_directories(rgidf, reset=True, force=True)
    _add_time_to_df(odf, 'init_glacier_directories', time.time() - start)

    # Tasks
    task_list = [
        tasks.define_glacier_region,
        tasks.process_cru_data,
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
        tasks.init_present_time_glacier,
    ]
    for task in task_list:
        start = time.time()
        workflow.execute_entity_task(task, gdirs)
        _add_time_to_df(odf, task.__name__, time.time() - start)

    # Runs
    start = time.time()
    workflow.execute_entity_task(tasks.run_random_climate, gdirs,
                                 nyears=250, bias=0, seed=0,
                                 output_filesuffix='_tstar')
    _add_time_to_df(odf, 'run_random_climate_tstar_250', time.time() - start)

    start = time.time()
    workflow.execute_entity_task(tasks.run_random_climate, gdirs,
                                 nyears=250, y0=1995, seed=0,
                                 output_filesuffix='_commit')
    _add_time_to_df(odf, 'run_random_climate_commit_250', time.time() - start)

    # Compile results
    start = time.time()
    utils.compile_glacier_statistics(gdirs)
    _add_time_to_df(odf, 'compile_glacier_statistics', time.time() - start)

    start = time.time()
    utils.compile_climate_statistics(gdirs,
                                     add_climate_period=[1920, 1960, 2000])
    _add_time_to_df(odf, 'compile_climate_statistics', time.time() - start)

    start = time.time()
    utils.compile_run_output(gdirs, input_filesuffix='_tstar')
    _add_time_to_df(odf, 'compile_run_output_tstar', time.time() - start)

    start = time.time()
    utils.compile_run_output(gdirs, input_filesuffix='_commit')
    _add_time_to_df(odf, 'compile_run_output_commit', time.time() - start)

    # Log
    opath = os.path.join(base_dir, 'benchmarks_b{:03d}.csv'.format(border))
    odf.index.name = 'Task'
    odf.to_csv(opath)
    log.workflow('OGGM benchmarks is done!')
utils.mkdir(WORKING_DIR)
cfg.PATHS['working_dir'] = WORKING_DIR

# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = True

# How many grid points around the glacier?
# Make it large if you expect your glaciers to grow large
cfg.PARAMS['border'] = 160

# Set to True for operational runs
cfg.PARAMS['continue_on_error'] = True
cfg.PARAMS['auto_skip_task'] = False

# Get the RGI file
rgidf = gpd.read_file(utils.get_rgi_region_file(rgi_reg, version=rgi_version))

# Sort for more efficient parallel computing
rgidf = rgidf.sort_values('Area', ascending=False)

# Module logger
log = logging.getLogger(__name__)
log.info('Starting run for RGI reg: ' + rgi_reg)
log.info('Number of glaciers: {}'.format(len(rgidf)))

# Go - initialize working directories
gdirs = workflow.init_glacier_regions(rgidf, from_prepro_level=0)

# Tasks
workflow.execute_entity_task(tasks.glacier_masks, gdirs)
cfg.PARAMS['temp_melt'] = -1.75
cfg.PARAMS['temp_all_solid'] = 0.0

# add to BASENAMES
_doc = ('contains the observed and searched glacier from the synthetic '
        'experiment to find the initial state')
cfg.BASENAMES['synthetic_experiment'] = ('synthetic_experiment.pkl', _doc)

# We use intersects
db = utils.get_rgi_intersects_region_file(version='61', region='11')
cfg.set_intersects_db(db)

cfg.PARAMS['run_mb_calibration'] = False
cfg.PARAMS['optimize_inversion_params'] = False

# RGI file
path = utils.get_rgi_region_file('11', version='61')
rgidf = gpd.read_file(path)
rgidf = rgidf[rgidf.RGIId == 'RGI60-11.00779']

# sort for more efficient parallel computing
rgidf = rgidf.sort_values('Area', ascending=False)

if ON_CLUSTER:
    rgidf = rgidf[job_nr:len(rgidf):80]

gdirs = workflow.init_glacier_regions(rgidf)

for gdir in gdirs:
    if os.path.isfile(os.path.join(gdir.dir, 'model_run_experiment.nc')):
        start = time.time()
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None,
                      output_folder='', working_dir='', is_test=False,
                      demo=False, test_rgidf=None, test_intersects_file=None,
                      test_topofile=None, test_crudir=None):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Module logger
    log = logging.getLogger(__name__)

    # Time
    start = time.time()

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level='WORKFLOW')

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # For statistics
    climate_periods = [1920, 1960, 2000]

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    rgi_dir_name = 'RGI{}'.format(rgi_version)
    border_dir_name = 'b_{:03d}'.format(border)
    base_dir = os.path.join(output_folder, rgi_dir_name, border_dir_name)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DEMO_GLACIERS.index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(4)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # L1 - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L1', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L1 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L1')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    # L2 - Tasks
    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir

    workflow.execute_entity_task(tasks.process_cru_data, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L2', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L2 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L2')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    # L3 - Tasks
    task_list = [
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L3', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs,
                                     add_climate_period=climate_periods,
                                     path=opath)

    # L3 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L3')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    # L4 - Tasks
    workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L4', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # Copy mini data to new dir
    base_dir = os.path.join(base_dir, 'L4')
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs,
                                              base_dir=base_dir)

    # L4 OK - compress all in output directory
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=True)
    utils.base_dir_to_tar(base_dir)

    # Log
    m, s = divmod(time.time() - start, 60)
    h, m = divmod(m, 60)
    log.workflow('OGGM prepro_levels is done! Time needed: '
                 '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))
rgi_version = '61'
rgi_region = '11'  # Region Central Europe

# Here we override some of the default parameters

# How many grid points around the glacier?
# Make it large if you expect your glaciers to grow large:
# here, 80 is more than enough
cfg.PARAMS['border'] = 80

# Local working directory (where OGGM will write its output)
WORKING_DIR = utils.gettempdir('OGGM_Rofental')
utils.mkdir(WORKING_DIR, reset=True)
cfg.PATHS['working_dir'] = WORKING_DIR

# RGI file
path = utils.get_rgi_region_file(rgi_region, version=rgi_version)
rgidf = gpd.read_file(path)

# Get the Rofental Basin file
path = utils.get_demo_file('rofental_hydrosheds.shp')
basin = gpd.read_file(path)

# Take all glaciers in the Rofental Basin
in_bas = [basin.geometry.contains(shpg.Point(x, y))[0]
          for (x, y) in zip(rgidf.CenLon, rgidf.CenLat)]
rgidf = rgidf.loc[in_bas]

# Sort for more efficient parallel computing
rgidf = rgidf.sort_values('Area', ascending=False)

log.workflow('Starting OGGM run')
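# An equivalent vectorized selection (a sketch, not part of the original
# script; it assumes the basin shapefile holds a single polygon):
# pts = gpd.GeoSeries(gpd.points_from_xy(rgidf.CenLon, rgidf.CenLat),
#                     index=rgidf.index)
# rgidf = rgidf.loc[pts.within(basin.geometry.iloc[0])]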
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None,
                      output_folder='', working_dir='', dem_source='',
                      is_test=False, test_ids=None, demo=False,
                      test_rgidf=None, test_intersects_file=None,
                      test_topofile=None, disable_mp=False, params_file=None,
                      elev_bands=False, match_geodetic_mb=False,
                      centerlines_only=False, add_consensus=False,
                      max_level=5, logging_level='WORKFLOW',
                      disable_dl_verify=False):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the maps border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    dem_source : str
        which DEM source to use: default, SOURCE_NAME or ALL
    working_dir : str
        path to the OGGM working directory
    params_file : str
        path to the OGGM parameter file (to override defaults)
    is_test : bool
        to test on a couple of glaciers only!
    test_ids : list
        if is_test: list of ids to process
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    disable_mp : bool
        disable multiprocessing
    elev_bands : bool
        compute all flowlines based on the Huss & Hock (2015) method instead
        of the OGGM default, which is a mix of elev_bands and centerlines.
    centerlines_only : bool
        compute all flowlines based on the OGGM centerline(s) method instead
        of the OGGM default, which is a mix of elev_bands and centerlines.
    match_geodetic_mb : bool
        match the mass-balance estimates at the regional level
        (currently Hugonnet et al., 2020).
    add_consensus : bool
        adds (reprojects) the consensus estimates thickness to the glacier
        directories. With elev_bands=True, the data will also be binned.
    max_level : int
        the maximum pre-processing level before stopping
    logging_level : str
        the logging level to use (DEBUG, INFO, WARNING, WORKFLOW)
    disable_dl_verify : bool
        disable the hash verification of OGGM downloads
    """

    # TODO: temporarily silence Fiona and other deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Input check
    if max_level not in [1, 2, 3, 4, 5]:
        raise InvalidParamsError('max_level should be one of [1, 2, 3, 4, 5]')

    # Time
    start = time.time()

    def _time_log():
        # Log util
        m, s = divmod(time.time() - start, 60)
        h, m = divmod(m, 60)
        log.workflow('OGGM prepro_levels is done! Time needed: '
                     '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))

    # Config Override Params
    params = {}

    # Local paths
    utils.mkdir(working_dir)
    params['working_dir'] = working_dir

    # Initialize OGGM and set up the run parameters
    cfg.initialize(file=params_file, params=params,
                   logging_level=logging_level, future=True)

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = not disable_mp

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # Check the integrity of the files OGGM downloads at run time. For large
    # files (e.g. a DEM shipped as a single tif, like ALASKA) calculating
    # the hash takes a long time, so deactivating this can make sense
    cfg.PARAMS['dl_verify'] = not disable_dl_verify

    # Log the parameters
    msg = '# OGGM Run parameters:'
    for k, v in cfg.PARAMS.items():
        if type(v) in [pd.DataFrame, dict]:
            continue
        msg += '\n    {}: {}'.format(k, v)
    log.workflow(msg)

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']

    output_base_dir = os.path.join(output_folder,
                                   'RGI{}'.format(rgi_version),
                                   'b_{:03d}'.format(border))

    # Add a package version file
    utils.mkdir(output_base_dir)
    opath = os.path.join(output_base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)

        # Some RGI input quality checks - this is based on visual checks
        # of large glaciers in the RGI
        ids_to_ice_cap = [
            'RGI60-05.10315',  # huge Greenland ice cap
            'RGI60-03.01466',  # strange thing next to Devon
            'RGI60-09.00918',  # Academy of sciences Ice cap
            'RGI60-09.00969',
            'RGI60-09.00958',
            'RGI60-09.00957',
        ]
        rgidf.loc[rgidf.RGIId.isin(ids_to_ice_cap), 'Form'] = '1'

        # In AA almost all large ice bodies are actually ice caps
        if rgi_reg == '19':
            rgidf.loc[rgidf.Area > 100, 'Form'] = '1'

        # For greenland we omit connectivity level 2
        if rgi_reg == '05':
            rgidf = rgidf.loc[rgidf['Connect'] != 2]
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        if test_ids is not None:
            rgidf = rgidf.loc[rgidf.RGIId.isin(test_ids)]
        else:
            rgidf = rgidf.sample(4)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # L0 - go
    gdirs = workflow.init_glacier_directories(rgidf, reset=True, force=True)

    # Glacier stats
    sum_dir = os.path.join(output_base_dir, 'L0', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L0 OK - compress all in output directory
    log.workflow('L0 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L0')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)
    if max_level == 0:
        _time_log()
        return

    # L1 - Add dem files
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # Which DEM source?
    if dem_source.upper() == 'ALL':
        # This is the complex one, just do the job and leave
        log.workflow('Running prepro on ALL sources')
        for i, s in enumerate(utils.DEM_SOURCES):
            rs = i == 0
            log.workflow('Running prepro on sources: {}'.format(s))
            gdirs = workflow.init_glacier_directories(rgidf, reset=rs,
                                                      force=rs)
            workflow.execute_entity_task(tasks.define_glacier_region, gdirs,
                                         source=s)
            workflow.execute_entity_task(_rename_dem_folder, gdirs, source=s)

        # make a GeoTiff mask of the glacier, choose any source
        workflow.execute_entity_task(gis.rasterio_glacier_mask,
                                     gdirs, source='ALL')

        # Compress all in output directory
        level_base_dir = os.path.join(output_base_dir, 'L1')
        workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                     base_dir=level_base_dir)
        utils.base_dir_to_tar(level_base_dir)

        _time_log()
        return

    # Force a given source
    source = dem_source.upper() if dem_source else None

    # L1 - go
    workflow.execute_entity_task(tasks.define_glacier_region, gdirs,
                                 source=source)

    # Glacier stats
    sum_dir = os.path.join(output_base_dir, 'L1', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L1 OK - compress all in output directory
    log.workflow('L1 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L1')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)
    if max_level == 1:
        _time_log()
        return

    # L2 - Tasks
    # Check which glaciers will be processed as what
    if elev_bands:
        gdirs_band = gdirs
        gdirs_cent = []
    elif centerlines_only:
        gdirs_band = []
        gdirs_cent = gdirs
    else:
        # Default is to mix
        # Curated list of large (> 50 km2) glaciers that don't run
        # (CFL error) mostly because the centerlines are crap
        # This is a really temporary fix until we have some better
        # solution here
        ids_to_bands = [
            'RGI60-01.13696', 'RGI60-03.01710', 'RGI60-01.13635',
            'RGI60-01.14443', 'RGI60-03.01678', 'RGI60-03.03274',
            'RGI60-01.17566', 'RGI60-03.02849', 'RGI60-01.16201',
            'RGI60-01.14683', 'RGI60-07.01506', 'RGI60-07.01559',
            'RGI60-03.02687', 'RGI60-17.00172', 'RGI60-01.23649',
            'RGI60-09.00077', 'RGI60-03.00994', 'RGI60-01.26738',
            'RGI60-03.00283', 'RGI60-01.16121', 'RGI60-01.27108',
            'RGI60-09.00132', 'RGI60-13.43483', 'RGI60-09.00069',
            'RGI60-14.04404', 'RGI60-17.01218', 'RGI60-17.15877',
            'RGI60-13.30888', 'RGI60-17.13796', 'RGI60-17.15825',
            'RGI60-01.09783'
        ]
        if rgi_reg == '19':
            gdirs_band = gdirs
            gdirs_cent = []
        else:
            gdirs_band = []
            gdirs_cent = []
            for gdir in gdirs:
                if gdir.is_icecap or gdir.rgi_id in ids_to_bands:
                    gdirs_band.append(gdir)
                else:
                    gdirs_cent.append(gdir)

    log.workflow('Start flowline processing with: '
                 'N centerline type: {}, '
                 'N elev bands type: {}.'
                 ''.format(len(gdirs_cent), len(gdirs_band)))

    # HH2015 method
    workflow.execute_entity_task(tasks.simple_glacier_masks, gdirs_band)

    # Centerlines OGGM
    workflow.execute_entity_task(tasks.glacier_masks, gdirs_cent)

    if add_consensus:
        from oggm.shop.bedtopo import add_consensus_thickness
        workflow.execute_entity_task(add_consensus_thickness, gdirs_band)
        workflow.execute_entity_task(add_consensus_thickness, gdirs_cent)

        # Elev bands with var data
        vn = 'consensus_ice_thickness'
        workflow.execute_entity_task(tasks.elevation_band_flowline,
                                     gdirs_band, bin_variables=vn)
        workflow.execute_entity_task(tasks.fixed_dx_elevation_band_flowline,
                                     gdirs_band, bin_variables=vn)
    else:
        # HH2015 method without it
        task_list = [
            tasks.elevation_band_flowline,
            tasks.fixed_dx_elevation_band_flowline,
        ]
        for task in task_list:
            workflow.execute_entity_task(task, gdirs_band)

    # HH2015 method
    task_list = [
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs_band)

    # Centerlines OGGM
    task_list = [
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs_cent)

    # Glacier stats
    sum_dir = os.path.join(output_base_dir, 'L2', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L2 OK - compress all in output directory
    log.workflow('L2 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L2')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)
    if max_level == 2:
        _time_log()
        return

    # L3 - Tasks
    task_list = [
        tasks.process_climate_data,
        tasks.historical_climate_qc,
        tasks.local_t_star,
        tasks.mu_star_calibration,
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs)

    # Inversion: we match the consensus
    workflow.calibrate_inversion_from_consensus(gdirs,
                                                apply_fs_on_mismatch=True,
                                                error_on_mismatch=False)

    # Do we want to match geodetic estimates?
    # This affects only the bias so we can actually do this *after*
    # the inversion, but we really want to take calving into account here
    if match_geodetic_mb:
        workflow.match_regional_geodetic_mb(gdirs, rgi_reg)

    # We get ready for modelling
    workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs)

    # Glacier stats
    sum_dir = os.path.join(output_base_dir, 'L3', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir,
                         'fixed_geometry_mass_balance_{}.csv'.format(rgi_reg))
    utils.compile_fixed_geometry_mass_balance(gdirs, path=opath)

    # L3 OK - compress all in output directory
    log.workflow('L3 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L3')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)
    if max_level == 3:
        _time_log()
        return

    # L4 - No tasks: add some stats for consistency and make the dirs small
    sum_dir_L3 = sum_dir
    sum_dir = os.path.join(output_base_dir, 'L4', 'summary')
    utils.mkdir(sum_dir)

    # Copy L3 files for consistency
    for bn in ['glacier_statistics', 'climate_statistics',
               'fixed_geometry_mass_balance']:
        ipath = os.path.join(sum_dir_L3, bn + '_{}.csv'.format(rgi_reg))
        opath = os.path.join(sum_dir, bn + '_{}.csv'.format(rgi_reg))
        shutil.copyfile(ipath, opath)

    # Copy mini data to new dir
    mini_base_dir = os.path.join(working_dir, 'mini_perglacier')
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs,
                                              base_dir=mini_base_dir)

    # L4 OK - compress all in output directory
    log.workflow('L4 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L4')
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)
    if max_level == 4:
        _time_log()
        return

    # L5 - spinup run in mini gdirs
    gdirs = mini_gdirs

    # Get end date. The first gdir might have blown up, try some others
    i = 0
    while True:
        if i >= len(gdirs):
            raise RuntimeError('Found no valid glaciers!')
        try:
            y0 = gdirs[i].get_climate_info()['baseline_hydro_yr_0']
            # One adds 1 because the run ends at the end of the year
            ye = gdirs[i].get_climate_info()['baseline_hydro_yr_1'] + 1
            break
        except BaseException:
            i += 1

    # OK - run
    workflow.execute_entity_task(tasks.run_from_climate_data, gdirs,
                                 min_ys=y0, ye=ye,
                                 output_filesuffix='_historical')

    # Now compile the output
    sum_dir = os.path.join(output_base_dir, 'L5', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir,
                         'historical_run_output_{}.nc'.format(rgi_reg))
    utils.compile_run_output(gdirs, path=opath,
                             input_filesuffix='_historical')

    # Glacier statistics we recompute here for error analysis
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # Other stats for consistency
    for bn in ['climate_statistics', 'fixed_geometry_mass_balance']:
        ipath = os.path.join(sum_dir_L3, bn + '_{}.csv'.format(rgi_reg))
        opath = os.path.join(sum_dir, bn + '_{}.csv'.format(rgi_reg))
        shutil.copyfile(ipath, opath)

    # Add the extended files
    pf = os.path.join(sum_dir,
                      'historical_run_output_{}.nc'.format(rgi_reg))
    mf = os.path.join(sum_dir,
                      'fixed_geometry_mass_balance_{}.csv'.format(rgi_reg))
    # This is crucial - extending calving only with L3 data!!!
    sf = os.path.join(sum_dir_L3,
                      'glacier_statistics_{}.csv'.format(rgi_reg))
    opath = os.path.join(
        sum_dir, 'historical_run_output_extended_{}.nc'.format(rgi_reg))
    utils.extend_past_climate_run(past_run_file=pf,
                                  fixed_geometry_mb_file=mf,
                                  glacier_statistics_file=sf,
                                  path=opath)

    # L5 OK - compress all in output directory
    log.workflow('L5 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L5')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)

    _time_log()
#!/usr/bin/env python
# coding: utf-8

# Extract glacier directories

import os
import geopandas as gpd
from oggm import cfg, utils, workflow

# define paths
prepro_dir = '/home/users/moberrauch/run_output/vas_prepro'
working_dir = '/home/users/moberrauch/wdirs/historical'

# OGGM initialization
cfg.initialize()
cfg.PATHS['working_dir'] = working_dir

# specify RGI version and regions
rgi_version = '62'
rgi_regions = [11, 13, 14, 15]

# get RGI IDs
for rgi_region in rgi_regions:
    fpath = utils.get_rgi_region_file('{:02d}'.format(rgi_region),
                                      version=rgi_version)
    if rgi_region == rgi_regions[0]:
        rgi_ids = gpd.read_file(fpath)
    else:
        rgi_ids = rgi_ids.append(gpd.read_file(fpath))

# extract working directories from *.tar files
workflow.init_glacier_directories(rgi_ids, from_tar=prepro_dir)
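# Optional continuation (a sketch, not part of the original script): keep the
# returned directories and run further entity tasks or compile statistics.
# gdirs = workflow.init_glacier_directories(rgi_ids, from_tar=prepro_dir)
# utils.compile_glacier_statistics(gdirs)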
def run_cmip():
    """Run CMIP6 projections with the volume/area scaling model."""
    # Initialize OGGM and set up the default run parameters
    vascaling.initialize(logging_level='DEBUG')
    rgi_version = '62'
    cfg.PARAMS['border'] = 80

    # CLUSTER paths
    wdir = os.environ.get('WORKDIR', '')
    utils.mkdir(wdir)
    cfg.PATHS['working_dir'] = wdir
    outdir = os.environ.get('OUTDIR', '')
    utils.mkdir(outdir)

    # define the baseline climate CRU or HISTALP
    cfg.PARAMS['baseline_climate'] = 'CRU'
    # set the mb hyper parameters accordingly
    cfg.PARAMS['prcp_scaling_factor'] = 3
    cfg.PARAMS['temp_melt'] = 0
    cfg.PARAMS['temp_all_solid'] = 4
    cfg.PARAMS['prcp_default_gradient'] = 4e-4
    cfg.PARAMS['run_mb_calibration'] = False
    # set minimum ice thickness to include in glacier length computation
    # this reduces weird spikes in length records
    cfg.PARAMS['min_ice_thick_for_length'] = 0.1
    # the bias is defined to be zero during the calibration process,
    # which is why we don't use it here to reproduce the results
    cfg.PARAMS['use_bias_for_run'] = True

    # read RGI entry for the glaciers as DataFrame
    # containing the outline area as shapefile
    # RGI glaciers
    rgi_reg = os.environ.get('RGI_REG', '')
    if rgi_reg not in ['{:02d}'.format(r) for r in range(1, 20)]:
        raise RuntimeError('Need an RGI Region')
    rgi_ids = gpd.read_file(
        utils.get_rgi_region_file(rgi_reg, version=rgi_version))

    # For greenland we omit connectivity level 2
    if rgi_reg == '05':
        rgi_ids = rgi_ids.loc[rgi_ids['Connect'] != 2]

    # get and set path to intersect shapefile
    intersects_db = utils.get_rgi_intersects_region_file(region=rgi_reg)
    cfg.set_intersects_db(intersects_db)

    # operational run, all glaciers should run
    cfg.PARAMS['continue_on_error'] = True

    # Module logger
    log = logging.getLogger(__name__)
    log.workflow('Starting run for RGI reg {}'.format(rgi_reg))

    # Go - get the pre-processed glacier directories
    base_url = 'https://cluster.klima.uni-bremen.de/' \
               '~moberrauch/prepro_vas_paper/'
    gdirs = workflow.init_glacier_directories(
        rgi_ids, from_prepro_level=3, prepro_base_url=base_url,
        prepro_rgi_version=rgi_version)

    # read gcm list
    gcms = pd.read_csv('/home/www/oggm/cmip6/all_gcm_list.csv', index_col=0)

    # iterate over all specified GCMs
    for gcm in sys.argv[1:]:
        # iterate over all SSPs (Shared Socioeconomic Pathways)
        df1 = gcms.loc[gcms.gcm == gcm]
        for ssp in df1.ssp.unique():
            df2 = df1.loc[df1.ssp == ssp]
            assert len(df2) == 2
            # get temperature projections
            ft = df2.loc[df2['var'] == 'tas'].iloc[0]
            # get precipitation projections
            fp = df2.loc[df2['var'] == 'pr'].iloc[0].path
            rid = ft.fname.replace('_r1i1p1f1_tas.nc', '')
            ft = ft.path

            log.workflow('Starting run for {}'.format(rid))

            workflow.execute_entity_task(
                gcm_climate.process_cmip_data, gdirs,
                # recognize the climate file for later
                filesuffix='_' + rid,
                # temperature projections
                fpath_temp=ft,
                # precip projections
                fpath_precip=fp,
                year_range=('1981', '2020'))

            workflow.execute_entity_task(
                vascaling.run_from_climate_data, gdirs,
                # use gcm_data, not climate_historical
                climate_filename='gcm_data',
                # use a different scenario
                climate_input_filesuffix='_' + rid,
                # this is important! Start from 2019
                init_model_filesuffix='_historical',
                # recognize the run for later
                output_filesuffix=rid,
                return_value=False)
            gcm_dir = os.path.join(outdir, 'RGI' + rgi_reg, gcm)
            utils.mkdir(gcm_dir)
            utils.compile_run_output(gdirs, input_filesuffix=rid,
                                     path=os.path.join(gcm_dir, rid + '.nc'))

    log.workflow('OGGM Done')
def run_cmip():
    # Initialize OGGM and set up the default run parameters
    vascaling.initialize(logging_level='WORKFLOW')
    rgi_version = '62'
    cfg.PARAMS['border'] = 80

    # CLUSTER paths
    wdir = os.environ.get('WORKDIR', '')
    cfg.PATHS['working_dir'] = wdir
    outdir = os.environ.get('OUTDIR', '')

    # define the baseline climate CRU or HISTALP
    cfg.PARAMS['baseline_climate'] = 'CRU'
    # set the mb hyper parameters accordingly
    cfg.PARAMS['prcp_scaling_factor'] = 3
    cfg.PARAMS['temp_melt'] = 0
    cfg.PARAMS['temp_all_solid'] = 4
    cfg.PARAMS['run_mb_calibration'] = False
    # set minimum ice thickness to include in glacier length computation
    # this reduces weird spikes in length records
    cfg.PARAMS['min_ice_thick_for_length'] = 0.1
    # the bias is defined to be zero during the calibration process,
    # which is why we don't use it here to reproduce the results
    cfg.PARAMS['use_bias_for_run'] = True

    # read RGI entry for the glaciers as DataFrame
    # containing the outline area as shapefile
    # RGI glaciers
    rgi_reg = os.environ.get('OGGM_RGI_REG', '')
    if rgi_reg not in ['{:02d}'.format(r) for r in range(1, 20)]:
        raise RuntimeError('Need an RGI Region')
    rgi_ids = gpd.read_file(
        utils.get_rgi_region_file(rgi_reg, version=rgi_version))

    # get and set path to intersect shapefile
    intersects_db = utils.get_rgi_intersects_region_file(region=rgi_reg)
    cfg.set_intersects_db(intersects_db)

    # operational run, all glaciers should run
    cfg.PARAMS['continue_on_error'] = True

    # Module logger
    log = logging.getLogger(__name__)
    log.workflow('Starting run for RGI reg {}'.format(rgi_reg))

    # Go - get the pre-processed glacier directories
    # base_url = 'https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.4/' \
    #            'L3-L5_files/RGIV62_fleb_qc3_CRU_pcp2.5'
    prepro_dir = '/home/users/moberrauch/run_output/vas_prepro/'
    gdirs = workflow.init_glacier_directories(rgi_ids, from_tar=prepro_dir)

    # # run vascaling climate tasks
    # workflow.execute_entity_task(vascaling.local_t_star, gdirs)
    # # adjust mass balance residual with geodetic observations
    # vascaling.match_regional_geodetic_mb(gdirs=gdirs, rgi_reg=rgi_reg)
    # # prepare historic "spinup"
    # workflow.execute_entity_task(vascaling.run_from_climate_data, gdirs,
    #                              ys=2003, ye=2020,
    #                              output_filesuffix='_historical')

    # read gcm list
    gcms = pd.read_csv('/home/www/oggm/cmip6/all_gcm_list.csv', index_col=0)

    # iterate over all specified GCMs
    for gcm in sys.argv[1:]:
        # iterate over all SSPs (Shared Socioeconomic Pathways)
        df1 = gcms.loc[gcms.gcm == gcm]
        for ssp in df1.ssp.unique():
            df2 = df1.loc[df1.ssp == ssp]
            assert len(df2) == 2
            # get temperature projections
            ft = df2.loc[df2['var'] == 'tas'].iloc[0]
            # get precipitation projections
            fp = df2.loc[df2['var'] == 'pr'].iloc[0].path
            rid = ft.fname.replace('_r1i1p1f1_tas.nc', '')
            ft = ft.path

            log.workflow('Starting run for {}'.format(rid))

            workflow.execute_entity_task(
                gcm_climate.process_cmip_data, gdirs,
                filesuffix='_' + rid,  # recognize the climate file for later
                fpath_temp=ft,  # temperature projections
                fpath_precip=fp,  # precip projections
                year_range=('1981', '2020'))

            workflow.execute_entity_task(vascaling.run_from_climate_data,
                                         gdirs,
                                         climate_filename='gcm_data',
                                         climate_input_filesuffix='_' + rid,
                                         init_model_filesuffix='_historical',
                                         output_filesuffix=rid,
                                         return_value=False)
            gcm_dir = os.path.join(outdir, 'RGI' + rgi_reg, gcm)
            utils.mkdir(gcm_dir)
            utils.compile_run_output(gdirs, input_filesuffix=rid,
                                     path=os.path.join(gcm_dir, rid + '.nc'))

    log.workflow('OGGM Done')
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None,
                      output_folder='', working_dir='', is_test=False,
                      demo=False, test_rgidf=None, test_intersects_file=None,
                      test_topofile=None, test_crudir=None):
    """Does the actual job.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the map's border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    working_dir : str
        path to the OGGM working directory
    is_test : bool
        to test on a couple of glaciers only!
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    test_crudir : str
        for testing purposes only
    """

    # TODO: temporarily silence Fiona deprecation warnings
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Module logger
    log = logging.getLogger(__name__)

    # Time
    start = time.time()

    # Initialize OGGM and set up the run parameters
    cfg.initialize(logging_level='WORKFLOW')

    # Local paths
    utils.mkdir(working_dir)
    cfg.PATHS['working_dir'] = working_dir

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    # For statistics
    climate_periods = [1920, 1960, 2000]

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']
    rgi_dir_name = 'RGI{}'.format(rgi_version)
    border_dir_name = 'b_{:03d}'.format(border)
    base_dir = os.path.join(output_folder, rgi_dir_name, border_dir_name)

    # Add a package version file
    utils.mkdir(base_dir)
    opath = os.path.join(base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        # Just for fun
        rgidf = rgidf.sample(4)

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # Input
    if test_topofile:
        cfg.PATHS['dem_file'] = test_topofile

    # L1 - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L1', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L1 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L1')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    # L2 - Tasks
    # Pre-download other files just in case
    if test_crudir is None:
        _ = utils.get_cru_file(var='tmp')
        _ = utils.get_cru_file(var='pre')
    else:
        cfg.PATHS['cru_dir'] = test_crudir
    workflow.execute_entity_task(tasks.process_cru_data, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L2', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # L2 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L2')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    # L3 - Tasks
    task_list = [
        tasks.glacier_masks,
        tasks.compute_centerlines,
        tasks.initialize_flowlines,
        tasks.compute_downstream_line,
        tasks.compute_downstream_bedshape,
        tasks.catchment_area,
        tasks.catchment_intersections,
        tasks.catchment_width_geom,
        tasks.catchment_width_correction,
        tasks.local_t_star,
        tasks.mu_star_calibration,
        tasks.prepare_for_inversion,
        tasks.mass_conservation_inversion,
        tasks.filter_inversion_output,
    ]
    for task in task_list:
        workflow.execute_entity_task(task, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L3', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs,
                                     add_climate_period=climate_periods,
                                     path=opath)

    # L3 OK - compress all in output directory
    l_base_dir = os.path.join(base_dir, 'L3')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=l_base_dir)
    utils.base_dir_to_tar(l_base_dir)

    # L4 - Tasks
    workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs)

    # Glacier stats
    sum_dir = os.path.join(base_dir, 'L4', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # Copy mini data to new dir
    base_dir = os.path.join(base_dir, 'L4')
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs,
                                              base_dir=base_dir)

    # L4 OK - compress all in output directory
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=True)
    utils.base_dir_to_tar(base_dir)

    # Log
    m, s = divmod(time.time() - start, 60)
    h, m = divmod(m, 60)
    log.workflow('OGGM prepro_levels is done! Time needed: '
                 '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))
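# Hedged usage sketch (not in the original source): a small test invocation
# of the prepro chain above. Region, border and paths are placeholder
# assumptions; is_test=True samples only four glaciers, which keeps the
# run cheap.
run_prepro_levels(rgi_version='61', rgi_reg='11', border=80,
                  output_folder='/tmp/prepro_out',  # placeholder path
                  working_dir='/tmp/prepro_wd',     # placeholder path
                  is_test=True)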
cfg.PARAMS['continue_on_error'] = True

# Init
workflow.init_mp_pool(True)

rgi_reg = os.environ.get('OGGM_RGI_REG', '')
if rgi_reg not in ['{:02d}'.format(r) for r in range(1, 20)]:
    raise RuntimeError('Need an RGI Region')

# Module logger
log = logging.getLogger(__name__)
log.workflow('Starting run for RGI reg {}'.format(rgi_reg))

# RGI glaciers
rgi_ids = gpd.read_file(
    utils.get_rgi_region_file(rgi_reg, version=rgi_version))

# For Greenland we omit connectivity level 2
if rgi_reg == '05':
    rgi_ids = rgi_ids.loc[rgi_ids['Connect'] != 2]

# Go - get the pre-processed glacier directories
base_url = ('https://cluster.klima.uni-bremen.de/~fmaussion/gdirs/'
            'final_prepro_cmip6/era5_eb')
gdirs = workflow.init_glacier_directories(rgi_ids, from_prepro_level=5,
                                          prepro_base_url=base_url,
                                          prepro_rgi_version=rgi_version)

gcms = pd.read_csv('/home/www/oggm/cmip6/all_gcm_list.csv', index_col=0)
n_gcms = len(sys.argv) - 1
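# Hedged sketch of a possible continuation (the script above is truncated):
# by analogy with run_cmip() further up, the GCM table would be filtered by
# the GCM names passed on the command line and each (gcm, ssp) pair would be
# processed in turn. The loop below is an assumption, not the original code.
log.workflow('Processing {} GCMs'.format(n_gcms))
for gcm in sys.argv[1:]:
    df1 = gcms.loc[gcms.gcm == gcm]
    for ssp in df1.ssp.unique():
        df2 = df1.loc[df1.ssp == ssp]  # one 'tas' and one 'pr' row expected
        ...  # process_cmip_data + run_from_climate_data, as in run_cmip()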
def run_prepro_levels(rgi_version=None, rgi_reg=None, border=None,
                      output_folder='', working_dir='', dem_source='',
                      is_test=False, test_ids=None, demo=False,
                      test_rgidf=None, test_intersects_file=None,
                      test_topofile=None, disable_mp=False, params_file=None,
                      elev_bands=False, match_regional_geodetic_mb=False,
                      match_geodetic_mb_per_glacier=False,
                      evolution_model='fl_sia', centerlines_only=False,
                      override_params=None, add_consensus=False,
                      start_level=None, start_base_url=None, max_level=5,
                      ref_tstars_base_url='', logging_level='WORKFLOW',
                      disable_dl_verify=False, dynamic_spinup=False,
                      continue_on_error=True):
    """Generate the preprocessed OGGM glacier directories for this OGGM
    version.

    Parameters
    ----------
    rgi_version : str
        the RGI version to use (defaults to cfg.PARAMS)
    rgi_reg : str
        the RGI region to process
    border : int
        the number of pixels at the map's border
    output_folder : str
        path to the output folder (where to put the preprocessed tar files)
    dem_source : str
        which DEM source to use: default, SOURCE_NAME or ALL
    working_dir : str
        path to the OGGM working directory
    ref_tstars_base_url : str
        url where to find the pre-calibrated reference tstar list.
        Required as of v1.4.
    params_file : str
        path to the OGGM parameter file (to override defaults)
    is_test : bool
        to test on a couple of glaciers only!
    test_ids : list
        if is_test: list of ids to process
    demo : bool
        to run the prepro for the list of demo glaciers
    test_rgidf : shapefile
        for testing purposes only
    test_intersects_file : shapefile
        for testing purposes only
    test_topofile : str
        for testing purposes only
    disable_mp : bool
        disable multiprocessing
    elev_bands : bool
        compute all flowlines based on the Huss & Hock (2015) method instead
        of the OGGM default, which is a mix of elev_bands and centerlines.
    centerlines_only : bool
        compute all flowlines based on the OGGM centerline(s) method instead
        of the OGGM default, which is a mix of elev_bands and centerlines.
    match_regional_geodetic_mb : str
        match the mass balance estimates at the regional level
        ('hugonnet': Hugonnet et al., 2020 or 'zemp': Zemp et al., 2019).
    match_geodetic_mb_per_glacier : str
        match the mass balance estimates at the glacier level
        (currently only 'hugonnet': Hugonnet et al., 2020).
    evolution_model : str
        which geometry evolution model to use: `fl_sia` (default),
        or `massredis` (mass redistribution curve).
    add_consensus : bool
        adds (reprojects) the consensus thickness estimates to the glacier
        directories. With elev_bands=True, the data will also be binned.
    start_level : int
        the pre-processed level to start from (default is to start from
        scratch). If set, you'll need to indicate start_base_url as well.
    start_base_url : str
        the pre-processed base-url to fetch the data from.
    max_level : int
        the maximum pre-processing level before stopping
    logging_level : str
        the logging level to use (DEBUG, INFO, WARNING, WORKFLOW)
    override_params : dict
        a dict of parameters to override.
    disable_dl_verify : bool
        disable the hash verification of OGGM downloads
    dynamic_spinup : str
        include a dynamic spinup matching 'area' OR 'volume' at the RGI date
    """

    # Input check
    if max_level not in [1, 2, 3, 4, 5]:
        raise InvalidParamsError('max_level should be one of [1, 2, 3, 4, 5]')

    if start_level is not None:
        if start_level not in [0, 1, 2]:
            raise InvalidParamsError('start_level should be one of [0, 1, 2]')
        if start_level > 0 and start_base_url is None:
            raise InvalidParamsError('With start_level, please also indicate '
                                     'start_base_url')
    else:
        start_level = 0

    if match_regional_geodetic_mb and match_geodetic_mb_per_glacier:
        raise InvalidParamsError(
            'match_regional_geodetic_mb incompatible with '
            'match_geodetic_mb_per_glacier!')

    if (match_geodetic_mb_per_glacier and
            match_geodetic_mb_per_glacier != 'hugonnet'):
        raise InvalidParamsError('Currently only `hugonnet` is available for '
                                 'match_geodetic_mb_per_glacier.')

    if evolution_model not in ['fl_sia', 'massredis']:
        raise InvalidParamsError('evolution_model should be one of '
                                 "['fl_sia', 'massredis'].")

    if dynamic_spinup and dynamic_spinup not in ['area', 'volume']:
        raise InvalidParamsError(f"Dynamic spinup option '{dynamic_spinup}' "
                                 "not supported")

    if dynamic_spinup and evolution_model == 'massredis':
        raise InvalidParamsError("Dynamic spinup is not working/tested "
                                 "with massredis!")

    # Module logger (a module-level logger in the original script)
    log = logging.getLogger(__name__)

    # Time
    start = time.time()

    def _time_log():
        # Log util
        m, s = divmod(time.time() - start, 60)
        h, m = divmod(m, 60)
        log.workflow('OGGM prepro_levels is done! Time needed: '
                     '{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)))

    # Local paths
    if override_params is None:
        override_params = {}

    utils.mkdir(working_dir)
    override_params['working_dir'] = working_dir

    # Initialize OGGM and set up the run parameters
    cfg.initialize(file=params_file, params=override_params,
                   logging_level=logging_level, future=True)

    if match_geodetic_mb_per_glacier and (cfg.PARAMS['hydro_month_nh'] != 1 or
                                          cfg.PARAMS['hydro_month_sh'] != 1):
        raise InvalidParamsError('We recommend to set hydro_month_nh and sh '
                                 'to 1 for the geodetic MB calibration per '
                                 'glacier.')

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = not disable_mp

    # How many grid points around the glacier?
    # Make it large if you expect your glaciers to grow large
    cfg.PARAMS['border'] = border

    # Set to True for operational runs
    cfg.PARAMS['continue_on_error'] = continue_on_error

    # Check for the integrity of the files OGGM downloads at run time
    # For large files (e.g. a single-tif DEM like ALASKA) computing the hash
    # takes a long time, so deactivating this can make sense
    cfg.PARAMS['dl_verify'] = not disable_dl_verify

    # Other things that make sense
    cfg.PARAMS['store_model_geometry'] = True

    # Log the parameters
    msg = '# OGGM Run parameters:'
    for k, v in cfg.PARAMS.items():
        if type(v) in [pd.DataFrame, dict]:
            continue
        msg += '\n    {}: {}'.format(k, v)
    log.workflow(msg)

    if rgi_version is None:
        rgi_version = cfg.PARAMS['rgi_version']

    output_base_dir = os.path.join(output_folder,
                                   'RGI{}'.format(rgi_version),
                                   'b_{:03d}'.format(border))

    # Add a package version file
    utils.mkdir(output_base_dir)
    opath = os.path.join(output_base_dir, 'package_versions.txt')
    with open(opath, 'w') as vfile:
        vfile.write(utils.show_versions(logger=log))

    if demo:
        rgidf = utils.get_rgi_glacier_entities(cfg.DATA['demo_glaciers'].index)
    elif test_rgidf is None:
        # Get the RGI file
        rgidf = gpd.read_file(
            utils.get_rgi_region_file(rgi_reg, version=rgi_version))
        # We use intersects
        rgif = utils.get_rgi_intersects_region_file(rgi_reg,
                                                    version=rgi_version)
        cfg.set_intersects_db(rgif)

        # Some RGI input quality checks - this is based on visual checks
        # of large glaciers in the RGI
        ids_to_ice_cap = [
            'RGI60-05.10315',  # huge Greenland ice cap
            'RGI60-03.01466',  # strange thing next to Devon
            'RGI60-09.00918',  # Academy of Sciences ice cap
            'RGI60-09.00969',
            'RGI60-09.00958',
            'RGI60-09.00957',
        ]
        rgidf.loc[rgidf.RGIId.isin(ids_to_ice_cap), 'Form'] = '1'

        # In Antarctica almost all large ice bodies are actually ice caps
        if rgi_reg == '19':
            rgidf.loc[rgidf.Area > 100, 'Form'] = '1'

        # For Greenland we omit connectivity level 2
        if rgi_reg == '05':
            rgidf = rgidf.loc[rgidf['Connect'] != 2]
    else:
        rgidf = test_rgidf
        cfg.set_intersects_db(test_intersects_file)

    if is_test:
        if test_ids is not None:
            rgidf = rgidf.loc[rgidf.RGIId.isin(test_ids)]
        else:
            rgidf = rgidf.sample(4)
        if max_level > 2:
            # Also use ref tstars
            utils.apply_test_ref_tstars()

    if max_level > 2 and ref_tstars_base_url:
        workflow.download_ref_tstars(base_url=ref_tstars_base_url)

    log.workflow('Starting prepro run for RGI reg: {} '
                 'and border: {}'.format(rgi_reg, border))
    log.workflow('Number of glaciers: {}'.format(len(rgidf)))

    # L0 - go
    if start_level == 0:
        gdirs = workflow.init_glacier_directories(rgidf, reset=True,
                                                  force=True)

        # Glacier stats
        sum_dir = os.path.join(output_base_dir, 'L0', 'summary')
        utils.mkdir(sum_dir)
        opath = os.path.join(sum_dir,
                             'glacier_statistics_{}.csv'.format(rgi_reg))
        utils.compile_glacier_statistics(gdirs, path=opath)

        # L0 OK - compress all in output directory
        log.workflow('L0 done. Writing to tar...')
        level_base_dir = os.path.join(output_base_dir, 'L0')
        workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                     base_dir=level_base_dir)
        utils.base_dir_to_tar(level_base_dir)
        if max_level == 0:
            _time_log()
            return
    else:
        gdirs = workflow.init_glacier_directories(
            rgidf, reset=True, force=True,
            from_prepro_level=start_level,
            prepro_border=border,
            prepro_rgi_version=rgi_version,
            prepro_base_url=start_base_url)

    # L1 - Add dem files
    if start_level == 0:
        if test_topofile:
            cfg.PATHS['dem_file'] = test_topofile

        # Which DEM source?
        if dem_source.upper() == 'ALL':
            # This is the complex one, just do the job and leave
            log.workflow('Running prepro on ALL sources')
            for i, s in enumerate(utils.DEM_SOURCES):
                rs = i == 0
                log.workflow('Running prepro on sources: {}'.format(s))
                gdirs = workflow.init_glacier_directories(rgidf, reset=rs,
                                                          force=rs)
                workflow.execute_entity_task(tasks.define_glacier_region,
                                             gdirs, source=s)
                workflow.execute_entity_task(_rename_dem_folder, gdirs,
                                             source=s)

            # make a GeoTiff mask of the glacier, choose any source
            workflow.execute_entity_task(gis.rasterio_glacier_mask,
                                         gdirs, source='ALL')

            # Compress all in output directory
            level_base_dir = os.path.join(output_base_dir, 'L1')
            workflow.execute_entity_task(utils.gdir_to_tar, gdirs,
                                         delete=False,
                                         base_dir=level_base_dir)
            utils.base_dir_to_tar(level_base_dir)

            _time_log()
            return

        # Force a given source
        source = dem_source.upper() if dem_source else None

        # L1 - go
        workflow.execute_entity_task(tasks.define_glacier_region, gdirs,
                                     source=source)

        # Glacier stats
        sum_dir = os.path.join(output_base_dir, 'L1', 'summary')
        utils.mkdir(sum_dir)
        opath = os.path.join(sum_dir,
                             'glacier_statistics_{}.csv'.format(rgi_reg))
        utils.compile_glacier_statistics(gdirs, path=opath)

        # L1 OK - compress all in output directory
        log.workflow('L1 done. Writing to tar...')
        level_base_dir = os.path.join(output_base_dir, 'L1')
        workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                     base_dir=level_base_dir)
        utils.base_dir_to_tar(level_base_dir)
        if max_level == 1:
            _time_log()
            return

    # L2 - Tasks
    if start_level <= 1:
        # Check which glaciers will be processed as what
        if elev_bands:
            gdirs_band = gdirs
            gdirs_cent = []
        elif centerlines_only:
            gdirs_band = []
            gdirs_cent = gdirs
        else:
            # Default is centerlines only, but it used to be a mix
            # (e.g. bands for ice caps, etc.)
            # We keep this logic here in case we want to mix again
            gdirs_band = []
            gdirs_cent = gdirs

        log.workflow('Start flowline processing with: '
                     'N centerline type: {}, '
                     'N elev bands type: {}.'
                     ''.format(len(gdirs_cent), len(gdirs_band)))

        # HH2015 method
        workflow.execute_entity_task(tasks.simple_glacier_masks, gdirs_band)

        # Centerlines OGGM
        workflow.execute_entity_task(tasks.glacier_masks, gdirs_cent)

        if add_consensus:
            from oggm.shop.bedtopo import add_consensus_thickness
            workflow.execute_entity_task(add_consensus_thickness, gdirs_band)
            workflow.execute_entity_task(add_consensus_thickness, gdirs_cent)

            # Elev bands with var data
            vn = 'consensus_ice_thickness'
            workflow.execute_entity_task(tasks.elevation_band_flowline,
                                         gdirs_band, bin_variables=vn)
            workflow.execute_entity_task(
                tasks.fixed_dx_elevation_band_flowline,
                gdirs_band, bin_variables=vn)
        else:
            # HH2015 method without the consensus thickness data
            task_list = [
                tasks.elevation_band_flowline,
                tasks.fixed_dx_elevation_band_flowline,
            ]
            for task in task_list:
                workflow.execute_entity_task(task, gdirs_band)

        # Centerlines OGGM
        task_list = [
            tasks.compute_centerlines,
            tasks.initialize_flowlines,
            tasks.catchment_area,
            tasks.catchment_intersections,
            tasks.catchment_width_geom,
            tasks.catchment_width_correction,
        ]
        for task in task_list:
            workflow.execute_entity_task(task, gdirs_cent)

        # Same for all glaciers
        if border >= 20:
            task_list = [
                tasks.compute_downstream_line,
                tasks.compute_downstream_bedshape,
            ]
            for task in task_list:
                workflow.execute_entity_task(task, gdirs)
        else:
            log.workflow("L2: for map border values < 20, won't compute "
                         'downstream lines.')

        # Glacier stats
        sum_dir = os.path.join(output_base_dir, 'L2', 'summary')
        utils.mkdir(sum_dir)
        opath = os.path.join(sum_dir,
                             'glacier_statistics_{}.csv'.format(rgi_reg))
        utils.compile_glacier_statistics(gdirs, path=opath)

        # And for level 2: shapes
        if len(gdirs_cent) > 0:
            opath = os.path.join(sum_dir,
                                 'centerlines_{}.shp'.format(rgi_reg))
            utils.write_centerlines_to_shape(gdirs_cent, to_tar=True,
                                             path=opath)

        # L2 OK - compress all in output directory
        log.workflow('L2 done. Writing to tar...')
        level_base_dir = os.path.join(output_base_dir, 'L2')
        workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                     base_dir=level_base_dir)
        utils.base_dir_to_tar(level_base_dir)
        if max_level == 2:
            _time_log()
            return

    # L3 - Tasks
    sum_dir = os.path.join(output_base_dir, 'L3', 'summary')
    utils.mkdir(sum_dir)

    # Climate
    workflow.execute_entity_task(tasks.process_climate_data, gdirs)

    if cfg.PARAMS['climate_qc_months'] > 0:
        workflow.execute_entity_task(tasks.historical_climate_qc, gdirs)

    if match_geodetic_mb_per_glacier:
        utils.get_geodetic_mb_dataframe()  # Small optim to avoid concurrency
        workflow.execute_entity_task(
            tasks.mu_star_calibration_from_geodetic_mb, gdirs)
        workflow.execute_entity_task(tasks.apparent_mb_from_any_mb, gdirs)
    else:
        workflow.execute_entity_task(tasks.local_t_star, gdirs)
        workflow.execute_entity_task(tasks.mu_star_calibration, gdirs)

    # Inversion: we match the consensus
    filter_output = border >= 20
    workflow.calibrate_inversion_from_consensus(
        gdirs, apply_fs_on_mismatch=True, error_on_mismatch=False,
        filter_inversion_output=filter_output)

    # Do we want to match geodetic estimates?
    # This affects only the bias so we can actually do this *after*
    # the inversion, but we really want to take calving into account here
    if match_regional_geodetic_mb:
        opath = os.path.join(sum_dir,
                             'fixed_geometry_mass_balance_'
                             'before_match_{}.csv'.format(rgi_reg))
        utils.compile_fixed_geometry_mass_balance(gdirs, path=opath)
        workflow.match_regional_geodetic_mb(
            gdirs, rgi_reg=rgi_reg, dataset=match_regional_geodetic_mb)

    # We get ready for modelling
    if border >= 20:
        workflow.execute_entity_task(tasks.init_present_time_glacier, gdirs)
    else:
        log.workflow("L3: for map border values < 20, won't initialize "
                     'glaciers for the run.')

    # Glacier stats
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir, 'climate_statistics_{}.csv'.format(rgi_reg))
    utils.compile_climate_statistics(gdirs, path=opath)
    opath = os.path.join(sum_dir,
                         'fixed_geometry_mass_balance_{}.csv'.format(rgi_reg))
    utils.compile_fixed_geometry_mass_balance(gdirs, path=opath)

    # L3 OK - compress all in output directory
    log.workflow('L3 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L3')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)
    if max_level == 3:
        _time_log()
        return
    if border < 20:
        log.workflow("L3: for map border values < 20, won't compute L4 "
                     'and L5.')
        _time_log()
        return

    # L4 - No tasks: add some stats for consistency and make the dirs small
    sum_dir_L3 = sum_dir
    sum_dir = os.path.join(output_base_dir, 'L4', 'summary')
    utils.mkdir(sum_dir)

    # Copy L3 files for consistency
    for bn in ['glacier_statistics', 'climate_statistics',
               'fixed_geometry_mass_balance']:
        ipath = os.path.join(sum_dir_L3, bn + '_{}.csv'.format(rgi_reg))
        opath = os.path.join(sum_dir, bn + '_{}.csv'.format(rgi_reg))
        shutil.copyfile(ipath, opath)

    # Copy mini data to new dir
    mini_base_dir = os.path.join(working_dir, 'mini_perglacier',
                                 'RGI{}'.format(rgi_version),
                                 'b_{:03d}'.format(border))
    mini_gdirs = workflow.execute_entity_task(tasks.copy_to_basedir, gdirs,
                                              base_dir=mini_base_dir)

    # L4 OK - compress all in output directory
    log.workflow('L4 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L4')
    workflow.execute_entity_task(utils.gdir_to_tar, mini_gdirs, delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)
    if max_level == 4:
        _time_log()
        return

    # L5 - spinup run in mini gdirs
    gdirs = mini_gdirs

    # Get end date. The first gdir might have blown up, try some others
    i = 0
    while True:
        if i >= len(gdirs):
            raise RuntimeError('Found no valid glaciers!')
        try:
            y0 = gdirs[i].get_climate_info()['baseline_hydro_yr_0']
            # One adds 1 because the run ends at the end of the year
            ye = gdirs[i].get_climate_info()['baseline_hydro_yr_1'] + 1
            break
        except BaseException:
            i += 1

    # Which model?
    if evolution_model == 'massredis':
        from oggm.core.flowline import MassRedistributionCurveModel
        evolution_model = MassRedistributionCurveModel
    else:
        from oggm.core.flowline import FluxBasedModel
        evolution_model = FluxBasedModel

    # OK - run
    if dynamic_spinup:
        workflow.execute_entity_task(
            tasks.run_dynamic_spinup, gdirs,
            evolution_model=evolution_model,
            minimise_for=dynamic_spinup,
            precision_percent=1,
            output_filesuffix='_dynamic_spinup',
        )

        workflow.execute_entity_task(
            tasks.run_from_climate_data, gdirs,
            min_ys=y0, ye=ye,
            evolution_model=evolution_model,
            init_model_filesuffix='_dynamic_spinup',
            output_filesuffix='_hist_spin')

        workflow.execute_entity_task(
            tasks.merge_consecutive_run_outputs, gdirs,
            input_filesuffix_1='_dynamic_spinup',
            input_filesuffix_2='_hist_spin',
            output_filesuffix='_historical_spinup',
            delete_input=True)

    workflow.execute_entity_task(
        tasks.run_from_climate_data, gdirs,
        min_ys=y0, ye=ye,
        evolution_model=evolution_model,
        output_filesuffix='_historical')

    # Now compile the output
    sum_dir = os.path.join(output_base_dir, 'L5', 'summary')
    utils.mkdir(sum_dir)
    opath = os.path.join(sum_dir, f'historical_run_output_{rgi_reg}.nc')
    utils.compile_run_output(gdirs, path=opath,
                             input_filesuffix='_historical')

    if dynamic_spinup:
        opath = os.path.join(sum_dir,
                             f'historical_spinup_run_output_{rgi_reg}.nc')
        utils.compile_run_output(gdirs, path=opath,
                                 input_filesuffix='_historical_spinup')

    # Glacier statistics we recompute here for error analysis
    opath = os.path.join(sum_dir, 'glacier_statistics_{}.csv'.format(rgi_reg))
    utils.compile_glacier_statistics(gdirs, path=opath)

    # Other stats for consistency
    for bn in ['climate_statistics', 'fixed_geometry_mass_balance']:
        ipath = os.path.join(sum_dir_L3, bn + '_{}.csv'.format(rgi_reg))
        opath = os.path.join(sum_dir, bn + '_{}.csv'.format(rgi_reg))
        shutil.copyfile(ipath, opath)

    # Add the extended files
    pf = os.path.join(sum_dir,
                      'historical_run_output_{}.nc'.format(rgi_reg))
    mf = os.path.join(sum_dir,
                      'fixed_geometry_mass_balance_{}.csv'.format(rgi_reg))
    # This is crucial - extending calving is only possible with L3 data!
    sf = os.path.join(sum_dir_L3,
                      'glacier_statistics_{}.csv'.format(rgi_reg))
    opath = os.path.join(
        sum_dir, 'historical_run_output_extended_{}.nc'.format(rgi_reg))
    utils.extend_past_climate_run(past_run_file=pf,
                                  fixed_geometry_mb_file=mf,
                                  glacier_statistics_file=sf,
                                  path=opath)

    # L5 OK - compress all in output directory
    log.workflow('L5 done. Writing to tar...')
    level_base_dir = os.path.join(output_base_dir, 'L5')
    workflow.execute_entity_task(utils.gdir_to_tar, gdirs, delete=False,
                                 base_dir=level_base_dir)
    utils.base_dir_to_tar(level_base_dir)

    _time_log()
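# Hedged usage sketch (not in the original source): a full 5-level prepro run
# with elevation-band flowlines and per-glacier geodetic calibration. Paths
# and the region are placeholder assumptions; the hydro_month override
# mirrors the check the function itself performs when
# match_geodetic_mb_per_glacier is set.
run_prepro_levels(rgi_version='62', rgi_reg='11', border=80,
                  output_folder='/tmp/prepro_out',  # placeholder path
                  working_dir='/tmp/prepro_wd',     # placeholder path
                  elev_bands=True,
                  match_geodetic_mb_per_glacier='hugonnet',
                  override_params={'hydro_month_nh': 1, 'hydro_month_sh': 1},
                  max_level=5)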