def load_glacier_data(glac_no=None, rgi_regionsO1=None, rgi_regionsO2='all', rgi_glac_number='all',
                      load_caldata=0, startyear=2000, endyear=2018, option_wateryear=3):
    """Load glacier data (main_glac_rgi, hyps, and ice thickness).

    Parameters
    ----------
    glac_no : list, optional
        glacier numbers passed straight through to modelsetup.selectglaciersrgitable
    rgi_regionsO1 : list, optional
        first-order RGI regions
    rgi_regionsO2 : list or 'all'
        second-order RGI regions
    rgi_glac_number : list or 'all'
        glacier numbers within the selected regions
    load_caldata : int
        if 1, also load the calibration (mass-balance) data
    startyear, endyear : int
        years used to build the dates table for the calibration data
    option_wateryear : int
        wateryear option forwarded to modelsetup.datesmodelrun

    Returns
    -------
    main_glac_rgi_all, main_glac_hyps_all, main_glac_icethickness_all
        and, when load_caldata != 0, cal_data_all as a fourth element
    """
    # Glacier selection (RGI attribute table)
    main_glac_rgi_all = modelsetup.selectglaciersrgitable(
        rgi_regionsO1=rgi_regionsO1, rgi_regionsO2=rgi_regionsO2,
        rgi_glac_number=rgi_glac_number, glac_no=glac_no)
    # Glacier hypsometry [km**2], total area
    main_glac_hyps_all = modelsetup.import_Husstable(main_glac_rgi_all, pygem_prms.hyps_filepath,
                                                     pygem_prms.hyps_filedict, pygem_prms.hyps_colsdrop)
    # Ice thickness [m], average
    main_glac_icethickness_all = modelsetup.import_Husstable(main_glac_rgi_all, pygem_prms.thickness_filepath,
                                                             pygem_prms.thickness_filedict,
                                                             pygem_prms.thickness_colsdrop)
    # Zero the hypsometry wherever there is no ice, then replace missing data with 0
    main_glac_hyps_all[main_glac_icethickness_all == 0] = 0
    main_glac_hyps_all = main_glac_hyps_all.fillna(0)
    main_glac_icethickness_all = main_glac_icethickness_all.fillna(0)

    # Add degree groups to main_glac_rgi_all: bin glaciers onto a
    # degree_size x degree_size lon/lat grid and give each occupied cell an id
    main_glac_rgi_all['CenLon_round'] = np.floor(main_glac_rgi_all.CenLon.values / degree_size) * degree_size
    main_glac_rgi_all['CenLat_round'] = np.floor(main_glac_rgi_all.CenLat.values / degree_size) * degree_size
    deg_groups = main_glac_rgi_all.groupby(['CenLon_round', 'CenLat_round']).size().index.values.tolist()
    deg_dict = dict(zip(deg_groups, np.arange(0, len(deg_groups))))
    main_glac_rgi_all.reset_index(drop=True, inplace=True)
    # (lon, lat) tuples built with zip rather than per-row positional .loc lookups
    main_glac_rgi_all['CenLon_CenLat'] = list(zip(main_glac_rgi_all['CenLon_round'],
                                                  main_glac_rgi_all['CenLat_round']))
    main_glac_rgi_all['deg_id'] = main_glac_rgi_all.CenLon_CenLat.map(deg_dict)

    if load_caldata == 1:
        cal_datasets = ['shean']
        # NOTE(review): this overrides the caller's startyear argument —
        # presumably because the 'shean' calibration data begin in 2000;
        # confirm before relying on a non-default startyear with load_caldata=1.
        startyear = 2000
        dates_table = modelsetup.datesmodelrun(startyear=startyear, endyear=endyear, spinupyears=0,
                                               option_wateryear=option_wateryear)
        # Calibration data: pd.concat replaces DataFrame.append (removed in pandas >= 2.0)
        cal_data_list = []
        for dataset in cal_datasets:
            cal_subset = class_mbdata.MBData(name=dataset)
            cal_data_list.append(cal_subset.retrieve_mb(main_glac_rgi_all, main_glac_hyps_all, dates_table))
        cal_data_all = pd.concat(cal_data_list, ignore_index=True)
        cal_data_all = cal_data_all.sort_values(['glacno', 't1_idx'])
        cal_data_all.reset_index(drop=True, inplace=True)

    if load_caldata == 0:
        return main_glac_rgi_all, main_glac_hyps_all, main_glac_icethickness_all
    else:
        return main_glac_rgi_all, main_glac_hyps_all, main_glac_icethickness_all, cal_data_all
# Option to remove marine-terminating glaciers option_cal_remove_marine_glaciers = 1 # Reference climate data gcm_name = 'ERA-Interim' option_gcm_downscale = 2 option_lapserate_fromgcm = 1 option_export = 1 time_start = time.time() #%% ===== LOAD GLACIER DATA ===== # RGI glacier attributes main_glac_rgi = modelsetup.selectglaciersrgitable(rgi_regionsO1=rgi_regionsO1, rgi_regionsO2='all', rgi_glac_number='all') # Glacier hypsometry [km**2], total area main_glac_hyps = modelsetup.import_Husstable(main_glac_rgi, pygem_prms.hyps_filepath, pygem_prms.hyps_filedict, pygem_prms.hyps_colsdrop) elev_bins = main_glac_hyps.columns.values.astype(int) # Ice thickness [m], average main_glac_icethickness = modelsetup.import_Husstable( main_glac_rgi, pygem_prms.thickness_filepath, pygem_prms.thickness_filedict, pygem_prms.thickness_colsdrop) main_glac_hyps[main_glac_icethickness == 0] = 0 # Width [km], average main_glac_width = modelsetup.import_Husstable(main_glac_rgi, pygem_prms.width_filepath,
cfg.PARAMS['border'] = 10
# NOTE(review): the original comment claimed dl_verify was being turned off
# (because verification of the huge downloads is slow), but the code sets it
# to True — comment/code mismatch; confirm which is intended.
cfg.PARAMS['dl_verify'] = True
cfg.PARAMS['use_multiple_flowlines'] = False
# temporary directory for testing (deleted on computer restart)
#cfg.PATHS['working_dir'] = utils.get_temp_dir('PyGEM_ex')
cfg.PATHS['working_dir'] = pygem_prms.oggm_gdir_fp

# ===== LOAD GLACIERS =====
# Either take the explicit glacier list from pygem_prms, or select by region
if pygem_prms.glac_no is not None:
    glac_no = pygem_prms.glac_no
else:
    main_glac_rgi_all = modelsetup.selectglaciersrgitable(
        rgi_regionsO1=pygem_prms.rgi_regionsO1,
        rgi_regionsO2=pygem_prms.rgi_regionsO2,
        rgi_glac_number=pygem_prms.rgi_glac_number)
    glac_no = list(main_glac_rgi_all['rgino_str'].values)
# Build RGI v6 ids, e.g. '13.00001' -> 'RGI60-13.00001'
rgi_ids = ['RGI60-' + x.split('.')[0].zfill(2) + '.' + x.split('.')[1] for x in glac_no]

#%% ===== SELECT BEST DEM =====
# Get the pre-processed topography data
# - creates directories from scratch
gdirs = rgitopo.init_glacier_directories_from_rgitopo(rgi_ids)

# ===== FLOWLINES (w debris) =====
# - checks if directories are created (only use if you're on an already prepared directory)
#gdirs = workflow.init_glacier_directories(rgi_ids)
def load_masschange_monthly(regions, ds_ending, netcdf_fp=sim_netcdf_fp, option_add_caldata=0):
    """Load monthly mass change data for the given regions.

    Parameters
    ----------
    regions : iterable
        RGI order-1 region numbers to load
    ds_ending : str
        filename ending appended to 'R<region>' to form each netCDF filename
    netcdf_fp : str
        filepath of the simulation netCDF files
    option_add_caldata : int
        if 1, also load and return the calibration data

    Returns
    -------
    main_glac_rgi, glac_wide_masschange, glac_wide_area, time_values
        plus cal_data when option_add_caldata == 1.
        glac_wide_masschange is in km3 mwe.  time_values comes from the last
        region processed — assumes all regions share the same time axis
        (TODO confirm).
    """
    count = 0
    for region in regions:
        count += 1
        # Load datasets
        ds_fn = 'R' + str(region) + ds_ending
        ds = xr.open_dataset(netcdf_fp + ds_fn)
        main_glac_rgi_region_ds = pd.DataFrame(ds.glacier_table.values, columns=ds.glac_attrs)
        glac_wide_massbaltotal_region = ds.massbaltotal_glac_monthly.values[:, :, 0]
        glac_wide_area_annual_region = ds.area_glac_annual.values[:, :, 0]
        time_values = pd.Series(ds.massbaltotal_glac_monthly.coords['time'].values)

        # ===== GLACIER DATA =====
        main_glac_rgi_region = modelsetup.selectglaciersrgitable(
            rgi_regionsO1=[region], rgi_regionsO2='all', rgi_glac_number='all')
        # Sanity check: RGI table and simulation output must list the same glaciers
        if (main_glac_rgi_region['glacno'] - main_glac_rgi_region_ds['glacno']).sum() == 0:
            print('Region', str(region), ': number of glaciers match')
        else:
            # previously a silent failure — warn so mismatched inputs are visible
            print('Region', str(region), ': WARNING - glacier mismatch between RGI table and dataset')
        # Glacier hypsometry
        main_glac_hyps_region = modelsetup.import_Husstable(
            main_glac_rgi_region, pygem_prms.hyps_filepath,
            pygem_prms.hyps_filedict, pygem_prms.hyps_colsdrop)
        # Ice thickness [m], average
        # NOTE(review): this function mixes the 'input' and 'pygem_prms' config
        # modules — presumably aliases from a partial rename; confirm and unify.
        main_glac_icethickness_region = modelsetup.import_Husstable(
            main_glac_rgi_region, input.thickness_filepath,
            input.thickness_filedict, input.thickness_colsdrop)
        main_glac_hyps_region[main_glac_icethickness_region == 0] = 0

        # ===== CALIBRATION DATA =====
        if option_add_caldata == 1:
            dates_table_nospinup = modelsetup.datesmodelrun(
                startyear=input.startyear, endyear=input.endyear, spinupyears=0)
            # pd.concat replaces DataFrame.append (removed in pandas >= 2.0)
            cal_data_list = []
            for dataset in cal_datasets:
                cal_subset = class_mbdata.MBData(name=dataset)
                cal_data_list.append(cal_subset.retrieve_mb(
                    main_glac_rgi_region, main_glac_hyps_region, dates_table_nospinup))
            cal_data_region = pd.concat(cal_data_list, ignore_index=True)
            cal_data_region = cal_data_region.sort_values(['glacno', 't1_idx'])
            cal_data_region.reset_index(drop=True, inplace=True)

        # ===== APPEND DATASETS =====
        if count == 1:
            main_glac_rgi = main_glac_rgi_region
            main_glac_hyps = main_glac_hyps_region
            main_glac_icethickness = main_glac_icethickness_region
            glac_wide_massbaltotal = glac_wide_massbaltotal_region
            glac_wide_area_annual = glac_wide_area_annual_region
            if option_add_caldata == 1:
                cal_data = cal_data_region
        else:
            main_glac_rgi = pd.concat([main_glac_rgi, main_glac_rgi_region])
            glac_wide_massbaltotal = np.concatenate(
                [glac_wide_massbaltotal, glac_wide_massbaltotal_region])
            glac_wide_area_annual = np.concatenate(
                [glac_wide_area_annual, glac_wide_area_annual_region])
            if option_add_caldata == 1:
                cal_data = pd.concat([cal_data, cal_data_region])
            # Harmonize elevation-bin columns before concatenating: pad the
            # narrower table(s) with zero-filled columns
            if main_glac_hyps_region.shape[1] > main_glac_hyps.shape[1]:
                all_col = list(main_glac_hyps.columns.values)
                reg_col = list(main_glac_hyps_region.columns.values)
                new_cols = [item for item in reg_col if item not in all_col]
                for new_col in new_cols:
                    main_glac_hyps[new_col] = 0
                    main_glac_icethickness[new_col] = 0
            elif main_glac_hyps_region.shape[1] < main_glac_hyps.shape[1]:
                all_col = list(main_glac_hyps.columns.values)
                reg_col = list(main_glac_hyps_region.columns.values)
                new_cols = [item for item in all_col if item not in reg_col]
                for new_col in new_cols:
                    main_glac_hyps_region[new_col] = 0
                    main_glac_icethickness_region[new_col] = 0
            main_glac_hyps = pd.concat([main_glac_hyps, main_glac_hyps_region])
            main_glac_icethickness = pd.concat([main_glac_icethickness,
                                                main_glac_icethickness_region])

    # reset index
    main_glac_rgi.reset_index(inplace=True, drop=True)
    main_glac_hyps.reset_index(inplace=True, drop=True)
    main_glac_icethickness.reset_index(inplace=True, drop=True)
    if option_add_caldata == 1:
        cal_data.reset_index(inplace=True, drop=True)

    # Volume [km**3] and mean elevation [m a.s.l.]
    main_glac_rgi['Volume'], main_glac_rgi['Zmean'] = modelsetup.hypsometrystats(
        main_glac_hyps, main_glac_icethickness)

    # ===== MASS CHANGE CALCULATIONS =====
    # Compute glacier volume change for every time step and use this to compute mass balance
    # annual area repeated for each of the 12 months (drop the final year-end value)
    glac_wide_area = np.repeat(glac_wide_area_annual[:, :-1], 12, axis=1)
    # Mass change [km3 mwe]:  mb [mwea] * (1 km / 1000 m) * area [km2]
    glac_wide_masschange = glac_wide_massbaltotal / 1000 * glac_wide_area

    if option_add_caldata == 1:
        return main_glac_rgi, glac_wide_masschange, glac_wide_area, time_values, cal_data
    else:
        return main_glac_rgi, glac_wide_masschange, glac_wide_area, time_values
netcdf_fn = 'ERA-Interim_2000_2018_masschange_p' + str( int(degree_size * 100)) + 'deg.nc' output_ds_all.to_netcdf(sim_netcdf_fp + netcdf_fn, encoding=encoding) # Close datasets output_ds_all.close() print( np.round(output_ds_all.masschange_monthly[:, :, :].values.sum() / 18, 2), 'Gt/yr') #%% if option_trishuli == 1: glac_no = input.glac_fromcsv( input.main_directory + '/../qgis_himat/trishuli_shp/trishuli_RGIIds.csv') main_glac_rgi = modelsetup.selectglaciersrgitable(glac_no=glac_no) # ds_new = xr.open_dataset(input.output_sim_fp + 'ERA-Interim/Trishuli_ERA-Interim_c2_ba1_100sets_2000_2017.nc') # ds_old13 = xr.open_dataset(input.output_sim_fp + 'ERA-Interim/ERA-Interim_1980_2017_nochg/' + # 'R13_ERA-Interim_c2_ba1_100sets_1980_2017.nc') # ds_old15 = xr.open_dataset(input.output_sim_fp + 'ERA-Interim/ERA-Interim_1980_2017_nochg/' + # 'R15_ERA-Interim_c2_ba1_100sets_1980_2017.nc') # time_old_idx_start = 12*20 # time_new_idx_start = 0 # years = np.arange(2000,2018) # option_components = 1 ds_new = xr.open_dataset( input.output_sim_fp + 'IPSL-CM5A-LR/' + 'Trishuli_IPSL-CM5A-LR_rcp85_c2_ba1_100sets_2000_2100.nc') ds_old13 = xr.open_dataset(
else: debug = False # Reference GCM name print('Reference climate data is:', input.ref_gcm_name) # RGI glacier number if args.rgi_glac_number_fn is not None: with open(args.rgi_glac_number_fn, 'rb') as f: rgi_glac_number = pickle.load(f) else: rgi_glac_number = input.rgi_glac_number # Select glaciers and define chunks main_glac_rgi_all = modelsetup.selectglaciersrgitable( rgi_regionsO1=input.rgi_regionsO1, rgi_regionsO2='all', rgi_glac_number=input.rgi_glac_number) # Define chunk size for parallel processing if args.option_parallels != 0: num_cores = int( np.min( [main_glac_rgi_all.shape[0], args.num_simultaneous_processes])) chunk_size = int(np.ceil(main_glac_rgi_all.shape[0] / num_cores)) else: # if not running in parallel, chunk size is all glaciers chunk_size = main_glac_rgi_all.shape[0] # Read GCM names from argument parser gcm_name = args.gcm_list_fn if args.gcm_name is not None: gcm_list = [args.gcm_name]
def mb_bins_to_reg_glacierwide(mb_binned_fp=pygem_prms.mb_binned_fp, O1Regions=None):
    """Convert binned mass-balance csv files to regional glacier-wide values.

    For every region, reads each '<glacier>_mb_bins.csv' in
    mb_binned_fp/<region>/, computes the area-weighted glacier-wide mass
    balance, flags 1.5*IQR outliers per O2 subregion, and writes
    '<region>_mb_glacwide_all.csv' back to mb_binned_fp.

    Parameters
    ----------
    mb_binned_fp : str
        filepath containing one subdirectory of binned csv files per region
    O1Regions : list of str, optional
        RGI order-1 region strings (default ['01'])
    """
    # None-sentinel avoids the shared mutable-default-argument pitfall
    if O1Regions is None:
        O1Regions = ['01']
    # BUGFIX: removed the development-time overrides of mb_binned_fp and
    # O1Regions (marked "Delete these" in the original) that silently
    # discarded both arguments.
    print('\n\n SPECIFYING UNCERTAINTY AS 0.3 mwea for model development - needs to be updated from mb providers!\n\n')
    reg_mb_mwea_err = 0.3
    # Observation period [decimal years] for each region's geodetic data
    mb_yrfrac_dict = {'01': [2000.419, 2018.419],
                      '02': [2000.128, 2012],
                      '03': [2000.419, 2018.419],
                      '04': [2000.419, 2018.419],
                      '05': [2000.419, 2018.419],
                      '06': [2000.419, 2018.419],
                      '07': [2000.419, 2018.419],
                      '08': [2000.419, 2018.419],
                      '09': [2000.419, 2018.419],
                      '10': [2000.128, 2012],
                      '11': [2000.128, 2013],
                      '12': [2000.128, 2012],
                      'HMA': [2000.419, 2018.419],
                      '16': [2000.128, 2013.128],
                      '17': [2000.128, 2013.128],
                      '18': [2000.128, 2013]}
    for reg in O1Regions:
        reg_fp = mb_binned_fp + reg + '/'
        main_glac_rgi = modelsetup.selectglaciersrgitable(
            rgi_regionsO1=[reg], rgi_regionsO2='all', rgi_glac_number='all')
        # Binned filenames (sorted for a deterministic processing order)
        reg_binned_fns = sorted(i for i in os.listdir(reg_fp) if i.endswith('_mb_bins.csv'))
        print('Region ' + reg + ' has binned data for ' + str(len(reg_binned_fns)) + ' glaciers.')
        # Output table, initialised to NaN so unprocessed glaciers are obvious
        reg_mb_df_cns = ['RGIId', 'O1Region', 'O2Region', 'area_km2', 'mb_mwea', 'mb_mwea_err',
                         't1', 't2', 'perc_valid']
        reg_mb_df = pd.DataFrame(np.zeros((main_glac_rgi.shape[0], len(reg_mb_df_cns))),
                                 columns=reg_mb_df_cns)
        reg_mb_df.loc[:, :] = np.nan
        reg_mb_df.loc[:, 'RGIId'] = main_glac_rgi['RGIId']
        reg_mb_df.loc[:, 'O1Region'] = main_glac_rgi['O1Region']
        reg_mb_df.loc[:, 'O2Region'] = main_glac_rgi['O2Region']
        reg_mb_df.loc[:, 'area_km2'] = main_glac_rgi['Area']
        # Process binned files
        for nfn, reg_binned_fn in enumerate(reg_binned_fns):
            if nfn % 500 == 0:
                print(' ', nfn, reg_binned_fn)
            mb_binned_df = pd.read_csv(reg_fp + reg_binned_fn)
            glac_str = reg_binned_fn.split('_')[0]
            glac_rgiid = 'RGI60-' + glac_str.split('.')[0].zfill(2) + '.' + glac_str.split('.')[1]
            rgi_idx = np.where(main_glac_rgi['RGIId'] == glac_rgiid)[0][0]
            # Area-weighted glacier-wide mass balance over the valid bins
            area_km2_valid = mb_binned_df['z1_bin_area_valid_km2'].sum()
            mb_mwea = ((mb_binned_df['z1_bin_area_valid_km2'] *
                        mb_binned_df['mb_bin_mean_mwea']).sum() / area_km2_valid)
            mb_mwea_err = reg_mb_mwea_err
            t1 = mb_yrfrac_dict[reg][0]
            t2 = mb_yrfrac_dict[reg][1]
            perc_valid = area_km2_valid / reg_mb_df.loc[rgi_idx, 'area_km2'] * 100
            reg_mb_df.loc[rgi_idx, 'mb_mwea'] = mb_mwea
            reg_mb_df.loc[rgi_idx, 'mb_mwea_err'] = mb_mwea_err
            reg_mb_df.loc[rgi_idx, 't1'] = t1
            reg_mb_df.loc[rgi_idx, 't2'] = t2
            reg_mb_df.loc[rgi_idx, 'perc_valid'] = perc_valid
        #%%
        # Quality control: flag 1.5*IQR outliers per O2 subregion and record
        # the QC'd subregional mean mass balance
        O2Regions = list(set(list(main_glac_rgi['O2Region'].values)))
        O2Regions_mb_mwea_dict = {}
        rgiid_outliers = []
        for O2Region in O2Regions:
            reg_mb_df_subset = reg_mb_df[reg_mb_df['O2Region'] == O2Region]
            reg_mb_df_subset = reg_mb_df_subset.dropna(subset=['mb_mwea'])
            # Use 1.5*IQR to remove outliers
            reg_mb_mwea_25 = np.percentile(reg_mb_df_subset['mb_mwea'], 25)
            reg_mb_mwea_50 = np.percentile(reg_mb_df_subset['mb_mwea'], 50)
            reg_mb_mwea_75 = np.percentile(reg_mb_df_subset['mb_mwea'], 75)
            reg_mb_mwea_iqr = reg_mb_mwea_75 - reg_mb_mwea_25
            print(np.round(reg_mb_mwea_25, 2), np.round(reg_mb_mwea_50, 2),
                  np.round(reg_mb_mwea_75, 2), np.round(reg_mb_mwea_iqr, 2))
            reg_mb_mwea_bndlow = reg_mb_mwea_25 - 1.5 * reg_mb_mwea_iqr
            reg_mb_mwea_bndhigh = reg_mb_mwea_75 + 1.5 * reg_mb_mwea_iqr
            # Record RGIIds that are outliers
            rgiid_outliers.extend(reg_mb_df_subset[
                (reg_mb_df_subset['mb_mwea'] < reg_mb_mwea_bndlow) |
                (reg_mb_df_subset['mb_mwea'] > reg_mb_mwea_bndhigh)]['RGIId'].values)
            # Select non-outliers and record mean
            reg_mb_df_subset_qc = reg_mb_df_subset[
                (reg_mb_df_subset['mb_mwea'] >= reg_mb_mwea_bndlow) &
                (reg_mb_df_subset['mb_mwea'] <= reg_mb_mwea_bndhigh)]
            reg_mb_mwea_qc_mean = reg_mb_df_subset_qc['mb_mwea'].mean()
            O2Regions_mb_mwea_dict[O2Region] = reg_mb_mwea_qc_mean
        #%%
        print('CREATE DICTIONARY FOR RGIIDs with nan values or those that are outliers')
        # print(A['mb_mwea'].mean(), A['mb_mwea'].std(), A['mb_mwea'].min(), A['mb_mwea'].max())
        # print(reg_mb_mwea, reg_mb_mwea_std)
        #%%
        reg_mb_fn = reg + '_mb_glacwide_all.csv'
        reg_mb_df.to_csv(mb_binned_fp + reg_mb_fn, index=False)
    print('TO-DO LIST:')
    print(' - quality control based on 3-sigma filter like Shean')
    print(' - extrapolate for missing or outlier glaciers by region')
binned_fullfns.append(binned_fp + i) # Sorted files binned_fullfns = [x for _, x in sorted(zip(rgiids, binned_fullfns))] rgiids = sorted(rgiids) # print('\n\nDELETE ME TO RUN ALL!\n\n') # rgiids = rgiids[0:1] # binned_fullfns = binned_fullfns[0:1] # Reference GCM name gcm_name = args.ref_gcm_name print('Reference climate data is:', gcm_name) # Select all glaciers in a region main_glac_rgi_all = modelsetup.selectglaciersrgitable(glac_no=rgiids) main_glac_rgi_all['binned_fullfn'] = binned_fullfns main_glac_rgi_all['CenLon_360'] = main_glac_rgi_all['CenLon'] main_glac_rgi_all.loc[ main_glac_rgi_all['CenLon_360'] < 0, 'CenLon_360'] = ( 360 + main_glac_rgi_all.loc[main_glac_rgi_all['CenLon_360'] < 0, 'CenLon_360']) main_glac_rgi_all['RefYear'] = (main_glac_rgi_all['RefDate'] / 1e4).astype(int) # print(np.where(main_glac_rgi_all.rgino_str.values == '08.00005')) # Define chunk size for parallel processing if args.option_parallels != 0: num_cores = int( np.min(