def _get_ref_glaciers(gdirs):
    """Get the list of glaciers we have valid data for."""

    flink, mbdatadir = utils.get_wgms_files()
    dfids = pd.read_csv(flink)['RGI_ID'].values

    # TODO: we removed marine glaciers here. Is it ok?
    ref_gdirs = []
    for g in gdirs:
        if g.rgi_id not in dfids or g.terminus_type != 'Land-terminating':
            continue
        mbdf = g.get_ref_mb_data()
        if len(mbdf) >= 5:
            ref_gdirs.append(g)
    return ref_gdirs
def _get_ref_glaciers(gdirs):
    """Get the list of glaciers we have valid data for."""

    flink, _ = utils.get_wgms_files()
    dfids = pd.read_csv(flink)[gdirs[0].rgi_version + '_ID'].values

    # TODO: we removed marine glaciers here. Is it ok?
    ref_gdirs = []
    for g in gdirs:
        if g.rgi_id not in dfids or g.terminus_type != 'Land-terminating':
            continue
        mbdf = g.get_ref_mb_data()
        if len(mbdf) >= 5:
            ref_gdirs.append(g)
    return ref_gdirs
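# --- Usage sketch (illustration, not from the original source) ----------
# Both `_get_ref_glaciers` variants above are meant to be called on a
# list of oggm GlacierDirectory objects produced by an earlier workflow
# step; `workflow` and `rgidf` are assumed to come from the surrounding
# scripts.
import logging

log = logging.getLogger(__name__)

gdirs = workflow.init_glacier_directories(rgidf)
ref_gdirs = _get_ref_glaciers(gdirs)
log.info('%d of %d glaciers have usable WGMS reference data',
         len(ref_gdirs), len(gdirs))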
def test_download_demo_files(self):
    f = utils.get_demo_file('Hintereisferner.shp')
    self.assertTrue(os.path.exists(f))

    sh = salem.read_shapefile(f)
    self.assertTrue(hasattr(sh, 'geometry'))

    # Data files
    cfg.initialize()

    lf, df = utils.get_wgms_files()
    self.assertTrue(os.path.exists(lf))

    lf = utils.get_glathida_file()
    self.assertTrue(os.path.exists(lf))
def test_download_demo_files(self):
    f = utils.get_demo_file('Hintereisferner.shp')
    self.assertTrue(os.path.exists(f))

    sh = salem.read_shapefile(f)
    self.assertTrue(hasattr(sh, 'geometry'))

    # Data files
    cfg.initialize()

    lf, df = utils.get_wgms_files()
    self.assertTrue(os.path.exists(df))

    lf = utils.get_glathida_file()
    self.assertTrue(os.path.exists(lf))
def get_rgi_df(reset=False):
    """This function prepares a kind of `fake` RGI file, with the updated
    geometries for ITMIX.
    """

    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    RGI_DIR = utils.get_rgi_dir()

    df_rgi_file = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        divides = []
        rgidf = []
        _rgi_ids_for_overwrite = []
        for i, row in df_itmix.iterrows():
            log.info('Prepare RGI df for ' + row.name)

            # read the rgi region
            rgi_shp = find_path(RGI_DIR, row['rgi_reg'] + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()

            # use the ITMIX shape where possible
            if row.name in ['Hellstugubreen', 'Freya', 'Aqqutikitsoq',
                            'Brewster', 'Kesselwandferner', 'NorthGlacier',
                            'SouthGlacier', 'Tasman', 'Unteraar',
                            'Washmawapta', 'Columbia']:
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp = salem.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], 'geometry'] = shpg.Polygon(
                        shp.iloc[0].geometry)
                if shp.iloc[0].geometry.type == 'MultiLineString':
                    # Columbia
                    geometry = shp.iloc[0].geometry
                    parts = list(geometry)
                    for p in parts:
                        assert p.type == 'LineString'
                    exterior = shpg.Polygon(parts[0])
                    # let's assume that all other polygons are in fact
                    # interiors
                    interiors = []
                    for p in parts[1:]:
                        assert exterior.contains(p)
                        interiors.append(p)
                    geometry = shpg.Polygon(parts[0], interiors)
                    assert 'Polygon' in geometry.type
                    shp.loc[shp.index[0], 'geometry'] = geometry

                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                shf = find_path(SEARCHD, '*_' + row.name + '*.shp')
                shp2 = salem.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], 'geometry'].contains(
                        shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            elif len(rgi_parts) > 1:
                # Ice-caps. Make divides
                # First we gather all the parts:
                sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
                # Make the multipolygon for the record
                multi = shpg.MultiPolygon([g for g in sel.geometry])
                # update the RGI attributes. We take a dummy rgi ID
                new_area = np.sum(sel.Area)
                found = False
                for i in range(len(sel)):
                    tsel = sel.iloc[[i]].copy()
                    if 'Multi' in tsel.loc[tsel.index[0], 'geometry'].type:
                        continue
                    else:
                        found = True
                        sel = tsel
                        break
                if not found:
                    raise RuntimeError()

                inif = 0.
                add = 1e-5
                if row.name == 'Devon':
                    inif = 0.001
                    add = 1e-4
                while True:
                    buff = multi.buffer(inif)
                    if 'Multi' in buff.type:
                        inif += add
                    else:
                        break
                x, y = multi.centroid.xy
                if 'Multi' in buff.type:
                    raise RuntimeError
                sel.loc[sel.index[0], 'geometry'] = buff
                sel.loc[sel.index[0], 'Area'] = new_area
                sel.loc[sel.index[0], 'CenLon'] = np.asarray(x)[0]
                sel.loc[sel.index[0], 'CenLat'] = np.asarray(y)[0]

                # Divides db
                div_sel = dict()
                for k, v in sel.iloc[0].iteritems():
                    if k == 'geometry':
                        div_sel[k] = multi
                    elif k == 'RGIId':
                        div_sel['RGIID'] = v
                    else:
                        div_sel[k] = v
                divides.append(div_sel)
            else:
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Add divides to the original one
        adf = pd.DataFrame(divides)
        adf.to_pickle(cfg.PATHS['itmix_divs'])

        log.info('N glaciers ITMIX: {}'.format(len(rgidf)))

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        wgms_df = wgms_df.loc[~wgms_df.RGI_ID.isin(_rgi_ids_for_overwrite)]

        log.info('N glaciers WGMS: {}'.format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids_for_overwrite.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        gtd_df = gtd_df.loc[~gtd_df.RGI_ID.isin(_rgi_ids_for_overwrite)]
        log.info('N glaciers GTD: {}'.format(len(gtd_df)))

        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = find_path(RGI_DIR, reg + '_rgi50_*.shp')
            rgi_df = salem.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)
    return rgidf
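# --- Usage sketch (illustration, not from the original source) ----------
# `get_rgi_df` caches its result as a pickle, so the expensive shapefile
# assembly only runs once; pass reset=True to force a rebuild after the
# ITMIX link file changes.
rgidf = get_rgi_df()            # fast after the first call
rgidf = get_rgi_df(reset=True)  # rebuild from the shapefiles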
def compute_ref_t_stars(gdirs):
    """Detects the best t* for the reference glaciers.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    log.info('Compute the reference t* and mu* for WGMS glaciers')

    # Get ref glaciers (all glaciers with MB)
    flink, mbdatadir = utils.get_wgms_files()
    dfids = pd.read_csv(flink)['RGI_ID'].values

    # Reference glaciers only if in the list
    # TODO: we removed marine glaciers here. Is it ok?
    ref_gdirs = [g for g in gdirs if (g.rgi_id in dfids and
                                      g.terminus_type == 'Land-terminating')]

    # Loop
    only_one = []  # start to store the glaciers with just one t*
    per_glacier = dict()
    for gdir in ref_gdirs:
        # all possible mus
        mu_candidates(gdir)
        # list of mus compatible with refmb
        reff = os.path.join(mbdatadir, 'mbdata_' + gdir.rgi_id + '.csv')
        mbdf = pd.read_csv(reff).set_index('YEAR')
        t_star, res_bias = t_star_from_refmb(gdir, mbdf['ANNUAL_BALANCE'])
        # if we have just one candidate this is good
        if len(t_star) == 1:
            only_one.append(gdir.rgi_id)
        # this might be more than one, we'll have to select them later
        per_glacier[gdir.rgi_id] = (gdir, t_star, res_bias)

    # At least one of the glaciers should have a single t*, otherwise we
    # don't know how to start
    if len(only_one) == 0:
        if os.path.basename(os.path.dirname(flink)) == 'test-workflow':
            # TODO: hardcoded stuff here, for the test workflow
            only_one.append('RGI40-11.00887')
            gdir, t_star, res_bias = per_glacier['RGI40-11.00887']
            per_glacier['RGI40-11.00887'] = (gdir, [t_star[-1]],
                                             [res_bias[-1]])
        else:
            raise RuntimeError("Didn't expect to be here.")

    log.info('%d out of %d have only one possible t*. Start from here',
             len(only_one), len(ref_gdirs))

    # Ok. now loop over the nearest glaciers until all have a unique t*
    while True:
        ids_left = [id for id in per_glacier.keys() if id not in only_one]
        if len(ids_left) == 0:
            break

        # Compute the summed distance to all glaciers with one t*
        distances = []
        for id in ids_left:
            gdir, t_star, res_bias = per_glacier[id]
            lon, lat = gdir.cenlon, gdir.cenlat
            ldis = 0.
            for id_o in only_one:
                ogdir, _, _ = per_glacier[id_o]
                ldis += utils.haversine(lon, lat, ogdir.cenlon, ogdir.cenlat)
            distances.append(ldis)

        # Take the shortest and choose the best t*
        gdir, t_star, res_bias = per_glacier[ids_left[np.argmin(distances)]]
        distances = []
        for tt in t_star:
            ldis = 0.
            for id_o in only_one:
                _, ot_star, _ = per_glacier[id_o]
                ldis += np.abs(tt - ot_star)
            distances.append(ldis)
        amin = np.argmin(distances)
        per_glacier[gdir.rgi_id] = (gdir, [t_star[amin]], [res_bias[amin]])
        only_one.append(gdir.rgi_id)

    # Write out the data
    rgis_ids, t_stars, biases, lons, lats = [], [], [], [], []
    for id, (gdir, t_star, res_bias) in per_glacier.items():
        rgis_ids.append(id)
        t_stars.append(t_star[0])
        biases.append(res_bias[0])
        lats.append(gdir.cenlat)
        lons.append(gdir.cenlon)
    df = pd.DataFrame(index=rgis_ids)
    df['tstar'] = t_stars
    df['bias'] = biases
    df['lon'] = lons
    df['lat'] = lats
    file = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(file)
# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = True

# Set to True for operational runs - here we want all glaciers to run
cfg.PARAMS['continue_on_error'] = False

if baseline == 'HISTALP':
    # Other params: see https://oggm.org/2018/08/10/histalp-parameters/
    cfg.PARAMS['baseline_y0'] = 1850
    cfg.PARAMS['prcp_scaling_factor'] = 1.75
    cfg.PARAMS['temp_melt'] = -1.75

# Get the reference glacier ids (they are different for each RGI version)
rgi_dir = utils.get_rgi_dir(version=rgi_version)
df, _ = utils.get_wgms_files()
rids = df['RGI{}0_ID'.format(rgi_version[0])]

# We can't do Antarctica
rids = [rid for rid in rids if not ('-19.' in rid)]

# For HISTALP only RGI reg 11
if baseline == 'HISTALP':
    rids = [rid for rid in rids if '-11.' in rid]

# Make a new dataframe with those (this takes a while)
log.info('Reading the RGI shapefiles...')
rgidf = utils.get_rgi_glacier_entities(rids, version=rgi_version)
log.info('For RGIV{} we have {} candidate reference '
         'glaciers.'.format(rgi_version, len(rgidf)))
# No need for intersects since this has an effect on the inversion only
cfg.PARAMS['use_intersects'] = False

# Use multiprocessing?
cfg.PARAMS['use_multiprocessing'] = True

# Set to True for operational runs
cfg.PARAMS['continue_on_error'] = False

# Pre-download other files which will be needed later
_ = utils.get_cru_file(var='tmp')
_ = utils.get_cru_file(var='pre')
rgi_dir = utils.get_rgi_dir(version=rgi_version)

# Get the reference glacier ids (they are different for each RGI version)
df, _ = utils.get_wgms_files(version=rgi_version)
rids = df['RGI{}0_ID'.format(rgi_version)]

# Make a new dataframe with those (this takes a while)
log.info('Reading the RGI shapefiles...')
rgidf = []
for reg in df['RGI_REG'].unique():
    if reg == '19':
        continue  # we have no climate data in Antarctica
    fn = '*' + reg + '_rgi{}0_*.shp'.format(rgi_version)
    fs = list(sorted(glob(path.join(rgi_dir, '*', fn))))[0]
    sh = gpd.read_file(fs)
    rgidf.append(sh.loc[sh.RGIId.isin(rids)])
rgidf = pd.concat(rgidf)
rgidf.crs = sh.crs  # for geolocalisation
def mb_calibration(rgi_version, baseline):
    """Run the mass balance calibration for the VAS model. RGI version
    and baseline climate must be given.

    Parameters
    ----------
    rgi_version : str
        Version (and subversion) of the RGI, e.g., '62'
    baseline : str
        'HISTALP' or 'CRU', name of the baseline climate
    """

    # initialize OGGM and set up the run parameters
    vascaling.initialize(logging_level='WORKFLOW')

    # LOCAL paths (where to write the OGGM run output)
    # dirname = 'VAS_ref_mb_{}_RGIV{}'.format(baseline, rgi_version)
    # wdir = utils.gettempdir(dirname, home=True, reset=True)
    # utils.mkdir(wdir, reset=True)
    # cfg.PATHS['working_dir'] = wdir

    # CLUSTER paths
    wdir = os.environ.get('WORKDIR', '')
    cfg.PATHS['working_dir'] = wdir

    # we are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True
    # we are using which baseline data?
    cfg.PARAMS['baseline_climate'] = baseline
    # no need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False
    # use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True
    # set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True
    # 10 is only for OGGM-VAS, OGGM needs 80 to run
    cfg.PARAMS['border'] = 80

    if baseline == 'HISTALP':
        # OGGM HISTALP PARAMETERS from Matthias Dusch
        # see https://oggm.org/2018/08/10/histalp-parameters/
        # cfg.PARAMS['prcp_scaling_factor'] = 1.75
        # cfg.PARAMS['temp_melt'] = -1.75
        # cfg.PARAMS['temp_all_solid'] = 0
        # cfg.PARAMS['prcp_default_gradient'] = 0

        # VAS HISTALP PARAMETERS from x-validation
        cfg.PARAMS['prcp_scaling_factor'] = 2.5
        cfg.PARAMS['temp_melt'] = -0.5
        cfg.PARAMS['temp_all_solid'] = 0
        cfg.PARAMS['prcp_default_gradient'] = 0
    elif baseline == 'CRU':
        # using the parameters from Marzeion et al. (2012)
        # cfg.PARAMS['prcp_scaling_factor'] = 2.5
        # cfg.PARAMS['temp_melt'] = 1
        # cfg.PARAMS['temp_all_solid'] = 3
        # cfg.PARAMS['prcp_default_gradient'] = 3e-4

        # using the parameters from Malles and Marzeion 2020
        cfg.PARAMS['prcp_scaling_factor'] = 3
        cfg.PARAMS['temp_melt'] = 0
        cfg.PARAMS['temp_all_solid'] = 4
        cfg.PARAMS['prcp_default_gradient'] = 4e-4

    # the next step is to get all the reference glaciers,
    # i.e. glaciers with mass balance measurements.

    # get the reference glacier ids (they are different for each RGI version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(rgi_version[0])]

    # we can't do Antarctica
    rids = [rid for rid in rids if not ('-19.' in rid)]

    # For HISTALP only RGI reg 11.01 (ALPS)
    if baseline == 'HISTALP':
        rids = [rid for rid in rids if '-11' in rid]

    # initialize the glacier regions
    base_url = "https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.4/" \
               "L3-L5_files/CRU/elev_bands/qc3/pcp2.5/match_geod"
    # Go - get the pre-processed glacier directories
    gdirs = workflow.init_glacier_directories(rids, from_prepro_level=3,
                                              prepro_base_url=base_url,
                                              prepro_rgi_version=rgi_version)

    # Some glaciers in RGI Region 11 are not inside the HISTALP domain
    if baseline == 'HISTALP':
        gdirs = [gdir for gdir in gdirs if gdir.rgi_subregion == '11-01']

    # get reference glaciers with mass balance measurements
    gdirs = utils.get_ref_mb_glaciers(gdirs)

    # make a new dataframe with those (this takes a while)
    print('For RGIV{} we have {} candidate reference '
          'glaciers.'.format(rgi_version, len(gdirs)))

    # run climate tasks
    vascaling.compute_ref_t_stars(gdirs)
    # execute_entity_task(vascaling.local_t_star, gdirs)

    # we store the associated params
    mb_calib = gdirs[0].read_pickle('climate_info')['mb_calib_params']
    with open(os.path.join(wdir, 'mb_calib_params.json'), 'w') as fp:
        json.dump(mb_calib, fp)
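# --- Usage sketch (illustration, not from the original source) ----------
# The calibration driver takes the RGI version and the baseline climate
# as its only knobs; on the cluster the working directory comes from the
# WORKDIR environment variable. The fallback path below is assumed.
import os

if __name__ == '__main__':
    os.environ.setdefault('WORKDIR', '/tmp/vas_mb_calibration')
    for baseline in ('CRU', 'HISTALP'):
        mb_calibration('62', baseline)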
def get_rgi_df(reset=False):
    # This makes an RGI dataframe with all ITMIX + WGMS + GTD glaciers
    df_rgi_file = os.path.expanduser('~/itmix_rgi_shp.pkl')
    if os.path.exists(df_rgi_file) and not reset:
        rgidf = pd.read_pickle(df_rgi_file)
    else:
        linkf = os.path.join(DATA_DIR, 'itmix', 'itmix_rgi_links.pkl')
        df_itmix = pd.read_pickle(linkf)

        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)

        f = utils.get_glathida_file()
        gtd_df = pd.read_csv(f)

        rgidf = []
        _rgi_ids = []
        for i, row in df_itmix.iterrows():
            # read the rgi region
            rgi_shp = os.path.join(RGI_DIR, "*",
                                   row['rgi_reg'] + '_rgi50_*.shp')
            rgi_shp = list(glob.glob(rgi_shp))[0]
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            rgi_parts = row.T['rgi_parts_ids']
            sel = rgi_df.loc[rgi_df.RGIId.isin(rgi_parts)].copy()
            _rgi_ids.extend(rgi_parts)

            # use the ITMIX shape where possible
            if row.name in ['Hellstugubreen', 'Freya', 'Aqqutikitsoq',
                            'Brewster', 'Kesselwandferner', 'NorthGlacier',
                            'SouthGlacier', 'Tasman', 'Unteraar',
                            'Washmawapta']:
                # the loop just keeps the last glob match
                for shf in glob.glob(itmix_cfg.itmix_data_dir + '*/*/*_' +
                                     row.name + '*.shp'):
                    pass
                shp = salem.utils.read_shapefile(shf)
                if row.name == 'Unteraar':
                    shp = shp.iloc[[-1]]
                if 'LineString' == shp.iloc[0].geometry.type:
                    shp.loc[shp.index[0], 'geometry'] = shpg.Polygon(
                        shp.iloc[0].geometry)
                assert len(shp) == 1
                area_km2 = shp.iloc[0].geometry.area * 1e-6
                shp = salem.gis.transform_geopandas(shp)
                shp = shp.iloc[0].geometry
                sel = sel.iloc[[0]]
                sel.loc[sel.index[0], 'geometry'] = shp
                sel.loc[sel.index[0], 'Area'] = area_km2
            elif row.name == 'Urumqi':
                # ITMIX Urumqi is in fact two glaciers
                # the loop just keeps the last glob match
                for shf in glob.glob(itmix_cfg.itmix_data_dir + '*/*/*_' +
                                     row.name + '*.shp'):
                    pass
                shp2 = salem.utils.read_shapefile(shf)
                assert len(shp2) == 2
                for k in [0, 1]:
                    shp = shp2.iloc[[k]].copy()
                    area_km2 = shp.iloc[0].geometry.area * 1e-6
                    shp = salem.gis.transform_geopandas(shp)
                    shp = shp.iloc[0].geometry
                    assert sel.loc[sel.index[k], 'geometry'].contains(
                        shp.centroid)
                    sel.loc[sel.index[k], 'geometry'] = shp
                    sel.loc[sel.index[k], 'Area'] = area_km2
                assert len(sel) == 2
            else:
                pass

            # add glacier name to the entity
            name = ['I:' + row.name] * len(sel)
            add_n = sel.RGIId.isin(wgms_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'W-' + name[z]
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # WGMS glaciers which are not already there
        # Actually we should remove the data of those 7 to be honest...
        f, d = utils.get_wgms_files()
        wgms_df = pd.read_csv(f)
        print('N WGMS before: {}'.format(len(wgms_df)))
        wgms_df = wgms_df.loc[~wgms_df.RGI_ID.isin(_rgi_ids)]
        print('N WGMS after: {}'.format(len(wgms_df)))
        for i, row in wgms_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = os.path.join(RGI_DIR, "*", reg + '_rgi50_*.shp')
            rgi_shp = list(glob.glob(rgi_shp))[0]
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _cor = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['W:' + _cor] * len(sel)
            add_n = sel.RGIId.isin(gtd_df.RGI_ID.values)
            for z, it in enumerate(add_n.values):
                if it:
                    name[z] = 'G-' + name[z]
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        _rgi_ids.extend(wgms_df.RGI_ID.values)

        # GTD glaciers which are not already there
        # Actually we should remove the data of those 2 to be honest...
        print('N GTD before: {}'.format(len(gtd_df)))
        gtd_df = gtd_df.loc[~gtd_df.RGI_ID.isin(_rgi_ids)]
        print('N GTD after: {}'.format(len(gtd_df)))
        for i, row in gtd_df.iterrows():
            rid = row.RGI_ID
            reg = rid.split('-')[1].split('.')[0]
            # read the rgi region
            rgi_shp = os.path.join(RGI_DIR, "*", reg + '_rgi50_*.shp')
            rgi_shp = list(glob.glob(rgi_shp))[0]
            rgi_df = salem.utils.read_shapefile(rgi_shp, cached=True)

            sel = rgi_df.loc[rgi_df.RGIId.isin([rid])].copy()
            assert len(sel) == 1

            # add glacier name to the entity
            _corname = row.NAME.replace('/', 'or').replace('.', '').replace(' ', '-')
            name = ['G:' + _corname] * len(sel)
            for n in name:
                if len(n) > 48:
                    raise RuntimeError('Name too long: ' + n)
            sel.loc[:, 'Name'] = name
            rgidf.append(sel)

        # Save for not computing each time
        rgidf = pd.concat(rgidf)
        rgidf.to_pickle(df_rgi_file)
    return rgidf
# Pre-download other files which will be needed later
utils.get_cru_cl_file()
utils.get_cru_file(var='tmp')
utils.get_cru_file(var='pre')

# Some globals for more control on what to run
RUN_GIS_mask = False
RUN_GIS_PREPRO = False  # run GIS pre-processing tasks (before climate)
RUN_CLIMATE_PREPRO = False  # run climate pre-processing tasks
RUN_INVERSION = False  # run bed inversion

# Read RGI file
rgidf = salem.read_shapefile(RGI_FILE, cached=True)

# get WGMS glaciers
flink, mbdatadir = utils.get_wgms_files()
ids_with_mb = flink['RGI50_ID'].values

if PC:
    # Keep id's of glaciers in WGMS and GlathiDa V2
    keep_ids = ['RGI50-01.02228', 'RGI50-01.00037', 'RGI50-01.16316',
                'RGI50-01.00570', 'RGI50-01.22699']

    # Glaciers in the McNabb data base
    terminus_data_ids = ['RGI50-01.10689', 'RGI50-01.23642']

    keep_indexes = [((i in keep_ids) or (i in ids_with_mb) or
                     (i in terminus_data_ids)) for i in rgidf.RGIID]
    rgidf = rgidf.iloc[keep_indexes]
def mb_calibration(rgi_version, baseline):
    """Run the mass balance calibration for the VAS model. RGI version
    and baseline climate must be given.

    :param rgi_version: int, RGI version
    :param baseline: str, baseline climate 'HISTALP' or 'CRU'
    """

    # initialize OGGM and set up the run parameters
    vascaling.initialize(logging_level='WORKFLOW')

    # local paths (where to write the OGGM run output)
    # dirname = 'VAS_ref_mb_{}_RGIV{}'.format(baseline, rgi_version)
    # wdir = utils.gettempdir(dirname, home=True, reset=True)
    # utils.mkdir(wdir, reset=True)
    wdir = os.environ['WORKDIR']
    cfg.PATHS['working_dir'] = wdir

    # we are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True
    # we are using which baseline data?
    cfg.PARAMS['baseline_climate'] = baseline
    # no need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False
    # use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True
    # set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True

    if baseline == 'HISTALP':
        # other params: see https://oggm.org/2018/08/10/histalp-parameters/
        # cfg.PARAMS['prcp_scaling_factor'] = 1.75
        # cfg.PARAMS['temp_melt'] = -1.75
        cfg.PARAMS['prcp_scaling_factor'] = 2.5
        cfg.PARAMS['temp_melt'] = -0.5
    elif baseline == 'CRU':
        # using the parameters from Marzeion et al. (2012)
        cfg.PARAMS['prcp_scaling_factor'] = 2.5
        cfg.PARAMS['temp_melt'] = 1
        cfg.PARAMS['temp_all_solid'] = 3

    # the next step is to get all the reference glaciers,
    # i.e. glaciers with mass balance measurements.

    # get the reference glacier ids (they are different for each RGI version)
    rgi_dir = utils.get_rgi_dir(version=rgi_version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(rgi_version[0])]

    # we can't do Antarctica
    rids = [rid for rid in rids if not ('-19.' in rid)]

    # For HISTALP only RGI reg 11.01 (ALPS)
    # (note: the `or True` restricts the run to the Alps for all baselines)
    if baseline == 'HISTALP' or True:
        rids = [rid for rid in rids if '-11' in rid]

    debug = False
    if debug:
        print("==================================\n" +
              "DEBUG MODE: only RGI60-11.00897\n" +
              "==================================")
        rids = [rid for rid in rids if '-11.00897' in rid]
        cfg.PARAMS['use_multiprocessing'] = False

    # make a new dataframe with those (this takes a while)
    print('Reading the RGI shapefiles...')
    rgidf = utils.get_rgi_glacier_entities(rids, version=rgi_version)
    print('For RGIV{} we have {} candidate reference '
          'glaciers.'.format(rgi_version, len(rgidf)))

    # initialize the glacier regions
    gdirs = workflow.init_glacier_directories(rgidf, reset=False, force=True)
    workflow.execute_entity_task(gis.define_glacier_region, gdirs)
    workflow.execute_entity_task(gis.glacier_masks, gdirs)

    # we need to know which period we have data for
    print('Process the climate data...')
    if baseline == 'CRU':
        execute_entity_task(tasks.process_cru_data, gdirs, print_log=False)
    elif baseline == 'HISTALP':
        # Some glaciers are not in Alps
        gdirs = [gdir for gdir in gdirs if gdir.rgi_subregion == '11-01']
        # cfg.PARAMS['continue_on_error'] = True
        execute_entity_task(tasks.process_histalp_data, gdirs,
                            print_log=False, y0=1850)
        # cfg.PARAMS['continue_on_error'] = False
    else:
        execute_entity_task(tasks.process_custom_climate_data, gdirs,
                            print_log=False)

    # get reference glaciers with mass balance measurements
    gdirs = utils.get_ref_mb_glaciers(gdirs)

    # keep only these glaciers
    rgidf = rgidf.loc[rgidf.RGIId.isin([g.rgi_id for g in gdirs])]

    # save to file
    rgidf.to_file(os.path.join(wdir, 'mb_ref_glaciers.shp'))
    print('For RGIV{} and {} we have {} reference glaciers'.format(
        rgi_version, baseline, len(rgidf)))

    # sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # newly initialize glacier directories
    gdirs = workflow.init_glacier_directories(rgidf, reset=False, force=True)
    workflow.execute_entity_task(gis.define_glacier_region, gdirs)
    workflow.execute_entity_task(gis.glacier_masks, gdirs)

    # run climate tasks
    vascaling.compute_ref_t_stars(gdirs)
    execute_entity_task(vascaling.local_t_star, gdirs)

    # we store the associated params
    mb_calib = gdirs[0].read_pickle('climate_info')['mb_calib_params']
    with open(os.path.join(wdir, 'mb_calib_params.json'), 'w') as fp:
        json.dump(mb_calib, fp)
cfg.PARAMS['optimize_inversion_params'] = False
cfg.PARAMS['invert_with_sliding'] = False
cfg.PARAMS['bed_shape'] = 'parabolic'

# Some globals for more control on what to run
RUN_GIS_PREPRO = False  # run GIS preprocessing tasks (before climate)
RUN_CLIMATE_PREPRO = False  # run climate preprocessing tasks
RUN_INVERSION = False  # run bed inversion
RUN_DYNAMICS = False  # run dynamics

# Read RGI file
rgidf = salem.utils.read_shapefile(RGI_FILE, cached=True)

# Select some glaciers
# Get ref glaciers (all glaciers with MB)
flink, mbdatadir = utils.get_wgms_files()
ids_with_mb = pd.read_csv(flink)['RGI_ID'].values

# get some tw-glaciers that we want to test inside the Alaska region
keep_ids = ['RGI50-01.20791', 'RGI50-01.00037', 'RGI50-01.10402']
keep_indexes = [((i in keep_ids) or (i in ids_with_mb))
                for i in rgidf.RGIID]
rgidf = rgidf.iloc[keep_indexes]

# keep_ids = ['RGI50-01.20791']
# keep_indexes = [(i in keep_ids) for i in rgidf.RGIID]
# rgidf = rgidf.iloc[keep_indexes]

log.info('Number of glaciers: {}'.format(len(rgidf)))

# Download other files if needed
_ = utils.get_cru_file(var='tmp')
def initialization_selection():
    # -------------
    # Initialization
    # -------------
    cfg.initialize()

    # working directories
    cfg.PATHS['working_dir'] = mbcfg.PATHS['working_dir']

    cfg.PATHS['rgi_version'] = mbcfg.PARAMS['rgi_version']

    # We are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True

    # No need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False

    # Use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True

    # Set to True for operational runs
    # maybe also here?
    cfg.PARAMS['continue_on_error'] = False

    # set negative flux filtering to false. should be standard soon
    cfg.PARAMS['filter_for_neg_flux'] = False

    # Pre-download other files which will be needed later
    _ = utils.get_cru_file(var='tmp')
    _ = utils.get_cru_file(var='pre')
    rgi_dir = utils.get_rgi_dir(version=cfg.PATHS['rgi_version'])

    # Get the reference glacier ids (they are different for each RGI version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(cfg.PATHS['rgi_version'])]

    # Make a new dataframe with those (this takes a while)
    rgidf = []
    for reg in df['RGI_REG'].unique():
        if reg == '19':
            continue  # we have no climate data in Antarctica
        if mbcfg.PARAMS['region'] is not None \
                and reg != mbcfg.PARAMS['region']:
            continue
        fn = '*' + reg + '_rgi{}0_*.shp'.format(cfg.PATHS['rgi_version'])
        fs = list(sorted(glob(os.path.join(rgi_dir, '*', fn))))[0]
        sh = gpd.read_file(fs)
        rgidf.append(sh.loc[sh.RGIId.isin(rids)])
    rgidf = pd.concat(rgidf)
    rgidf.crs = sh.crs  # for geolocalisation

    # reduce Europe to Histalp area (exclude Pyrenees, etc...)
    if mbcfg.PARAMS['histalp']:
        rgidf = rgidf.loc[(rgidf.CenLon >= 4) & (rgidf.CenLon < 20) &
                          (rgidf.CenLat >= 43) & (rgidf.CenLat < 47)]

    # We have to check which of them actually have enough mb data.
    # Let OGGM do it:
    gdirs = workflow.init_glacier_regions(rgidf)

    # We need to know which period we have data for
    if mbcfg.PARAMS['histalp']:
        cfg.PATHS['climate_file'] = mbcfg.PATHS['histalpfile']
        execute_entity_task(tasks.process_custom_climate_data, gdirs)
    else:
        execute_entity_task(tasks.process_cru_data, gdirs, print_log=False)

    gdirs = utils.get_ref_mb_glaciers(gdirs)

    # Keep only these
    rgidf = rgidf.loc[rgidf.RGIId.isin([g.rgi_id for g in gdirs])]

    # Save
    rgidf.to_file(os.path.join(cfg.PATHS['working_dir'],
                               'mb_ref_glaciers.shp'))

    # Sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # Go - initialize working directories
    gdirs = workflow.init_glacier_regions(rgidf, reset=True, force=True)

    return gdirs
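# --- Usage sketch (illustration, not from the original source) ----------
# `initialization_selection` returns fully initialized glacier
# directories for the reference glaciers, ready for the calibration
# functions defined above.
gdirs = initialization_selection()
compute_ref_t_stars(gdirs)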
def mb_calibration(rgi_version, baseline):
    """Run the mass balance calibration for the VAS model. RGI version
    and baseline climate must be given.

    :param rgi_version: int, RGI version
    :param baseline: str, baseline climate 'HISTALP' or 'CRU'
    """

    # initialize OGGM and set up the run parameters
    vascaling.initialize(logging_level='WORKFLOW')

    # LOCAL paths (where to write the OGGM run output)
    # dirname = 'VAS_ref_mb_{}_RGIV{}'.format(baseline, rgi_version)
    # wdir = utils.gettempdir(dirname, home=True, reset=True)
    # utils.mkdir(wdir, reset=True)
    # cfg.PATHS['working_dir'] = wdir

    # CLUSTER paths
    wdir = os.environ.get('WORKDIR', '')
    cfg.PATHS['working_dir'] = wdir

    # we are running the calibration ourselves
    cfg.PARAMS['run_mb_calibration'] = True
    # we are using which baseline data?
    cfg.PARAMS['baseline_climate'] = baseline
    # no need for intersects since this has an effect on the inversion only
    cfg.PARAMS['use_intersects'] = False
    # use multiprocessing?
    cfg.PARAMS['use_multiprocessing'] = True
    # set to True for operational runs
    cfg.PARAMS['continue_on_error'] = True
    # 10 is only for OGGM-VAS, OGGM needs 80 to run
    cfg.PARAMS['border'] = 80

    if baseline == 'HISTALP':
        # other params: see https://oggm.org/2018/08/10/histalp-parameters/
        # cfg.PARAMS['prcp_scaling_factor'] = 1.75
        # cfg.PARAMS['temp_melt'] = -1.75
        cfg.PARAMS['prcp_scaling_factor'] = 2.5
        cfg.PARAMS['temp_melt'] = -0.5
    elif baseline == 'CRU':
        # using the parameters from Marzeion et al. (2012)
        # cfg.PARAMS['prcp_scaling_factor'] = 2.5
        # cfg.PARAMS['temp_melt'] = 1
        # cfg.PARAMS['temp_all_solid'] = 3

        # using the parameters from Malles and Marzeion 2020
        cfg.PARAMS['prcp_scaling_factor'] = 3
        cfg.PARAMS['temp_melt'] = 0
        cfg.PARAMS['temp_all_solid'] = 4
        # cfg.PARAMS['prcp_gradient'] = 4

    # the next step is to get all the reference glaciers,
    # i.e. glaciers with mass balance measurements.

    # get the reference glacier ids (they are different for each RGI version)
    df, _ = utils.get_wgms_files()
    rids = df['RGI{}0_ID'.format(rgi_version[0])]

    # we can't do Antarctica
    rids = [rid for rid in rids if not ('-19.' in rid)]

    # For HISTALP only RGI reg 11.01 (ALPS)
    if baseline == 'HISTALP':
        rids = [rid for rid in rids if '-11' in rid]

    # make a new dataframe with those (this takes a while)
    print('Reading the RGI shapefiles...')
    rgidf = utils.get_rgi_glacier_entities(rids, version=rgi_version)
    print('For RGIV{} we have {} candidate reference '
          'glaciers.'.format(rgi_version, len(rgidf)))

    # initialize the glacier regions
    base_url = 'https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.4/' \
               'L3-L5_files/RGIV62_fleb_qc3_CRU_pcp2.5'
    # Go - get the pre-processed glacier directories
    gdirs = workflow.init_glacier_directories(rids, from_prepro_level=3,
                                              prepro_base_url=base_url,
                                              prepro_rgi_version=rgi_version)

    # Some glaciers in RGI Region 11 are not inside the HISTALP domain
    if baseline == 'HISTALP':
        gdirs = [gdir for gdir in gdirs if gdir.rgi_subregion == '11-01']

    # get reference glaciers with mass balance measurements
    gdirs = utils.get_ref_mb_glaciers(gdirs)

    # keep only these glaciers
    rgidf = rgidf.loc[rgidf.RGIId.isin([g.rgi_id for g in gdirs])]

    # save to file
    rgidf.to_file(os.path.join(wdir, 'mb_ref_glaciers.shp'))
    print('For RGIV{} and {} we have {} reference glaciers'.format(
        rgi_version, baseline, len(rgidf)))

    # sort for more efficient parallel computing
    rgidf = rgidf.sort_values('Area', ascending=False)

    # newly initialize glacier directories
    gdirs = workflow.init_glacier_directories(rgidf, reset=False, force=True)
    workflow.execute_entity_task(gis.define_glacier_region, gdirs)
    workflow.execute_entity_task(gis.glacier_masks, gdirs)

    # run climate tasks
    vascaling.compute_ref_t_stars(gdirs)
    execute_entity_task(vascaling.local_t_star, gdirs)

    # we store the associated params
    mb_calib = gdirs[0].read_pickle('climate_info')['mb_calib_params']
    with open(os.path.join(wdir, 'mb_calib_params.json'), 'w') as fp:
        json.dump(mb_calib, fp)
def compute_ref_t_stars(gdirs):
    """Detects the best t* for the reference glaciers.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    log.info('Compute the reference t* and mu* for WGMS glaciers')

    # Reference glaciers only if in the list and period is good
    ref_gdirs = _get_ref_glaciers(gdirs)

    # Loop
    only_one = []  # start to store the glaciers with just one t*
    per_glacier = dict()
    for gdir in ref_gdirs:
        # all possible mus
        mu_candidates(gdir)
        # list of mus compatible with refmb
        mbdf = gdir.get_ref_mb_data()['ANNUAL_BALANCE']
        t_star, res_bias, prcp_fac = t_star_from_refmb(gdir, mbdf)
        # store the mb (could be useful later)
        gdir.write_pickle(mbdf, 'ref_massbalance')
        # if we have just one candidate this is good
        if len(t_star) == 1:
            only_one.append(gdir.rgi_id)
        # this might be more than one, we'll have to select them later
        per_glacier[gdir.rgi_id] = (gdir, t_star, res_bias, prcp_fac)

    # At least one of the glaciers should have a single t*, otherwise we
    # don't know how to start
    if len(only_one) == 0:
        flink, mbdatadir = utils.get_wgms_files()
        if os.path.basename(os.path.dirname(flink)) == 'test-workflow':
            # TODO: hardcoded stuff here, for the test workflow
            only_one.append('RGI40-11.00897')
            gdir, t_star, res_bias, prcp_fac = per_glacier['RGI40-11.00897']
            per_glacier['RGI40-11.00897'] = (gdir, [t_star[-1]],
                                             [res_bias[-1]], prcp_fac)
        else:
            raise RuntimeError('We need at least one glacier with one '
                               'tstar only.')

    log.info('%d out of %d have only one possible t*. Start from here',
             len(only_one), len(ref_gdirs))

    # Ok. now loop over the nearest glaciers until all have a unique t*
    while True:
        ids_left = [id for id in per_glacier.keys() if id not in only_one]
        if len(ids_left) == 0:
            break

        # Compute the summed distance to all glaciers with one t*
        distances = []
        for id in ids_left:
            gdir = per_glacier[id][0]
            lon, lat = gdir.cenlon, gdir.cenlat
            ldis = 0.
            for id_o in only_one:
                ogdir = per_glacier[id_o][0]
                ldis += utils.haversine(lon, lat, ogdir.cenlon, ogdir.cenlat)
            distances.append(ldis)

        # Take the shortest and choose the best t*
        pg = per_glacier[ids_left[np.argmin(distances)]]
        gdir, t_star, res_bias, prcp_fac = pg
        distances = []
        for tt in t_star:
            ldis = 0.
            for id_o in only_one:
                _, ot_star, _, _ = per_glacier[id_o]
                ldis += np.abs(tt - ot_star)
            distances.append(ldis)
        amin = np.argmin(distances)
        per_glacier[gdir.rgi_id] = (gdir, [t_star[amin]], [res_bias[amin]],
                                    prcp_fac)
        only_one.append(gdir.rgi_id)

    # Write out the data
    rgis_ids, t_stars, prcp_facs, biases, lons, lats = [], [], [], [], [], []
    for id, (gdir, t_star, res_bias, prcp_fac) in per_glacier.items():
        rgis_ids.append(id)
        t_stars.append(t_star[0])
        prcp_facs.append(prcp_fac)
        biases.append(res_bias[0])
        lats.append(gdir.cenlat)
        lons.append(gdir.cenlon)
    df = pd.DataFrame(index=rgis_ids)
    df['lon'] = lons
    df['lat'] = lats
    df['tstar'] = t_stars
    df['prcp_fac'] = prcp_facs
    df['bias'] = biases
    file = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(file)
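# --- Usage sketch (illustration, not from the original source) ----------
# `compute_ref_t_stars` writes its result to `ref_tstars.csv` in the
# working directory; downstream tasks interpolate t* from this file.
# A quick way to inspect the output (assumes cfg.PATHS['working_dir']
# is the same directory the calibration wrote to):
import os

import pandas as pd

fpath = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
ref_df = pd.read_csv(fpath, index_col=0)
print(ref_df[['tstar', 'bias']].describe())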
def prepare_divides(rgi_f):
    """Processes the rgi file and writes the intersects to OUTDIR"""

    rgi_reg = os.path.basename(rgi_f).split('_')[0]

    print('Start RGI reg ' + rgi_reg + ' ...')
    start_time = time.time()

    wgms, _ = get_wgms_files()
    f = glob(INDIR_DIVIDES + '*/*-' + rgi_reg + '.shp')[0]

    df = gpd.read_file(f)
    rdf = gpd.read_file(rgi_f)

    # Read glacier attrs
    key2 = {'0': 'Land-terminating',
            '1': 'Marine-terminating',
            '2': 'Lake-terminating',
            '3': 'Dry calving',
            '4': 'Regenerated',
            '5': 'Shelf-terminating',
            '9': 'Not assigned',
            }
    TerminusType = [key2[gtype[1]] for gtype in df.GlacType]
    IsTidewater = np.array([ttype in ['Marine-terminating',
                                      'Lake-terminating']
                            for ttype in TerminusType])

    # Plots
    # dfref = df.loc[df.RGIId.isin(wgms.RGI50_ID)]
    # for gid in np.unique(dfref.GLIMSId):
    #     dfs = dfref.loc[dfref.GLIMSId == gid]
    #     dfs.plot(cmap='Set3', linestyle='-', linewidth=5);

    # Filter
    df = df.loc[~IsTidewater]
    df = df.loc[~df.RGIId.isin(wgms.RGI50_ID)]

    df['CenLon'] = pd.to_numeric(df['CenLon'])
    df['CenLat'] = pd.to_numeric(df['CenLat'])
    df['Area'] = pd.to_numeric(df['Area'])

    # Correct areas and stuff
    n_gl_before = len(df)
    divided_ids = []
    for rid in np.unique(df.RGIId):
        sdf = df.loc[df.RGIId == rid].copy()
        srdf = rdf.loc[rdf.RGIId == rid]

        # Correct Area
        sdf.Area = np.array([float(a) for a in sdf.Area])
        geo_is_ok = []
        new_geo = []
        for g, a in zip(sdf.geometry, sdf.Area):
            if a < 0.01 * 1e6:
                geo_is_ok.append(False)
                continue
            try:
                new_geo.append(multi_to_poly(g))
                geo_is_ok.append(True)
            except Exception:
                geo_is_ok.append(False)
        sdf = sdf.loc[geo_is_ok]
        if len(sdf) < 2:
            # print(rid + ' is too small or has no valid divide...')
            df = df[df.RGIId != rid]
            continue

        area_km = sdf.Area * 1e-6
        cor_factor = srdf.Area.values / np.sum(area_km)
        if cor_factor > 1.2 or cor_factor < 0.8:
            # print(rid + ' is not OK...')
            df = df[df.RGIId != rid]
            continue
        area_km = cor_factor * area_km

        # Correct Centroid
        cenlon = [g.centroid.xy[0][0] for g in sdf.geometry]
        cenlat = [g.centroid.xy[1][0] for g in sdf.geometry]

        # ID
        new_id = [rid + '_d{:02}'.format(i + 1) for i in range(len(sdf))]

        # Write
        df.loc[sdf.index, 'Area'] = area_km
        df.loc[sdf.index, 'CenLon'] = cenlon
        df.loc[sdf.index, 'CenLat'] = cenlat
        df.loc[sdf.index, 'RGIId'] = new_id
        df.loc[sdf.index, 'geometry'] = new_geo

        divided_ids.append(rid)

    n_gl_after = len(df)

    # We make three data dirs: divides only, divides into rgi, divides + RGI
    bn = os.path.basename(rgi_f)
    bd = os.path.basename(os.path.dirname(rgi_f))
    base_dir_1 = OUTDIR_DIVIDES + '/RGIV5_DividesOnly/' + bd
    base_dir_2 = OUTDIR_DIVIDES + '/RGIV5_Corrected/' + bd
    base_dir_3 = OUTDIR_DIVIDES + '/RGIV5_OrigAndDivides/' + bd
    mkdir(base_dir_1, reset=True)
    mkdir(base_dir_2, reset=True)
    mkdir(base_dir_3, reset=True)

    df.to_file(os.path.join(base_dir_1, bn))
    dfa = pd.concat([df, rdf]).sort_values('RGIId')
    dfa.to_file(os.path.join(base_dir_3, bn))
    dfa = dfa.loc[~dfa.RGIId.isin(divided_ids)]
    dfa.to_file(os.path.join(base_dir_2, bn))

    print('RGI reg ' + rgi_reg + ' took {:.2f} seconds. We had to remove '
          '{} divides'.format(time.time() - start_time,
                              n_gl_before - n_gl_after))
    return
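# --- Usage sketch (illustration, not from the original source) ----------
# `prepare_divides` handles one regional RGI shapefile at a time; a
# driver over all regions could look like this. RGI_SHP_PATTERN is a
# hypothetical glob pattern pointing at the RGI v5 directory tree.
RGI_SHP_PATTERN = '/path/to/rgi50/*/*_rgi50_*.shp'  # assumed location

for rgi_f in sorted(glob(RGI_SHP_PATTERN)):
    prepare_divides(rgi_f)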