def distribute_t_stars(gdirs):
    """Interpolate the reference t* and bias to each glacier and apply them.

    The reference table ``ref_tstars.csv`` is read from the working
    directory. For every glacier directory the nearest reference glaciers
    are used to derive a ``tstar`` / ``bias`` pair, which is then passed to
    ``local_mustar_apparent_mb``.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """
    log.info('Distribute t* and mu*')

    csv_path = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    ref_df = pd.read_csv(csv_path)

    for gdir in gdirs:
        # Distance from this glacier to every reference glacier
        dist = utils.haversine(gdir.cenlon, gdir.cenlat,
                               ref_df.lon, ref_df.lat)

        # Keep the nearest ones (note: the 0:9 slice retains nine entries)
        nearest = np.argsort(dist)[0:9]
        neighbours = ref_df.iloc[nearest]
        sq_dist = dist[nearest]**2

        if sq_dist.iloc[0] <= 0.1:
            # Practically on top of a reference glacier: copy its values
            tstar = neighbours.tstar.iloc[0]
            bias = neighbours.bias.iloc[0]
        else:
            # Inverse squared-distance weighting; t* is truncated to an int
            tstar = int(np.average(neighbours.tstar, weights=1./sq_dist))
            bias = np.average(neighbours.bias, weights=1./sq_dist)

        local_mustar_apparent_mb(gdir, tstar=tstar, bias=bias)
def distribute_t_stars(gdirs):
    """Apply the interpolated reference t* and bias to every glacier.

    Loads ``ref_tstars.csv`` from the working directory, then, glacier by
    glacier, derives ``tstar`` and ``bias`` from the closest reference
    glaciers and forwards them to ``local_mustar_apparent_mb``.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """
    log.info('Distribute t* and mu*')

    working_dir = cfg.PATHS['working_dir']
    ref_df = pd.read_csv(os.path.join(working_dir, 'ref_tstars.csv'))

    for gdir in gdirs:
        # Distances between the glacier and all reference glaciers
        d = utils.haversine(gdir.cenlon, gdir.cenlat,
                            ref_df.lon, ref_df.lat)

        # Closest reference glaciers (the slice keeps nine of them)
        order = np.argsort(d)[0:9]
        close_refs = ref_df.iloc[order]
        d2 = d[order]**2

        if d2.iloc[0] <= 0.1:
            # Closest reference is (almost) at the same place: no averaging
            picked = {'tstar': close_refs.tstar.iloc[0],
                      'bias': close_refs.bias.iloc[0]}
        else:
            # Average weighted by the inverse of the squared distance
            picked = {
                'tstar': int(np.average(close_refs.tstar, weights=1. / d2)),
                'bias': np.average(close_refs.bias, weights=1. / d2),
            }

        local_mustar_apparent_mb(gdir, **picked)
def distribute_t_stars(gdirs, ref_df=None, minimum_mustar=0.):
    """After the computation of the reference tstars, apply
    the interpolation to each individual glacier.

    For each glacier the nearest reference glaciers are selected and their
    ``tstar``, ``prcp_fac`` and ``bias`` are either copied (when the nearest
    one is essentially at the same location) or averaged with inverse
    squared-distance weights. The result is passed to ``local_mustar``.

    Parameters
    ----------
    gdirs : []
        list of oggm.GlacierDirectory objects
    ref_df : pd.Dataframe
        replace the default calibration list. Must provide the columns
        ``lon``, ``lat``, ``tstar``, ``prcp_fac`` and ``bias``; its index
        can be anything (rows are selected by position).
    minimum_mustar: float
        if mustar goes below this threshold, clip it to that value.
        If you want this to happen with `minimum_mustar=0.` you will have
        to set `cfg.PARAMS['allow_negative_mustar']=True` first.

    Raises
    ------
    RuntimeError
        if no own calibration was run but a custom climate file is set.
    """
    log.info('Distribute t* and mu*')

    if ref_df is None:
        fp = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
        if not cfg.PARAMS['run_mb_calibration']:
            # Make some checks and use the default one
            if (('climate_file' in cfg.PATHS) and
                    os.path.exists(cfg.PATHS['climate_file'])):
                raise RuntimeError('If you are using a custom climate file '
                                   'you should run your own MB calibration.')
            v = gdirs[0].rgi_version[0]  # major version relevant
            fn = 'oggm_ref_tstars_rgi{}_cru4.csv'.format(v)
            fp = utils.get_demo_file(fn)
        ref_df = pd.read_csv(fp)

    for gdir in gdirs:
        # Compute the distance to each glacier. Work on a plain array so
        # that the selection below is strictly positional: indexing a
        # Series with integer positions turns into a label lookup whenever
        # ref_df carries a non-default index (e.g. a leave-one-out subset).
        dist = np.asarray(utils.haversine(gdir.cenlon, gdir.cenlat,
                                          ref_df.lon, ref_df.lat))

        # Take the closest ones (the 0:9 slice keeps nine entries)
        order = np.argsort(dist)[0:9]
        amin = ref_df.iloc[order]
        sq_dist = dist[order]**2

        # If really close no need to divide, else weighted average
        if sq_dist[0] <= 0.1:
            tstar = amin.tstar.iloc[0]
            prcp_fac = amin.prcp_fac.iloc[0]
            bias = amin.bias.iloc[0]
        else:
            weights = 1. / sq_dist
            tstar = int(np.average(amin.tstar, weights=weights))
            prcp_fac = np.average(amin.prcp_fac, weights=weights)
            bias = np.average(amin.bias, weights=weights)

        # Go
        local_mustar(gdir, tstar=tstar, bias=bias, prcp_fac=prcp_fac,
                     reset=True, minimum_mustar=minimum_mustar)
def distribute_t_stars(gdirs, ref_df=None):
    """After the computation of the reference tstars, apply
    the interpolation to each individual glacier.

    For each glacier the nearest reference glaciers are selected and their
    ``tstar``, ``prcp_fac`` and ``bias`` are either copied (when the nearest
    one is essentially at the same location) or averaged with inverse
    squared-distance weights. The result is passed to ``local_mustar``.

    Parameters
    ----------
    gdirs : list of oggm.GlacierDirectory objects
    ref_df : pd.DataFrame, optional
        replaces the default calibration list. Must provide the columns
        ``lon``, ``lat``, ``tstar``, ``prcp_fac`` and ``bias``; its index
        can be anything (rows are selected by position).

    Raises
    ------
    RuntimeError
        if no own calibration was run but a custom climate file is set.
    """
    log.info('Distribute t* and mu*')

    if ref_df is None:
        fp = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
        if not cfg.PARAMS['run_mb_calibration']:
            # Make some checks and use the default one
            if (('climate_file' in cfg.PATHS) and
                    os.path.exists(cfg.PATHS['climate_file'])):
                raise RuntimeError('If you are using a custom climate file '
                                   'you should run your own MB calibration.')
            fn = 'oggm_ref_tstars_rgi{}_cru4.csv'.format(gdirs[0].rgi_version)
            fp = utils.get_demo_file(fn)
        ref_df = pd.read_csv(fp)

    for gdir in gdirs:
        # Compute the distance to each glacier. A plain array keeps the
        # selection below strictly positional; indexing a Series with
        # integer positions becomes a label lookup as soon as ref_df has a
        # non-default index (e.g. a subset passed in by a cross-validation).
        dist = np.asarray(utils.haversine(gdir.cenlon, gdir.cenlat,
                                          ref_df.lon, ref_df.lat))

        # Take the closest ones (the 0:9 slice keeps nine entries)
        order = np.argsort(dist)[0:9]
        amin = ref_df.iloc[order]
        sq_dist = dist[order]**2

        # If really close no need to divide, else weighted average
        if sq_dist[0] <= 0.1:
            tstar = amin.tstar.iloc[0]
            prcp_fac = amin.prcp_fac.iloc[0]
            bias = amin.bias.iloc[0]
        else:
            weights = 1. / sq_dist
            tstar = int(np.average(amin.tstar, weights=weights))
            prcp_fac = np.average(amin.prcp_fac, weights=weights)
            bias = np.average(amin.bias, weights=weights)

        # Go
        local_mustar(gdir, tstar=tstar, bias=bias, prcp_fac=prcp_fac,
                     reset=True)
def interpolate_mu_star(gdir, full_ref_df=None):
    """Interpolate mu* for one reference glacier from its neighbours.

    The glacier's own row is removed from ``full_ref_df`` and the
    ``mu_star_glacierwide`` values of the nearest remaining glaciers are
    averaged with inverse squared-distance weights.

    Parameters
    ----------
    gdir : oggm.GlacierDirectory
        the glacier to interpolate for; only ``gdir.rgi_id`` is read.
    full_ref_df : pd.DataFrame
        reference table indexed by RGI id, with the columns ``lon``,
        ``lat`` and ``mu_star_glacierwide``. Required despite the ``None``
        default.

    Returns
    -------
    list
        ``[rgi_id, interpolated_mu_star]``

    Raises
    ------
    ValueError
        if ``full_ref_df`` is not provided.
    """
    # TODO (from the original): make it an entity task
    if full_ref_df is None:
        raise ValueError('full_ref_df is required to interpolate mu_star.')

    # ----
    # Interpolated mu_star
    # ----
    tmp_ref_df = full_ref_df.loc[full_ref_df.index != gdir.rgi_id]
    glc = full_ref_df.loc[full_ref_df.index == gdir.rgi_id]

    # Compute the distance. The table is indexed by RGI id, so the
    # selection has to be positional: do it on a plain array.
    dist = np.asarray(utils.haversine(glc.lon.values[0], glc.lat.values[0],
                                      tmp_ref_df.lon, tmp_ref_df.lat))

    # Take the closest ones (the 0:9 slice keeps nine entries)
    order = np.argsort(dist)[0:9]
    amin = tmp_ref_df.iloc[order]
    sq_dist = dist[order]**2

    interp = np.average(amin.mu_star_glacierwide, weights=1. / sq_dist)
    return [gdir.rgi_id, interp]
def compute_ref_t_stars(gdirs):
    """Select a single t* for every reference glacier and store the result.

    Candidate t* values are computed for each land-terminating glacier
    listed in the WGMS link file. Glaciers with one candidate are the
    starting set; the others are resolved one at a time, nearest to that
    set first, by taking the candidate closest to the already fixed t*.
    The outcome is written to ``ref_tstars.csv`` in the working directory.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """
    log.info('Compute the reference t* and mu* for WGMS glaciers')

    # Get ref glaciers (all glaciers with MB)
    flink, mbdatadir = utils.get_wgms_files()
    wgms_ids = pd.read_csv(flink)['RGI_ID'].values

    # Reference glaciers only if in the list
    # TODO: we removed marine glaciers here. Is it ok?
    ref_gdirs = []
    for g in gdirs:
        if g.rgi_id in wgms_ids and g.terminus_type == 'Land-terminating':
            ref_gdirs.append(g)

    only_one = []  # ids of the glaciers with just one t*
    per_glacier = dict()
    for gdir in ref_gdirs:
        # all possible mus
        mu_candidates(gdir)
        # list of mus compatible with the reference mass balance
        mb_file = os.path.join(mbdatadir, 'mbdata_' + gdir.rgi_id + '.csv')
        mbdf = pd.read_csv(mb_file).set_index('YEAR')
        t_star, res_bias = t_star_from_refmb(gdir, mbdf['ANNUAL_BALANCE'])
        # a single candidate is a safe starting point
        if len(t_star) == 1:
            only_one.append(gdir.rgi_id)
        # there might be several candidates, selected further below
        per_glacier[gdir.rgi_id] = (gdir, t_star, res_bias)

    # At least one glacier must have a single t*, otherwise there is no
    # starting point
    if len(only_one) == 0:
        if os.path.basename(os.path.dirname(flink)) != 'test-workflow':
            raise RuntimeError('Didnt expect to be here.')
        # TODO: hard-coded starting glacier, for the test workflow only
        seed_id = 'RGI40-11.00887'
        only_one.append(seed_id)
        gdir, t_star, res_bias = per_glacier[seed_id]
        per_glacier[seed_id] = (gdir, [t_star[-1]], [res_bias[-1]])

    log.info('%d out of %d have only one possible t*. Start from here',
             len(only_one), len(ref_gdirs))

    # Loop over the nearest glaciers until all have a unique t*
    while True:
        pending = [rid for rid in per_glacier.keys() if rid not in only_one]
        if not pending:
            break

        # Summed distance of each pending glacier to all resolved ones
        summed = []
        for rid in pending:
            cand = per_glacier[rid][0]
            total = 0.
            for fixed_id in only_one:
                fixed = per_glacier[fixed_id][0]
                total += utils.haversine(cand.cenlon, cand.cenlat,
                                         fixed.cenlon, fixed.cenlat)
            summed.append(total)

        # Take the closest glacier and choose its best t*
        gdir, t_star, res_bias = per_glacier[pending[np.argmin(summed)]]
        spread = []
        for tt in t_star:
            total = 0.
            for fixed_id in only_one:
                total += np.abs(tt - per_glacier[fixed_id][1])
            spread.append(total)
        best = np.argmin(spread)
        per_glacier[gdir.rgi_id] = (gdir, [t_star[best]], [res_bias[best]])
        only_one.append(gdir.rgi_id)

    # Write out the data
    entries = list(per_glacier.items())
    df = pd.DataFrame(index=[rid for rid, _ in entries])
    df['tstar'] = [rec[1][0] for _, rec in entries]
    df['bias'] = [rec[2][0] for _, rec in entries]
    df['lon'] = [rec[0].cenlon for _, rec in entries]
    df['lat'] = [rec[0].cenlat for _, rec in entries]
    out_path = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(out_path)
def compute_intersects(rgi_df, to_file='', job_id=''):
    """Computes the intersection geometries between glaciers.

    The output is a GeoDataFrame with three columns:
    - ``RGIId_1`` and ``RGIId_2``: the RGIIds of the two intersecting entities
    - ``geometry``: the intersection geometry (a LineString per row)

    Parameters
    ----------
    rgi_df : geopandas.GeoDataFrame
        the RGI outlines; needs ``RGIId``, ``CenLon``, ``CenLat`` and
        ``geometry``. A copy is taken, the input is not modified.
    to_file : str, optional
        NOTE(review): accepted but not used by this implementation.
    job_id : str, optional
        NOTE(review): accepted but not used by this implementation.

    Returns
    -------
    a geopandas.GeoDataFrame
    """
    outlines = rgi_df.copy()
    out_cols = ['RGIId_1', 'RGIId_2', 'geometry']
    out = gpd.GeoDataFrame(columns=out_cols)

    for _, major in outlines.iterrows():

        # Exterior only
        major_ring = major.geometry.exterior

        # Sort by distance to the current glacier
        outlines['dis'] = haversine(major.CenLon, major.CenLat,
                                    outlines.CenLon, outlines.CenLat)
        candidates = outlines.sort_values(by='dis')

        # Keep the close ones, drop the glacier itself, and finally keep
        # only those which intersect the outline
        candidates = candidates.loc[candidates.dis < 200000]
        candidates = candidates.loc[candidates.RGIId != major.RGIId]
        candidates = candidates.loc[candidates.intersects(major_ring)]

        for _, neighbor in candidates.iterrows():

            # Already computed?
            # NOTE(review): ``in`` on a pandas Series tests the index
            # labels, not the values, so this probably never skips - confirm
            # the intent before changing it.
            seen_first = neighbor.RGIId in out.RGIId_1
            if seen_first or neighbor.RGIId in out.RGIId_2:
                continue

            # Exterior only
            # Buffer is needed for numerical reasons
            # 1e-4 seems reasonable although it should depend on location
            neighbor_zone = neighbor.geometry.exterior.buffer(1e-4)

            # Go
            pieces = major_ring.intersection(neighbor_zone)

            # Handle the different kinds of geometry output
            if isinstance(pieces, shpg.Point):
                continue
            if isinstance(pieces, shpg.linestring.LineString):
                pieces = [pieces]
            if len(pieces) == 0:
                continue
            pieces = [p for p in pieces if not isinstance(p, shpg.Point)]
            if len(pieces) == 0:
                continue

            # Simplify the geometries if possible
            merged = linemerge(pieces)

            # Add each line to the output
            if isinstance(merged, shpg.linestring.LineString):
                merged = [merged]
            for line in merged:
                assert isinstance(line, shpg.linestring.LineString)
                # Filter the very small ones
                if line.length < 1e-3:
                    continue
                row = gpd.GeoDataFrame(
                    [[major.RGIId, neighbor.RGIId, line]], columns=out_cols)
                out = out.append(row)

    # Index and merge
    out.reset_index(inplace=True, drop=True)
    return out
def local_mustar(gdir, *, ref_df=None, tstar=None, bias=None,
                 minimum_mustar=0.):
    """Compute the local mustar from interpolated tstars.

    If tstar and bias are not provided they will be interpolated from the
    reference file.

    The result (``rgi_id``, ``t_star``, ``mu_star``, ``bias``) is written to
    the glacier's ``local_mustar`` file, and the mass-balance parameters in
    use are stored in the ``climate_info`` pickle.

    Parameters
    ----------
    gdir : oggm.GlacierDirectory
    ref_df : pd.Dataframe, optional
        replace the default calibration list with your own.
    tstar: int, optional
        the year where the glacier should be equilibrium
    bias: float, optional
        the associated reference bias
    minimum_mustar: float, optional
        if mustar goes below this threshold, clip it to that value.
        If you want this to happen with `minimum_mustar=0.` you will have
        to set `cfg.PARAMS['allow_negative_mustar']=True` first.

    Raises
    ------
    RuntimeError
        if the default calibration is requested with an unsupported
        baseline climate, or if mu* is not finite, or negative while
        ``cfg.PARAMS['allow_negative_mustar']`` is not set.
    ValueError
        if the default reference t* were calibrated with other
        mass-balance parameters than the current ones.
    """
    # Relevant mb params
    params = ['temp_default_gradient', 'temp_all_solid', 'temp_all_liq',
              'temp_melt', 'prcp_scaling_factor']

    if tstar is None or bias is None:
        # Do our own interpolation
        if ref_df is None:
            if not cfg.PARAMS['run_mb_calibration']:
                # Make some checks and use the default one
                climate_info = gdir.read_pickle('climate_info')
                source = climate_info['baseline_climate_source']
                ok_source = ['CRU TS4.01', 'CRU TS3.23', 'HISTALP']
                # Built-in any(): np.any() on a generator just returns the
                # (always truthy) generator, which disabled this check.
                if not any(s in source.upper() for s in ok_source):
                    raise RuntimeError('If you are using a custom climate '
                                       'file you should run your own MB '
                                       'calibration.')
                v = gdir.rgi_version[0]  # major version relevant

                # Check that the params are fine
                str_s = 'cru4' if 'CRU' in source else 'histalp'
                vn = 'ref_tstars_rgi{}_{}_calib_params'.format(v, str_s)
                for k in params:
                    if cfg.PARAMS[k] != cfg.PARAMS[vn][k]:
                        raise ValueError('The reference t* you are trying '
                                         'to use was calibrated with '
                                         'difference MB parameters. You '
                                         'might have to run the calibration '
                                         'manually.')
                ref_df = cfg.PARAMS['ref_tstars_rgi{}_{}'.format(v, str_s)]
            else:
                # Use the local calibration
                fp = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
                ref_df = pd.read_csv(fp)

        # Compute the distance to each glacier. A plain array keeps the
        # selection positional whatever the index of ref_df is.
        dist = np.asarray(utils.haversine(gdir.cenlon, gdir.cenlat,
                                          ref_df.lon, ref_df.lat))

        # Take the closest ones (the 0:9 slice keeps nine entries)
        order = np.argsort(dist)[0:9]
        amin = ref_df.iloc[order]
        sq_dist = dist[order]**2

        # If really close no need to divide, else weighted average
        if sq_dist[0] <= 0.1:
            tstar = amin.tstar.iloc[0]
            bias = amin.bias.iloc[0]
        else:
            weights = 1. / sq_dist
            tstar = int(np.average(amin.tstar, weights=weights))
            bias = np.average(amin.bias, weights=weights)

    # Climate period
    mu_hp = int(cfg.PARAMS['mu_star_halfperiod'])
    yr = [tstar - mu_hp, tstar + mu_hp]

    # Do we have a calving glacier?
    cmb = calving_mb(gdir)

    log.info('(%s) local mu* for t*=%d', gdir.rgi_id, tstar)

    # Get the corresponding mu
    years, temp_yr, prcp_yr = mb_yearly_climate_on_glacier(gdir,
                                                           year_range=yr)
    assert len(years) == (2 * mu_hp + 1)

    # mustar is taking calving into account (units of specific MB)
    mustar = (np.mean(prcp_yr) - cmb) / np.mean(temp_yr)
    if not np.isfinite(mustar):
        raise RuntimeError('{} has a non finite mu'.format(gdir.rgi_id))
    if not cfg.PARAMS['allow_negative_mustar']:
        if mustar < 0:
            raise RuntimeError('{} has a negative mu'.format(gdir.rgi_id))

    # For the calving param it might be useful to clip the mu.
    # Lower bound only: using the value itself as upper bound meant that
    # nothing was clipped when it was below the threshold.
    mustar = np.clip(mustar, minimum_mustar, None)

    # Add the climate related params to the GlacierDir to make sure
    # other tools cannot fool around without calibration
    out = gdir.read_pickle('climate_info')
    out['mb_calib_params'] = {k: cfg.PARAMS[k] for k in params}
    gdir.write_pickle(out, 'climate_info')

    # Scalars in a small dataframe for later
    df = pd.DataFrame()
    df['rgi_id'] = [gdir.rgi_id]
    df['t_star'] = [tstar]
    df['mu_star'] = [mustar]
    df['bias'] = [bias]
    df.to_csv(gdir.get_filepath('local_mustar'), index=False)
def compute_intersects(rgi_shp):
    """Processes the rgi file and writes the intersects to OUTDIR.

    The shapefile is read, geometries which cannot be converted by
    ``multi_to_poly`` are dropped, and for each remaining outline the
    intersections with its close neighbours are stored as lines. The
    result is written to ``intersects_<basename>`` in a sub-directory of
    ``OUTDIR_INTERSECTS`` named after the parent directory of the input.

    Parameters
    ----------
    rgi_shp : str
        path to the RGI shapefile

    Returns
    -------
    None
    """
    out_path = os.path.basename(rgi_shp)
    odir = os.path.basename(os.path.dirname(rgi_shp))
    odir = os.path.join(OUTDIR_INTERSECTS, odir)
    mkdir(odir)
    out_path = os.path.join(odir, 'intersects_' + out_path)

    print('Start ' + os.path.basename(rgi_shp) + ' ...')
    start_time = time.time()

    gdf = gpd.read_file(rgi_shp)

    # clean geometries like OGGM does
    # (``except Exception`` instead of a bare except: a failing geometry is
    # still skipped, but Ctrl-C and SystemExit are no longer swallowed)
    ngeos = []
    keep = []
    for g in gdf.geometry:
        try:
            g = multi_to_poly(g)
        except Exception:
            keep.append(False)
        else:
            ngeos.append(g)
            keep.append(True)
    # explicit copy: the geometry column is assigned right after
    gdf = gdf.loc[keep].copy()
    gdf['geometry'] = ngeos

    out_cols = ['RGIId_1', 'RGIId_2', 'geometry']
    out = gpd.GeoDataFrame(columns=out_cols)

    for _, major in gdf.iterrows():

        # Exterior only
        major_poly = major.geometry.exterior

        # sort by distance to the current glacier
        gdf['dis'] = haversine(major.CenLon, major.CenLat,
                               gdf.CenLon, gdf.CenLat)
        # the first entry after sorting is dropped (presumably the glacier
        # itself, at distance zero)
        gdfs = gdf.sort_values(by='dis').iloc[1:]

        # Keep the close glaciers which intersect
        gdfs = gdfs.loc[gdfs.dis < 200000]
        try:
            gdfs = gdfs.loc[gdfs.intersects(major_poly)]
        except Exception:
            # retry with a zero-width buffer around the ring
            gdfs = gdfs.loc[gdfs.intersects(major_poly.buffer(0))]

        for _, neighbor in gdfs.iterrows():

            # NOTE(review): ``in`` on a pandas Series tests the index
            # labels, not the values, so this probably never skips - confirm
            # the intent before changing it.
            if neighbor.RGIId in out.RGIId_1 or neighbor.RGIId in out.RGIId_2:
                continue

            # Exterior only
            # Buffer is needed for numerical reasons
            neighbor_poly = neighbor.geometry.exterior.buffer(0.0001)

            # Go
            try:
                mult_intersect = major_poly.intersection(neighbor_poly)
            except Exception:
                continue

            if isinstance(mult_intersect, shpg.Point):
                continue
            if isinstance(mult_intersect, shpg.linestring.LineString):
                mult_intersect = [mult_intersect]
            if len(mult_intersect) == 0:
                continue
            mult_intersect = [m for m in mult_intersect if
                              not isinstance(m, shpg.Point)]
            if len(mult_intersect) == 0:
                continue
            mult_intersect = linemerge(mult_intersect)
            if isinstance(mult_intersect, shpg.linestring.LineString):
                mult_intersect = [mult_intersect]
            for line in mult_intersect:
                assert isinstance(line, shpg.linestring.LineString)
                line = gpd.GeoDataFrame([[major.RGIId, neighbor.RGIId, line]],
                                        columns=out_cols)
                out = out.append(line)

    out.crs = wgs84.srs
    out.to_file(out_path)

    print(os.path.basename(rgi_shp) +
          ' took {0:.2f} seconds'.format(time.time() - start_time))
    return
def quick_crossval_t_stars(gdirs):
    """Cross-validate the interpolation of tstar to each individual glacier.

    This version does NOT recompute the precipitation scaling factor at each
    round (this quite OK to do so)

    For every reference glacier the t* distribution is redone with that
    glacier removed from the reference table; the resulting values are
    stored in ``cv_*`` columns next to the original ones. A mu* directly
    interpolated from the neighbours is added as ``interp_mustar``. The
    table is written to ``crossval_tstars.csv`` in the working directory.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """
    log.info('Cross-validate the t* and mu* determination')

    rgdirs = _get_ref_glaciers(gdirs)

    # This might be redundant but we redo the calc here
    with utils.DisableLogger():
        compute_ref_t_stars(rgdirs)
    full_ref_df = pd.read_csv(os.path.join(cfg.PATHS['working_dir'],
                                           'ref_tstars.csv'), index_col=0)
    with utils.DisableLogger():
        distribute_t_stars(rgdirs, compute_apparent_mb=False)

    # One lookup table instead of scanning the list at every iteration
    gdir_by_id = {g.rgi_id: g for g in rgdirs}

    for rid in full_ref_df.index:

        # the glacier to look at
        gdir = gdir_by_id[rid]

        # the reference glaciers
        tmp_ref_df = full_ref_df.loc[full_ref_df.index != rid]

        # before the cross-val we can get the info about "real" mustar
        rdf = pd.read_csv(gdir.get_filepath('local_mustar'))
        full_ref_df.loc[rid, 'mustar'] = rdf['mu_star'].values[0]

        # redo the computations
        with utils.DisableLogger():
            distribute_t_stars([gdir], ref_df=tmp_ref_df,
                               compute_apparent_mb=False)

        # store
        rdf = pd.read_csv(gdir.get_filepath('local_mustar'))
        full_ref_df.loc[rid, 'cv_tstar'] = int(rdf['t_star'].values[0])
        full_ref_df.loc[rid, 'cv_mustar'] = rdf['mu_star'].values[0]
        full_ref_df.loc[rid, 'cv_prcp_fac'] = rdf['prcp_fac'].values[0]
        full_ref_df.loc[rid, 'cv_bias'] = rdf['bias'].values[0]

    # Reproduce Ben's figure
    for rid in full_ref_df.index:
        # the glacier to look at
        glc = full_ref_df.loc[full_ref_df.index == rid]
        # the reference glaciers
        tmp_ref_df = full_ref_df.loc[full_ref_df.index != rid]

        # Compute the distance. The table is indexed by RGI id, so the
        # selection has to be positional: do it on a plain array.
        dist = np.asarray(utils.haversine(glc.lon.values[0],
                                          glc.lat.values[0],
                                          tmp_ref_df.lon, tmp_ref_df.lat))

        # Take the closest ones (the 0:9 slice keeps nine entries)
        order = np.argsort(dist)[0:9]
        amin = tmp_ref_df.iloc[order]
        sq_dist = dist[order] ** 2
        interp = np.average(amin.mustar, weights=1. / sq_dist)
        full_ref_df.loc[rid, 'interp_mustar'] = interp

    # write
    out_path = os.path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
    full_ref_df.to_csv(out_path)
def compute_ref_t_stars(gdirs):
    """Detect the best t* for the reference glaciers.

    Land-terminating glaciers found in the WGMS link file get their t*
    candidates computed. Glaciers with a single candidate are fixed right
    away. The remaining ones are fixed iteratively: the glacier with the
    smallest summed distance to the fixed ones is picked, and gets the
    candidate with the smallest summed difference to their t*.
    Results go to ``ref_tstars.csv`` in the working directory.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    def _distance_to_fixed(candidate):
        # Summed distance between a glacier and all glaciers with one t*
        return sum((utils.haversine(candidate.cenlon, candidate.cenlat,
                                    per_glacier[oid][0].cenlon,
                                    per_glacier[oid][0].cenlat)
                    for oid in only_one), 0.)

    def _tstar_mismatch(tt):
        # Summed absolute difference between a candidate and the fixed t*
        return sum((np.abs(tt - per_glacier[oid][1]) for oid in only_one),
                   0.)

    log.info('Compute the reference t* and mu* for WGMS glaciers')

    # Get ref glaciers (all glaciers with MB)
    flink, mbdatadir = utils.get_wgms_files()
    dfids = pd.read_csv(flink)['RGI_ID'].values

    # Reference glaciers only if in the list
    # TODO: we removed marine glaciers here. Is it ok?
    land = 'Land-terminating'
    ref_gdirs = [g for g in gdirs
                 if g.rgi_id in dfids and g.terminus_type == land]

    only_one = []  # glaciers with just one t*
    per_glacier = dict()
    for gdir in ref_gdirs:
        # all possible mus
        mu_candidates(gdir)
        # list of mus compatible with the reference mass balance
        fname = 'mbdata_' + gdir.rgi_id + '.csv'
        mbdf = pd.read_csv(os.path.join(mbdatadir, fname)).set_index('YEAR')
        t_star, res_bias = t_star_from_refmb(gdir, mbdf['ANNUAL_BALANCE'])
        # just one candidate: this is good
        if len(t_star) == 1:
            only_one.append(gdir.rgi_id)
        # might be more than one, to be selected later
        per_glacier[gdir.rgi_id] = (gdir, t_star, res_bias)

    # At least one glacier should have a single t*, otherwise we don't
    # know how to start
    if len(only_one) == 0:
        if os.path.basename(os.path.dirname(flink)) == 'test-workflow':
            # TODO: hard-coded starting glacier, for the test workflow
            start_id = 'RGI40-11.00887'
            only_one.append(start_id)
            gdir, t_star, res_bias = per_glacier[start_id]
            per_glacier[start_id] = (gdir, [t_star[-1]], [res_bias[-1]])
        else:
            raise RuntimeError('Didnt expect to be here.')

    log.info('%d out of %d have only one possible t*. Start from here',
             len(only_one), len(ref_gdirs))

    # Ok. now loop over the nearest glaciers until all have a unique t*
    ids_left = [k for k in per_glacier.keys() if k not in only_one]
    while len(ids_left) != 0:
        # Take the glacier closest to the fixed ones ...
        summed = [_distance_to_fixed(per_glacier[k][0]) for k in ids_left]
        gdir, t_star, res_bias = per_glacier[ids_left[np.argmin(summed)]]

        # ... and choose its best t*
        amin = np.argmin([_tstar_mismatch(tt) for tt in t_star])
        per_glacier[gdir.rgi_id] = (gdir, [t_star[amin]], [res_bias[amin]])
        only_one.append(gdir.rgi_id)

        ids_left = [k for k in per_glacier.keys() if k not in only_one]

    # Write out the data
    rgis_ids, t_stars, biases, lons, lats = [], [], [], [], []
    for key, (gdir, t_star, res_bias) in per_glacier.items():
        rgis_ids.append(key)
        t_stars.append(t_star[0])
        biases.append(res_bias[0])
        lats.append(gdir.cenlat)
        lons.append(gdir.cenlon)
    df = pd.DataFrame(index=rgis_ids)
    df['tstar'] = t_stars
    df['bias'] = biases
    df['lon'] = lons
    df['lat'] = lats
    target = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(target)
def quick_crossval(gdirs, xval, major=0):
    """Leave-one-out cross-validation of the t* interpolation.

    Follows ``climate.quick_crossval_t_stars`` but is minimized for
    performance. For each reference glacier the t* distribution is redone
    without that glacier, a ``PastMassBalance`` model is run with the
    resulting parameters, and its specific mass balance is compared with
    the reference data. The mean statistics are appended as a new row to
    ``xval``.

    Parameters
    ----------
    gdirs : list of oggm.GlacierDirectory objects
        must contain every glacier listed in ``ref_tstars.csv``
    xval : pd.DataFrame
        table of results; a row with the keys ``prcpsf``, ``tliq``,
        ``tmelt``, ``tgrad``, ``std_quot``, ``bias``, ``rmse`` and ``core``
        is appended in place.
    major : int or bool, optional
        if truthy, only the statistics are computed. Otherwise the
        per-glacier cross-validated values and an interpolated mu* are
        also written to ``crossval_tstars.csv`` in the working directory.

    Returns
    -------
    pd.DataFrame
        ``xval`` with the additional row
    """
    full_ref_df = pd.read_csv(os.path.join(cfg.PATHS['working_dir'],
                                           'ref_tstars.csv'), index_col=0)

    tmpdf = pd.DataFrame(
        [], columns=['std_oggm', 'std_ref', 'rmse', 'core', 'bias'])

    for rid in full_ref_df.index:

        # the glacier to look at
        gdir = [g for g in gdirs if g.rgi_id == rid][0]

        # the reference glaciers
        tmp_ref_df = full_ref_df.loc[full_ref_df.index != rid]

        # before the cross-val store the info about "real" mustar
        rdf = pd.read_csv(gdir.get_filepath('local_mustar'))
        full_ref_df.loc[rid, 'mustar'] = rdf['mu_star'].values[0]

        # redistribute t_star
        with utils.DisableLogger():
            # compute_ref_t_stars should be done again for
            # every crossvalidation step
            # This will/might have an influence if one of the 10 surrounding
            # glaciers of the current glacier has more than one t_star
            # If so, the currently crossvalidated glacier was probably
            # used to select one t_star for this surrounding glacier.
            #
            # But: compute_ref_t_stars is very time consuming. And the
            # influence is probably very small. Also only 40 out of the 253
            # reference glaciers do have more than one possible t_star.
            tasks.distribute_t_stars([gdir], ref_df=tmp_ref_df)

        # read crossvalidated values
        rdf = pd.read_csv(gdir.get_filepath('local_mustar'))

        # ----
        # --- MASS-BALANCE MODEL
        heights, widths = gdir.get_inversion_flowline_hw()
        mb_mod = PastMassBalance(gdir,
                                 mu_star=rdf['mu_star'].values[0],
                                 bias=rdf['bias'].values[0],
                                 prcp_fac=rdf['prcp_fac'].values[0])

        # Mass-balance timeseries, observed and simulated
        refmb = gdir.get_ref_mb_data().copy()
        refmb['OGGM'] = mb_mod.get_specific_mb(heights, widths,
                                               year=refmb.index)

        # store single glacier results
        diff = refmb.OGGM - refmb.ANNUAL_BALANCE
        bias = refmb.OGGM.mean() - refmb.ANNUAL_BALANCE.mean()
        # Root of the mean of the squares. Squaring the mean instead (as
        # done before) only gives the absolute value of the bias.
        rmse = np.sqrt(np.mean(diff**2))
        rcor = np.corrcoef(refmb.OGGM, refmb.ANNUAL_BALANCE)[0, 1]

        ref_std = refmb.ANNUAL_BALANCE.std()

        # unclear how to treat this best
        if ref_std == 0:
            ref_std = refmb.OGGM.std()
            rcor = 1

        tmpdf.loc[len(tmpdf.index)] = {
            'std_oggm': refmb.OGGM.std(),
            'std_ref': ref_std,
            'bias': bias,
            'rmse': rmse,
            'core': rcor
        }

        if not major:
            # store cross validated values
            full_ref_df.loc[rid, 'cv_tstar'] = int(rdf['t_star'].values[0])
            full_ref_df.loc[rid, 'cv_mustar'] = rdf['mu_star'].values[0]
            full_ref_df.loc[rid, 'cv_bias'] = rdf['bias'].values[0]
            full_ref_df.loc[rid, 'cv_prcp_fac'] = rdf['prcp_fac'].values[0]

    # and store mean values
    std_quot = np.mean(tmpdf.std_oggm / tmpdf.std_ref)

    xval.loc[len(xval.index)] = {
        'prcpsf': cfg.PARAMS['prcp_scaling_factor'],
        'tliq': cfg.PARAMS['temp_all_liq'],
        'tmelt': cfg.PARAMS['temp_melt'],
        'tgrad': cfg.PARAMS['temp_default_gradient'],
        'std_quot': std_quot,
        'bias': tmpdf['bias'].mean(),
        'rmse': tmpdf['rmse'].mean(),
        'core': tmpdf['core'].mean()
    }

    if major:
        return xval

    for rid in full_ref_df.index:
        # the glacier to look at
        glc = full_ref_df.loc[full_ref_df.index == rid]
        # the reference glaciers
        tmp_ref_df = full_ref_df.loc[full_ref_df.index != rid]

        # Compute the distance. The table is indexed by RGI id, so the
        # selection has to be positional: do it on a plain array.
        dist = np.asarray(utils.haversine(glc.lon.values[0],
                                          glc.lat.values[0],
                                          tmp_ref_df.lon, tmp_ref_df.lat))

        # Take the closest ones (the 0:9 slice keeps nine entries)
        order = np.argsort(dist)[0:9]
        amin = tmp_ref_df.iloc[order]
        sq_dist = dist[order]**2
        interp = np.average(amin.mustar, weights=1. / sq_dist)
        full_ref_df.loc[rid, 'interp_mustar'] = interp

    # write
    out_path = os.path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
    full_ref_df.to_csv(out_path)
    # alternative: do not write csv file, but store the needed values
    # within xval_minor_statistics
    return xval
def compute_ref_t_stars(gdirs):
    """Detect the best t* for the reference glaciers.

    Assumes pre-processed directories with following tasks completed::
        - define_glacier_region
        - gis.glacier_masks
        - climate.distribute_climate_data
        - climate.mu_candidates

    The mass-balance data is read from the ``WGMS`` directory located next
    to ``cfg.paths['wgms_rgi_links']``. The selected t* and bias are written,
    together with the glacier coordinates, to ``ref_tstars.csv`` in the
    working directory.
    """
    log.info('Compute the reference t* and mu*')

    links_dir = os.path.dirname(cfg.paths['wgms_rgi_links'])
    mbdatadir = os.path.join(links_dir, 'WGMS')

    only_one = []  # glaciers with just one t*
    per_glacier = dict()
    for gdir in gdirs:
        mb_path = os.path.join(mbdatadir, 'mbdata_' + gdir.rgi_id + '.csv')
        annual_mb = pd.read_csv(mb_path).set_index('YEAR')['ANNUAL_BALANCE']
        t_star, res_bias = t_star_from_refmb(gdir, annual_mb)
        if len(t_star) == 1:
            only_one.append(gdir.rgi_id)
        per_glacier[gdir.rgi_id] = (gdir, t_star, res_bias)

    if not only_one:
        # TODO: hard-coded starting glacier (its last candidate is kept)
        fallback = 'RGI40-11.00887'
        only_one.append(fallback)
        gdir, t_star, res_bias = per_glacier[fallback]
        per_glacier[fallback] = (gdir, [t_star[-1]], [res_bias[-1]])

    # Loop over the glaciers until all have a unique t*
    while True:
        unresolved = [k for k in per_glacier.keys() if k not in only_one]
        if len(unresolved) == 0:
            break

        # Summed distance of each unresolved glacier to the resolved ones
        dist_sums = []
        for k in unresolved:
            here = per_glacier[k][0]
            acc = 0.
            for done_id in only_one:
                there = per_glacier[done_id][0]
                acc += utils.haversine(here.cenlon, here.cenlat,
                                       there.cenlon, there.cenlat)
            dist_sums.append(acc)

        # Take the closest one and choose its best t*
        closest = unresolved[np.argmin(dist_sums)]
        gdir, t_star, res_bias = per_glacier[closest]
        tstar_sums = []
        for tt in t_star:
            acc = 0.
            for done_id in only_one:
                acc += np.abs(tt - per_glacier[done_id][1])
            tstar_sums.append(acc)
        pick = np.argmin(tstar_sums)
        per_glacier[gdir.rgi_id] = (gdir, [t_star[pick]], [res_bias[pick]])
        only_one.append(gdir.rgi_id)

    # Write out the data
    records = list(per_glacier.items())
    df = pd.DataFrame(index=[k for k, _ in records])
    df['tstar'] = [rec[1][0] for _, rec in records]
    df['bias'] = [rec[2][0] for _, rec in records]
    df['lon'] = [rec[0].cenlon for _, rec in records]
    df['lat'] = [rec[0].cenlat for _, rec in records]
    csv_path = os.path.join(cfg.paths['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(csv_path)
def crossval_t_stars(gdirs):
    """Cross-validate the interpolation of tstar to each individual glacier.

    For each reference glacier, t*, the precipitation factor and the bias
    are interpolated from the nearest other reference glaciers, the local
    mu* is recomputed with them and stored in ``cv_*`` columns, and the
    original calibration of the glacier is restored afterwards. A mu*
    interpolated directly from the neighbours is stored as ``cv_muinterp``.
    The table is written to ``crossval_tstars.csv`` in the working
    directory.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """
    log.info('Cross-validate the t* and mu* determination')

    full_ref_df = pd.read_csv(os.path.join(cfg.PATHS['working_dir'],
                                           'ref_tstars.csv'), index_col=0)

    rgdirs = _get_ref_glaciers(gdirs)
    gdir_by_id = {g.rgi_id: g for g in rgdirs}

    for rid in full_ref_df.index:
        ref_df = full_ref_df.drop(rid, axis=0)
        gdir = gdir_by_id[rid]

        # Compute the distance to each glacier. The table is indexed by RGI
        # id, so the selection has to be positional: do it on a plain array.
        dist = np.asarray(utils.haversine(gdir.cenlon, gdir.cenlat,
                                          ref_df.lon, ref_df.lat))

        # Take the closest ones (the 0:9 slice keeps nine entries)
        order = np.argsort(dist)[0:9]
        amin = ref_df.iloc[order]
        weights = 1. / dist[order]**2

        # Weighted average
        tstar = int(np.average(amin.tstar, weights=weights))
        prcp_fac = np.average(amin.prcp_fac, weights=weights)
        bias = np.average(amin.bias, weights=weights)

        # For fun (Marzeion et al 2012), get the interpolated mu.
        # The neighbours are visited in the order of ``amin`` (nearest
        # first) so that each mu* meets its own weight; filtering the list
        # of directories instead would return them in list order.
        amustars = []
        for nid in amin.index:
            tdf = pd.read_csv(gdir_by_id[nid].get_filepath('local_mustar'))
            amustars.append(tdf['mu_star'].values[0])
        mu_interp = np.average(amustars, weights=weights)

        # Go, but store the previous calib first
        bef_df = pd.read_csv(gdir.get_filepath('local_mustar'))
        local_mustar_apparent_mb(gdir, tstar=tstar, bias=bias,
                                 prcp_fac=prcp_fac,
                                 compute_apparent_mb=False)
        rdf = pd.read_csv(gdir.get_filepath('local_mustar'))

        full_ref_df.loc[rid, 'mustar'] = bef_df['mu_star'].values[0]
        # The stored calibration must agree with the reference table
        np.testing.assert_allclose(full_ref_df.loc[rid, 'bias'],
                                   bef_df['bias'].values[0])
        np.testing.assert_allclose(full_ref_df.loc[rid, 'prcp_fac'],
                                   bef_df['prcp_fac'].values[0])
        full_ref_df.loc[rid, 'cv_muinterp'] = mu_interp
        full_ref_df.loc[rid, 'cv_tstar'] = int(rdf['t_star'].values[0])
        full_ref_df.loc[rid, 'cv_mustar'] = rdf['mu_star'].values[0]
        full_ref_df.loc[rid, 'cv_prcp_fac'] = rdf['prcp_fac'].values[0]
        full_ref_df.loc[rid, 'cv_bias'] = rdf['bias'].values[0]
        assert tstar == rdf['t_star'].values[0]

        # Restore the calib
        local_mustar_apparent_mb(gdir, tstar=bef_df['t_star'].values[0],
                                 bias=bef_df['bias'].values[0],
                                 prcp_fac=bef_df['prcp_fac'].values[0],
                                 compute_apparent_mb=False)

    # stats and write
    full_ref_df['diff_muinterp'] = (full_ref_df['cv_muinterp'] -
                                    full_ref_df['mustar'])
    full_ref_df['diff_tinterp'] = (full_ref_df['cv_mustar'] -
                                   full_ref_df['mustar'])
    out_path = os.path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
    full_ref_df.to_csv(out_path)
def compute_ref_t_stars(gdirs):
    """Detects the best t* for the reference glaciers.

    Each reference glacier may have several t* candidates. Glaciers with a
    single candidate are used as seeds; the remaining glaciers are then
    processed one by one (the one with the smallest summed distance to the
    already decided glaciers first) and get the candidate t* which is
    closest in time to the t* of the already decided glaciers. The result
    is written to ``ref_tstars.csv`` in the working directory.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects

    Raises
    ------
    RuntimeError
        if no glacier has a unique t* candidate (and we are not in the
        test workflow).
    """

    log.info('Compute the reference t* and mu* for WGMS glaciers')

    # Reference glaciers only if in the list and period is good
    ref_gdirs = _get_ref_glaciers(gdirs)

    # Loop
    only_one = []  # start to store the glaciers with just one t*
    per_glacier = dict()
    for gdir in ref_gdirs:
        # all possible mus
        mu_candidates(gdir)
        # list of mus compatibles with refmb
        mbdf = gdir.get_ref_mb_data()['ANNUAL_BALANCE']
        t_star, res_bias, prcp_fac = t_star_from_refmb(gdir, mbdf)
        # store the mb (could be useful later)
        gdir.write_pickle(mbdf, 'ref_massbalance')

        # if we have just one candidate this is good
        if len(t_star) == 1:
            only_one.append(gdir.rgi_id)
        # this might be more than one, we'll have to select them later
        per_glacier[gdir.rgi_id] = (gdir, t_star, res_bias, prcp_fac)

    # At least one of the glaciers should have a single t*, otherwise we
    # don't know how to start
    if len(only_one) == 0:
        flink, _ = utils.get_wgms_files()
        if os.path.basename(os.path.dirname(flink)) == 'test-workflow':
            # TODO: hardcoded stuff here, for the test workflow
            only_one.append('RGI40-11.00897')
            gdir, t_star, res_bias, prcp_fac = per_glacier['RGI40-11.00897']
            per_glacier['RGI40-11.00897'] = (gdir, [t_star[-1]],
                                             [res_bias[-1]], prcp_fac)
        else:
            raise RuntimeError('We need at least one glacier with one '
                               'tstar only.')

    log.info('%d out of %d have only one possible t*. Start from here',
             len(only_one), len(ref_gdirs))

    # Ok. now loop over the nearest glaciers until all have a unique t*
    while True:
        ids_left = [rid for rid in per_glacier if rid not in only_one]
        if len(ids_left) == 0:
            break

        # Compute the summed distance to all glaciers with one t*
        distances = []
        for rid in ids_left:
            gdir = per_glacier[rid][0]
            lon, lat = gdir.cenlon, gdir.cenlat
            ldis = 0.
            for id_o in only_one:
                ogdir = per_glacier[id_o][0]
                ldis += utils.haversine(lon, lat, ogdir.cenlon, ogdir.cenlat)
            distances.append(ldis)

        # Take the shortest and choose the best t*
        pg = per_glacier[ids_left[np.argmin(distances)]]
        gdir, t_star, res_bias, prcp_fac = pg
        distances = []
        for tt in t_star:
            ldis = 0.
            for id_o in only_one:
                # Decided glaciers store their unique t* as a one-element
                # sequence: compare against the scalar, not the sequence
                ldis += np.abs(tt - per_glacier[id_o][1][0])
            distances.append(ldis)
        amin = np.argmin(distances)
        per_glacier[gdir.rgi_id] = (gdir, [t_star[amin]], [res_bias[amin]],
                                    prcp_fac)
        only_one.append(gdir.rgi_id)

    # Write out the data
    rgis_ids, t_stars, prcp_facs, biases, lons, lats = [], [], [], [], [], []
    for rid, (gdir, t_star, res_bias, prcp_fac) in per_glacier.items():
        rgis_ids.append(rid)
        t_stars.append(t_star[0])
        prcp_facs.append(prcp_fac)
        biases.append(res_bias[0])
        lats.append(gdir.cenlat)
        lons.append(gdir.cenlon)
    df = pd.DataFrame(index=rgis_ids)
    df['lon'] = lons
    df['lat'] = lats
    df['tstar'] = t_stars
    df['prcp_fac'] = prcp_facs
    df['bias'] = biases
    out_path = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(out_path)
def quick_crossval_t_stars(gdirs):
    """Cross-validate the interpolation of tstar to each individual glacier.

    This version does NOT recompute the precipitation scaling factor at
    each round (which is quite OK to do).

    The reference t* list is recomputed and distributed first. Then each
    reference glacier is in turn left out of the list and calibrated from
    the remaining ones. The table, with the additional ``mustar``,
    ``cv_*`` and ``interp_mustar`` columns, is written to
    ``crossval_tstars.csv`` in the working directory.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    log.info('Cross-validate the t* and mu* determination')

    ref_gdirs = utils.get_ref_mb_glaciers(gdirs)

    # Possibly redundant, but the reference calibration is redone here
    with utils.DisableLogger():
        compute_ref_t_stars(ref_gdirs)
    ref_path = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    full_ref_df = pd.read_csv(ref_path, index_col=0)
    with utils.DisableLogger():
        distribute_t_stars(ref_gdirs)

    # (cross-validation column, column of the local_mustar file)
    cv_columns = (('cv_mustar', 'mu_star'),
                  ('cv_prcp_fac', 'prcp_fac'),
                  ('cv_bias', 'bias'))

    for rid in full_ref_df.index:

        # Glacier under scrutiny, and the reference list without it
        target = [g for g in ref_gdirs if g.rgi_id == rid][0]
        others_df = full_ref_df.loc[full_ref_df.index != rid]

        # The "real" mustar has to be read before the calibration is redone
        calib = pd.read_csv(target.get_filepath('local_mustar'))
        full_ref_df.loc[rid, 'mustar'] = calib['mu_star'].values[0]

        # Calibrate again, this time without the glacier itself
        with utils.DisableLogger():
            distribute_t_stars([target], ref_df=others_df)

        # Keep the cross-validated values
        calib = pd.read_csv(target.get_filepath('local_mustar'))
        full_ref_df.loc[rid, 'cv_tstar'] = int(calib['t_star'].values[0])
        for cv_name, calib_name in cv_columns:
            full_ref_df.loc[rid, cv_name] = calib[calib_name].values[0]

    # Reproduce Ben's figure
    for rid in full_ref_df.index:

        is_target = full_ref_df.index == rid
        row = full_ref_df.loc[is_target]
        others_df = full_ref_df.loc[~is_target]

        # Distance from the glacier to all other reference glaciers
        distances = utils.haversine(row.lon.values[0], row.lat.values[0],
                                    others_df.lon, others_df.lat)

        # Keep the nearest neighbours only (slice [0:9])
        aso = np.argsort(distances)[0:9]
        nearest = others_df.iloc[aso]
        distances = distances[aso]**2
        weights = 1. / distances
        full_ref_df.loc[rid, 'interp_mustar'] = np.average(nearest.mustar,
                                                           weights=weights)

    # write
    out_path = os.path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
    full_ref_df.to_csv(out_path)
def compute_ref_t_stars(gdirs):
    """Detects the best t* for the reference glaciers.

    Each reference glacier may have several t* candidates. Glaciers with a
    single candidate are used as seeds; the remaining glaciers are then
    processed one by one (the one with the smallest summed distance to the
    already decided glaciers first) and get the candidate t* which is
    closest in time to the t* of the already decided glaciers. The result
    is written to ``ref_tstars.csv`` in the working directory.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects

    Raises
    ------
    RuntimeError
        if no glacier has a unique t* candidate and none of the hardcoded
        test-workflow glaciers is available.
    """

    log.info('Compute the reference t* and mu* for WGMS glaciers')

    # Reference glaciers only if in the list and period is good
    ref_gdirs = _get_ref_glaciers(gdirs)

    # The scaling factor is optimised only in 'stddev' mode
    sf = None
    if cfg.PARAMS['prcp_scaling_factor'] == 'stddev':
        sf = _get_optimal_scaling_factor(ref_gdirs)

    # Loop
    only_one = []  # start to store the glaciers with just one t*
    per_glacier = dict()
    for gdir in ref_gdirs:
        # all possible mus
        mu_candidates(gdir, prcp_sf=sf)
        # list of mus compatibles with refmb
        mbdf = gdir.get_ref_mb_data()['ANNUAL_BALANCE']
        res = t_star_from_refmb(gdir, mbdf)

        # if we have just one candidate this is good
        if len(res['t_star']) == 1:
            only_one.append(gdir.rgi_id)
        # this might be more than one, we'll have to select them later
        per_glacier[gdir.rgi_id] = (gdir, res['t_star'], res['bias'],
                                    res['prcp_fac'])

    # At least one of the glaciers should have a single t*, otherwise we don't
    # know how to start
    if len(only_one) == 0:
        # TODO: hardcoded stuff here, for the test workflow
        for fallback_id in ('RGI50-11.00897', 'RGI40-11.00897'):
            if fallback_id in per_glacier:
                only_one.append(fallback_id)
                gdir, t_star, res_bias, prcp_fac = per_glacier[fallback_id]
                # Arbitrarily keep the last candidate for this glacier
                per_glacier[fallback_id] = (gdir, [t_star[-1]],
                                            [res_bias[-1]], prcp_fac)
                break
        else:
            raise RuntimeError('We need at least one glacier with one '
                               'tstar only.')

    log.info('%d out of %d have only one possible t*. Start from here',
             len(only_one), len(ref_gdirs))

    # Ok. now loop over the nearest glaciers until all have a unique t*
    while True:
        ids_left = [rid for rid in per_glacier if rid not in only_one]
        if len(ids_left) == 0:
            break

        # Compute the summed distance to all glaciers with one t*
        distances = []
        for rid in ids_left:
            gdir = per_glacier[rid][0]
            lon, lat = gdir.cenlon, gdir.cenlat
            ldis = 0.
            for id_o in only_one:
                ogdir = per_glacier[id_o][0]
                ldis += utils.haversine(lon, lat, ogdir.cenlon, ogdir.cenlat)
            distances.append(ldis)

        # Take the shortest and choose the best t*
        pg = per_glacier[ids_left[np.argmin(distances)]]
        gdir, t_star, res_bias, prcp_fac = pg
        distances = []
        for tt in t_star:
            ldis = 0.
            for id_o in only_one:
                # Decided glaciers store their unique t* as a one-element
                # sequence: compare against the scalar, not the sequence
                ldis += np.abs(tt - per_glacier[id_o][1][0])
            distances.append(ldis)
        amin = np.argmin(distances)
        per_glacier[gdir.rgi_id] = (gdir, [t_star[amin]], [res_bias[amin]],
                                    prcp_fac)
        only_one.append(gdir.rgi_id)

    # Write out the data
    rgis_ids, t_stars, prcp_facs, biases, lons, lats = [], [], [], [], [], []
    for rid, (gdir, t_star, res_bias, prcp_fac) in per_glacier.items():
        rgis_ids.append(rid)
        t_stars.append(t_star[0])
        prcp_facs.append(prcp_fac)
        biases.append(res_bias[0])
        lats.append(gdir.cenlat)
        lons.append(gdir.cenlon)
    df = pd.DataFrame(index=rgis_ids)
    df['lon'] = lons
    df['lat'] = lats
    df['tstar'] = t_stars
    df['prcp_fac'] = prcp_facs
    df['bias'] = biases
    out_path = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(out_path)
def local_t_star(gdir, *, ref_df=None, tstar=None, bias=None):
    """Compute the local t* and associated glacier-wide mu*.

    If ``tstar`` and ``bias`` are not provided, they will be interpolated from
    the reference t* list.

    Note: the glacier wide mu* is here just for indication. It might be
    different from the flowlines' mu* in some cases.

    The mass-balance parameters in use are added to the ``climate_info``
    file of the glacier directory, and the result (``rgi_id``, ``t_star``,
    ``bias``, ``mu_star_glacierwide``) is written to ``local_mustar``.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    ref_df : :py:class:`pandas.DataFrame`, optional
        replace the default calibration list with your own.
    tstar: int, optional
        the year where the glacier should be equilibrium
    bias: float, optional
        the associated reference bias

    Raises
    ------
    MassBalanceCalibrationError
        if the pre-calibrated list is requested but the baseline climate
        source or the mass-balance parameters do not match the ones used
        for its calibration, if mu* is not finite, or if mu* is outside of
        ``[cfg.PARAMS['min_mu_star'], cfg.PARAMS['max_mu_star']]``.
    """

    # Relevant mb params
    params = ['temp_default_gradient', 'temp_all_solid', 'temp_all_liq',
              'temp_melt', 'prcp_scaling_factor']

    if tstar is None or bias is None:
        # Do our own interpolation
        if ref_df is None:
            if not cfg.PARAMS['run_mb_calibration']:
                # Make some checks and use the default one
                climate_info = gdir.read_json('climate_info')
                source = climate_info['baseline_climate_source']
                ok_source = ['CRU TS4.01', 'CRU TS3.23', 'HISTALP']
                # The builtin any() is needed here: np.any() does not
                # iterate over a generator and is always truthy for it,
                # which would silently disable this check
                if not any(s in source.upper() for s in ok_source):
                    msg = ('If you are using a custom climate file you should '
                           'run your own MB calibration.')
                    raise MassBalanceCalibrationError(msg)
                v = gdir.rgi_version[0]  # major version relevant

                # Check that the params are fine
                s = 'cru4' if 'CRU' in source else 'histalp'
                vn = 'oggm_ref_tstars_rgi{}_{}_calib_params'.format(v, s)
                for k in params:
                    if cfg.PARAMS[k] != cfg.PARAMS[vn][k]:
                        msg = ('The reference t* you are trying to use was '
                               'calibrated with different MB parameters. You '
                               'might have to run the calibration manually.')
                        raise MassBalanceCalibrationError(msg)
                ref_df = cfg.PARAMS['oggm_ref_tstars_rgi{}_{}'.format(v, s)]
            else:
                # Use the local calibration
                fp = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
                ref_df = pd.read_csv(fp)

        # Compute the distance to each glacier
        distances = utils.haversine(gdir.cenlon, gdir.cenlat,
                                    ref_df.lon, ref_df.lat)

        # Take the closest ones (note: the [0:9] slice keeps 9 neighbours)
        aso = np.argsort(distances)[0:9]
        amin = ref_df.iloc[aso]
        distances = distances[aso]**2

        # If really close no need to divide, else weighted average
        if distances.iloc[0] <= 0.1:
            tstar = amin.tstar.iloc[0]
            bias = amin.bias.iloc[0]
        else:
            weights = 1. / distances
            tstar = int(np.average(amin.tstar, weights=weights))
            bias = np.average(amin.bias, weights=weights)

    # Add the climate related params to the GlacierDir to make sure
    # other tools cannot fool around without re-calibration
    out = gdir.read_json('climate_info')
    out['mb_calib_params'] = {k: cfg.PARAMS[k] for k in params}
    gdir.write_json(out, 'climate_info')

    # We compute the overall mu* here but this is mostly for testing
    # Climate period
    mu_hp = int(cfg.PARAMS['mu_star_halfperiod'])
    yr = [tstar - mu_hp, tstar + mu_hp]

    # Do we have a calving glacier?
    cmb = calving_mb(gdir)

    log.info('(%s) local mu* computation for t*=%d', gdir.rgi_id, tstar)

    # Get the corresponding mu
    years, temp_yr, prcp_yr = mb_yearly_climate_on_glacier(gdir,
                                                           year_range=yr)
    assert len(years) == (2 * mu_hp + 1)

    # mustar is taking calving into account (units of specific MB)
    mustar = (np.mean(prcp_yr) - cmb) / np.mean(temp_yr)
    if not np.isfinite(mustar):
        raise MassBalanceCalibrationError('{} has a non finite '
                                          'mu'.format(gdir.rgi_id))

    # Clip it?
    if cfg.PARAMS['clip_mu_star']:
        mustar = utils.clip_min(mustar, 0)

    # If mu out of bounds, raise
    if not (cfg.PARAMS['min_mu_star'] <= mustar <= cfg.PARAMS['max_mu_star']):
        raise MassBalanceCalibrationError('mu* out of specified bounds: '
                                          '{:.2f}'.format(mustar))

    # Scalars in a small dict for later
    df = dict()
    df['rgi_id'] = gdir.rgi_id
    df['t_star'] = int(tstar)
    df['bias'] = bias
    df['mu_star_glacierwide'] = mustar
    gdir.write_json(df, 'local_mustar')