# Leave-one-out cross-validation: every glacier is recalibrated with a
# reference table that does not contain its own entry, and the resulting
# mass-balance model is scored against the glacier's reference data.
for i, gdir in enumerate(gdirs):

    print('Cross-validation iteration {} of {}'.format(i + 1, len(ref_df)))

    # Recalibrate "blindly": hide the current glacier from the reference table
    not_this_glacier = ref_df.index != gdir.rgi_id
    tmp_ref_df = ref_df.loc[not_this_glacier]
    tasks.local_t_star(gdir, ref_df=tmp_ref_df)
    tasks.mu_star_calibration(gdir)

    # Mass-balance model, now using the cross-validated parameters
    mb_mod = MultipleFlowlineMassBalance(
        gdir,
        mb_model_class=PastMassBalance,
        use_inversion_flowlines=True,
    )

    # Observed and simulated series side by side, on the observation years
    refmb = gdir.get_ref_mb_data().copy()
    refmb['OGGM'] = mb_mod.get_specific_mb(year=refmb.index)

    # Variability and correlation of the simulation relative to observations
    std_ref = refmb.ANNUAL_BALANCE.std()
    rcor = np.corrcoef(refmb.OGGM, refmb.ANNUAL_BALANCE)[0, 1]
    if std_ref == 0:
        # Constant observations (the original author suspects geodetic
        # values): fall back to the model's own spread so that the sigma
        # ratio below is 1, and set the correlation to 1.
        std_ref = refmb.OGGM.std()
        rcor = 1

    # Store the scores in the reference table
    mean_diff = refmb.OGGM.mean() - refmb.ANNUAL_BALANCE.mean()
    sigma_ratio = refmb.OGGM.std() / std_ref
    ref_df.loc[gdir.rgi_id, 'CV_MB_BIAS'] = mean_diff
    ref_df.loc[gdir.rgi_id, 'CV_MB_SIGMA_BIAS'] = sigma_ratio
    ref_df.loc[gdir.rgi_id, 'CV_MB_COR'] = rcor
# NOTE(review): this fragment starts in the middle of a larger routine; the
# nesting below was reconstructed from the data flow -- confirm against the
# original file.

# If the observations extend beyond the modelled years, continue the run
# from t_0 up to the last observation year and work with that output.
if refmb.index[-1] > years[-1]:
    mod.run_until(t_0)
    tasks.run_from_climate_data(
        gdir,
        ys=t_0,
        ye=refmb.index[-1],
        init_model_fls=copy.deepcopy(mod.fls),
        output_filesuffix='_until_refmb',
        bias=bias,
    )
    extended_run = gdir.get_filepath('model_run', filesuffix='_until_refmb')
    mod = FileModel(extended_run)

# get mass balance from volume difference
df.loc[:-1, 'OGGM_dv'] = (mod.volume_m3_ts().diff()
                          * cfg.PARAMS['ice_density']
                          / mod.area_m2_ts())
df = df.shift(-1)

# mass balance from the mass-balance model, evaluated on the model geometry
# of each year
for yr in mod.volume_km3_ts().index:
    mod.run_until(yr)
    mb = MultipleFlowlineMassBalance(
        gdir,
        fls=copy.deepcopy(mod.fls),
        mb_model_class=PastMassBalance,
        bias=bias,
    )
    df.loc[yr, 'OGGM_mb'] = mb.get_specific_mb(year=[mod.yr])

df.loc[:, 'WGMS'] = refmb.ANNUAL_BALANCE
df.index = df.index.astype(int)

# difference between Mass Balance and volume delta
mb_minus_dv = df.OGGM_mb - df.OGGM_dv
rmse_d = np.sqrt((mb_minus_dv ** 2).mean())
max_d = mb_minus_dv.abs().max()
delta_diff.loc[gdir.rgi_id, 'region'] = REGION
delta_diff.loc[gdir.rgi_id, 'rmse'] = rmse_d
delta_diff.loc[gdir.rgi_id, 'max_diff'] = max_d
delta_diff.loc[gdir.rgi_id, 'temp_bias'] = temp_bias

# difference between modelled and observed mass balance, restricted to the
# years that actually have an observation
df = df.dropna(subset=['WGMS'])
rmse = np.sqrt(((df.WGMS - df.OGGM_mb) ** 2).mean())
# gdirs.remove(gdir)

# define year range
years = np.arange(1903, 2020)

# create dataframe to store results
mb_result = pd.DataFrame()

# Flowline Mass Balance
from oggm.core.massbalance import MultipleFlowlineMassBalance, PastMassBalance

for gdir in gdirs:
    rgi_id = gdir.rgi_id

    # modelled specific mass balance for every year of the range
    mbmod = MultipleFlowlineMassBalance(
        gdir,
        use_inversion_flowlines=True,
        mb_model_class=PastMassBalance,
    )
    mb_ts = mbmod.get_specific_mb(year=years)

    # create dataframe of mb per year
    temp_df = pd.DataFrame({'mb': mb_ts}, index=years)

    # read geodetic mb for the glacier (first matching row is used)
    test = geodmb[geodmb['RGIId'] == rgi_id]
    first_match = test.index[0]

    # get mm w.e. per year: period totals scaled by 1000 and divided by the
    # period length used by the original author (32 and 31)
    mmwe5385 = test['dmwe_53_85'].loc[first_match] * 1000 / 32
    mmwe8516 = test['dmwe_85_16'].loc[first_match] * 1000 / 31

    # avg difference to oggm mb per period.
    # Note hydrological year in OGGM.
    oggm_5385 = temp_df['mb'].loc[1954:1986]
    oggm_8516 = temp_df['mb'].loc[1986:2017]
    davg5385 = np.average(oggm_5385) - mmwe5385
    davg8516 = np.average(oggm_8516) - mmwe8516

    # add difference and compute corrected mb to columns
def minor_xval_statistics(gdirs):
    """Compile per-glacier cross-validation biases and mass-balance series.

    Reads ``crossval_tstars.csv`` from ``cfg.PATHS['working_dir']`` and, for
    every glacier directory, computes the specific mass balance on the years
    of the reference data with three parameter sets:

    * the cross-validated per-flowline mu* (``cv_mustar_flowline*`` columns)
      together with ``cv_bias``,
    * the interpolated mu* (``interp_mustar``) together with ``cv_bias``,
    * the calibrated per-flowline mu* (``mustar_flowline*`` columns without
      the ``cv_`` prefix) together with ``bias``.

    Parameters
    ----------
    gdirs : list of oggm.GlacierDirectory
        the glaciers to process; each ``rgi_id`` must be a row label of the
        csv file

    Returns
    -------
    xval : pd.DataFrame
        one row per glacier, indexed by ``RGIId``, with the mean biases of the
        three runs and the associated mu* / t* values
    mbdf : pd.DataFrame
        yearly series with two column levels: the RGI id and one of
        ``'measured'``, ``'calibrated'``, ``'crossvalidated'``. Years without
        any value are dropped.
    """
    # Cross-validation table
    cv_path = os.path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
    cvdf = pd.read_csv(cv_path, index_col=0)

    xval_columns = [
        'RGIId', 'Name', 'tstar_bias', 'xval_bias', 'interp_bias', 'mustar',
        'tstar', 'xval_mustar', 'xval_tstar', 'interp_mustar'
    ]

    # Rows and frames are collected and assembled once after the loop:
    # DataFrame.append no longer exists in pandas >= 2.0, and growing a frame
    # inside a loop copies it on every iteration.
    xval_rows = []
    # The empty first frame fixes the year axis of the mass-balance output
    mb_frames = [pd.DataFrame([], index=np.arange(1850, 2050))]

    for gd in gdirs:
        # Snapshot of this glacier's row, taken before score columns are added
        t_cvdf = cvdf.loc[gd.rgi_id]

        # Observed mass balance
        refmb = gd.get_ref_mb_data().copy()
        ref_mean = refmb.ANNUAL_BALANCE.mean()

        # --- cross-validated per-flowline mu*
        cv_fls = sorted(col for col in t_cvdf.index
                        if 'cv_mustar_flowline' in col)
        mustarlist = t_cvdf[cv_fls].sort_index().dropna().tolist()
        mb_mod = MultipleFlowlineMassBalance(gd, mu_star=mustarlist,
                                             bias=t_cvdf.cv_bias,
                                             use_inversion_flowlines=True)
        refmb['OGGM_cv'] = mb_mod.get_specific_mb(year=refmb.index)

        # Compare their standard deviation
        std_ref = refmb.ANNUAL_BALANCE.std()
        rcor = np.corrcoef(refmb.OGGM_cv, refmb.ANNUAL_BALANCE)[0, 1]
        if std_ref == 0:
            # Constant observations (the original author suspects geodetic
            # values): avoid a division by zero below
            std_ref = refmb.OGGM_cv.std()
            rcor = 1

        # Store the scores. cvdf is local to this function: the sigma ratio
        # and the correlation are kept on it but are not part of the return
        # value.
        xbias = refmb.OGGM_cv.mean() - ref_mean
        cvdf.loc[gd.rgi_id, 'CV_MB_BIAS'] = xbias
        cvdf.loc[gd.rgi_id, 'CV_MB_SIGMA_BIAS'] = (refmb.OGGM_cv.std() /
                                                   std_ref)
        cvdf.loc[gd.rgi_id, 'CV_MB_COR'] = rcor

        # --- interpolated mu_star
        mb_mod = MultipleFlowlineMassBalance(gd,
                                             mu_star=t_cvdf.interp_mustar,
                                             bias=t_cvdf.cv_bias,
                                             use_inversion_flowlines=True)
        refmb['OGGM_mu_interp'] = mb_mod.get_specific_mb(year=refmb.index)
        ibias = refmb.OGGM_mu_interp.mean() - ref_mean
        cvdf.loc[gd.rgi_id, 'INTERP_MB_BIAS'] = ibias

        # --- best guess tstar (calibrated per-flowline mu*)
        mu_fls = sorted(col for col in t_cvdf.index
                        if ('mustar_flowline' in col) and ('cv_' not in col))
        mustarlist = t_cvdf[mu_fls].sort_index().dropna().tolist()
        mb_mod = MultipleFlowlineMassBalance(gd, mu_star=mustarlist,
                                             bias=t_cvdf.bias,
                                             use_inversion_flowlines=True)
        refmb['OGGM_tstar'] = mb_mod.get_specific_mb(year=refmb.index)
        tbias = refmb.OGGM_tstar.mean() - ref_mean
        cvdf.loc[gd.rgi_id, 'tstar_MB_BIAS'] = tbias

        # 1. statistics
        xval_rows.append({
            'Name': gd.name,
            'RGIId': gd.rgi_id,
            'tstar_bias': tbias,
            'xval_bias': xbias,
            'interp_bias': ibias,
            # TODO: how should the per-flowline mu values be handled here?
            'mustar': t_cvdf.mu_star_glacierwide,
            'tstar': t_cvdf.tstar,
            'xval_mustar': t_cvdf.cv_mu_star_glacierwide,
            'xval_tstar': t_cvdf.cv_t_star,
            'interp_mustar': t_cvdf.interp_mustar,
        })

        # 2. mass balance timeseries
        # dstack gives shape (1, n_years, 3); taking [0] keeps the result
        # two-dimensional even for a single year, where squeeze() would
        # collapse it to one dimension.
        mbarray = np.dstack((refmb.ANNUAL_BALANCE,
                             refmb.OGGM_tstar,
                             refmb.OGGM_cv))[0]
        mb_frames.append(pd.DataFrame(
            mbarray,
            columns=[[gd.rgi_id, gd.rgi_id, gd.rgi_id],
                     ['measured', 'calibrated', 'crossvalidated']],
            index=refmb.index))

    mbdf = pd.concat(mb_frames, axis=1)
    if len(mb_frames) > 1:
        # from_tuples cannot infer the number of levels from zero columns,
        # so only convert when at least one glacier was processed
        mbdf.columns = pd.MultiIndex.from_tuples(mbdf.columns)
    mbdf = mbdf.dropna(how='all')

    xval = pd.DataFrame(xval_rows, columns=xval_columns)
    xval.index = xval.RGIId

    return xval, mbdf
def quick_crossval_entity(gdir, full_ref_df=None):
    """Cross-validate the mass-balance calibration of one reference glacier.

    The glacier is recalibrated with a reference table that excludes its own
    entry (``tasks.local_t_star`` followed by ``tasks.mu_star_calibration``).
    The resulting model is then compared with the glacier's reference
    mass-balance data. The recalibration tasks are run on ``gdir`` itself;
    the ``local_mustar`` content read before the recalibration is returned
    next to the cross-validated one.

    Parameters
    ----------
    gdir : oggm.GlacierDirectory
        the glacier to cross-validate
    full_ref_df : pd.DataFrame
        table of all reference glaciers, indexed by RGI id. Required despite
        the ``None`` default: the function fails on ``None``.

    Returns
    -------
    list
        ``[out, ref_rdf]``: ``out`` is a dict with the mass-balance parameters
        read from ``cfg.PARAMS`` and the scores (``std_oggm``, ``std_ref``,
        ``std_quot`` (always NaN), ``bias``, ``rmse``, ``core``); ``ref_rdf``
        is the ``local_mustar`` dict from before the recalibration, extended
        with ``mustar_flowline_XXX`` entries and with every cross-validated
        entry under a ``cv_`` prefix.
    """
    # the reference glaciers, without the one being validated
    tmp_ref_df = full_ref_df.loc[full_ref_df.index != gdir.rgi_id]

    # before the cross-val store the info about "real" mustar
    ref_rdf = gdir.read_json('local_mustar')

    tasks.local_t_star(gdir, ref_df=tmp_ref_df)
    tasks.mu_star_calibration(gdir)

    # read crossvalidated values
    cv_rdf = gdir.read_json('local_mustar')

    # --- MASS-BALANCE MODEL
    mb_mod = MultipleFlowlineMassBalance(gdir, use_inversion_flowlines=True)

    # Mass-balance timeseries, observed and simulated
    refmb = gdir.get_ref_mb_data().copy()
    refmb['OGGM'] = mb_mod.get_specific_mb(year=refmb.index)

    # single glacier scores
    residuals = refmb.OGGM - refmb.ANNUAL_BALANCE
    bias = refmb.OGGM.mean() - refmb.ANNUAL_BALANCE.mean()
    # Root-mean-square error: the square belongs inside the mean. Squaring
    # the mean of the residuals instead would merely return abs(bias).
    rmse = np.sqrt(np.mean(residuals ** 2))
    rcor = np.corrcoef(refmb.OGGM, refmb.ANNUAL_BALANCE)[0, 1]

    ref_std = refmb.ANNUAL_BALANCE.std()
    # unclear how to treat this best
    if ref_std == 0:
        ref_std = refmb.OGGM.std()
        rcor = 1.0

    out = {
        'prcpsf': cfg.PARAMS['prcp_scaling_factor'],
        'tliq': cfg.PARAMS['temp_all_liq'],
        'tmelt': cfg.PARAMS['temp_melt'],
        'tgrad': cfg.PARAMS['temp_default_gradient'],
        'std_oggm': refmb.OGGM.std(),
        'std_ref': ref_std,
        'std_quot': np.nan,
        'bias': bias,
        'rmse': rmse,
        'core': rcor
    }

    # combine "real" mustar and crossvalidated mu_star
    # get rid of mu_star_per_flowline as list of flowlines is ugly to deal with
    for rdf in (cv_rdf, ref_rdf):
        for i, fl in enumerate(rdf['mu_star_per_flowline']):
            rdf['mustar_flowline_{:03d}'.format(i + 1)] = fl
        del rdf['mu_star_per_flowline']

    # copy the cross-validated entries over, except the glacier id
    for col in cv_rdf.keys():
        if 'rgi_id' in col:
            continue
        ref_rdf['cv_' + col] = cv_rdf[col]

    return [out, ref_rdf]
# We store the associated params (taken from the first glacier directory)
mb_calib = gdirs[0].read_pickle('climate_info')['mb_calib_params']
params_path = os.path.join(WORKING_DIR, 'mb_calib_params.json')
with open(params_path, 'w') as fp:
    json.dump(mb_calib, fp)

# And also some statistics
utils.compile_glacier_statistics(gdirs)

# Tests: for all glaciers, the mass-balance around tstar and the
# bias with observation should be approx 0
for gd in gdirs:

    # Constant climate model; bias=0 because of calib!
    mb_mod = MultipleFlowlineMassBalance(
        gd,
        mb_model_class=ConstantMassBalance,
        use_inversion_flowlines=True,
        bias=0,
    )
    mb = mb_mod.get_specific_mb()
    np.testing.assert_allclose(mb, 0, atol=5)  # atol for numerical errors

    # Past climate model: its mean over the observation years must match
    # the mean of the observations
    mb_mod = MultipleFlowlineMassBalance(
        gd,
        mb_model_class=PastMassBalance,
        use_inversion_flowlines=True,
    )
    refmb = gd.get_ref_mb_data().copy()
    refmb['OGGM'] = mb_mod.get_specific_mb(year=refmb.index)
    np.testing.assert_allclose(
        refmb.OGGM.mean(),
        refmb.ANNUAL_BALANCE.mean(),
        atol=5,  # atol for numerical errors
    )

# Log
log.info('Calibration is done!')
# run climate related entity tasks try: # try statement allows to skip errors workflow.climate_tasks( gdirs) # Downloads some files on the first time! except: # if exception is raised, add ID to list and return to beginning of loop excludeIDs.append(rgiid) pass continue ### Mass balance ### from oggm.core.massbalance import MultipleFlowlineMassBalance mbmod = MultipleFlowlineMassBalance(gdir, use_inversion_flowlines=True) years = np.arange(1953, 2016) mb_ts = mbmod.get_specific_mb(year=years) #plt.plot(years, mb_ts); plt.ylabel('SMB (mm yr$^{-1}$)') ### Ice thickness ### list_talks = [ tasks.prepare_for_inversion, # This is a preprocessing task tasks.mass_conservation_inversion, # This does the actual job tasks. filter_inversion_output # This smoothes the thicknesses at the tongue a little ] for task in list_talks: workflow.execute_entity_task(task, gdirs) # plot #graphics.plot_inversion(gdirs, figsize=(8, 7))
def t_star_from_refmb(gdir, mbdf=None, glacierwide=None):
    """Computes the ref t* for the glacier, given a series of MB measurements.

    For every candidate year in the search window a mu is calibrated on the
    climate period centred on that year (half-width
    ``cfg.PARAMS['mu_star_halfperiod']``), and the mean mass balance it
    produces over the observation years is compared with the mean observed
    mass balance. The candidate with the smallest absolute difference is
    selected as t*, and ``t_star`` and ``bias`` are written to the glacier's
    ``climate_info`` json file.

    Parameters
    ----------
    gdir : oggm.GlacierDirectory
        the glacier directory; must not be a tidewater glacier
    mbdf : pd.Series
        the observed MB data indexed by year. If None, read automatically
        from the reference data (``ANNUAL_BALANCE``).
    glacierwide : bool
        if True use the glacier-wide climate series (old but fast method),
        otherwise calibrate mu per flowline for each candidate (new but slow
        method). If None, ``cfg.PARAMS['tstar_search_glacierwide']`` is used.

    Returns
    -------
    dict
        ``t_star``: the selected year, ``bias``: modelled minus observed
        average MB for that year, ``avg_mb_per_mu``: pd.Series with the
        average modelled MB of every candidate year (NaN where no candidate
        could be computed), ``avg_ref_mb``: the average observed MB.

    Raises
    ------
    MassBalanceCalibrationError
        if no candidate year yields a valid mu
    """

    from oggm.core.massbalance import MultipleFlowlineMassBalance

    if glacierwide is None:
        glacierwide = cfg.PARAMS['tstar_search_glacierwide']

    # Be sure we have no marine terminating glacier
    assert not gdir.is_tidewater

    # Reference time series
    if mbdf is None:
        mbdf = gdir.get_ref_mb_data()['ANNUAL_BALANCE']

    # which years to look at
    ref_years = mbdf.index.values

    # Average observed mass-balance
    ref_mb = np.mean(mbdf)

    # Compute one mu candidate per year and the associated statistics
    # Only get the years where we consider looking for tstar
    y0, y1 = cfg.PARAMS['tstar_search_window']
    ci = gdir.read_json('climate_info')
    y0 = y0 or ci['baseline_hydro_yr_0']
    y1 = y1 or ci['baseline_hydro_yr_1']
    years = np.arange(y0, y1 + 1)

    ny = len(years)
    mu_hp = int(cfg.PARAMS['mu_star_halfperiod'])
    # Explicit float dtype: a Series built from an index alone has no data to
    # infer a dtype from (recent pandas falls back to object, older versions
    # warn), and the arithmetic and idxmin below need a numeric series.
    mb_per_mu = pd.Series(index=years, dtype=float)

    if glacierwide:
        # The old (but fast) method to find t*
        _, temp, prcp = mb_yearly_climate_on_glacier(gdir, year_range=[y0, y1])

        # which years to look at
        selind = np.searchsorted(years, mbdf.index)
        sel_temp = np.mean(temp[selind])
        sel_prcp = np.mean(prcp[selind])

        for i, y in enumerate(years):
            # Ignore begin and end: the full window must fit in the series
            if ((i - mu_hp) < 0) or ((i + mu_hp) >= ny):
                continue

            # Compute the mu candidate
            t_avg = np.mean(temp[i - mu_hp:i + mu_hp + 1])
            if t_avg < 1e-3:  # if too cold no melt possible
                continue
            mu = np.mean(prcp[i - mu_hp:i + mu_hp + 1]) / t_avg

            # Apply it
            mb_per_mu[y] = np.mean(sel_prcp - mu * sel_temp)

    else:
        # The new (but slow) method to find t*
        # Compute mu for each climatological period
        fls = gdir.read_pickle('inversion_flowlines')
        for i, y in enumerate(years):
            # Ignore begin and end: the full window must fit in the series
            if ((i - mu_hp) < 0) or ((i + mu_hp) >= ny):
                continue
            # Calibrate the mu for this year
            for fl in fls:
                fl.mu_star_is_valid = False
            try:
                # TODO: this is slow and can be highly optimised
                # it reads the same data over and over again
                _recursive_mu_star_calibration(gdir, fls, y, first_call=True)
                # Compute the MB with it
                mb_mod = MultipleFlowlineMassBalance(gdir, fls, bias=0,
                                                     check_calib_params=False)
                mb_ts = mb_mod.get_specific_mb(fls=fls, year=ref_years)
                mb_per_mu[y] = np.mean(mb_ts)
            except MassBalanceCalibrationError:
                # No valid mu for this candidate year: leave it as NaN
                pass

    # Diff to reference
    diff = (mb_per_mu - ref_mb).dropna()

    if len(diff) == 0:
        raise MassBalanceCalibrationError('No single valid mu candidate for '
                                          'this glacier!')

    # Here we used to keep all possible mu* in order to later select
    # them based on some distance search algorithms.
    # (revision 81bc0923eab6301306184d26462f932b72b84117)
    #
    # As of Jul 2018, we will now stop this non-sense:
    # out of all mu*, let's just pick the one with the smallest bias.
    # It doesn't make much sense, but the same is true for other methods
    # as well -> this is how Ben used to do it, and he is clever
    # Another way would be to pick the closest to today or something
    amin = np.abs(diff).idxmin()

    # Write
    d = gdir.read_json('climate_info')
    d['t_star'] = amin
    d['bias'] = diff[amin]
    gdir.write_json(d, 'climate_info')

    return {
        't_star': amin,
        'bias': diff[amin],
        'avg_mb_per_mu': mb_per_mu,
        'avg_ref_mb': ref_mb
    }