def calc_gamma_map():
    """Estimate the API gamma parameter for every MSWEP grid point and
    write the resulting grid (rows with a valid gamma only) to CSV.

    For each grid point, harmonic anomalies of MSWEP precipitation and
    ASCAT soil moisture (2007-2016) are paired and passed to
    ``estimate_gamma``; points that fail (no ASCAT data, read errors, …)
    are skipped and remain NaN.
    """
    fname = r"D:\data_sets\MSWEP_V21\data\grid_new.csv"

    ascat = HSAF_io()
    mswep = MSWEP_io()

    # New column; stays NaN for points where estimation fails below.
    mswep.grid['gamma'] = np.nan

    for i, (precip, info) in enumerate(mswep.iter_gp()):
        print(i)
        if len(precip.dropna()) == 0:
            continue
        try:
            precip = calc_anomaly(precip, method='harmonic', longterm=False)
            sm = calc_anomaly(
                ascat.read(info.dgg_gpi)['2007-01-01':'2016-12-31'],
                method='harmonic', longterm=False)
            ts = pd.concat((precip, sm), axis=1).values
            mswep.grid.loc[info.name, 'gamma'] = estimate_gamma(ts[:, 0], ts[:, 1])
        except Exception:
            # Best-effort per grid point; was a bare `except:` which also
            # swallowed KeyboardInterrupt/SystemExit — narrowed deliberately.
            continue

    mswep.grid.dropna().to_csv(fname)
def read_data(i_lat=750, i_lon=750):
    """Assemble a single-pixel DataFrame of MERRA2 surface soil moisture,
    Copernicus DMP, and ASCAT sigma40 backscatter on the MERRA2 time axis.

    Parameters
    ----------
    i_lat, i_lon : int, optional
        Row/column index into the DMP image stack. Defaults reproduce the
        pixel that was previously hard-coded.

    Returns
    -------
    pd.DataFrame
        Columns 'time', 'sm', 'DMP', 'sig40_ascat', indexed by the MERRA2
        time stamps.
    """
    ascat = HSAF_io()
    try:
        # DMP time series and the geographic coordinates of the target pixel.
        with Dataset('/Users/u0116961/data_sets/DMP_COPERNICUS/DMP_COPERNICUS_timeseries.nc') as ds:
            time = pd.DatetimeIndex(
                num2date(ds['time'][:], units=ds['time'].units,
                         only_use_python_datetimes=True,
                         only_use_cftime_datetimes=False))
            dmp_ts = pd.DataFrame({'DMP': ds['DMP'][:, i_lat, i_lon]}, index=time)
            lat = ds['lat'][i_lat].data
            lon = ds['lon'][i_lon].data

        # MERRA2 file is now also opened as a context manager: the original
        # called merra2.close() only on the success path and leaked the
        # handle if any read in between raised.
        with Dataset('/Users/u0116961/data_sets/MERRA2/MERRA2_timeseries.nc4') as merra2:
            # Nearest MERRA2 grid cell to the DMP pixel.
            ind_lat = abs(merra2['lat'][:] - lat).argmin()
            ind_lon = abs(merra2['lon'][:] - lon).argmin()

            gpi_ascat = ascat.latlon2gpi(lat, lon)

            time = pd.DatetimeIndex(
                num2date(merra2['time'][:], units=merra2['time'].units,
                         only_use_python_datetimes=True,
                         only_use_cftime_datetimes=False))

            df = pd.DataFrame(
                {
                    'time': time,
                    'sm': merra2['SFMC'][:, ind_lat, ind_lon],
                    # /10: unit rescale of DMP — TODO confirm intended factor.
                    'DMP': dmp_ts.reindex(time).values.flatten() / 10,
                    'sig40_ascat': ascat.read(
                        gpi_ascat, resample_time=True,
                        var='sigma40').reindex(time).values
                },
                index=time)
    finally:
        ascat.close()

    return df
def run_ascat_eval_part(part, parts, ref='ascat'):
    """Evaluate GEOSldas DA/OL runs against daily ASCAT soil moisture for one
    parallelization chunk (`part` of `parts`) of the grid-cell look-up table,
    appending one result row per grid cell to ascat_eval_part<part>.csv.

    `ref` is accepted but not used inside the function body — presumably kept
    for call-site compatibility; TODO confirm.
    """
    import numpy as np
    import pandas as pd
    from pathlib import Path
    from scipy.stats import pearsonr
    from pyldas.interface import GEOSldas_io
    from myprojects.readers.ascat import HSAF_io
    from myprojects.timeseries import calc_anom
    from validation_good_practice.ancillary.paths import Paths

    res_path = Path(
        '~/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/validation_all'
    ).expanduser()
    if not res_path.exists():
        Path.mkdir(res_path, parents=True)
    result_file = res_path / ('ascat_eval_part%i.csv' % part)

    # Pre-computed triple-collocation R² tables (with / without P correction).
    tc_res_pc = pd.read_csv(
        '/Users/u0116961/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/sm_validation/Pcorr/result.csv',
        index_col=0)
    tc_res_nopc = pd.read_csv(
        '/Users/u0116961/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/sm_validation/noPcorr/result.csv',
        index_col=0)

    lut = pd.read_csv(Paths().lut, index_col=0)

    # Split grid cell list for parallelization
    subs = (np.arange(parts + 1) * len(lut) / parts).astype('int')
    subs[-1] = len(lut)
    start = subs[part - 1]
    end = subs[part]
    # Look-up table that contains the grid cells to iterate over
    lut = lut.iloc[start:end, :]

    # DA experiment runs are discovered on disk; names strip the common
    # 'NLv4_M36_US_DA_SMAP_' prefix (20 characters).
    root = Path('/Users/u0116961/data_sets/GEOSldas_runs')
    runs = [run.name for run in root.glob('*_DA_SMAP_*')]
    names = [run[20::] for run in runs]
    runs += ['NLv4_M36_US_OL_Pcorr', 'NLv4_M36_US_OL_noPcorr']
    names += ['Pcorr_OL', 'noPcorr_OL']
    # (Several alternative hard-coded run/name lists were kept here as
    # commented-out code; removed for readability — see version control.)

    # DA runs use the land-analysis collection, OL runs the L4-style gph one.
    dss = [
        GEOSldas_io('tavg3_1d_lnr_Nt', run).timeseries if 'DA' in run else
        GEOSldas_io('SMAP_L4_SM_gph', run).timeseries for run in runs
    ]
    grid = GEOSldas_io('ObsFcstAna', runs[0]).grid

    # Full OL series used only for the snow/temperature validity mask;
    # time axis shifted +2 h to match the model output convention used below.
    ds_full = GEOSldas_io('SMAP_L4_SM_gph', 'NLv4_M36_US_OL_Pcorr').timeseries
    ds_full = ds_full.assign_coords(
        {'time': ds_full['time'].values + pd.to_timedelta('2 hours')})

    ds_obs_smap = GEOSldas_io(
        'ObsFcstAna', 'NLv4_M36_US_DA_SMAP_Pcorr_4K').timeseries['obs_obs']

    modes = ['abs', 'anom_lt', 'anom_st', 'anom_lst']

    ascat = HSAF_io()

    for cnt, (gpi, data) in enumerate(lut.iterrows()):
        print('%i / %i, gpi: %i' % (cnt, len(lut), gpi))

        # Convert domain-global EASE2 indices to local (tile-domain) indices.
        col = int(data.ease2_col - grid.tilegrids.loc['domain', 'i_offg'])
        row = int(data.ease2_row - grid.tilegrids.loc['domain', 'j_offg'])

        res = pd.DataFrame(index=(gpi, ))
        res['col'] = int(data.ease2_col)
        res['row'] = int(data.ease2_row)
        res['lcol'] = col
        res['lrow'] = row

        try:
            # Daily-mean ASCAT reference series; drop duplicate time stamps.
            ts_ascat = ascat.read(
                data['ascat_gpi']).resample('1d').mean().dropna()
            ts_ascat = ts_ascat[~ts_ascat.index.duplicated(keep='first')]
            ts_ascat.name = 'ASCAT'
        except:
            continue

        try:
            # Days on which at least one SMAP species (1 or 2) was assimilated.
            t_df_smap = ds_obs_smap.sel(species=[1, 2]).isel(
                lat=row, lon=col).to_pandas()
            t_ana = t_df_smap[~np.isnan(t_df_smap[1]) |
                              ~np.isnan(t_df_smap[2])].index
            t_ana = pd.Series(1, index=t_ana).resample('1d').mean().dropna().index
        except:
            t_ana = pd.DatetimeIndex([])

        var = 'sm_surface'
        for mode in modes:
            # Reference (ASCAT) series in the requested anomaly space.
            if mode == 'anom_lst':
                ts_ref = calc_anom(ts_ascat.copy(), mode='climatological').dropna()
            elif mode == 'anom_st':
                ts_ref = calc_anom(ts_ascat.copy(), mode='shortterm').dropna()
            elif mode == 'anom_lt':
                ts_ref = calc_anom(ts_ascat.copy(), mode='longterm').dropna()
            else:
                ts_ref = ts_ascat.dropna()

            for run, ts_model in zip(names, dss):
                # TCA-based correlations used to "correct" Pearson R below.
                try:
                    if 'noPcorr' in run:
                        r_asc = np.sqrt(tc_res_nopc.loc[
                            gpi, f'r2_grid_{mode}_m_ASCAT_tc_ASCAT_SMAP_CLSM'])
                        r_mod = np.sqrt(tc_res_nopc.loc[
                            gpi, f'r2_grid_{mode}_m_CLSM_tc_ASCAT_SMAP_CLSM'])
                    else:
                        r_asc = np.sqrt(tc_res_pc.loc[
                            gpi, f'r2_grid_{mode}_m_ASCAT_tc_ASCAT_SMAP_CLSM'])
                        r_mod = np.sqrt(tc_res_pc.loc[
                            gpi, f'r2_grid_{mode}_m_CLSM_tc_ASCAT_SMAP_CLSM'])
                except:
                    r_asc = np.nan
                    r_mod = np.nan

                # Keep only snow-free, unfrozen time steps.
                ind_valid = ds_full.time.values[
                    (ds_full['snow_depth'][:, row, col].values == 0) &
                    (ds_full['soil_temp_layer1'][:, row, col].values > 277.15)]

                ts_mod = ts_model[var][:, row, col].to_series()
                ts_mod.index += pd.to_timedelta('2 hours')
                ts_mod = ts_mod.reindex(ind_valid)

                if mode == 'anom_lst':
                    ts_mod = calc_anom(ts_mod.copy(), mode='climatological').dropna()
                elif mode == 'anom_st':
                    ts_mod = calc_anom(ts_mod.copy(), mode='shortterm').dropna()
                elif mode == 'anom_lt':
                    ts_mod = calc_anom(ts_mod.copy(), mode='longterm').dropna()
                else:
                    ts_mod = ts_mod.dropna()
                ts_mod = ts_mod.resample('1d').mean()

                # NOTE(review): generated OL names are 'Pcorr_OL'/'noPcorr_OL',
                # which do NOT contain the substring 'OL_', so this branch
                # looks unreachable — confirm whether '_OL' was intended.
                if 'OL_' in run:
                    res[f'r_tca_{run}_{mode}'] = r_mod

                # Skill over all overlapping days.
                tmp = pd.DataFrame({1: ts_ref, 2: ts_mod}).dropna()
                res[f'len_{run}_{mode}'] = len(tmp)
                r, p = pearsonr(tmp[1], tmp[2]) if len(tmp) > 10 else (np.nan, np.nan)
                res[f'r_{run}_{mode}'] = r
                res[f'p_{run}_{mode}'] = p
                res[f'r_corr_{run}_{mode}'] = min(r / r_asc, 1)

                # Skill restricted to assimilation days only.
                tmp = pd.DataFrame({
                    1: ts_ref,
                    2: ts_mod
                }).reindex(t_ana).dropna()
                res[f'ana_len_{run}_{mode}'] = len(tmp)
                r, p = pearsonr(tmp[1], tmp[2]) if len(tmp) > 10 else (np.nan, np.nan)
                res[f'ana_r_{run}_{mode}'] = r
                res[f'ana_p_{run}_{mode}'] = p
                res[f'ana_r_corr_{run}_{mode}'] = min(r / r_asc, 1)

        # Append per grid cell so partial results survive a crash.
        if not result_file.exists():
            res.to_csv(result_file, float_format='%0.3f')
        else:
            res.to_csv(result_file, float_format='%0.3f', mode='a', header=False)
def run_ascat_eval_smos_part(part, parts, ref='ascat'):
    """Evaluate SMOS40 MadKF DA iterations (and open loop) against daily ASCAT
    soil moisture over several sub-periods, for chunk `part` of `parts` of the
    grid-cell look-up table. Results are appended per grid cell to
    ascat_eval_smos_part<part>.csv.

    `ref` is accepted but unused in the body — presumably kept for call-site
    compatibility; TODO confirm.
    """
    # Full period, two halves, and four quarters of 2010-2020.
    periods = [
        ['2010-04-01', '2020-04-01'],
        ['2010-04-01', '2015-04-01'],
        ['2015-04-01', '2020-04-01'],
        ['2010-04-01', '2012-10-01'],
        ['2012-10-01', '2015-04-01'],
        ['2015-04-01', '2017-10-01'],
        ['2017-10-01', '2020-04-01'],
    ]

    res_path = Path(
        f'~/Documents/work/MadKF/CLSM/SMOS40/validation/multiperiod/ascat'
    ).expanduser()
    if not res_path.exists():
        Path.mkdir(res_path, parents=True)
    result_file = res_path / f'ascat_eval_smos_part{part}.csv'

    lut = pd.read_csv(Paths().lut, index_col=0)

    # Split grid cell list for parallelization
    subs = (np.arange(parts + 1) * len(lut) / parts).astype('int')
    subs[-1] = len(lut)
    start = subs[part - 1]
    end = subs[part]
    # Look-up table that contains the grid cells to iterate over
    lut = lut.iloc[start:end, :]

    names = ['open_loop'] + [f'SMOS40_it62{i}' for i in range(1, 5)]
    runs = ['US_M36_SMOS40_TB_OL_noScl'
            ] + [f'US_M36_SMOS40_TB_MadKF_DA_it62{i}' for i in range(1, 5)]

    grid = LDAS_io('ObsFcstAna', runs[0]).grid

    dss_xhourly = [LDAS_io('xhourly', run).timeseries for run in runs]
    dss_obs_ana = [
        LDAS_io('ObsFcstAna', run).timeseries['obs_ana'] for run in runs
    ]

    modes = ['absolute', 'longterm', 'shortterm']

    ascat = HSAF_io()

    for cnt, (gpi, data) in enumerate(lut.iterrows()):
        print('%i / %i' % (cnt, len(lut)))

        # Domain-global EASE2 indices -> local tile-domain indices.
        col = int(data.ease2_col - grid.tilegrids.loc['domain', 'i_offg'])
        row = int(data.ease2_row - grid.tilegrids.loc['domain', 'j_offg'])

        res = pd.DataFrame(index=(gpi, ))
        res['col'] = int(data.ease2_col)
        res['row'] = int(data.ease2_row)
        res['lcol'] = col
        res['lrow'] = row

        try:
            ts_ascat = ascat.read(
                data['ascat_gpi'],
                resample_time=False).resample('1d').mean().dropna()
            ts_ascat = ts_ascat[~ts_ascat.index.duplicated(keep='first')]
            ts_ascat.name = 'ASCAT'
        except:
            continue

        # Common analysis days: days with at least one valid obs per run.
        dfs = [
            ds.sel(species=[1, 2]).isel(
                lat=row, lon=col).to_pandas().resample('1d').mean()
            for ds in dss_obs_ana
        ]
        idx = [df[np.any(~np.isnan(df), axis=1)].index for df in dfs]
        # NOTE(review): there are 5 runs (idx[0]..idx[4]) but only idx[0..3]
        # are intersected here — confirm whether idx[4] was meant to be
        # included as well.
        t_ana = idx[0].intersection(idx[1]).intersection(idx[2]).intersection(
            idx[3])

        var = 'sm_surface'
        for mode in modes:
            if mode == 'absolute':
                ts_ref = ts_ascat.copy()
            else:
                ts_ref = calc_anom(ts_ascat.copy(),
                                   longterm=(mode == 'longterm')).dropna()

            for run, ts_model in zip(names, dss_xhourly):
                # Snow-free, unfrozen time steps only.
                ind = (ts_model['snow_mass'][:, row, col].values == 0) & (
                    ts_model['soil_temp_layer1'][:, row, col].values > 277.15)
                ts_mod = ts_model[var][:, row, col].to_series().loc[ind]
                ts_mod.index += pd.to_timedelta('2 hours')

                if mode == 'absolute':
                    ts_mod = ts_mod.dropna()
                else:
                    ts_mod = calc_anom(ts_mod,
                                       longterm=mode == 'longterm').dropna()
                ts_mod = ts_mod.reindex(t_ana).dropna()

                for i, p in enumerate(periods):
                    # NOTE(review): loop variable `p` (the period) is
                    # overwritten by the pearsonr p-value below; harmless as
                    # written (slicing happens first) but fragile.
                    tmp = pd.DataFrame({
                        1: ts_ref,
                        2: ts_mod
                    })[p[0]:p[1]].dropna()
                    res[f'p{i}_len_{run}_{mode}'] = len(tmp)
                    r, p = pearsonr(
                        tmp[1], tmp[2]) if len(tmp) > 10 else (np.nan, np.nan)
                    res[f'p{i}_r_{run}_{mode}'] = r

        if not result_file.exists():
            res.to_csv(result_file, float_format='%0.3f')
        else:
            res.to_csv(result_file, float_format='%0.3f', mode='a', header=False)
def EC_ascat_smap_ismn_ldas():
    """Extended collocation of in-situ (ISMN), ASCAT, SMAP, LDAS open-loop and
    LDAS DA surface soil moisture per station; appends one row per station to
    ec_ascat_smap_ismn_ldas.csv.
    """
    result_file = Path('/Users/u0116961/Documents/work/extended_collocation/ec_ascat_smap_ismn_ldas.csv')

    names = ['insitu', 'ascat', 'smap', 'ol', 'da']
    combs = list(combinations(names, 2))

    ds_ol = LDAS_io('xhourly', 'US_M36_SMAP_TB_OL_noScl').timeseries
    ds_da = LDAS_io('xhourly', 'US_M36_SMAP_TB_MadKF_DA_it11').timeseries
    # NOTE(review): ds_da_ana is loaded but never used below — confirm intent.
    ds_da_ana = LDAS_io('ObsFcstAna', 'US_M36_SMAP_TB_MadKF_DA_it11').timeseries['obs_ana']

    tg = LDAS_io().grid.tilegrids

    modes = ['absolute','longterm','shortterm']

    ismn = ISMN_io()
    # Restart offset — skips the first 70 stations (presumably from a
    # previous partial run); TODO confirm this is still wanted.
    ismn.list = ismn.list.iloc[70::]
    ascat = HSAF_io()
    smap = SMAP_io()

    lut = pd.read_csv(Paths().lut, index_col=0)

    i = 0
    for meta, ts_insitu in ismn.iter_stations(surface_only=True):
        i += 1
        logging.info('%i/%i' % (i, len(ismn.list)))

        try:
            # Require at least 25 daily in-situ values in the study period.
            if len(ts_insitu := ts_insitu['2015-04-01':'2020-04-01'].resample('1d').mean().dropna()) < 25:
                continue
        except:
            continue

        res = pd.DataFrame(meta.copy()).transpose()
        col = meta.ease_col
        row = meta.ease_row
        # Local tile indices -> domain-global indices for the LUT lookup.
        colg = col + tg.loc['domain', 'i_offg']  # col / lon
        rowg = row + tg.loc['domain', 'j_offg']  # row / lat

        tmp_lut = lut[(lut.ease2_col == colg) & (lut.ease2_row == rowg)]
        if len(tmp_lut) == 0:
            continue
        gpi_smap = tmp_lut.index.values[0]
        gpi_ascat = tmp_lut.ascat_gpi.values[0]

        try:
            ts_ascat = ascat.read(gpi_ascat, resample_time=False).resample('1d').mean().dropna()
            ts_ascat = ts_ascat[~ts_ascat.index.duplicated(keep='first')]
            ts_ascat.name = 'ASCAT'
        except:
            continue

        ts_smap = smap.read(gpi_smap)

        if (ts_ascat is None) | (ts_smap is None):
            continue

        # Open-loop / DA model series, snow-free and unfrozen steps only;
        # +2 h aligns model output time stamps with the observation times.
        ind = (ds_ol['snow_mass'][:, row, col].values == 0)&(ds_ol['soil_temp_layer1'][:, row, col].values > 277.15)
        ts_ol = ds_ol['sm_surface'][:, row, col].to_series().loc[ind].dropna()
        ts_ol.index += pd.to_timedelta('2 hours')

        ind = (ds_da['snow_mass'][:, row, col].values == 0)&(ds_da['soil_temp_layer1'][:, row, col].values > 277.15)
        ts_da = ds_da['sm_surface'][:, row, col].to_series().loc[ind].dropna()
        ts_da.index += pd.to_timedelta('2 hours')

        for mode in modes:
            if mode == 'absolute':
                ts_ins = ts_insitu.copy()
                ts_asc = ts_ascat.copy()
                ts_smp = ts_smap.copy()
                ts_ol = ts_ol.copy()
                ts_da = ts_da.copy()
            else:
                # NOTE(review): for mode 'shortterm' these calls operate on
                # the *longterm anomalies* produced in the previous loop
                # iteration, not on the original series (the 'absolute'
                # branch rebinds the names only once, in the first
                # iteration). Other functions in this file recompute each
                # anomaly from the raw series — confirm whether this
                # chaining is intentional.
                ts_ins = calc_anom(ts_ins.copy(), longterm=(mode=='longterm')).dropna()
                ts_asc = calc_anom(ts_asc.copy(), longterm=(mode == 'longterm')).dropna()
                ts_smp = calc_anom(ts_smp.copy(), longterm=(mode == 'longterm')).dropna()
                ts_ol = calc_anom(ts_ol.copy(), longterm=(mode == 'longterm')).dropna()
                ts_da = calc_anom(ts_da.copy(), longterm=(mode == 'longterm')).dropna()

            tmp = pd.DataFrame(dict(zip(names, [ts_ins, ts_asc, ts_smp, ts_ol, ts_da]))).dropna()
            corr = tmp.corr()

            # Extended collocation; model-based pairs declared error-correlated.
            ec_res = ecol(tmp[['insitu', 'ascat', 'smap', 'ol', 'da']], correlated=[['smap', 'ol'], ['smap', 'da'], ['ol', 'da']])

            res[f'len_{mode}'] = len(tmp)
            # NOTE(review): these correlation columns carry no {mode} suffix,
            # so each mode iteration overwrites the previous one — confirm.
            for c in combs:
                res[f'corr_{"_".join(c)}'] = corr.loc[c]
            res[f'err_corr_smap_ol_{mode}'] = ec_res['err_corr_smap_ol']
            res[f'err_corr_smap_da_{mode}'] = ec_res['err_corr_smap_da']
            res[f'err_corr_ol_da_{mode}'] = ec_res['err_corr_ol_da']

        if not result_file.exists():
            res.to_csv(result_file, float_format='%0.4f')
        else:
            res.to_csv(result_file, float_format='%0.4f', mode='a', header=False)
def TCA_insitu_evaluation():
    """Triple collocation (model / ASCAT / in-situ) of three LDAS runs at
    every ISMN station; appends RMSE and beta estimates per station to a
    validation CSV. Stations failing anywhere in the pipeline are skipped.
    """
    result_file = r'D:\work\LDAS\2018-06_rmse_uncertainty\TCA_evaluation\validation.csv'

    noDA = LDAS_io('xhourly', 'US_M36_SMOS40_noDA_cal_scaled')
    DA_const_err = LDAS_io('xhourly', 'US_M36_SMOS40_DA_cal_scaled')
    DA_varia_err = LDAS_io('xhourly', 'US_M36_SMOS40_DA_cal_scl_errfile')

    # Analysis time stamps of the DA run (used to subsample model output).
    t_ana = pd.DatetimeIndex(
        LDAS_io('ObsFcstAna', 'US_M36_SMOS40_DA_cal_scaled').timeseries.time.
        values).sort_values()

    ascat = HSAF_io()
    gpi_list = pd.read_csv(
        r"D:\data_sets\ASCAT\warp5_grid\pointlist_warp_conus.csv", index_col=0)

    ismn = ISMN_io(col_offs=noDA.grid.tilegrids.loc['domain', 'i_offg'],
                   row_offs=noDA.grid.tilegrids.loc['domain', 'j_offg'])

    runs = ['noDA', 'DA_const_err', 'DA_varia_err']
    tss = [noDA.timeseries, DA_const_err.timeseries, DA_varia_err.timeseries]

    # Single-element lists kept for easy extension to more vars/modes.
    variables = [
        'sm_surface',
    ]
    modes = [
        'absolute',
    ]

    for i, (meta, ts_insitu) in enumerate(ismn.iter_stations()):
        logging.info('%i/%i' % (i, len(ismn.list)))

        try:
            res = pd.DataFrame(meta.copy()).transpose()
            col = meta.ease_col
            row = meta.ease_row

            gpi = lonlat2gpi(meta.lon, meta.lat, gpi_list)

            ts_asc = ascat.read(gpi, resample_time=False)
            if ts_asc is None:
                continue
            ts_asc.name = 'ascat'
            ts_asc = pd.DataFrame(ts_asc)

            for var in variables:
                for mode in modes:
                    ts_ins = ts_insitu[var].dropna()
                    ts_ins.name = 'insitu'
                    ts_ins = pd.DataFrame(ts_ins)

                    for run, ts_model in zip(runs, tss):
                        # Snow-free, unfrozen time steps; +2 h aligns model
                        # output with observation time stamps.
                        ind = (ts_model['snow_mass'][row, col].values == 0) & (
                            ts_model['soil_temp_layer1'][row, col].values >
                            277.15)
                        ts_mod = ts_model[var][row, col].to_series().loc[ind]
                        ts_mod.index += pd.to_timedelta('2 hours')
                        ts_mod = ts_mod.loc[t_ana].dropna()
                        ts_mod.name = 'model'
                        ts_mod = pd.DataFrame(ts_mod)

                        # Temporal matching of ascat/insitu onto model times
                        # within a +/- 12 h window.
                        matched = df_match(ts_mod, ts_asc, ts_ins, window=0.5)
                        data = ts_mod.join(matched[0][[
                            'ascat',
                        ]]).join(matched[1][[
                            'insitu',
                        ]]).dropna()

                        # tc_res[1] = RMSEs, tc_res[2] = scaling betas,
                        # ordered (model, ascat, insitu).
                        tc_res = TCA(data['model'].values, data['ascat'].values,
                                     data['insitu'].values)

                        res['RMSE_model_' + run + '_' + mode + '_' +
                            var] = tc_res[1][0]
                        res['RMSE_ascat_' + run + '_' + mode + '_' +
                            var] = tc_res[1][1]
                        res['RMSE_insitu_' + run + '_' + mode + '_' +
                            var] = tc_res[1][2]
                        res['beta_ascat_' + run + '_' + mode + '_' +
                            var] = tc_res[2][1]
                        res['beta_insitu_' + run + '_' + mode + '_' +
                            var] = tc_res[2][2]
                        res['len_' + mode + '_' + var] = len(data)

            if (os.path.isfile(result_file) == False):
                res.to_csv(result_file, float_format='%0.4f')
            else:
                res.to_csv(result_file,
                           float_format='%0.4f',
                           mode='a',
                           header=False)
        except:
            continue
def reformat_ascat():
    """Resample ASCAT soil moisture onto the NoahMP Belgium grid and write it
    to a NetCDF time-series file, then transpose the chunking into an image
    file via `ncks`.

    NOTE(review): block structure reconstructed — the NoahMP `with` block must
    extend over the output-file creation because `ds['lat'].dtype` /
    `ds['lon'].dtype` are read there; confirm against the original layout.
    """
    outfile_ts = '/data_sets/LIS/ASCAT/timeseries.nc'
    outfile_img = '/data_sets/LIS/ASCAT/images.nc'

    # Land/sea (or domain) mask; flipped to match the NetCDF row order.
    with rasterio.open('/data_sets/LIS/NoahMP_belgium/mask.tif') as ds:
        mask = np.flipud(ds.read()[0, :, :])

    with Dataset('/data_sets/LIS/NoahMP_belgium/images.nc') as ds:
        lats = ds.variables['lat'][:, :]
        lons = ds.variables['lon'][:, :]
        timeunit = ds['time'].units
        dates = ds['time'][:]
        pydates = pd.to_datetime(num2date(dates, units=timeunit))

        io = HSAF_io()
        gpis = pd.read_csv(
            '/data_sets/LIS/NoahMP_belgium/pointlist_Belgium_warp.csv',
            index_col=0)

        # Mask out cells outside the domain, then enumerate the remaining ones.
        lats.mask[mask == 0] = True
        lons.mask[mask == 0] = True
        inds = np.where(~lats.mask)

        # Nearest WARP grid point (and its cell file) for each target pixel.
        tmp_list = pd.DataFrame({
            'row': inds[0],
            'col': inds[1],
            'gpi': np.full(len(inds[0]), 0, dtype='int64'),
            'cell': np.full(len(inds[0]), 0, dtype='int64')
        })
        for idx, data in tmp_list.iterrows():
            print('%i / %i' % (idx + 1, len(tmp_list)))
            r, c = data['row'], data['col']
            # Nearest neighbour in (squared) lat/lon distance.
            gpi = ((gpis.lat - lats[r, c])**2 +
                   (gpis.lon - lons[r, c])**2).idxmin()
            tmp_list.loc[idx, 'gpi'] = gpi
            tmp_list.loc[idx, 'cell'] = gpis.loc[gpi, 'cell']
        tmp_list.to_csv('/data_sets/LIS/NoahMP_belgium/tmp_list.csv')
        # tmp_list = pd.read_csv('/data_sets/LIS/NoahMP_belgium/tmp_list.csv', index_col=0)

        with Dataset(outfile_ts, mode='w') as res:
            res.createDimension('lat', lats.shape[0])
            res.createDimension('lon', lons.shape[1])
            res.createDimension('time', len(dates))
            # Chunking (time,1,1) optimizes per-pixel time-series access.
            res.createVariable('lat',
                               ds['lat'].dtype,
                               dimensions=('lat', 'lon'),
                               chunksizes=(1, 1),
                               zlib=True)
            res.createVariable('lon',
                               ds['lon'].dtype,
                               dimensions=('lat', 'lon'),
                               chunksizes=(1, 1),
                               zlib=True)
            res.createVariable('time',
                               dates.dtype,
                               dimensions=('time', ),
                               chunksizes=(len(dates), ),
                               zlib=True)
            res.variables['lat'][:, :] = lats
            res.variables['lon'][:, :] = lons
            res.variables['time'][:] = dates
            # Coordinate attributes following CF-conventions
            res.variables['time'].setncatts({
                'long_name': 'time',
                'units': timeunit
            })
            res.variables['lon'].setncatts({
                'long_name': 'longitude',
                'units': 'degrees_east'
            })
            res.variables['lat'].setncatts({
                'long_name': 'latitude',
                'units': 'degrees_north'
            })
            res.createVariable('SoilMoisture',
                               'float32',
                               dimensions=('time', 'lat', 'lon'),
                               chunksizes=(len(dates), 1, 1),
                               zlib=True)
            res.variables['SoilMoisture'].setncatts({'missing_value': -9999})

            # Read each WARP gpi once and fan the series out to all pixels
            # mapped to it; iterate by cell to minimize file switching.
            i = 0
            for cell in tmp_list['cell'].unique():
                for gpi in tmp_list.loc[tmp_list['cell'] == cell,
                                        'gpi'].unique():
                    print('%i / %i' % (i, len(tmp_list)))
                    cell_gpi_list = tmp_list.loc[tmp_list['gpi'] == gpi]
                    try:
                        ts = io.read(gpi, resample_time=False).resample(
                            '6h').mean().dropna()[pydates].values
                        np.place(ts, np.isnan(ts), -9999)
                        for idx, data in cell_gpi_list.iterrows():
                            i += 1
                            res.variables['SoilMoisture'][:, data['row'],
                                                          data['col']] = ts
                    except:
                        print('gpi %i failed' % gpi)
                        continue

    # Rechunk the time-series file into image-oriented chunks with NCO.
    cmdBase = 'ncks -4 -L 4 --cnk_dmn time,1 --cnk_dmn lat,%i --cnk_dmn lon,%i ' % lats.shape
    cmd = ' '.join([cmdBase, outfile_ts, outfile_img])
    os.system(cmd)
def run_ascat_eval_part(part, parts):
    """Evaluate three LDAS runs (OL, short-scaled DA, MadKF DA) against daily
    ASCAT soil moisture for chunk `part` of `parts` of the grid-cell LUT;
    appends one row per grid cell to ascat_eval_part<part>.csv.

    NOTE(review): this re-defines `run_ascat_eval_part` — a function of the
    same name (different signature) exists earlier in this file; if both live
    in one module the later definition shadows the earlier one. Confirm which
    one callers expect.
    """
    res_path = Path(
        '/Users/u0116961/Documents/work/LDAS/2020-03_scaling/validation')

    result_file = res_path / ('ascat_eval_part%i.csv' % part)

    lut = pd.read_csv(Paths().lut, index_col=0)

    # Split grid cell list for parallelization
    subs = (np.arange(parts + 1) * len(lut) / parts).astype('int')
    subs[-1] = len(lut)
    start = subs[part - 1]
    end = subs[part]
    # Look-up table that contains the grid cells to iterate over
    lut = lut.iloc[start:end, :]

    names = ['open_loop', 'SMOSSMAP_short', 'MadKF_SMOS40']
    runs = [
        'US_M36_SMAP_TB_OL_noScl', 'US_M36_SMAP_TB_DA_scl_SMOSSMAP_short',
        'US_M36_SMOS40_TB_MadKF_DA_it613'
    ]

    dss = [LDAS_io('xhourly', run).timeseries for run in runs]
    grid = LDAS_io().grid

    # t_ana = pd.DatetimeIndex(LDAS_io('ObsFcstAna', runs[0]).timeseries.time.values).sort_values()
    # SMAP vs. SMOS analysis-day indices are derived per run below.
    ds_obs_smap = (LDAS_io('ObsFcstAna',
                           'US_M36_SMAP_TB_OL_noScl').timeseries['obs_ana'])
    ds_obs_smos = (LDAS_io(
        'ObsFcstAna', 'US_M36_SMOS40_TB_MadKF_DA_it613').timeseries['obs_ana'])

    modes = ['absolute', 'longterm', 'shortterm']

    ascat = HSAF_io()

    for cnt, (gpi, data) in enumerate(lut.iterrows()):
        print('%i / %i' % (cnt, len(lut)))

        # Domain-global EASE2 indices -> local tile-domain indices.
        col = int(data.ease2_col - grid.tilegrids.loc['domain', 'i_offg'])
        row = int(data.ease2_row - grid.tilegrids.loc['domain', 'j_offg'])

        res = pd.DataFrame(index=(gpi, ))
        res['col'] = int(data.ease2_col)
        res['row'] = int(data.ease2_row)
        res['lcol'] = col
        res['lrow'] = row

        try:
            ts_ascat = ascat.read(
                data['ascat_gpi'],
                resample_time=False).resample('1d').mean().dropna()
            ts_ascat = ts_ascat[~ts_ascat.index.duplicated(keep='first')]
            ts_ascat.name = 'ASCAT'
        except:
            continue

        # Days with at least one valid SMAP / SMOS analysis observation.
        t_df_smap = ds_obs_smap.sel(species=[1, 2]).isel(lat=row,
                                                         lon=col).to_pandas()
        t_df_smos = ds_obs_smos.sel(species=[1, 2]).isel(lat=row,
                                                         lon=col).to_pandas()
        t_ana_smap = t_df_smap[~np.isnan(t_df_smap[1]) |
                               ~np.isnan(t_df_smap[2])].resample(
                                   '1d').mean().index
        t_ana_smos = t_df_smos[~np.isnan(t_df_smos[1]) |
                               ~np.isnan(t_df_smos[2])].resample(
                                   '1d').mean().index

        var = 'sm_surface'
        for mode in modes:
            if mode == 'absolute':
                ts_ref = ts_ascat.copy()
            else:
                ts_ref = calc_anom(ts_ascat.copy(),
                                   longterm=(mode == 'longterm')).dropna()

            for run, ts_model in zip(names, dss):
                # MadKF run assimilates SMOS; the others SMAP.
                t_ana = t_ana_smos if run == 'MadKF_SMOS40' else t_ana_smap

                # Snow-free, unfrozen steps; +2 h aligns model with obs times.
                ind = (ts_model['snow_mass'][:, row, col].values == 0) & (
                    ts_model['soil_temp_layer1'][:, row, col].values > 277.15)
                ts_mod = ts_model[var][:, row, col].to_series().loc[ind]
                ts_mod.index += pd.to_timedelta('2 hours')

                if mode == 'absolute':
                    ts_mod = ts_mod.dropna()
                else:
                    ts_mod = calc_anom(ts_mod,
                                       longterm=mode == 'longterm').dropna()
                ts_mod = ts_mod.resample('1d').mean()

                # Skill over all overlapping days.
                tmp = pd.DataFrame({1: ts_ref, 2: ts_mod}).dropna()
                res['len_' + run + '_' + mode] = len(tmp)
                r, p = pearsonr(tmp[1],
                                tmp[2]) if len(tmp) > 10 else (np.nan, np.nan)
                res['r_' + run + '_' + mode] = r
                # res['p_' + run + '_' + mode] = p
                # res['rmsd_' + run + '_' + mode] = np.sqrt(((tmp[1] - tmp[2]) ** 2).mean())
                res['ubrmsd_' + run + '_' + mode] = np.sqrt(
                    (((tmp[1] - tmp[1].mean()) -
                      (tmp[2] - tmp[2].mean()))**2).mean())

                # Skill restricted to assimilation days.
                tmp = pd.DataFrame({
                    1: ts_ref,
                    2: ts_mod
                }).reindex(t_ana).dropna()
                res['ana_len_' + run + '_' + mode] = len(tmp)
                r, p = pearsonr(tmp[1],
                                tmp[2]) if len(tmp) > 10 else (np.nan, np.nan)
                res['ana_r_' + run + '_' + mode] = r
                # res['ana_p_' + run + '_' + mode] = p
                # res['ana_rmsd_' + run + '_' + mode] = np.sqrt(((tmp[1] - tmp[2]) ** 2).mean())
                res['ana_ubrmsd_' + run + '_' + mode] = np.sqrt(
                    (((tmp[1] - tmp[1].mean()) -
                      (tmp[2] - tmp[2].mean()))**2).mean())

        if not result_file.exists():
            res.to_csv(result_file, float_format='%0.3f')
        else:
            res.to_csv(result_file, float_format='%0.3f', mode='a', header=False)
def run(part):
    """Run the KF / EnKF / MadKF assimilation experiment for one chunk of the
    ISMN station list (part `part` of 15) and append per-station skill metrics
    (bootstrap TCA correlations, innovation checks, estimated Q/R) to a CSV.
    """
    parts = 15

    smos = SMOS_io()
    ismn = ISMN_io()
    ascat = HSAF_io(ext=None)
    mswep = MSWEP_io()

    # Median Q from MadKF API/CONUS run.
    Q_avg = 12.
    R_avg = 74.

    # Select only SCAN and USCRN
    ismn.list = ismn.list[(ismn.list.network == 'SCAN') |
                          (ismn.list.network == 'USCRN')]
    ismn.list.index = np.arange(len(ismn.list))

    # Split station list in 4 parts for parallelization
    subs = (np.arange(parts + 1) * len(ismn.list) / parts).astype('int')
    subs[-1] = len(ismn.list)
    start = subs[part - 1]
    end = subs[part]
    ismn.list = ismn.list.iloc[start:end, :]

    # Output location depends on the host (Windows dev box / HPC / other).
    if platform.system() == 'Windows':
        result_file = os.path.join('D:', 'work', 'MadKF', 'CONUS', 'ismn_eval',
                                   'result_part%i.csv' % part)
    elif platform.system() == 'Linux':
        result_file = os.path.join('/', 'scratch', 'leuven', '320', 'vsc32046',
                                   'output', 'MadKF', 'CONUS', 'ismn_eval',
                                   'result_part%i.csv' % part)
    else:
        result_file = os.path.join('/', 'work', 'MadKF', 'CONUS', 'ismn_eval',
                                   'parts2', 'result_part%i.csv' % part)

    dt = ['2010-01-01', '2015-12-31']

    for cnt, (station, insitu) in enumerate(ismn.iter_stations(surf_depth=0.1)):

        # station = ismn.list.loc[978,:]
        # insitu = ismn.read_first_surface_layer('SCAN','Los_Lunas_Pmc')

        print('%i / %i' % (cnt, len(ismn.list)))

        # if True:
        try:
            # Collocate station with MSWEP grid cell, ASCAT gpi and SMOS gpi.
            gpi = lonlat2gpi(station.lon, station.lat, mswep.grid)
            mswep_idx = mswep.grid.index[mswep.grid.dgg_gpi == gpi][0]
            smos_gpi = mswep.grid.loc[mswep_idx, 'smos_gpi']

            precip = mswep.read(mswep_idx)
            sm_ascat = ascat.read(gpi)
            sm_smos = smos.read(smos_gpi) * 100.  # unit rescale — TODO confirm

            if (precip is None) | (sm_ascat is None) | (sm_smos is None) | (
                    insitu is None):
                continue

            # Moving-average anomalies over the study period for all inputs.
            precip = calc_anomaly(precip[dt[0]:dt[1]],
                                  method='moving_average',
                                  longterm=False)
            sm_ascat = calc_anomaly(sm_ascat[dt[0]:dt[1]],
                                    method='moving_average',
                                    longterm=False)
            sm_smos = calc_anomaly(sm_smos[dt[0]:dt[1]],
                                   method='moving_average',
                                   longterm=False)
            insitu = calc_anomaly(insitu[dt[0]:dt[1]].resample('1d').first(),
                                  method='moving_average',
                                  longterm=False).tz_localize(None)

            # 1: forcing (precip), 2: ASCAT obs, 3: SMOS obs, 4: in-situ.
            df = pd.DataFrame({
                1: precip,
                2: sm_ascat,
                3: sm_smos,
                4: insitu
            },
                              index=pd.date_range(dt[0], dt[1]))
            # Missing forcing is treated as zero precipitation.
            df.loc[np.isnan(df[1]), 1] = 0.
            n = len(df)

            if len(df.dropna()) < 50:
                continue

            gamma = mswep.grid.loc[mswep_idx, 'gamma']
            api = API(gamma=gamma)

            # --- OL run ---
            x_OL = np.full(n, np.nan)
            model = deepcopy(api)
            for t, f in enumerate(precip.values):
                x = model.step(f)
                x_OL[t] = x

            # ----- Calculate uncertainties -----
            # convert (static) forcing to model uncertainty
            P_avg = Q_avg / (1 - gamma**2)

            # calculate TCA based uncertainty and scaling coefficients
            tmp_df = pd.DataFrame({
                1: x_OL,
                2: sm_ascat,
                3: sm_smos
            },
                                  index=pd.date_range(dt[0], dt[1])).dropna()
            snr, r_tc, err, beta = tc(tmp_df)
            P_TC = err[0]**2
            Q_TC = P_TC * (1 - gamma**2)
            R_TC = (err[1] / beta[1])**2
            H_TC = beta[1]

            # Calculate RMSD based uncertainty
            R_rmsd = (np.nanmean(
                (tmp_df[1].values - H_TC * tmp_df[2].values)**2) - P_avg)
            if R_rmsd < 0:
                # NOTE(review): sign flip keeps R positive when the estimate
                # goes negative — confirm this is the intended fallback.
                R_rmsd *= -1
            # -----------------------------------

            # ----- Run KF using TCA-based uncertainties -----
            api_kf = API(gamma=gamma, Q=Q_TC)
            x_kf, P, R_innov_kf, checkvar_kf, K_kf = \
                KF(api_kf, df[1].values.copy(), df[2].values.copy(), R_TC, H=H_TC)

            # ----- Run EnKF using static uncertainties -----
            forc_pert = ['normal', 'additive', Q_avg]
            obs_pert = ['normal', 'additive', R_avg]
            x_avg, P, R_innov_avg, checkvar_avg, K_avg = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(), forc_pert, obs_pert, H=H_TC, n_ens=50)

            # (RMSD-based EnKF variant was here, commented out.)

            # ----- Run MadKF -----
            # Retry up to 5 times, keeping the iteration whose innovation
            # variance check is closest to 1.
            cnt = 0
            checkvar_madkf = 9999.
            while ((checkvar_madkf < 0.95) |
                   (checkvar_madkf > 1.05)) & (cnt < 5):
                cnt += 1
                tmp_x_madkf, P_madkf, R_madkf, Q_madkf, H_madkf, R_innov_madkf, tmp_checkvar_madkf, K_madkf = \
                    MadKF(api, df[1].values.copy(), df[2].values.copy(), n_ens=100, n_iter=20)
                if abs(1 - tmp_checkvar_madkf) < abs(1 - checkvar_madkf):
                    checkvar_madkf = tmp_checkvar_madkf
                    x_madkf = tmp_x_madkf

            df['x_ol'] = x_OL
            df['x_kf'] = x_kf
            df['x_avg'] = x_avg
            # df['x_rmsd'] = x_rmsd
            df['x_madkf'] = x_madkf

            # Bootstrap TCA correlations against (insitu, SMOS) triplets.
            ci_l_ol, ci_m_ol, ci_u_ol = bootstrap_tc(df[[4, 3, 'x_ol']])
            ci_l_kf, ci_m_kf, ci_u_kf = bootstrap_tc(df[[4, 3, 'x_kf']])
            ci_l_avg, ci_m_avg, ci_u_avg = bootstrap_tc(df[[4, 3, 'x_avg']])
            # ci_l_rmsd, ci_m_rmsd, ci_u_rmsd = bootstrap_tc(df[[4,3,'x_rmsd']])
            ci_l_madkf, ci_m_madkf, ci_u_madkf = bootstrap_tc(
                df[[4, 3, 'x_madkf']])

            corr = df.dropna().corr()
            n_all = len(df.dropna())

            result = pd.DataFrame(
                {
                    'lon': station.lon,
                    'lat': station.lat,
                    'network': station.network,
                    'station': station.station,
                    'gpi': gpi,
                    'n_all': n_all,
                    'Q_est_madkf': Q_madkf,
                    'R_est_madkf': R_madkf,
                    'corr_ol': corr[4]['x_ol'],
                    'corr_kf': corr[4]['x_kf'],
                    'corr_avg': corr[4]['x_avg'],
                    'corr_madkf': corr[4]['x_madkf'],
                    'r_ol_l': ci_l_ol,
                    'r_ol_m': ci_m_ol,
                    'r_ol_u': ci_u_ol,
                    'r_kf_l': ci_l_kf,
                    'r_kf_m': ci_m_kf,
                    'r_kf_u': ci_u_kf,
                    'r_avg_l': ci_l_avg,
                    'r_avg_m': ci_m_avg,
                    'r_avg_u': ci_u_avg,
                    'r_madkf_l': ci_l_madkf,
                    'r_madkf_m': ci_m_madkf,
                    'r_madkf_u': ci_u_madkf,
                    'checkvar_kf': checkvar_kf,
                    'checkvar_avg': checkvar_avg,
                    'checkvar_madkf': checkvar_madkf,
                    'R_innov_kf': R_innov_kf,
                    'R_innov_avg': R_innov_avg,
                    'R_innov_madkf': R_innov_madkf
                },
                index=(station.name, ))

            if (os.path.isfile(result_file) == False):
                result.to_csv(result_file, float_format='%0.4f')
            else:
                result.to_csv(result_file,
                              float_format='%0.4f',
                              mode='a',
                              header=False)
        except:
            print('GPI failed.')
            continue

    ascat.close()
    mswep.close()
def plot_suspicious_stations(root):
    """Plot OL / DA / in-situ time series for stations where the TCA-based R²
    drops under DA while the Pearson R² improves, saving one PNG per
    station/mode/variable combination.

    NOTE(review): the `root` parameter is never used — all paths are
    hard-coded below; confirm whether it should replace them.
    """
    statlist = pd.read_csv('/Users/u0116961/Documents/work/MadKF/CLSM/suspicious_stations/station_list_r_diff.csv', index_col=0)

    rmsd_root = 'US_M36_SMAP_TB_DA_SM_PROXY_'
    # NOTE(review): rmsd_exps is computed but unused below — confirm.
    rmsd_exps = list(np.sort([x.name.split(rmsd_root)[1] for x in Path('/Users/u0116961/data_sets/LDASsa_runs').glob('*SM_PROXY*')]))

    ds_ol = LDAS_io('xhourly', 'US_M36_SMAP_TB_OL_scaled_4K_obserr').timeseries
    ds_da = LDAS_io('xhourly', 'US_M36_SMAP_TB_DA_scaled_4K_obserr').timeseries

    ts_ana = LDAS_io('ObsFcstAna', 'US_M36_SMAP_TB_DA_scaled_4K_obserr').timeseries['obs_obs']
    t_ana = pd.DatetimeIndex(ts_ana.time.values).sort_values()

    ascat = HSAF_io()
    gpi_list = pd.read_csv(ascat.root / 'warp5_grid' / 'pointlist_warp_conus.csv', index_col=0)

    ismn = ISMN_io()

    variables = ['sm_surface', 'sm_rootzone']
    modes = ['absolute', 'longterm', 'shortterm']

    # Restrict the station list to the "suspicious" ones, keyed network_station.
    ismn.list.index = ismn.list.network + '_' + ismn.list.station
    # NOTE(review): the first reindex() call discards its result (no-op);
    # only the assignment on the next line has an effect.
    ismn.list.reindex(statlist.index)
    ismn.list = ismn.list.reindex(statlist.index)

    for i, (meta, ts_insitu) in enumerate(ismn.iter_stations(surface_only=False)):

        # Resume helper: skip stations already present in a `tmp_res`
        # variable if one exists in this scope.
        # NOTE(review): nothing in this function defines `tmp_res`, so this
        # only triggers under interactive/debug use — confirm.
        if 'tmp_res' in locals():
            if (meta.network in tmp_res) & (meta.station in tmp_res):
                print(f'Skipping {i}')
                continue

        try:
            res = pd.DataFrame(meta.copy()).transpose()
            col = meta.ease_col
            row = meta.ease_row

            gpi = lonlat2gpi(meta.lon, meta.lat, gpi_list)

            # Rescale ASCAT % saturation to volumetric-like units — TODO
            # confirm the 0.6 porosity factor.
            ts_ascat = ascat.read(gpi) / 100 * 0.6
            if ts_ascat is None:
                continue

            for mode in modes:
                for var in variables:

                    tmp = statlist[(statlist.network==meta.network)&(statlist.station==meta.station)]
                    dpr = tmp[f'diff_pearsonr2_{mode}_{var}'].values[0]
                    dtr = tmp[f'diff_tcar2_{mode}_{var}'].values[0]

                    # Only the "suspicious" combination: TCA R2 got worse
                    # while Pearson R2 got better.
                    if not ((dtr < 0) & (dpr > 0)):
                        continue

                    if mode == 'absolute':
                        ts_asc = ts_ascat.dropna()
                    else:
                        ts_asc = calc_anom(ts_ascat, longterm=(mode == 'longterm')).dropna()
                    ts_asc.name = 'ascat'
                    ts_asc = pd.DataFrame(ts_asc)

                    if mode == 'absolute':
                        ts_ins = ts_insitu[var].dropna()
                    else:
                        ts_ins = calc_anom(ts_insitu[var], longterm=(mode == 'longterm')).dropna()
                    ts_ins.name = 'insitu'
                    ts_ins = pd.DataFrame(ts_ins)

                    # Open loop: snow-free, unfrozen, restricted to days with
                    # at least one assimilated observation.
                    ind = (ds_ol['snow_mass'].isel(lat=row, lon=col).values == 0) & \
                          (ds_ol['soil_temp_layer1'].isel(lat=row, lon=col).values > 277.15)
                    ts_ol = ds_ol[var].isel(lat=row, lon=col).to_series().loc[ind]
                    ts_ol.index += pd.to_timedelta('2 hours')
                    ind_obs = np.bitwise_or.reduce(~np.isnan(ts_ana[:, :, row, col].values), 1)
                    if mode == 'absolute':
                        ts_ol = ts_ol.reindex(t_ana[ind_obs]).dropna()
                    else:
                        ts_ol = calc_anom(ts_ol.reindex(t_ana[ind_obs]), longterm=(mode == 'longterm')).dropna()
                    ts_ol.name = 'open_loop'
                    ts_ol = pd.DataFrame(ts_ol)

                    # DA run: same masking as the open loop.
                    ind = (ds_da['snow_mass'].isel(lat=row, lon=col).values == 0) & \
                          (ds_da['soil_temp_layer1'].isel(lat=row, lon=col).values > 277.15)
                    ts_da = ds_da[var].isel(lat=row, lon=col).to_series().loc[ind]
                    ts_da.index += pd.to_timedelta('2 hours')
                    ind_obs = np.bitwise_or.reduce(~np.isnan(ts_ana[:, :, row, col].values), 1)
                    if mode == 'absolute':
                        ts_da = ts_da.reindex(t_ana[ind_obs]).dropna()
                    else:
                        ts_da = calc_anom(ts_da.reindex(t_ana[ind_obs]), longterm=(mode == 'longterm')).dropna()
                    ts_da.name = 'DA_4K'
                    ts_da = pd.DataFrame(ts_da)

                    # Temporal matching onto OL times (+/- 12 h window).
                    matched = df_match(ts_ol, ts_da, ts_asc, ts_ins, window=0.5)
                    data = ts_ol.join(matched[0]['DA_4K']).join(matched[1]['ascat']).join(matched[2]['insitu']).dropna()

                    dpr_triplets = data.corr()['DA_4K']['insitu'] - data.corr()['open_loop']['insitu']
                    if dpr_triplets < 0:
                        continue

                    f = plt.figure(figsize=(15, 5))
                    sns.lineplot(data=data[['open_loop', 'DA_4K', 'insitu']], dashes=False, linewidth=1.5, axes=plt.gca())
                    plt.title(f'{meta.network} / {meta.station} ({var}): d(Pearson R2) = {dpr_triplets:.3f} , d(TCA R2) = {dtr:.3f}')

                    fbase = Path('/Users/u0116961/Documents/work/MadKF/CLSM/suspicious_stations/timeseries')
                    fname = fbase / f'{mode}_{var}_{meta.network}_{meta.station}.png'

                    f.savefig(fname, dpi=300, bbox_inches='tight')
                    plt.close()
        except:
            continue
def noahmp_version_comparison(part, parts):
    """Compare Noah-MP 3.6 vs 4.0.1 LIS output per grid cell; append results to CSV.

    For part `part` of `parts` (1-based parallelization split), iterates the
    unmasked grid cells, computes mean/std differences and R2 between the two
    model versions for SM/ST layers, LAI, and SWE, and TCA-based R2 against
    ASCAT and SMAP for absolute values and anomalies.

    Parameters
    ----------
    part : int
        1-based index of the chunk to process.
    parts : int
        Total number of chunks.
    """
    result_file = Path(
        f'/Users/u0116961/Documents/work/LIS/noahmp_version_comparison/result_part{part}.csv'
    )
    if not result_file.parent.exists():
        Path.mkdir(result_file.parent, parents=True)

    ascat = HSAF_io()
    smap = SMAP_io()

    noah3 = Dataset('/Users/u0116961/data_sets/LIS/noahmp36/timeseries.nc')
    noah4 = Dataset('/Users/u0116961/data_sets/LIS/noahmp401/timeseries.nc')

    lats = noah3['lat'][:, :]
    lons = noah3['lon'][:, :]
    # Only iterate over valid (unmasked) land cells.
    ind_lat, ind_lon = np.where(~lats.mask)

    # Split grid cell list for parallelization
    subs = (np.arange(parts + 1) * len(ind_lat) / parts).astype('int')
    subs[-1] = len(ind_lat)
    start = subs[part - 1]
    end = subs[part]

    # Look-up table that contains the grid cells to iterate over
    ind_lat = ind_lat[start:end]
    ind_lon = ind_lon[start:end]

    for i, (i_r, i_c) in enumerate(zip(ind_lat, ind_lon)):
        # 1-based progress counter; also used as the result row index.
        i += 1
        logging.info(f'{i} / {len(ind_lat)}')

        lat = lats[i_r, i_c]
        lon = lons[i_r, i_c]

        res = pd.DataFrame({'lat': lat, 'lon': lon}, index=(i, ))

        # Version-difference stats per variable; SM*/ST* carry a trailing
        # layer digit (e.g. 'SM3' -> variable 'SM', layer index 2).
        for v in [
                'SM1', 'SM2', 'SM3', 'SM4', 'ST1', 'ST2', 'ST3', 'ST4', 'LAI',
                'SWE'
        ]:
            if ('SM' in v) | ('ST' in v):
                res[f'mdiff_{v}'], res[f'sdiff_{v}'], res[f'r2_{v}'] = \
                    stats(noah4[v[0:2]][:, int(v[-1])-1, i_r, i_c],
                          noah3[v[0:2]][:, int(v[-1])-1, i_r, i_c])
            else:
                res[f'mdiff_{v}'], res[f'sdiff_{v}'], res[f'r2_{v}'] = \
                    stats(noah4[v][:, i_r, i_c], noah3[v][:, i_r, i_c])

        time = pd.DatetimeIndex(
            num2date(noah3['time'][:],
                     units=noah3['time'].units,
                     only_use_python_datetimes=True,
                     only_use_cftime_datetimes=False))

        # Surface soil moisture (layer 0) from both versions.
        df = pd.DataFrame(
            {
                'noahmp36': noah3['SM'][:, 0, i_r, i_c],
                'noahmp401': noah4['SM'][:, 0, i_r, i_c]
            },
            index=time)

        # Satellite references; fall back to empty series so the concat /
        # dropna below still works when no data is available.
        ts_ascat = ascat.read(lat, lon)
        if ts_ascat is None:
            ts_ascat = pd.Series(name='ascat')
        ts_smap = smap.read(lat, lon)
        if ts_smap is None:
            ts_smap = pd.Series(name='smap')

        df = pd.concat((df, ts_ascat, ts_smap), axis='columns').dropna()

        # TCA-based R2 for absolute values first, then anomalies. NOTE: the
        # 'anom' pass mutates df in place, so order matters here.
        for mode in ['abs', 'anom']:
            if mode == 'anom':
                for c in df.columns.values:
                    df[c] = calc_anom(df[c], longterm=False)
            res[f'len_{mode}'] = len(df)
            # Extended collocation; the two model versions are treated as
            # correlated (non-independent) inputs.
            ec_res = ecol(df, correlated=[['noahmp36', 'noahmp401']])
            for c in df.columns.values:
                # Convert SNR [dB] to R2 = snr / (1 + snr).
                snr = 10**(ec_res[f'snr_{c}'] / 10)
                res[f'tcr2_{mode}_{c}'] = snr / (1 + snr)

        # Create the file on first write, append afterwards.
        if not result_file.exists():
            res.to_csv(result_file, float_format='%0.4f')
        else:
            res.to_csv(result_file, float_format='%0.4f', mode='a', header=False)
def run(cell=None, gpi=None):
    """Run API / KF / EnKF / MadKF assimilation experiments for one MSWEP cell.

    Iterates the grid points of `cell` (or the single `gpi`), assimilates
    ASCAT soil moisture into the API model using several uncertainty
    estimates (TCA-based, static averages, RMSD-based, MadKF-adaptive), and
    appends one result row per grid point to a per-cell CSV file.

    Parameters
    ----------
    cell : int, optional
        MSWEP cell number to process. Required if `gpi` is not given.
    gpi : int, optional
        Single grid point to process; its cell is looked up automatically.
    """
    if (cell is None) and (gpi is None):
        print('No cell/gpi specified.')
        return

    smos = SMOS_io()
    ascat = HSAF_io(ext=None)
    mswep = MSWEP_io()

    if gpi is not None:
        cell = mswep.gpi2cell(gpi)

    # Median Q/R from TC run.
    Q_avg = 12.
    R_avg = 74.

    if platform.system() == 'Windows':
        result_file = os.path.join('D:', 'work', 'MadKF', 'CONUS',
                                   'result_%04i.csv' % cell)
    else:
        result_file = os.path.join('/', 'scratch', 'leuven', '320', 'vsc32046',
                                   'output', 'MadKF', 'CONUS',
                                   'result_%04i.csv' % cell)

    dt = ['2010-01-01', '2015-12-31']

    for data, info in mswep.iter_cell(cell, gpis=gpi):
        try:
            precip = mswep.read(info.name)
            sm_ascat = ascat.read(info.dgg_gpi)
            sm_smos = smos.read(info.smos_gpi) * 100.

            if (precip is None) or (sm_ascat is None) or (sm_smos is None):
                continue

            precip = calc_anomaly(precip[dt[0]:dt[1]],
                                  method='moving_average',
                                  longterm=False)
            sm_ascat = calc_anomaly(sm_ascat[dt[0]:dt[1]],
                                    method='moving_average',
                                    longterm=False)
            sm_smos = calc_anomaly(sm_smos[dt[0]:dt[1]],
                                   method='moving_average',
                                   longterm=False)

            api = API(gamma=info.gamma)

            # Regularize time steps
            df = pd.DataFrame({
                1: precip,
                2: sm_ascat,
                3: sm_smos
            }, index=pd.date_range(dt[0], dt[1]))

            # Bookkeeping: number of invalid (NaN) time steps per input.
            n_inv_precip = len(np.where(np.isnan(df[1]))[0])
            n_inv_ascat = len(np.where(np.isnan(df[2]))[0])
            n_inv_smos = len(np.where(np.isnan(df[3]))[0])
            n_inv_asc_smo = len(np.where(np.isnan(df[2]) & np.isnan(df[3]))[0])

            # API forcing gaps are treated as zero precipitation.
            df.loc[np.isnan(df[1]), 1] = 0.

            # --- get OL ts ---
            OL = np.full(len(precip), np.nan)
            model = API(gamma=info.gamma)
            for t, f in enumerate(df[1].values):
                x = model.step(f)
                OL[t] = x

            # collocate OL and satellite data sets.
            df2 = pd.DataFrame({
                1: OL,
                2: sm_ascat,
                3: sm_smos
            }, index=pd.date_range(dt[0], dt[1])).dropna()

            # ----- Calculate uncertainties -----
            # convert (static) forcing to model uncertainty
            P_avg = Q_avg / (1 - info.gamma**2)

            # calculate TCA based uncertainty and scaling coefficients
            snr, err, beta = tcol_snr(df2[1].values, df2[2].values, df2[3].values)
            P_TC = err[0]**2
            Q_TC = P_TC * (1 - info.gamma**2)
            R_TC = (err[1] / beta[1])**2
            H_TC = beta[1]

            # Calculate RMSD based uncertainty (magnitude only; the model
            # uncertainty P_avg is subtracted out first).
            R_rmsd = abs(np.nanmean((df2[1].values - H_TC * df2[2].values)**2) - P_avg)
            # -----------------------------------

            # ----- Run 2D KF using TCA-based uncertainties -----
            api_kf = API(gamma=info.gamma, Q=Q_TC)
            R_2D = np.array([(err[1] / beta[1])**2, (err[2] / beta[2])**2])
            H_2D = np.array([beta[1]**(-1), beta[2]**(-1)])
            x_2d, P, checkvar1_2d, checkvar2_2d, checkvar3_2d, K1_2d, K2_2d = \
                KF_2D(api_kf, df[1].values.copy(), df[2].values.copy(),
                      df[3].values.copy(), R_2D, H=H_2D)

            # ----- Run KF using TCA-based uncertainties -----
            api_kf = API(gamma=info.gamma, Q=Q_TC)
            x_kf, P, R_innov_kf, checkvar_kf, K_kf = \
                KF(api_kf, df[1].values.copy(), df[2].values.copy(), R_TC, H=H_TC)

            # ----- Run EnKF using TCA-based uncertainties -----
            forc_pert = ['normal', 'additive', Q_TC]
            obs_pert = ['normal', 'additive', R_TC]
            x_tc, P, R_innov_tc, checkvar_tc, K_tc = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(),
                     forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run EnKF using static uncertainties -----
            forc_pert = ['normal', 'additive', Q_avg]
            obs_pert = ['normal', 'additive', R_avg]
            x_avg, P, R_innov_avg, checkvar_avg, K_avg = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(),
                     forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run EnKF using RMSD-based uncertainties (corrected for model uncertainty) -----
            t = timeit.default_timer()
            forc_pert = ['normal', 'additive', Q_avg]
            obs_pert = ['normal', 'additive', R_rmsd]
            x_rmsd, P, R_innov_rmsd, checkvar_rmsd, K_rmsd = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(),
                     forc_pert, obs_pert, H=H_TC, n_ens=50)
            t_enkf = timeit.default_timer() - t

            # ----- Run MadKF -----
            t = timeit.default_timer()
            x_madkf, P, R_madkf, Q_madkf, H_madkf, R_innov_madkf, checkvar_madkf, K_madkf = \
                MadKF(api, df[1].values.copy(), df[2].values.copy(),
                      n_ens=100, n_iter=20)
            t_madkf = timeit.default_timer() - t

            result = pd.DataFrame(
                {
                    'lon': info.lon,
                    'lat': info.lat,
                    'col': info.col,
                    'row': info.row,
                    'P_tc': P_TC,
                    'Q_tc': Q_TC,
                    'R_tc': R_TC,
                    'H_tc': H_TC,
                    'K_tc': K_tc,
                    'R_innov_tc': R_innov_tc,
                    'checkvar_tc': checkvar_tc,
                    'K_kf': K_kf,
                    'R_innov_kf': R_innov_kf,
                    'checkvar_kf': checkvar_kf,
                    'K1_2d': K1_2d,
                    'K2_2d': K2_2d,
                    'checkvar1_2d': checkvar1_2d,
                    'checkvar2_2d': checkvar2_2d,
                    'checkvar3_2d': checkvar3_2d,
                    'P_avg': P_avg,
                    'Q_avg': Q_avg,
                    'R_avg': R_avg,
                    'K_avg': K_avg,
                    'R_innov_avg': R_innov_avg,
                    'checkvar_avg': checkvar_avg,
                    'R_rmsd': R_rmsd,
                    'K_rmsd': K_rmsd,
                    'R_innov_rmsd': R_innov_rmsd,
                    'checkvar_rmsd': checkvar_rmsd,
                    'P_madkf': Q_madkf / (1 - info.gamma**2),
                    'Q_madkf': Q_madkf,
                    'R_madkf': R_madkf,
                    'H_madkf': H_madkf,
                    'K_madkf': K_madkf,
                    'R_innov_madkf': R_innov_madkf,
                    'checkvar_madkf': checkvar_madkf,
                    't_enkf': t_enkf,
                    't_madkf': t_madkf,
                    'n_inv_precip': n_inv_precip,
                    'n_inv_ascat': n_inv_ascat,
                    'n_inv_smos': n_inv_smos,
                    'n_inv_asc_smo': n_inv_asc_smo
                },
                index=(info.name, ))

            # Create the file on first write, append afterwards.
            if not os.path.isfile(result_file):
                result.to_csv(result_file, float_format='%0.4f')
            else:
                result.to_csv(result_file, float_format='%0.4f', mode='a',
                              header=False)
        except Exception:
            # Best-effort per grid point: narrowed from a bare `except:` so
            # that e.g. KeyboardInterrupt is no longer swallowed.
            print('GPI failed.')
            continue

    ascat.close()
    mswep.close()