def read_data(i_lat=750, i_lon=750):
    """Read collocated DMP, MERRA2 and ASCAT time series for a single pixel.

    Parameters
    ----------
    i_lat : int, optional
        Row index into the DMP_COPERNICUS latitude dimension (default: 750).
    i_lon : int, optional
        Column index into the DMP_COPERNICUS longitude dimension (default: 750).

    Returns
    -------
    pd.DataFrame
        Frame indexed by the MERRA2 time axis with columns 'time', 'sm'
        (MERRA2 'SFMC' surface soil moisture), 'DMP' (dry-matter productivity
        divided by 10 -- presumably a stored scale factor, confirm against the
        product documentation), and 'sig40_ascat' (ASCAT sigma40 backscatter
        reindexed to the MERRA2 dates).
    """
    ascat = HSAF_io()
    try:
        # DMP pixel time series plus the geographic coordinates of the pixel.
        with Dataset('/Users/u0116961/data_sets/DMP_COPERNICUS/DMP_COPERNICUS_timeseries.nc') as ds:
            time = pd.DatetimeIndex(
                num2date(ds['time'][:], units=ds['time'].units,
                         only_use_python_datetimes=True,
                         only_use_cftime_datetimes=False))
            dmp_ts = pd.DataFrame({'DMP': ds['DMP'][:, i_lat, i_lon]}, index=time)
            lat = ds['lat'][i_lat].data
            lon = ds['lon'][i_lon].data

        gpi_ascat = ascat.latlon2gpi(lat, lon)

        # MERRA2: nearest-neighbour pixel to the DMP coordinates; the MERRA2
        # time axis defines the index of the returned frame.
        with Dataset('/Users/u0116961/data_sets/MERRA2/MERRA2_timeseries.nc4') as merra2:
            ind_lat = abs(merra2['lat'][:] - lat).argmin()
            ind_lon = abs(merra2['lon'][:] - lon).argmin()

            time = pd.DatetimeIndex(
                num2date(merra2['time'][:], units=merra2['time'].units,
                         only_use_python_datetimes=True,
                         only_use_cftime_datetimes=False))

            df = pd.DataFrame(
                {
                    'time': time,
                    'sm': merra2['SFMC'][:, ind_lat, ind_lon],
                    'DMP': dmp_ts.reindex(time).values.flatten() / 10,
                    'sig40_ascat': ascat.read(gpi_ascat,
                                              resample_time=True,
                                              var='sigma40').reindex(time).values
                },
                index=time)
    finally:
        # The original code leaked both handles when an exception was raised
        # before the explicit close() calls; 'with' + 'finally' fixes that.
        ascat.close()

    return df
def run(part):
    """Evaluate KF/EnKF/MadKF assimilation against ISMN in-situ stations.

    Processes the `part`-th slice (1-based) of the SCAN/USCRN station list and
    appends one result row per station to a platform-dependent CSV file.
    Stations that fail for any reason are skipped with a message.

    Parameters
    ----------
    part : int
        Which of the `parts` station-list slices to process (1 .. parts).
    """
    parts = 15

    smos = SMOS_io()
    ismn = ISMN_io()
    ascat = HSAF_io(ext=None)
    mswep = MSWEP_io()

    # Median Q from MadKF API/CONUS run.
    Q_avg = 12.
    R_avg = 74.

    # Select only SCAN and USCRN
    ismn.list = ismn.list[(ismn.list.network == 'SCAN') |
                          (ismn.list.network == 'USCRN')]
    ismn.list.index = np.arange(len(ismn.list))

    # Split station list into `parts` slices for parallelization.
    subs = (np.arange(parts + 1) * len(ismn.list) / parts).astype('int')
    subs[-1] = len(ismn.list)
    start = subs[part - 1]
    end = subs[part]
    ismn.list = ismn.list.iloc[start:end, :]

    if platform.system() == 'Windows':
        result_file = os.path.join('D:', 'work', 'MadKF', 'CONUS', 'ismn_eval',
                                   'result_part%i.csv' % part)
    elif platform.system() == 'Linux':
        result_file = os.path.join('/', 'scratch', 'leuven', '320', 'vsc32046',
                                   'output', 'MadKF', 'CONUS', 'ismn_eval',
                                   'result_part%i.csv' % part)
    else:
        result_file = os.path.join('/', 'work', 'MadKF', 'CONUS', 'ismn_eval',
                                   'parts2', 'result_part%i.csv' % part)

    dt = ['2010-01-01', '2015-12-31']

    for cnt, (station, insitu) in enumerate(ismn.iter_stations(surf_depth=0.1)):
        print('%i / %i' % (cnt, len(ismn.list)))
        try:
            gpi = lonlat2gpi(station.lon, station.lat, mswep.grid)
            mswep_idx = mswep.grid.index[mswep.grid.dgg_gpi == gpi][0]
            smos_gpi = mswep.grid.loc[mswep_idx, 'smos_gpi']

            precip = mswep.read(mswep_idx)
            sm_ascat = ascat.read(gpi)
            sm_smos = smos.read(smos_gpi) * 100.

            if (precip is None) or (sm_ascat is None) or \
                    (sm_smos is None) or (insitu is None):
                continue

            # Anomalies w.r.t. a moving-average climatology over the study period.
            precip = calc_anomaly(precip[dt[0]:dt[1]],
                                  method='moving_average', longterm=False)
            sm_ascat = calc_anomaly(sm_ascat[dt[0]:dt[1]],
                                    method='moving_average', longterm=False)
            sm_smos = calc_anomaly(sm_smos[dt[0]:dt[1]],
                                   method='moving_average', longterm=False)
            insitu = calc_anomaly(insitu[dt[0]:dt[1]].resample('1d').first(),
                                  method='moving_average',
                                  longterm=False).tz_localize(None)

            # Regularize all series onto a common daily index.
            df = pd.DataFrame({
                1: precip,
                2: sm_ascat,
                3: sm_smos,
                4: insitu
            }, index=pd.date_range(dt[0], dt[1]))
            # Missing forcing is treated as zero precipitation.
            df.loc[np.isnan(df[1]), 1] = 0.
            n = len(df)
            if len(df.dropna()) < 50:
                continue

            gamma = mswep.grid.loc[mswep_idx, 'gamma']
            api = API(gamma=gamma)

            # --- OL (open-loop) run: propagate the API model without updates ---
            x_OL = np.full(n, np.nan)
            model = deepcopy(api)
            for t, f in enumerate(precip.values):
                x = model.step(f)
                x_OL[t] = x

            # ----- Calculate uncertainties -----
            # Convert (static) forcing uncertainty to model uncertainty.
            P_avg = Q_avg / (1 - gamma**2)

            # TCA-based uncertainty and scaling coefficients.
            tmp_df = pd.DataFrame({
                1: x_OL,
                2: sm_ascat,
                3: sm_smos
            }, index=pd.date_range(dt[0], dt[1])).dropna()
            snr, r_tc, err, beta = tc(tmp_df)
            P_TC = err[0]**2
            Q_TC = P_TC * (1 - gamma**2)
            R_TC = (err[1] / beta[1])**2
            H_TC = beta[1]

            # RMSD-based observation uncertainty (corrected for model
            # uncertainty; sign-flipped if the correction overshoots).
            R_rmsd = (np.nanmean(
                (tmp_df[1].values - H_TC * tmp_df[2].values)**2) - P_avg)
            if R_rmsd < 0:
                R_rmsd *= -1
            # -----------------------------------

            # ----- Run KF using TCA-based uncertainties -----
            api_kf = API(gamma=gamma, Q=Q_TC)
            x_kf, P, R_innov_kf, checkvar_kf, K_kf = \
                KF(api_kf, df[1].values.copy(), df[2].values.copy(), R_TC, H=H_TC)

            # ----- Run EnKF using static uncertainties -----
            forc_pert = ['normal', 'additive', Q_avg]
            obs_pert = ['normal', 'additive', R_avg]
            x_avg, P, R_innov_avg, checkvar_avg, K_avg = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(),
                     forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run MadKF -----
            # Retry up to 5 times until the innovation variance check is close
            # to 1, keeping the run with the best check value.
            # NOTE(review): Q_madkf/R_madkf etc. written to the result come
            # from the LAST attempt, not necessarily the best one -- confirm
            # this is intended.
            n_tries = 0
            checkvar_madkf = 9999.
            while ((checkvar_madkf < 0.95) or (checkvar_madkf > 1.05)) and \
                    (n_tries < 5):
                n_tries += 1
                tmp_x_madkf, P_madkf, R_madkf, Q_madkf, H_madkf, \
                    R_innov_madkf, tmp_checkvar_madkf, K_madkf = \
                    MadKF(api, df[1].values.copy(), df[2].values.copy(),
                          n_ens=100, n_iter=20)
                if abs(1 - tmp_checkvar_madkf) < abs(1 - checkvar_madkf):
                    checkvar_madkf = tmp_checkvar_madkf
                    x_madkf = tmp_x_madkf

            df['x_ol'] = x_OL
            df['x_kf'] = x_kf
            df['x_avg'] = x_avg
            df['x_madkf'] = x_madkf

            # Bootstrapped TC correlations of each analysis vs. in-situ / SMOS.
            ci_l_ol, ci_m_ol, ci_u_ol = bootstrap_tc(df[[4, 3, 'x_ol']])
            ci_l_kf, ci_m_kf, ci_u_kf = bootstrap_tc(df[[4, 3, 'x_kf']])
            ci_l_avg, ci_m_avg, ci_u_avg = bootstrap_tc(df[[4, 3, 'x_avg']])
            ci_l_madkf, ci_m_madkf, ci_u_madkf = bootstrap_tc(
                df[[4, 3, 'x_madkf']])

            corr = df.dropna().corr()
            n_all = len(df.dropna())

            result = pd.DataFrame(
                {
                    'lon': station.lon,
                    'lat': station.lat,
                    'network': station.network,
                    'station': station.station,
                    'gpi': gpi,
                    'n_all': n_all,
                    'Q_est_madkf': Q_madkf,
                    'R_est_madkf': R_madkf,
                    'corr_ol': corr[4]['x_ol'],
                    'corr_kf': corr[4]['x_kf'],
                    'corr_avg': corr[4]['x_avg'],
                    'corr_madkf': corr[4]['x_madkf'],
                    'r_ol_l': ci_l_ol,
                    'r_ol_m': ci_m_ol,
                    'r_ol_u': ci_u_ol,
                    'r_kf_l': ci_l_kf,
                    'r_kf_m': ci_m_kf,
                    'r_kf_u': ci_u_kf,
                    'r_avg_l': ci_l_avg,
                    'r_avg_m': ci_m_avg,
                    'r_avg_u': ci_u_avg,
                    'r_madkf_l': ci_l_madkf,
                    'r_madkf_m': ci_m_madkf,
                    'r_madkf_u': ci_u_madkf,
                    'checkvar_kf': checkvar_kf,
                    'checkvar_avg': checkvar_avg,
                    'checkvar_madkf': checkvar_madkf,
                    'R_innov_kf': R_innov_kf,
                    'R_innov_avg': R_innov_avg,
                    'R_innov_madkf': R_innov_madkf
                },
                index=(station.name, ))

            # Create the file with a header on first write, append afterwards.
            if not os.path.isfile(result_file):
                result.to_csv(result_file, float_format='%0.4f')
            else:
                result.to_csv(result_file, float_format='%0.4f',
                              mode='a', header=False)
        except Exception as e:
            # Best-effort per-station processing: log and move on. (A bare
            # 'except:' here previously also swallowed KeyboardInterrupt.)
            print('GPI failed: %s' % e)
            continue

    ascat.close()
    mswep.close()
def run(cell=None, gpi=None):
    """Run the filter-comparison experiment for one MSWEP cell (or one gpi).

    For every grid point in the cell, assimilates ASCAT soil moisture into the
    API model with KF/KF_2D/EnKF/MadKF under different uncertainty estimates
    (TCA-based, static, RMSD-based) and appends one diagnostics row per grid
    point to a platform-dependent CSV file. Failing grid points are skipped
    with a message.

    Parameters
    ----------
    cell : int, optional
        MSWEP cell number to process.
    gpi : int, optional
        Single grid point to process; its cell is looked up automatically.
        At least one of `cell` / `gpi` must be given.
    """
    if (cell is None) and (gpi is None):
        print('No cell/gpi specified.')
        return

    smos = SMOS_io()
    ascat = HSAF_io(ext=None)
    mswep = MSWEP_io()

    if gpi is not None:
        cell = mswep.gpi2cell(gpi)

    # Median Q/R from TC run.
    Q_avg = 12.
    R_avg = 74.

    if platform.system() == 'Windows':
        result_file = os.path.join('D:', 'work', 'MadKF', 'CONUS',
                                   'result_%04i.csv' % cell)
    else:
        result_file = os.path.join('/', 'scratch', 'leuven', '320', 'vsc32046',
                                   'output', 'MadKF', 'CONUS',
                                   'result_%04i.csv' % cell)

    dt = ['2010-01-01', '2015-12-31']

    for data, info in mswep.iter_cell(cell, gpis=gpi):
        try:
            precip = mswep.read(info.name)
            sm_ascat = ascat.read(info.dgg_gpi)
            sm_smos = smos.read(info.smos_gpi) * 100.

            if (precip is None) or (sm_ascat is None) or (sm_smos is None):
                continue

            # Anomalies w.r.t. a moving-average climatology over the study period.
            precip = calc_anomaly(precip[dt[0]:dt[1]],
                                  method='moving_average', longterm=False)
            sm_ascat = calc_anomaly(sm_ascat[dt[0]:dt[1]],
                                    method='moving_average', longterm=False)
            sm_smos = calc_anomaly(sm_smos[dt[0]:dt[1]],
                                   method='moving_average', longterm=False)

            api = API(gamma=info.gamma)

            # Regularize time steps onto a common daily index.
            df = pd.DataFrame({
                1: precip,
                2: sm_ascat,
                3: sm_smos
            }, index=pd.date_range(dt[0], dt[1]))

            # Bookkeeping: number of invalid samples per data set.
            n_inv_precip = len(np.where(np.isnan(df[1]))[0])
            n_inv_ascat = len(np.where(np.isnan(df[2]))[0])
            n_inv_smos = len(np.where(np.isnan(df[3]))[0])
            n_inv_asc_smo = len(np.where(np.isnan(df[2]) & np.isnan(df[3]))[0])

            # Missing forcing is treated as zero precipitation.
            df.loc[np.isnan(df[1]), 1] = 0.

            # --- get OL (open-loop) ts: propagate the model without updates ---
            OL = np.full(len(precip), np.nan)
            model = API(gamma=info.gamma)
            for t, f in enumerate(df[1].values):
                x = model.step(f)
                OL[t] = x

            # Collocate OL and satellite data sets.
            df2 = pd.DataFrame({
                1: OL,
                2: sm_ascat,
                3: sm_smos
            }, index=pd.date_range(dt[0], dt[1])).dropna()

            # ----- Calculate uncertainties -----
            # Convert (static) forcing uncertainty to model uncertainty.
            P_avg = Q_avg / (1 - info.gamma**2)

            # TCA-based uncertainty and scaling coefficients.
            snr, err, beta = tcol_snr(df2[1].values, df2[2].values,
                                      df2[3].values)
            P_TC = err[0]**2
            Q_TC = P_TC * (1 - info.gamma**2)
            R_TC = (err[1] / beta[1])**2
            H_TC = beta[1]

            # RMSD-based observation uncertainty (corrected for model
            # uncertainty; sign-flipped if the correction overshoots).
            R_rmsd = (np.nanmean(
                (df2[1].values - H_TC * df2[2].values)**2) - P_avg)
            if R_rmsd < 0:
                R_rmsd *= -1
            # -----------------------------------

            # ----- Run 2D-KF using TCA-based uncertainties (both sensors) -----
            api_kf = API(gamma=info.gamma, Q=Q_TC)
            R_2D = np.array([(err[1] / beta[1])**2, (err[2] / beta[2])**2])
            H_2D = np.array([beta[1]**(-1), beta[2]**(-1)])
            x_2d, P, checkvar1_2d, checkvar2_2d, checkvar3_2d, K1_2d, K2_2d = \
                KF_2D(api_kf, df[1].values.copy(), df[2].values.copy(),
                      df[3].values.copy(), R_2D, H=H_2D)

            # ----- Run KF using TCA-based uncertainties -----
            api_kf = API(gamma=info.gamma, Q=Q_TC)
            x_kf, P, R_innov_kf, checkvar_kf, K_kf = \
                KF(api_kf, df[1].values.copy(), df[2].values.copy(),
                   R_TC, H=H_TC)

            # ----- Run EnKF using TCA-based uncertainties -----
            forc_pert = ['normal', 'additive', Q_TC]
            obs_pert = ['normal', 'additive', R_TC]
            x_tc, P, R_innov_tc, checkvar_tc, K_tc = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(),
                     forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run EnKF using static uncertainties -----
            forc_pert = ['normal', 'additive', Q_avg]
            obs_pert = ['normal', 'additive', R_avg]
            x_avg, P, R_innov_avg, checkvar_avg, K_avg = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(),
                     forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run EnKF using RMSD-based uncertainties (corrected for
            # model uncertainty); timed for the t_enkf diagnostic. -----
            t = timeit.default_timer()
            forc_pert = ['normal', 'additive', Q_avg]
            obs_pert = ['normal', 'additive', R_rmsd]
            x_rmsd, P, R_innov_rmsd, checkvar_rmsd, K_rmsd = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(),
                     forc_pert, obs_pert, H=H_TC, n_ens=50)
            t_enkf = timeit.default_timer() - t

            # ----- Run MadKF; timed for the t_madkf diagnostic. -----
            t = timeit.default_timer()
            x_madkf, P, R_madkf, Q_madkf, H_madkf, R_innov_madkf, \
                checkvar_madkf, K_madkf = \
                MadKF(api, df[1].values.copy(), df[2].values.copy(),
                      n_ens=100, n_iter=20)
            t_madkf = timeit.default_timer() - t

            result = pd.DataFrame(
                {
                    'lon': info.lon,
                    'lat': info.lat,
                    'col': info.col,
                    'row': info.row,
                    'P_tc': P_TC,
                    'Q_tc': Q_TC,
                    'R_tc': R_TC,
                    'H_tc': H_TC,
                    'K_tc': K_tc,
                    'R_innov_tc': R_innov_tc,
                    'checkvar_tc': checkvar_tc,
                    'K_kf': K_kf,
                    'R_innov_kf': R_innov_kf,
                    'checkvar_kf': checkvar_kf,
                    'K1_2d': K1_2d,
                    'K2_2d': K2_2d,
                    'checkvar1_2d': checkvar1_2d,
                    'checkvar2_2d': checkvar2_2d,
                    'checkvar3_2d': checkvar3_2d,
                    'P_avg': P_avg,
                    'Q_avg': Q_avg,
                    'R_avg': R_avg,
                    'K_avg': K_avg,
                    'R_innov_avg': R_innov_avg,
                    'checkvar_avg': checkvar_avg,
                    'R_rmsd': R_rmsd,
                    'K_rmsd': K_rmsd,
                    'R_innov_rmsd': R_innov_rmsd,
                    'checkvar_rmsd': checkvar_rmsd,
                    'P_madkf': Q_madkf / (1 - info.gamma**2),
                    'Q_madkf': Q_madkf,
                    'R_madkf': R_madkf,
                    'H_madkf': H_madkf,
                    'K_madkf': K_madkf,
                    'R_innov_madkf': R_innov_madkf,
                    'checkvar_madkf': checkvar_madkf,
                    't_enkf': t_enkf,
                    't_madkf': t_madkf,
                    'n_inv_precip': n_inv_precip,
                    'n_inv_ascat': n_inv_ascat,
                    'n_inv_smos': n_inv_smos,
                    'n_inv_asc_smo': n_inv_asc_smo
                },
                index=(info.name, ))

            # Create the file with a header on first write, append afterwards.
            if not os.path.isfile(result_file):
                result.to_csv(result_file, float_format='%0.4f')
            else:
                result.to_csv(result_file, float_format='%0.4f',
                              mode='a', header=False)
        except Exception as e:
            # Best-effort per-gpi processing: log and move on. (A bare
            # 'except:' here previously also swallowed KeyboardInterrupt.)
            print('GPI failed: %s' % e)
            continue

    ascat.close()
    mswep.close()