Esempio n. 1
0
def read_data():
    """Collect MERRA-2, DMP and ASCAT time series for one fixed grid point.

    The grid point is pixel (750, 750) of the DMP data set.  Its coordinates
    are used to pick the nearest MERRA-2 cell (minimum absolute lat / lon
    difference) and the ASCAT grid point returned by ``latlon2gpi``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the MERRA-2 time stamps, with the columns

        * ``time``        - the MERRA-2 time stamps (same as the index),
        * ``sm``          - MERRA-2 variable ``SFMC`` at the nearest cell,
        * ``DMP``         - DMP values re-indexed to the MERRA-2 time stamps
          and divided by 10,
        * ``sig40_ascat`` - ASCAT ``sigma40`` re-indexed to the MERRA-2 time
          stamps.
    """
    # Fixed pixel of the DMP grid that is extracted.
    i_lat = 750
    i_lon = 750

    def _time_index(nc_time):
        """Decode a netCDF time variable into a pandas DatetimeIndex."""
        return pd.DatetimeIndex(
            num2date(nc_time[:],
                     units=nc_time.units,
                     only_use_python_datetimes=True,
                     only_use_cftime_datetimes=False))

    with Dataset(
            '/Users/u0116961/data_sets/DMP_COPERNICUS/DMP_COPERNICUS_timeseries.nc'
    ) as ds:
        dmp_ts = pd.DataFrame({'DMP': ds['DMP'][:, i_lat, i_lon]},
                              index=_time_index(ds['time']))
        lat = ds['lat'][i_lat].data
        lon = ds['lon'][i_lon].data

    ascat = HSAF_io()
    try:
        # The context manager guarantees the MERRA-2 file is closed even if
        # one of the reads below raises.
        with Dataset('/Users/u0116961/data_sets/MERRA2/MERRA2_timeseries.nc4'
                     ) as merra2:
            # Nearest-neighbour lookup of the MERRA-2 cell.
            ind_lat = abs(merra2['lat'][:] - lat).argmin()
            ind_lon = abs(merra2['lon'][:] - lon).argmin()
            gpi_ascat = ascat.latlon2gpi(lat, lon)

            time = _time_index(merra2['time'])
            sig40 = ascat.read(gpi_ascat, resample_time=True, var='sigma40')

            df = pd.DataFrame(
                {
                    'time': time,
                    'sm': merra2['SFMC'][:, ind_lat, ind_lon],
                    'DMP': dmp_ts.reindex(time).values.flatten() / 10,
                    'sig40_ascat': sig40.reindex(time).values,
                },
                index=time)
    finally:
        # Release the ASCAT reader on success and on failure alike.
        ascat.close()

    return df
Esempio n. 2
0
def _evaluate_station(station, insitu, smos, ascat, mswep, Q_avg, R_avg, dt):
    """Run the OL / KF / EnKF / MadKF experiments for one ISMN station.

    Parameters
    ----------
    station
        Row of the ISMN station list; ``lon``, ``lat``, ``network``,
        ``station`` and ``name`` are read from it.
    insitu
        In situ time series of the station, or None.
    smos, ascat, mswep
        Opened readers for SMOS, ASCAT and MSWEP.
    Q_avg, R_avg
        Static forcing / observation perturbation parameters handed to the
        EnKF run.
    dt
        ``[start, end]`` date strings delimiting the evaluation period.

    Returns
    -------
    pandas.DataFrame or None
        One-row frame (indexed by ``station.name``) with the evaluation
        metrics, or None when the station is skipped (a data set is missing
        or fewer than 50 time steps have all four data sets available).

    Raises
    ------
    RuntimeError
        If none of the MadKF attempts returns a usable check variable.
    """

    def _anomaly(ts):
        return calc_anomaly(ts, method='moving_average', longterm=False)

    gpi = lonlat2gpi(station.lon, station.lat, mswep.grid)
    mswep_idx = mswep.grid.index[mswep.grid.dgg_gpi == gpi][0]
    smos_gpi = mswep.grid.loc[mswep_idx, 'smos_gpi']

    precip = mswep.read(mswep_idx)
    sm_ascat = ascat.read(gpi)
    sm_smos = smos.read(smos_gpi)

    # Test for missing data *before* rescaling SMOS: ``None * 100.`` would
    # raise a TypeError and make the check below unreachable.
    if precip is None or sm_ascat is None or sm_smos is None \
            or insitu is None:
        return None
    sm_smos = sm_smos * 100.

    precip = _anomaly(precip[dt[0]:dt[1]])
    sm_ascat = _anomaly(sm_ascat[dt[0]:dt[1]])
    sm_smos = _anomaly(sm_smos[dt[0]:dt[1]])
    insitu = _anomaly(
        insitu[dt[0]:dt[1]].resample('1d').first()).tz_localize(None)

    # Regularize all series onto one daily index; missing forcing is set to 0.
    dates = pd.date_range(dt[0], dt[1])
    df = pd.DataFrame({
        1: precip,
        2: sm_ascat,
        3: sm_smos,
        4: insitu
    },
                      index=dates)
    df.loc[np.isnan(df[1]), 1] = 0.
    n = len(df)

    if len(df.dropna()) < 50:
        return None

    gamma = mswep.grid.loc[mswep_idx, 'gamma']
    api = API(gamma=gamma)

    # --- OL run ---
    # Force the model with the regularized, gap-filled forcing (df[1]) so
    # that x_OL has exactly one value per day of `dates`.  Iterating over the
    # raw `precip` series would misalign (or overflow) x_OL whenever the
    # forcing has gaps and would feed NaNs into the model.
    x_OL = np.full(n, np.nan)
    model = deepcopy(api)
    for t, f in enumerate(df[1].values):
        x_OL[t] = model.step(f)

    # ----- Calculate TCA based uncertainty and scaling coefficients -----
    tmp_df = pd.DataFrame({
        1: x_OL,
        2: sm_ascat,
        3: sm_smos
    },
                          index=dates).dropna()
    _, _, err, beta = tc(tmp_df)
    P_TC = err[0]**2
    Q_TC = P_TC * (1 - gamma**2)
    R_TC = (err[1] / beta[1])**2
    H_TC = beta[1]

    # ----- Run KF using TCA-based uncertainties -----
    api_kf = API(gamma=gamma, Q=Q_TC)
    x_kf, _, R_innov_kf, checkvar_kf, _ = \
        KF(api_kf, df[1].values.copy(), df[2].values.copy(), R_TC, H=H_TC)

    # ----- Run EnKF using static uncertainties -----
    forc_pert = ['normal', 'additive', Q_avg]
    obs_pert = ['normal', 'additive', R_avg]
    x_avg, _, R_innov_avg, checkvar_avg, _ = \
        EnKF(api, df[1].values.copy(), df[2].values.copy(),
             forc_pert, obs_pert, H=H_TC, n_ens=50)

    # ----- Run MadKF -----
    # Repeat up to 5 times until the check variable lies within [0.95, 1.05]
    # and keep the attempt whose check variable is closest to 1.  All values
    # that are reported (state, Q, R, innovation R) are taken from that same
    # attempt so the result row is internally consistent.
    best = None
    checkvar_madkf = 9999.
    attempts = 0
    while (checkvar_madkf < 0.95 or checkvar_madkf > 1.05) and attempts < 5:
        attempts += 1
        x_try, _, R_try, Q_try, _, R_innov_try, checkvar_try, _ = \
            MadKF(api, df[1].values.copy(), df[2].values.copy(),
                  n_ens=100, n_iter=20)
        if abs(1 - checkvar_try) < abs(1 - checkvar_madkf):
            checkvar_madkf = checkvar_try
            best = (x_try, Q_try, R_try, R_innov_try)

    if best is None:
        # E.g. every attempt returned a NaN check variable.  Fail loudly
        # instead of silently re-using values from a previous station.
        raise RuntimeError('MadKF did not return a valid check variable.')
    x_madkf, Q_madkf, R_madkf, R_innov_madkf = best

    df['x_ol'] = x_OL
    df['x_kf'] = x_kf
    df['x_avg'] = x_avg
    df['x_madkf'] = x_madkf

    ci_l_ol, ci_m_ol, ci_u_ol = bootstrap_tc(df[[4, 3, 'x_ol']])
    ci_l_kf, ci_m_kf, ci_u_kf = bootstrap_tc(df[[4, 3, 'x_kf']])
    ci_l_avg, ci_m_avg, ci_u_avg = bootstrap_tc(df[[4, 3, 'x_avg']])
    ci_l_madkf, ci_m_madkf, ci_u_madkf = bootstrap_tc(df[[4, 3, 'x_madkf']])

    # Correlations against the in situ series (column 4) on complete rows.
    complete = df.dropna()
    corr = complete.corr()
    n_all = len(complete)

    return pd.DataFrame(
        {
            'lon': station.lon,
            'lat': station.lat,
            'network': station.network,
            'station': station.station,
            'gpi': gpi,
            'n_all': n_all,
            'Q_est_madkf': Q_madkf,
            'R_est_madkf': R_madkf,
            'corr_ol': corr[4]['x_ol'],
            'corr_kf': corr[4]['x_kf'],
            'corr_avg': corr[4]['x_avg'],
            'corr_madkf': corr[4]['x_madkf'],
            'r_ol_l': ci_l_ol,
            'r_ol_m': ci_m_ol,
            'r_ol_u': ci_u_ol,
            'r_kf_l': ci_l_kf,
            'r_kf_m': ci_m_kf,
            'r_kf_u': ci_u_kf,
            'r_avg_l': ci_l_avg,
            'r_avg_m': ci_m_avg,
            'r_avg_u': ci_u_avg,
            'r_madkf_l': ci_l_madkf,
            'r_madkf_m': ci_m_madkf,
            'r_madkf_u': ci_u_madkf,
            'checkvar_kf': checkvar_kf,
            'checkvar_avg': checkvar_avg,
            'checkvar_madkf': checkvar_madkf,
            'R_innov_kf': R_innov_kf,
            'R_innov_avg': R_innov_avg,
            'R_innov_madkf': R_innov_madkf
        },
        index=(station.name, ))


def run(part):
    """Evaluate the filter experiments at SCAN / USCRN stations of one chunk.

    The SCAN and USCRN stations of the ISMN list are split into 15 chunks of
    (almost) equal size; the stations of chunk ``part`` are processed one by
    one and each successful station appends one row to a per-chunk CSV file
    (``result_part<part>.csv``) in a platform dependent output directory.
    A station that raises an exception is reported and skipped.

    Parameters
    ----------
    part : int
        1-based number of the chunk to process (1 ... 15).
    """
    import traceback

    parts = 15

    smos = SMOS_io()
    ismn = ISMN_io()
    ascat = HSAF_io(ext=None)
    mswep = MSWEP_io()

    try:
        # Median Q from MadKF API/CONUS run.
        Q_avg = 12.
        R_avg = 74.

        # Select only SCAN and USCRN
        ismn.list = ismn.list[(ismn.list.network == 'SCAN') |
                              (ismn.list.network == 'USCRN')]
        ismn.list.index = np.arange(len(ismn.list))

        # Split station list in `parts` chunks for parallelization
        subs = (np.arange(parts + 1) * len(ismn.list) / parts).astype('int')
        subs[-1] = len(ismn.list)
        start = subs[part - 1]
        end = subs[part]
        ismn.list = ismn.list.iloc[start:end, :]

        if platform.system() == 'Windows':
            # 'D:\\' (not 'D:'): joining onto a bare drive letter yields a
            # path relative to the current directory of that drive.
            result_file = os.path.join('D:\\', 'work', 'MadKF', 'CONUS',
                                       'ismn_eval',
                                       'result_part%i.csv' % part)
        elif platform.system() == 'Linux':
            result_file = os.path.join('/', 'scratch', 'leuven', '320',
                                       'vsc32046', 'output', 'MadKF', 'CONUS',
                                       'ismn_eval',
                                       'result_part%i.csv' % part)
        else:
            result_file = os.path.join('/', 'work', 'MadKF', 'CONUS',
                                       'ismn_eval', 'parts2',
                                       'result_part%i.csv' % part)

        dt = ['2010-01-01', '2015-12-31']

        for cnt, (station, insitu) in enumerate(
                ismn.iter_stations(surf_depth=0.1)):

            print('%i / %i' % (cnt, len(ismn.list)))

            try:
                result = _evaluate_station(station, insitu, smos, ascat,
                                           mswep, Q_avg, R_avg, dt)
                if result is None:
                    continue

                # Append to the chunk file; write the header only when the
                # file is created.
                result.to_csv(result_file,
                              float_format='%0.4f',
                              mode='a',
                              header=not os.path.isfile(result_file))
            except Exception:
                # Keep going with the next station, but show what went wrong.
                # `Exception` (rather than a bare except) lets Ctrl-C and
                # SystemExit terminate the run.
                print('GPI failed.')
                traceback.print_exc()
                continue
    finally:
        ascat.close()
        mswep.close()
Esempio n. 3
0
def _evaluate_grid_point(info, smos, ascat, mswep, Q_avg, R_avg, dt):
    """Run the KF / EnKF / MadKF experiments for one MSWEP grid point.

    Parameters
    ----------
    info
        Grid-point record yielded by ``mswep.iter_cell``; ``name``,
        ``dgg_gpi``, ``smos_gpi``, ``gamma``, ``lon``, ``lat``, ``col`` and
        ``row`` are read from it.
    smos, ascat, mswep
        Opened readers for SMOS, ASCAT and MSWEP.
    Q_avg, R_avg
        Static forcing / observation perturbation parameters handed to the
        EnKF runs.
    dt
        ``[start, end]`` date strings delimiting the processing period.

    Returns
    -------
    pandas.DataFrame or None
        One-row frame (indexed by ``info.name``) with the estimated
        parameters and diagnostics, or None when a data set is missing.
    """

    def _anomaly(ts):
        return calc_anomaly(ts, method='moving_average', longterm=False)

    precip = mswep.read(info.name)
    sm_ascat = ascat.read(info.dgg_gpi)
    sm_smos = smos.read(info.smos_gpi)

    # Test for missing data *before* rescaling SMOS: ``None * 100.`` would
    # raise a TypeError and make the check below unreachable.
    if precip is None or sm_ascat is None or sm_smos is None:
        return None
    sm_smos = sm_smos * 100.

    precip = _anomaly(precip[dt[0]:dt[1]])
    sm_ascat = _anomaly(sm_ascat[dt[0]:dt[1]])
    sm_smos = _anomaly(sm_smos[dt[0]:dt[1]])

    api = API(gamma=info.gamma)

    # Regularize time steps
    dates = pd.date_range(dt[0], dt[1])
    df = pd.DataFrame({
        1: precip,
        2: sm_ascat,
        3: sm_smos
    },
                      index=dates)

    # Number of missing time steps per data set (counted before gap filling).
    n_inv_precip = int(np.isnan(df[1]).sum())
    n_inv_ascat = int(np.isnan(df[2]).sum())
    n_inv_smos = int(np.isnan(df[3]).sum())
    n_inv_asc_smo = int((np.isnan(df[2]) & np.isnan(df[3])).sum())

    df.loc[np.isnan(df[1]), 1] = 0.

    # --- get OL ts  ---
    # OL must have one value per regularized time step (len(df)), not per
    # raw forcing record: it is filled from df[1] and then placed on `dates`.
    OL = np.full(len(df), np.nan)
    model = API(gamma=info.gamma)
    for t, f in enumerate(df[1].values):
        OL[t] = model.step(f)

    # collocate OL and satellite data sets.
    df2 = pd.DataFrame({
        1: OL,
        2: sm_ascat,
        3: sm_smos
    },
                       index=dates).dropna()

    # ----- Calculate uncertainties -----
    # convert (static) forcing to model uncertainty
    P_avg = Q_avg / (1 - info.gamma**2)

    # calculate TCA based uncertainty and scaling coefficients
    _, err, beta = tcol_snr(df2[1].values, df2[2].values, df2[3].values)
    P_TC = err[0]**2
    Q_TC = P_TC * (1 - info.gamma**2)
    R_TC = (err[1] / beta[1])**2
    H_TC = beta[1]

    # Calculate RMSD based uncertainty (sign flipped if negative)
    R_rmsd = abs(
        np.nanmean((df2[1].values - H_TC * df2[2].values)**2) - P_avg)
    # -----------------------------------

    # ----- Run 2D KF (ASCAT + SMOS) using TCA-based uncertainties -----
    # NOTE(review): H_2D uses 1 / beta whereas the 1D runs below use
    # H_TC = beta[1] - confirm that this difference is intended.
    api_kf = API(gamma=info.gamma, Q=Q_TC)
    R_2D = np.array([(err[1] / beta[1])**2, (err[2] / beta[2])**2])
    H_2D = np.array([beta[1]**(-1), beta[2]**(-1)])
    _, _, checkvar1_2d, checkvar2_2d, checkvar3_2d, K1_2d, K2_2d = \
        KF_2D(api_kf, df[1].values.copy(), df[2].values.copy(),
              df[3].values.copy(), R_2D, H=H_2D)

    # ----- Run KF using TCA-based uncertainties -----
    api_kf = API(gamma=info.gamma, Q=Q_TC)
    _, _, R_innov_kf, checkvar_kf, K_kf = \
        KF(api_kf, df[1].values.copy(), df[2].values.copy(), R_TC, H=H_TC)

    # ----- Run EnKF using TCA-based uncertainties -----
    forc_pert = ['normal', 'additive', Q_TC]
    obs_pert = ['normal', 'additive', R_TC]
    _, _, R_innov_tc, checkvar_tc, K_tc = \
        EnKF(api, df[1].values.copy(), df[2].values.copy(),
             forc_pert, obs_pert, H=H_TC, n_ens=50)

    # ----- Run EnKF using static uncertainties -----
    forc_pert = ['normal', 'additive', Q_avg]
    obs_pert = ['normal', 'additive', R_avg]
    _, _, R_innov_avg, checkvar_avg, K_avg = \
        EnKF(api, df[1].values.copy(), df[2].values.copy(),
             forc_pert, obs_pert, H=H_TC, n_ens=50)

    # ----- Run EnKF using RMSD-based uncertainties (corrected for model
    # uncertainty); the wall-clock time of this run is recorded -----
    t0 = timeit.default_timer()
    forc_pert = ['normal', 'additive', Q_avg]
    obs_pert = ['normal', 'additive', R_rmsd]
    _, _, R_innov_rmsd, checkvar_rmsd, K_rmsd = \
        EnKF(api, df[1].values.copy(), df[2].values.copy(),
             forc_pert, obs_pert, H=H_TC, n_ens=50)
    t_enkf = timeit.default_timer() - t0

    # ----- Run MadKF (wall-clock time recorded as well) -----
    t0 = timeit.default_timer()
    _, _, R_madkf, Q_madkf, H_madkf, R_innov_madkf, checkvar_madkf, K_madkf = \
        MadKF(api, df[1].values.copy(), df[2].values.copy(),
              n_ens=100, n_iter=20)
    t_madkf = timeit.default_timer() - t0

    return pd.DataFrame(
        {
            'lon': info.lon,
            'lat': info.lat,
            'col': info.col,
            'row': info.row,
            'P_tc': P_TC,
            'Q_tc': Q_TC,
            'R_tc': R_TC,
            'H_tc': H_TC,
            'K_tc': K_tc,
            'R_innov_tc': R_innov_tc,
            'checkvar_tc': checkvar_tc,
            'K_kf': K_kf,
            'R_innov_kf': R_innov_kf,
            'checkvar_kf': checkvar_kf,
            'K1_2d': K1_2d,
            'K2_2d': K2_2d,
            'checkvar1_2d': checkvar1_2d,
            'checkvar2_2d': checkvar2_2d,
            'checkvar3_2d': checkvar3_2d,
            'P_avg': P_avg,
            'Q_avg': Q_avg,
            'R_avg': R_avg,
            'K_avg': K_avg,
            'R_innov_avg': R_innov_avg,
            'checkvar_avg': checkvar_avg,
            'R_rmsd': R_rmsd,
            'K_rmsd': K_rmsd,
            'R_innov_rmsd': R_innov_rmsd,
            'checkvar_rmsd': checkvar_rmsd,
            'P_madkf': Q_madkf / (1 - info.gamma**2),
            'Q_madkf': Q_madkf,
            'R_madkf': R_madkf,
            'H_madkf': H_madkf,
            'K_madkf': K_madkf,
            'R_innov_madkf': R_innov_madkf,
            'checkvar_madkf': checkvar_madkf,
            't_enkf': t_enkf,
            't_madkf': t_madkf,
            'n_inv_precip': n_inv_precip,
            'n_inv_ascat': n_inv_ascat,
            'n_inv_smos': n_inv_smos,
            'n_inv_asc_smo': n_inv_asc_smo
        },
        index=(info.name, ))


def run(cell=None, gpi=None):
    """Run the filter experiments for all grid points of one cell.

    Every grid point yielded by ``mswep.iter_cell(cell, gpis=gpi)`` is
    processed; each successful grid point appends one row to
    ``result_<cell>.csv`` in a platform dependent output directory.  A grid
    point that raises an exception is reported and skipped.

    Parameters
    ----------
    cell : int, optional
        Cell to process.  Overridden by the cell of ``gpi`` if that is given.
    gpi : optional
        Grid point (passed on to ``mswep.gpi2cell`` and ``mswep.iter_cell``).

    If neither ``cell`` nor ``gpi`` is given, a message is printed and the
    function returns without doing anything.
    """
    import traceback

    if (cell is None) and (gpi is None):
        print('No cell/gpi specified.')
        return

    smos = SMOS_io()
    ascat = HSAF_io(ext=None)
    mswep = MSWEP_io()

    try:
        if gpi is not None:
            cell = mswep.gpi2cell(gpi)

        # Median Q/R from TC run.
        Q_avg = 12.
        R_avg = 74.

        if platform.system() == 'Windows':
            # 'D:\\' (not 'D:'): joining onto a bare drive letter yields a
            # path relative to the current directory of that drive.
            result_file = os.path.join('D:\\', 'work', 'MadKF', 'CONUS',
                                       'result_%04i.csv' % cell)
        else:
            result_file = os.path.join('/', 'scratch', 'leuven', '320',
                                       'vsc32046', 'output', 'MadKF', 'CONUS',
                                       'result_%04i.csv' % cell)

        dt = ['2010-01-01', '2015-12-31']

        for _, info in mswep.iter_cell(cell, gpis=gpi):
            try:
                result = _evaluate_grid_point(info, smos, ascat, mswep,
                                              Q_avg, R_avg, dt)
                if result is None:
                    continue

                # Append to the cell file; write the header only when the
                # file is created.
                result.to_csv(result_file,
                              float_format='%0.4f',
                              mode='a',
                              header=not os.path.isfile(result_file))
            except Exception:
                # Keep going with the next grid point, but show what went
                # wrong.  `Exception` (rather than a bare except) lets Ctrl-C
                # and SystemExit terminate the run.
                print('GPI failed.')
                traceback.print_exc()
                continue
    finally:
        ascat.close()
        mswep.close()