Example 1
def calc_gamma_map():

    fname = r"D:\data_sets\MSWEP_V21\data\grid_new.csv"

    ascat = HSAF_io()
    mswep = MSWEP_io()

    mswep.grid['gamma'] = np.nan

    for i, (precip, info) in enumerate(mswep.iter_gp()):
        print(i)

        if len(precip.dropna()) == 0:
            continue
        try:
            precip = calc_anomaly(precip, method='harmonic', longterm=False)
            sm = calc_anomaly(ascat.read(
                info.dgg_gpi)['2007-01-01':'2016-12-31'],
                              method='harmonic',
                              longterm=False)
            ts = pd.concat((precip, sm), axis=1).values
            mswep.grid.loc[info.name,
                           'gamma'] = estimate_gamma(ts[:, 0], ts[:, 1])
        except Exception:
            continue

    mswep.grid.dropna().to_csv(fname)
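The `estimate_gamma` call above is defined elsewhere. A minimal sketch of what such an estimator could look like, assuming the API recursion sm[t] = gamma * sm[t-1] + precip[t] from Example 2 and fitting gamma by least squares (the actual implementation may differ):

import numpy as np

def estimate_gamma_sketch(precip, sm):
    # Least-squares fit of gamma in sm[t] = gamma * sm[t-1] + precip[t]:
    # regress (sm[t] - precip[t]) on sm[t-1] over all valid sample pairs.
    precip = np.asarray(precip, dtype=float)
    sm = np.asarray(sm, dtype=float)
    valid = np.isfinite(precip[1:]) & np.isfinite(sm[1:]) & np.isfinite(sm[:-1])
    y = sm[1:][valid] - precip[1:][valid]
    x = sm[:-1][valid]
    return (x * y).sum() / (x * x).sum()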
Example 2
def generate_soil_moisture(size=5000,
                           gamma=0.85,
                           precip=None,
                           scale=15,
                           anomaly=False):
    '''
    Generate a soil moisture time series based on the API model.
    '''

    if precip is None:
        precip = generate_precipitation(size=size, scale=scale)
    else:
        size = len(precip)

    if anomaly:
        precip = calc_anomaly(
            pd.Series(precip,
                      index=pd.date_range(start='2010-01-01',
                                          periods=size))).values

    sm_true = np.zeros(size)

    for t in np.arange(1, size):

        sm_true[t] = gamma * sm_true[t - 1] + precip[t]

    return sm_true, precip
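A minimal usage sketch (synthetic forcing is passed in explicitly, since `generate_precipitation` is not shown here). It also checks the AR(1) steady-state identity Var(sm) = Var(precip) / (1 - gamma**2), the same relation the assimilation examples below apply as P_avg = Q_avg / (1 - gamma**2):

import numpy as np

np.random.seed(0)
precip = np.random.gamma(shape=0.5, scale=15., size=50000)  # synthetic forcing
sm, precip = generate_soil_moisture(gamma=0.85, precip=precip)

# For serially uncorrelated forcing, the AR(1) recursion converges to
# Var(sm) = Var(precip) / (1 - gamma**2); both numbers should be close.
print(sm.var(), precip.var() / (1 - 0.85**2))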
Example 3
def plot_cat_timeseries():

    outpath = r'D:\work\LDAS\2018-02_scaling\_new\ismn_eval\timeseries'

    fname = r"D:\work\LDAS\2018-02_scaling\_new\ismn_eval\validation.csv"
    res = pd.read_csv(fname)

    diff_srf = res['corr_DA_cal_pent_ma_sm_surface'] - res[
        'corr_DA_uncal_pent_ma_sm_surface']
    diff_rz = res['corr_DA_cal_pent_ma_sm_rootzone'] - res[
        'corr_DA_uncal_pent_ma_sm_rootzone']
    diff_prof = res['corr_DA_cal_pent_ma_sm_profile'] - res[
        'corr_DA_uncal_pent_ma_sm_profile']
    ind = (diff_srf > 0.2) | (diff_rz > 0.2) | (diff_prof > 0.2)
    res = res.loc[ind, ['network', 'station', 'lat', 'lon']]

    ismn = ISMN_io()
    cal = LDAS_io('xhourly', 'US_M36_SMOS_DA_calibrated_scaled')
    uncal = LDAS_io('xhourly', 'US_M36_SMOS_DA_nocal_scaled_pentadal')

    variables = ['sm_surface', 'sm_rootzone', 'sm_profile']

    for idx, stat in res.iterrows():

        fname = os.path.join(outpath,
                             stat.network + '_' + stat.station + '.png')

        ts_ismn = ismn.read(stat.network, stat.station)
        lat = stat.lat
        lon = stat.lon

        plt.figure(figsize=(17, 9))

        for i, var in enumerate(variables):

            ax = plt.subplot(3, 1, i + 1)

            ts_cal = calc_anomaly(cal.read_ts(var, lon, lat), method='ma')
            ts_cal.index += pd.to_timedelta('2 hours')
            ts_uncal = calc_anomaly(uncal.read_ts(var, lon, lat), method='ma')
            ts_uncal.index += pd.to_timedelta('2 hours')

            df = pd.DataFrame({
                'cal': ts_cal,
                'uncal': ts_uncal,
                'insitu': calc_anomaly(ts_ismn[var], method='ma')
            }).dropna()
            if len(df) == 0:
                continue
            df.plot(ax=ax)

            title = 'R(ismn - cal) = %.2f , R(ismn - uncal) = %.2f' % (
                df.corr().loc['insitu', 'cal'], df.corr().loc['insitu',
                                                              'uncal'])

            ax.set_title(title, fontsize=12)
            ax.set_xlim('2010-01-01', '2016-01-01')
            ax.set_ylim(-0.3, 0.3)
            ax.set_xlabel('')

        plt.tight_layout()

        plt.savefig(fname, dpi=150)
        plt.close()
Example 4
def run(part):

    parts = 6

    result_file = r'D:\work\ESA_CCI_SM\validation_%i.csv' % part

    cci = CCISM_io()
    ismn = ISMN_io()

    # ismn.list = ismn.list.iloc[100:120]

    # Split the station list into `parts` chunks for parallelization
    subs = (np.arange(parts + 1) * len(ismn.list) / parts).astype('int')
    subs[-1] = len(ismn.list)
    start = subs[part - 1]
    end = subs[part]
    ismn.list = ismn.list.iloc[start:end, :]

    periods = {
        'p1': ['2007-10-01', '2010-01-14'],
        'p2': ['2010-01-15', '2011-10-04'],
        'p3': ['2011-10-05', '2012-06-30'],
        'p4': ['2012-07-01', '2014-12-31']
    }

    freq = ['abs', 'anom']

    corr_tags = [
        'corr_' + m + '_' + v + '_' + p + '_' + f for m in cci.modes
        for v in cci.versions for p in periods.keys() for f in freq
    ]
    p_tags = [
        'p_' + m + '_' + v + '_' + p + '_' + f for m in cci.modes
        for v in cci.versions for p in periods.keys() for f in freq
    ]
    n_tags = [
        'n_' + m + '_' + v + '_' + p + '_' + f for m in cci.modes
        for v in cci.versions for p in periods.keys() for f in freq
    ]

    res = ismn.list.copy()
    res.drop(['ease_col', 'ease_row'], axis='columns', inplace=True)
    for col in corr_tags + p_tags:
        res[col] = np.nan
    for col in n_tags:
        res[col] = 0

    for i, (meta, ts_insitu) in enumerate(ismn.iter_stations()):
        print('%i/%i (Proc %i)' % (i, len(ismn.list), part))

        if ts_insitu is None:
            print('No in situ data for ' + meta.network + ' / ' + meta.station)
            continue
        ts_insitu = ts_insitu[periods['p1'][0]:periods['p4'][1]]
        if len(ts_insitu) < 10:
            print('No in situ data for ' + meta.network + ' / ' + meta.station)
            continue
        df_insitu = pd.DataFrame(ts_insitu).dropna()
        df_insitu_anom = pd.DataFrame(calc_anomaly(ts_insitu)).dropna()

        for m in cci.modes:
            df_cci = cci.read(meta.lon, meta.lat, mode=m).dropna()
            if len(df_cci) < 10:
                print('No CCI ' + m + ' data for ' + meta.network + ' / ' +
                      meta.station)
                continue

            for f in freq:
                if f == 'abs':
                    matched = df_match(df_cci, df_insitu, window=0.5)
                else:
                    for v in cci.versions:
                        df_cci.loc[:, m + '_' + v] = calc_anomaly(
                            df_cci[m + '_' + v])
                    df_cci.dropna(inplace=True)
                    if (len(df_cci) < 10) | (len(df_insitu_anom) < 10):
                        print('No in situ or CCI ' + m + ' anomaly data for ' +
                              meta.network + ' / ' + meta.station)
                        continue
                    matched = df_match(df_cci, df_insitu_anom, window=0.5)

                data = df_cci.join(matched['insitu']).dropna()

                for p in periods.keys():
                    vals = data[periods[p][0]:periods[p][1]].values

                    n_matches = vals.shape[0]
                    if n_matches < 10:
                        continue
                    for k, v in enumerate(cci.versions):
                        corr, p_value = pearsonr(vals[:, k], vals[:, -1])
                        res.loc[meta.name, 'corr_' + m + '_' + v + '_' + p +
                                '_' + f] = corr
                        res.loc[meta.name, 'p_' + m + '_' + v + '_' + p + '_' +
                                f] = p_value
                        res.loc[meta.name, 'n_' + m + '_' + v + '_' + p + '_' +
                                f] = n_matches

    res.to_csv(result_file, float_format='%0.4f')
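The three tag lists above differ only in their prefix; `itertools.product` gives an equivalent, somewhat more compact construction. A sketch with placeholder values standing in for the `cci.modes`, `cci.versions`, `periods`, and `freq` objects of the example (product iterates the rightmost sequence fastest, matching the nested comprehensions):

from itertools import product

# Placeholders; in the example these come from the CCISM_io instance.
modes = ['ACTIVE', 'PASSIVE']
versions = ['v04.4']
periods = {'p1': None, 'p2': None, 'p3': None, 'p4': None}
freq = ['abs', 'anom']

combos = ['_'.join(c) for c in product(modes, versions, periods.keys(), freq)]
corr_tags = ['corr_' + c for c in combos]
p_tags = ['p_' + c for c in combos]
n_tags = ['n_' + c for c in combos]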
Example 5
def insitu_evaluation():

    result_file = r'D:\work\LDAS\2018-06_rmse_uncertainty\insitu_evaluation\validation.csv'

    noDA = LDAS_io('xhourly', 'US_M36_SMOS40_noDA_cal_scaled')

    DA_const_err = LDAS_io('xhourly', 'US_M36_SMOS40_DA_cal_scaled')
    DA_varia_err = LDAS_io('xhourly', 'US_M36_SMOS40_DA_cal_scl_errfile')

    t_ana = pd.DatetimeIndex(
        LDAS_io('ObsFcstAna', 'US_M36_SMOS40_DA_cal_scaled').timeseries.time.
        values).sort_values()

    ismn = ISMN_io(col_offs=noDA.grid.tilegrids.loc['domain', 'i_offg'],
                   row_offs=noDA.grid.tilegrids.loc['domain', 'j_offg'])

    runs = ['noDA', 'DA_const_err', 'DA_varia_err']
    tss = [noDA.timeseries, DA_const_err.timeseries, DA_varia_err.timeseries]

    variables = ['sm_surface', 'sm_rootzone', 'sm_profile']
    # modes = ['absolute','longterm','shortterm']
    modes = [
        'absolute',
    ]

    # ismn.list = ismn.list.iloc[101::]

    i = 0
    for meta, ts_insitu in ismn.iter_stations():
        i += 1
        logging.info('%i/%i' % (i, len(ismn.list)))

        res = pd.DataFrame(meta.copy()).transpose()
        col = meta.ease_col
        row = meta.ease_row

        for var in variables:
            for mode in modes:

                if mode == 'absolute':
                    ts_ref = ts_insitu[var].dropna()
                elif mode == 'mean':
                    ts_ref = calc_anomaly(ts_insitu[var], mode).dropna()
                else:
                    ts_ref = calc_anomaly(
                        ts_insitu[var],
                        method='moving_average',
                        longterm=(mode == 'longterm')).dropna()

                for run, ts_model in zip(runs, tss):

                    ind = (ts_model['snow_mass'][row, col].values == 0) & (
                        ts_model['soil_temp_layer1'][row, col].values > 277.15)
                    ts_mod = ts_model[var][row, col].to_series().loc[ind]
                    ts_mod.index += pd.to_timedelta('2 hours')
                    # TODO: Make sure that time of netcdf file is correct!!

                    if mode == 'absolute':
                        ts_mod = ts_mod.dropna()
                    else:
                        ts_mod = calc_anomaly(
                            ts_mod,
                            method='moving_average',
                            longterm=(mode == 'longterm')).dropna()

                    tmp = pd.DataFrame({
                        1: ts_ref,
                        2: ts_mod
                    }).loc[t_ana, :].dropna()
                    res['len_' + mode + '_' + var] = len(tmp)

                    r, p = pearsonr(tmp[1], tmp[2])

                    res['corr_' + run + '_' + mode + '_' +
                        var] = r if (r > 0) & (p < 0.01) else np.nan
                    res['rmsd_' + run + '_' + mode + '_' + var] = np.sqrt(
                        ((tmp[1] - tmp[2])**2).mean())
                    res['ubrmsd_' + run + '_' + mode + '_' + var] = np.sqrt(
                        (((tmp[1] - tmp[1].mean()) -
                          (tmp[2] - tmp[2].mean()))**2).mean())

        if not os.path.isfile(result_file):
            res.to_csv(result_file, float_format='%0.4f')
        else:
            res.to_csv(result_file,
                       float_format='%0.4f',
                       mode='a',
                       header=False)
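The RMSD and ubRMSD written above are tied together through the bias: ubRMSD**2 = RMSD**2 - bias**2, so the unbiased metric never exceeds the raw one. A quick numerical check of that identity:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
a = pd.Series(rng.normal(0.25, 0.05, 1000))
b = a + 0.03 + pd.Series(rng.normal(0., 0.02, 1000))  # biased, noisy copy

rmsd = np.sqrt(((a - b)**2).mean())
ubrmsd = np.sqrt((((a - a.mean()) - (b - b.mean()))**2).mean())
bias = (a - b).mean()
print(np.allclose(ubrmsd**2, rmsd**2 - bias**2))  # True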
Example 6
def plot_timeseries():

    # Colorado
    # lat = 39.095962936
    # lon = -106.918945312

    # Nebraska
    # lat = 41.203456192
    # lon = -102.249755859

    # New Mexico
    # lat = 31.522361470
    # lon = -108.528442383

    # Oklahoma
    lat = 35.205233348
    lon = -97.910156250

    exp = 'SMAP_EASEv2_M36_NORTH_SCA_SMOSrw_DA'
    domain = 'SMAP_EASEv2_M36_NORTH'

    cal = LDAS_io('incr', 'US_M36_SMOS_DA_calibrated_scaled')
    uncal = LDAS_io('incr', 'US_M36_SMOS_DA_nocal_scaled_pentadal')

    incr_var_cal = (cal.timeseries['srfexc'] + cal.timeseries['rzexc'] -
                    cal.timeseries['catdef']).var(dim='time').values
    incr_var_uncal = (uncal.timeseries['srfexc'] + uncal.timeseries['rzexc'] -
                      uncal.timeseries['catdef']).var(dim='time').values

    col, row = LDAS_io().grid.lonlat2colrow(lon, lat, domain=True)

    title = 'increment variance (calibrated): %.2f        increment variance (uncalibrated): %.2f' % (
        incr_var_cal[row, col], incr_var_uncal[row, col])
    # title = ''

    fontsize = 12

    cal = LDAS_io('ObsFcstAna', 'US_M36_SMOS_DA_calibrated_scaled')
    uncal = LDAS_io('ObsFcstAna', 'US_M36_SMOS_DA_nocal_scaled_pentadal')
    orig = LDAS_io('ObsFcstAna', 'US_M36_SMOS_noDA_unscaled')

    ts_obs_cal = cal.read_ts('obs_obs', lon, lat, species=3, lonlat=True)
    ts_obs_cal.name = 'Tb obs (calibrated)'
    ts_obs_uncal = uncal.read_ts('obs_obs', lon, lat, species=3, lonlat=True)
    ts_obs_uncal.name = 'Tb obs (uncalibrated)'

    ts_obs_orig = orig.read_ts('obs_obs', lon, lat, species=3, lonlat=True)
    ts_obs_orig.name = 'Tb obs (uncalibrated, unscaled)'

    ts_fcst_cal = cal.read_ts('obs_fcst', lon, lat, species=3, lonlat=True)
    ts_fcst_cal.name = 'Tb fcst (calibrated)'
    ts_fcst_uncal = uncal.read_ts('obs_fcst', lon, lat, species=3, lonlat=True)
    ts_fcst_uncal.name = 'Tb fcst (uncalibrated)'

    df = pd.concat(
        (ts_obs_cal, ts_obs_uncal, ts_obs_orig, ts_fcst_cal, ts_fcst_uncal),
        axis=1).dropna()

    plt.figure(figsize=(19, 8))

    ax1 = plt.subplot(211)
    df.plot(ax=ax1,
            ylim=[140, 300],
            xlim=['2010-01-01', '2017-01-01'],
            fontsize=fontsize,
            style=['-', '--', ':', '-', '--'],
            linewidth=2)
    plt.xlabel('')
    plt.title(title, fontsize=fontsize + 2)

    cols = df.columns.values
    for i, col in enumerate(df):
        df[col] = calc_anomaly(df[col], method='ma', longterm=True).values
        if i < 3:
            cols[i] = col[0:7] + 'anomaly ' + col[7::]
        else:
            cols[i] = col[0:7] + ' anomaly' + col[7::]
    df.columns = cols
    df.dropna(inplace=True)

    ax2 = plt.subplot(212, sharex=ax1)
    df.plot(ax=ax2,
            ylim=[-60, 60],
            xlim=['2010-01-01', '2017-01-01'],
            fontsize=fontsize,
            style=['-', '--', ':', '-', '--'],
            linewidth=2)
    plt.xlabel('')

    plt.tight_layout()
    plt.show()
Example 7
def calc_lagged_corr():

    fout = Path('/Users/u0116961/Documents/work/deforestation_paper/lagged_corr_w_sif.csv')

    ds_lai = io('LAI')
    ds_vod = io('SMOS_IC')
    ds_met = io('MERRA2')
    ds_sif = io('SIF')

    date_from = '2010-01-01'
    date_to = '2019-12-31'

    for i, val in ds_lai.lut.iterrows():

        print(f'{i} / {len(ds_lai.lut)}')

        lai = ds_lai.read('LAI', i, date_from=date_from, date_to=date_to).dropna()
        if len(lai) == 0:
            continue
        vod = ds_vod.read('VOD', i, date_from=date_from, date_to=date_to)

        if (len(lai) > 0) and (len(vod) > 0):
            invalid = (ds_vod.read('Flags', i, date_from=date_from, date_to=date_to) > 0) | \
                      (ds_vod.read('RMSE', i, date_from=date_from, date_to=date_to) > 8) | \
                      (ds_vod.read('VOD_StdErr', i, date_from=date_from, date_to=date_to) > 1.2)
            vod[invalid] = np.nan
            vod = vod.dropna()
        if len(vod) == 0:
            continue

        sif = ds_sif.read('sif_dc', i, date_from=date_from, date_to=date_to)
        invalid = (ds_sif.read('n', i, date_from=date_from, date_to=date_to) <= 1) | \
                  (ds_sif.read('cloud_fraction', i, date_from=date_from, date_to=date_to) > 0.7)
        sif[invalid] = np.nan
        sif = sif.dropna()
        if len(sif) == 0:
            continue

        df_veg = pd.concat((lai, vod, sif), axis=1, keys=['LAI', 'VOD', 'SIF']).resample('M').mean().dropna()
        for col in df_veg:
            df_veg[f'{col}_anom'] = calc_anomaly(df_veg[col], method='harmonic', longterm=True, n=3)

        temp = ds_met.read('T2M', i, date_from=date_from, date_to=date_to)
        prec = ds_met.read('PRECTOTLAND', i, date_from=date_from, date_to=date_to)
        rad = ds_met.read('LWLAND', i, date_from=date_from, date_to=date_to) + \
              ds_met.read('SWLAND', i, date_from=date_from, date_to=date_to)
        df_met = pd.concat((temp, prec, rad), axis=1, keys=['T', 'P', 'R']).resample('M').mean().dropna()
        if len(df_met) == 0:
            continue

        df_met['T_anom'] = calc_anomaly(df_met['T'], method='harmonic', longterm=True, n=3)
        df_met['P_anom'] = calc_anomaly(df_met['P'], method='harmonic', longterm=True, n=3)
        df_met['R_anom'] = calc_anomaly(df_met['R'], method='harmonic', longterm=True, n=3)

        tmp_df_met = df_met.copy()
        tmp_df_veg = df_veg.reindex(df_met.index).copy()
        tmp_df_veg.columns = tmp_df_veg.columns + '_nolag'
        tmp_df_met = pd.concat((tmp_df_met, tmp_df_veg), axis=1)

        tmp_df_met.index = np.arange(len(df_met))

        res = pd.DataFrame(index=(i,))
        for lag in np.arange(-6,7):
            tmp_df_veg = df_veg.reindex(df_met.index)
            tmp_df_veg.index = np.arange(len(tmp_df_veg)) + lag
            corr = pd.concat((tmp_df_met, tmp_df_veg), axis=1).corr()
            res[f'R_LAI_T_{lag}'] = corr['T']['LAI']
            res[f'R_LAI_P_{lag}'] = corr['P']['LAI']
            res[f'R_LAI_R_{lag}'] = corr['R']['LAI']
            res[f'R_VOD_T_{lag}'] = corr['T']['VOD']
            res[f'R_VOD_P_{lag}'] = corr['P']['VOD']
            res[f'R_VOD_R_{lag}'] = corr['R']['VOD']
            res[f'R_SIF_T_{lag}'] = corr['T']['SIF']
            res[f'R_SIF_P_{lag}'] = corr['P']['SIF']
            res[f'R_SIF_R_{lag}'] = corr['R']['SIF']

            res[f'R_LAI_VOD_{lag}'] = corr['LAI_nolag']['VOD']
            res[f'R_LAI_SIF_{lag}'] = corr['LAI_nolag']['SIF']
            res[f'R_VOD_SIF_{lag}'] = corr['VOD_nolag']['SIF']

            res[f'R_anom_LAI_T_{lag}'] = corr['T_anom']['LAI_anom']
            res[f'R_anom_LAI_P_{lag}'] = corr['P_anom']['LAI_anom']
            res[f'R_anom_LAI_R_{lag}'] = corr['R_anom']['LAI_anom']
            res[f'R_anom_VOD_T_{lag}'] = corr['T_anom']['VOD_anom']
            res[f'R_anom_VOD_P_{lag}'] = corr['P_anom']['VOD_anom']
            res[f'R_anom_VOD_R_{lag}'] = corr['R_anom']['VOD_anom']
            res[f'R_anom_SIF_T_{lag}'] = corr['T_anom']['SIF_anom']
            res[f'R_anom_SIF_P_{lag}'] = corr['P_anom']['SIF_anom']
            res[f'R_anom_SIF_R_{lag}'] = corr['R_anom']['SIF_anom']

            res[f'R_anom_LAI_VOD_{lag}'] = corr['LAI_anom_nolag']['VOD_anom']
            res[f'R_anom_LAI_SIF_{lag}'] = corr['LAI_anom_nolag']['SIF_anom']
            res[f'R_anom_VOD_SIF_{lag}'] = corr['VOD_anom_nolag']['SIF_anom']

        if fout.exists():
            res.to_csv(fout, float_format='%0.4f', mode='a', header=False)
        else:
            res.to_csv(fout, float_format='%0.4f')
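The lag above is applied by offsetting an integer index before `pd.concat`, which aligns the met frame at month t with the vegetation frame at month t - lag; that is equivalent to `Series.shift(lag)`. A minimal synthetic sketch, where a built-in 2-month vegetation delay is recovered at the corresponding (negative) lag:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
t = pd.date_range('2010-01-31', periods=120, freq='M')
met = pd.Series(rng.normal(size=120), index=t, name='T')
veg = met.shift(2).rename('LAI')  # vegetation trails the driver by 2 months

corrs = {lag: pd.concat((met, veg.shift(lag)), axis=1).corr().iloc[0, 1]
         for lag in np.arange(-6, 7)}
print(max(corrs, key=corrs.get))  # -2: shifting veg back by 2 realigns it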
Example 8
def run():

    exp = 'US_M36_SMOS40_noDA_cal_scaled'

    io = LDAS_io('ObsFcstAna', exp)

    froot = r"D:\data_sets\LDAS_runs" + "\\" + exp + "\\obs_err"
    fbase = 'SMOS_fit_Tb_'

    dtype = template_error_Tb40()[0]

    angles = np.array([
        40.,
    ])
    orbits = ['A', 'D']

    tiles = io.grid.tilecoord['tile_id'].values.astype('int32')
    ind_lat = io.grid.tilecoord.loc[:,
                                    'j_indg'].values - io.grid.tilegrids.loc[
                                        'domain', 'j_offg']
    ind_lon = io.grid.tilecoord.loc[:,
                                    'i_indg'].values - io.grid.tilegrids.loc[
                                        'domain', 'i_offg']

    template = pd.DataFrame(columns=dtype.names, index=tiles).astype('float32')
    template['lon'] = io.grid.tilecoord['com_lon'].values.astype('float32')
    template['lat'] = io.grid.tilecoord['com_lat'].values.astype('float32')

    modes = np.array([0, 0])
    sdate = np.array([2010, 1, 1, 0, 0])
    edate = np.array([2016, 12, 31, 0, 0])
    lengths = np.array([len(tiles),
                        len(angles)])  # number of tiles and incidence angles

    dims = io.timeseries['obs_obs'].shape

    obs_errstd = np.full(dims[0:-1], 4.)

    # ----- Estimate observation error std from anomaly innovations -----
    cnt = 0
    for spc in np.arange(dims[0]):
        for lat in np.arange(dims[1]):
            for lon in np.arange(dims[2]):
                cnt += 1
                logging.info('%i / %i' % (cnt, np.prod(dims[0:-1])))

                try:
                    obs = calc_anomaly(io.timeseries['obs_obs'][
                        spc, lat, lon, :].to_dataframe()['obs_obs'],
                                       method='moving_average',
                                       longterm=True)
                    fcst = calc_anomaly(io.timeseries['obs_fcst'][
                        spc, lat, lon, :].to_dataframe()['obs_fcst'],
                                        method='moving_average',
                                        longterm=True)
                    fcst_errvar = np.nanmean(
                        io.timeseries['obs_fcstvar'][spc, lat, lon, :].values)

                    tmp_obs_errstd = (((obs - fcst)**2).mean() -
                                      fcst_errvar)**0.5
                    if not np.isnan(tmp_obs_errstd):
                        obs_errstd[spc, lat, lon] = tmp_obs_errstd

                except Exception:
                    pass

    np.place(obs_errstd, obs_errstd < 0, 0)
    np.place(obs_errstd, obs_errstd > 20, 20)

    # ----- write output files -----
    for orb in orbits:
        # !!! inconsistent with the definition in the obs_paramfile (species) !!!
        modes[0] = 1 if orb == 'A' else 0

        res = template.copy()

        spc = 0 if orb == 'A' else 1
        res['err_Tbh'] = obs_errstd[spc, ind_lat, ind_lon]

        spc = 2 if orb == 'A' else 3
        res['err_Tbv'] = obs_errstd[spc, ind_lat, ind_lon]

        fname = os.path.join(froot, fbase + orb + '.bin')

        fid = open(fname, 'wb')
        io.write_fortran_block(fid, modes)
        io.write_fortran_block(fid, sdate)
        io.write_fortran_block(fid, edate)
        io.write_fortran_block(fid, lengths)
        io.write_fortran_block(fid, angles)

        for f in res.columns.values:
            io.write_fortran_block(fid, res[f].values)
        fid.close()
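The core estimate in the loop above assumes that observation and forecast errors are uncorrelated, so that var(obs - fcst) ~ R + B, with R the observation error variance and B the forecast error variance; subtracting `obs_fcstvar` from the mean squared anomaly innovation then recovers R. A synthetic sanity check of that partition:

import numpy as np

rng = np.random.default_rng(0)
truth = rng.normal(250., 10., 20000)
obs = truth + rng.normal(0., 4., 20000)   # true obs error std = 4 (R = 16)
fcst = truth + rng.normal(0., 3., 20000)  # true fcst error std = 3 (B = 9)

# mean((obs - fcst)**2) ~ R + B, so subtracting the forecast error
# variance recovers the observation error std, as in the loop above.
print(np.sqrt(((obs - fcst)**2).mean() - 9.))  # ~4.0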
Example 9
def run(part):

    parts = 15

    smos = SMOS_io()
    ismn = ISMN_io()
    ascat = HSAF_io(ext=None)
    mswep = MSWEP_io()

    # Median Q from MadKF API/CONUS run.
    Q_avg = 12.
    R_avg = 74.

    # Select only SCAN and USCRN
    ismn.list = ismn.list[(ismn.list.network == 'SCAN') |
                          (ismn.list.network == 'USCRN')]
    ismn.list.index = np.arange(len(ismn.list))

    # Split the station list into `parts` chunks for parallelization
    subs = (np.arange(parts + 1) * len(ismn.list) / parts).astype('int')
    subs[-1] = len(ismn.list)
    start = subs[part - 1]
    end = subs[part]
    ismn.list = ismn.list.iloc[start:end, :]

    if platform.system() == 'Windows':
        result_file = os.path.join('D:', 'work', 'MadKF', 'CONUS', 'ismn_eval',
                                   'result_part%i.csv' % part)
    elif platform.system() == 'Linux':
        result_file = os.path.join('/', 'scratch', 'leuven', '320', 'vsc32046',
                                   'output', 'MadKF', 'CONUS', 'ismn_eval',
                                   'result_part%i.csv' % part)
    else:
        result_file = os.path.join('/', 'work', 'MadKF', 'CONUS', 'ismn_eval',
                                   'parts2', 'result_part%i.csv' % part)

    dt = ['2010-01-01', '2015-12-31']

    for cnt, (station,
              insitu) in enumerate(ismn.iter_stations(surf_depth=0.1)):

        # station = ismn.list.loc[978,:]
        # insitu = ismn.read_first_surface_layer('SCAN','Los_Lunas_Pmc')

        print('%i / %i' % (cnt, len(ismn.list)))

        # if True:
        try:
            gpi = lonlat2gpi(station.lon, station.lat, mswep.grid)
            mswep_idx = mswep.grid.index[mswep.grid.dgg_gpi == gpi][0]
            smos_gpi = mswep.grid.loc[mswep_idx, 'smos_gpi']

            precip = mswep.read(mswep_idx)
            sm_ascat = ascat.read(gpi)
            sm_smos = smos.read(smos_gpi) * 100.

            if (precip is None) | (sm_ascat is None) | (sm_smos is None) | (
                    insitu is None):
                continue

            precip = calc_anomaly(precip[dt[0]:dt[1]],
                                  method='moving_average',
                                  longterm=False)
            sm_ascat = calc_anomaly(sm_ascat[dt[0]:dt[1]],
                                    method='moving_average',
                                    longterm=False)
            sm_smos = calc_anomaly(sm_smos[dt[0]:dt[1]],
                                   method='moving_average',
                                   longterm=False)
            insitu = calc_anomaly(insitu[dt[0]:dt[1]].resample('1d').first(),
                                  method='moving_average',
                                  longterm=False).tz_localize(None)

            df = pd.DataFrame({
                1: precip,
                2: sm_ascat,
                3: sm_smos,
                4: insitu
            },
                              index=pd.date_range(dt[0], dt[1]))
            df.loc[np.isnan(df[1]), 1] = 0.
            n = len(df)

            if len(df.dropna()) < 50:
                continue
            gamma = mswep.grid.loc[mswep_idx, 'gamma']
            api = API(gamma=gamma)

            # --- OL run ---
            x_OL = np.full(n, np.nan)
            model = deepcopy(api)
            for t, f in enumerate(precip.values):
                x = model.step(f)
                x_OL[t] = x

            # ----- Calculate uncertainties -----
            # convert (static) forcing to model uncertainty
            P_avg = Q_avg / (1 - gamma**2)

            # calculate TCA based uncertainty and scaling coefficients
            tmp_df = pd.DataFrame({
                1: x_OL,
                2: sm_ascat,
                3: sm_smos
            },
                                  index=pd.date_range(dt[0], dt[1])).dropna()
            snr, r_tc, err, beta = tc(tmp_df)
            P_TC = err[0]**2
            Q_TC = P_TC * (1 - gamma**2)
            R_TC = (err[1] / beta[1])**2
            H_TC = beta[1]

            # Calculate RMSD based uncertainty
            R_rmsd = (np.nanmean(
                (tmp_df[1].values - H_TC * tmp_df[2].values)**2) - P_avg)
            if R_rmsd < 0:
                R_rmsd *= -1
            # -----------------------------------

            # ----- Run KF using TCA-based uncertainties -----
            api_kf = API(gamma=gamma, Q=Q_TC)
            x_kf, P, R_innov_kf, checkvar_kf, K_kf = \
                KF(api_kf, df[1].values.copy(), df[2].values.copy(), R_TC, H=H_TC)

            # ----- Run EnKF using static uncertainties -----
            forc_pert = ['normal', 'additive', Q_avg]
            obs_pert = ['normal', 'additive', R_avg]
            x_avg, P, R_innov_avg, checkvar_avg, K_avg = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(), forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run EnKF using RMSD-based uncertainties (corrected for model uncertainty) -----
            # forc_pert = ['normal', 'additive', Q_avg]
            # obs_pert = ['normal', 'additive', R_rmsd]
            # x_rmsd, P, R_innov_rmsd, checkvar_rmsd, K_rmsd = \
            #     EnKF(api, df[1].values.copy(), df[2].values.copy(), forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run MadKF -----
            cnt = 0
            checkvar_madkf = 9999.
            while ((checkvar_madkf < 0.95) |
                   (checkvar_madkf > 1.05)) & (cnt < 5):
                cnt += 1
                tmp_x_madkf, P_madkf, R_madkf, Q_madkf, H_madkf, R_innov_madkf, tmp_checkvar_madkf, K_madkf = \
                    MadKF(api, df[1].values.copy(), df[2].values.copy(), n_ens=100, n_iter=20)
                if abs(1 - tmp_checkvar_madkf) < abs(1 - checkvar_madkf):
                    checkvar_madkf = tmp_checkvar_madkf
                    x_madkf = tmp_x_madkf

            df['x_ol'] = x_OL
            df['x_kf'] = x_kf
            df['x_avg'] = x_avg
            # df['x_rmsd'] = x_rmsd
            df['x_madkf'] = x_madkf

            # tc_ol = tc(df[[4,3,'x_ol']])
            # tc_kf = tc(df[[4,3,'x_kf']])
            # tc_avg = tc(df[[4,3,'x_avg']])
            # tc_rmsd = tc(df[[4,3,'x_rmsd']])
            # tc_madkf = tc(df[[4,3,'x_madkf']])

            ci_l_ol, ci_m_ol, ci_u_ol = bootstrap_tc(df[[4, 3, 'x_ol']])
            ci_l_kf, ci_m_kf, ci_u_kf = bootstrap_tc(df[[4, 3, 'x_kf']])
            ci_l_avg, ci_m_avg, ci_u_avg = bootstrap_tc(df[[4, 3, 'x_avg']])
            # ci_l_rmsd, ci_m_rmsd, ci_u_rmsd = bootstrap_tc(df[[4,3,'x_rmsd']])
            ci_l_madkf, ci_m_madkf, ci_u_madkf = bootstrap_tc(
                df[[4, 3, 'x_madkf']])

            corr = df.dropna().corr()
            n_all = len(df.dropna())

            result = pd.DataFrame(
                {
                    'lon': station.lon,
                    'lat': station.lat,
                    'network': station.network,
                    'station': station.station,
                    'gpi': gpi,
                    'n_all': n_all,
                    'Q_est_madkf': Q_madkf,
                    'R_est_madkf': R_madkf,
                    'corr_ol': corr[4]['x_ol'],
                    'corr_kf': corr[4]['x_kf'],
                    'corr_avg': corr[4]['x_avg'],
                    # 'corr_rmsd': corr[4]['x_rmsd'],
                    'corr_madkf': corr[4]['x_madkf'],
                    # 'snr_ol': tc_ol[0][2],
                    # 'snr_kf': tc_kf[0][2],
                    # 'snr_avg': tc_avg[0][2],
                    # 'snr_rmsd': tc_rmsd[0][2],
                    # 'snr_madkf': tc_madkf[0][2],
                    # 'r_ol': tc_ol[1][2],
                    # 'r_kf': tc_kf[1][2],
                    # 'r_avg': tc_avg[1][2],
                    # 'r_rmsd': tc_rmsd[1][2],
                    # 'r_madkf': tc_madkf[1][2],
                    # 'rmse_kf': tc_kf[2][2],
                    # 'rmse_avg': tc_avg[2][2],
                    # 'rmse_rmsd': tc_rmsd[2][2],
                    # 'rmse_madkf': tc_madkf[2][2],
                    # 'rmse_ol': tc_ol[2][2],
                    'r_ol_l': ci_l_ol,
                    'r_ol_m': ci_m_ol,
                    'r_ol_u': ci_u_ol,
                    'r_kf_l': ci_l_kf,
                    'r_kf_m': ci_m_kf,
                    'r_kf_u': ci_u_kf,
                    'r_avg_l': ci_l_avg,
                    'r_avg_m': ci_m_avg,
                    'r_avg_u': ci_u_avg,
                    # 'r_rmsd_l': ci_l_rmsd,
                    # 'r_rmsd_m': ci_m_rmsd,
                    # 'r_rmsd_u': ci_u_rmsd,
                    'r_madkf_l': ci_l_madkf,
                    'r_madkf_m': ci_m_madkf,
                    'r_madkf_u': ci_u_madkf,
                    'checkvar_kf': checkvar_kf,
                    'checkvar_avg': checkvar_avg,
                    # 'checkvar_rmsd': checkvar_rmsd,
                    'checkvar_madkf': checkvar_madkf,
                    'R_innov_kf': R_innov_kf,
                    'R_innov_avg': R_innov_avg,
                    # 'R_innov_rmsd': R_innov_rmsd,
                    'R_innov_madkf': R_innov_madkf
                },
                index=(station.name, ))

            if not os.path.isfile(result_file):
                result.to_csv(result_file, float_format='%0.4f')
            else:
                result.to_csv(result_file,
                              float_format='%0.4f',
                              mode='a',
                              header=False)
        except Exception:
            print('GPI failed.')
            continue

    ascat.close()
    mswep.close()
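Examples 9 and 10 both convert triple-collocation output into Kalman filter parameters with the same four lines. Spelled out as a helper (a sketch; `err` and `beta` are assumed to be ordered [model, ASCAT, SMOS], as returned by the `tc` / `tcol_snr` calls above):

def tc_to_kf_params(err, beta, gamma):
    P_TC = err[0]**2              # model (open-loop) error variance
    Q_TC = P_TC * (1 - gamma**2)  # invert the AR(1) steady state P = Q / (1 - gamma**2)
    R_TC = (err[1] / beta[1])**2  # ASCAT error variance, rescaled
    H_TC = beta[1]                # scaling factor passed to KF/EnKF as H
    return P_TC, Q_TC, R_TC, H_TC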
Example 10
def run(cell=None, gpi=None):

    if (cell is None) and (gpi is None):
        print('No cell/gpi specified.')
        return

    smos = SMOS_io()
    ascat = HSAF_io(ext=None)
    mswep = MSWEP_io()

    if gpi is not None:
        cell = mswep.gpi2cell(gpi)

    # Median Q/R from TC run.
    Q_avg = 12.
    R_avg = 74.

    if platform.system() == 'Windows':
        result_file = os.path.join('D:', 'work', 'MadKF', 'CONUS',
                                   'result_%04i.csv' % cell)
    else:
        result_file = os.path.join('/', 'scratch', 'leuven', '320', 'vsc32046',
                                   'output', 'MadKF', 'CONUS',
                                   'result_%04i.csv' % cell)

    dt = ['2010-01-01', '2015-12-31']

    for data, info in mswep.iter_cell(cell, gpis=gpi):

        # print(info.name)
        # if True:
        try:
            precip = mswep.read(info.name)
            sm_ascat = ascat.read(info.dgg_gpi)
            sm_smos = smos.read(info.smos_gpi) * 100.

            if (precip is None) | (sm_ascat is None) | (sm_smos is None):
                continue

            precip = calc_anomaly(precip[dt[0]:dt[1]],
                                  method='moving_average',
                                  longterm=False)
            sm_ascat = calc_anomaly(sm_ascat[dt[0]:dt[1]],
                                    method='moving_average',
                                    longterm=False)
            sm_smos = calc_anomaly(sm_smos[dt[0]:dt[1]],
                                   method='moving_average',
                                   longterm=False)

            api = API(gamma=info.gamma)

            # Regularize time steps
            df = pd.DataFrame({
                1: precip,
                2: sm_ascat,
                3: sm_smos
            },
                              index=pd.date_range(dt[0], dt[1]))

            n_inv_precip = len(np.where(np.isnan(df[1]))[0])
            n_inv_ascat = len(np.where(np.isnan(df[2]))[0])
            n_inv_smos = len(np.where(np.isnan(df[3]))[0])
            n_inv_asc_smo = len(np.where(np.isnan(df[2]) & np.isnan(df[3]))[0])

            df.loc[np.isnan(df[1]), 1] = 0.

            # --- get OL ts  ---
            OL = np.full(len(precip), np.nan)
            model = API(gamma=info.gamma)
            for t, f in enumerate(df[1].values):
                x = model.step(f)
                OL[t] = x

            # collocate OL and satellite data sets.
            df2 = pd.DataFrame({
                1: OL,
                2: sm_ascat,
                3: sm_smos
            },
                               index=pd.date_range(dt[0], dt[1])).dropna()

            # ----- Calculate uncertainties -----
            # convert (static) forcing to model uncertainty
            P_avg = Q_avg / (1 - info.gamma**2)

            # calculate TCA based uncertainty and scaling coefficients
            snr, err, beta = tcol_snr(df2[1].values, df2[2].values,
                                      df2[3].values)
            P_TC = err[0]**2
            Q_TC = P_TC * (1 - info.gamma**2)
            R_TC = (err[1] / beta[1])**2
            H_TC = beta[1]

            # Calculate RMSD based uncertainty
            R_rmsd = (np.nanmean(
                (df2[1].values - H_TC * df2[2].values)**2) - P_avg)
            if R_rmsd < 0:
                R_rmsd *= -1
            # -----------------------------------

            # ----- Run 2D KF using TCA-based uncertainties -----
            api_kf = API(gamma=info.gamma, Q=Q_TC)
            R_2D = np.array([(err[1] / beta[1])**2, (err[2] / beta[2])**2])
            H_2D = np.array([beta[1]**(-1), beta[2]**(-1)])
            x_2d, P, checkvar1_2d, checkvar2_2d, checkvar3_2d, K1_2d, K2_2d = \
                KF_2D(api_kf, df[1].values.copy(), df[2].values.copy(), df[3].values.copy(), R_2D, H=H_2D)

            # ----- Run KF using TCA-based uncertainties -----
            api_kf = API(gamma=info.gamma, Q=Q_TC)
            x_kf, P, R_innov_kf, checkvar_kf, K_kf = \
                KF(api_kf, df[1].values.copy(), df[2].values.copy(), R_TC, H=H_TC)

            # ----- Run EnKF using TCA-based uncertainties -----
            forc_pert = ['normal', 'additive', Q_TC]
            obs_pert = ['normal', 'additive', R_TC]
            x_tc, P, R_innov_tc, checkvar_tc, K_tc = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(), forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run EnKF using static uncertainties -----
            forc_pert = ['normal', 'additive', Q_avg]
            obs_pert = ['normal', 'additive', R_avg]
            x_avg, P, R_innov_avg, checkvar_avg, K_avg = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(), forc_pert, obs_pert, H=H_TC, n_ens=50)

            # ----- Run EnKF using RMSD-based uncertainties (corrected for model uncertainty) -----
            t = timeit.default_timer()
            forc_pert = ['normal', 'additive', Q_avg]
            obs_pert = ['normal', 'additive', R_rmsd]
            x_rmsd, P, R_innov_rmsd, checkvar_rmsd, K_rmsd = \
                EnKF(api, df[1].values.copy(), df[2].values.copy(), forc_pert, obs_pert, H=H_TC, n_ens=50)
            t_enkf = timeit.default_timer() - t

            # ----- Run MadKF -----
            t = timeit.default_timer()
            x_madkf, P, R_madkf, Q_madkf, H_madkf, R_innov_madkf, checkvar_madkf, K_madkf = \
                MadKF(api, df[1].values.copy(), df[2].values.copy(), n_ens=100, n_iter=20)
            t_madkf = timeit.default_timer() - t

            # TC evaluation of assimilation results
            # df3 = pd.DataFrame({1: x_tc, 2: x_avg, 3: x_rmsd, 4: x_madkf, 5: sm_ascat, 6: sm_smos}, index=pd.date_range(dt[0], dt[1])).dropna()
            #
            # rmse_ana_tc = tcol_snr(df3[1].values, df3[5].values, df3[6].values)[1][0]
            # rmse_ana_avg = tcol_snr(df3[2].values, df3[5].values, df3[6].values)[1][0]
            # rmse_ana_rmsd = tcol_snr(df3[3].values, df3[5].values, df3[6].values)[1][0]
            # rmse_ana_madkf = tcol_snr(df3[4].values, df3[5].values, df3[6].values)[1][0]

            result = pd.DataFrame(
                {
                    'lon': info.lon,
                    'lat': info.lat,
                    'col': info.col,
                    'row': info.row,
                    'P_tc': P_TC,
                    'Q_tc': Q_TC,
                    'R_tc': R_TC,
                    'H_tc': H_TC,
                    'K_tc': K_tc,
                    'R_innov_tc': R_innov_tc,
                    'checkvar_tc': checkvar_tc,
                    'K_kf': K_kf,
                    'R_innov_kf': R_innov_kf,
                    'checkvar_kf': checkvar_kf,
                    'K1_2d': K1_2d,
                    'K2_2d': K2_2d,
                    'checkvar1_2d': checkvar1_2d,
                    'checkvar2_2d': checkvar2_2d,
                    'checkvar3_2d': checkvar3_2d,
                    'P_avg': P_avg,
                    'Q_avg': Q_avg,
                    'R_avg': R_avg,
                    'K_avg': K_avg,
                    'R_innov_avg': R_innov_avg,
                    'checkvar_avg': checkvar_avg,
                    'R_rmsd': R_rmsd,
                    'K_rmsd': K_rmsd,
                    'R_innov_rmsd': R_innov_rmsd,
                    'checkvar_rmsd': checkvar_rmsd,
                    'P_madkf': Q_madkf / (1 - info.gamma**2),
                    'Q_madkf': Q_madkf,
                    'R_madkf': R_madkf,
                    'H_madkf': H_madkf,
                    'K_madkf': K_madkf,
                    'R_innov_madkf': R_innov_madkf,
                    'checkvar_madkf': checkvar_madkf,
                    't_enkf': t_enkf,
                    't_madkf': t_madkf,
                    'n_inv_precip': n_inv_precip,
                    'n_inv_ascat': n_inv_ascat,
                    'n_inv_smos': n_inv_smos,
                    'n_inv_asc_smo': n_inv_asc_smo
                },
                index=(info.name, ))

            if not os.path.isfile(result_file):
                result.to_csv(result_file, float_format='%0.4f')
            else:
                result.to_csv(result_file,
                              float_format='%0.4f',
                              mode='a',
                              header=False)
        except Exception:
            print('GPI failed.')
            continue

    ascat.close()
    mswep.close()
Example 11
def run(part):

    parts = 6

    result_file = r'D:\work\ESA_CCI_SM\ismn_r2\ismn_r2_part%i.csv' % part

    cci = CCISM_io()
    ismn = ISMN_io()

    # ismn.list = ismn.list.iloc[100:120]

    # Split the station list into `parts` chunks for parallelization
    subs = (np.arange(parts + 1) * len(ismn.list) / parts).astype('int')
    subs[-1] = len(ismn.list)
    start = subs[part - 1]
    end = subs[part]
    ismn.list = ismn.list.iloc[start:end, :]

    freq = ['abs', 'anom']

    res = ismn.list.copy()
    res.drop(['ease_col', 'ease_row'], axis='columns', inplace=True)
    res['r_abs'] = np.nan
    res['r_anom'] = np.nan

    for i, (meta, ts_insitu) in enumerate(ismn.iter_stations()):
        print('%i/%i (Proc %i)' % (i, len(ismn.list), part))

        if ts_insitu is None:
            print('No in situ data for ' + meta.network + ' / ' + meta.station)
            continue
        ts_insitu = ts_insitu['2007-10-01':'2014-12-31']
        if len(ts_insitu) < 10:
            print('No in situ data for ' + meta.network + ' / ' + meta.station)
            continue
        df_insitu = pd.DataFrame(ts_insitu).dropna()
        df_insitu_anom = pd.DataFrame(calc_anomaly(ts_insitu)).dropna()

        df_cci = cci.read(meta.lon,
                          meta.lat,
                          version='v04.4',
                          mode=['ACTIVE', 'PASSIVE']).dropna()
        if len(df_cci) < 10:
            print('No CCI data for ' + meta.network + ' / ' + meta.station)
            continue

        for f in freq:
            if f == 'abs':
                matched = df_match(df_cci, df_insitu, window=0.5)
            else:
                df_cci.loc[:, 'ACTIVE_v04.4'] = calc_anomaly(
                    df_cci['ACTIVE_v04.4'])
                df_cci.loc[:, 'PASSIVE_v04.4'] = calc_anomaly(
                    df_cci['PASSIVE_v04.4'])
                df_cci.dropna(inplace=True)
                if (len(df_cci) < 10) | (len(df_insitu_anom) < 10):
                    print('No in situ or CCI anomaly data for ' +
                          meta.network + ' / ' + meta.station)
                    continue
                matched = df_match(df_cci, df_insitu_anom, window=0.5)

            data = df_cci.join(matched['insitu']).dropna()

            if len(data) < 100:
                continue

            vals = data[['insitu', 'ACTIVE_v04.4']].values
            c1, p1 = pearsonr(vals[:, 0], vals[:, 1])
            vals = data[['insitu', 'PASSIVE_v04.4']].values
            c2, p2 = pearsonr(vals[:, 0], vals[:, 1])
            vals = data[['ACTIVE_v04.4', 'PASSIVE_v04.4']].values
            c3, p3 = pearsonr(vals[:, 0], vals[:, 1])

            if (c1 < 0) | (c2 < 0) | (c3 < 0) | (p1 > 0.05) | (p2 > 0.05) | (
                    p3 > 0.05):
                continue

            res.loc[meta.name, 'r_' + f] = np.sqrt(tc(data)[1][2])

    res.to_csv(result_file, float_format='%0.4f')