Beispiel #1
0
def plot_ensvar_ratio(dir_out):
    """Plot maps of the observation / forecast variance ratio and save them.

    For every DA experiment (one per entry in ``modes``) and each of the
    four species, the time-mean of ``obs_obsvar`` (DA run) divided by
    ``obs_fcstvar`` (open-loop run) is drawn as ``10 * log10(ratio)`` in a
    4 x 5 panel grid (rows: species, columns: experiments).

    :param dir_out: ``pathlib.Path``-like output directory; the figure is
        written to ``dir_out / 'ensvar_ratio.png'``.
    """

    fontsize = 14
    cb = [-10, 10]
    cmap = cc.cm.bjy

    modes = ['4K', 'abs', 'anom_lst', 'anom_lt', 'anom_st']
    titles = [
        '4K benchmark', 'Total signal', 'Anomalies', 'LF signal', 'HF signal'
    ]
    labels = [
        '(V-pol, Asc.)', '(V-pol, Dsc.)', '(H-pol, Asc.)', '(H-pol, Dsc.)'
    ]

    ios = [
        GEOSldas_io('ObsFcstAna',
                    exp=f'NLv4_M36_US_DA_SMAP_Pcorr_{mode}').timeseries
        for mode in modes
    ]
    io_ol = GEOSldas_io('ObsFcstAna', exp='NLv4_M36_US_OL_Pcorr').timeseries

    f = plt.figure(figsize=(25, 12))

    img = None
    for i, (io_da, tit) in enumerate(zip(ios, titles)):

        for spc, label in enumerate(labels):

            obs_var = io_da['obs_obsvar'][:, spc, :, :].values
            fcst_var = io_ol['obs_fcstvar'][:, spc, :, :].values
            avg = np.nanmean(obs_var / fcst_var, axis=0)

            # The panel must be drawn inside the species loop; otherwise
            # only the last species of every experiment ends up on the figure.
            plt.subplot(4, 5, spc * 5 + i + 1)
            img = plot_latlon_img(10 * np.log10(avg),
                                  io_da.lon.values,
                                  io_da.lat.values,
                                  fontsize=fontsize,
                                  cbrange=cb,
                                  cmap=cmap,
                                  plot_cb=False)
            # Column titles on the top row, row labels on the first column.
            if spc == 0:
                plt.title(tit, fontsize=fontsize)
            if i == 0:
                plt.ylabel(label, fontsize=fontsize)

    # One shared colorbar, using the last drawn image as the mappable.
    plot_centered_cbar(f, img, 5, fontsize=fontsize - 2)

    fout = dir_out / 'ensvar_ratio.png'
    f.savefig(fout, dpi=300, bbox_inches='tight')
    plt.close()
Beispiel #2
0
def plot_ObsFcstAna_image(species=8):
    """Plot the innovations (obs - fcst) of one species for a fixed image.

    Reads the 'ObsFcstAna' image requested with ``read_image(2011, 7, 10,
    0, 0)`` (presumably year, month, day, hour, minute -- confirm against
    ``GEOSldas_io.read_image``), keeps the rows of the requested species
    and maps ``obs_obs - obs_fcst`` with ``plot_ease_img``.

    :param species: value of the ``obs_species`` column to select.
    """

    io = GEOSldas_io('ObsFcstAna')
    img = io.read_image(2011, 7, 10, 0, 0)

    # Take an explicit copy of the filtered rows so that the column
    # assignment below does not operate on a slice of the original frame.
    img = img[img['obs_species'] == species].copy()

    tag = 'innov'
    img[tag] = img['obs_obs'] - img['obs_fcst']
    # plot_ease_img looks tiles up through the frame index.
    img.index = img['obs_tilenum'].values
    plot_ease_img(img, tag)
Beispiel #3
0
def plot_model_image():
    """Map the 'snow_mass' field of a single 'xhourly' image.

    The image is the one returned by ``read_image(2011, 4, 20, 10, 30)``;
    it is drawn with ``plot_ease_img`` using a colour range of (0, 100).
    """
    reader = GEOSldas_io('xhourly')
    snapshot = reader.read_image(2011, 4, 20, 10, 30)

    # Alternative previously used here: 'precipitation_total_surface_flux'
    # with a colour range of (0, 0.0001) or (0, 0.6).
    variable = 'snow_mass'
    colour_limits = (0, 100)

    plot_ease_img(snapshot, variable, cbrange=colour_limits)
Beispiel #4
0
def plot_fcst_uncertainties():
    """Show the time-mean ``obs_fcstvar`` of species 1-4 in a 2 x 2 figure.

    Data come from the 'US_M36_SMOS40_noDA_cal_scaled' experiment; each
    panel uses a colour range of [0, 50] and the figure is shown
    interactively.
    """
    io = GEOSldas_io('ObsFcstAna', exp='US_M36_SMOS40_noDA_cal_scaled')

    lons = io.images['lon'].values
    lats = io.images['lat'].values

    f = plt.figure(num=None,
                   figsize=(18, 9),
                   dpi=90,
                   facecolor='w',
                   edgecolor='k')

    # Panel position and species number coincide (1..4).
    panel_titles = ('Asc / H-pol', 'Dsc / H-pol', 'Asc / V-pol', 'Dsc / V-pol')
    for species, panel_title in enumerate(panel_titles, start=1):
        ax = f.add_subplot(2, 2, species)
        mean_var = io.images.sel(
            species=species)['obs_fcstvar'].mean('time').values
        plot_xarr_img(mean_var, lons, lats, cbrange=[0, 50])
        ax.set_title(panel_title, fontsize=16)

    plt.tight_layout()
    plt.show()
def plot_perturbations():
    """Plot the SMOS Tb observation-perturbation files for each error mode.

    For every mode the ascending ('A') and descending ('D') perturbation
    binaries are read, and the 'err_Tbh' / 'err_Tbv' fields are drawn in a
    2 x 2 figure that is saved as ``<root>/plots/obs_pert/<pc>/<mode>.png``.
    """

    root = Path('/Users/u0116961/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/')

    pc = 'noPcorr'

    io = GEOSldas_io('ObsFcstAna')

    lut = pd.read_csv(Paths().lut, index_col=0)
    ind = np.vectorize(io.grid.colrow2tilenum)(lut.ease2_col, lut.ease2_row, local=False)

    # Everything below is identical for all modes, so do it once.
    pert_dir = root / 'observation_perturbations' / pc
    dir_out = root / 'plots' / 'obs_pert' / pc
    # exist_ok avoids the check-then-create race of a separate exists() test.
    dir_out.mkdir(parents=True, exist_ok=True)

    dtype, hdr, length = template_error_Tb40()
    cbrange = [0, 15]

    # (subplot position, orbit, field, panel title)
    panels = (
        (221, 'A', 'err_Tbh', 'H-pol (Asc.)'),
        (222, 'A', 'err_Tbv', 'V-pol (Asc.)'),
        (223, 'D', 'err_Tbh', 'H-pol (Dsc.)'),
        (224, 'D', 'err_Tbv', 'V-pol (Dsc.)'),
    )

    for mode in ['abs', 'anom_lst', 'anom_lt', 'anom_st']:

        imgs = {}
        for orbit in ('A', 'D'):
            fname = pert_dir / mode / f'SMOS_fit_Tb_{orbit}.bin'
            img = io.read_fortran_binary(fname, dtype, hdr=hdr, length=length)
            # Shift the index by one before aligning with the tile numbers
            # (presumably 0-based records vs. 1-based tile numbers -- confirm).
            img.index += 1
            # Reindex once per orbit instead of once per panel.
            imgs[orbit] = img.reindex(ind)

        plt.figure(figsize=(19, 11))

        for position, orbit, field, title in panels:
            plt.subplot(position)
            plot_ease_img2(imgs[orbit], field, cbrange=cbrange, title=title, io=io)

        plt.savefig(dir_out / f'{mode}.png', dpi=300, bbox_inches='tight')
        plt.close()
def calc_ens_var(root):
    """Compute per-tile, per-species mean variances and store them as CSV.

    For every tile of the 'NLv4_M36_US_OL_Pcorr' experiment the time-mean
    (NaN-ignoring) of ``obs_obsvar`` and ``obs_fcstvar`` is computed for
    species 1-4 and written to ``root / 'ens_vars' / 'Pcorr' / 'ens_var.csv'``.

    :param root: ``pathlib.Path`` of the directory under which the result
        directory is created.
    """

    resdir = root / 'ens_vars' / 'Pcorr'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    resdir.mkdir(parents=True, exist_ok=True)

    exp_ol = 'NLv4_M36_US_OL_Pcorr'
    param = 'ObsFcstAna'

    io_ol = GEOSldas_io(param, exp_ol)
    grid = io_ol.grid
    tilecoord = grid.tilecoord

    species = [1, 2, 3, 4]
    res = pd.DataFrame(index=tilecoord.index.values,
                       columns=['col', 'row']
                       + [f'obs_var_spc{spc}' for spc in species]
                       + [f'fcst_var_spc{spc}' for spc in species]
                       + [f'ana_var_spc{spc}' for spc in species])

    # Stored col/row are requested with local_cs=False, whereas the col/row
    # used for array indexing below use the function's default.
    ids = tilecoord['tile_id'].values
    res.loc[:, 'col'], res.loc[:, 'row'] = np.vectorize(
        grid.tileid2colrow)(ids, local_cs=False)

    # Look the variables up once instead of inside the innermost loop.
    timeseries = io_ol.timeseries
    obs_var = timeseries['obs_obsvar']
    fcst_var = timeseries['obs_fcstvar']

    n_tiles = len(res)
    for cnt, (idx, val) in enumerate(tilecoord.iterrows()):
        print('%i / %i' % (cnt, n_tiles))

        col, row = grid.tileid2colrow(val.tile_id)

        for spc in species:
            res.loc[idx, f'obs_var_spc{spc}'] = np.nanmean(
                obs_var[:, spc - 1, row, col].values)

            fcst_mean = np.nanmean(fcst_var[:, spc - 1, row, col].values)
            res.loc[idx, f'fcst_var_spc{spc}'] = fcst_mean
            # NOTE(review): the analysis column is filled with the forecast
            # variance of the open-loop run (as in the original code); a DA
            # experiment with 'obs_anavar' would be needed for a real value.
            res.loc[idx, f'ana_var_spc{spc}'] = fcst_mean

    fname = resdir / 'ens_var.csv'

    res.to_csv(fname, float_format='%0.8f')
Beispiel #7
0
def plot_ease_img(data,tag,
                  llcrnrlat=24,
                  urcrnrlat=51,
                  llcrnrlon=-128,
                  urcrnrlon=-64,
                  figsize=(20,10),
                  cbrange=(-20,20),
                  cmap='jet',
                  plot_cb=True,
                  title='',
                  fontsize=20):
    """Draw one column of a tile-indexed table on a Miller-projection map.

    :param data: table indexed by labels of the grid's ``tilecoord`` table.
    :param tag: name of the column of ``data`` to plot.
    :param llcrnrlat, urcrnrlat, llcrnrlon, urcrnrlon: map corners.
    :param figsize: accepted for compatibility; not used by this function.
    :param cbrange: (min, max) colour limits.
    :param cmap: colormap passed to ``pcolormesh``.
    :param plot_cb: a colorbar is drawn only if this is exactly ``True``.
    :param title: axes title.
    :param fontsize: font size of the title and colorbar tick labels.
    """
    grid = GEOSldas_io().grid
    tilecoord = grid.tilecoord

    lon_mesh, lat_mesh = np.meshgrid(grid.ease_lons, grid.ease_lats)

    # Start from an all-NaN canvas; cells without data stay masked.
    canvas = np.full(lon_mesh.shape, np.nan, dtype='float32')

    tiles = data.index.values
    rows = tilecoord.loc[tiles, 'j_indg']
    cols = tilecoord.loc[tiles, 'i_indg']
    canvas[rows, cols] = data[tag]

    masked_canvas = np.ma.masked_invalid(canvas)

    basemap = Basemap(projection='mill',
                      llcrnrlat=llcrnrlat,
                      urcrnrlat=urcrnrlat,
                      llcrnrlon=llcrnrlon,
                      urcrnrlon=urcrnrlon,
                      resolution='c')

    basemap.drawcoastlines()
    basemap.drawcountries()
    basemap.drawstates()

    mesh = basemap.pcolormesh(lon_mesh,
                              lat_mesh,
                              masked_canvas,
                              cmap=cmap,
                              latlon=True)
    mesh.set_clim(vmin=cbrange[0], vmax=cbrange[1])

    if plot_cb is True:
        colorbar = basemap.colorbar(mesh, "bottom", size="7%", pad=0.05)
        # Same font size on both axes of the colorbar.
        for tick_getter in (colorbar.ax.get_xticklabels,
                            colorbar.ax.get_yticklabels):
            for tick_label in tick_getter():
                tick_label.set_fontsize(fontsize)

    plt.title(title, fontsize=fontsize)
Beispiel #8
0
def plot_freq_components(dir_out):
    """Plot a Tb forecast time series and its frequency components.

    Uses ``obs_fcst`` at indices ``[:, 0, 50, 120]`` of the
    'NLv4_M36_US_DA_SMAP_Pcorr_LTST' experiment and draws three stacked
    panels: signal + climatology, total anomalies (long-term + short-term)
    and the two anomaly components separately.

    :param dir_out: ``pathlib.Path``-like output directory; the figure is
        written to ``dir_out / 'frequency_components.png'``.
    """

    ds = GEOSldas_io('ObsFcstAna',
                     exp='NLv4_M36_US_DA_SMAP_Pcorr_LTST').timeseries

    ts = ds['obs_fcst'][:, 0, 50, 120].to_pandas().dropna()

    anom_lt = calc_anom(ts, mode='longterm')
    anom_st = calc_anom(ts, mode='shortterm')
    # The climatology that is plotted is the one returned by calc_anom; a
    # preceding calc_climatology() result was overwritten and never used.
    clim = calc_anom(ts, return_clim=True)

    anom_lst = anom_lt + anom_st
    anom_lst.name = 'Anomalies (HF + LF)'

    f, axes = plt.subplots(figsize=(18, 6), nrows=3, ncols=1, sharex=True)

    fontsize = 11
    df = pd.concat((ts, clim), axis=1)
    df.columns = ['$T_b$ signal', 'Climatology']
    df.plot(ax=axes[0],
            color=['darkorange', 'blue'],
            linewidth=1.7,
            fontsize=fontsize).legend(loc='upper right', fontsize=fontsize)

    df = pd.DataFrame(anom_lst)
    df.plot(ax=axes[1],
            color='crimson',
            ylim=[-18, 18],
            linewidth=1.7,
            fontsize=fontsize).legend(loc='upper right', fontsize=fontsize)
    axes[1].axhline(color='black', linestyle='--', linewidth=0.8)

    df = pd.concat((anom_st, anom_lt), axis=1)
    df.columns = ['HF signal', 'LF signal']
    df.plot(ax=axes[2],
            color=['goldenrod', 'teal'],
            ylim=[-15, 15],
            linewidth=1.7,
            fontsize=fontsize).legend(loc='upper right', fontsize=fontsize)
    axes[2].axhline(color='black', linestyle='--', linewidth=0.8)

    plt.xlabel('')
    plt.minorticks_off()
    # Upper limit was written as '2021,04' (comma typo).
    plt.xlim('2015-04', '2021-04')
    plt.xticks(fontsize=fontsize)

    f.savefig(dir_out / 'frequency_components.png',
              dpi=300,
              bbox_inches='tight')
    plt.close()
Beispiel #9
0
def plot_innov(spc=8, row=35, col=65):
    """Plot observations, forecasts and innovations at one grid cell.

    Panel 1 shows obs / fcst of the scaled experiment
    ('NLv4_M36_US_OL_Pcorr_scl_SMAP'), panel 2 the same for the unscaled
    experiment ('NLv4_M36_US_OL_Pcorr_SMAP'), and panel 3 the innovations
    (obs - fcst) of the scaled experiment. The innovation mean is printed.

    :param spc: species number (1-based; ``spc - 1`` is used as position).
    :param row: positional index along the ``lat`` dimension.
    :param col: positional index along the ``lon`` dimension.
    """

    def cell_values(ds, var):
        # Values of one variable at the requested species / grid cell.
        return ds[var].isel(species=spc - 1, lat=row, lon=col).values

    ts_scl = GEOSldas_io('ObsFcstAna',exp='NLv4_M36_US_OL_Pcorr_scl_SMAP').timeseries
    try:
        ts_usc = GEOSldas_io('ObsFcstAna',exp='NLv4_M36_US_OL_Pcorr_SMAP').timeseries
        try:
            plt.figure(figsize=(18,11))

            for position, ds in ((311, ts_scl), (312, ts_usc)):
                ax = plt.subplot(position)
                df = pd.DataFrame(index=ds.time)
                df['obs'] = cell_values(ds, 'obs_obs')
                df['fcst'] = cell_values(ds, 'obs_fcst')
                df.dropna().plot(ax=ax)

            ax3 = plt.subplot(313)
            # The innovations come from the scaled run, so they must be
            # indexed by that run's time axis (was: the unscaled run's).
            df = pd.DataFrame(index=ts_scl.time)
            df['innov'] = cell_values(ts_scl, 'obs_obs') - cell_values(ts_scl, 'obs_fcst')
            df.dropna().plot(ax=ax3)
            plt.axhline(color='black', linestyle='--')

            print(df.mean())

            plt.tight_layout()
            plt.show()
        finally:
            # Release the datasets even if plotting fails.
            ts_usc.close()
    finally:
        ts_scl.close()
Beispiel #10
0
    def generate_station_list(self):
        """Build the list of unique stations with their grid col/row.

        Takes network, station, latitude and longitude from
        ``self.io.metadata``, keeps the first entry of every unique
        network + station combination, adds the grid column / row returned
        by ``grid.lonlat2colrow`` minus ``self.col_offs`` / ``self.row_offs``,
        writes the table to ``self.list_file`` and stores it in ``self.list``.
        """
        wanted = [('network', 'val'), ('station', 'val'),
                  ('latitude', 'val'), ('longitude', 'val')]
        stations = self.io.metadata[wanted]
        stations.columns = stations.columns.droplevel('key')
        stations.columns.name = None

        # np.unique returns the position of the first occurrence of each
        # network + station string; keep exactly those rows.
        _, first_rows = np.unique(stations.network + stations.station,
                                  return_index=True)
        stations = stations.iloc[first_rows]
        stations.index = np.arange(len(stations))

        lonlat2colrow = np.vectorize(GEOSldas_io().grid.lonlat2colrow)
        col, row = lonlat2colrow(stations.longitude.values,
                                 stations.latitude.values)
        stations['ease_col'] = col - self.col_offs
        stations['ease_row'] = row - self.row_offs

        stations.to_csv(self.list_file)
        self.list = stations
Beispiel #11
0
def plot_grid_coord_indices():
    """Draw every 15th grid latitude / longitude line on a map.

    The lines are taken from the image coordinates of the
    'US_M36_SMOS40_noDA_cal_scaled' experiment; axis ticks are labelled
    with the corresponding grid indices (multiples of 15).
    """
    io = GEOSldas_io('ObsFcstAna', exp='US_M36_SMOS40_noDA_cal_scaled')

    all_lats = io.images.lat.values
    all_lons = io.images.lon.values

    plt.figure(figsize=(10, 5))

    corners = dict(llcrnrlat=24, urcrnrlat=51, llcrnrlon=-128, urcrnrlon=-64)
    m = Basemap(projection='mill', resolution='c', **corners)
    m.drawcoastlines(linewidth=0.5)
    m.drawcountries(linewidth=0.5)
    m.drawstates(linewidth=0.1)

    # Keep every 15th coordinate only.
    lats = all_lats[np.arange(0, len(all_lats), 15)]
    lons = all_lons[np.arange(0, len(all_lons), 15)]

    line_style = dict(labels=[False, False, False, False],
                      linestyle='--',
                      linewidth=1,
                      color='red')
    m.drawparallels(lats, **line_style)
    m.drawmeridians(lons, **line_style)

    # Map-projection x of each meridian (at the first kept latitude) and
    # y of each parallel (at the last kept longitude).
    x = np.zeros(len(lons))
    for pos, lon in enumerate(lons):
        x[pos] = m(lon, lats[0])[0]

    y = np.zeros(len(lats))
    for pos, lat in enumerate(lats):
        y[pos] = m(lons[-1], lat)[1]

    plt.xticks(x[:-1], np.arange(0, len(lons) - 1) * 15)
    plt.yticks(y[1:], np.arange(1, len(lats)) * 15)

    plt.show()
Beispiel #12
0
def _plot_skill_hist(ax, res, col, lim, bins, xloc,
                     title=None, ylabel=None, legend_labels=None,
                     hide_xticks=True):
    """Draw the '4k' and ``col`` skill-difference histograms on ``ax``."""
    plt.sca(ax)
    res['4k'].hist(bins=bins, grid=False, ax=ax, range=lim, alpha=0.8)
    res[col].hist(bins=bins, grid=False, ax=ax, range=lim, alpha=0.6)
    if title is not None:
        plt.title(title)
    # Tick labels are drawn in white, i.e. effectively hidden.
    plt.yticks(color='w', fontsize=5)
    if hide_xticks:
        plt.xticks(color='w', fontsize=1)
    if ylabel is not None:
        plt.ylabel(ylabel)
    if legend_labels is not None:
        # Must be called before axvline(): legend(labels=...) assigns the
        # labels to the artists present on the axes in handle order.
        plt.legend(labels=legend_labels, loc='upper right', fontsize=14)
    plt.axvline(color='black', linestyle='--', linewidth=1)
    plt.xlim(lim)

    # Annotate both means in the upper-left corner of the panel.
    ylim = ax.get_ylim()
    span = ylim[1] - ylim[0]
    plt.text(xloc,
             ylim[1] - span / 10,
             'mean = %.2f' % res['4k'].mean(),
             color='#1f77b4')
    plt.text(xloc,
             ylim[1] - 1.6 * span / 10,
             'mean = %.2f' % res[col].mean(),
             color='#ff7f0e')


def plot_statistics(res_path, dir_out):
    """Plot histograms of skill differences relative to the open-loop run.

    For each frequency component (anomalies, LF, HF) three rows of
    histograms are drawn: anomaly, individual and joint assimilation, each
    together with the 4K benchmark. All values are differences with respect
    to the open-loop ('OL') column.

    :param res_path: ``pathlib.Path`` of the directory holding
        'ascat_eval.csv' and 'insitu_TCA.csv'.
    :param dir_out: ``pathlib.Path``-like output directory; the figure is
        written to ``dir_out / 'stats_<output>_<met>_<var>.png'``.
    """

    res = pd.read_csv(res_path / 'ascat_eval.csv', index_col=0)
    res_ismn = pd.read_csv(res_path / 'insitu_TCA.csv', index_col=0)
    networks = ['SCAN', 'USCRN']
    # Explicit copy: the frame is modified in place when output == 'ismn'.
    res_ismn = res_ismn.loc[res_ismn.network.isin(networks), :].copy()
    res_tc = pd.read_csv(
        '/Users/u0116961/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/sm_validation/Pcorr/result.csv',
        index_col=0)

    # Index labels of the grid cells that contain an in situ station;
    # stations without a matching grid cell are skipped.
    tg = GEOSldas_io().grid.tilegrids
    ind_ismn = []
    for col, row in zip(res_ismn.ease_col.values + tg.loc['domain', 'i_offg'],
                        res_ismn.ease_row.values + tg.loc['domain', 'j_offg']):
        matches = res.index[(res.row == row) & (res.col == col)]
        if len(matches) > 0:
            ind_ismn.append(matches[0])

    runs = [
        f'Pcorr_{err}' for err in ['anom_lst', 'anom_lt_ScDY', 'anom_st_ScYH']
    ]
    modes = ['anom_lst', 'anom_lt', 'anom_st']
    titles = ['Anomaly skill', 'LF skill', 'HF skill']

    f = plt.figure(figsize=(24, 15))

    output = 'ascat'  # 'ascat' or 'ascat_ismn' or 'ismn'
    met = 'r_corr'  # 'R_model_insitu'# or 'R2_model' / 'r' or 'r_corr'
    var = 'sm_surface'

    if 'ubRMSD' in met:
        lim = [-0.015, 0.015]
        xloc = -0.0135
        bins = 15
    elif 'ascat' not in output:
        lim = [-0.2, 0.3]
        xloc = -0.18
        bins = 15
    else:
        lim = [-0.25, 0.25]
        xloc = -0.22
        bins = 20

    f.suptitle(f'{output} / {met} / {var}')
    for i, (run, mode, title) in enumerate(zip(runs, modes, titles)):

        if 'ascat' in output:
            col_ol = f'ana_{met}_Pcorr_OL_{mode}'
            col_4k = f'ana_{met}_Pcorr_4K_{mode}'
            col_lst = f'ana_{met}_Pcorr_anom_lst_{mode}'
            col_da2 = f'ana_{met}_Pcorr_LTST_{mode}'
            col_da1 = f'ana_{met}_{run}_{mode}'
        else:
            col_ol = f'{met}_Pcorr_OL_{mode}_{var}'
            col_4k = f'{met}_Pcorr_4K_{mode}_{var}'
            col_lst = f'{met}_Pcorr_anom_lst_{mode}_{var}'
            col_da2 = f'{met}_Pcorr_LTST_{mode}_{var}'
            col_da1 = f'{met}_{run}_{mode}_{var}'
        # col_da1 equals col_lst for the 'anom_lst' run; de-duplicate so
        # that no column is transformed twice below.
        skill_cols = list(
            dict.fromkeys((col_ol, col_4k, col_lst, col_da2, col_da1)))

        if 'ascat' in output:
            # NOTE(review): `res` is re-filtered cumulatively across loop
            # iterations (as in the original code) -- confirm this is wanted.
            if 'ismn' in output:
                res = res.reindex(ind_ismn)
            else:
                r_asc_smap = res_tc[f'r_grid_{mode}_p_ASCAT_SMAP']
                r_asc_clsm = res_tc[f'r_grid_{mode}_p_ASCAT_CLSM']
                r_smap_clsm = res_tc[f'r_grid_{mode}_p_SMAP_CLSM']
                thres = 0.2
                # All three pairwise correlations must exceed the threshold
                # (the ASCAT-SMAP one used to be tested three times).
                ind_valid = res_tc[(r_asc_smap > thres)
                                   & (r_asc_clsm > thres)
                                   & (r_smap_clsm > thres)].index
                res = res.reindex(ind_valid)
        else:
            res = res_ismn
            if 'R2' in met:
                for col in skill_cols:
                    res[col] = res[col]**0.5

        if not ((output == 'ismn') and ('R2' not in met)):
            print('filtered')
            # Discard values outside the open interval (0, 1).
            for col in skill_cols:
                res.loc[(res[col] <= 0) | (res[col] >= 1), col] = np.nan

        res['single'] = res[col_da1] - res[col_ol]
        res['joint'] = res[col_da2] - res[col_ol]
        res['anom'] = res[col_lst] - res[col_ol]
        res['4k'] = res[col_4k] - res[col_ol]

        first_col = (i == 0)

        _plot_skill_hist(
            plt.subplot(3, 3, i + 1), res, 'anom', lim, bins, xloc,
            title=title,
            ylabel='Anomaly assimilation' if first_col else None,
            legend_labels=['R$_{4K}$ - R$_{OL}$', 'R$_{TC}$ - R$_{OL}$']
            if i == 2 else None)

        _plot_skill_hist(
            plt.subplot(3, 3, i + 4), res, 'single', lim, bins, xloc,
            ylabel='Individual assimilation' if first_col else None)

        # Bottom row keeps its x tick labels; as in the original code the
        # first bottom panel additionally receives the column title.
        _plot_skill_hist(
            plt.subplot(3, 3, i + 7), res, 'joint', lim, bins, xloc,
            title=title if first_col else None,
            ylabel='joint assimilation' if first_col else None,
            hide_xticks=False)

    f.savefig(dir_out / f'stats_{output}_{met}_{var}.png',
              dpi=300,
              bbox_inches='tight')
    plt.close()
def run_ascat_eval_part(part, parts, ref='ascat'):
    """Evaluate model surface soil moisture against ASCAT for one chunk.

    The look-up table of grid cells is split into ``parts`` chunks and the
    ``part``-th one (1-based) is processed. For every grid cell, frequency
    component and model run, the Pearson correlation with ASCAT is computed
    (all time steps and SMAP-observation days only, the latter prefixed
    'ana_'), and one row per grid cell is appended to
    ``ascat_eval_part<part>.csv``.

    Imports are local to the function (presumably so that it can be run in
    worker processes -- confirm against the caller).

    :param part: 1-based number of the chunk to process.
    :param parts: total number of chunks.
    :param ref: not used by this function.
    """

    import numpy as np
    import pandas as pd

    from pathlib import Path
    from scipy.stats import pearsonr

    from pyldas.interface import GEOSldas_io
    from myprojects.readers.ascat import HSAF_io
    from myprojects.timeseries import calc_anom
    from validation_good_practice.ancillary.paths import Paths

    # Frequency component -> calc_anom mode ('abs' means: no decomposition).
    anom_modes = {
        'anom_lst': 'climatological',
        'anom_st': 'shortterm',
        'anom_lt': 'longterm',
    }

    def decompose(ts, mode):
        # Return the NaN-free frequency component `mode` of `ts`.
        if mode in anom_modes:
            return calc_anom(ts.copy(), mode=anom_modes[mode]).dropna()
        return ts.dropna()

    def correlate(pairs):
        # Pearson r and p of columns 1 and 2; NaN for 10 or fewer samples.
        if len(pairs) > 10:
            return pearsonr(pairs[1], pairs[2])
        return np.nan, np.nan

    res_path = Path(
        '~/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/validation_all'
    ).expanduser()
    # Several parts may start at the same time; exist_ok avoids the
    # check-then-create race of a separate exists() test.
    res_path.mkdir(parents=True, exist_ok=True)

    result_file = res_path / ('ascat_eval_part%i.csv' % part)

    tc_res_pc = pd.read_csv(
        '/Users/u0116961/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/sm_validation/Pcorr/result.csv',
        index_col=0)
    tc_res_nopc = pd.read_csv(
        '/Users/u0116961/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/sm_validation/noPcorr/result.csv',
        index_col=0)

    lut = pd.read_csv(Paths().lut, index_col=0)

    # Split grid cell list for parallelization
    subs = (np.arange(parts + 1) * len(lut) / parts).astype('int')
    subs[-1] = len(lut)
    start = subs[part - 1]
    end = subs[part]

    # Look-up table that contains the grid cells to iterate over
    lut = lut.iloc[start:end, :]

    root = Path('/Users/u0116961/data_sets/GEOSldas_runs')

    runs = [run.name for run in root.glob('*_DA_SMAP_*')]
    # Strip the first 20 characters of the experiment name.
    names = [run[20::] for run in runs]

    runs += ['NLv4_M36_US_OL_Pcorr', 'NLv4_M36_US_OL_noPcorr']
    names += ['Pcorr_OL', 'noPcorr_OL']

    dss = [
        GEOSldas_io('tavg3_1d_lnr_Nt', run).timeseries
        if 'DA' in run else GEOSldas_io('SMAP_L4_SM_gph', run).timeseries
        for run in runs
    ]
    grid = GEOSldas_io('ObsFcstAna', runs[0]).grid
    i_offg = grid.tilegrids.loc['domain', 'i_offg']
    j_offg = grid.tilegrids.loc['domain', 'j_offg']

    ds_full = GEOSldas_io('SMAP_L4_SM_gph', 'NLv4_M36_US_OL_Pcorr').timeseries
    ds_full = ds_full.assign_coords(
        {'time': ds_full['time'].values + pd.to_timedelta('2 hours')})

    ds_obs_smap = GEOSldas_io(
        'ObsFcstAna', 'NLv4_M36_US_DA_SMAP_Pcorr_4K').timeseries['obs_obs']

    modes = ['abs', 'anom_lt', 'anom_st', 'anom_lst']
    var = 'sm_surface'

    ascat = HSAF_io()

    for cnt, (gpi, data) in enumerate(lut.iterrows()):
        print('%i / %i, gpi: %i' % (cnt, len(lut), gpi))

        col = int(data.ease2_col - i_offg)
        row = int(data.ease2_row - j_offg)

        # Collect the row in a dict and build the frame once at the end
        # instead of growing a DataFrame column by column.
        res = {
            'col': int(data.ease2_col),
            'row': int(data.ease2_row),
            'lcol': col,
            'lrow': row,
        }

        # Best effort: grid cells whose ASCAT data cannot be read are skipped.
        try:
            ts_ascat = ascat.read(
                data['ascat_gpi']).resample('1d').mean().dropna()
            ts_ascat = ts_ascat[~ts_ascat.index.duplicated(keep='first')]
            ts_ascat.name = 'ASCAT'
        except Exception:
            continue

        # Days on which species 1 or 2 has an observation; empty if the
        # observation data cannot be read for this cell.
        try:
            t_df_smap = ds_obs_smap.sel(species=[1, 2]).isel(
                lat=row, lon=col).to_pandas()
            t_ana = t_df_smap[~np.isnan(t_df_smap[1])
                              | ~np.isnan(t_df_smap[2])].index
            t_ana = pd.Series(1,
                              index=t_ana).resample('1d').mean().dropna().index
        except Exception:
            t_ana = pd.DatetimeIndex([])

        # Time steps with zero snow depth and soil temperature > 277.15.
        # Depends on the grid cell only, so it is computed once per cell.
        ind_valid = ds_full.time.values[
            (ds_full['snow_depth'][:, row, col].values == 0) &
            (ds_full['soil_temp_layer1'][:, row, col].values > 277.15)]

        # Read every model time series once per cell (not once per mode).
        model_series = []
        for ts_model in dss:
            ts_raw = ts_model[var][:, row, col].to_series()
            ts_raw.index += pd.to_timedelta('2 hours')
            model_series.append(ts_raw.reindex(ind_valid))

        for mode in modes:

            ts_ref = decompose(ts_ascat, mode)

            for run, ts_raw in zip(names, model_series):

                tc_res = tc_res_nopc if 'noPcorr' in run else tc_res_pc
                try:
                    r_asc = np.sqrt(tc_res.loc[
                        gpi, f'r2_grid_{mode}_m_ASCAT_tc_ASCAT_SMAP_CLSM'])
                    r_mod = np.sqrt(tc_res.loc[
                        gpi, f'r2_grid_{mode}_m_CLSM_tc_ASCAT_SMAP_CLSM'])
                except (KeyError, IndexError):
                    # No triple-collocation result for this cell / mode.
                    r_asc = np.nan
                    r_mod = np.nan

                ts_mod = decompose(ts_raw, mode).resample('1d').mean()

                # NOTE(review): the run names built above end in '_OL', so
                # this test never matches them -- confirm the intended
                # pattern before relying on 'r_tca_*' columns.
                if 'OL_' in run:
                    res[f'r_tca_{run}_{mode}'] = r_mod

                pairs = pd.DataFrame({1: ts_ref, 2: ts_mod})

                # All common time steps.
                tmp = pairs.dropna()
                res[f'len_{run}_{mode}'] = len(tmp)
                r, p = correlate(tmp)
                res[f'r_{run}_{mode}'] = r
                res[f'p_{run}_{mode}'] = p
                res[f'r_corr_{run}_{mode}'] = min(r / r_asc, 1)

                # Days with SMAP observations only.
                tmp = pairs.reindex(t_ana).dropna()
                res[f'ana_len_{run}_{mode}'] = len(tmp)
                r, p = correlate(tmp)
                res[f'ana_r_{run}_{mode}'] = r
                res[f'ana_p_{run}_{mode}'] = p
                res[f'ana_r_corr_{run}_{mode}'] = min(r / r_asc, 1)

        # Append to the result file; the header is written only when the
        # file does not exist yet.
        pd.DataFrame(res, index=(gpi, )).to_csv(
            result_file,
            float_format='%0.3f',
            mode='a',
            header=not result_file.exists())
Beispiel #14
0
def run(args, scale_target='SMAP', mode='longterm', use_pc=False):
    '''
    Derive per-tile observation / forecast Tb statistics from the open-loop
    experiment(s) and write them as scaling files (sequences of Fortran blocks).

    Only mode='longterm' (one file per orbit, year and day-of-year) and
    mode='shortterm' (one file per orbit, pentad and year) are functional.
    Any other mode value runs into the legacy branch, which is flagged below as
    currently broken (it references a container that is never created).

    :param args: summarizes the following four for multiprocessing purposes:
        sensor: 'SMOS' or 'SMAP' or 'SMOSSMAP'
        date_from: 'yyyy-mm-dd' (if empty, the first time stamp of the experiment(s) is used)
        date_to: 'yyyy-mm-dd' (if empty, the last time stamp of the experiment(s) is used)
        pc: tag inserted into the experiment names and the output directory name
    :param scale_target: 'SMOS' or 'SMAP'
    :param mode: 'longterm' or 'shortterm'
    :param use_pc: If true, the first principal component of SMOS/SMAP Tb will be used
    '''

    sensor, date_from, date_to, pc = args

    exp_smap = f'NLv4_M36_US_OL_{pc}'
    exp_smos = f'NLv4_M36_US_OL_{pc}_SMOS'

    if mode == 'shortterm':
        ext = '_yearly'
    elif mode == 'longterm':
        ext = '_daily'
    else:
        ext = ''

    froot = Path(f'~/data_sets/GEOSldas_runs/_scaling_files_{pc}{ext}').expanduser()
    # exist_ok avoids the check-then-create race when several workers start at once.
    froot.mkdir(parents=True, exist_ok=True)

    ios = []
    if 'SMAP' in sensor:
        ios += [GEOSldas_io('ObsFcstAna', exp=exp_smap)]
    if 'SMOS' in sensor:
        ios += [GEOSldas_io('ObsFcstAna', exp=exp_smos)]

    if not date_from:
        date_from = pd.to_datetime(np.min([io.timeseries['time'].values[0] for io in ios]))
    else:
        date_from = pd.to_datetime(date_from)
    if not date_to:
        date_to = pd.to_datetime(np.max([io.timeseries['time'].values[-1] for io in ios]))
    else:
        date_to = pd.to_datetime(date_to)
    pent_from = int(np.floor((date_from.dayofyear - 1) / 5.) + 1)
    pent_to = int(np.floor((date_to.dayofyear - 1) / 5.) + 1)
    fbase = f'Thvf_TbSM_001_src_{sensor}_trg_{scale_target}_{date_from.year}_p{pent_from:02}_{date_to.year}_p{pent_to:02}_W_9p_Nmin_20'

    dtype, _, _ = template_scaling(sensor='SMAP')

    tiles = ios[0].grid.tilecoord['tile_id'].values.astype('int32')
    angles = np.array([40,], 'int')
    pols = ['H','V']
    orbits = [['A', 'D'],['D', 'A']] # To match SMOS and SMAP species!

    template = pd.DataFrame(columns=dtype.names, index=tiles).astype('float32')
    template['lon'] = ios[0].grid.tilecoord['com_lon'].values.astype('float32')
    template['lat'] = ios[0].grid.tilecoord['com_lat'].values.astype('float32')
    template['tile_id'] = tiles.astype('int32')

    pentads = np.arange(73)+1

    # Result containers, pre-filled with the -9999 no-data value.
    if mode == 'longterm':
        years = np.arange(date_from.year, date_to.year + 1)
        doys = np.arange(1,367)
        data_obs = np.full([len(tiles), len(doys), len(years), len(pols), len(orbits[0])], -9999.)
        data_mod = data_obs.copy()
    elif mode == 'shortterm':
        years = np.arange(date_from.year, date_to.year+1)
        data_obs = np.full([len(tiles), len(pentads), len(years), len(pols), len(orbits[0])], -9999.)
        data_mod = data_obs.copy()
        n_data = np.full([len(tiles), len(pentads), len(years), len(pols), len(orbits[0])], -9999)
    else:
        # TODO: Currently doesn't work anymore because of modification for lt and st
        dummy = np.full([len(tiles),len(pentads),len(angles),len(pols),len(orbits[0])],-9999)
        coords = {'tile_id': tiles,
                  'pentad': pentads,
                  'angle': angles,
                  'pol': pols,
                  'orbit': orbits[0]}
        darr = xr.DataArray(dummy, coords=coords, dims=['tile_id','pentad','angle','pol','orbit'])

    # ----- calculate mean and reshuffle -----
    for i_til, til in enumerate(tiles):
        logging.info(f'{i_til} / {len(tiles)}')
        for i_pol, pol in enumerate(pols):
            # Only the first (single) incidence angle is processed.
            ang = angles[0]
            for i_orb, (orb1, orb2) in enumerate(zip(orbits[0], orbits[1])):
                col, row = ios[0].grid.tileid2colrow(til)
                if sensor.upper() == 'SMOSSMAP':
                    spcs = [io.get_species(pol=pol, ang=ang, orbit=orb) for io, orb in zip(ios,[orb1, orb2])]
                    # orb = orb2 if scale_target == 'SMAP' else orb1 # POSSIBLY WRONG!!!!
                    orb = orb1 if scale_target == 'SMAP' else orb2
                else:
                    spcs = [ios[0].get_species(pol=pol, ang=ang, orbit=orb1)]
                    if sensor.upper() == 'SMAP':
                        orb = orb1 if scale_target == 'SMAP' else orb2
                    else:
                        orb = orb2 if scale_target == 'SMAP' else orb1

                if use_pc and (sensor == 'SMOSSMAP'):
                    dss = [io.timeseries['obs_obs'][:, spc-1, row, col].to_series() for io, spc in zip(ios,spcs)]
                    obs = PCA(*dss, window=1.5)['PC-1']
                    dss = [io.timeseries['obs_fcst'][:, spc-1, row, col].to_series() for io, spc in zip(ios,spcs)]
                    mod = PCA(*dss, window=1.5)['PC-1']
                else:
                    obs = pd.concat([io.timeseries['obs_obs'][:, spc-1, row, col].to_series() for io, spc in zip(ios,spcs)]).sort_index()
                    mod = pd.concat([io.timeseries['obs_fcst'][:, spc-1, row, col].to_series() for io, spc in zip(ios,spcs)]).sort_index()

                if (len(obs) == 0) | (len(mod) == 0):
                    continue

                if mode == 'longterm':
                    # Climatology + short-term anomalies, averaged to daily values.
                    obs_clim = calc_anom(obs, return_clim=True)
                    mod_clim = calc_anom(mod, return_clim=True)
                    obs_anom = calc_anom(obs, mode='shortterm')
                    mod_anom = calc_anom(mod, mode='shortterm')
                    m_obs = (obs_clim + obs_anom).resample('1D').mean()
                    m_mod = (mod_clim + mod_anom).resample('1D').mean()
                    i_yr = m_obs.index.year.values - years.min()
                    i_doy = m_obs.index.dayofyear.values - 1
                    data_obs[i_til, i_doy, i_yr, i_pol, i_orb] = m_obs.replace(np.nan, -9999.).values
                    data_mod[i_til, i_doy, i_yr, i_pol, i_orb] = m_mod.replace(np.nan, -9999.).values
                elif mode == 'shortterm':
                    for i_yr, yr in enumerate(years):
                        data_obs[i_til, :, i_yr, i_pol, i_orb] = calc_clim_p(obs[obs.index.year==yr][date_from:date_to])[0].replace(np.nan, -9999.).values
                        data_mod[i_til, :, i_yr, i_pol, i_orb] = calc_clim_p(mod[mod.index.year==yr][date_from:date_to])[0].replace(np.nan, -9999.).values
                        n_data[i_til, :, i_yr, i_pol, i_orb] = len(obs[obs.index.year==yr][date_from:date_to].dropna())
                else:
                    # TODO: Doesn't work currently!
                    data['m_obs'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:],\
                    data['s_obs'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:] = calc_clim_p(obs[date_from:date_to])
                    data['m_mod'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:],\
                    data['s_mod'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:] = calc_clim_p(mod[date_from:date_to])
                    data['N_data'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:] = len(obs[date_from:date_to].dropna())

    # Header blocks shared by all output files (modes[0] is updated per orbit below).
    modes = np.array([0, 0])
    sdate = np.array([date_from.year, date_from.month, date_from.day, 0, 0])
    edate = np.array([date_to.year, date_to.month, date_to.day, 0, 0])
    lengths = np.array([len(tiles), len(angles), 1])  # tiles, incidence angles, whatever

    # NOTE: raw arrays are additionally dumped to a hard-coded, user-specific location.
    np.save('/Users/u0116961/data_sets/data_mod', data_mod)
    np.save('/Users/u0116961/data_sets/data_obs', data_obs)

    # ----- write output files -----
    if mode == 'longterm':
        for i_orb, orb in enumerate(orbits[0]):
            # !!! inconsistent with the definition in the obs_paramfile (species) !!!
            modes[0] = 1 if orb == 'A' else 0
            for i_yr, yr in enumerate(years):
                fdir = froot / f'y{yr:04}'
                fdir.mkdir(parents=True, exist_ok=True)
                for i_doy, doy in enumerate(doys):
                    res = template.copy()
                    ang = angles[0]
                    for i_pol, pol in enumerate(pols):
                        # The 's_*' columns are filled with the same values as the 'm_*' columns.
                        res.loc[:, f'm_obs_{pol}_{ang}'] = data_obs[:, i_doy, i_yr, i_pol, i_orb].astype('float32')
                        res.loc[:, f's_obs_{pol}_{ang}'] = data_obs[:, i_doy, i_yr, i_pol, i_orb].astype('float32')
                        res.loc[:, f'm_mod_{pol}_{ang}'] = data_mod[:, i_doy, i_yr, i_pol, i_orb].astype('float32')
                        res.loc[:, f's_mod_{pol}_{ang}'] = data_mod[:, i_doy, i_yr, i_pol, i_orb].astype('float32')
                        res.loc[:, f'N_data_{pol}_{ang}'] = 999
                    res.replace(np.nan, -9999, inplace=True)
                    fname = fdir / f'{fbase}_{orb}_d{doy:03}.bin'
                    _write_scaling_file(ios[0], fname, modes, sdate, edate, lengths, angles, res)
    else:
        for i_pent, pent in enumerate(pentads):
            for i_orb, orb in enumerate(orbits[0]):
                # !!! inconsistent with the definition in the obs_paramfile (species) !!!
                modes[0] = 1 if orb == 'A' else 0
                if mode == 'shortterm':
                    for i_yr, yr in enumerate(years):
                        res = template.copy()
                        for ang in angles:
                            for i_pol, pol in enumerate(pols):
                                # The 's_*' columns are filled with the same values as the 'm_*' columns.
                                res.loc[:, f'm_obs_{pol}_{ang}'] = data_obs[:, i_pent, i_yr, i_pol, i_orb].astype('float32')
                                res.loc[:, f's_obs_{pol}_{ang}'] = data_obs[:, i_pent, i_yr, i_pol, i_orb].astype('float32')
                                res.loc[:, f'm_mod_{pol}_{ang}'] = data_mod[:, i_pent, i_yr, i_pol, i_orb].astype('float32')
                                res.loc[:, f's_mod_{pol}_{ang}'] = data_mod[:, i_pent, i_yr, i_pol, i_orb].astype('float32')
                                res.loc[:, f'N_data_{pol}_{ang}'] = n_data[:, i_pent, i_yr, i_pol, i_orb].astype('int32')
                        res.replace(np.nan, -9999, inplace=True)
                        fname = froot / f'{fbase}_{orb}_p{pent:02}_y{yr:04}.bin'
                        _write_scaling_file(ios[0], fname, modes, sdate, edate, lengths, angles, res)
                else:
                    # Legacy (pentad climatology) output; see the TODO notes above.
                    res = template.copy()
                    for ang in angles:
                        for pol in pols:
                            res.loc[:, f'm_obs_{pol}_{ang}'] = data['m_obs'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                            res.loc[:, f's_obs_{pol}_{ang}'] = data['s_obs'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                            res.loc[:, f'm_mod_{pol}_{ang}'] = data['m_mod'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                            res.loc[:, f's_mod_{pol}_{ang}'] = data['s_mod'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                            res.loc[:, f'N_data_{pol}_{ang}'] = data['N_data'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                    res.replace(np.nan, -9999, inplace=True)
                    fname = froot / f'{fbase}_{orb}_p{pent:02}.bin'
                    _write_scaling_file(ios[0], fname, modes, sdate, edate, lengths, angles, res)


def _write_scaling_file(io, fname, modes, sdate, edate, lengths, angles, res):
    '''
    Write one scaling file: five header blocks followed by one block per column of `res`.

    The file is opened through a context manager so that the handle is released
    even if one of the block writes raises.
    '''
    with open(fname, 'wb') as fid:
        io.write_fortran_block(fid, modes)
        io.write_fortran_block(fid, sdate)
        io.write_fortran_block(fid, edate)
        io.write_fortran_block(fid, lengths)
        io.write_fortran_block(fid, angles.astype('float'))  # required by LDASsa!!
        for f in res.columns.values:
            io.write_fortran_block(fid, res[f].values)
def create_observation_perturbations():
    """
    Derive observation error values per tile and species and write them to
    Fortran binary files (one file per orbit direction and mode).

    For every mode, the forecast ensemble variance ('fcst_var_spc*') is scaled
    with the squared ratio of the two 'ubrmse_grid_*' columns of the validation
    result table, square-rooted, floored at 0.1 and gap-filled. Input and
    output locations are hard-coded below.
    """

    froot = Path('/Users/u0116961/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas')
    fbase = 'SMOS_fit_Tb_'

    pc = 'Pcorr'

    io = GEOSldas_io()

    ensvar = pd.read_csv(froot / 'ens_vars' / pc / 'ens_var.csv', index_col=0)
    # Explicit copy: columns are assigned to obs_err below, which must not be
    # treated as a write to a slice of ensvar (SettingWithCopy).
    obs_err = ensvar[['col', 'row']].copy()
    obs_err.loc[:, 'tile_id'] = io.grid.tilecoord.loc[obs_err.index, 'tile_id'].values

    tc_res = pd.read_csv(froot / 'sm_validation' / pc / 'result.csv', index_col=0)
    tc_res.index = np.vectorize(io.grid.colrow2tilenum)(tc_res.col.values.astype('int'), tc_res.row.values.astype('int'), local=False)
    tc_res = tc_res.reindex(obs_err.index)

    # Per mode: list of (multiplicative, additive) adjustments of the numerator.
    ks = {'abs': [(1,0)],
          'anom_lt': [(1,0)],
          'anom_lst': [(1,0)],
          'anom_st':[(1,0)]}

    # The output template and the header blocks do not depend on the mode.
    dtype = template_error_Tb40()[0]

    angles = np.array([40., ])
    orbits = ['A', 'D']

    template = pd.DataFrame(columns=dtype.names).astype('float32')
    template['lon'] = io.grid.tilecoord['com_lon'].values.astype('float32')
    template['lat'] = io.grid.tilecoord['com_lat'].values.astype('float32')
    template.index += 1  # shift the default 0-based index to start at 1

    modes = np.array([0, 0])
    sdate = np.array([2015, 4, 1, 0, 0])
    edate = np.array([2021, 4, 1, 0, 0])
    lengths = np.array([len(template), len(angles)])  # tiles, incidence angles, whatever

    for mode, k in ks.items():

        tag_r = f'ubrmse_grid_{mode}_m_SMAP_tc_ASCAT_SMAP_CLSM'
        tag_p = f'ubrmse_grid_{mode}_m_CLSM_tc_ASCAT_SMAP_CLSM'

        dir_out = froot / 'observation_perturbations' / pc / mode
        dir_out.mkdir(parents=True, exist_ok=True)

        # NOTE: the file name does not depend on the (k_m, k_a) pair, so with
        # more than one pair per mode later pairs overwrite earlier output.
        for k_m, k_a in k:

            scl = (tc_res[tag_r] * k_m + k_a)**2 / tc_res[tag_p]**2

            for spc in np.arange(1, 5):
                col_name = 'obs_var_spc%i' % spc
                obs_err.loc[:, col_name] = (ensvar['fcst_var_spc%i' % spc] * scl)**0.5
                # Floor very small values at 0.1.
                obs_err.loc[(obs_err[col_name] < 0.1), col_name] = 0.1
                obs_err.loc[:, col_name] = fill_gaps(obs_err, col_name, smooth=False, grid=io.grid)[col_name]

            # ----- write output files -----
            for orb in orbits:
                # !!! inconsistent with the definition in the obs_paramfile (species) !!!
                modes[0] = 1 if orb == 'A' else 0

                res = template.copy()

                # Species 1 / 2 feed 'err_Tbh', species 3 / 4 feed 'err_Tbv' (A / D).
                spc = 0 if orb == 'A' else 1
                res.loc[:, 'err_Tbh'] = obs_err.loc[res.index, 'obs_var_spc%i' % (spc + 1)].values

                spc = 2 if orb == 'A' else 3
                res.loc[:, 'err_Tbv'] = obs_err.loc[res.index, 'obs_var_spc%i' % (spc + 1)].values

                fname = dir_out / (fbase + orb + '.bin')

                with open(fname, 'wb') as fid:
                    io.write_fortran_block(fid, modes)
                    io.write_fortran_block(fid, sdate)
                    io.write_fortran_block(fid, edate)
                    io.write_fortran_block(fid, lengths)
                    io.write_fortran_block(fid, angles)

                    for f in res.columns.values:
                        io.write_fortran_block(fid, res[f].values)
Beispiel #16
0
def plot_rtm_parameters():
    """
    Plot a map of every 'RTMparam' parameter for a fixed list of experiments.

    One PNG per parameter is written to <plots>/RTM_parameters/<experiment>/.
    """

    root = paths().plots / 'RTM_parameters'

    experiments = ['US_M36_SMOS_DA_calibrated_scaled', 'US_M36_SMOS_DA_nocal_scaled_harmonic']

    tc = GEOSldas_io().grid.tilecoord
    tg = GEOSldas_io().grid.tilegrids

    # Make the global grid indices relative to the domain.
    tc.i_indg -= tg.loc['domain','i_offg'] # col / lon
    tc.j_indg -= tg.loc['domain','j_offg'] # row / lat

    lons = np.unique(tc.com_lon.values)
    lats = np.unique(tc.com_lat.values)[::-1]

    lons, lats = np.meshgrid(lons, lats)

    llcrnrlat = 24
    urcrnrlat = 51
    llcrnrlon = -128
    urcrnrlon = -64
    figsize = (20, 10)
    # cbrange = (-20, 20)
    cmap = 'jet'
    fontsize = 20

    for exp in experiments:

        outpath = root / exp
        # Previously `outpath.exists` was tested without being called (always
        # truthy), so the directory was never created; the mkdir keyword was
        # also misspelled.
        outpath.mkdir(parents=True, exist_ok=True)

        params = GEOSldas_io(exp=exp).read_params('RTMparam')

        for param in params:

            fname = outpath / (param + '.png')

            # Scatter the per-tile values onto the 2D grid; untouched cells stay NaN.
            img = np.full(lons.shape, np.nan)
            img[tc.j_indg.values, tc.i_indg.values] = params[param].values
            img_masked = np.ma.masked_invalid(img)

            f = plt.figure(num=None, figsize=figsize, dpi=90, facecolor='w', edgecolor='k')

            m = Basemap(projection='mill',
                        llcrnrlat=llcrnrlat,
                        urcrnrlat=urcrnrlat,
                        llcrnrlon=llcrnrlon,
                        urcrnrlon=urcrnrlon,
                        resolution='c')

            m.drawcoastlines()
            m.drawcountries()
            m.drawstates()

            im = m.pcolormesh(lons, lats, img_masked, cmap=cmap, latlon=True)

            # im.set_clim(vmin=cbrange[0], vmax=cbrange[1])

            cb = m.colorbar(im, "bottom", size="7%", pad="8%")

            for t in cb.ax.get_xticklabels():
                t.set_fontsize(fontsize)
            for t in cb.ax.get_yticklabels():
                t.set_fontsize(fontsize)

            plt.title(param)

            plt.savefig(fname, dpi=f.dpi)
            plt.close()
Beispiel #17
0
def plot_perturbations(dir_out):
    """
    Plot the observation perturbation files as a 3 x 4 panel figure.

    Rows are the modes ('anom_lst', 'anom_lt', 'anom_st'); columns are
    'err_Tbv' of the A and D file followed by 'err_Tbh' of the A and D file.
    Only grid cells whose three pairwise correlations from the validation
    result table all exceed 0.2 are shown.

    :param dir_out: directory (path-like supporting '/') receiving 'perturbations.png'
    """

    root = Path(
        '/Users/u0116961/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/observation_perturbations/Pcorr'
    )
    res_tc = pd.read_csv(
        '/Users/u0116961/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/sm_validation/Pcorr/result.csv',
        index_col=0)
    io = GEOSldas_io('ObsFcstAna')
    io2 = LDASsa_io('ObsFcstAna')

    lut = pd.read_csv(Paths().lut, index_col=0)
    ind = np.vectorize(io.grid.colrow2tilenum)(lut.ease2_col,
                                               lut.ease2_row,
                                               local=False)

    dtype, hdr, length = template_error_Tb40()

    f = plt.figure(figsize=(22, 8))

    fontsize = 14
    cbrange = [0, 8]
    cmap = cc.cm.bjy_r
    # cmap='viridis'

    modes = ['anom_lst', 'anom_lt', 'anom_st']
    titles = ['Anomalies', 'LF signal', 'HF signal']

    for i, (mode, title) in enumerate(zip(modes, titles)):

        fA = root / f'{mode}' / 'SMOS_fit_Tb_A.bin'
        fD = root / f'{mode}' / 'SMOS_fit_Tb_D.bin'

        imgA = io2.read_fortran_binary(fA, dtype, hdr=hdr, length=length)
        imgD = io2.read_fortran_binary(fD, dtype, hdr=hdr, length=length)

        imgA.index += 1
        imgD.index += 1

        r_asc_smap = res_tc[f'r_grid_{mode}_p_ASCAT_SMAP']
        r_asc_clsm = res_tc[f'r_grid_{mode}_p_ASCAT_CLSM']
        r_smap_clsm = res_tc[f'r_grid_{mode}_p_SMAP_CLSM']
        thres = 0.2
        # All three pairwise correlations must exceed the threshold (the
        # original tested r_asc_smap three times and ignored the other two).
        ind_valid = res_tc[(r_asc_smap > thres) & (r_asc_clsm > thres) &
                           (r_smap_clsm > thres)].index
        ind_valid = np.vectorize(io.grid.colrow2tilenum)(
            res_tc.loc[ind_valid, 'col'].values,
            res_tc.loc[ind_valid, 'row'].values,
            local=False)

        # (image, column, panel title) for the four panels of this row.
        # Raw strings: '\h' is not a valid escape sequence in a normal literal.
        panels = [
            (imgA, 'err_Tbv', r'$\hat{R}$ (V-pol, Asc.)'),
            (imgD, 'err_Tbv', r'$\hat{R}$ (V-pol, Dsc.)'),
            (imgA, 'err_Tbh', r'$\hat{R}$ (H-pol, Asc.)'),
            (imgD, 'err_Tbh', r'$\hat{R}$ (H-pol, Dsc.)'),
        ]

        for j, (img, column, panel_title) in enumerate(panels):
            plt.subplot(3, 4, i * 4 + j + 1)
            handle = plot_ease_img2(img.reindex(ind).reindex(ind_valid),
                                    column,
                                    cbrange=cbrange,
                                    cmap=cmap,
                                    io=io,
                                    plot_cmap=False)
            if i == 0:
                plt.title(panel_title, fontsize=fontsize)
            if j == 0:
                # The first panel of each row carries the row label and
                # provides the image handle used for the shared colorbar.
                im = handle
                plt.ylabel(title, fontsize=fontsize)

    plot_centered_cbar(f, im, 4, fontsize=fontsize - 4, bottom=0.07)

    plt.savefig(dir_out / 'perturbations.png', dpi=300, bbox_inches='tight')
    plt.close()
Beispiel #18
0
def plot_ismn_statistics(res_path, dir_out):
    """
    Plot histograms of skill differences relative to the open-loop run.

    The figure has 3 rows (modes 'anom_lst', 'anom_lt', 'anom_st') and 6
    columns (three output / metric / variable combinations, each for the
    individual and the joint assimilation run). Every panel overlays the
    histograms of '4K minus OL' and 'DA minus OL' and prints their means.

    :param res_path: directory containing 'ascat_eval.csv' and 'insitu_TCA.csv'
    :param dir_out: directory (path-like supporting '/') receiving 'stats.png'
    """

    res = pd.read_csv(res_path / 'ascat_eval.csv', index_col=0)
    res_ismn = pd.read_csv(res_path / 'insitu_TCA.csv', index_col=0)
    networks = ['SCAN', 'USCRN']
    res_ismn = res_ismn.loc[res_ismn.network.isin(networks), :]
    res_tc = pd.read_csv(
        '/Users/u0116961/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/sm_validation/Pcorr/result.csv',
        index_col=0)

    # Look up the row of `res` matching each station's (offset-corrected)
    # grid cell; stations without a matching cell are skipped.
    tg = GEOSldas_io().grid.tilegrids
    ind_ismn = []
    for col, row in zip(res_ismn.ease_col.values + tg.loc['domain', 'i_offg'],
                        res_ismn.ease_row.values + tg.loc['domain', 'j_offg']):
        match = res[(res.row == row) & (res.col == col)].index.values
        if len(match) > 0:
            ind_ismn.append(match[0])

    refs = ['Pcorr_OL', 'Pcorr_4K']
    runs = [['Pcorr_anom_lst', 'Pcorr_anom_lt_ScDY', 'Pcorr_anom_st_ScYH'],
            'Pcorr_LTST']
    titles = [
        'Individual assim. (ASCAT, SSM)',
        'Joint assim. (ASCAT, SSM)',
        'Individual assim. (ISMN, SSM)',
        'Joint assim. (ISMN, SSM)',
        'Individual assim. (ISMN, RZSM)',
        'Joint assim. (ISMN, RZSM)',
    ]

    modes = ['anom_lst', 'anom_lt', 'anom_st']
    labels = ['Anomaly skill', 'LF skill', 'HF skill']

    f = plt.figure(figsize=(26, 14))
    fontsize = 16

    # output = 'ascat' # 'ascat' or 'ascat_ismn' or 'ismn'
    # met = 'r_corr' # 'R_model_insitu'# or 'R2_model' / 'r' or 'r_corr'

    outputs = ['ascat_ismn', 'ismn', 'ismn']
    mets = ['r_corr', 'R_model_insitu', 'R_model_insitu']
    variables = ['sm_surface', 'sm_surface', 'sm_rootzone']

    # Pristine copies; every (mode, output) combination starts from these.
    xres = res.copy()
    xres_ismn = res_ismn.copy()

    for i, (mode, label) in enumerate(zip(modes, labels)):
        for j, (output, met, var) in enumerate(zip(outputs, mets, variables)):

            res = xres.copy()
            res_ismn = xres_ismn.copy()

            if 'ubRMSD' in met:
                lim = [-0.015, 0.015]
                xloc = -0.0135
                bins = 15
            else:
                if 'ascat' not in output:
                    lim = [-0.2, 0.3]
                    xloc = -0.18
                    bins = 15
                else:
                    lim = [-0.25, 0.25]
                    xloc = -0.22
                    bins = 20

            for k, run in enumerate(runs):

                # k == 0: mode-specific individual run; k == 1: joint run.
                r = run if k == 1 else run[i]
                title = titles[j * 2 + k] if i == 0 else ''

                if 'ascat' in output:
                    col_ol = f'ana_{met}_Pcorr_OL_{mode}'
                    col_4k = f'ana_{met}_Pcorr_4K_{mode}'
                    col_da = f'ana_{met}_{r}_{mode}'
                    if 'ismn' in output:
                        res = res.reindex(ind_ismn)
                    else:
                        r_asc_smap = res_tc[f'r_grid_{mode}_p_ASCAT_SMAP']
                        r_asc_clsm = res_tc[f'r_grid_{mode}_p_ASCAT_CLSM']
                        r_smap_clsm = res_tc[f'r_grid_{mode}_p_SMAP_CLSM']
                        thres = 0.2
                        # All three pairwise correlations must exceed the
                        # threshold (the original tested r_asc_smap thrice).
                        ind_valid = res_tc[(r_asc_smap > thres)
                                           & (r_asc_clsm > thres) &
                                           (r_smap_clsm > thres)].index
                        res = res.reindex(ind_valid)
                else:
                    res = res_ismn
                    col_ol = f'{met}_Pcorr_OL_{mode}_{var}'
                    col_4k = f'{met}_Pcorr_4K_{mode}_{var}'
                    col_da = f'{met}_{r}_{mode}_{var}'
                    if 'R2' in met:
                        res[col_ol] **= 0.5
                        res[col_4k] **= 0.5
                        res[col_da] **= 0.5

                if not ((output == 'ismn') and ('R2' not in met)):
                    print('filtered')
                    # Blank values outside the open interval (0, 1). A single
                    # .loc assignment replaces the former chained assignment,
                    # which is not guaranteed to write back into `res`.
                    for column in (col_ol, col_4k, col_da):
                        invalid = ((res[column] <= 0) |
                                   (res[column] >= 1)).to_numpy()
                        res.loc[invalid, column] = np.nan

                res['da'] = res[col_da] - res[col_ol]
                res['4k'] = res[col_4k] - res[col_ol]

                ax = plt.subplot(3, 6, i * 6 + j * 2 + k + 1)
                res['4k'].hist(bins=bins,
                               grid=False,
                               ax=ax,
                               range=lim,
                               alpha=0.8)
                res['da'].hist(bins=bins,
                               grid=False,
                               ax=ax,
                               range=lim,
                               alpha=0.6)
                # Tick labels are hidden by drawing them small and white.
                plt.yticks(color='w', fontsize=5)
                if i < 2:
                    plt.xticks(color='w', fontsize=1)
                if i == 0:
                    plt.title(title, fontsize=fontsize - 3)
                if (j == 0) & (k == 0):
                    plt.ylabel(label, fontsize=fontsize)
                if (i == 2) & (j == 2) & (k == 1):
                    plt.legend(labels=[
                        'R$_{4K, ref}$ - R$_{OL, ref}$',
                        'R$_{TC, ref}$ - R$_{OL, ref}$'
                    ],
                               loc='lower right',
                               fontsize=fontsize - 4)
                plt.axvline(color='black', linestyle='--', linewidth=1)
                plt.xlim(lim)
                ylim = ax.get_ylim()
                yloc1 = ylim[1] - (ylim[1] - ylim[0]) / 10
                yloc2 = ylim[1] - 1.6 * (ylim[1] - ylim[0]) / 10
                plt.text(xloc,
                         yloc1,
                         'mean = %.2f' % res['4k'].mean(),
                         color='#1f77b4',
                         fontsize=fontsize - 3)
                plt.text(xloc,
                         yloc2,
                         'mean = %.2f' % res['da'].mean(),
                         color='#ff7f0e',
                         fontsize=fontsize - 3)

    f.savefig(dir_out / 'stats.png', dpi=300, bbox_inches='tight')
    plt.close()
Beispiel #19
0
def plot_filter_diagnostics(res_path, dir_out):
    """
    Plot maps of the 'innov_autocorr' variable stored in 'filter_diagnostics.nc'.

    The 2 x 3 figure shows species index 0 and 2 (rows) for the first three
    entries of the run selection (columns). Only grid cells whose three
    pairwise 'anom_lst' correlations in the validation result table all
    exceed 0.2 are shown.

    :param res_path: directory containing 'filter_diagnostics.nc'
    :param dir_out: directory (path-like supporting '/') receiving 'innov_autocorr.png'
    """

    fname = res_path / 'filter_diagnostics.nc'

    # The validation table provides both the grid cell locations and the
    # correlations used for masking, so it is read only once.
    res = pd.read_csv(
        '/Users/u0116961/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/sm_validation/Pcorr/result.csv',
        index_col=0)
    tg = GEOSldas_io().grid.tilegrids
    res_cols = res.col.values - tg.loc['domain', 'i_offg']
    res_rows = res.row.values - tg.loc['domain', 'j_offg']
    r_asc_smap = res['r_grid_anom_lst_p_ASCAT_SMAP']
    r_asc_clsm = res['r_grid_anom_lst_p_ASCAT_CLSM']
    r_smap_clsm = res['r_grid_anom_lst_p_SMAP_CLSM']
    thres = 0.2
    # All three pairwise correlations must exceed the threshold (the original
    # tested r_asc_smap three times and ignored the other two).
    ind_valid = res[(r_asc_smap > thres) & (r_asc_clsm > thres) &
                    (r_smap_clsm > thres)].index

    fontsize = 14

    root = Path('/Users/u0116961/data_sets/GEOSldas_runs')
    runs = [run.name for run in root.glob('*_DA_SMAP_*')]
    runs += ['NLv4_M36_US_OL_Pcorr', 'NLv4_M36_US_OL_noPcorr']

    # Position of the first run whose name contains each tag.
    tags = ['OL_Pcorr', 'Pcorr_4K', 'Pcorr_anom_lst', 'Pcorr_LTST']
    iters = [np.where([tag in run for run in runs])[0][0] for tag in tags]

    # Only three titles are defined, so zip() below uses the first three runs.
    titles = ['Open-loop', '4K constant', 'Anomalies']
    labels = ['H pol. / Asc.', 'V pol. / Asc.']

    with Dataset(fname) as ds:

        lons = ds.variables['lon'][:]
        lats = ds.variables['lat'][:]
        lons, lats = np.meshgrid(lons, lats)

        var = 'innov_autocorr'
        cbrange = [0, 0.7]
        step = 0.2
        cmap = 'viridis'

        f = plt.figure(figsize=(19, 6))

        for j, (spc, label) in enumerate(zip([0, 2], labels)):
            for i, (it_tit, it) in enumerate(zip(titles, iters)):

                title = it_tit if j == 0 else ''

                plt.subplot(2, 3, j * 3 + i + 1)
                data = ds.variables[var][:, :, it, spc]

                # Sample the 2D field at the grid cells of the result table.
                res['tmp'] = data[res_rows, res_cols]
                im = plot_ease_img(res.reindex(ind_valid),
                                   'tmp',
                                   fontsize=fontsize,
                                   cbrange=cbrange,
                                   cmap=cmap,
                                   title=title,
                                   plot_cb=False,
                                   print_meanstd=True)
                if i == 0:
                    plt.ylabel(label, fontsize=fontsize)

        plot_centered_cbar(f, im, 3, fontsize=fontsize - 2, bottom=0.07)
        fout = dir_out / f'{var}.png'
        f.savefig(fout, dpi=300, bbox_inches='tight')
        plt.close()
Beispiel #20
0
    ax.set_title('Asc / V-pol', fontsize=16)

    ax = f.add_subplot(2,2,4)
    obserr = io.images.sel(species=4)['obs_fcstvar'].mean('time').values
    plot_xarr_img(obserr,lons,lats,cbrange=[0,50])
    ax.set_title('Dsc / V-pol', fontsize=16)

    plt.tight_layout()
    plt.show()



if __name__ == '__main__':

    # Geographic coordinates of the location to plot.
    lon = -98.47471538470059
    lat = 41.83347716648588

    # Translate the coordinates into grid column / row.
    grid = GEOSldas_io().grid
    col, row = grid.lonlat2colrow(lon, lat)

    plot_innov(spc=1, row=row, col=col)

    # plot_catparams()


# llcrnrlat = -58.,
# urcrnrlat = 78.,
# llcrnrlon = -172.,
# urcrnrlon = 180.,

# clipped global
# llcrnrlat=-58.,
# urcrnrlat=60.,
Beispiel #21
0
def _tc_valid_index(res_tc, mode, thres=0.2):
    """Return the index labels of the rows of *res_tc* that pass screening.

    A row is kept only when all three columns
    ``r_grid_{mode}_p_ASCAT_SMAP``, ``r_grid_{mode}_p_ASCAT_CLSM`` and
    ``r_grid_{mode}_p_SMAP_CLSM`` are strictly greater than *thres*.
    NaN entries compare as False and are therefore rejected.
    """
    pairs = ('ASCAT_SMAP', 'ASCAT_CLSM', 'SMAP_CLSM')
    cols = [f'r_grid_{mode}_p_{pair}' for pair in pairs]
    mask = (res_tc[cols] > thres).all(axis=1)
    return res_tc[mask].index


def plot_uncertainty_ratios(dir_out):
    """Plot TCA-based and ensemble-based uncertainty ratios as a 2 x 4 panel.

    Top row (one panel per non-'4K' mode, labelled 'TCA unc. ratio'):
    ``10 * log10(ubRMSE_CLSM**2 / ubRMSE_SMAP**2)`` taken from the
    ``ubrmse_grid_{mode}_m_*_tc_ASCAT_SMAP_CLSM`` columns of the result CSV.

    Bottom row (one panel per mode, labelled 'Ens. var. ratio'):
    ``10 * log10`` of the open-loop ``obs_fcstvar`` divided by the DA
    experiment's ``obs_obsvar``, averaged over the first axis (presumably
    time) and then over the first four species.

    For all modes except '4K' both rows only show grid cells for which all
    three ``r_grid_{mode}_p_*`` columns exceed 0.2.

    Parameters
    ----------
    dir_out : path-like supporting the ``/`` operator (e.g. pathlib.Path)
        Directory into which 'uncertainty_ratio.png' is written (300 dpi).
    """

    # NOTE(review): `res` and `res_tc` are read from the very same file.
    # Kept as in the original; confirm whether `res_tc` was meant to point
    # to a separate TC result file.
    fname = '/Users/u0116961/Documents/work/MadKF/CLSM/SM_err_ratio/GEOSldas/sm_validation/Pcorr/result.csv'
    res = pd.read_csv(fname, index_col=0)
    res_tc = pd.read_csv(fname, index_col=0)

    # Shift the col/row values stored in the CSV by the domain offsets so
    # they can be used to index the image arrays directly.
    tg = GEOSldas_io().grid.tilegrids
    res_cols = res.col.values - tg.loc['domain', 'i_offg']
    res_rows = res.row.values - tg.loc['domain', 'j_offg']

    figsize = (16, 4)
    fontsize = 10
    cb = [-10, 10]
    cmap = cc.cm.bjy
    n_species = 4

    modes = ['4K', 'anom_lst', 'anom_lt', 'anom_st']
    titles = ['4K benchmark', 'Anomalies', 'LF signal', 'HF signal']

    ios = [
        GEOSldas_io('ObsFcstAna',
                    exp=f'NLv4_M36_US_DA_SMAP_Pcorr_{mode}').timeseries
        for mode in modes
    ]
    io_ol = GEOSldas_io('ObsFcstAna', exp='NLv4_M36_US_OL_Pcorr').timeseries

    f = plt.figure(figsize=figsize)

    for n, (mode, title, io_da) in enumerate(zip(modes, titles, ios)):

        is_benchmark = mode == '4K'

        # The '4K' benchmark has no TCA columns, so nothing is masked there.
        if is_benchmark:
            ind_valid = res.index
        else:
            ind_valid = _tc_valid_index(res_tc, mode)

        # --- top row: TCA-based ratio (slot 1 stays empty for '4K') ---
        if not is_benchmark:
            plt.subplot(2, 4, n + 1)
            tagP = 'ubrmse_grid_' + mode + '_m_CLSM_tc_ASCAT_SMAP_CLSM'
            tagR = 'ubrmse_grid_' + mode + '_m_SMAP_tc_ASCAT_SMAP_CLSM'
            res['tmp'] = 10 * np.log10(res[tagP]**2 / res[tagR]**2)

            img = plot_ease_img(res.reindex(ind_valid),
                                'tmp',
                                fontsize=fontsize,
                                cbrange=cb,
                                cmap=cmap,
                                plot_cb=False)
            plt.title(title, fontsize=fontsize)
            if n == 1:
                # Left-most populated panel of the top row carries the label.
                plt.ylabel('TCA unc. ratio', fontsize=fontsize)

        # --- bottom row: ensemble variance ratio (fcst / obs) ---
        avg = np.full(io_da['obs_obsvar'].shape[1:], np.nan)
        for spc in range(n_species):
            obsvar = io_da['obs_obsvar'][:, spc, :, :].values
            fcstvar = io_ol['obs_fcstvar'][:, spc, :, :].values
            avg[spc, :, :] = np.nanmean(fcstvar / obsvar, axis=0)
        avg = np.nanmean(avg, axis=0)

        res['avg'] = 10 * np.log10(avg[res_rows, res_cols])

        plt.subplot(2, 4, n + 5)
        img = plot_ease_img(res.reindex(ind_valid),
                            'avg',
                            fontsize=fontsize,
                            cbrange=cb,
                            cmap=cmap,
                            plot_cb=False)
        if n == 0:
            # The '4K' column has no top panel, so its title goes here.
            plt.title(title, fontsize=fontsize)
            plt.ylabel('Ens. var. ratio', fontsize=fontsize)

    plot_centered_cbar(f, img, 4, fontsize=fontsize, bottom=0.07)

    fout = dir_out / 'uncertainty_ratio.png'
    f.savefig(fout, dpi=300, bbox_inches='tight')
    plt.close(f)