def run_ascat_eval_smos_part(part, parts, ref='ascat'):
    '''
    Correlate ASCAT soil moisture with the surface soil moisture of the
    open-loop run and the SMOS40 MadKF runs for one chunk of the look-up
    table, separately for several time periods and anomaly modes.

    One row per grid cell is appended to `ascat_eval_smos_part{part}.csv`.

    :param part: 1-based number of the look-up table chunk to process
    :param parts: total number of chunks the look-up table is split into
    :param ref: not used by this function; kept for call compatibility
    '''

    periods = [
        ['2010-04-01', '2020-04-01'],
        ['2010-04-01', '2015-04-01'],
        ['2015-04-01', '2020-04-01'],
        ['2010-04-01', '2012-10-01'],
        ['2012-10-01', '2015-04-01'],
        ['2015-04-01', '2017-10-01'],
        ['2017-10-01', '2020-04-01'],
    ]

    res_path = Path(
        '~/Documents/work/MadKF/CLSM/SMOS40/validation/multiperiod/ascat'
    ).expanduser()
    # Several parts may start at the same time; exist_ok avoids the
    # check-then-create race of a separate exists() test.
    res_path.mkdir(parents=True, exist_ok=True)

    result_file = res_path / f'ascat_eval_smos_part{part}.csv'

    lut = pd.read_csv(Paths().lut, index_col=0)

    # Split grid cell list for parallelization
    subs = (np.arange(parts + 1) * len(lut) / parts).astype('int')
    subs[-1] = len(lut)
    start = subs[part - 1]
    end = subs[part]

    # Look-up table that contains the grid cells to iterate over
    lut = lut.iloc[start:end, :]

    names = ['open_loop'] + [f'SMOS40_it62{i}' for i in range(1, 5)]
    runs = ['US_M36_SMOS40_TB_OL_noScl'
            ] + [f'US_M36_SMOS40_TB_MadKF_DA_it62{i}' for i in range(1, 5)]

    grid = LDAS_io('ObsFcstAna', runs[0]).grid
    dss_xhourly = [LDAS_io('xhourly', run).timeseries for run in runs]
    dss_obs_ana = [
        LDAS_io('ObsFcstAna', run).timeseries['obs_ana'] for run in runs
    ]

    modes = ['absolute', 'longterm', 'shortterm']
    var = 'sm_surface'

    ascat = HSAF_io()

    # Domain offsets do not change between grid cells.
    i_offg = grid.tilegrids.loc['domain', 'i_offg']
    j_offg = grid.tilegrids.loc['domain', 'j_offg']

    for cnt, (gpi, data) in enumerate(lut.iterrows()):
        print('%i / %i' % (cnt, len(lut)))

        col = int(data.ease2_col - i_offg)
        row = int(data.ease2_row - j_offg)

        res = pd.DataFrame(index=(gpi, ))
        res['col'] = int(data.ease2_col)
        res['row'] = int(data.ease2_row)
        res['lcol'] = col
        res['lrow'] = row

        try:
            ts_ascat = ascat.read(
                data['ascat_gpi'],
                resample_time=False).resample('1d').mean().dropna()
            ts_ascat = ts_ascat[~ts_ascat.index.duplicated(keep='first')]
            ts_ascat.name = 'ASCAT'
        except Exception:
            # Best effort: skip cells whose ASCAT series cannot be read, but
            # let KeyboardInterrupt / SystemExit propagate.
            continue

        dfs = [
            ds.sel(species=[1, 2]).isel(
                lat=row, lon=col).to_pandas().resample('1d').mean()
            for ds in dss_obs_ana
        ]
        idx = [df[np.any(~np.isnan(df), axis=1)].index for df in dfs]

        # Days on which *every* run has an analysis value. The intersection
        # is built over all runs so that none is silently left out when the
        # list of runs changes.
        t_ana = idx[0]
        for other in idx[1:]:
            t_ana = t_ana.intersection(other)

        for mode in modes:

            if mode == 'absolute':
                ts_ref = ts_ascat.copy()
            else:
                ts_ref = calc_anom(ts_ascat.copy(),
                                   longterm=(mode == 'longterm')).dropna()

            for run, ts_model in zip(names, dss_xhourly):

                # Keep only snow-free time steps with soil temperature
                # above 277.15.
                ind = (ts_model['snow_mass'][:, row, col].values == 0) & (
                    ts_model['soil_temp_layer1'][:, row, col].values > 277.15)
                ts_mod = ts_model[var][:, row, col].to_series().loc[ind]
                ts_mod.index += pd.to_timedelta('2 hours')

                if mode == 'absolute':
                    ts_mod = ts_mod.dropna()
                else:
                    ts_mod = calc_anom(ts_mod,
                                       longterm=mode == 'longterm').dropna()
                ts_mod = ts_mod.reindex(t_ana).dropna()

                for i, (t_start, t_end) in enumerate(periods):
                    tmp = pd.DataFrame({
                        1: ts_ref,
                        2: ts_mod
                    })[t_start:t_end].dropna()
                    res[f'p{i}_len_{run}_{mode}'] = len(tmp)
                    # Correlations are only reported for more than 10 pairs.
                    r, _ = pearsonr(
                        tmp[1], tmp[2]) if len(tmp) > 10 else (np.nan, np.nan)
                    res[f'p{i}_r_{run}_{mode}'] = r

        # The first row written creates the file including the header;
        # later rows are appended without it.
        if not result_file.exists():
            res.to_csv(result_file, float_format='%0.3f')
        else:
            res.to_csv(result_file,
                       float_format='%0.3f',
                       mode='a',
                       header=False)
# Example #2
# 0
def _write_scaling_file(io, fname, modes, sdate, edate, lengths, angles, res):
    '''
    Write one scaling file through `io.write_fortran_block`.

    The header blocks (modes, start date, end date, lengths, angles) are
    written first, followed by one block per column of `res`.
    '''
    # The context manager closes the file even if one of the writes raises.
    with open(fname, 'wb') as fid:
        io.write_fortran_block(fid, modes)
        io.write_fortran_block(fid, sdate)
        io.write_fortran_block(fid, edate)
        io.write_fortran_block(fid, lengths)
        io.write_fortran_block(fid, angles.astype('float'))  # required by LDASsa!!
        for f in res.columns.values:
            io.write_fortran_block(fid, res[f].values)


def run(args, scale_target='SMAP', mode='longterm', use_pc=False):
    '''
    Collect observed and modelled Tb per tile, polarization and orbit and
    write them to binary scaling files.

    :param args: summarizes the following four for multiprocessing purposes:
        sensor: 'SMOS' or 'SMAP' or 'SMOSSMAP'
        date_from: 'yyyy-mm-dd' (if falsy: first time step of the experiments)
        date_to: 'yyyy-mm-dd' (if falsy: last time step of the experiments)
        pc: suffix inserted into the experiment names and the output directory name
    :param scale_target: 'SMOS' or 'SMAP'
    :param mode: 'longterm' (one file per orbit, year and day of year) or
                 'shortterm' (one file per orbit, pentad and year).
                 Any other value selects the legacy pentad branch, which is
                 marked as not working: `data_obs` / `data_mod` are never
                 created for it.
    :param use_pc: If true, the first principal component of SMOS/SMAP Tb will be used
                   (only takes effect if sensor == 'SMOSSMAP')
    '''

    sensor, date_from, date_to, pc = args

    exp_smap = f'NLv4_M36_US_OL_{pc}'
    exp_smos = f'NLv4_M36_US_OL_{pc}_SMOS'

    if mode == 'shortterm':
        ext = '_yearly'
    elif mode == 'longterm':
        ext = '_daily'
    else:
        ext = ''

    froot = Path(f'~/data_sets/GEOSldas_runs/_scaling_files_{pc}{ext}').expanduser()
    # exist_ok avoids a check-then-create race between parallel workers.
    froot.mkdir(parents=True, exist_ok=True)

    ios = []
    if 'SMAP' in sensor:
        ios += [GEOSldas_io('ObsFcstAna', exp=exp_smap)]
    if 'SMOS' in sensor:
        ios += [GEOSldas_io('ObsFcstAna', exp=exp_smos)]

    if not date_from:
        date_from = pd.to_datetime(np.min([io.timeseries['time'].values[0] for io in ios]))
    else:
        date_from = pd.to_datetime(date_from)
    if not date_to:
        date_to = pd.to_datetime(np.max([io.timeseries['time'].values[-1] for io in ios]))
    else:
        date_to = pd.to_datetime(date_to)
    # Pentad (5-day block, 1-based) of the start and end date
    pent_from = int(np.floor((date_from.dayofyear - 1) / 5.) + 1)
    pent_to = int(np.floor((date_to.dayofyear - 1) / 5.) + 1)
    fbase = f'Thvf_TbSM_001_src_{sensor}_trg_{scale_target}_{date_from.year}_p{pent_from:02}_{date_to.year}_p{pent_to:02}_W_9p_Nmin_20'

    dtype, _, _ = template_scaling(sensor='SMAP')

    tiles = ios[0].grid.tilecoord['tile_id'].values.astype('int32')
    angles = np.array([40,], 'int')
    pols = ['H','V']
    orbits = [['A', 'D'],['D', 'A']] # To match SMOS and SMAP species!

    template = pd.DataFrame(columns=dtype.names, index=tiles).astype('float32')
    template['lon'] = ios[0].grid.tilecoord['com_lon'].values.astype('float32')
    template['lat'] = ios[0].grid.tilecoord['com_lat'].values.astype('float32')
    template['tile_id'] = tiles.astype('int32')

    pentads = np.arange(73)+1

    # Result arrays are pre-filled with the missing value -9999.
    if mode == 'longterm':
        years = np.arange(date_from.year, date_to.year + 1)
        doys = np.arange(1,367)
        data_obs = np.full([len(tiles), len(doys), len(years), len(pols), len(orbits[0])], -9999.)
        data_mod = data_obs.copy()
    elif mode == 'shortterm':
        years = np.arange(date_from.year, date_to.year+1)
        data_obs = np.full([len(tiles), len(pentads), len(years), len(pols), len(orbits[0])], -9999.)
        data_mod = data_obs.copy()
        n_data = np.full([len(tiles), len(pentads), len(years), len(pols), len(orbits[0])], -9999)
    else:
        # TODO: Currently doesn't work anymore because of modification for lt and st
        dummy = np.full([len(tiles),len(pentads),len(angles),len(pols),len(orbits[0])],-9999)
        coords = {'tile_id': tiles,
                  'pentad': pentads,
                  'angle': angles,
                  'pol': pols,
                  'orbit': orbits[0]}
        darr = xr.DataArray(dummy, coords=coords, dims=['tile_id','pentad','angle','pol','orbit'])

    # ----- calculate mean and reshuffle -----
    ang = angles[0]  # only a single incidence angle is processed
    for i_til, til in enumerate(tiles):
        logging.info(f'{i_til} / {len(tiles)}')
        # The grid position depends on the tile only.
        col, row = ios[0].grid.tileid2colrow(til)
        for i_pol, pol in enumerate(pols):
            for i_orb, (orb1, orb2) in enumerate(zip(orbits[0], orbits[1])):
                if sensor.upper() == 'SMOSSMAP':
                    spcs = [io.get_species(pol=pol, ang=ang, orbit=orb) for io, orb in zip(ios,[orb1, orb2])]
                    # orb = orb2 if scale_target == 'SMAP' else orb1 # POSSIBLY WRONG!!!!
                    orb = orb1 if scale_target == 'SMAP' else orb2
                else:
                    spcs = [ios[0].get_species(pol=pol, ang=ang, orbit=orb1)]
                    if sensor.upper() == 'SMAP':
                        orb = orb1 if scale_target == 'SMAP' else orb2
                    else:
                        orb = orb2 if scale_target == 'SMAP' else orb1

                if use_pc and (sensor == 'SMOSSMAP'):
                    dss = [io.timeseries['obs_obs'][:, spc-1, row, col].to_series() for io, spc in zip(ios,spcs)]
                    obs = PCA(*dss, window=1.5)['PC-1']
                    dss = [io.timeseries['obs_fcst'][:, spc-1, row, col].to_series() for io, spc in zip(ios,spcs)]
                    mod = PCA(*dss, window=1.5)['PC-1']
                else:
                    obs = pd.concat([io.timeseries['obs_obs'][:, spc-1, row, col].to_series() for io, spc in zip(ios,spcs)]).sort_index()
                    mod = pd.concat([io.timeseries['obs_fcst'][:, spc-1, row, col].to_series() for io, spc in zip(ios,spcs)]).sort_index()

                if (len(obs) == 0) | (len(mod) == 0):
                    continue

                if mode == 'longterm':
                    obs_clim = calc_anom(obs, return_clim=True)
                    mod_clim = calc_anom(mod, return_clim=True)
                    obs_anom = calc_anom(obs, mode='shortterm')
                    mod_anom = calc_anom(mod, mode='shortterm')
                    m_obs = (obs_clim + obs_anom).resample('1D').mean()
                    m_mod = (mod_clim + mod_anom).resample('1D').mean()
                    i_yr = m_obs.index.year.values - years.min()
                    i_doy = m_obs.index.dayofyear.values - 1
                    data_obs[i_til, i_doy, i_yr, i_pol, i_orb] = m_obs.replace(np.nan, -9999.).values
                    data_mod[i_til, i_doy, i_yr, i_pol, i_orb] = m_mod.replace(np.nan, -9999.).values
                elif mode == 'shortterm':
                    for i_yr, yr in enumerate(years):
                        # Select the year once instead of once per statistic.
                        obs_yr = obs[obs.index.year==yr][date_from:date_to]
                        mod_yr = mod[mod.index.year==yr][date_from:date_to]
                        data_obs[i_til, :, i_yr, i_pol, i_orb] = calc_clim_p(obs_yr)[0].replace(np.nan, -9999.).values
                        data_mod[i_til, :, i_yr, i_pol, i_orb] = calc_clim_p(mod_yr)[0].replace(np.nan, -9999.).values
                        n_data[i_til, :, i_yr, i_pol, i_orb] = len(obs_yr.dropna())
                else:
                    # TODO: Doesn't work currently!
                    data['m_obs'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:],\
                    data['s_obs'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:] = calc_clim_p(obs[date_from:date_to])
                    data['m_mod'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:],\
                    data['s_mod'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:] = calc_clim_p(mod[date_from:date_to])
                    data['N_data'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:] = len(obs[date_from:date_to].dropna())

    modes = np.array([0, 0])
    sdate = np.array([date_from.year, date_from.month, date_from.day, 0, 0])
    edate = np.array([date_to.year, date_to.month, date_to.day, 0, 0])
    lengths = np.array([len(tiles), len(angles), 1])  # tiles, incidence angles, whatever

    # Dump of the raw arrays to a hard-coded location
    np.save('/Users/u0116961/data_sets/data_mod', data_mod)
    np.save('/Users/u0116961/data_sets/data_obs', data_obs)

    # ----- write output files -----
    if mode == 'longterm':
        for i_orb, orb in enumerate(orbits[0]):
            # !!! inconsistent with the definition in the obs_paramfile (species) !!!
            modes[0] = 1 if orb == 'A' else 0
            for i_yr, yr in enumerate(years):
                fdir = froot / f'y{yr:04}'
                fdir.mkdir(parents=True, exist_ok=True)
                for i_doy, doy in enumerate(doys):
                    res = template.copy()
                    ang = angles[0]
                    for i_pol, pol in enumerate(pols):
                        # The "s_" columns receive the same values as the "m_" columns.
                        res.loc[:, f'm_obs_{pol}_{ang}'] = data_obs[:, i_doy, i_yr, i_pol, i_orb].astype('float32')
                        res.loc[:, f's_obs_{pol}_{ang}'] = data_obs[:, i_doy, i_yr, i_pol, i_orb].astype('float32')
                        res.loc[:, f'm_mod_{pol}_{ang}'] = data_mod[:, i_doy, i_yr, i_pol, i_orb].astype('float32')
                        res.loc[:, f's_mod_{pol}_{ang}'] = data_mod[:, i_doy, i_yr, i_pol, i_orb].astype('float32')
                        res.loc[:, f'N_data_{pol}_{ang}'] = 999
                    res.replace(np.nan, -9999, inplace=True)
                    fname = fdir / f'{fbase}_{orb}_d{doy:03}.bin'
                    _write_scaling_file(ios[0], fname, modes, sdate, edate, lengths, angles, res)
    else:
        for i_pent, pent in enumerate(pentads):
            for i_orb, orb in enumerate(orbits[0]):
                # !!! inconsistent with the definition in the obs_paramfile (species) !!!
                modes[0] = 1 if orb == 'A' else 0
                if mode == 'shortterm':
                    for i_yr, yr in enumerate(years):
                        res = template.copy()
                        for ang in angles:
                            for i_pol, pol in enumerate(pols):
                                res.loc[:, f'm_obs_{pol}_{ang}'] = data_obs[:, i_pent, i_yr, i_pol, i_orb].astype('float32')
                                res.loc[:, f's_obs_{pol}_{ang}'] = data_obs[:, i_pent, i_yr, i_pol, i_orb].astype('float32')
                                res.loc[:, f'm_mod_{pol}_{ang}'] = data_mod[:, i_pent, i_yr, i_pol, i_orb].astype('float32')
                                res.loc[:, f's_mod_{pol}_{ang}'] = data_mod[:, i_pent, i_yr, i_pol, i_orb].astype('float32')
                                res.loc[:, f'N_data_{pol}_{ang}'] = n_data[:, i_pent, i_yr, i_pol, i_orb].astype('int32')
                        res.replace(np.nan, -9999, inplace=True)
                        fname = froot / f'{fbase}_{orb}_p{pent:02}_y{yr:04}.bin'
                        _write_scaling_file(ios[0], fname, modes, sdate, edate, lengths, angles, res)
                else:
                    # Legacy branch, see the TODO notes above.
                    res = template.copy()
                    for ang in angles:
                        for pol in pols:
                            res.loc[:, f'm_obs_{pol}_{ang}'] = data['m_obs'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                            res.loc[:, f's_obs_{pol}_{ang}'] = data['s_obs'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                            res.loc[:, f'm_mod_{pol}_{ang}'] = data['m_mod'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                            res.loc[:, f's_mod_{pol}_{ang}'] = data['s_mod'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                            res.loc[:, f'N_data_{pol}_{ang}'] = data['N_data'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                    res.replace(np.nan, -9999, inplace=True)
                    fname = froot / f'{fbase}_{orb}_p{pent:02}.bin'
                    _write_scaling_file(ios[0], fname, modes, sdate, edate, lengths, angles, res)
def EC_ascat_smap_ismn_ldas():
    '''
    Collocate in situ (ISMN), ASCAT, SMAP, open-loop and DA soil moisture at
    ISMN stations and store, per station and anomaly mode, the sample size,
    the pairwise correlations and the error cross-correlations returned by
    `ecol` for the smap/ol, smap/da and ol/da pairs.

    One row per station is appended to the result CSV file.
    '''

    result_file = Path('/Users/u0116961/Documents/work/extended_collocation/ec_ascat_smap_ismn_ldas.csv')

    names = ['insitu', 'ascat', 'smap', 'ol', 'da']
    combs = list(combinations(names, 2))

    ds_ol = LDAS_io('xhourly', 'US_M36_SMAP_TB_OL_noScl').timeseries
    ds_da = LDAS_io('xhourly', 'US_M36_SMAP_TB_MadKF_DA_it11').timeseries
    ds_da_ana = LDAS_io('ObsFcstAna', 'US_M36_SMAP_TB_MadKF_DA_it11').timeseries['obs_ana']
    tg = LDAS_io().grid.tilegrids

    modes = ['absolute','longterm','shortterm']

    ismn = ISMN_io()
    # Only the stations from position 70 onward are processed.
    ismn.list = ismn.list.iloc[70::]
    ascat = HSAF_io()
    smap = SMAP_io()

    lut = pd.read_csv(Paths().lut, index_col=0)

    for i, (meta, ts_insitu) in enumerate(ismn.iter_stations(surface_only=True), start=1):
        logging.info('%i/%i' % (i, len(ismn.list)))

        try:
            if len(ts_insitu := ts_insitu['2015-04-01':'2020-04-01'].resample('1d').mean().dropna()) < 25:
                continue
        except Exception:
            # Best effort: skip stations whose series cannot be resampled,
            # but let KeyboardInterrupt / SystemExit propagate.
            continue

        res = pd.DataFrame(meta.copy()).transpose()
        col = meta.ease_col
        row = meta.ease_row

        colg = col + tg.loc['domain', 'i_offg']  # col / lon
        rowg = row + tg.loc['domain', 'j_offg']  # row / lat

        tmp_lut = lut[(lut.ease2_col == colg) & (lut.ease2_row == rowg)]
        if len(tmp_lut) == 0:
            continue

        gpi_smap = tmp_lut.index.values[0]
        gpi_ascat = tmp_lut.ascat_gpi.values[0]

        try:
            ts_ascat = ascat.read(gpi_ascat, resample_time=False).resample('1d').mean().dropna()
            ts_ascat = ts_ascat[~ts_ascat.index.duplicated(keep='first')]
            ts_ascat.name = 'ASCAT'
        except Exception:
            continue

        ts_smap = smap.read(gpi_smap)

        if ts_ascat is None or ts_smap is None:
            continue

        # Keep only snow-free time steps with soil temperature above 277.15.
        ind = (ds_ol['snow_mass'][:, row, col].values == 0)&(ds_ol['soil_temp_layer1'][:, row, col].values > 277.15)
        ts_ol = ds_ol['sm_surface'][:, row, col].to_series().loc[ind].dropna()
        ts_ol.index += pd.to_timedelta('2 hours')

        ind = (ds_da['snow_mass'][:, row, col].values == 0)&(ds_da['soil_temp_layer1'][:, row, col].values > 277.15)
        ts_da = ds_da['sm_surface'][:, row, col].to_series().loc[ind].dropna()
        ts_da.index += pd.to_timedelta('2 hours')

        # Untouched input series, in the order of `names`. Anomalies of every
        # mode are derived from these, never from the result of a previous
        # mode (otherwise the short-term anomalies would be computed from the
        # long-term anomalies).
        raw = [ts_insitu, ts_ascat, ts_smap, ts_ol, ts_da]

        for mode in modes:

            if mode == 'absolute':
                series = [ts.copy() for ts in raw]
            else:
                series = [calc_anom(ts.copy(), longterm=(mode == 'longterm')).dropna() for ts in raw]

            tmp = pd.DataFrame(dict(zip(names, series))).dropna()

            corr = tmp.corr()
            ec_res = ecol(tmp[['insitu', 'ascat', 'smap', 'ol', 'da']], correlated=[['smap', 'ol'], ['smap', 'da'], ['ol', 'da']])

            res[f'len_{mode}'] = len(tmp)
            for c in combs:
                # The mode is part of the column name so that the modes do
                # not overwrite each other.
                pair = '_'.join(c)
                res[f'corr_{pair}_{mode}'] = corr.loc[c]
            res[f'err_corr_smap_ol_{mode}'] = ec_res['err_corr_smap_ol']
            res[f'err_corr_smap_da_{mode}'] = ec_res['err_corr_smap_da']
            res[f'err_corr_ol_da_{mode}'] = ec_res['err_corr_ol_da']

        # The first row written creates the file including the header;
        # later rows are appended without it.
        if not result_file.exists():
            res.to_csv(result_file, float_format='%0.4f')
        else:
            res.to_csv(result_file, float_format='%0.4f', mode='a', header=False)
# Example #4
# 0
def run():
    '''
    Estimate an observation error standard deviation per species and grid
    cell as sqrt(mean((obs - fcst)^2) - fcst_errvar), clip it to [0, 20] and
    write one binary error file per orbit.

    The switches at the top select absolute values or anomalies (long- or
    short-term) and whether the mean forecast error variance is subtracted.
    '''

    anom = False
    longterm = False
    fcst_err_corrected = False

    exp = 'US_M36_SMAP_TB_MadKF_OL_it11'

    io = LDAS_io('ObsFcstAna', exp)

    froot = Path(
        '/Users/u0116961/Documents/work/MadKF/CLSM/SMAP/rmsd_pert/error_files')
    fbase = 'SMOS_fit_Tb_'

    dir_out = froot / ((('anom_' +
                         ('lt' if longterm else 'st')) if anom else 'abs') +
                       ('_fcst_corr' if fcst_err_corrected else '_uncorr'))
    dir_out.mkdir(parents=True, exist_ok=True)

    dtype = template_error_Tb40()[0]

    angles = np.array([
        40.,
    ])
    orbits = ['A', 'D']

    tiles = io.grid.tilecoord['tile_id'].values.astype('int32')
    # Tile positions relative to the domain offsets
    ind_lat = io.grid.tilecoord.loc[:, 'j_indg'].values - \
        io.grid.tilegrids.loc['domain', 'j_offg']
    ind_lon = io.grid.tilecoord.loc[:, 'i_indg'].values - \
        io.grid.tilegrids.loc['domain', 'i_offg']

    template = pd.DataFrame(columns=dtype.names, index=tiles).astype('float32')
    template['lon'] = io.grid.tilecoord['com_lon'].values.astype('float32')
    template['lat'] = io.grid.tilecoord['com_lat'].values.astype('float32')

    modes = np.array([0, 0])
    sdate = np.array([2015, 4, 1, 0, 0])
    # NOTE(review): April has only 30 days - confirm the intended end date.
    edate = np.array([2020, 4, 31, 0, 0])
    lengths = np.array([len(tiles),
                        len(angles)])  # tiles, incidence angles, whatever

    dims = io.timeseries['obs_obs'].shape

    obs_errstd = np.full(dims[1::], np.nan)

    # ----- Calculate anomalies -----
    n_total = np.prod(dims[1::])
    cnt = 0
    for spc in np.arange(dims[1]):
        for lat in np.arange(dims[2]):
            for lon in np.arange(dims[3]):
                cnt += 1
                logging.info('%i / %i' % (cnt, n_total))

                try:
                    obs = io.timeseries['obs_obs'][:, spc, lat,
                                                   lon].to_dataframe()['obs_obs']
                    fcst = io.timeseries['obs_fcst'][:, spc, lat,
                                                     lon].to_dataframe()['obs_fcst']
                    if anom:
                        obs = calc_anom(obs, longterm=longterm)
                        fcst = calc_anom(fcst, longterm=longterm)

                    fcst_errvar = np.nanmean(
                        io.timeseries['obs_fcstvar']
                        [:, spc, lat, lon].values) if fcst_err_corrected else 0

                    tmp_obs_errstd = (((obs - fcst)**2).mean() -
                                      fcst_errvar)**0.5
                    if not np.isnan(tmp_obs_errstd):
                        obs_errstd[spc, lat, lon] = tmp_obs_errstd

                except Exception:
                    # Best effort: cells that cannot be processed keep NaN,
                    # but KeyboardInterrupt / SystemExit still stop the loop.
                    continue

    # Clip the estimates to the range [0, 20].
    np.place(obs_errstd, obs_errstd < 0, 0)
    np.place(obs_errstd, obs_errstd > 20, 20)

    # ----- write output files -----
    for orb in orbits:
        # !!! inconsistent with the definition in the obs_paramfile (species) !!!
        modes[0] = 1 if orb == 'A' else 0

        res = template.copy()
        res.index = np.arange(len(res)) + 1
        res['row'] = ind_lat
        res['col'] = ind_lon

        # Species index 0/1 feeds err_Tbh and 2/3 feeds err_Tbv
        # (ascending / descending orbit respectively).
        spc = 0 if orb == 'A' else 1
        res['err_Tbh'] = obs_errstd[spc, ind_lat, ind_lon]

        spc = 2 if orb == 'A' else 3
        res['err_Tbv'] = obs_errstd[spc, ind_lat, ind_lon]

        res = fill_gaps(res, ['err_Tbh', 'err_Tbv'],
                        smooth=False,
                        grid=io.grid)

        fname = dir_out / (fbase + orb + '.bin')

        # The context manager closes the file even if a write raises.
        with open(fname, 'wb') as fid:
            io.write_fortran_block(fid, modes)
            io.write_fortran_block(fid, sdate)
            io.write_fortran_block(fid, edate)
            io.write_fortran_block(fid, lengths)
            io.write_fortran_block(fid, angles)

            # 'row' and 'col' are only needed for gap filling.
            for f in res.drop(['row', 'col'], axis='columns').columns.values:
                io.write_fortran_block(fid, res[f].values)
# Example #5
# 0
def run_ismn_eval():
    '''
    Compare ISMN in situ soil moisture with several LDAS runs.

    For every station, variable, anomaly mode and run, the sample size, the
    Pearson correlation and the ubRMSD are stored twice: once for all
    collocated time steps and once (prefix "ana_") restricted to the time
    steps of the ObsFcstAna output of the first run. One row per station is
    appended to the result CSV file.
    '''

    experiments = [['SMOSSMAP', 'short']]
    exp_names = ['_'.join(exp) for exp in experiments]

    names = ['open_loop', 'MadKF_SMOS40'] + exp_names
    runs = [
        'US_M36_SMAP_TB_OL_noScl',
        'US_M36_SMOS40_TB_MadKF_DA_it613',
    ] + [f'US_M36_SMAP_TB_DA_scl_{name}' for name in exp_names]

    dss = [LDAS_io('xhourly', run).timeseries for run in runs]

    result_file = Path(
        '/Users/u0116961/Documents/work/LDAS/2020-03_scaling/validation/ismn_eval.csv'
    )
    ana_times = LDAS_io('ObsFcstAna', runs[0]).timeseries.time.values
    t_ana = pd.DatetimeIndex(ana_times).sort_values()

    variables = ['sm_surface', 'sm_rootzone', 'sm_profile']
    modes = ['absolute', 'longterm', 'shortterm']

    ismn = ISMN_io()
    ismn.list = ismn.list.iloc[70::]

    stations = ismn.iter_stations(surface_only=False)
    for i, (meta, ts_insitu) in enumerate(stations, start=1):
        logging.info('%i/%i' % (i, len(ismn.list)))

        # Stations with fewer than 50 records in the study period are skipped.
        ts_insitu = ts_insitu['2015-04-01':'2020-04-01']
        if len(ts_insitu) < 50:
            continue

        res = pd.DataFrame(meta.copy()).transpose()
        col = meta.ease_col
        row = meta.ease_row

        for var in variables:
            for mode in modes:
                is_longterm = mode == 'longterm'

                if mode == 'absolute':
                    ts_ref = ts_insitu[var].dropna()
                else:
                    ts_ref = calc_anom(ts_insitu[var],
                                       longterm=is_longterm).dropna()

                for run, ts_model in zip(names, dss):

                    # Snow-free time steps with soil temperature above 277.15
                    snow_free = ts_model['snow_mass'][:, row, col].values == 0
                    warm = ts_model['soil_temp_layer1'][:, row, col].values > 277.15
                    ind = snow_free & warm

                    ts_mod = ts_model[var][:, row, col].to_series().loc[ind]
                    ts_mod.index += pd.to_timedelta('2 hours')

                    if mode == 'absolute':
                        ts_mod = ts_mod.dropna()
                    else:
                        ts_mod = calc_anom(ts_mod, longterm=is_longterm).dropna()

                    # First pass: all collocated time steps; second pass
                    # ("ana_"): analysis time steps only.
                    # NOTE: the length columns carry no run name, so they
                    # hold the value of the last run processed.
                    for prefix in ('', 'ana_'):
                        pairs = pd.DataFrame({1: ts_ref, 2: ts_mod})
                        if prefix:
                            pairs = pairs.reindex(t_ana)
                        pairs = pairs.dropna()

                        res[f'{prefix}len_{mode}_{var}'] = len(pairs)

                        # Correlations are only reported for more than 10 pairs.
                        if len(pairs) > 10:
                            r, p = pearsonr(pairs[1], pairs[2])
                        else:
                            r, p = np.nan, np.nan
                        res[f'{prefix}r_{run}_{mode}_{var}'] = r

                        # Difference of the mean-removed series
                        dev = (pairs[1] - pairs[1].mean()) - \
                            (pairs[2] - pairs[2].mean())
                        res[f'{prefix}ubrmsd_{run}_{mode}_{var}'] = np.sqrt(
                            (dev**2).mean())

        # Append to an existing file; otherwise create it with a header.
        if result_file.exists():
            res.to_csv(result_file,
                       float_format='%0.4f',
                       mode='a',
                       header=False)
        else:
            res.to_csv(result_file, float_format='%0.4f')
# Example #6
# 0
def _pair_skill(pair, min_len=10):
    """Return ``(n, r, ubrmsd)`` for a frame of collocated reference/model data.

    Parameters
    ----------
    pair : pandas.DataFrame
        NaN-free frame with the reference in column ``1`` and the model in
        column ``2``.
    min_len : int
        Pearson r is only computed when the frame has more than this many
        rows; otherwise NaN is returned for r.

    Returns
    -------
    tuple
        Sample size, Pearson correlation and unbiased RMSD (RMSD of the
        mean-removed series).
    """
    n = len(pair)
    ref, mod = pair[1], pair[2]
    if n > min_len:
        r, _ = pearsonr(ref, mod)
    else:
        r = np.nan
    ubrmsd = np.sqrt((((ref - ref.mean()) - (mod - mod.mean()))**2).mean())
    return n, r, ubrmsd


def _obs_days(ds_obs, row, col):
    """Return a daily index derived from the time steps that carry observations.

    Time steps where species 1 or species 2 is not NaN at (row, col) are kept
    and resampled to daily means; only the resulting index is returned.

    NOTE(review): ``resample('1d')`` yields a *continuous* daily index between
    the first and last such time step, so days without any observation are
    part of the result as well -- confirm this is intended.
    """
    df = ds_obs.sel(species=[1, 2]).isel(lat=row, lon=col).to_pandas()
    has_obs = ~np.isnan(df[1]) | ~np.isnan(df[2])
    return df[has_obs].resample('1d').mean().index


def run_ascat_eval_part(part, parts):
    """Evaluate ``sm_surface`` of three LDAS runs against ASCAT for one chunk.

    The look-up table read from ``Paths().lut`` is split into ``parts``
    contiguous chunks and chunk number ``part`` is processed. For every grid
    cell the daily-mean ASCAT series is compared with the daily-mean model
    series of each run in three modes ('absolute', 'longterm', 'shortterm';
    the latter two use ``calc_anom``). Sample size, Pearson r and ubRMSD are
    stored twice per run and mode: for all common days, and after reindexing
    to the daily index derived from the run's ``obs_ana`` time steps (columns
    prefixed with ``ana_``). One row per grid cell is appended to
    ``ascat_eval_part<part>.csv``.

    Grid cells whose ASCAT series cannot be read are skipped without a row.

    Parameters
    ----------
    part : int
        1-based index of the chunk to process (``1 <= part <= parts``).
    parts : int
        Total number of chunks the look-up table is split into.
    """

    res_path = Path(
        '/Users/u0116961/Documents/work/LDAS/2020-03_scaling/validation')
    # Create the output directory up front so a missing folder does not abort
    # the run at the first write; exist_ok keeps parallel parts from racing.
    res_path.mkdir(parents=True, exist_ok=True)
    result_file = res_path / ('ascat_eval_part%i.csv' % part)

    lut = pd.read_csv(Paths().lut, index_col=0)

    # Split grid cell list for parallelization
    subs = (np.arange(parts + 1) * len(lut) / parts).astype('int')
    subs[-1] = len(lut)
    start = subs[part - 1]
    end = subs[part]

    # Look-up table that contains the grid cells to iterate over
    lut = lut.iloc[start:end, :]

    names = ['open_loop', 'SMOSSMAP_short', 'MadKF_SMOS40']
    runs = [
        'US_M36_SMAP_TB_OL_noScl', 'US_M36_SMAP_TB_DA_scl_SMOSSMAP_short',
        'US_M36_SMOS40_TB_MadKF_DA_it613'
    ]

    dss = [LDAS_io('xhourly', run).timeseries for run in runs]
    grid = LDAS_io().grid

    ds_obs_smap = (LDAS_io('ObsFcstAna',
                           'US_M36_SMAP_TB_OL_noScl').timeseries['obs_ana'])
    ds_obs_smos = (LDAS_io(
        'ObsFcstAna', 'US_M36_SMOS40_TB_MadKF_DA_it613').timeseries['obs_ana'])

    modes = ['absolute', 'longterm', 'shortterm']
    var = 'sm_surface'

    ascat = HSAF_io()

    for cnt, (gpi, data) in enumerate(lut.iterrows()):
        print('%i / %i' % (cnt, len(lut)))

        # Convert global EASE2 indices into indices local to the model domain.
        col = int(data.ease2_col - grid.tilegrids.loc['domain', 'i_offg'])
        row = int(data.ease2_row - grid.tilegrids.loc['domain', 'j_offg'])

        res = pd.DataFrame(index=(gpi, ))
        res['col'] = int(data.ease2_col)
        res['row'] = int(data.ease2_row)
        res['lcol'] = col
        res['lrow'] = row

        try:
            ts_ascat = ascat.read(
                data['ascat_gpi'],
                resample_time=False).resample('1d').mean().dropna()
            ts_ascat = ts_ascat[~ts_ascat.index.duplicated(keep='first')]
            ts_ascat.name = 'ASCAT'
        except Exception:
            # Best effort: cells without a usable ASCAT series are skipped.
            # Only ``Exception`` is caught so Ctrl-C / SystemExit still work.
            continue

        t_ana_smap = _obs_days(ds_obs_smap, row, col)
        t_ana_smos = _obs_days(ds_obs_smos, row, col)

        # The masked model series do not depend on the mode, so read them once
        # per grid cell instead of once per mode.
        raw_mod = {}
        for run, ts_model in zip(names, dss):
            # Keep time steps with zero snow mass and a layer-1 soil
            # temperature above 277.15 (presumably Kelvin -- TODO confirm).
            ind = (ts_model['snow_mass'][:, row, col].values == 0) & (
                ts_model['soil_temp_layer1'][:, row, col].values > 277.15)
            ts_raw = ts_model[var][:, row, col].to_series().loc[ind]
            # NOTE(review): the 2-hour shift is taken over from the original
            # code; its rationale is not visible here.
            ts_raw.index += pd.to_timedelta('2 hours')
            raw_mod[run] = ts_raw

        for mode in modes:

            if mode == 'absolute':
                ts_ref = ts_ascat.copy()
            else:
                ts_ref = calc_anom(ts_ascat.copy(),
                                   longterm=(mode == 'longterm')).dropna()

            for run in names:

                t_ana = t_ana_smos if run == 'MadKF_SMOS40' else t_ana_smap

                if mode == 'absolute':
                    ts_mod = raw_mod[run].dropna()
                else:
                    # Pass a copy in case calc_anom modifies its input.
                    ts_mod = calc_anom(raw_mod[run].copy(),
                                       longterm=mode == 'longterm').dropna()
                ts_mod = ts_mod.resample('1d').mean()

                pair = pd.DataFrame({1: ts_ref, 2: ts_mod})

                # Column order matters: rows are appended without a header.
                n, r, ubrmsd = _pair_skill(pair.dropna())
                res['len_' + run + '_' + mode] = n
                res['r_' + run + '_' + mode] = r
                res['ubrmsd_' + run + '_' + mode] = ubrmsd

                n, r, ubrmsd = _pair_skill(pair.reindex(t_ana).dropna())
                res['ana_len_' + run + '_' + mode] = n
                res['ana_r_' + run + '_' + mode] = r
                res['ana_ubrmsd_' + run + '_' + mode] = ubrmsd

        # Write the header only when the file is created by this call.
        res.to_csv(result_file,
                   float_format='%0.3f',
                   mode='a',
                   header=not result_file.exists())
Beispiel #7
0
def _as_mode_frame(ts, mode, name):
    """Return ``ts`` as a one-column, NaN-free frame for the given mode.

    For 'absolute' the series is used as is; for any other mode it is passed
    through ``calc_anom`` (long-term anomalies when ``mode == 'longterm'``).
    The input series is not renamed; the returned column is called ``name``.
    """
    if mode != 'absolute':
        ts = calc_anom(ts, longterm=(mode == 'longterm'))
    ts = ts.dropna()
    ts.name = name
    return pd.DataFrame(ts)


def _masked_model_series(ds, var, row, col):
    """Return ``ds[var]`` at (row, col), masked and shifted by two hours.

    Only time steps with zero snow mass and a layer-1 soil temperature above
    277.15 (presumably Kelvin -- TODO confirm) are kept.
    """
    ind = (ds['snow_mass'].isel(lat=row, lon=col).values == 0) & \
          (ds['soil_temp_layer1'].isel(lat=row, lon=col).values > 277.15)
    ts = ds[var].isel(lat=row, lon=col).to_series().loc[ind]
    # NOTE(review): the 2-hour shift is taken over from the original code;
    # its rationale is not visible here.
    ts.index += pd.to_timedelta('2 hours')
    return ts


def plot_suspicious_stations(root):
    """Plot time series for stations whose Pearson and TCA skill changes disagree.

    For every ISMN station, mode and variable for which the station list
    reports a negative ``diff_tcar2_*`` together with a positive
    ``diff_pearsonr2_*``, the open-loop, DA, ASCAT and in-situ series are
    collocated with ``df_match``. The correlation difference (DA minus open
    loop, both against in situ) is then recomputed on the collocated data, and
    unless that difference is negative a PNG with the open-loop, DA and
    in-situ series is written to the ``timeseries`` folder.

    Stations for which anything fails are skipped (best effort); the failure
    is reported on stdout.

    Parameters
    ----------
    root
        Unused; kept so existing callers do not break.
    """

    statlist = pd.read_csv('/Users/u0116961/Documents/work/MadKF/CLSM/suspicious_stations/station_list_r_diff.csv', index_col=0)

    ds_ol = LDAS_io('xhourly', 'US_M36_SMAP_TB_OL_scaled_4K_obserr').timeseries
    ds_da = LDAS_io('xhourly', 'US_M36_SMAP_TB_DA_scaled_4K_obserr').timeseries

    ts_ana = LDAS_io('ObsFcstAna', 'US_M36_SMAP_TB_DA_scaled_4K_obserr').timeseries['obs_obs']
    t_ana = pd.DatetimeIndex(ts_ana.time.values).sort_values()

    ascat = HSAF_io()
    gpi_list = pd.read_csv(ascat.root / 'warp5_grid' / 'pointlist_warp_conus.csv', index_col=0)

    ismn = ISMN_io()

    variables = ['sm_surface', 'sm_rootzone']
    modes = ['absolute', 'longterm', 'shortterm']

    # Restrict the ISMN station list to the stations in the suspicious list.
    ismn.list.index = ismn.list.network + '_' + ismn.list.station
    ismn.list = ismn.list.reindex(statlist.index)

    # Make sure the output folder exists; otherwise every savefig would fail
    # and be swallowed by the per-station error handling below.
    fbase = Path('/Users/u0116961/Documents/work/MadKF/CLSM/suspicious_stations/timeseries')
    fbase.mkdir(parents=True, exist_ok=True)

    for i, (meta, ts_insitu) in enumerate(ismn.iter_stations(surface_only=False)):
        try:
            col = meta.ease_col
            row = meta.ease_row

            gpi = lonlat2gpi(meta.lon, meta.lat, gpi_list)

            # Check for a missing series *before* scaling it.
            ts_ascat = ascat.read(gpi)
            if ts_ascat is None:
                continue
            # NOTE(review): presumably converts percent saturation to
            # volumetric units with a porosity of 0.6 -- TODO confirm.
            ts_ascat = ts_ascat / 100 * 0.6

            station_stats = statlist[(statlist.network==meta.network)&(statlist.station==meta.station)]

            # Observation times at this pixel; computed lazily because many
            # stations have no mode/variable combination that is plotted.
            t_obs = None

            for mode in modes:
                for var in variables:

                    dpr = station_stats[f'diff_pearsonr2_{mode}_{var}'].values[0]
                    dtr = station_stats[f'diff_tcar2_{mode}_{var}'].values[0]

                    # Only cases where TCA degrades while Pearson improves.
                    if not ((dtr < 0) & (dpr > 0)):
                        continue

                    if t_obs is None:
                        # Time steps where any entry along axis 1 of obs_obs
                        # is not NaN at this pixel.
                        ind_obs = np.bitwise_or.reduce(~np.isnan(ts_ana[:, :, row, col].values), 1)
                        t_obs = t_ana[ind_obs]

                    ts_asc = _as_mode_frame(ts_ascat, mode, 'ascat')
                    ts_ins = _as_mode_frame(ts_insitu[var], mode, 'insitu')
                    ts_ol = _as_mode_frame(
                        _masked_model_series(ds_ol, var, row, col).reindex(t_obs), mode, 'open_loop')
                    ts_da = _as_mode_frame(
                        _masked_model_series(ds_da, var, row, col).reindex(t_obs), mode, 'DA_4K')

                    # NOTE(review): the unit of ``window`` is defined by
                    # df_match (presumably days) -- TODO confirm.
                    matched = df_match(ts_ol, ts_da, ts_asc, ts_ins, window=0.5)
                    data = ts_ol.join(matched[0]['DA_4K']).join(matched[1]['ascat']).join(matched[2]['insitu']).dropna()

                    corr = data.corr()
                    dpr_triplets = corr['DA_4K']['insitu'] - corr['open_loop']['insitu']
                    if dpr_triplets < 0:
                        continue

                    f = plt.figure(figsize=(15, 5))
                    try:
                        ax = f.gca()
                        sns.lineplot(data=data[['open_loop', 'DA_4K', 'insitu']], dashes=False, linewidth=1.5, ax=ax)
                        ax.set_title(f'{meta.network} / {meta.station} ({var}): d(Pearson R2) = {dpr_triplets:.3f} , d(TCA R2) = {dtr:.3f}')

                        fname = fbase / f'{mode}_{var}_{meta.network}_{meta.station}.png'
                        f.savefig(fname, dpi=300, bbox_inches='tight')
                    finally:
                        # Close this figure even if plotting or saving fails.
                        plt.close(f)

        except Exception as err:
            # Best effort: skip the station, but say why instead of hiding it.
            print(f'Skipping {i}: {err!r}')
            continue
Beispiel #8
0
def noahmp_version_comparison(part, parts):
    """Compare Noah-MP 3.6 and 4.0.1 output for one chunk of grid cells.

    All unmasked cells of the ``lat`` field are split into ``parts``
    contiguous chunks and chunk number ``part`` is processed. For every cell:

    * ``stats`` is evaluated between the two model versions for the four
      ``SM`` and ``ST`` layers as well as ``LAI`` and ``SWE`` (stored as
      ``mdiff_*``, ``sdiff_*`` and ``r2_*``);
    * first-layer ``SM`` of both versions is combined with the ASCAT and SMAP
      series at the cell's coordinates, and ``ecol`` is run on the common
      time steps for absolute values ('abs') and for anomalies ('anom', via
      ``calc_anom(..., longterm=False)``). The ``snr_*`` results are stored
      as ``tcr2_<mode>_<column>``.

    One row per cell is appended to ``result_part<part>.csv``.

    Parameters
    ----------
    part : int
        1-based index of the chunk to process (``1 <= part <= parts``).
    parts : int
        Total number of chunks the cell list is split into.
    """

    result_file = Path(
        f'/Users/u0116961/Documents/work/LIS/noahmp_version_comparison/result_part{part}.csv'
    )
    # exist_ok avoids a race when several parts start at the same time.
    result_file.parent.mkdir(parents=True, exist_ok=True)

    ascat = HSAF_io()
    smap = SMAP_io()

    # Context managers make sure both files are closed, also on errors.
    with Dataset('/Users/u0116961/data_sets/LIS/noahmp36/timeseries.nc') as noah3, \
            Dataset('/Users/u0116961/data_sets/LIS/noahmp401/timeseries.nc') as noah4:

        lats = noah3['lat'][:, :]
        lons = noah3['lon'][:, :]

        # getmaskarray always returns a full boolean array, also when nothing
        # is masked (where ``.mask`` may be a scalar).
        ind_lat, ind_lon = np.where(~np.ma.getmaskarray(lats))

        # Split grid cell list for parallelization
        subs = (np.arange(parts + 1) * len(ind_lat) / parts).astype('int')
        subs[-1] = len(ind_lat)
        start = subs[part - 1]
        end = subs[part]

        # Grid cells to iterate over in this part
        ind_lat = ind_lat[start:end]
        ind_lon = ind_lon[start:end]
        n_cells = len(ind_lat)

        # The time axis is the same for every cell, so build it only once.
        time = pd.DatetimeIndex(
            num2date(noah3['time'][:],
                     units=noah3['time'].units,
                     only_use_python_datetimes=True,
                     only_use_cftime_datetimes=False))

        for i, (i_r, i_c) in enumerate(zip(ind_lat, ind_lon), start=1):
            logging.info(f'{i} / {n_cells}')

            lat = lats[i_r, i_c]
            lon = lons[i_r, i_c]

            # The row index is the 1-based position within this part.
            res = pd.DataFrame({'lat': lat, 'lon': lon}, index=(i, ))

            for v in [
                    'SM1', 'SM2', 'SM3', 'SM4', 'ST1', 'ST2', 'ST3', 'ST4',
                    'LAI', 'SWE'
            ]:
                if ('SM' in v) or ('ST' in v):
                    # Layered variable: the trailing digit is the 1-based
                    # layer number.
                    name, layer = v[0:2], int(v[-1]) - 1
                    res[f'mdiff_{v}'], res[f'sdiff_{v}'], res[f'r2_{v}'] = \
                        stats(noah4[name][:, layer, i_r, i_c], noah3[name][:, layer, i_r, i_c])
                else:
                    res[f'mdiff_{v}'], res[f'sdiff_{v}'], res[f'r2_{v}'] = \
                        stats(noah4[v][:, i_r, i_c], noah3[v][:, i_r, i_c])

            df = pd.DataFrame(
                {
                    'noahmp36': noah3['SM'][:, 0, i_r, i_c],
                    'noahmp401': noah4['SM'][:, 0, i_r, i_c]
                },
                index=time)

            # Fall back to empty float series so the concat below still
            # yields the expected columns.
            ts_ascat = ascat.read(lat, lon)
            if ts_ascat is None:
                ts_ascat = pd.Series(name='ascat', dtype='float64')

            ts_smap = smap.read(lat, lon)
            if ts_smap is None:
                ts_smap = pd.Series(name='smap', dtype='float64')

            df = pd.concat((df, ts_ascat, ts_smap), axis='columns').dropna()

            for mode in ['abs', 'anom']:
                if mode == 'anom':
                    # 'abs' has been evaluated already, so the columns can be
                    # replaced by their anomalies in place.
                    for c in df.columns.values:
                        df[c] = calc_anom(df[c], longterm=False)

                res[f'len_{mode}'] = len(df)

                ec_res = ecol(df, correlated=[['noahmp36', 'noahmp401']])
                for c in df.columns.values:
                    # snr_* is presumably given in dB (TODO confirm); convert
                    # to linear units and then to snr / (1 + snr).
                    snr = 10**(ec_res[f'snr_{c}'] / 10)
                    res[f'tcr2_{mode}_{c}'] = snr / (1 + snr)

            # Write the header only when the file is created by this call.
            res.to_csv(result_file,
                       float_format='%0.4f',
                       mode='a',
                       header=not result_file.exists())