import logging
from pathlib import Path
from itertools import combinations

import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import pearsonr
from netCDF4 import Dataset, num2date

# Project-internal dependencies (import paths are assumptions; adjust to the
# local package layout): LDAS_io, GEOSldas_io, HSAF_io, SMAP_io, ISMN_io,
# Paths, calc_anom, calc_clim_p, template_scaling, template_error_Tb40,
# fill_gaps, lonlat2gpi, PCA, stats, and pytesmo's ecol / df_match.


def run_ascat_eval_smos_part(part, parts, ref='ascat'):

    periods = [
        ['2010-04-01', '2020-04-01'],
        ['2010-04-01', '2015-04-01'],
        ['2015-04-01', '2020-04-01'],
        ['2010-04-01', '2012-10-01'],
        ['2012-10-01', '2015-04-01'],
        ['2015-04-01', '2017-10-01'],
        ['2017-10-01', '2020-04-01'],
    ]

    res_path = Path(
        '~/Documents/work/MadKF/CLSM/SMOS40/validation/multiperiod/ascat'
    ).expanduser()
    if not res_path.exists():
        Path.mkdir(res_path, parents=True)
    result_file = res_path / f'ascat_eval_smos_part{part}.csv'

    lut = pd.read_csv(Paths().lut, index_col=0)

    # Split grid cell list for parallelization
    subs = (np.arange(parts + 1) * len(lut) / parts).astype('int')
    subs[-1] = len(lut)
    start = subs[part - 1]
    end = subs[part]

    # Look-up table that contains the grid cells to iterate over
    lut = lut.iloc[start:end, :]

    names = ['open_loop'] + [f'SMOS40_it62{i}' for i in range(1, 5)]
    runs = ['US_M36_SMOS40_TB_OL_noScl'] + \
           [f'US_M36_SMOS40_TB_MadKF_DA_it62{i}' for i in range(1, 5)]

    grid = LDAS_io('ObsFcstAna', runs[0]).grid
    dss_xhourly = [LDAS_io('xhourly', run).timeseries for run in runs]
    dss_obs_ana = [
        LDAS_io('ObsFcstAna', run).timeseries['obs_ana'] for run in runs
    ]

    modes = ['absolute', 'longterm', 'shortterm']

    ascat = HSAF_io()

    for cnt, (gpi, data) in enumerate(lut.iterrows()):
        print('%i / %i' % (cnt, len(lut)))

        col = int(data.ease2_col - grid.tilegrids.loc['domain', 'i_offg'])
        row = int(data.ease2_row - grid.tilegrids.loc['domain', 'j_offg'])

        res = pd.DataFrame(index=(gpi,))
        res['col'] = int(data.ease2_col)
        res['row'] = int(data.ease2_row)
        res['lcol'] = col
        res['lrow'] = row

        try:
            ts_ascat = ascat.read(
                data['ascat_gpi'],
                resample_time=False).resample('1d').mean().dropna()
            ts_ascat = ts_ascat[~ts_ascat.index.duplicated(keep='first')]
            ts_ascat.name = 'ASCAT'
        except Exception:
            continue

        dfs = [
            ds.sel(species=[1, 2]).isel(
                lat=row, lon=col).to_pandas().resample('1d').mean()
            for ds in dss_obs_ana
        ]
        # Daily time stamps with valid analysis observations, common to all runs
        idx = [df[np.any(~np.isnan(df), axis=1)].index for df in dfs]
        t_ana = idx[0]
        for tmp_idx in idx[1:]:
            t_ana = t_ana.intersection(tmp_idx)

        var = 'sm_surface'
        for mode in modes:

            if mode == 'absolute':
                ts_ref = ts_ascat.copy()
            else:
                ts_ref = calc_anom(ts_ascat.copy(),
                                   longterm=(mode == 'longterm')).dropna()

            for run, ts_model in zip(names, dss_xhourly):

                # Mask out frozen soils and snow cover
                ind = (ts_model['snow_mass'][:, row, col].values == 0) & \
                      (ts_model['soil_temp_layer1'][:, row, col].values > 277.15)
                ts_mod = ts_model[var][:, row, col].to_series().loc[ind]
                ts_mod.index += pd.to_timedelta('2 hours')

                if mode == 'absolute':
                    ts_mod = ts_mod.dropna()
                else:
                    ts_mod = calc_anom(ts_mod,
                                       longterm=(mode == 'longterm')).dropna()
                ts_mod = ts_mod.reindex(t_ana).dropna()

                for i, p in enumerate(periods):
                    tmp = pd.DataFrame({1: ts_ref, 2: ts_mod})[p[0]:p[1]].dropna()
                    res[f'p{i}_len_{run}_{mode}'] = len(tmp)
                    r, p_val = pearsonr(tmp[1], tmp[2]) if len(tmp) > 10 else \
                        (np.nan, np.nan)
                    res[f'p{i}_r_{run}_{mode}'] = r

        if not result_file.exists():
            res.to_csv(result_file, float_format='%0.3f')
        else:
            res.to_csv(result_file, float_format='%0.3f', mode='a', header=False)

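# Minimal driver sketch (not part of the original module) showing how the
# per-part evaluation above could be distributed over processes. It assumes
# the 1-based part numbering used by the slicing logic above; `parts=4` is a
# placeholder.
def _example_parallel_driver(parts=4):
    from multiprocessing import Pool
    with Pool(parts) as pool:
        pool.starmap(run_ascat_eval_smos_part,
                     [(part, parts) for part in range(1, parts + 1)])
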
def run(args, scale_target='SMAP', mode='longterm', use_pc=False):
    '''
    :param args: summarizes the following four for multiprocessing purposes:
        sensor: 'SMOS' or 'SMAP' or 'SMOSSMAP'
        date_from: 'yyyy-mm-dd'
        date_to: 'yyyy-mm-dd'
        pc: experiment tag used for the open-loop run / scaling-file directory
    :param scale_target: 'SMOS' or 'SMAP'
    :param mode: 'longterm' or 'shortterm'
    :param use_pc: If True, the first principal component of SMOS/SMAP Tb will be used
    '''

    sensor, date_from, date_to, pc = args

    exp_smap = f'NLv4_M36_US_OL_{pc}'
    exp_smos = f'NLv4_M36_US_OL_{pc}_SMOS'

    if mode == 'shortterm':
        ext = '_yearly'
    elif mode == 'longterm':
        ext = '_daily'
    else:
        ext = ''

    froot = Path(f'~/data_sets/GEOSldas_runs/_scaling_files_{pc}{ext}').expanduser()
    if not froot.exists():
        Path.mkdir(froot, parents=True)

    ios = []
    if 'SMAP' in sensor:
        ios += [GEOSldas_io('ObsFcstAna', exp=exp_smap)]
    if 'SMOS' in sensor:
        ios += [GEOSldas_io('ObsFcstAna', exp=exp_smos)]

    if not date_from:
        date_from = pd.to_datetime(np.min([io.timeseries['time'].values[0] for io in ios]))
    else:
        date_from = pd.to_datetime(date_from)
    if not date_to:
        date_to = pd.to_datetime(np.max([io.timeseries['time'].values[-1] for io in ios]))
    else:
        date_to = pd.to_datetime(date_to)

    pent_from = int(np.floor((date_from.dayofyear - 1) / 5.) + 1)
    pent_to = int(np.floor((date_to.dayofyear - 1) / 5.) + 1)

    fbase = f'Thvf_TbSM_001_src_{sensor}_trg_{scale_target}_{date_from.year}_p{pent_from:02}_{date_to.year}_p{pent_to:02}_W_9p_Nmin_20'

    dtype, _, _ = template_scaling(sensor='SMAP')

    tiles = ios[0].grid.tilecoord['tile_id'].values.astype('int32')
    angles = np.array([40, ], 'int')
    pols = ['H', 'V']
    orbits = [['A', 'D'], ['D', 'A']]  # To match SMOS and SMAP species!

    template = pd.DataFrame(columns=dtype.names, index=tiles).astype('float32')
    template['lon'] = ios[0].grid.tilecoord['com_lon'].values.astype('float32')
    template['lat'] = ios[0].grid.tilecoord['com_lat'].values.astype('float32')
    template['tile_id'] = tiles.astype('int32')

    pentads = np.arange(73) + 1

    if mode == 'longterm':
        years = np.arange(date_from.year, date_to.year + 1)
        doys = np.arange(1, 367)
        data_obs = np.full([len(tiles), len(doys), len(years), len(pols), len(orbits[0])], -9999.)
        data_mod = data_obs.copy()
        # dummy = np.full([len(tiles), len(doys), len(years), len(angles), len(pols), len(orbits[0])], -9999)
        # coords = {'tile_id': tiles,
        #           'doy': doys,
        #           'year': years,
        #           'angle': angles,
        #           'pol': pols,
        #           'orbit': orbits[0]}
        # darr = xr.DataArray(dummy, coords=coords, dims=['tile_id', 'doy', 'year', 'angle', 'pol', 'orbit'])
    elif mode == 'shortterm':
        years = np.arange(date_from.year, date_to.year + 1)
        data_obs = np.full([len(tiles), len(pentads), len(years), len(pols), len(orbits[0])], -9999.)
        data_mod = data_obs.copy()
        n_data = np.full([len(tiles), len(pentads), len(years), len(pols), len(orbits[0])], -9999)
        # dummy = np.full([len(tiles), len(pentads), len(years), len(angles), len(pols), len(orbits[0])], -9999)
        # coords = {'tile_id': tiles,
        #           'pentad': pentads,
        #           'year': years,
        #           'angle': angles,
        #           'pol': pols,
        #           'orbit': orbits[0]}
        # darr = xr.DataArray(dummy, coords=coords, dims=['tile_id', 'pentad', 'year', 'angle', 'pol', 'orbit'])
    else:
        # TODO: Currently doesn't work anymore because of the modifications for lt and st
        dummy = np.full([len(tiles), len(pentads), len(angles), len(pols), len(orbits[0])], -9999)
        coords = {'tile_id': tiles,
                  'pentad': pentads,
                  'angle': angles,
                  'pol': pols,
                  'orbit': orbits[0]}
        darr = xr.DataArray(dummy, coords=coords,
                            dims=['tile_id', 'pentad', 'angle', 'pol', 'orbit'])

    # ----- calculate mean and reshuffle -----
    for i_til, til in enumerate(tiles):
        logging.info(f'{i_til} / {len(tiles)}')
        for i_pol, pol in enumerate(pols):
            # for i_ang, ang in enumerate(angles):
            ang = angles[0]
            for i_orb, (orb1, orb2) in enumerate(zip(orbits[0], orbits[1])):

                col, row = ios[0].grid.tileid2colrow(til)

                if sensor.upper() == 'SMOSSMAP':
                    spcs = [io.get_species(pol=pol, ang=ang, orbit=orb)
                            for io, orb in zip(ios, [orb1, orb2])]
                    # orb = orb2 if scale_target == 'SMAP' else orb1 # POSSIBLY WRONG!!!!
                    orb = orb1 if scale_target == 'SMAP' else orb2
                else:
                    spcs = [ios[0].get_species(pol=pol, ang=ang, orbit=orb1)]
                    if sensor.upper() == 'SMAP':
                        orb = orb1 if scale_target == 'SMAP' else orb2
                    else:
                        orb = orb2 if scale_target == 'SMAP' else orb1

                if use_pc and (sensor == 'SMOSSMAP'):
                    dss = [io.timeseries['obs_obs'][:, spc - 1, row, col].to_series()
                           for io, spc in zip(ios, spcs)]
                    obs = PCA(*dss, window=1.5)['PC-1']
                    dss = [io.timeseries['obs_fcst'][:, spc - 1, row, col].to_series()
                           for io, spc in zip(ios, spcs)]
                    mod = PCA(*dss, window=1.5)['PC-1']
                else:
                    obs = pd.concat([io.timeseries['obs_obs'][:, spc - 1, row, col].to_series()
                                     for io, spc in zip(ios, spcs)]).sort_index()
                    mod = pd.concat([io.timeseries['obs_fcst'][:, spc - 1, row, col].to_series()
                                     for io, spc in zip(ios, spcs)]).sort_index()

                if (len(obs) == 0) or (len(mod) == 0):
                    continue

                if mode == 'longterm':
                    # Daily series = climatology + short-term anomalies
                    obs_clim = calc_anom(obs, return_clim=True)
                    mod_clim = calc_anom(mod, return_clim=True)
                    obs_anom = calc_anom(obs, mode='shortterm')
                    mod_anom = calc_anom(mod, mode='shortterm')
                    m_obs = (obs_clim + obs_anom).resample('1D').mean()
                    m_mod = (mod_clim + mod_anom).resample('1D').mean()
                    i_yr = m_obs.index.year.values - years.min()
                    i_doy = m_obs.index.dayofyear.values - 1
                    data_obs[i_til, i_doy, i_yr, i_pol, i_orb] = m_obs.replace(np.nan, -9999.).values
                    data_mod[i_til, i_doy, i_yr, i_pol, i_orb] = m_mod.replace(np.nan, -9999.).values
                elif mode == 'shortterm':
                    for i_yr, yr in enumerate(years):
                        data_obs[i_til, :, i_yr, i_pol, i_orb] = \
                            calc_clim_p(obs[obs.index.year == yr][date_from:date_to])[0].replace(np.nan, -9999.).values
                        data_mod[i_til, :, i_yr, i_pol, i_orb] = \
                            calc_clim_p(mod[mod.index.year == yr][date_from:date_to])[0].replace(np.nan, -9999.).values
                        n_data[i_til, :, i_yr, i_pol, i_orb] = \
                            len(obs[obs.index.year == yr][date_from:date_to].dropna())
                else:
                    # TODO: Doesn't work currently!
                    data['m_obs'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:], \
                        data['s_obs'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:] = \
                        calc_clim_p(obs[date_from:date_to])
                    data['m_mod'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:], \
                        data['s_mod'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:] = \
                        calc_clim_p(mod[date_from:date_to])
                    data['N_data'].sel(tile_id=til, pol=pol, angle=ang, orbit=orb)[:] = \
                        len(obs[date_from:date_to].dropna())

    modes = np.array([0, 0])
    sdate = np.array([date_from.year, date_from.month, date_from.day, 0, 0])
    edate = np.array([date_to.year, date_to.month, date_to.day, 0, 0])
    lengths = np.array([len(tiles), len(angles), 1])  # tiles, incidence angles, whatever

    np.save('/Users/u0116961/data_sets/data_mod', data_mod)
    np.save('/Users/u0116961/data_sets/data_obs', data_obs)

    # ----- write output files -----
    if mode == 'longterm':
        for i_orb, orb in enumerate(orbits[0]):
            # !!! inconsistent with the definition in the obs_paramfile (species) !!!
            modes[0] = 1 if orb == 'A' else 0
            for i_yr, yr in enumerate(years):
                for i_doy, doy in enumerate(doys):
                    res = template.copy()
                    # for i_ang, ang in enumerate(angles):
                    ang = angles[0]
                    for i_pol, pol in enumerate(pols):
                        res.loc[:, f'm_obs_{pol}_{ang}'] = data_obs[:, i_doy, i_yr, i_pol, i_orb].astype('float32')
                        res.loc[:, f's_obs_{pol}_{ang}'] = data_obs[:, i_doy, i_yr, i_pol, i_orb].astype('float32')
                        res.loc[:, f'm_mod_{pol}_{ang}'] = data_mod[:, i_doy, i_yr, i_pol, i_orb].astype('float32')
                        res.loc[:, f's_mod_{pol}_{ang}'] = data_mod[:, i_doy, i_yr, i_pol, i_orb].astype('float32')
                        res.loc[:, f'N_data_{pol}_{ang}'] = 999
                    res.replace(np.nan, -9999, inplace=True)

                    fdir = froot / f'y{yr:04}'
                    if not fdir.exists():
                        Path.mkdir(fdir, parents=True)
                    fname = fdir / f'{fbase}_{orb}_d{doy:03}.bin'

                    with open(fname, 'wb') as fid:
                        ios[0].write_fortran_block(fid, modes)
                        ios[0].write_fortran_block(fid, sdate)
                        ios[0].write_fortran_block(fid, edate)
                        ios[0].write_fortran_block(fid, lengths)
                        ios[0].write_fortran_block(fid, angles.astype('float'))  # required by LDASsa!!
                        for f in res.columns.values:
                            ios[0].write_fortran_block(fid, res[f].values)
    else:
        for i_pent, pent in enumerate(pentads):
            for i_orb, orb in enumerate(orbits[0]):
                # !!! inconsistent with the definition in the obs_paramfile (species) !!!
                modes[0] = 1 if orb == 'A' else 0

                if mode == 'shortterm':
                    for i_yr, yr in enumerate(years):
                        res = template.copy()
                        for ang in angles:
                            for i_pol, pol in enumerate(pols):
                                res.loc[:, f'm_obs_{pol}_{ang}'] = data_obs[:, i_pent, i_yr, i_pol, i_orb].astype('float32')
                                res.loc[:, f's_obs_{pol}_{ang}'] = data_obs[:, i_pent, i_yr, i_pol, i_orb].astype('float32')
                                res.loc[:, f'm_mod_{pol}_{ang}'] = data_mod[:, i_pent, i_yr, i_pol, i_orb].astype('float32')
                                res.loc[:, f's_mod_{pol}_{ang}'] = data_mod[:, i_pent, i_yr, i_pol, i_orb].astype('float32')
                                res.loc[:, f'N_data_{pol}_{ang}'] = n_data[:, i_pent, i_yr, i_pol, i_orb].astype('int32')
                        res.replace(np.nan, -9999, inplace=True)

                        fname = froot / f'{fbase}_{orb}_p{pent:02}_y{yr:04}.bin'
                        with open(fname, 'wb') as fid:
                            ios[0].write_fortran_block(fid, modes)
                            ios[0].write_fortran_block(fid, sdate)
                            ios[0].write_fortran_block(fid, edate)
                            ios[0].write_fortran_block(fid, lengths)
                            ios[0].write_fortran_block(fid, angles.astype('float'))  # required by LDASsa!!
                            for f in res.columns.values:
                                ios[0].write_fortran_block(fid, res[f].values)
                else:
                    # NOTE: relies on the legacy `data` DataArray path, which
                    # currently doesn't work (see TODO above)
                    res = template.copy()
                    for ang in angles:
                        for pol in pols:
                            res.loc[:, f'm_obs_{pol}_{ang}'] = data['m_obs'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                            res.loc[:, f's_obs_{pol}_{ang}'] = data['s_obs'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                            res.loc[:, f'm_mod_{pol}_{ang}'] = data['m_mod'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                            res.loc[:, f's_mod_{pol}_{ang}'] = data['s_mod'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                            res.loc[:, f'N_data_{pol}_{ang}'] = data['N_data'].sel(pol=pol, angle=ang, orbit=orb, pentad=pent).to_series()
                    res.replace(np.nan, -9999, inplace=True)

                    fname = froot / f'{fbase}_{orb}_p{pent:02}.bin'
                    with open(fname, 'wb') as fid:
                        ios[0].write_fortran_block(fid, modes)
                        ios[0].write_fortran_block(fid, sdate)
                        ios[0].write_fortran_block(fid, edate)
                        ios[0].write_fortran_block(fid, lengths)
                        ios[0].write_fortran_block(fid, angles.astype('float'))  # required by LDASsa!!
                        for f in res.columns.values:
                            ios[0].write_fortran_block(fid, res[f].values)

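# Example invocation sketch (not part of the original module): the `args`
# tuple bundles sensor, date range, and experiment tag so that `run` can be
# mapped over a process pool. The 'Pcorr' tag is a placeholder assumption.
def _example_scaling_run():
    args = ('SMOSSMAP', '2015-04-01', '2020-04-01', 'Pcorr')
    run(args, scale_target='SMAP', mode='longterm', use_pc=False)
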
def EC_ascat_smap_ismn_ldas():

    result_file = Path('/Users/u0116961/Documents/work/extended_collocation/ec_ascat_smap_ismn_ldas.csv')

    names = ['insitu', 'ascat', 'smap', 'ol', 'da']
    combs = list(combinations(names, 2))

    ds_ol = LDAS_io('xhourly', 'US_M36_SMAP_TB_OL_noScl').timeseries
    ds_da = LDAS_io('xhourly', 'US_M36_SMAP_TB_MadKF_DA_it11').timeseries
    ds_da_ana = LDAS_io('ObsFcstAna', 'US_M36_SMAP_TB_MadKF_DA_it11').timeseries['obs_ana']
    tg = LDAS_io().grid.tilegrids

    modes = ['absolute', 'longterm', 'shortterm']

    ismn = ISMN_io()
    ismn.list = ismn.list.iloc[70::]
    ascat = HSAF_io()
    smap = SMAP_io()

    lut = pd.read_csv(Paths().lut, index_col=0)

    i = 0
    for meta, ts_insitu in ismn.iter_stations(surface_only=True):
        i += 1
        logging.info('%i/%i' % (i, len(ismn.list)))

        try:
            if len(ts_insitu := ts_insitu['2015-04-01':'2020-04-01'].resample('1d').mean().dropna()) < 25:
                continue
        except Exception:
            continue

        res = pd.DataFrame(meta.copy()).transpose()
        col = meta.ease_col
        row = meta.ease_row
        colg = col + tg.loc['domain', 'i_offg']  # col / lon
        rowg = row + tg.loc['domain', 'j_offg']  # row / lat

        tmp_lut = lut[(lut.ease2_col == colg) & (lut.ease2_row == rowg)]
        if len(tmp_lut) == 0:
            continue
        gpi_smap = tmp_lut.index.values[0]
        gpi_ascat = tmp_lut.ascat_gpi.values[0]

        try:
            ts_ascat = ascat.read(gpi_ascat, resample_time=False).resample('1d').mean().dropna()
            ts_ascat = ts_ascat[~ts_ascat.index.duplicated(keep='first')]
            ts_ascat.name = 'ASCAT'
        except Exception:
            continue

        ts_smap = smap.read(gpi_smap)

        if (ts_ascat is None) or (ts_smap is None):
            continue

        # Mask out frozen soils and snow cover
        ind = (ds_ol['snow_mass'][:, row, col].values == 0) & \
              (ds_ol['soil_temp_layer1'][:, row, col].values > 277.15)
        ts_ol = ds_ol['sm_surface'][:, row, col].to_series().loc[ind].dropna()
        ts_ol.index += pd.to_timedelta('2 hours')

        ind = (ds_da['snow_mass'][:, row, col].values == 0) & \
              (ds_da['soil_temp_layer1'][:, row, col].values > 277.15)
        ts_da = ds_da['sm_surface'][:, row, col].to_series().loc[ind].dropna()
        ts_da.index += pd.to_timedelta('2 hours')

        for mode in modes:

            # Always derive the mode-specific series from the unmodified
            # originals; otherwise anomalies would be computed from anomalies.
            if mode == 'absolute':
                ts_ins = ts_insitu.copy()
                ts_asc = ts_ascat.copy()
                ts_smp = ts_smap.copy()
                ts_olm = ts_ol.copy()
                ts_dam = ts_da.copy()
            else:
                ts_ins = calc_anom(ts_insitu.copy(), longterm=(mode == 'longterm')).dropna()
                ts_asc = calc_anom(ts_ascat.copy(), longterm=(mode == 'longterm')).dropna()
                ts_smp = calc_anom(ts_smap.copy(), longterm=(mode == 'longterm')).dropna()
                ts_olm = calc_anom(ts_ol.copy(), longterm=(mode == 'longterm')).dropna()
                ts_dam = calc_anom(ts_da.copy(), longterm=(mode == 'longterm')).dropna()

            tmp = pd.DataFrame(dict(zip(names, [ts_ins, ts_asc, ts_smp, ts_olm, ts_dam]))).dropna()
            corr = tmp.corr()

            ec_res = ecol(tmp[['insitu', 'ascat', 'smap', 'ol', 'da']],
                          correlated=[['smap', 'ol'], ['smap', 'da'], ['ol', 'da']])

            res[f'len_{mode}'] = len(tmp)
            for c in combs:
                res[f'corr_{"_".join(c)}_{mode}'] = corr.loc[c]
            res[f'err_corr_smap_ol_{mode}'] = ec_res['err_corr_smap_ol']
            res[f'err_corr_smap_da_{mode}'] = ec_res['err_corr_smap_da']
            res[f'err_corr_ol_da_{mode}'] = ec_res['err_corr_ol_da']

        if not result_file.exists():
            res.to_csv(result_file, float_format='%0.4f')
        else:
            res.to_csv(result_file, float_format='%0.4f', mode='a', header=False)

def run_error_files():
    # Estimate Tb observation error standard deviations from obs-minus-forecast
    # residuals and write LDASsa-style error files. (Renamed from the generic
    # `run` to avoid clashing with the scaling-file `run` above.)

    anom = False
    longterm = False
    fcst_err_corrected = False

    exp = 'US_M36_SMAP_TB_MadKF_OL_it11'
    io = LDAS_io('ObsFcstAna', exp)

    froot = Path('/Users/u0116961/Documents/work/MadKF/CLSM/SMAP/rmsd_pert/error_files')
    fbase = 'SMOS_fit_Tb_'

    dir_out = froot / ((('anom_' + ('lt' if longterm else 'st')) if anom else 'abs') +
                       ('_fcst_corr' if fcst_err_corrected else '_uncorr'))
    if not dir_out.exists():
        Path.mkdir(dir_out, parents=True)

    dtype = template_error_Tb40()[0]

    angles = np.array([40., ])
    orbits = ['A', 'D']

    tiles = io.grid.tilecoord['tile_id'].values.astype('int32')
    ind_lat = io.grid.tilecoord.loc[:, 'j_indg'].values - io.grid.tilegrids.loc['domain', 'j_offg']
    ind_lon = io.grid.tilecoord.loc[:, 'i_indg'].values - io.grid.tilegrids.loc['domain', 'i_offg']

    template = pd.DataFrame(columns=dtype.names, index=tiles).astype('float32')
    template['lon'] = io.grid.tilecoord['com_lon'].values.astype('float32')
    template['lat'] = io.grid.tilecoord['com_lat'].values.astype('float32')

    modes = np.array([0, 0])
    sdate = np.array([2015, 4, 1, 0, 0])
    edate = np.array([2020, 4, 30, 0, 0])
    lengths = np.array([len(tiles), len(angles)])  # tiles, incidence angles, whatever

    dims = io.timeseries['obs_obs'].shape
    obs_errstd = np.full(dims[1::], np.nan)

    # ----- Calculate anomalies -----
    cnt = 0
    for spc in np.arange(dims[1]):
        for lat in np.arange(dims[2]):
            for lon in np.arange(dims[3]):
                cnt += 1
                logging.info('%i / %i' % (cnt, np.prod(dims[1::])))

                try:
                    if anom:
                        obs = calc_anom(io.timeseries['obs_obs'][:, spc, lat, lon].to_dataframe()['obs_obs'],
                                        longterm=longterm)
                        fcst = calc_anom(io.timeseries['obs_fcst'][:, spc, lat, lon].to_dataframe()['obs_fcst'],
                                         longterm=longterm)
                    else:
                        obs = io.timeseries['obs_obs'][:, spc, lat, lon].to_dataframe()['obs_obs']
                        fcst = io.timeseries['obs_fcst'][:, spc, lat, lon].to_dataframe()['obs_fcst']

                    fcst_errvar = np.nanmean(io.timeseries['obs_fcstvar'][:, spc, lat, lon].values) \
                        if fcst_err_corrected else 0

                    # RMSD-based observation error std., optionally corrected
                    # for the forecast error variance
                    tmp_obs_errstd = (((obs - fcst) ** 2).mean() - fcst_errvar) ** 0.5
                    if not np.isnan(tmp_obs_errstd):
                        obs_errstd[spc, lat, lon] = tmp_obs_errstd
                except Exception:
                    pass

    np.place(obs_errstd, obs_errstd < 0, 0)
    np.place(obs_errstd, obs_errstd > 20, 20)

    # ----- write output files -----
    for orb in orbits:
        # !!! inconsistent with the definition in the obs_paramfile (species) !!!
        modes[0] = 1 if orb == 'A' else 0

        res = template.copy()
        res.index = np.arange(len(res)) + 1
        res['row'] = ind_lat
        res['col'] = ind_lon

        spc = 0 if orb == 'A' else 1
        res['err_Tbh'] = obs_errstd[spc, ind_lat, ind_lon]
        spc = 2 if orb == 'A' else 3
        res['err_Tbv'] = obs_errstd[spc, ind_lat, ind_lon]

        res = fill_gaps(res, ['err_Tbh', 'err_Tbv'], smooth=False, grid=io.grid)

        fname = dir_out / f'{fbase}{orb}.bin'
        with open(fname, 'wb') as fid:
            io.write_fortran_block(fid, modes)
            io.write_fortran_block(fid, sdate)
            io.write_fortran_block(fid, edate)
            io.write_fortran_block(fid, lengths)
            io.write_fortran_block(fid, angles)
            for f in res.drop(['row', 'col'], axis='columns').columns.values:
                io.write_fortran_block(fid, res[f].values)

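# Sanity-check sketch (not part of the original module) for reading back the
# binary files written above. It assumes write_fortran_block emits Fortran
# unformatted sequential records (a 4-byte record length before and after each
# payload) in native byte order; if the files are big-endian, use '>i4' and a
# big-endian dtype instead. Verify against the actual writer before relying on it.
def _example_read_fortran_record(fid, dtype='float32'):
    import struct
    nbytes = struct.unpack('i', fid.read(4))[0]          # leading record length
    data = np.frombuffer(fid.read(nbytes), dtype=dtype)  # record payload
    assert struct.unpack('i', fid.read(4))[0] == nbytes  # trailing record length
    return data
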
def run_ismn_eval():

    experiments = [['SMOSSMAP', 'short']]

    names = ['open_loop'] + ['MadKF_SMOS40'] + ['_'.join(exp) for exp in experiments]
    runs = ['US_M36_SMAP_TB_OL_noScl'] + \
           ['US_M36_SMOS40_TB_MadKF_DA_it613'] + \
           [f'US_M36_SMAP_TB_DA_scl_{name}' for name in names[2::]]

    dss = [LDAS_io('xhourly', run).timeseries for run in runs]

    result_file = Path('/Users/u0116961/Documents/work/LDAS/2020-03_scaling/validation/ismn_eval.csv')

    t_ana = pd.DatetimeIndex(LDAS_io('ObsFcstAna', runs[0]).timeseries.time.values).sort_values()

    variables = ['sm_surface', 'sm_rootzone', 'sm_profile']
    modes = ['absolute', 'longterm', 'shortterm']

    ismn = ISMN_io()
    ismn.list = ismn.list.iloc[70::]

    i = 0
    for meta, ts_insitu in ismn.iter_stations(surface_only=False):
        i += 1
        logging.info('%i/%i' % (i, len(ismn.list)))

        if len(ts_insitu := ts_insitu['2015-04-01':'2020-04-01']) < 50:
            continue

        res = pd.DataFrame(meta.copy()).transpose()
        col = meta.ease_col
        row = meta.ease_row

        for var in variables:
            for mode in modes:

                if mode == 'absolute':
                    ts_ref = ts_insitu[var].dropna()
                else:
                    ts_ref = calc_anom(ts_insitu[var], longterm=(mode == 'longterm')).dropna()

                for run, ts_model in zip(names, dss):

                    # Mask out frozen soils and snow cover
                    ind = (ts_model['snow_mass'][:, row, col].values == 0) & \
                          (ts_model['soil_temp_layer1'][:, row, col].values > 277.15)
                    ts_mod = ts_model[var][:, row, col].to_series().loc[ind]
                    ts_mod.index += pd.to_timedelta('2 hours')

                    if mode == 'absolute':
                        ts_mod = ts_mod.dropna()
                    else:
                        ts_mod = calc_anom(ts_mod, longterm=(mode == 'longterm')).dropna()

                    # Skill against all available in situ data
                    tmp = pd.DataFrame({1: ts_ref, 2: ts_mod}).dropna()
                    res['len_' + mode + '_' + var] = len(tmp)
                    r, p = pearsonr(tmp[1], tmp[2]) if len(tmp) > 10 else (np.nan, np.nan)
                    res['r_' + run + '_' + mode + '_' + var] = r
                    # res['p_' + run + '_' + mode + '_' + var] = p
                    # res['rmsd_' + run + '_' + mode + '_' + var] = np.sqrt(((tmp[1] - tmp[2]) ** 2).mean())
                    res['ubrmsd_' + run + '_' + mode + '_' + var] = \
                        np.sqrt((((tmp[1] - tmp[1].mean()) - (tmp[2] - tmp[2].mean())) ** 2).mean())

                    # Skill restricted to analysis time steps
                    tmp = pd.DataFrame({1: ts_ref, 2: ts_mod}).reindex(t_ana).dropna()
                    res['ana_len_' + mode + '_' + var] = len(tmp)
                    r, p = pearsonr(tmp[1], tmp[2]) if len(tmp) > 10 else (np.nan, np.nan)
                    res['ana_r_' + run + '_' + mode + '_' + var] = r
                    # res['ana_p_' + run + '_' + mode + '_' + var] = p
                    # res['ana_rmsd_' + run + '_' + mode + '_' + var] = np.sqrt(((tmp[1] - tmp[2]) ** 2).mean())
                    res['ana_ubrmsd_' + run + '_' + mode + '_' + var] = \
                        np.sqrt((((tmp[1] - tmp[1].mean()) - (tmp[2] - tmp[2].mean())) ** 2).mean())

        if not result_file.exists():
            res.to_csv(result_file, float_format='%0.4f')
        else:
            res.to_csv(result_file, float_format='%0.4f', mode='a', header=False)

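# Aggregation sketch (not part of the original module): read the CSV written
# by run_ismn_eval() and report median correlations per run/mode/variable.
# Column names follow the keys constructed above.
def _example_summarize_ismn_eval(path='/Users/u0116961/Documents/work/LDAS/2020-03_scaling/validation/ismn_eval.csv'):
    res = pd.read_csv(path, index_col=0)
    r_cols = [c for c in res.columns if c.startswith(('r_', 'ana_r_'))]
    return res[r_cols].median().sort_values(ascending=False)
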
def run_ascat_eval_part(part, parts):

    res_path = Path('/Users/u0116961/Documents/work/LDAS/2020-03_scaling/validation')
    result_file = res_path / ('ascat_eval_part%i.csv' % part)

    lut = pd.read_csv(Paths().lut, index_col=0)

    # Split grid cell list for parallelization
    subs = (np.arange(parts + 1) * len(lut) / parts).astype('int')
    subs[-1] = len(lut)
    start = subs[part - 1]
    end = subs[part]

    # Look-up table that contains the grid cells to iterate over
    lut = lut.iloc[start:end, :]

    names = ['open_loop', 'SMOSSMAP_short', 'MadKF_SMOS40']
    runs = [
        'US_M36_SMAP_TB_OL_noScl',
        'US_M36_SMAP_TB_DA_scl_SMOSSMAP_short',
        'US_M36_SMOS40_TB_MadKF_DA_it613'
    ]

    dss = [LDAS_io('xhourly', run).timeseries for run in runs]
    grid = LDAS_io().grid

    # t_ana = pd.DatetimeIndex(LDAS_io('ObsFcstAna', runs[0]).timeseries.time.values).sort_values()
    ds_obs_smap = LDAS_io('ObsFcstAna', 'US_M36_SMAP_TB_OL_noScl').timeseries['obs_ana']
    ds_obs_smos = LDAS_io('ObsFcstAna', 'US_M36_SMOS40_TB_MadKF_DA_it613').timeseries['obs_ana']

    modes = ['absolute', 'longterm', 'shortterm']

    ascat = HSAF_io()

    for cnt, (gpi, data) in enumerate(lut.iterrows()):
        print('%i / %i' % (cnt, len(lut)))

        col = int(data.ease2_col - grid.tilegrids.loc['domain', 'i_offg'])
        row = int(data.ease2_row - grid.tilegrids.loc['domain', 'j_offg'])

        res = pd.DataFrame(index=(gpi,))
        res['col'] = int(data.ease2_col)
        res['row'] = int(data.ease2_row)
        res['lcol'] = col
        res['lrow'] = row

        try:
            ts_ascat = ascat.read(data['ascat_gpi'], resample_time=False).resample('1d').mean().dropna()
            ts_ascat = ts_ascat[~ts_ascat.index.duplicated(keep='first')]
            ts_ascat.name = 'ASCAT'
        except Exception:
            continue

        # Daily time stamps with valid analysis observations (species 1 and 2)
        t_df_smap = ds_obs_smap.sel(species=[1, 2]).isel(lat=row, lon=col).to_pandas()
        t_df_smos = ds_obs_smos.sel(species=[1, 2]).isel(lat=row, lon=col).to_pandas()
        t_ana_smap = t_df_smap[~np.isnan(t_df_smap[1]) | ~np.isnan(t_df_smap[2])].resample('1d').mean().index
        t_ana_smos = t_df_smos[~np.isnan(t_df_smos[1]) | ~np.isnan(t_df_smos[2])].resample('1d').mean().index

        var = 'sm_surface'
        for mode in modes:

            if mode == 'absolute':
                ts_ref = ts_ascat.copy()
            else:
                ts_ref = calc_anom(ts_ascat.copy(), longterm=(mode == 'longterm')).dropna()

            for run, ts_model in zip(names, dss):

                t_ana = t_ana_smos if run == 'MadKF_SMOS40' else t_ana_smap

                # Mask out frozen soils and snow cover
                ind = (ts_model['snow_mass'][:, row, col].values == 0) & \
                      (ts_model['soil_temp_layer1'][:, row, col].values > 277.15)
                ts_mod = ts_model[var][:, row, col].to_series().loc[ind]
                ts_mod.index += pd.to_timedelta('2 hours')

                if mode == 'absolute':
                    ts_mod = ts_mod.dropna()
                else:
                    ts_mod = calc_anom(ts_mod, longterm=(mode == 'longterm')).dropna()
                ts_mod = ts_mod.resample('1d').mean()

                # Skill against all days
                tmp = pd.DataFrame({1: ts_ref, 2: ts_mod}).dropna()
                res['len_' + run + '_' + mode] = len(tmp)
                r, p = pearsonr(tmp[1], tmp[2]) if len(tmp) > 10 else (np.nan, np.nan)
                res['r_' + run + '_' + mode] = r
                # res['p_' + run + '_' + mode] = p
                # res['rmsd_' + run + '_' + mode] = np.sqrt(((tmp[1] - tmp[2]) ** 2).mean())
                res['ubrmsd_' + run + '_' + mode] = \
                    np.sqrt((((tmp[1] - tmp[1].mean()) - (tmp[2] - tmp[2].mean())) ** 2).mean())

                # Skill restricted to analysis days
                tmp = pd.DataFrame({1: ts_ref, 2: ts_mod}).reindex(t_ana).dropna()
                res['ana_len_' + run + '_' + mode] = len(tmp)
                r, p = pearsonr(tmp[1], tmp[2]) if len(tmp) > 10 else (np.nan, np.nan)
                res['ana_r_' + run + '_' + mode] = r
                # res['ana_p_' + run + '_' + mode] = p
                # res['ana_rmsd_' + run + '_' + mode] = np.sqrt(((tmp[1] - tmp[2]) ** 2).mean())
                res['ana_ubrmsd_' + run + '_' + mode] = \
                    np.sqrt((((tmp[1] - tmp[1].mean()) - (tmp[2] - tmp[2].mean())) ** 2).mean())

        if not result_file.exists():
            res.to_csv(result_file, float_format='%0.3f')
        else:
            res.to_csv(result_file, float_format='%0.3f', mode='a', header=False)

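# Merge sketch (not part of the original module): once all parts have
# finished, the per-part CSVs can be concatenated into a single result file.
# The path mirrors the function above; the part count must match the driver.
def _example_merge_parts(parts,
                         res_path=Path('/Users/u0116961/Documents/work/LDAS/2020-03_scaling/validation')):
    dfs = [pd.read_csv(res_path / f'ascat_eval_part{part}.csv', index_col=0)
           for part in range(1, parts + 1)]
    pd.concat(dfs).to_csv(res_path / 'ascat_eval.csv', float_format='%0.3f')
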
def plot_suspicious_stations(root):

    statlist = pd.read_csv('/Users/u0116961/Documents/work/MadKF/CLSM/suspicious_stations/station_list_r_diff.csv',
                           index_col=0)

    rmsd_root = 'US_M36_SMAP_TB_DA_SM_PROXY_'
    rmsd_exps = list(np.sort([x.name.split(rmsd_root)[1]
                              for x in Path('/Users/u0116961/data_sets/LDASsa_runs').glob('*SM_PROXY*')]))

    ds_ol = LDAS_io('xhourly', 'US_M36_SMAP_TB_OL_scaled_4K_obserr').timeseries
    ds_da = LDAS_io('xhourly', 'US_M36_SMAP_TB_DA_scaled_4K_obserr').timeseries
    ts_ana = LDAS_io('ObsFcstAna', 'US_M36_SMAP_TB_DA_scaled_4K_obserr').timeseries['obs_obs']
    t_ana = pd.DatetimeIndex(ts_ana.time.values).sort_values()

    ascat = HSAF_io()
    gpi_list = pd.read_csv(ascat.root / 'warp5_grid' / 'pointlist_warp_conus.csv', index_col=0)

    ismn = ISMN_io()

    variables = ['sm_surface', 'sm_rootzone']
    modes = ['absolute', 'longterm', 'shortterm']

    ismn.list.index = ismn.list.network + '_' + ismn.list.station
    ismn.list = ismn.list.reindex(statlist.index)

    for i, (meta, ts_insitu) in enumerate(ismn.iter_stations(surface_only=False)):

        # Resume hack for interactive use: skip stations already contained in
        # a previously loaded result set (`tmp_res`), if one exists.
        if 'tmp_res' in locals():
            if (meta.network in tmp_res) and (meta.station in tmp_res):
                print(f'Skipping {i}')
                continue

        try:
            res = pd.DataFrame(meta.copy()).transpose()
            col = meta.ease_col
            row = meta.ease_row
            gpi = lonlat2gpi(meta.lon, meta.lat, gpi_list)

            ts_ascat = ascat.read(gpi) / 100 * 0.6
            if ts_ascat is None:
                continue

            for mode in modes:
                for var in variables:

                    tmp = statlist[(statlist.network == meta.network) & (statlist.station == meta.station)]
                    dpr = tmp[f'diff_pearsonr2_{mode}_{var}'].values[0]
                    dtr = tmp[f'diff_tcar2_{mode}_{var}'].values[0]
                    # Only plot stations where TCA R2 degrades although Pearson R2 improves
                    if not ((dtr < 0) and (dpr > 0)):
                        continue

                    if mode == 'absolute':
                        ts_asc = ts_ascat.dropna()
                    else:
                        ts_asc = calc_anom(ts_ascat, longterm=(mode == 'longterm')).dropna()
                    ts_asc.name = 'ascat'
                    ts_asc = pd.DataFrame(ts_asc)

                    if mode == 'absolute':
                        ts_ins = ts_insitu[var].dropna()
                    else:
                        ts_ins = calc_anom(ts_insitu[var], longterm=(mode == 'longterm')).dropna()
                    ts_ins.name = 'insitu'
                    ts_ins = pd.DataFrame(ts_ins)

                    ind = (ds_ol['snow_mass'].isel(lat=row, lon=col).values == 0) & \
                          (ds_ol['soil_temp_layer1'].isel(lat=row, lon=col).values > 277.15)
                    ts_ol = ds_ol[var].isel(lat=row, lon=col).to_series().loc[ind]
                    ts_ol.index += pd.to_timedelta('2 hours')
                    ind_obs = np.bitwise_or.reduce(~np.isnan(ts_ana[:, :, row, col].values), 1)
                    if mode == 'absolute':
                        ts_ol = ts_ol.reindex(t_ana[ind_obs]).dropna()
                    else:
                        ts_ol = calc_anom(ts_ol.reindex(t_ana[ind_obs]), longterm=(mode == 'longterm')).dropna()
                    ts_ol.name = 'open_loop'
                    ts_ol = pd.DataFrame(ts_ol)

                    ind = (ds_da['snow_mass'].isel(lat=row, lon=col).values == 0) & \
                          (ds_da['soil_temp_layer1'].isel(lat=row, lon=col).values > 277.15)
                    ts_da = ds_da[var].isel(lat=row, lon=col).to_series().loc[ind]
                    ts_da.index += pd.to_timedelta('2 hours')
                    ind_obs = np.bitwise_or.reduce(~np.isnan(ts_ana[:, :, row, col].values), 1)
                    if mode == 'absolute':
                        ts_da = ts_da.reindex(t_ana[ind_obs]).dropna()
                    else:
                        ts_da = calc_anom(ts_da.reindex(t_ana[ind_obs]), longterm=(mode == 'longterm')).dropna()
                    ts_da.name = 'DA_4K'
                    ts_da = pd.DataFrame(ts_da)

                    matched = df_match(ts_ol, ts_da, ts_asc, ts_ins, window=0.5)
                    data = ts_ol.join(matched[0]['DA_4K']).join(matched[1]['ascat']).join(matched[2]['insitu']).dropna()

                    dpr_triplets = data.corr()['DA_4K']['insitu'] - data.corr()['open_loop']['insitu']
                    if dpr_triplets < 0:
                        continue

                    f = plt.figure(figsize=(15, 5))
                    sns.lineplot(data=data[['open_loop', 'DA_4K', 'insitu']],
                                 dashes=False, linewidth=1.5, ax=plt.gca())
                    plt.title(f'{meta.network} / {meta.station} ({var}): '
                              f'd(Pearson R2) = {dpr_triplets:.3f} , d(TCA R2) = {dtr:.3f}')

                    fbase = Path('/Users/u0116961/Documents/work/MadKF/CLSM/suspicious_stations/timeseries')
                    fname = fbase / f'{mode}_{var}_{meta.network}_{meta.station}.png'

                    f.savefig(fname, dpi=300, bbox_inches='tight')
                    plt.close()
        except Exception:
            continue

def noahmp_version_comparison(part, parts):

    result_file = Path(f'/Users/u0116961/Documents/work/LIS/noahmp_version_comparison/result_part{part}.csv')
    if not result_file.parent.exists():
        Path.mkdir(result_file.parent, parents=True)

    ascat = HSAF_io()
    smap = SMAP_io()

    noah3 = Dataset('/Users/u0116961/data_sets/LIS/noahmp36/timeseries.nc')
    noah4 = Dataset('/Users/u0116961/data_sets/LIS/noahmp401/timeseries.nc')

    lats = noah3['lat'][:, :]
    lons = noah3['lon'][:, :]
    ind_lat, ind_lon = np.where(~lats.mask)

    # Split grid cell list for parallelization
    subs = (np.arange(parts + 1) * len(ind_lat) / parts).astype('int')
    subs[-1] = len(ind_lat)
    start = subs[part - 1]
    end = subs[part]

    # Look-up table that contains the grid cells to iterate over
    ind_lat = ind_lat[start:end]
    ind_lon = ind_lon[start:end]

    for i, (i_r, i_c) in enumerate(zip(ind_lat, ind_lon), start=1):
        logging.info(f'{i} / {len(ind_lat)}')

        lat = lats[i_r, i_c]
        lon = lons[i_r, i_c]

        res = pd.DataFrame({'lat': lat, 'lon': lon}, index=(i,))

        # Mean difference, std. difference, and R2 between the two model versions
        for v in ['SM1', 'SM2', 'SM3', 'SM4', 'ST1', 'ST2', 'ST3', 'ST4', 'LAI', 'SWE']:
            if ('SM' in v) | ('ST' in v):
                res[f'mdiff_{v}'], res[f'sdiff_{v}'], res[f'r2_{v}'] = \
                    stats(noah4[v[0:2]][:, int(v[-1]) - 1, i_r, i_c],
                          noah3[v[0:2]][:, int(v[-1]) - 1, i_r, i_c])
            else:
                res[f'mdiff_{v}'], res[f'sdiff_{v}'], res[f'r2_{v}'] = \
                    stats(noah4[v][:, i_r, i_c], noah3[v][:, i_r, i_c])

        time = pd.DatetimeIndex(num2date(noah3['time'][:], units=noah3['time'].units,
                                         only_use_python_datetimes=True,
                                         only_use_cftime_datetimes=False))

        df = pd.DataFrame({'noahmp36': noah3['SM'][:, 0, i_r, i_c],
                           'noahmp401': noah4['SM'][:, 0, i_r, i_c]}, index=time)

        ts_ascat = ascat.read(lat, lon)
        if ts_ascat is None:
            ts_ascat = pd.Series(name='ascat', dtype='float64')

        ts_smap = smap.read(lat, lon)
        if ts_smap is None:
            ts_smap = pd.Series(name='smap', dtype='float64')

        df = pd.concat((df, ts_ascat, ts_smap), axis='columns').dropna()

        for mode in ['abs', 'anom']:

            if mode == 'anom':
                for c in df.columns.values:
                    df[c] = calc_anom(df[c], longterm=False)

            res[f'len_{mode}'] = len(df)

            # TCA-based R2 derived from the extended collocation SNR [dB]
            ec_res = ecol(df, correlated=[['noahmp36', 'noahmp401']])
            for c in df.columns.values:
                snr = 10 ** (ec_res[f'snr_{c}'] / 10)
                res[f'tcr2_{mode}_{c}'] = snr / (1 + snr)

        if not result_file.exists():
            res.to_csv(result_file, float_format='%0.4f')
        else:
            res.to_csv(result_file, float_format='%0.4f', mode='a', header=False)

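# The stats() helper used above is not defined in this module. A plausible
# minimal implementation matching the (mdiff, sdiff, r2) return pattern is
# sketched below purely for illustration; the actual project helper may differ.
def _example_stats(x, y):
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    mdiff = np.nanmean(x) - np.nanmean(y)  # bias between the two versions
    sdiff = np.nanstd(x) - np.nanstd(y)    # difference in variability
    valid = ~(np.isnan(x) | np.isnan(y))
    r2 = np.corrcoef(x[valid], y[valid])[0, 1] ** 2  # squared Pearson correlation
    return mdiff, sdiff, r2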