def align_synoptic_class_with_pw(path):
    import xarray as xr
    from aux_gps import dim_intersection
    from aux_gps import save_ncfile
    from aux_gps import xr_reindex_with_date_range
    from synoptic_procedures import read_synoptic_classification
    pw = xr.load_dataset(path / 'GNSS_PW_thresh_50_homogenized.nc')
    pw = pw[[x for x in pw if '_error' not in x]]
    syn = read_synoptic_classification(report=False).to_xarray()
    # syn = syn.drop(['Name-EN', 'Name-HE'])
    syn = syn['class']
    syn = syn.sel(time=slice('1996', None))
    syn = syn.resample(time='5T').ffill()
    ds_list = []
    for sta in pw:
        print('aligning station {} with synoptics'.format(sta))
        new_time = dim_intersection([pw[sta], syn])
        syn_da = xr.DataArray(syn.sel(time=new_time))
        syn_da.name = '{}_class'.format(sta)
        syn_da = xr_reindex_with_date_range(syn_da)
        ds_list.append(syn_da)
    ds = xr.merge(ds_list)
    # fill NaNs with 0 *before* casting to int8, otherwise the NaNs are
    # silently converted to garbage integer values:
    ds = ds.fillna(0)
    ds = ds.astype('int8')
    filename = 'GNSS_synoptic_class.nc'
    save_ncfile(ds, path, filename)
    return ds

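# Example usage (a sketch; `work_yuval` is the module-level pathlib.Path used
# as a default elsewhere in this file):
# ds = align_synoptic_class_with_pw(work_yuval)
# ds['tela_class'].sel(time='2015-06').plot()
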
def calculate_zenith_hydrostatic_delay_dsea(ims_path=ims_path, pres=None):
    from PW_stations import calculate_ZHD
    from PW_stations import produce_geo_gnss_solved_stations
    from aux_gps import xr_reindex_with_date_range
    import xarray as xr
    if pres is None:
        pres = xr.open_dataset(ims_path / 'IMS_BP_israeli_10mins.nc')['SEDOM']
    p_sta_ht_km = pres.attrs['station_alt'] / 1000
    df = produce_geo_gnss_solved_stations(plot=False)
    lat = df.loc['dsea', 'lat']
    ht = df.loc['dsea', 'alt']
    zhd = calculate_ZHD(pres, lat=lat, ht_km=ht / 1000,
                        pressure_station_height_km=p_sta_ht_km)
    zhd = xr_reindex_with_date_range(zhd, freq='5T')
    zhd = zhd.interpolate_na('time', max_gap='1H', method='linear')
    return zhd

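# For reference, the standard Saastamoinen/Davis et al. (1985) form of the
# zenith hydrostatic delay, which PW_stations.calculate_ZHD presumably
# implements; the helper below is a hypothetical illustration only, not the
# project's exact code (pressure in hPa, latitude in degrees, height in km):
# import numpy as np
#
# def zhd_saastamoinen(pres_hpa, lat_deg, ht_km):
#     """Return ZHD in meters."""
#     f = 1 - 0.00266 * np.cos(2 * np.deg2rad(lat_deg)) - 0.00028 * ht_km
#     return 0.0022768 * pres_hpa / f
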
def prepare_pwv_for_climatol(path=work_yuval, freq='daily',
                             savepath=homo_path, first_year='1998',
                             last_year='2019', pwv_ds=None, group=None):
    """freq can be 'daily' or 'monthly'.
    climatol params used:
        std=2 since the PDF is positively skewed,
        na.strings="-999.9" for NA values,
        dz.max=7, i.e., outliers are capped at 7 sigma std,
    called in R as:
        homogen('PWV', 1998, 2019, na.strings="-999.9", dz.max=7, std=2)
        dahstat('PWV', 1998, 2019, stat='series', long=TRUE)"""
    import xarray as xr
    import csv
    from aux_gps import xr_reindex_with_date_range
    from PW_stations import produce_geo_gnss_solved_stations
    from PW_from_gps_figures import st_order_climate
    freq_dict = {'daily': 'D', 'monthly': 'MS'}
    if pwv_ds is not None:
        ds = pwv_ds
    else:
        ds = xr.load_dataset(path / 'GNSS_PW_{}_thresh_50.nc'.format(freq))
    ds = xr_reindex_with_date_range(ds, freq=freq_dict[freq],
                                    dt_min='{}-01-01'.format(first_year),
                                    dt_max='{}-12-31'.format(last_year),
                                    drop=False)
    df_gnss = produce_geo_gnss_solved_stations(plot=False)
    # sites = df.dropna()[['lat', 'alt', 'groups_annual']].sort_values(
    #     by=['groups_annual', 'lat'], ascending=[1, 0]).index
    df = df_gnss.loc[st_order_climate, :]
    df['site'] = df.index
    df['name'] = df['site'].str.upper()
    df = df[['lat', 'lon', 'alt', 'site', 'name']]
    data = ds.to_dataframe().T
    if group is not None:
        # group can be 0 to 2:
        inds = [x for x in df_gnss[df_gnss['groups_climate'] == group].index]
        df = df.loc[inds, :]
        data = data.loc[inds, :]
    else:
        inds = [x for x in df_gnss.index if x in ds]
        df = df.loc[inds, :]
    if group is not None:
        if freq == 'daily':
            filename = 'PWV{}-d_{}-{}.est'.format(group, first_year,
                                                  last_year)
        else:
            filename = 'PWV_{}_{}-{}.est'.format(group, first_year, last_year)
    else:
        filename = 'PWV_{}-{}.est'.format(first_year, last_year)
    # the .est file holds the station coordinates and names:
    df.to_csv(savepath / filename, sep=' ', index=False, header=False,
              quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
    # the .dat file holds the data values themselves:
    filename = filename.replace('.est', '.dat')
    df = data
    df = df.round(3)
    df.to_csv(savepath / filename, sep=' ', index=False, header=False,
              line_terminator='\n', na_rep='-999.9')
    return

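# Example: write climatol input files for each climate group (a sketch;
# group labels follow the 'groups_climate' column used above):
# for grp in [0, 1, 2]:
#     prepare_pwv_for_climatol(freq='daily', group=grp)
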
def read_gipsyx_all_yearly_files(load_path, savepath=None, iqr_k=3.0,
                                 plot=False):
    """read, stitch and clean all yearly post-processed PPP gipsyx solutions
    and concat them to a multi-field time-series dataset"""
    from aux_gps import path_glob
    import xarray as xr
    from aux_gps import get_unique_index
    from aux_gps import dim_intersection
    import pandas as pd
    from aux_gps import filter_nan_errors
    from aux_gps import keep_iqr
    from aux_gps import xr_reindex_with_date_range
    from aux_gps import transform_ds_to_lat_lon_alt
    import logging

    def stitch_yearly_files(ds_list):
        """input is a list of yearly multi-field datasets and output is the
        same list but with the year-boundary discontinuities stitched"""
        fields = [x for x in ds_list[0].data_vars]
        for i, dss in enumerate(ds_list):
            if i == len(ds_list) - 1:
                break
            first_year = int(ds_list[i].time.dt.year.median().item())
            second_year = int(ds_list[i + 1].time.dt.year.median().item())
            first_ds = ds_list[i].sel(time=slice(
                '{}-12-31T18:00'.format(first_year), str(second_year)))
            second_ds = ds_list[i + 1].sel(time=slice(
                str(first_year), '{}-01-01T06:00'.format(second_year)))
            if dim_intersection([first_ds, second_ds], 'time') is None:
                logger.warning('skipping stitching years {} and {}...'.format(
                    first_year, second_year))
                continue
            else:
                logger.info('stitching years {} and {}'.format(
                    first_year, second_year))
            time = xr.concat([first_ds.time, second_ds.time], 'time')
            time = pd.to_datetime(get_unique_index(time).values)
            st_list = []
            for field in fields:
                df = first_ds[field].to_dataframe()
                df.columns = ['first']
                df = df.reindex(time)
                df['second'] = second_ds[field].to_dataframe()
                if field in ['X', 'Y', 'Z']:
                    method = 'simple_mean'
                elif field in ['GradNorth', 'GradEast', 'WetZ']:
                    method = 'smooth_mean'
                elif 'error' in field:
                    method = 'error_mean'
                else:
                    # defensive fallback (assumption): plain mean for any
                    # field not covered by the branches above
                    method = 'simple_mean'
                dfs = stitch_two_cols(df, method=method)['stitched_signal']
                dfs.index.name = 'time'
                st = dfs.to_xarray()
                st.name = field
                st_list.append(st)
            # merge to all fields:
            st_ds = xr.merge(st_list)
            # replace the overlapping values in the first and second
            # datasets with the stitched values:
            first_time = dim_intersection([ds_list[i], st_ds])
            vals_rpl = st_ds.sel(time=first_time)
            for field in ds_list[i].data_vars:
                ds_list[i][field].loc[{'time': first_time}] = vals_rpl[field]
            second_time = dim_intersection([ds_list[i + 1], st_ds])
            vals_rpl = st_ds.sel(time=second_time)
            for field in ds_list[i + 1].data_vars:
                ds_list[i + 1][field].loc[{
                    'time': second_time}] = vals_rpl[field]
        return ds_list

    logger = logging.getLogger('gipsyx_post_proccesser')
    files = sorted(path_glob(load_path, '*.nc'))
    ds_list = []
    for file in files:
        filename = file.as_posix().split('/')[-1]
        station = filename.split('_')[0]
        if 'ppp_post' not in filename:
            continue
        logger.info('reading {}'.format(filename))
        dss = xr.open_dataset(file)
        ds_list.append(dss)
    # now loop over ds_list and stitch yearly discontinuities:
    ds_list = stitch_yearly_files(ds_list)
    logger.info('merging all years...')
    ds = xr.merge(ds_list)
    logger.info('fixing meta-data...')
    for da in ds.data_vars:
        old_keys = [x for x in ds[da].attrs.keys()]
        vals = [x for x in ds[da].attrs.values()]
        new_keys = [x.split('>')[-1] for x in old_keys]
        ds[da].attrs = dict(zip(new_keys, vals))
        if 'desc' in ds[da].attrs.keys():
            ds[da].attrs['full_name'] = ds[da].attrs.pop('desc')
    logger.info('dropping duplicate time stamps...')
    ds = get_unique_index(ds)
    # clean all fields with IQR:
    logger.info('removing outliers with IQR of {}...'.format(iqr_k))
    ds = keep_iqr(ds, dim='time', qlow=0.25, qhigh=0.75, k=iqr_k)
    # filter out fields whose errors are NaNs:
    logger.info('filtering out fields if their errors are NaN...')
    ds = filter_nan_errors(ds, error_str='_error', dim='time')
    logger.info('transforming X, Y, Z coords to lat, lon and alt...')
    ds = transform_ds_to_lat_lon_alt(ds, ['X', 'Y', 'Z'], '_error', 'time')
    logger.info(
        'reindexing fields with 5 mins frequency (i.e., inserting NaNs)')
    ds = xr_reindex_with_date_range(ds, 'time', '5min')
    ds.attrs['station'] = station
    if plot:
        plot_gipsy_field(ds, None)
    if savepath is not None:
        comp = dict(zlib=True, complevel=9)  # best compression
        encoding = {var: comp for var in ds.data_vars}
        ymin = ds.time.min().dt.year.item()
        ymax = ds.time.max().dt.year.item()
        new_filename = '{}_PPP_{}-{}.nc'.format(station, ymin, ymax)
        ds.to_netcdf(savepath / new_filename, 'w', encoding=encoding)
        logger.info('{} was saved to {}'.format(new_filename, savepath))
    logger.info('Done!')
    return ds

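# Example usage (a sketch; `gipsyx_path` is a hypothetical pathlib.Path
# pointing at a directory of yearly '*_ppp_post*.nc' solutions):
# ds = read_gipsyx_all_yearly_files(gipsyx_path, savepath=gipsyx_path,
#                                   iqr_k=3.0, plot=False)
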
def read_BD_matfile(path=ceil_path, plot=True, month=None, add_syn=True):
    from scipy.io import loadmat
    import pandas as pd
    from aux_gps import xr_reindex_with_date_range
    import matplotlib.pyplot as plt
    from aux_gps import dim_intersection
    from synoptic_procedures import read_synoptic_classification
    file = path / 'PBL_BD_LST.mat'
    mat = loadmat(file)
    mdata = mat['pblBD4shlomi']
    # mdata = mat['PBL_BD_LST']
    dates = mdata[:, :3]
    pbl = mdata[:, 3:]
    dates = dates.astype(str)
    dts = [pd.to_datetime(x[0] + '-' + x[1] + '-' + x[2]) for x in dates]
    dfs = []
    for i, dt in enumerate(dts):
        # each row holds 48 half-hourly samples of one local-time day:
        time = dt + pd.Timedelta(0.5, unit='H')
        times = pd.date_range(time, periods=48, freq='30T')
        df = pd.DataFrame(pbl[i], index=times)
        dfs.append(df)
    df = pd.concat(dfs)
    df.columns = ['MLH']
    df.index.name = 'time'
    # switch from local standard time to UTC:
    df.index = df.index - pd.Timedelta(2, unit='H')
    da = df.to_xarray()['MLH']
    da.name = 'BD'
    da.attrs['full_name'] = 'Mixing Layer Height'
    da.attrs['name'] = 'MLH'
    da.attrs['units'] = 'm'
    da.attrs['station_full_name'] = 'Beit Dagan'
    da.attrs['lon'] = 34.81
    da.attrs['lat'] = 32.00
    da.attrs['alt'] = 34
    da = xr_reindex_with_date_range(da, freq='30T')
    # add synoptic data:
    syn = read_synoptic_classification().to_xarray()
    syn = syn.sel(time=slice('2015', '2016'))
    syn = syn.resample(time='30T').ffill()
    new_time = dim_intersection([da, syn])
    syn_da = syn.sel(time=new_time)
    syn_da = xr_reindex_with_date_range(syn_da, freq='30T')
    if plot:
        bd2015 = da.sel(time='2015').to_dataframe()
        bd2016 = da.sel(time='2016').to_dataframe()
        fig, axes = plt.subplots(2, 1, sharey=True, sharex=False,
                                 figsize=(15, 10))
        if add_syn:
            cmap = plt.get_cmap('tab10')
            syn_df = syn_da.to_dataframe()
            bd2015['synoptics'] = syn_df.loc['2015', 'class_abbr']
            groups = []
            for i, (index, group) in enumerate(bd2015.groupby('synoptics')):
                groups.append(index)
                d = xr_reindex_with_date_range(group['BD'].to_xarray(),
                                               freq='30T')
                d.to_dataframe().plot(x_compat=True, ms=10, color=cmap(i),
                                      ax=axes[0],
                                      xlim=['2015-06', '2015-10'])
            axes[0].legend(groups)
            bd2016['synoptics'] = syn_df.loc['2016', 'class_abbr']
            groups = []
            for i, (index, group) in enumerate(bd2016.groupby('synoptics')):
                groups.append(index)
                d = xr_reindex_with_date_range(group['BD'].to_xarray(),
                                               freq='30T')
                d.to_dataframe().plot(x_compat=True, ms=10, color=cmap(i),
                                      ax=axes[1],
                                      xlim=['2016-06', '2016-10'])
            axes[1].legend(groups)
        else:
            bd2015.plot(ax=axes[0], xlim=['2015-06', '2015-10'])
            bd2016.plot(ax=axes[1], xlim=['2016-06', '2016-10'])
        for ax in axes.flatten():
            ax.set_ylabel('MLH [m]')
            ax.set_xlabel('UTC')
            ax.grid()
        fig.tight_layout()
        fig.suptitle('MLH from Beit-Dagan ceilometer for 2015 and 2016')
        filename = 'MLH-BD_syn.png'
        plt.savefig(savefig_path / filename, orientation='portrait')
    if add_syn:
        ds = da.to_dataset(name='BD')
        ds['syn'] = syn_da['class_abbr']
        return ds
    else:
        return da

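# Example usage (a sketch): load the Beit-Dagan MLH together with synoptic
# classes and count the half-hourly samples per class:
# ds = read_BD_matfile(plot=False, add_syn=True)
# ds['BD'].groupby(ds['syn']).count()
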
def align_pw_mlh(path=work_yuval, ceil_path=ceil_path, site='tela',
                 interpolate=None, plot=True, dt_range_str='2015'):
    import xarray as xr
    from aux_gps import dim_intersection
    from aux_gps import xr_reindex_with_date_range
    import pandas as pd
    import matplotlib.pyplot as plt

    def pw_mlh_to_df(pw_new, mlh_site):
        newtime = dim_intersection([pw_new, mlh_site])
        MLH = mlh_site.sel(time=newtime)
        PW = pw_new.sel(time=newtime)
        df = PW.to_dataframe()
        df[MLH.name] = MLH.to_dataframe()
        new_time = pd.date_range(df.index.min(), df.index.max(), freq='1H')
        df = df.reindex(new_time)
        df.index.name = 'time'
        return df

    mlh = xr.load_dataset(ceil_path / 'MLH_from_ceilometers.nc')
    mlh_site = xr_reindex_with_date_range(mlh[pw_mlh_dict.get(site)],
                                          freq='1H')
    if interpolate is not None:
        print('interpolating ceil-site {} with max-gap of {}.'.format(
            pw_mlh_dict.get(site), interpolate))
        attrs = mlh_site.attrs
        mlh_site_inter = mlh_site.interpolate_na('time', max_gap=interpolate,
                                                 method='cubic')
        mlh_site_inter.attrs = attrs
    pw = xr.open_dataset(path / 'GNSS_PW_thresh_50_homogenized.nc')
    pw = pw[['tela', 'klhv', 'jslm', 'nzrt', 'yrcm']]
    pw.load()
    pw_new = pw[site]
    if interpolate is not None:
        newtime = dim_intersection([pw_new, mlh_site_inter])
    else:
        newtime = dim_intersection([pw_new, mlh_site])
    pw_new = pw_new.sel(time=newtime)
    pw_new = xr_reindex_with_date_range(pw_new, freq='1H')
    if interpolate is not None:
        print('interpolating pw-site {} with max-gap of {}.'.format(
            site, interpolate))
        attrs = pw_new.attrs
        pw_new_inter = pw_new.interpolate_na('time', max_gap=interpolate,
                                             method='cubic')
        pw_new_inter.attrs = attrs
    df = pw_mlh_to_df(pw_new, mlh_site)
    if interpolate is not None:
        df_inter = pw_mlh_to_df(pw_new_inter, mlh_site_inter)
    if dt_range_str is not None:
        df = df.loc[dt_range_str, :]
    if plot:
        fig, ax = plt.subplots(figsize=(18, 5))
        if interpolate is not None:
            df_inter[pw_new.name].plot(style='b--', ax=ax)
            # same ax as above since the secondary y-axis is automatically
            # added on the right:
            df_inter[mlh_site.name].plot(style='r--', secondary_y=True,
                                         ax=ax)
        ax = df[pw_new.name].plot(style='b-', marker='o', ax=ax, ms=5)
        ax_twin = df[mlh_site.name].plot(style='r-', marker='s',
                                         secondary_y=True, ax=ax, ms=5)
        if interpolate is not None:
            ax.legend(ax.get_lines() + ax.right_ax.get_lines(),
                      ['PWV {} max interpolation'.format(interpolate),
                       'PWV',
                       'MLH {} max interpolation'.format(interpolate),
                       'MLH'], loc='best')
        else:
            ax.legend([ax.get_lines()[0], ax.right_ax.get_lines()[0]],
                      ['PWV', 'MLH'], loc='upper center')
        ax.set_title('MLH {} site and PWV {} site'.format(
            pw_mlh_dict.get(site), site))
        ax.set_xlim(df.dropna().index.min(), df.dropna().index.max())
        ax.set_ylabel('PWV [mm]', color='b')
        ax_twin.set_ylabel('MLH [m]', color='r')
        ax.tick_params(axis='y', colors='b')
        ax_twin.tick_params(axis='y', colors='r')
        ax.grid(True, which='both', axis='x')
        fig.tight_layout()
        if interpolate is not None:
            filename = '{}-{}_{}_time_series_{}_max_gap_interpolation.png'.format(
                site, pw_mlh_dict.get(site), dt_range_str, interpolate)
        else:
            filename = '{}-{}_{}_time_series.png'.format(
                site, pw_mlh_dict.get(site), dt_range_str)
        plt.savefig(savefig_path / filename, orientation='portrait')
    if interpolate is not None:
        ds = df_inter.to_xarray()
    else:
        ds = df.to_xarray()
    ds[pw_new.name].attrs.update(pw_new.attrs)
    ds[mlh_site.name].attrs.update(mlh_site.attrs)
    return ds

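# Example usage (a sketch): align TELA PWV with its matching ceilometer MLH
# site for 2015, interpolating gaps of up to 12 hours:
# ds = align_pw_mlh(site='tela', interpolate='12H', dt_range_str='2015')
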
def produce_seasonal_trend_breakdown_time_series_from_jpl_gipsyx_site(
        station='bshm', path=jpl_path, var='V', k=2, verbose=True,
        plot=True):
    import xarray as xr
    from aux_gps import harmonic_da_ts
    from aux_gps import loess_curve
    from aux_gps import keep_iqr
    from aux_gps import get_unique_index
    from aux_gps import xr_reindex_with_date_range
    from aux_gps import decimal_year_to_datetime
    import matplotlib.pyplot as plt
    if verbose:
        print('producing seasonal time series for {} station {}'.format(
            station, var))
    ds = read_time_series_jpl_gipsyx_site(station=station,
                                          path=path / 'time_series',
                                          verbose=verbose)
    # dyear = ds['decimal_year']
    da_ts = ds[var]
    da_ts = xr_reindex_with_date_range(get_unique_index(da_ts), freq='D')
    xr.infer_freq(da_ts['time'])  # result unused
    if k is not None:
        da_ts = keep_iqr(da_ts, k=k)
    da_ts.name = '{}_{}'.format(station, var)
    # detrend with a loess curve:
    trend = loess_curve(da_ts, plot=False)['mean']
    trend.name = da_ts.name + '_trend'
    trend = xr_reindex_with_date_range(trend, freq='D')
    da_ts_detrended = da_ts - trend
    if verbose:
        print('detrended by loess.')
    da_ts_detrended.name = da_ts.name + '_detrended'
    # harmonic cpy (cycles-per-year) fits:
    harm = harmonic_da_ts(da_ts_detrended.dropna('time'), n=2, grp='month',
                          return_ts_fit=True, verbose=verbose)
    harm = xr_reindex_with_date_range(harm, time_dim='time', freq='D')
    harm1 = harm.sel(cpy=1).reset_coords(drop=True)
    harm1.name = da_ts.name + '_annual'
    harm1_keys = [x for x in harm1.attrs.keys() if '_1' in x]
    harm1.attrs = dict(zip(harm1_keys, [harm1.attrs[x] for x in harm1_keys]))
    harm2 = harm.sel(cpy=2).reset_coords(drop=True)
    harm2.name = da_ts.name + '_semiannual'
    harm2_keys = [x for x in harm2.attrs.keys() if '_2' in x]
    harm2.attrs = dict(zip(harm2_keys, [harm2.attrs[x] for x in harm2_keys]))
    resid = da_ts_detrended - harm1 - harm2
    resid.name = da_ts.name + '_residual'
    ds = xr.merge([da_ts, trend, harm1, harm2, resid])
    # load breakpoints:
    try:
        breakpoints = xr.open_dataset(
            jpl_path / 'jpl_break_estimates.nc').sel(
                station=station.upper())[var]
        df = breakpoints.dropna('year')['year'].to_dataframe()
        # convert the decimal years to (daily-rounded) datetimes:
        df['dt'] = df['year'].apply(decimal_year_to_datetime)
        df['dt'] = df['dt'].round('D')
        bp_da = df.set_index(df['dt'])['dt'].to_xarray()
        bp_da = bp_da.rename({'dt': 'time'})
        ds['{}_{}_breakpoints'.format(station, var)] = bp_da
        no_bp = False
    except KeyError:
        if verbose:
            print('no breakpoints found for {}!'.format(station))
        no_bp = True
    # seas = xr.load_dataset(
    #     jpl_path/'jpl_seasonal_estimates.nc').sel(station=station.upper())
    # ac1, as1, ac2, as2 = seas[var].values
    # # build seasonal time series:
    # annual = xr.DataArray(ac1*np.cos(dyear*2*np.pi)+as1 *
    #                       np.sin(dyear*2*np.pi), dims=['time'])
    # annual['time'] = da_ts['time']
    # annual.name = '{}_{}_annual'.format(station, var)
    # annual.attrs['units'] = 'mm'
    # annual.attrs['long_name'] = 'annual mode'
    # semiannual = xr.DataArray(ac2*np.cos(dyear*4*np.pi)+as2 *
    #                           np.sin(dyear*4*np.pi), dims=['time'])
    # semiannual['time'] = da_ts['time']
    # semiannual.name = '{}_{}_semiannual'.format(station, var)
    # semiannual.attrs['units'] = 'mm'
    # semiannual.attrs['long_name'] = 'semiannual mode'
    # ds = xr.merge([annual, semiannual, da_ts])
    if plot:
        # plt.figure(figsize=(20, 20))
        dst = ds[[x for x in ds if 'breakpoints' not in x]]
        axes = dst.to_dataframe().plot(subplots=True, figsize=(20, 20),
                                       color='k')
        [ax.grid() for ax in axes]
        [ax.set_ylabel('[mm]') for ax in axes]
        if not no_bp:
            for bp in df['dt']:
                [ax.axvline(bp, color='red') for ax in axes]
        plt.tight_layout()
        fig, ax = plt.subplots(figsize=(7, 7))
        harm_mm = harmonic_da_ts(da_ts_detrended.dropna('time'), n=2,
                                 grp='month', return_ts_fit=False,
                                 verbose=verbose)
        harm_mm['{}_{}_detrended'.format(station, var)].plot.line(
            ax=ax, linewidth=0, marker='o', color='k')
        harm_mm['{}_mean'.format(station)].sel(cpy=1).plot.line(
            ax=ax, marker=None, color='tab:red')
        harm_mm['{}_mean'.format(station)].sel(cpy=2).plot.line(
            ax=ax, marker=None, color='tab:blue')
        harm_mm['{}_mean'.format(station)].sum('cpy').plot.line(
            ax=ax, marker=None, color='tab:purple')
        ax.grid()
    return ds

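# Example usage (a sketch): decompose the 'bshm' vertical ('V') JPL series
# into trend, annual, semiannual and residual parts:
# ds = produce_seasonal_trend_breakdown_time_series_from_jpl_gipsyx_site(
#     station='bshm', var='V', k=2, plot=False)
# ds['bshm_V_residual'].plot()
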