def align_synoptic_class_with_pw(path):
    import xarray as xr
    from aux_gps import dim_intersection
    from aux_gps import save_ncfile
    from aux_gps import xr_reindex_with_date_range
    pw = xr.load_dataset(path / 'GNSS_PW_thresh_50_homogenized.nc')
    pw = pw[[x for x in pw if '_error' not in x]]
    syn = read_synoptic_classification(report=False).to_xarray()
    # syn = syn.drop(['Name-EN', 'Name-HE'])
    syn = syn['class']
    syn = syn.sel(time=slice('1996', None))
    syn = syn.resample(time='5T').ffill()
    ds_list = []
    for sta in pw:
        print('aligning station {} with synoptics'.format(sta))
        new_time = dim_intersection([pw[sta], syn])
        syn_da = xr.DataArray(syn.sel(time=new_time))
        syn_da.name = '{}_class'.format(sta)
        syn_da = xr_reindex_with_date_range(syn_da)
        ds_list.append(syn_da)
    ds = xr.merge(ds_list)
    # fill NaNs before casting; casting NaN to int8 produces garbage values:
    ds = ds.fillna(0)
    ds = ds.astype('int8')
    filename = 'GNSS_synoptic_class.nc'
    save_ncfile(ds, path, filename)
    return ds
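# The alignment trick above is: upsample the daily synoptic class to the
# 5-min PWV sampling rate with a forward-fill, then intersect the two time
# indexes. A self-contained sketch of that step with synthetic data (all
# names below are illustrative, not part of this codebase):
import numpy as np
import pandas as pd
import xarray as xr

daily_class = xr.DataArray(np.random.randint(1, 20, size=10), dims='time',
                           coords={'time': pd.date_range('2000-01-01', periods=10)})
five_min = daily_class.resample(time='5T').ffill()   # each day's class repeats every 5 min
pwv_time = pd.date_range('2000-01-03', periods=500, freq='5T')
common = five_min['time'].to_index().intersection(pwv_time)
aligned = five_min.sel(time=common)                  # synoptic class on the PWV grid
print(aligned.sizes)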
Example #2
def calculate_zenith_hydrostatic_delay_dsea(ims_path=ims_path, pres=None):
    from PW_stations import calculate_ZHD
    from PW_stations import produce_geo_gnss_solved_stations
    from aux_gps import xr_reindex_with_date_range
    import xarray as xr
    if pres is None:
        pres = xr.open_dataset(ims_path / 'IMS_BP_israeli_10mins.nc')['SEDOM']
    p_sta_ht_km = pres.attrs['station_alt'] / 1000
    df = produce_geo_gnss_solved_stations(plot=False)
    lat = df.loc['dsea', 'lat']
    ht = df.loc['dsea', 'alt']
    zhd = calculate_ZHD(pres,
                        lat=lat,
                        ht_km=ht / 1000,
                        pressure_station_height_km=p_sta_ht_km)
    zhd = xr_reindex_with_date_range(zhd, freq='5T')
    zhd = zhd.interpolate_na('time', max_gap='1H', method='linear')
    return zhd
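# calculate_ZHD (PW_stations) is not shown in this listing; a plausible
# sketch of the standard Saastamoinen zenith hydrostatic delay it presumably
# computes, given surface pressure in hPa. This is an assumption about its
# internals, not the project's actual code; the real function also adjusts
# pressure from the barometer height to the GNSS antenna height:
import numpy as np

def saastamoinen_zhd(pressure_hpa, lat_deg, ht_km):
    """Zenith hydrostatic delay in meters (Davis et al., 1985)."""
    f = 1.0 - 0.00266 * np.cos(2.0 * np.deg2rad(lat_deg)) - 0.00028 * ht_km
    return 0.0022768 * pressure_hpa / f

print(saastamoinen_zhd(1013.25, 31.0, -0.4))  # the Dead Sea site sits below sea level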
def prepare_pwv_for_climatol(path=work_yuval,
                             freq='daily',
                             savepath=homo_path,
                             first_year='1998',
                             last_year='2019',
                             pwv_ds=None,
                             group=None):
    """freq can be daily or monthly.
    climatol params used:
        std=2 since the PDF is positively skewed,
        na.strings="-999.9" this is for NA values,
        dz.max=7 this is 7 sigma std outliers max,
        homogen('PWV',1998,2019, na.strings="-999.9",dz.max=7,std=2)
        dahstat('PWV',1998,2019,stat='series',long=TRUE)"""
    import xarray as xr
    import csv
    from aux_gps import xr_reindex_with_date_range
    from PW_stations import produce_geo_gnss_solved_stations
    from PW_from_gps_figures import st_order_climate
    freq_dict = {'daily': 'D', 'monthly': 'MS'}
    if pwv_ds is not None:
        ds = pwv_ds
    else:
        ds = xr.load_dataset(path / 'GNSS_PW_{}_thresh_50.nc'.format(freq))
    ds = xr_reindex_with_date_range(ds,
                                    freq=freq_dict[freq],
                                    dt_min='{}-01-01'.format(first_year),
                                    dt_max='{}-12-31'.format(last_year),
                                    drop=False)
    df_gnss = produce_geo_gnss_solved_stations(plot=False)
    #    sites = df.dropna()[['lat', 'alt', 'groups_annual']].sort_values(by=['groups_annual', 'lat'],ascending=[1,0]).index
    df = df_gnss.loc[st_order_climate, :]
    df['site'] = df.index
    df['name'] = df['site'].str.upper()
    df = df[['lat', 'lon', 'alt', 'site', 'name']]
    data = ds.to_dataframe().T
    if group is not None:  # can be 0 to 2
        inds = [x for x in df_gnss[df_gnss['groups_climate'] == group].index]
        df = df.loc[inds, :]
        data = data.loc[inds, :]
    else:
        inds = [x for x in df_gnss.index if x in ds]
        df = df.loc[inds, :]
    if group is not None:
        if freq == 'daily':
            filename = 'PWV{}-d_{}-{}.est'.format(group, first_year, last_year)
        else:
            filename = 'PWV_{}_{}-{}.est'.format(group, first_year, last_year)
    else:
        filename = 'PWV_{}-{}.est'.format(first_year, last_year)
    df.to_csv(savepath / filename,
              sep=' ',
              index=False,
              header=False,
              quotechar='"',
              quoting=csv.QUOTE_NONNUMERIC)
    filename = filename.replace('.est', '.dat')
    df = data
    df = df.round(3)
    df.to_csv(savepath / filename,
              sep=' ',
              index=False,
              header=False,
              line_terminator='\n',
              na_rep='-999.9')  # the NA string climatol expects (see docstring)
    return
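# A toy sketch of the file pair written above for climatol's homogen(): a
# *.est station table and a whitespace-separated *.dat value table with
# -999.9 marking NAs. The station values and file names are made up; the
# column order simply mirrors the function above:
import csv
import pandas as pd

est = pd.DataFrame({'lat': [32.07, 31.77], 'lon': [34.78, 35.20],
                    'alt': [50.0, 780.0], 'site': ['tela', 'jslm'],
                    'name': ['TELA', 'JSLM']})
est[['lat', 'lon', 'alt', 'site', 'name']].to_csv(
    'PWV_demo.est', sep=' ', index=False, header=False,
    quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
data = pd.DataFrame([[24.1, 18.3], [float('nan'), 19.0]])
data.round(3).to_csv('PWV_demo.dat', sep=' ', index=False, header=False,
                     na_rep='-999.9')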
def read_gipsyx_all_yearly_files(load_path,
                                 savepath=None,
                                 iqr_k=3.0,
                                 plot=False):
    """read, stitch and clean all yearly post proccessed ppp gipsyx solutions
    and concat them to a multiple fields time-series dataset"""
    from aux_gps import path_glob
    import xarray as xr
    from aux_gps import get_unique_index
    from aux_gps import dim_intersection
    import pandas as pd
    from aux_gps import filter_nan_errors
    from aux_gps import keep_iqr
    from aux_gps import xr_reindex_with_date_range
    from aux_gps import transform_ds_to_lat_lon_alt
    import logging

    def stitch_yearly_files(ds_list):
        """input is multiple field yearly dataset list and output is the same
        but with stitched discontinuieties"""
        fields = [x for x in ds_list[0].data_vars]
        for i, dss in enumerate(ds_list):
            if i == len(ds_list) - 1:
                break
            first_year = int(ds_list[i].time.dt.year.median().item())
            second_year = int(ds_list[i + 1].time.dt.year.median().item())
            first_ds = ds_list[i].sel(time=slice(
                '{}-12-31T18:00'.format(first_year), str(second_year)))
            second_ds = ds_list[i + 1].sel(time=slice(
                str(first_year), '{}-01-01T06:00'.format(second_year)))
            if dim_intersection([first_ds, second_ds], 'time') is None:
                logger.warning('skipping stitching years {} and {}...'.format(
                    first_year, second_year))
                continue
            else:
                logger.info('stitching years {} and {}'.format(
                    first_year, second_year))
            time = xr.concat([first_ds.time, second_ds.time], 'time')
            time = pd.to_datetime(get_unique_index(time).values)
            st_list = []
            for field in fields:
                df = first_ds[field].to_dataframe()
                df.columns = ['first']
                df = df.reindex(time)
                df['second'] = second_ds[field].to_dataframe()
                if field in ['X', 'Y', 'Z']:
                    method = 'simple_mean'
                elif field in ['GradNorth', 'GradEast', 'WetZ']:
                    method = 'smooth_mean'
                elif 'error' in field:
                    method = 'error_mean'
                else:
                    # fall back so an unrecognized field cannot leave
                    # method undefined:
                    method = 'simple_mean'
                dfs = stitch_two_cols(df, method=method)['stitched_signal']
                dfs.index.name = 'time'
                st = dfs.to_xarray()
                st.name = field
                st_list.append(st)
            # merge to all fields:
            st_ds = xr.merge(st_list)
            # replace stitched values to first ds and second ds:
            first_time = dim_intersection([ds_list[i], st_ds])
            vals_rpl = st_ds.sel(time=first_time)
            for field in ds_list[i].data_vars:
                ds_list[i][field].loc[{'time': first_time}] = vals_rpl[field]
            second_time = dim_intersection([ds_list[i + 1], st_ds])
            vals_rpl = st_ds.sel(time=second_time)
            for field in ds_list[i + 1].data_vars:
                ds_list[i + 1][field].loc[{
                    'time': second_time
                }] = vals_rpl[field]
        return ds_list

    logger = logging.getLogger('gipsyx_post_proccesser')
    files = sorted(path_glob(load_path, '*.nc'))
    ds_list = []
    for file in files:
        filename = file.name
        station = filename.split('_')[0]
        if 'ppp_post' not in filename:
            continue
        logger.info('reading {}'.format(filename))
        dss = xr.open_dataset(file)
        ds_list.append(dss)
    # now loop over ds_list and stitch yearly discontinuities:
    ds_list = stitch_yearly_files(ds_list)
    logger.info('merging all years...')
    ds = xr.merge(ds_list)
    logger.info('fixing meta-data...')
    for da in ds.data_vars:
        old_keys = [x for x in ds[da].attrs.keys()]
        vals = [x for x in ds[da].attrs.values()]
        new_keys = [x.split('>')[-1] for x in old_keys]
        ds[da].attrs = dict(zip(new_keys, vals))
        if 'desc' in ds[da].attrs.keys():
            ds[da].attrs['full_name'] = ds[da].attrs.pop('desc')
    logger.info('dropping duplicates time stamps...')
    ds = get_unique_index(ds)
    # clean with IQR all fields:
    logger.info('removing outliers with IQR of {}...'.format(iqr_k))
    ds = keep_iqr(ds, dim='time', qlow=0.25, qhigh=0.75, k=iqr_k)
    # filter the fields based on their errors not being NaNs:
    logger.info('filtering out fields if their errors are NaN...')
    ds = filter_nan_errors(ds, error_str='_error', dim='time')
    logger.info('transforming X, Y, Z coords to lat, lon and alt...')
    ds = transform_ds_to_lat_lon_alt(ds, ['X', 'Y', 'Z'], '_error', 'time')
    logger.info(
        'reindexing fields to a 5-min frequency (i.e., inserting NaNs)')
    ds = xr_reindex_with_date_range(ds, 'time', '5min')
    ds.attrs['station'] = station
    if plot:
        plot_gipsy_field(ds, None)
    if savepath is not None:
        comp = dict(zlib=True, complevel=9)  # best compression
        encoding = {var: comp for var in ds.data_vars}
        ymin = ds.time.min().dt.year.item()
        ymax = ds.time.max().dt.year.item()
        new_filename = '{}_PPP_{}-{}.nc'.format(station, ymin, ymax)
        ds.to_netcdf(savepath / new_filename, 'w', encoding=encoding)
        logger.info('{} was saved to {}'.format(new_filename, savepath))
    logger.info('Done!')
    return ds
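# keep_iqr (aux_gps) is not shown here; a plausible sketch of what an
# IQR-based outlier filter like it does, assuming it masks values farther
# than k inter-quartile ranges outside the [qlow, qhigh] quantiles:
import numpy as np
import xarray as xr

def keep_iqr_sketch(da, dim='time', qlow=0.25, qhigh=0.75, k=3.0):
    q1, q3 = da.quantile([qlow, qhigh], dim=dim).values
    iqr = q3 - q1
    return da.where((da >= q1 - k * iqr) & (da <= q3 + k * iqr))

da = xr.DataArray(np.random.randn(1000), dims='time')
print(int(keep_iqr_sketch(da, k=1.0).count()))  # outliers become NaN and drop from the count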
Example #5
def read_BD_matfile(path=ceil_path, plot=True, month=None, add_syn=True):
    from scipy.io import loadmat
    import pandas as pd
    from aux_gps import xr_reindex_with_date_range
    import matplotlib.pyplot as plt
    from aux_gps import dim_intersection
    from synoptic_procedures import read_synoptic_classification
    file = path / 'PBL_BD_LST.mat'
    mat = loadmat(file)
    mdata = mat['pblBD4shlomi']
    # mdata = mat['PBL_BD_LST']
    dates = mdata[:, :3]
    pbl = mdata[:, 3:]
    dates = dates.astype(str)
    dts = [pd.to_datetime(x[0] + '-' + x[1] + '-' + x[2]) for x in dates]
    dfs = []
    for i, dt in enumerate(dts):
        time = dt + pd.Timedelta(0.5, unit='H')
        times = pd.date_range(time, periods=48, freq='30T')
        df = pd.DataFrame(pbl[i], index=times)
        dfs.append(df)
    df = pd.concat(dfs)
    df.columns = ['MLH']
    df.index.name = 'time'
    # switch to UTC:
    df.index = df.index - pd.Timedelta(2, unit='H')
    da = df.to_xarray()['MLH']
    da.name = 'BD'
    da.attrs['full_name'] = 'Mixing Layer Height'
    da.attrs['name'] = 'MLH'
    da.attrs['units'] = 'm'
    da.attrs['station_full_name'] = 'Beit Dagan'
    da.attrs['lon'] = 34.81
    da.attrs['lat'] = 32.00
    da.attrs['alt'] = 34
    da = xr_reindex_with_date_range(da, freq='30T')
    # add synoptic data:
    syn = read_synoptic_classification().to_xarray()
    syn = syn.sel(time=slice('2015', '2016'))
    syn = syn.resample(time='30T').ffill()
    new_time = dim_intersection([da, syn])
    syn_da = syn.sel(time=new_time)
    syn_da = xr_reindex_with_date_range(syn_da, freq='30T')
    if plot:
        bd2015 = da.sel(time='2015').to_dataframe()
        bd2016 = da.sel(time='2016').to_dataframe()
        fig, axes = plt.subplots(2,
                                 1,
                                 sharey=True,
                                 sharex=False,
                                 figsize=(15, 10))
        if add_syn:
            cmap = plt.get_cmap("tab10")
            syn_df = syn_da.to_dataframe()
            bd2015['synoptics'] = syn_df.loc['2015', 'class_abbr']
            groups = []
            for i, (index, group) in enumerate(bd2015.groupby('synoptics')):
                groups.append(index)
                d = xr_reindex_with_date_range(group['BD'].to_xarray(),
                                               freq='30T')
                d.to_dataframe().plot(x_compat=True,
                                      ms=10,
                                      color=cmap(i),
                                      ax=axes[0],
                                      xlim=['2015-06', '2015-10'])
            axes[0].legend(groups)
            bd2016['synoptics'] = syn_df.loc['2016', 'class_abbr']
            groups = []
            for i, (index, group) in enumerate(bd2016.groupby('synoptics')):
                groups.append(index)
                d = xr_reindex_with_date_range(group['BD'].to_xarray(),
                                               freq='30T')
                d.to_dataframe().plot(x_compat=True,
                                      ms=10,
                                      color=cmap(i),
                                      ax=axes[1],
                                      xlim=['2016-06', '2016-10'])
            axes[1].legend(groups)
        else:
            bd2015.plot(ax=axes[0], xlim=['2015-06', '2015-10'])
            bd2016.plot(ax=axes[1], xlim=['2016-06', '2016-10'])
        for ax in axes.flatten():
            ax.set_ylabel('MLH [m]')
            ax.set_xlabel('UTC')
            ax.grid()
        fig.tight_layout()
        fig.suptitle('MLH from Beit-Dagan ceilometer for 2015 and 2016')
        filename = 'MLH-BD_syn.png'
        plt.savefig(savefig_path / filename, orientation='portrait')
    if add_syn:
        ds = da.to_dataset(name='BD')
        ds['syn'] = syn_da['class_abbr']
        return ds
    else:
        return da
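# The reshaping step above in isolation: one matrix row of 48 half-hourly
# MLH values per calendar day becomes one long series, then local time
# (UTC+2) is shifted to UTC. Synthetic data stands in for the .mat contents:
import numpy as np
import pandas as pd

days = pd.date_range('2015-06-01', periods=3)        # stand-in for the date columns
pbl = np.random.rand(3, 48) * 1000.0                 # stand-in for the MLH matrix
dfs = []
for day, row in zip(days, pbl):
    times = pd.date_range(day + pd.Timedelta(minutes=30), periods=48, freq='30T')
    dfs.append(pd.DataFrame(row, index=times, columns=['MLH']))
mlh = pd.concat(dfs)
mlh.index = mlh.index - pd.Timedelta(hours=2)        # switch to UTC
print(mlh.head())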
Example #6
def align_pw_mlh(path=work_yuval,
                 ceil_path=ceil_path,
                 site='tela',
                 interpolate=None,
                 plot=True,
                 dt_range_str='2015'):
    import xarray as xr
    from aux_gps import dim_intersection
    from aux_gps import xr_reindex_with_date_range
    import pandas as pd
    import matplotlib.pyplot as plt

    def pw_mlh_to_df(pw_new, mlh_site):
        newtime = dim_intersection([pw_new, mlh_site])
        MLH = mlh_site.sel(time=newtime)
        PW = pw_new.sel(time=newtime)
        df = PW.to_dataframe()
        df[MLH.name] = MLH.to_dataframe()
        new_time = pd.date_range(df.index.min(), df.index.max(), freq='1H')
        df = df.reindex(new_time)
        df.index.name = 'time'
        return df

    mlh = xr.load_dataset(ceil_path / 'MLH_from_ceilometers.nc')
    mlh_site = xr_reindex_with_date_range(mlh[pw_mlh_dict.get(site)],
                                          freq='1H')
    if interpolate is not None:
        print('interpolating ceil-site {} with max-gap of {}.'.format(
            pw_mlh_dict.get(site), interpolate))
        attrs = mlh_site.attrs
        mlh_site_inter = mlh_site.interpolate_na('time',
                                                 max_gap=interpolate,
                                                 method='cubic')
        mlh_site_inter.attrs = attrs
    pw = xr.open_dataset(path / 'GNSS_PW_thresh_50_homogenized.nc')
    pw = pw[['tela', 'klhv', 'jslm', 'nzrt', 'yrcm']]
    pw.load()
    pw_new = pw[site]
    if interpolate is not None:
        newtime = dim_intersection([pw_new, mlh_site_inter])
    else:
        newtime = dim_intersection([pw_new, mlh_site])
    pw_new = pw_new.sel(time=newtime)
    pw_new = xr_reindex_with_date_range(pw_new, freq='1H')
    if interpolate is not None:
        print('interpolating pw-site {} with max-gap of {}.'.format(
            site, interpolate))
        attrs = pw_new.attrs
        pw_new_inter = pw_new.interpolate_na('time',
                                             max_gap=interpolate,
                                             method='cubic')
        pw_new_inter.attrs = attrs
    df = pw_mlh_to_df(pw_new, mlh_site)
    if interpolate is not None:
        df_inter = pw_mlh_to_df(pw_new_inter, mlh_site_inter)
    if dt_range_str is not None:
        df = df.loc[dt_range_str, :]
    if plot:
        fig, ax = plt.subplots(figsize=(18, 5))
        if interpolate is not None:
            df_inter[pw_new.name].plot(style='b--', ax=ax)
            # same ax as above since it's automatically added on the right
            df_inter[mlh_site.name].plot(style='r--', secondary_y=True, ax=ax)
        ax = df[pw_new.name].plot(style='b-', marker='o', ax=ax, ms=5)
        # same ax as above since it's automatically added on the right
        ax_twin = df[mlh_site.name].plot(style='r-',
                                         marker='s',
                                         secondary_y=True,
                                         ax=ax,
                                         ms=5)
        if interpolate is not None:
            ax.legend(ax.get_lines() + ax.right_ax.get_lines(), [
                'PWV {} max interpolation'.format(interpolate), 'PWV',
                'MLH {} max interpolation'.format(interpolate), 'MLH'
            ],
                      loc='best')
        else:
            ax.legend([ax.get_lines()[0],
                       ax.right_ax.get_lines()[0]], ['PWV', 'MLH'],
                      loc='upper center')
        ax.set_title('MLH {} site and PWV {} site'.format(
            pw_mlh_dict.get(site), site))
        ax.set_xlim(df.dropna().index.min(), df.dropna().index.max())
        ax.set_ylabel('PWV [mm]', color='b')
        ax_twin.set_ylabel('MLH [m]', color='r')
        ax.tick_params(axis='y', colors='b')
        ax_twin.tick_params(axis='y', colors='r')
        ax.grid(True, which='both', axis='x')
        fig.tight_layout()
        if interpolate is not None:
            filename = '{}-{}_{}_time_series_{}_max_gap_interpolation.png'.format(
                site, pw_mlh_dict.get(site), dt_range_str, interpolate)
        else:
            filename = '{}-{}_{}_time_series.png'.format(
                site, pw_mlh_dict.get(site), dt_range_str)
        plt.savefig(savefig_path / filename, orientation='portrait')
    if interpolate is not None:
        ds = df_inter.to_xarray()
        ds[pw_new.name].attrs.update(pw_new.attrs)
        ds[mlh_site.name].attrs.update(mlh_site.attrs)
        return ds
    else:
        ds = df.to_xarray()
        ds[pw_new.name].attrs.update(pw_new.attrs)
        ds[mlh_site.name].attrs.update(mlh_site.attrs)
        return ds
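# The gap-limited interpolation used above, isolated: interpolate_na with
# max_gap only fills holes shorter than the given span. A synthetic check:
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('2015-01-01', periods=24, freq='1H')
vals = np.sin(np.linspace(0, 3, 24))
vals[3] = np.nan        # 2-hour gap between valid neighbors -> filled
vals[10:16] = np.nan    # 7-hour gap -> left as NaN
da = xr.DataArray(vals, dims='time', coords={'time': time})
filled = da.interpolate_na('time', max_gap='3H', method='cubic')
print(int(filled.isnull().sum()))  # only the six NaNs of the long gap remain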
Example #7
def produce_seasonal_trend_breakdown_time_series_from_jpl_gipsyx_site(station='bshm',
                                                                      path=jpl_path,
                                                                      var='V', k=2,
                                                                      verbose=True,
                                                                      plot=True):
    import xarray as xr
    from aux_gps import harmonic_da_ts
    from aux_gps import loess_curve
    from aux_gps import keep_iqr
    from aux_gps import get_unique_index
    from aux_gps import xr_reindex_with_date_range
    from aux_gps import decimal_year_to_datetime
    import matplotlib.pyplot as plt
    if verbose:
        print('producing seasonal time series for {} station {}'.format(station, var))
    ds = read_time_series_jpl_gipsyx_site(station=station,
                                          path=path/'time_series', verbose=verbose)
    # dyear = ds['decimal_year']
    da_ts = ds[var]
    da_ts = xr_reindex_with_date_range(get_unique_index(da_ts), freq='D')
    if k is not None:
        da_ts = keep_iqr(da_ts, k=k)
    da_ts.name = '{}_{}'.format(station, var)
    # detrend:
    trend = loess_curve(da_ts, plot=False)['mean']
    trend.name = da_ts.name + '_trend'
    trend = xr_reindex_with_date_range(trend, freq='D')
    da_ts_detrended = da_ts - trend
    if verbose:
        print('detrended by loess.')
    da_ts_detrended.name = da_ts.name + '_detrended'
    # harmonic cpy fits:
    harm = harmonic_da_ts(da_ts_detrended.dropna('time'), n=2, grp='month',
                          return_ts_fit=True, verbose=verbose)
    harm = xr_reindex_with_date_range(harm, time_dim='time', freq='D')
    harm1 = harm.sel(cpy=1).reset_coords(drop=True)
    harm1.name = da_ts.name + '_annual'
    harm1_keys = [x for x in harm1.attrs.keys() if '_1' in x]
    harm1.attrs = dict(zip(harm1_keys, [harm1.attrs[x] for x in harm1_keys]))
    harm2 = harm.sel(cpy=2).reset_coords(drop=True)
    harm2.name = da_ts.name + '_semiannual'
    harm2_keys = [x for x in harm2.attrs.keys() if '_2' in x]
    harm2.attrs = dict(zip(harm2_keys, [harm2.attrs[x] for x in harm2_keys]))
    resid = da_ts_detrended - harm1 - harm2
    resid.name = da_ts.name + '_residual'
    ds = xr.merge([da_ts, trend, harm1, harm2, resid])
    # load breakpoints:
    try:
        breakpoints = xr.open_dataset(
            jpl_path/'jpl_break_estimates.nc').sel(station=station.upper())[var]
        df = breakpoints.dropna('year')['year'].to_dataframe()
        df['dt'] = df['year'].apply(decimal_year_to_datetime)
        df['dt'] = df['dt'].round('D')
        bp_da = df.set_index(df['dt'])['dt'].to_xarray()
        bp_da = bp_da.rename({'dt': 'time'})
        ds['{}_{}_breakpoints'.format(station, var)] = bp_da
        no_bp = False
    except KeyError:
        # set the flag unconditionally so the plotting code below never
        # hits an undefined name:
        no_bp = True
        if verbose:
            print('no breakpoints found for {}!'.format(station))
    # seas = xr.load_dataset(
    #     jpl_path/'jpl_seasonal_estimates.nc').sel(station=station.upper())
    # ac1, as1, ac2, as2 = seas[var].values
    # # build seasonal time series:
    # annual = xr.DataArray(ac1*np.cos(dyear*2*np.pi)+as1 *
    #                       np.sin(dyear*2*np.pi), dims=['time'])
    # annual['time'] = da_ts['time']
    # annual.name = '{}_{}_annual'.format(station, var)
    # annual.attrs['units'] = 'mm'
    # annual.attrs['long_name'] = 'annual mode'
    # semiannual = xr.DataArray(ac2*np.cos(dyear*4*np.pi)+as2 *
    #                           np.sin(dyear*4*np.pi), dims=['time'])
    # semiannual['time'] = da_ts['time']
    # semiannual.name = '{}_{}_semiannual'.format(station, var)
    # semiannual.attrs['units'] = 'mm'
    # semiannual.attrs['long_name'] = 'semiannual mode'
    # ds = xr.merge([annual, semiannual, da_ts])
    if plot:
        # plt.figure(figsize=(20, 20))
        dst = ds[[x for x in ds if 'breakpoints' not in x]]
        axes = dst.to_dataframe().plot(subplots=True, figsize=(20, 20), color='k')
        [ax.grid() for ax in axes]
        [ax.set_ylabel('[mm]') for ax in axes]
        if not no_bp:
            for bp in df['dt']:
                [ax.axvline(bp, color='red') for ax in axes]
        plt.tight_layout()
        fig, ax = plt.subplots(figsize=(7, 7))
        harm_mm = harmonic_da_ts(da_ts_detrended.dropna('time'), n=2, grp='month',
                                 return_ts_fit=False, verbose=verbose)
        harm_mm['{}_{}_detrended'.format(station, var)].plot.line(ax=ax, linewidth=0, marker='o', color='k')
        harm_mm['{}_mean'.format(station)].sel(cpy=1).plot.line(ax=ax, marker=None, color='tab:red')
        harm_mm['{}_mean'.format(station)].sel(cpy=2).plot.line(ax=ax, marker=None, color='tab:blue')
        harm_mm['{}_mean'.format(station)].sum('cpy').plot.line(ax=ax, marker=None, color='tab:purple')
        ax.grid()
    return ds
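# harmonic_da_ts and loess_curve (aux_gps) are not shown in this listing;
# the seasonal breakdown essentially fits annual (cpy=1) and semiannual
# (cpy=2) sinusoids by least squares. A sketch of that fit on synthetic
# daily data, not the project's actual implementation:
import numpy as np
import pandas as pd

time = pd.date_range('2010-01-01', '2014-12-31', freq='D')
dyear = time.dayofyear / 365.25
y = 3.0 * np.sin(2 * np.pi * dyear) + 1.0 * np.cos(4 * np.pi * dyear) \
    + 0.3 * np.random.randn(time.size)
X = np.column_stack([np.cos(2 * np.pi * dyear), np.sin(2 * np.pi * dyear),
                     np.cos(4 * np.pi * dyear), np.sin(4 * np.pi * dyear)])
coeffs, *_ = np.linalg.lstsq(X, y, rcond=None)
annual = X[:, :2] @ coeffs[:2]        # 1 cycle per year
semiannual = X[:, 2:] @ coeffs[2:]    # 2 cycles per year
residual = y - annual - semiannual
print(np.round(coeffs, 2))            # ~[0, 3, 1, 0]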