Example #1
def process_mpoint_da_with_station_num(path=sound_path,
                                       station='08001',
                                       k_iqr=1):
    """Compute Ts, Tm and Tpw from a station's sounding profiles and save
    them as a compressed netCDF dataset in path."""
    import xarray as xr
    from aux_gps import path_glob, keep_iqr
    file = path_glob(path, 'ALL*{}*.nc'.format(station))
    da = xr.open_dataarray(file[0])
    ts, tm, tpw = calculate_ts_tm_tpw_from_mpoint_da(da)
    ds = xr.Dataset()
    ds['Tm'] = xr.DataArray(tm, dims=['time'], name='Tm')
    ds['Tm'].attrs['unit'] = 'K'
    ds['Tm'].attrs['name'] = 'Water vapor mean atmospheric temperature'
    ds['Ts'] = xr.DataArray(ts, dims=['time'], name='Ts')
    ds['Ts'].attrs['unit'] = 'K'
    ds['Ts'].attrs['name'] = 'Surface temperature'
    ds['Tpw'] = xr.DataArray(tpw, dims=['time'], name='Tpw')
    ds['Tpw'].attrs['unit'] = 'mm'
    ds['Tpw'].attrs['name'] = 'Precipitable water'
    ds['time'] = da.time
    ds = keep_iqr(ds, k=k_iqr, dim='time')
    yr_min = ds.time.min().dt.year.item()
    yr_max = ds.time.max().dt.year.item()
    ds = ds.rename({'time': 'sound_time'})
    filename = 'station_{}_soundings_ts_tm_tpw_{}-{}.nc'.format(
        station, yr_min, yr_max)
    print('saving {} to {}'.format(filename, path))
    comp = dict(zlib=True, complevel=9)  # best compression
    encoding = {var: comp for var in ds}
    ds.to_netcdf(path / filename, 'w', encoding=encoding)
    print('Done!')
    return ds
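A minimal usage sketch, assuming sound_path is the module-level directory that holds the ALL*08001*.nc sounding file:

# hypothetical call: build and save the Ts/Tm/Tpw dataset for station 08001:
sound_ds = process_mpoint_da_with_station_num(path=sound_path, station='08001', k_iqr=1)
print(sound_ds['Tpw'].attrs['unit'])  # 'mm'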
Example #2
def get_pressure_lapse_rate(path=ims_path, model='LR', plot=False):
    """Estimate the pressure lapse rate [hPa/km] by fitting mean station
    pressure against station altitude."""
    import matplotlib.pyplot as plt
    import xarray as xr
    from aux_gps import linear_fit_using_scipy_da_ts, keep_iqr
    bp = xr.load_dataset(path / 'IMS_BP_israeli_10mins.nc')
    # IQR-clean each station's pressure series, then merge back into one dataset:
    bps = [keep_iqr(bp[x]) for x in bp]
    bp = xr.merge(bps)
    # stack the per-station means into one array along a new 'alt' dim,
    # then replace the station labels with the actual station altitudes:
    mean_p = bp.mean('time').to_array('alt')
    mean_p.name = 'mean_pressure'
    alts = [bp[x].attrs['station_alt'] for x in bp.data_vars]
    mean_p['alt'] = alts
    _, results = linear_fit_using_scipy_da_ts(mean_p,
                                              model=model,
                                              slope_factor=1,
                                              not_time=True)
    slope = results['slope']
    inter = results['intercept']
    modeled_var = slope * mean_p['alt'] + inter
    if plot:
        fig, ax = plt.subplots()
        modeled_var.plot(ax=ax, color='r')
        mean_p.plot.line(linewidth=0., marker='o', ax=ax, color='b')
        # pressure decreases with height, so report the magnitude of the slope:
        textstr = 'Pressure lapse rate: {:.1f} hPa/km'.format(1000 * abs(slope))
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        # place a text box in the upper center in axes coords:
        ax.text(0.5,
                0.95,
                textstr,
                transform=ax.transAxes,
                fontsize=12,
                verticalalignment='top',
                bbox=props)
        ax.set_xlabel('Height a.s.l [m]')
        ax.set_ylabel('Mean Pressure [hPa]')
    return results
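For reference, the lapse rate here is just the slope of a straight line fitted to mean station pressure versus station altitude, scaled from hPa/m to hPa/km. A standalone sketch of that fit with scipy.stats.linregress and made-up station values (linear_fit_using_scipy_da_ts is project-internal, so this only illustrates the technique):

import numpy as np
from scipy.stats import linregress

alts = np.array([5.0, 150.0, 400.0, 750.0])        # station heights a.s.l [m] (made-up)
mean_p = np.array([1012.0, 995.0, 966.0, 927.0])   # mean pressures [hPa] (made-up)
fit = linregress(alts, mean_p)                     # fit.slope is in hPa/m and negative
print('Pressure lapse rate: {:.1f} hPa/km'.format(1000 * abs(fit.slope)))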
def prepare_station_to_pw_comparison(path=aero_path,
                                     gis_path=gis_path,
                                     station='boker',
                                     mm_anoms=False):
    """Load an AERONET station's precipitable-water series, IQR-clean it
    and convert to mm; optionally return monthly anomalies."""
    from aux_gps import keep_iqr, anomalize_xr
    ds_dict = load_all_station(path=path, gis_path=gis_path)
    try:
        da = ds_dict[station]['WV(cm)_935nm-AOD']
    except KeyError as e:
        print('station {} has no {} field'.format(station, e))
        return
    da = keep_iqr(da)
    # convert to mm:
    da = da * 10
    da.name = station
    if mm_anoms:
        da_mm = da.resample(time='MS').mean()
        da_mm_anoms = anomalize_xr(da_mm, freq='MS')
        da = da_mm_anoms
    da.attrs['data_source'] = 'AERONET'
    da.attrs['data_field'] = 'WV(cm)_935nm-AOD'
    da.attrs['units'] = 'mm'
    return da
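A usage sketch, assuming aero_path and gis_path are set and load_all_station can read the 'boker' AERONET site:

# hypothetical call: monthly PW anomalies in mm for the 'boker' site:
boker_pw = prepare_station_to_pw_comparison(station='boker', mm_anoms=True)
# boker_pw can now be compared directly against a GNSS-derived PW series.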
def read_gipsyx_all_yearly_files(load_path,
                                 savepath=None,
                                 iqr_k=3.0,
                                 plot=False):
    """read, stitch and clean all yearly post proccessed ppp gipsyx solutions
    and concat them to a multiple fields time-series dataset"""
    import logging
    import pandas as pd
    import xarray as xr
    from aux_gps import (path_glob, get_unique_index, dim_intersection,
                         filter_nan_errors, keep_iqr,
                         xr_reindex_with_date_range,
                         transform_ds_to_lat_lon_alt)

    def stitch_yearly_files(ds_list):
        """input is multiple field yearly dataset list and output is the same
        but with stitched discontinuieties"""
        fields = [x for x in ds_list[0].data_vars]
        for i, dss in enumerate(ds_list):
            if i == len(ds_list) - 1:
                break
            first_year = int(ds_list[i].time.dt.year.median().item())
            second_year = int(ds_list[i + 1].time.dt.year.median().item())
            first_ds = ds_list[i].sel(time=slice(
                '{}-12-31T18:00'.format(first_year), str(second_year)))
            second_ds = ds_list[i + 1].sel(time=slice(
                str(first_year), '{}-01-01T06:00'.format(second_year)))
            if dim_intersection([first_ds, second_ds], 'time') is None:
                logger.warning('skipping stitching years {} and {}...'.format(
                    first_year, second_year))
                continue
            else:
                logger.info('stitching years {} and {}'.format(
                    first_year, second_year))
            time = xr.concat([first_ds.time, second_ds.time], 'time')
            time = pd.to_datetime(get_unique_index(time).values)
            st_list = []
            for field in fields:
                df = first_ds[field].to_dataframe()
                df.columns = ['first']
                df = df.reindex(time)
                df['second'] = second_ds[field].to_dataframe()
                if field in ['X', 'Y', 'Z']:
                    method = 'simple_mean'
                elif field in ['GradNorth', 'GradEast', 'WetZ']:
                    method = 'smooth_mean'
                elif 'error' in field:
                    method = 'error_mean'
                else:
                    # avoid an unbound 'method' for unrecognized fields:
                    method = 'simple_mean'
                dfs = stitch_two_cols(df, method=method)['stitched_signal']
                dfs.index.name = 'time'
                st = dfs.to_xarray()
                st.name = field
                st_list.append(st)
            # merge to all fields:
            st_ds = xr.merge(st_list)
            # replace stitched values to first ds and second ds:
            first_time = dim_intersection([ds_list[i], st_ds])
            vals_rpl = st_ds.sel(time=first_time)
            for field in ds_list[i].data_vars:
                ds_list[i][field].loc[{'time': first_time}] = vals_rpl[field]
            second_time = dim_intersection([ds_list[i + 1], st_ds])
            vals_rpl = st_ds.sel(time=second_time)
            for field in ds_list[i + 1].data_vars:
                ds_list[i + 1][field].loc[{
                    'time': second_time
                }] = vals_rpl[field]
        return ds_list

    logger = logging.getLogger('gipsyx_post_proccesser')
    files = sorted(path_glob(load_path, '*.nc'))
    ds_list = []
    for file in files:
        filename = file.name
        station = filename.split('_')[0]
        if 'ppp_post' not in filename:
            continue
        logger.info('reading {}'.format(filename))
        dss = xr.open_dataset(file)
        ds_list.append(dss)
    # now loop over ds_list and stitch yearly discontinuities:
    ds_list = stitch_yearly_files(ds_list)
    logger.info('merging all years...')
    ds = xr.merge(ds_list)
    logger.info('fixing meta-data...')
    for da in ds.data_vars:
        # strip any 'prefix>' part from the attribute keys:
        old_keys = list(ds[da].attrs.keys())
        vals = list(ds[da].attrs.values())
        new_keys = [x.split('>')[-1] for x in old_keys]
        ds[da].attrs = dict(zip(new_keys, vals))
        if 'desc' in ds[da].attrs.keys():
            ds[da].attrs['full_name'] = ds[da].attrs.pop('desc')
    logger.info('dropping duplicate time stamps...')
    ds = get_unique_index(ds)
    # clean with IQR all fields:
    logger.info('removing outliers with IQR of {}...'.format(iqr_k))
    ds = keep_iqr(ds, dim='time', qlow=0.25, qhigh=0.75, k=iqr_k)
    # filter the fields based on their errors not being NaNs:
    logger.info('filtering out fields if their errors are NaN...')
    ds = filter_nan_errors(ds, error_str='_error', dim='time')
    logger.info('transforming X, Y, Z coords to lat, lon and alt...')
    ds = transform_ds_to_lat_lon_alt(ds, ['X', 'Y', 'Z'], '_error', 'time')
    logger.info(
        'reindexing fields to a 5-min frequency (i.e., inserting NaNs)...')
    ds = xr_reindex_with_date_range(ds, 'time', '5min')
    ds.attrs['station'] = station
    if plot:
        plot_gipsy_field(ds, None)
    if savepath is not None:
        comp = dict(zlib=True, complevel=9)  # best compression
        encoding = {var: comp for var in ds.data_vars}
        ymin = ds.time.min().dt.year.item()
        ymax = ds.time.max().dt.year.item()
        new_filename = '{}_PPP_{}-{}.nc'.format(station, ymin, ymax)
        ds.to_netcdf(savepath / new_filename, 'w', encoding=encoding)
        logger.info('{} was saved to {}'.format(new_filename, savepath))
    logger.info('Done!')
    return ds
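A usage sketch, assuming load_path is a pathlib.Path that holds one station's yearly *ppp_post*.nc files:

import logging
logging.basicConfig(level=logging.INFO)  # surface the function's progress logging
ppp_ds = read_gipsyx_all_yearly_files(load_path, savepath=load_path, iqr_k=3.0)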
Example #5
def produce_seasonal_trend_breakdown_time_series_from_jpl_gipsyx_site(station='bshm',
                                                                      path=jpl_path,
                                                                      var='V', k=2,
                                                                      verbose=True,
                                                                      plot=True):
    """Decompose a daily JPL GipsyX coordinate series into a loess trend,
    annual and semi-annual harmonics and a residual; attach breakpoints."""
    import xarray as xr
    import matplotlib.pyplot as plt
    from aux_gps import (harmonic_da_ts, loess_curve, keep_iqr,
                         get_unique_index, xr_reindex_with_date_range,
                         decimal_year_to_datetime)
    if verbose:
        print('producing seasonal time series for {} station {}'.format(station, var))
    ds = read_time_series_jpl_gipsyx_site(station=station,
                                          path=path/'time_series', verbose=verbose)
    # dyear = ds['decimal_year']
    da_ts = ds[var]
    da_ts = xr_reindex_with_date_range(get_unique_index(da_ts), freq='D')
    if k is not None:
        da_ts = keep_iqr(da_ts, k=k)
    da_ts.name = '{}_{}'.format(station, var)
    # detrend:
    trend = loess_curve(da_ts, plot=False)['mean']
    trend.name = da_ts.name + '_trend'
    trend = xr_reindex_with_date_range(trend, freq='D')
    da_ts_detrended = da_ts - trend
    if verbose:
        print('detrended by loess.')
    da_ts_detrended.name = da_ts.name + '_detrended'
    # harmonic cpy fits:
    harm = harmonic_da_ts(da_ts_detrended.dropna('time'), n=2, grp='month',
                          return_ts_fit=True, verbose=verbose)
    harm = xr_reindex_with_date_range(harm, time_dim='time', freq='D')
    harm1 = harm.sel(cpy=1).reset_coords(drop=True)
    harm1.name = da_ts.name + '_annual'
    harm1_keys = [x for x in harm1.attrs.keys() if '_1' in x]
    harm1.attrs = dict(zip(harm1_keys, [harm1.attrs[x] for x in harm1_keys]))
    harm2 = harm.sel(cpy=2).reset_coords(drop=True)
    harm2.name = da_ts.name + '_semiannual'
    harm2_keys = [x for x in harm2.attrs.keys() if '_2' in x]
    harm2.attrs = dict(zip(harm2_keys, [harm2.attrs[x] for x in harm2_keys]))
    resid = da_ts_detrended - harm1 - harm2
    resid.name = da_ts.name + '_residual'
    ds = xr.merge([da_ts, trend, harm1, harm2, resid])
    # load breakpoints:
    try:
        breakpoints = xr.open_dataset(
            jpl_path/'jpl_break_estimates.nc').sel(station=station.upper())[var]
        df = breakpoints.dropna('year')['year'].to_dataframe()
        df['dt'] = df['year'].apply(decimal_year_to_datetime)
        df['dt'] = df['dt'].round('D')
        bp_da = df.set_index(df['dt'])['dt'].to_xarray()
        bp_da = bp_da.rename({'dt': 'time'})
        ds['{}_{}_breakpoints'.format(station, var)] = bp_da
        no_bp = False
    except KeyError:
        if verbose:
            print('no breakpoints found for {}!'.format(station))
        no_bp = True
    # alternative: load the seasonal coeffs directly (kept for reference):
    # seas = xr.load_dataset(
    #     jpl_path/'jpl_seasonal_estimates.nc').sel(station=station.upper())
    # ac1, as1, ac2, as2 = seas[var].values
    # # build seasonal time series:
    # annual = xr.DataArray(ac1*np.cos(dyear*2*np.pi)+as1 *
    #                       np.sin(dyear*2*np.pi), dims=['time'])
    # annual['time'] = da_ts['time']
    # annual.name = '{}_{}_annual'.format(station, var)
    # annual.attrs['units'] = 'mm'
    # annual.attrs['long_name'] = 'annual mode'
    # semiannual = xr.DataArray(ac2*np.cos(dyear*4*np.pi)+as2 *
    #                           np.sin(dyear*4*np.pi), dims=['time'])
    # semiannual['time'] = da_ts['time']
    # semiannual.name = '{}_{}_semiannual'.format(station, var)
    # semiannual.attrs['units'] = 'mm'
    # semiannual.attrs['long_name'] = 'semiannual mode'
    # ds = xr.merge([annual, semiannual, da_ts])
    if plot:
        # plt.figure(figsize=(20, 20))
        dst = ds[[x for x in ds if 'breakpoints' not in x]]
        axes = dst.to_dataframe().plot(subplots=True, figsize=(20, 20), color='k')
        [ax.grid() for ax in axes]
        [ax.set_ylabel('[mm]') for ax in axes]
        if not no_bp:
            for bp in df['dt']:
                [ax.axvline(bp, color='red') for ax in axes]
        plt.tight_layout()
        fig, ax = plt.subplots(figsize=(7, 7))
        harm_mm = harmonic_da_ts(da_ts_detrended.dropna('time'), n=2, grp='month',
                                 return_ts_fit=False, verbose=verbose)
        harm_mm['{}_{}_detrended'.format(station, var)].plot.line(ax=ax, linewidth=0, marker='o', color='k')
        harm_mm['{}_mean'.format(station)].sel(cpy=1).plot.line(ax=ax, marker=None, color='tab:red')
        harm_mm['{}_mean'.format(station)].sel(cpy=2).plot.line(ax=ax, marker=None, color='tab:blue')
        harm_mm['{}_mean'.format(station)].sum('cpy').plot.line(ax=ax, marker=None, color='tab:purple')
        ax.grid()
    return ds
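A usage sketch, assuming jpl_path is set and the 'bshm' time-series and break-estimate files exist under it:

bshm_ds = produce_seasonal_trend_breakdown_time_series_from_jpl_gipsyx_site(
    station='bshm', var='V', k=2, verbose=True, plot=False)
# bshm_ds holds the raw daily series plus its loess trend, annual and
# semi-annual harmonics, residual and (if found) breakpoint timestamps.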