def teqc_concat_rinex(path_dir, rfn=None, glob='*.14o', cmd_path=None, delete_after_concat=False):
    """Concatenate all RINEX obs files in *path_dir* matching *glob* into a
    single file *rfn* using the ``teqc`` tool, keeping GPS data only
    (GLONASS/BeiDou/Galileo stripped). Optionally delete the inputs after."""
    import subprocess
    from subprocess import CalledProcessError
    from aux_gps import path_glob
    from aux_gps import replace_char_at_string_position
    if not path_dir.is_dir():
        raise ValueError('{} is not a directory!'.format(path_dir))
    orig_files = path_glob(path_dir, glob)
    if rfn is None:
        # derive the output name from the earliest input file, zeroing the
        # hour character at position 7 (daily-file convention):
        first = sorted(path_glob(path_dir, glob))[0]
        rfn = first.as_posix().split('/')[-1]
        rfn = replace_char_at_string_position(rfn, char='0', pos=7)
        print('rfn is : {}'.format(rfn))
    # -R -S -C -E keep only GPS data and not GLONASS, BAIDU, GALILEO
    if cmd_path is not None:
        teqc = '{}/teqc'.format(cmd_path.as_posix())
    else:
        teqc = 'teqc'
    cmd = '{} -phc -R -S -C -E {} > {}'.format(teqc, glob, rfn)
    try:
        # shell=True is required for the glob expansion and '>' redirection:
        subprocess.run(cmd, shell=True, check=True, cwd=path_dir)
    except CalledProcessError:
        print('{} failed !'.format(cmd))
        return
    if delete_after_concat:
        print('deleting files after teqc concat.')
        for f in orig_files:
            f.unlink()
    return
def daily_prep_all_steps(path, staDb, new_filename=False, delete_last_rinex=False, gde_tree=None):
    """Run the full daily prep chain on *path*: concat RINEX files, run
    dataRecordDump and rnxEditGde, and optionally delete the earliest raw
    RINEX file afterwards. Relies on the module-level ``logger``."""
    from aux_gps import path_glob
    from aux_gps import replace_char_at_string_position
    try:
        files = sorted(path_glob(path, '*.gz'))
    except FileNotFoundError:
        files = sorted(path_glob(path, '*.Z'))
    first_rfn = files[0].as_posix().split('/')[-1]
    # zero the hour char (pos 7) and keep 12 chars -> daily filename:
    rfn_dfile = replace_char_at_string_position(first_rfn, pos=7, char='0')[0:12]
    last_rfn = files[-1].as_posix().split('/')[-1]
    # doy-range name (e.g. DSea2150-2180) only when requested:
    if new_filename:
        rfn_dr_file = first_rfn[0:8] + '-' + last_rfn[4:8]
    else:
        rfn_dr_file = rfn_dfile
    # 1) rinex concat and prep:
    daily_prep_and_concat_rinex(path)
    # 2) dataRecordDump:
    dataRecordDump_single_file(path / 'dr', rfn_dfile + '.gz')
    # 3) rinex edit:
    rnxEditGde_single_file(path / 'dr', rfn_dfile + '.dr.gz', staDb,
                           new_filename=rfn_dr_file, delete_dr_after=True,
                           gde_tree=gde_tree)
    if delete_last_rinex:
        # finally, delete first rinex file (earliest):
        files[0].unlink()
        logger.info('{} has been deleted.'.format(files[0]))
    return
def move_bet_dagan_physical_to_main_path(bet_dagan_path):
    """rename bet_dagan physical radiosonde filenames and move them to
    main path i.e., bet_dagan_path! warning-DO ONCE """
    from aux_gps import path_glob
    import shutil

    def _subdirs(p):
        # sorted immediate sub-directories of p:
        return sorted(x for x in path_glob(p, '*/') if x.is_dir())

    year_dirs = _subdirs(bet_dagan_path)
    # tree layout: year/month/day/hour/<file>
    for year_dir in year_dirs:
        for month_dir in _subdirs(year_dir):
            for day_dir in _subdirs(month_dir):
                for hour_dir in _subdirs(day_dir):
                    found = [x for x in path_glob(hour_dir, '*/')
                             if x.is_file()]
                    parts = found[0].as_posix().split('/')
                    name, hour, day, month, year = (parts[-1], parts[-2],
                                                    parts[-3], parts[-4],
                                                    parts[-5])
                    filename = '{}_{}{}{}{}'.format(name, year, month,
                                                    day, hour)
                    dest = bet_dagan_path / filename
                    shutil.move(found[0].resolve(), dest.resolve())
                    print('moving {} to {}'.format(filename, bet_dagan_path))
    return year_dirs
def daily_prep_drdump_and_rnxedit(path, staDb, gde_tree=None):
    """Copy the earliest daily RINEX in *path* into a 'dr' sub-folder, then
    run dataRecordDump and rnxEditGde on it."""
    from aux_gps import path_glob
    from aux_gps import replace_char_at_string_position
    import shutil
    import os
    try:
        files = sorted(path_glob(path, '*.gz'))
        suff = '.gz'
    except FileNotFoundError:
        files = sorted(path_glob(path, '*.Z'))
        suff = '.Z'
    first_rfn = files[0].as_posix().split('/')[-1]
    # zero the hour char (pos 7) and keep 12 chars -> daily filename:
    rfn_dfile = replace_char_at_string_position(first_rfn, pos=7, char='0')[0:12]
    dr_path = path / 'dr'
    if not dr_path.is_dir():
        os.mkdir(dr_path)
    # 0) copy the earliest daily file into dr_path:
    shutil.copy(files[0], dr_path / (rfn_dfile + suff))
    # 1) dataRecordDump:
    dataRecordDump_single_file(dr_path, rfn_dfile + suff)
    # 2) rinex edit:
    rnxEditGde_single_file(dr_path, rfn_dfile + '.dr.gz', staDb,
                           gde_tree=gde_tree)
    return
def calculate_Tm_from_era5(path=era5_path, Tfile='era5_T_israel*.nc',
                           RHfile='era5_RH_israel*.nc', savepath=None):
    """Calculate the water-vapor-weighted mean atmospheric temperature (Tm)
    from ERA5 temperature and relative-humidity pressure-level fields:
    Tm = integral(e/T dp) / integral(e/T^2 dp), with e the water-vapor
    partial pressure derived from the dewpoint.

    Bug fix: ``path_glob`` returns a *list* of Paths; the original passed
    the whole list to ``xr.open_dataarray`` which fails — the first match
    is now used for both the T and RH files.
    """
    import xarray as xr
    from aux_gps import path_glob
    tfile = path_glob(path, Tfile)[0]
    rhfile = path_glob(path, RHfile)[0]
    T = xr.open_dataarray(tfile)
    RH = xr.open_dataarray(rhfile)
    Dewpt = dewpoint_rh(T, RH)
    WVpress = VaporPressure(Dewpt, units='hPa', method='Buck')
    # numerator:   integral over pressure levels of e/T:
    nom = (WVpress / T).integrate('level')
    # denominator: integral over pressure levels of e/T^2:
    denom = (WVpress / T**2.0).integrate('level')
    Tm = nom / denom
    Tm.name = 'Tm'
    Tm.attrs['units'] = 'K'
    if savepath is not None:
        yr_min = Tm.time.min().dt.year.item()
        yr_max = Tm.time.max().dt.year.item()
        filename = 'era5_Tm_israel_{}-{}.nc'.format(yr_min, yr_max)
        print('saving {} to {}'.format(filename, savepath))
        comp = dict(zlib=True, complevel=9)  # best compression
        encoding = {var: comp for var in Tm.to_dataset(name='Tm')}
        Tm.to_netcdf(savepath / filename, 'w', encoding=encoding)
        print('Done!')
    return Tm
def download_all_10mins_ims(savepath, channel_name='TD'):
    """download all 10mins stations per specified channel, updateing fields
    is automatic.

    Bug fix: the station-id -> last-datetime map ``d`` was only defined
    when previously-downloaded files existed, so a fresh *savepath* raised
    NameError; it now defaults to an empty dict (all stations get a full
    download on the first run).
    """
    from aux_gps import path_glob
    import xarray as xr
    import logging
    logger = logging.getLogger('ims_downloader')
    glob = '*_{}_10mins.nc'.format(channel_name)
    files = sorted(path_glob(savepath, glob, return_empty_list=True))
    files = [x for x in files if x.is_file()]
    d = {}
    time_dim = None
    if files:
        time_dim = list(set(xr.open_dataarray(files[0]).dims))[0]
        last_dates = [check_ds_last_datetime(xr.open_dataarray(x))
                      for x in files]
        st_id_downloaded = [int(x.as_posix().split('/')[-1].split('_')[1])
                            for x in files]
        d = dict(zip(st_id_downloaded, last_dates))
    stations = ims_api_get_meta(active_only=True, channel_name=channel_name)
    for index, row in stations.iterrows():
        st_id = row['stationId']
        if st_id not in d:
            # never downloaded before - fetch the full record:
            download_ims_single_station(savepath=savepath,
                                        channel_name=channel_name,
                                        stationid=st_id, update=None)
        else:
            # already on disk - fetch only new data and prepend to the old:
            logger.info('updating station {}...'.format(st_id))
            da = download_ims_single_station(savepath=savepath,
                                             channel_name=channel_name,
                                             stationid=st_id,
                                             update=d[st_id])
            if da is not None:
                file = path_glob(
                    savepath,
                    '*_{}_{}_10mins.nc'.format(st_id, channel_name))[0]
                da_old = xr.load_dataarray(file)
                da = xr.concat([da, da_old], time_dim)
                filename = '_'.join(['-'.join(row['name'].split(' ')),
                                     str(st_id), channel_name,
                                     '10mins']) + '.nc'
                comp = dict(zlib=True, complevel=9)  # best compression
                encoding = {var: comp for var in da.to_dataset().data_vars}
                logger.info('saving to {} to {}'.format(filename, savepath))
                try:
                    da.to_netcdf(savepath / filename, 'w', encoding=encoding)
                except PermissionError:
                    # target may be locked/open - remove and rewrite:
                    (savepath / filename).unlink()
                    da.to_netcdf(savepath / filename, 'w', encoding=encoding)
    return
def save_resampled_versions_gispyx_results(load_path, sample, sample_rate='1H'):
    """Resample a station's GipsyX PPP solution netCDF found in *load_path*
    to *sample_rate* and save the result next to the original.

    Parameters
    ----------
    load_path : directory holding the station's ``*_PPP*.nc`` solution.
    sample : dict mapping rate codes (e.g. '1H') to human-readable names
        used in the output filename.
    sample_rate : pandas offset alias to resample to.
    """
    from aux_gps import path_glob
    import xarray as xr
    import logging
    """resample gipsyx results nc files and save them.options for sample_rate are in sample dict"""
    logger = logging.getLogger('gipsyx_post_proccesser')
    # station name is the prefix of the first nc file in the folder:
    path = path_glob(load_path, '*.nc')[0]
    station = path.as_posix().split('/')[-1].split('_')[0]
    # path = GNSS / station / 'gipsyx_solutions'
    glob = '{}_PPP*.nc'.format(station.upper())
    try:
        file = path_glob(load_path, glob_str=glob)[0]
    except FileNotFoundError:
        logger.warning(
            'did not find {} in gipsyx_solutions dir, skipping...'.format(
                station))
        return
    filename = file.as_posix().split('/')[-1].split('.')[0]
    years_str = filename.split('_')[-1]
    ds = xr.open_dataset(file)
    time_dim = list(set(ds.dims))[0]
    logger.info('resampaling {} to {}'.format(station, sample[sample_rate]))
    years = [str(x) for x in sorted(list(set(ds[time_dim].dt.year.values)))]
    if sample_rate == '1H' or sample_rate == '3H':
        # hourly-type rates are resampled year-by-year (presumably to bound
        # memory use — TODO confirm), then concatenated back along time:
        dsr_list = []
        for year in years:
            logger.info('resampling {} of year {}'.format(sample_rate, year))
            dsr = ds.sel({
                time_dim: year
            }).resample({
                time_dim: sample_rate
            }, keep_attrs=True, skipna=True).mean(keep_attrs=True)
            dsr_list.append(dsr)
        dsr = xr.concat(dsr_list, time_dim)
    else:
        dsr = ds.resample({
            time_dim: sample_rate
        }, keep_attrs=True, skipna=True).mean(keep_attrs=True)
    new_filename = '_'.join(
        [station.upper(), sample[sample_rate], 'PPP', years_str])
    new_filename = new_filename + '.nc'
    logger.info('saving resmapled station {} to {}'.format(station, load_path))
    comp = dict(zlib=True, complevel=9)  # best compression
    encoding = {var: comp for var in dsr.data_vars}
    dsr.to_netcdf(load_path / new_filename, 'w', encoding=encoding)
    logger.info('Done resampling!')
    return
def daily_prep_and_concat_rinex(path):
    """Prepare one day of hourly AXIS RINEX files in *path*: unzip, convert
    Hatanaka d-files to obs files, teqc-concat them into one daily obs file,
    re-compress it and move it into a 'dr' sub-folder, then re-gzip the
    remaining d-files. Step order matters; each step feeds the next.
    """
    import logging
    import os
    from axis_process import run_rinex_compression_on_file
    from axis_process import run_rinex_compression_on_folder
    from axis_process import teqc_concat_rinex
    from aux_gps import path_glob
    from aux_gps import replace_char_at_string_position
    from axis_process import move_files
    logger = logging.getLogger('axis-gipsyx')
    # get rfn,i.e., DSea2150.14o:
    try:
        files = sorted(path_glob(path, '*.gz'))
        unzip_glob = '*.gz'
    except FileNotFoundError:
        files = sorted(path_glob(path, '*.Z'))
        unzip_glob = '*.Z'
    rfn = files[0].as_posix().split('/')[-1]
    # zero the hour char (pos 7) and keep 12 chars -> daily d-filename;
    # rfn_ofile is the same name with an 'o' (obs) suffix:
    rfn_dfile = replace_char_at_string_position(rfn, pos=7, char='0')[0:12]
    rfn_ofile = replace_char_at_string_position(rfn_dfile, pos=-1, char='o')
    # create dr path if not exists:
    dr_path = path / 'dr'
    if not dr_path.is_dir():
        os.mkdir(dr_path)
    # 1) unzip all files:
    logger.info('unzipping {}'.format(path))
    run_rinex_compression_on_folder(path, command='gunzip', glob=unzip_glob)
    # 2) convert to obs files:
    logger.info('converting d to obs.')
    run_rinex_compression_on_folder(path, command='crx2rnx', glob='*.*d')
    # 3) concat using teqc:
    logger.info('teqc concating.')
    teqc_concat_rinex(path, rfn=rfn_ofile, glob='*.*o')
    # 4) convert to d file:
    logger.info('compressing concated file and moving to dr path.')
    run_rinex_compression_on_file(path, filename=rfn_ofile, command='rnx2crx')
    # 5) gzip d file:
    # rfn = replace_char_at_string_position(rfn, char='d', pos=-1)
    run_rinex_compression_on_file(path, rfn_dfile, command='gzip')
    # 6) move copressed file to dr_path and delete all other files except original rinex gz:
    move_files(path, dr_path, rfn_dfile + '.gz', rfn_dfile + '.gz')
    files = path_glob(path, '*.*o')
    [x.unlink() for x in files]
    # 7) gzip all d files:
    logger.info('gzipping {}'.format(path))
    run_rinex_compression_on_folder(path, command='gzip', glob='*.*d')
    # 8) dataRecordDump:
    logger.info('Done preping daily {} path.'.format(path))
    return
def plot_figure_2(path=tela_results_path, plot='WetZ', save=True):
    """Plot one field (default WetZ) with its error band from the 4th TELA
    smoothFinal tdp file; with plot=None return the raw (df, meta) instead.
    Optionally save the figure to ``savefig_path``."""
    from aux_gps import path_glob
    import matplotlib.pyplot as plt
    from gipsyx_post_proc import process_one_day_gipsyx_output
    filepath = path_glob(path, 'tela*_smoothFinal.tdp')[3]
    if plot is None:
        df, meta = process_one_day_gipsyx_output(filepath, True)
        return df, meta
    df, meta = process_one_day_gipsyx_output(filepath, False)
    if not isinstance(plot, str):
        raise ValueError('pls pick only one field to plot., e.g., WetZ')
    error_plot = '{}_error'.format(plot)
    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
    desc = meta['desc'][plot]
    unit = meta['units'][plot]
    df[plot].plot(ax=ax, legend=False, color='k')
    lower = df[plot] - df[error_plot]
    upper = df[plot] + df[error_plot]
    ax.fill_between(df.index, lower, upper, alpha=0.5)
    ax.grid()
    ax.set_ylabel('WetZ [{}]'.format(unit))
    ax.set_xlabel('')
    ax.grid('on')
    fig.tight_layout()
    filename = 'wetz_tela_daily.png'
    caption(
        '{} from station TELA in {}. Note the error estimation from the GipsyX software(filled)'
        .format(desc, df.index[100].strftime('%Y-%m-%d')))
    if save:
        plt.savefig(savefig_path / filename, bbox_inches='tight')
    return ax
def save_yearly(movepath, savepath, years, name=None):
    """Concatenate the netCDF files of each year in *years* found under
    *movepath* and write one combined file per year into *savepath*.

    Relies on module-level ``path_glob`` and ``xr``.

    Bug fix: when *name* was given, the filename was built as
    ``'{year}-' + '-' + name`` producing a double dash (e.g. ``2019--x.nc``);
    it is now a single dash.
    """
    from dask.diagnostics import ProgressBar
    ps = path_glob(movepath, '*.nc')
    for year in years:
        print('saving year {}...'.format(year))
        # for sea level : the year lives in the second-to-last '_' field
        # of the filename; for ssts it would be the first 4 chars.
        ps_year = [
            x for x in ps
            if str(year) in x.as_posix().split('/')[-1].split('_')[-2][0:4]
        ]
        print(len(ps_year))
        ds_list = [xr.open_dataset(x) for x in ps_year]
        ds = xr.concat(ds_list, 'time')
        ds = ds.sortby('time')
        if name is None:
            filename = '{}-'.format(year) + '-'.join(
                ps[0].as_posix().split('/')[-1].split('-')[1:])
        else:
            filename = '{}-'.format(year) + name + '.nc'
        filepath = savepath / filename
        # delayed write with a dask progress bar:
        delayed = ds.to_netcdf(filepath, compute=False)
        with ProgressBar():
            results = delayed.compute()
    print('Done!')
    return
def read_all_jpl_station_harmonic_analysis(path=jpl_path, harm_path=jpl_path/'harmonic_analysis'):
    """Collect the annual/semiannual vertical harmonic-fit parameters and
    peak day-of-year of every JPL station file into one DataFrame (one row
    per station)."""
    from aux_gps import path_glob
    import xarray as xr
    import pandas as pd
    files = sorted(path_glob(harm_path, '*_V_harmonic_mm.nc'))
    dsl = [xr.open_dataset(x) for x in files]
    stations = [x.as_posix().split('/')[-1].split('_')[0] for x in files]
    annual_params = []
    semiannual_params = []
    annual_peak_doy = []
    semiannual_peak_doy = []
    for station, ds in zip(stations, dsl):
        a_name = '{}_V_annual'.format(station)
        sa_name = '{}_V_semiannual'.format(station)
        # each attr value is a 1-element sequence; take its first item:
        annual_params.append([x[0] for x in ds[a_name].attrs.values()])
        semiannual_params.append([x[0] for x in ds[sa_name].attrs.values()])
        annual_peak_doy.append(ds[a_name].idxmax().dt.dayofyear)
        semiannual_peak_doy.append(ds[sa_name].idxmax().dt.dayofyear)
    df = pd.DataFrame(annual_params, index=stations)
    df.columns = ['A_Amp', 'A_offset', 'A_freq', 'A_x0']
    df['SA_Amp'] = [x[0] for x in semiannual_params]
    df['SA_offset'] = [x[1] for x in semiannual_params]
    df['SA_freq'] = [x[2] for x in semiannual_params]
    df['SA_x0'] = [x[3] for x in semiannual_params]
    df['A_peak_doy'] = [x.item() for x in annual_peak_doy]
    df['SA_peak_doy'] = [x.item() for x in semiannual_peak_doy]
    return df
def save_subset(savepath, subset='med1'):
    """Load all netCDF files in *savepath* (keeping only the analysed_sst and
    analysis_error variables), concat and sort along time, optionally subset
    to a lat/lon window, and write one combined file back into *savepath*.

    Relies on module-level ``path_glob`` and ``xr``.
    """
    from dask.diagnostics import ProgressBar
    ps = path_glob(savepath, '*.nc')
    print(len(ps))
    ds_list = [
        xr.open_dataset(x, chunks={'time': 10})[['analysed_sst', 'analysis_error']] for x in ps
    ]
    ds = xr.concat(ds_list, 'time')
    ds = ds.sortby('time')
    if subset == 'med1':
        print('subsetting to med1')
        # ssts:
        lat_slice = [30, 50]
        lon_slice = [-20, 45]
        # sla:
        # NOTE(review): these reassignments shadow the sst slices above, so
        # only the sla window [31-32, 34-35] is ever used — confirm which
        # region is actually intended.
        lat_slice = [31, 32]
        lon_slice = [34, 35]
        ds = ds.sel(lat=slice(*lat_slice), lon=slice(*lon_slice))
    yrmin = ds['time'].dt.year.min().item()
    yrmax = ds['time'].dt.year.max().item()
    filename = '{}-{}_{}-'.format(subset, yrmin, yrmax) + \
        '-'.join(ps[0].as_posix().split('/')[-1].split('-')[1:])
    # delayed write with a dask progress bar:
    delayed = ds.to_netcdf(savepath / filename, compute=False)
    with ProgressBar():
        results = delayed.compute()
    return ds
def get_unique_rfns_from_folder(path, glob_str='*.gz', rfn_cut=7):
    """Return the unique RINEX filename prefixes (first *rfn_cut* characters
    of the stem) found in *path* for files matching *glob_str*."""
    from aux_gps import path_glob
    import numpy as np
    prefixes = [f.as_posix().split('/')[-1].split('.')[0][0:rfn_cut]
                for f in path_glob(path, glob_str)]
    return np.unique(prefixes)
def get_wrf_pw_at_dsea_gnss_coord(path=des_path, work_path=work_yuval, point=None):
    """Extract the WRF PW time series at the DSEA GNSS coordinate (or at an
    explicit *point*) from every pw_wrfout*.nc file in *path*, concatenated
    and de-duplicated along the 'Time' dimension."""
    from PW_stations import produce_geo_gnss_solved_stations
    import xarray as xr
    from aux_gps import get_nearest_lat_lon_for_xy
    from aux_gps import path_glob
    from aux_gps import get_unique_index
    df = produce_geo_gnss_solved_stations(path=work_path / 'gis', plot=False)
    dsea_point = df.loc['dsea'][['lat', 'lon']].astype(float).values
    series = []
    for file in path_glob(path, 'pw_wrfout*.nc'):
        pw_all = xr.load_dataset(file)
        freq = xr.infer_freq(pw_all['Time'])
        print(freq)
        if point is not None:
            print('looking for {} at wrf.'.format(point))
            dsea_point = point
        # nearest model grid cell to the requested (lat, lon):
        loc = get_nearest_lat_lon_for_xy(pw_all['XLAT'], pw_all['XLONG'],
                                         dsea_point)
        print(loc)
        series.append(pw_all.isel(south_north=loc[0][0],
                                  west_east=loc[0][1]))
    pw_ts = xr.concat(series, 'Time')
    return get_unique_index(pw_ts, dim='Time')
def calculate_PW_from_era5(path=era5_path, glob_str='era5_Q_israel*.nc',
                           water_density=1000.0, savepath=None):
    """Calculate precipitable water (PW, in mm) from ERA5 specific humidity
    on pressure levels, vertically integrating q dP / (rho_w * g) with the
    trapezoidal rule.

    Parameters
    ----------
    path : directory holding the ERA5 specific-humidity file(s).
    glob_str : glob pattern for the Q file (first match is used).
    water_density : liquid water density [kg/m^3].
    savepath : if given, the result is also written there as netCDF.
    """
    import xarray as xr
    from aux_gps import path_glob
    from aux_gps import calculate_g
    import numpy as np
    file = path_glob(path, glob_str)[0]
    Q = xr.open_dataset(file)['q']
    # gravity varies with latitude; the latitudinal mean is used:
    g = calculate_g(Q.lat)
    g.name = 'g'
    g = g.mean('lat')
    # levels are in hPa; convert to Pa:
    plevel_in_pa = Q.level * 100.0
    # P_{i+1} - P_i:
    plevel_diff = np.abs(plevel_in_pa.diff('level'))
    # Q_i + Q_{i+1}:
    Q_sum = Q.shift(level=-1) + Q
    # trapezoid: sum of 0.5*(Q_i+Q_{i+1})*dP / (rho_w*g); *1000 -> m to mm:
    pw_in_mm = ((Q_sum * plevel_diff) /
                (2.0 * water_density * g)).sum('level') * 1000.0
    pw_in_mm.name = 'pw'
    pw_in_mm.attrs['units'] = 'mm'
    if savepath is not None:
        yr_min = pw_in_mm.time.min().dt.year.item()
        yr_max = pw_in_mm.time.max().dt.year.item()
        filename = 'era5_PW_israel_{}-{}.nc'.format(yr_min, yr_max)
        print('saving {} to {}'.format(filename, savepath))
        comp = dict(zlib=True, complevel=9)  # best compression
        encoding = {var: comp for var in pw_in_mm.to_dataset(name='pw')}
        pw_in_mm.to_netcdf(savepath / filename, 'w', encoding=encoding)
        print('Done!')
    return pw_in_mm
def read_BD_ceilometer_yoav_all_years(path=ceil_path, savepath=None):
    """Read all yearly Bet-Dagan ceilometer CSVs in *path*, combine them into
    one time-sorted xarray Dataset with units/long_name attributes, and
    optionally save as netCDF."""
    from aux_gps import path_glob
    from aux_gps import save_ncfile
    import pandas as pd
    files = path_glob(path, 'ceilometer_BD*.csv')
    df = pd.concat([read_BD_ceilometer_yoav_one_year_csv(f) for f in files])
    df = df.sort_index()
    # columns look like 'name[unit]'; split into names and units:
    names = [x.split('[')[0] for x in df.columns]
    units = [x.split('[')[1].split(']')[0] for x in df.columns]
    long_names = [
        'total cloud cover',
        'cloud cover of the most cloudy layer',
        'cloud cover of the 1st cloud layer', '1st cloud base height',
        'cloud cover of the 2nd cloud layer', '2nd cloud base height',
        'cloud cover of the 3rd cloud layer', '3rd cloud base height',
        'cloud cover of the 4th cloud layer', '4th cloud base height',
        'cloud cover of the 5th cloud layer', '5th cloud base height',
        'Mixing layer height'
    ]
    df.columns = names
    # fix cloud height to meters again for until 22-09-2013:
    height_cols = [c for c in df.columns if '_H' in c]
    df.loc[:'2013-09-22', height_cols] *= (1 / 0.3048)
    ds = df.to_xarray()
    for i, var in enumerate(ds):
        ds[var].attrs['units'] = units[i]
        ds[var].attrs['long_name'] = long_names[i]
    if savepath is not None:
        filename = 'BD_clouds_and_MLH_from_ceilometers.nc'
        save_ncfile(ds, savepath, filename)
    return ds
def count_rinex_files_all_years(main_folder, suffix='*.gz', savepath=None,
                                reindex_with_hourly_freq=True):
    """Count RINEX files across all numeric year sub-folders of
    *main_folder*; optionally reindex onto a full hourly grid cut off three
    weeks before now, and save the result as CSV."""
    from aux_gps import path_glob
    import pandas as pd
    year_dirs = [x for x in path_glob(main_folder, '*/')
                 if x.as_posix().split('/')[-1].isdigit()]
    year_dirs = sorted(year_dirs) if False else year_dirs  # keep glob order
    counts = [count_rinex_files_on_year_folder(y, suffix) for y in year_dirs]
    df = pd.concat(counts, axis=0)
    df = df.sort_index()
    if reindex_with_hourly_freq:
        full_time = pd.date_range(df.index[0], df.index[-1], freq='1H')
        df = df.reindex(full_time)
        # now cutoff with 3 weeks before current time:
        now = pd.Timestamp.utcnow().floor('H')
        end_dt = (now - pd.Timedelta(21, unit='D')).tz_localize(None)
        df = df.loc[:end_dt]
    df.index.name = 'time'
    if savepath is not None:
        filename = 'Axis_RINEX_count_datetimes_historic.csv'
        df.to_csv(savepath / filename, na_rep='None', index=True)
        print('{} was saved to {}.'.format(filename, savepath))
    return df
def check_for_missing_rinex_in_axis_path(args):
    """Check the AXIS T02 month folder for gaps in daily RINEX; if a gap of
    more than one day is found, point ``args.T02_path`` at the last day
    folder so it can be recovered. Returns *args* either way.

    Bug fix: at a month transition in January the original computed
    ``calendar.month_abbr[now.month - 1]`` = ``month_abbr[0]`` which is the
    empty string; the previous month now wraps to December correctly.
    """
    import calendar
    import datetime
    from aux_gps import path_glob
    from pathlib import Path
    now = datetime.datetime.now()
    month_abr = calendar.month_abbr[now.month]
    month_path = 'Month.{}'.format(month_abr)
    T02_path = Path('/home/axis-gps/{}'.format(month_path))
    # in month transition:
    if not T02_path.is_dir():
        prev_month = 12 if now.month == 1 else now.month - 1
        month_abr = calendar.month_abbr[prev_month]
        month_path = 'Month.{}'.format(month_abr)
        T02_path = Path('/home/axis-gps/{}'.format(month_path))
    days = sorted(path_glob(T02_path, '*/'))
    last_day = int(days[-1].as_posix().split('.')[-1])
    # NOTE(review): across a month boundary now.day < last_day makes this
    # negative, which lands in the "no gaps" branch — confirm intended.
    day_diff = now.day - last_day
    if day_diff <= 1:
        print('Last day is {}th in {}, no rinex gaps above 1 days.'.format(
            last_day, month_abr))
        return args
    else:
        print('Found rinex gaps of {} days, recovering last rinex day {}.'.
              format(day_diff, last_day))
        args.T02_path = days[-1]
        return args
def read_dat_file(loadpath, station='tela', sample='monthly', field='PW',
                  rfunc='StepSize'):
    """Read an RHtests ``.dat`` output file for *station* and return a tuple
    of (DataFrame indexed by date, stats from the matching stat txt file)."""
    import pandas as pd
    from aux_gps import path_glob
    # RHtests function -> filename suffix:
    suffixes = {'FindU': 'U', 'FindUD': 'UD', 'StepSize': 'FINAL_F'}
    pattern = '{}_{}_{}_means_*_{}.dat'.format(station, field, sample,
                                               suffixes.get(rfunc))
    file = path_glob(loadpath, pattern)[0]
    df = pd.read_csv(file, header=None, delim_whitespace=True,
                     na_values="-999.00")
    df.columns = [
        'ind', 'date', station, 'trend_shift', 'mean_adj',
        '{}_anom'.format(station), 'anom_trend_shift',
        'seasonal_trend_shift', 'QM_adj', 'anom_trend_no_shift'
    ]
    df['date'] = df['date'].astype(str)
    if sample == 'monthly':
        # monthly dates come as YYYYMM00 -> keep only YYYYMM:
        df['date'] = pd.to_datetime(df['date'].str[:6], format='%Y%m')
    else:
        df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    df = df.set_index(df['date'])
    df = df.drop(['ind', 'date'], axis=1)
    df['shift'] = df['mean_adj'] - df[station]
    stats = read_stat_txt_file(loadpath, station=station, sample=sample,
                               field=field, rfunc=rfunc)
    return df, stats
def generate_backup(station, task, dates):
    """Tar the products of a gipsyx *task* for every station in *station*
    into ``<station>/backup/<station>_<task>_backup.tar.gz`` under the
    module-level ``workpath``. *dates* optionally restricts the files.

    Bug fix: an unknown *task* previously fell through all branches and
    raised a confusing NameError on ``path``; it now raises ValueError.
    """
    from aux_gps import tar_dir
    from aux_gps import slice_task_date_range
    # task -> (sub-path under the station dir, glob of files to back up):
    task_map = {'drdump': ('rinex/dr', '*.dr.gz'),
                'edit30hr': ('rinex/30hr', '*.dr.gz'),
                'run': ('rinex/hr30/results', '*.tdp'),
                'post': ('gipsyx_solutions', '*.nc')}
    if task not in task_map:
        raise ValueError('unknown task: {}'.format(task))
    sub_path, glob_str = task_map[task]
    for curr_sta in station:
        station_path = workpath / curr_sta
        path = station_path / sub_path
        filename = '{}_{}_backup.tar.gz'.format(curr_sta, task)
        savepath = station_path / 'backup'
        savepath.mkdir(parents=True, exist_ok=True)
        files_to_tar = path_glob(path, glob_str)
        if dates is not None:
            files_to_tar = slice_task_date_range(files_to_tar, dates, 'backup')
        try:
            tar_dir(files_to_tar, filename, savepath, compresslevel=None)
        except FileNotFoundError:
            print(
                'skipping {} , because no {} found in {}'.format(
                    curr_sta, glob_str, path))
            continue
        print('{} station {} files were backed up to {}'.format(
            curr_sta, glob_str, savepath / filename))
    return
def prep_30hr_all_steps(station_path, files, staDb, station='Alon', gde_tree=None):
    """Build one 30hr edited dr file from a chunk of hourly RINEX *files*:
    copy into *station_path*, gunzip, crx2rnx, teqc-concat, dataRecordDump
    and rnxEditGde. All *files* must belong to *station* (asserted) and are
    assumed sorted by hour.
    """
    import numpy as np
    from axis_process import copy_rinex_files_to_folder
    from axis_process import run_rinex_compression_on_folder
    from axis_process import teqc_concat_rinex
    from aux_gps import path_glob
    import os
    # first get station name from files and assert it is like station:
    station_from_files = np.unique([x.name.split('.')[0][:4] for x in files]).item()
    assert station_from_files == station
    # also get year (two-digit); may hold two years over a DEC-JAN chunk:
    yrs = np.unique([x.name.split('.')[1][:2] for x in files]).astype(int)
    # now copy files to station path in rinexfinal_path:
    copy_rinex_files_to_folder(files, station_path)
    # unzip them and crx2rnx and delete d files:
    for yr in yrs:
        # if there are more than one year (only in DEC-JAN):
        run_rinex_compression_on_folder(station_path, command='gunzip', glob='*.{}d.gz'.format(yr))
        run_rinex_compression_on_folder(station_path, command='crx2rnx', glob='*.{}d'.format(yr))
        dfiles = path_glob(station_path, '*.{}d'.format(yr))
        [x.unlink() for x in dfiles]
    # use new filename for concated rinex, e.g. Alon2150-2230.21o
    # (doy+hour of first and last hourly file):
    yr = yrs[0]
    doy_hour_start = files[0].name[4:8]
    doy_hour_end = files[-1].name[4:8]
    filename = '{}{}-{}.{}o'.format(station, doy_hour_start, doy_hour_end, yr)
    if len(yrs) > 1 and (np.abs(np.diff(yrs)) == 1).item():
        # year boundary chunk: concat the obs files of both years:
        teqc_concat_rinex(station_path, rfn=filename, glob='*.*o', cmd_path=None, delete_after_concat=True)
    else:
        teqc_concat_rinex(station_path, rfn=filename, glob='*.{}o'.format(yr), cmd_path=None, delete_after_concat=True)
    # now, dataRecordDump and delete o file:
    dataRecordDump_single_file(station_path, filename, rfn=filename[:17])
    file = station_path / filename
    file.unlink()
    # now rnxEditGde:
    dr_filename = filename[:17] + '.dr.gz'
    # new_filename = filename[:13] + '_edited' + filename[13:17] + '.dr.gz'
    rnxEditGde_single_file(station_path, dr_filename, staDb, new_filename=filename[:13], delete_dr_after=True, gde_tree=gde_tree)
    return
def final_historic_perp(rinexpath, rinexfinal_path, staDb, rinex_df, station='Alon', gde_tree=None):
    """Run the historic 30hr prep chain for one *station*: group its RINEX
    files by (year, doy), resume after the last already-edited chunk, and
    feed each remaining chunk to ``prep_30hr_all_steps``.

    Relies on the module-level ``logger``; ``rinexpath`` is currently unused.
    """
    import os
    from aux_gps import path_glob
    st_df = create_rinex_df_per_station(rinex_df, station=station)
    # hourly-file count per (year, doy) multi-index:
    mindex = st_df.groupby(['year', 'doy'])['rfn'].count()
    station_path = rinexfinal_path / station
    if not station_path.is_dir():
        logger.warning('{} is missing, creating it.'.format(station_path))
        os.mkdir(station_path)
    already_edited = sorted(
        path_glob(station_path, '{}*_edited.*.dr.gz'.format(station),
                  return_empty_list=True))
    if already_edited:
        last_hourly_rfn = already_edited[-1].name
        _, last_year, last_doy = parse_hourly_range_rfn(last_hourly_rfn)
        # resume after the last edited chunk.
        # NOTE(review): this slice keeps doy >= last_doy for *every*
        # remaining year, not just the first one — confirm intended.
        mindex = mindex.loc[slice(last_year, None), slice(last_doy, None), :]
        logger.info('found last RINEX: {}, year={}, doy={}.'.format(
            already_edited[-1].name, last_year, last_doy))
    for year, doy in mindex.index:
        files = create_station_windowed_chunk_rinex(st_df, station=station,
                                                    doy=int(doy),
                                                    year=int(year))
        prep_30hr_all_steps(station_path, files, staDb, station=station,
                            gde_tree=gde_tree)
    logger.info('Done prepring final {}.'.format(station))
def read_all_rinex_file_headers(rinexpath):
    """Read the headers of all ``*.Z`` RINEX files in *rinexpath* and return
    a DataFrame with one row per successfully-parsed file; parse failures
    are logged and skipped."""
    from aux_gps import path_glob
    import pandas as pd
    import logging
    logger = logging.getLogger('rinex_hdr_reader')
    di_list = []
    files = path_glob(rinexpath, '*.Z')
    logger.info('staring to read rnx files headers.')
    logger.info('proccessing {}'.format(rinexpath))
    cnt = 0
    total = len(files)
    for rfn in sorted(files):
        filename = rfn.as_posix().split('/')[-1][0:12]
        logger.info('reading rnx header of {} ({}/{})'.format(
            filename, cnt, total))
        try:
            di_list.append(read_one_rinex_file(rfn))
        except (ValueError, OSError):
            logger.error('error parsing rnx header of {}'.format(filename))
            continue
        cnt += 1
    df = pd.DataFrame(di_list)
    logger.info('done reading {} rnx files.'.format(cnt))
    return df
def process_mpoint_da_with_station_num(path=sound_path, station='08001',
                                       k_iqr=1):
    """Compute Ts/Tm/Tpw time series from a station's mpoint sounding
    DataArray, IQR-filter them, and save as a netCDF in *path*.

    Bug fix: the input glob used the module-level ``sound_path`` instead of
    the *path* argument, so a caller-supplied path was ignored when locating
    the input file (the default behavior is unchanged).
    """
    from aux_gps import path_glob
    import xarray as xr
    from aux_gps import keep_iqr
    file = path_glob(path, 'ALL*{}*.nc'.format(station))
    da = xr.open_dataarray(file[0])
    ts, tm, tpw = calculate_ts_tm_tpw_from_mpoint_da(da)
    ds = xr.Dataset()
    ds['Tm'] = xr.DataArray(tm, dims=['time'], name='Tm')
    ds['Tm'].attrs['unit'] = 'K'
    ds['Tm'].attrs['name'] = 'Water vapor mean atmospheric temperature'
    ds['Ts'] = xr.DataArray(ts, dims=['time'], name='Ts')
    ds['Ts'].attrs['unit'] = 'K'
    ds['Ts'].attrs['name'] = 'Surface temperature'
    ds['Tpw'] = xr.DataArray(tpw, dims=['time'], name='Tpw')
    ds['Tpw'].attrs['unit'] = 'mm'
    ds['Tpw'].attrs['name'] = 'precipitable_water'
    ds['time'] = da.time
    # drop outliers beyond k_iqr * IQR:
    ds = keep_iqr(ds, k=k_iqr, dim='time')
    yr_min = ds.time.min().dt.year.item()
    yr_max = ds.time.max().dt.year.item()
    ds = ds.rename({'time': 'sound_time'})
    filename = 'station_{}_soundings_ts_tm_tpw_{}-{}.nc'.format(
        station, yr_min, yr_max)
    print('saving {} to {}'.format(filename, path))
    comp = dict(zlib=True, complevel=9)  # best compression
    encoding = {var: comp for var in ds}
    ds.to_netcdf(path / filename, 'w', encoding=encoding)
    print('Done!')
    return ds
def load_all_station(path=aero_path, gis_path=gis_path):
    """Load every station netCDF in *path* and return a dict mapping each
    dataset's ``new_name`` attribute to the dataset.

    Bug fix: the glob used the module-level ``aero_path`` instead of the
    *path* argument, so a caller-supplied path was ignored (the default
    behavior is unchanged). ``gis_path`` is currently unused.
    """
    from aux_gps import path_glob
    import xarray as xr
    files = path_glob(path, '*.nc')
    dsl = [xr.open_dataset(file) for file in files]
    names = [x.attrs['new_name'] for x in dsl]
    ds_dict = dict(zip(names, dsl))
    return ds_dict
def read_all_rinex_files_in_path(path=ionex_path):
    """Read every RINEX obs file (``*.*o``) in *path* and return one dataset
    concatenated and sorted along time."""
    import xarray as xr
    from aux_gps import path_glob
    obs_list = [read_rinex_obs_with_attrs(f) for f in path_glob(path, '*.*o')]
    return xr.concat(obs_list, 'time').sortby('time')
def read_all_stations(path=aero_path, savepath=aero_path, glob='*.lev20'):
    """Read every AERONET level-2.0 file in *path*, saving each one via
    ``read_one_station`` into *savepath*."""
    from aux_gps import path_glob
    for file in path_glob(path, glob):
        print('reading {}...'.format(file.as_posix().split('/')[-1]))
        read_one_station(file, savepath)
    print('Done!')
    return
def read_sinex(path, glob='*.BSX'):
    """Read all SINEX bias files in *path* matching *glob* and return them
    concatenated along the 'time' dimension.

    Bug fixes: ``ds_list`` was never initialized (NameError on first append)
    and the concat was called on the last single dataset ``ds`` instead of
    the accumulated list.
    """
    from aux_gps import path_glob
    import xarray as xr
    files = path_glob(path, glob_str=glob)
    ds_list = []
    for file in files:
        ds_list.append(read_one_sinex(file))
    dss = xr.concat(ds_list, 'time')
    return dss
def read_radiometers(path=des_path):
    """Load all KIT HATPRO radiometer netCDF files from *path*, concatenate
    and sort them along time, and return the IWV series resampled to
    5-minute means.

    Bug fix: the glob used the module-level ``des_path`` instead of the
    *path* argument, so a caller-supplied path was ignored (the default
    behavior is unchanged).
    """
    from aux_gps import path_glob
    import xarray as xr
    files = path_glob(path, 'KIT_HATPRO_*.nc')
    dsl = [xr.load_dataset(x) for x in files]
    ds = xr.concat(dsl, 'time')
    ds = ds.sortby('time')
    iwv = ds['iwv'].resample(time='5T', keep_attrs=True).mean(keep_attrs=True)
    return iwv
def produce_pw_all_stations(ds, axis_path, mda_path):
    """Produce PWV (and its error) for every AXIS station from its latest
    ultra-rapid WetZ solution plus per-station surface temperature in *ds*,
    saving a per-station file and one merged AXIS-wide file.

    Parameters
    ----------
    ds : dataset with one surface-temperature series per station name.
    axis_path : root path containing one sub-directory per station.
    mda_path : path to the regression model loaded by ``load_mda``.

    Relies on the module-level ``logger``.
    """
    from PW_stations import load_mda
    from PW_stations import produce_GNSS_station_PW
    from aux_gps import fill_na_xarray_time_series_with_its_group
    from aux_gps import path_glob
    from aux_gps import save_ncfile
    import xarray as xr
    # first load mda:
    mda = load_mda(mda_path)
    # now loop over each station, produce pwv and save:
    st_dirs = path_glob(axis_path, '*/')
    st_dirs = [x for x in st_dirs if x.is_dir()]
    # numeric dir names are not stations (presumably year folders — verify):
    st_dirs = [x for x in st_dirs if not x.as_posix().split('/')[-1].isnumeric()]
    # NOTE(review): hard-coded station count; update if stations change:
    assert len(st_dirs) == 27
    pwv_list = []
    for st_dir in st_dirs:
        station = st_dir.as_posix().split('/')[-1]
        last_file = sorted(path_glob(st_dir/'dr/ultra', '*.nc'))[-1]
        # chars 4:13 of the filename — presumably the doy/hour stamp; verify:
        last_file_str = last_file.as_posix().split('/')[-1][4:13]
        wet = xr.load_dataset(last_file)['WetZ'].squeeze(drop=True)
        logger.info('loaded {}.'.format(last_file))
        wet_error = xr.load_dataset(last_file)['WetZ_error'].squeeze(drop=True)
        wet.name = station
        wet_error.name = station
        # resample temp to 5 mins and reindex to wet delay time:
        t = ds[station].resample(time='5T').ffill().reindex_like(wet.time)
        # fill in NaNs with mean hourly signal:
        t_new = fill_na_xarray_time_series_with_its_group(t, grp='hour')
        try:
            pwv = produce_GNSS_station_PW(wet, t_new, mda=mda, model_name='LR', plot=False)
            pwv_error = produce_GNSS_station_PW(wet_error, t_new, mda=mda, model_name='LR', plot=False)
            pwv_error.name = '{}_error'.format(pwv.name)
            pwv_ds = xr.merge([pwv, pwv_error])
            filename = '{}{}_PWV.nc'.format(station, last_file_str)
            save_ncfile(pwv_ds, st_dir/'dr/ultra', filename)
            pwv_list.append(pwv_ds)
        except ValueError as e:
            logger.warning('encountered error: {}, skipping {}'.format(e, last_file))
            continue
    dss = xr.merge(pwv_list)
    # merged filename uses the stamp of the last station processed:
    filename = 'AXIS_{}_PWV_ultra.nc'.format(last_file_str)
    save_ncfile(dss, axis_path, filename)