Example #1
def run_drMerge(filenames, in_path, duration):
    """Merge three consecutive daily dataRecord files into one 30-hour file."""
    import subprocess
    import logging
    from subprocess import CalledProcessError, TimeoutExpired
    from pathlib import Path
    from aux_gps import get_timedate_and_station_code_from_rinex
    logger = logging.getLogger('gipsyx')  # assumed logger name, as in run_gd2e_for_one_station
    # cnt (run counters) and move_files are module-level, see run_gd2e_for_one_station
    rfns = [x[0:12] for x in filenames]
    dts = [get_timedate_and_station_code_from_rinex(x, True) for x in rfns]
    if duration == '30hr':
        # 30-hour window: 21:00 of the day before to 03:00 of the day after
        start = dts[0].strftime('%Y-%m-%d') + ' 21:00:00'
        end = dts[2].strftime('%Y-%m-%d') + ' 03:00:00'
    else:
        raise ValueError('duration {} is not supported.'.format(duration))
    rfn = rfns[1]  # the middle day is the merge target
    dr_merged_file = Path().cwd() / '{}_merged.dr.gz'.format(rfn)
    logger.info('merging {}, {} and {} to {}'.format(
        *rfns, rfn + '_merged.dr.gz'))
    f_and_paths = [in_path / x for x in filenames]
    files_to_move = [rfn + x for x in ['_drmerge.log', '_drmerge.err']]
    command = 'drMerge.py -inFiles {} {} {} -outFile {} -start {} -end {} > {}.log 2>{}.err'.format(
        f_and_paths[0].as_posix(),
        f_and_paths[1].as_posix(), f_and_paths[2].as_posix(),
        dr_merged_file.as_posix(), start, end, rfn + '_drmerge',
        rfn + '_drmerge')
    try:
        subprocess.run(command, shell=True, check=True, timeout=60)
    except CalledProcessError:
        logger.error('drMerge.py failed on {}...'.format(filenames))
    except TimeoutExpired:
        logger.error(
            'drMerge.py timed out on {}, copying log files.'.format(
                filenames))
        cnt['failed'] += 1
        with open(Path().cwd() / files_to_move[1], 'a') as f:
            f.write('drMerge.py run has timed out!')
        return None
    move_files(Path().cwd(), Path().cwd(), files_to_move)
    return rfn + '_merged.dr.gz'
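
# move_files is called here and in run_gd2e_for_one_station but is not defined
# in these examples; a minimal sketch, assuming the signature implied by the
# call sites (a single name or a list, with optional renaming):
def move_files(path_orig, path_dest, files, out_files=None):
    """Hypothetical helper: move files from path_orig to path_dest,
    optionally renaming them to out_files."""
    import shutil
    if isinstance(files, str):
        files = [files]
    if out_files is None:
        out_files = files
    elif isinstance(out_files, str):
        out_files = [out_files]
    for orig, dest in zip(files, out_files):
        try:
            shutil.move((path_orig / orig).as_posix(),
                        (path_dest / dest).as_posix())
        except FileNotFoundError:
            print('{} not found in {}, skipping.'.format(orig, path_orig))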
Example #2
def copy_rinex_to_station_dir(main_rinexpath, filenames, suffix='.gz'):
    import shutil
    from aux_gps import get_timedate_and_station_code_from_rinex
    station = filenames[0][0:4]
    station_dir = main_rinexpath / station
    station_dir.mkdir(exist_ok=True)
    cnt = 0
    for filename in filenames:
        year = get_timedate_and_station_code_from_rinex(filename, just_dt=True).year
        doy = filename[4:7]
        if suffix is not None:
            filename += suffix
        to_copy_from = main_rinexpath / str(year) / doy / filename
        to_copy_to = station_dir / filename
        if to_copy_to.is_file():
            print('{} already exists in {}, skipping.'.format(filename, station_dir))
            continue
        try:
            shutil.copy(to_copy_from, to_copy_to)
        except FileNotFoundError:
            print('{} not found, missing?'.format(filename))
            continue
        cnt += 1
    print('Done copying {} rinex files to {}.'.format(cnt, station_dir))
    return
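
# Every example imports get_timedate_and_station_code_from_rinex from aux_gps;
# a minimal sketch, assuming filenames follow the RINEX 2 short-name
# convention ssssDDDf.yyt (e.g. 'tela2140.19d') and a year-2000 century pivot:
def get_timedate_and_station_code_from_rinex(rfn, just_dt=False, st_upper=True):
    import pandas as pd
    station = rfn[0:4].upper() if st_upper else rfn[0:4]
    doy = int(rfn[4:7])  # day of year
    yy = int(rfn[9:11])  # two-digit year
    year = 2000 + yy if yy < 80 else 1900 + yy  # assumed pivot
    dt = pd.Timestamp(year, 1, 1) + pd.Timedelta(doy - 1, unit='D')
    return dt if just_dt else (dt, station)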
def read_one_rnx_file(rfn_with_path):
    import georinex as gr
    import pandas as pd
    from pandas.errors import OutOfBoundsDatetime
    from aux_gps import get_timedate_and_station_code_from_rinex

    def parse_field(field):
        field_new = [x.split(' ') for x in field]
        flat = [item for sublist in field_new for item in sublist]
        return [x for x in flat if len(x) > 1]

    hdr = gr.rinexheader(rfn_with_path)
    header = {}
    ant = [val for key, val in hdr.items() if 'ANT' in key]
    try:
        header['ant'] = parse_field(ant)[1]
    except IndexError:
        header['ant'] = parse_field(ant)
    try:
        header['ant_serial'] = parse_field(ant)[0]
    except IndexError:
        header['ant_serial'] = parse_field(ant)
    rec = [val for key, val in hdr.items() if 'REC' in key]
    try:
        rec = ' '.join(parse_field(rec)[1:3])
    except IndexError:
        rec = parse_field(rec)
    header['rec'] = rec
    name = [val for key, val in hdr.items() if 'NAME' in key]
    try:
        header['name'] = parse_field(name)[0]
    except IndexError:
        header['name'] = parse_field(name)
    try:
        dts = pd.to_datetime(hdr['t0'])
    except (OutOfBoundsDatetime, KeyError):
        # header lacks a parsable t0; fall back to the filename's datetime
        rfn = rfn_with_path.as_posix().split('/')[-1][0:12]
        dts = get_timedate_and_station_code_from_rinex(rfn, True)
    header['dt'] = dts
    return header
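
# Usage sketch for read_one_rnx_file (the path below is a placeholder):
from pathlib import Path
hdr = read_one_rnx_file(Path('/data/rinex/tela2140.19d.Z'))
print(hdr['name'], hdr['ant'], hdr['ant_serial'], hdr['rec'], hdr['dt'])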
def save_yearly_gipsyx_results(path, savepath):
    """call read one station for each year and save the results, then
    concat and save to a bigger raw file, can add postproccess function"""
    from aux_gps import path_glob
    from aux_gps import get_timedate_and_station_code_from_rinex
    import logging
    import pandas as pd
    global cnt
    global tot
    logger = logging.getLogger('gipsyx_post_proccesser')
    files = path_glob(path, '*.tdp')
    tot = len(files)
    est_time_per_single_run = 0.3  # seconds
    logger.info('found {} _smoothFinal tdp files in {} to process.'.format(
        tot, path))
    dtt = pd.to_timedelta(est_time_per_single_run, unit='s') * tot
    extra_dtt = pd.to_timedelta(0.4, unit='s') * tot
    resample_dtt = pd.to_timedelta(0.75, unit='s') * tot
    dtt += extra_dtt
    dtt += resample_dtt
    logger.info('estimated time to completion of run: {}'.format(dtt))
    logger.info('check again in {}'.format(pd.Timestamp.now() + dtt))
    rfns = [x.as_posix().split('/')[-1][0:12] for x in files]
    dts = [
        get_timedate_and_station_code_from_rinex(rfn, just_dt=True)
        for rfn in rfns
    ]
    _, station = get_timedate_and_station_code_from_rinex(rfns[0])
    years = list(set([dt.year for dt in dts]))
    cnt = {'succ': 0, 'failed': 0}
    for year in sorted(years):
        filename = '{}_ppp_raw_{}.nc'.format(station, year)
        if (savepath / filename).is_file():
            logger.warning('{} already in {}, skipping...'.format(
                filename, savepath))
            continue
        _, _ = read_one_station_gipsyx_results(path, savepath, year)
    total = cnt['failed'] + cnt['succ']
    logger.info('Total files: {}, success: {}, failed: {}'.format(
        total, cnt['succ'], cnt['failed']))
    return
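
# path_glob from aux_gps is used throughout; a minimal sketch inferred from
# the call sites (the third positional argument in run_gd2e_for_one_station
# suggests a return-empty-list switch):
def path_glob(path, glob_str='*', return_empty_list=False):
    files = sorted(path.glob(glob_str))
    if not files and not return_empty_list:
        raise FileNotFoundError('{} not found in {}'.format(glob_str, path))
    return files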
Example #5
def read_organize_rinex(path, glob_str='*.Z', date_range=None):
    """read and organize the rinex file names for 30 hour run"""
    from aux_gps import get_timedate_and_station_code_from_rinex
    from aux_gps import path_glob
    from aux_gps import slice_task_date_range
    import pandas as pd
    import numpy as np
    import logging
    logger = logging.getLogger('gipsyx')
    dts = []
    rfns = []
    stations = []
    logger.info('reading and organizing rinex files in {}'.format(path))
    files = path_glob(path, glob_str)
    if date_range is not None:
        files = slice_task_date_range(files, date_range, 'read_organize_rinex')
    for file_and_path in files:
        filename = file_and_path.as_posix().split('/')[-1][0:12]
        dt, station = get_timedate_and_station_code_from_rinex(filename)
        stations.append(station)
        dts.append(dt)
        rfns.append(filename)
    # check for more than one station:
    if len(set(stations)) > 1:
        raise Exception('mixed station names in folder {}'.format(path))
    df = pd.DataFrame(data=rfns, index=dts)
    df = df.sort_index()
    df = df[~df.index.duplicated()]
    full_time = pd.date_range(df.index[0], df.index[-1], freq='1D')
    df = df.reindex(full_time)
    df.columns = ['rinex']
    df.index.name = 'time'
    df['30hr'] = np.nan
    df.iat[0, 1] = 0
    df.iat[-1, 1] = 0
    # flag days for the 30-hour run: 1 = day and both neighbors exist,
    # 0 = day exists but at least one neighbor is missing, NaN = day missing
    for i in range(1, len(df) - 1):
        nan3days = df.iloc[[i - 1, i, i + 1], 0].isnull().values
        if not nan3days[1]:
            df.iat[i, 1] = 1 if (not nan3days[0] and not nan3days[2]) else 0
    return df
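
# Usage sketch: a '30hr' flag of 1 marks days whose neighbors both exist and
# are therefore eligible for a 30-hour merged run (placeholder path):
from pathlib import Path
df = read_organize_rinex(Path('/data/rinex/tela'))
ready_for_30hr = df[df['30hr'] == 1]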
Example #6
def count_rinex_files_on_doy_folder(doy_folder, suffix='*.gz'):
    from aux_gps import path_glob
    from aux_gps import get_timedate_and_station_code_from_rinex
    import pandas as pd
    files = sorted(path_glob(doy_folder, suffix))
    print('counting folder {}: {} files found.'.format(doy_folder, len(files)))
    ser = []
    for file in files:
        name = file.name[0:12]
        dt, st = get_timedate_and_station_code_from_rinex(name, st_upper=False)
        ser.append(pd.Series([dt, st, file]))
    df = pd.DataFrame(ser)
    df.columns = ['datetime', 'station', 'filepath']
    # df['values'] = 1
    df = df.pivot(index='datetime', columns='station', values='filepath')
    # df.columns = df.columns.droplevel(0)
    df.index.name = 'time'
    df = df.sort_index()
    return df
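
# Usage sketch: non-null cells of the pivot hold file paths, so counting them
# gives files per station for that day-of-year folder (placeholder path):
from pathlib import Path
df = count_rinex_files_on_doy_folder(Path('/data/rinex/2019/214'))
print(df.count())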
Example #7
def get_dryz_from_one_station(path=dsea_gipsy_path / 'results'):
    from aux_gps import path_glob
    from aux_gps import get_timedate_and_station_code_from_rinex
    import xarray as xr
    files = sorted(path_glob(path, '*_debug.tree'))
    dt_list = []
    zhd_list = []
    for file in files:
        rfn = file.as_posix().split('/')[-1][0:12]
        dt = get_timedate_and_station_code_from_rinex(rfn, just_dt=True)
        # print('datetime {}'.format(dt.strftime('%Y-%m-%d')))
        dt_list.append(dt)
        zhd = get_dryz_from_one_file(file)
        zhd_list.append(zhd)
    zhd_da = xr.DataArray(zhd_list, dims=['time'])
    zhd_da['time'] = dt_list
    zhd_da *= 100
    # zhd_da.name = station
    zhd_da.attrs['units'] = 'cm'
    zhd_da.attrs['long_name'] = 'Zenith Hydrostatic Delay'
    zhd_da = zhd_da.sortby('time')
    return zhd_da
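
# Usage sketch: get_dryz_from_one_file (not shown in these examples) is
# assumed to extract the zenith hydrostatic delay from a single GipsyX
# debug.tree file; the resulting DataArray is in cm, indexed by time:
zhd = get_dryz_from_one_station()
print(zhd.mean().item())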
Example #8
def read_rinex_obs_with_attrs(filepath=ionex_path / 'bshm0210.20o'):
    import georinex as gr
    import pandas as pd
    from aux_gps import get_timedate_and_station_code_from_rinex
    ds = gr.load(filepath)
    print('reading {} rinex file'.format(filepath.as_posix().split('/')[-1]))
    dt, station = get_timedate_and_station_code_from_rinex(
        ds.attrs['filename'])
    ds.attrs['starting datetime'] = dt
    ds.attrs['station'] = station
    ssi = {
        'GPS': 'G',
        'GLONASS': 'R',
        'SBAS_payload': 'S',
        'Galileo': 'E',
        'Compass': 'C'
    }
    ssi_list = list(ssi.items())
    ds.attrs['satellite system identifier'] = ssi_list
    names = {
        'P': 'pseudorange value',
        'C': 'pseudorange value',
        'L': 'carrier phase value',
        'S': 'raw signal strength value'
    }
    units = {'P': 'm', 'C': 'm', 'L': 'full cycles', 'S': 'dbHz'}
    ds['time'] = pd.to_datetime(ds['time'])
    for da in ds.data_vars.keys():
        ds[da].attrs['name'] = names.get(da[0])
        ds[da].attrs['unit'] = units.get(da[0])
    # geometry-free combinations (ionosphere-sensitive observables):
    if 'P1' in ds.data_vars and 'P2' in ds.data_vars:
        ds['P4'] = ds['P2'] - ds['P1']
    if 'L1' in ds.data_vars and 'L2' in ds.data_vars:
        # carrier phase is in cycles; scale by wavelength (c/F) to get meters
        ds['L4'] = ds['L1'] * (speed_of_light / F1) - \
            ds['L2'] * (speed_of_light / F2)
    return ds
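
# read_rinex_obs_with_attrs relies on module-level constants not shown in
# these examples; presumably the GPS L1/L2 values:
speed_of_light = 299792458.0  # m/s
F1 = 1575.42e6  # GPS L1 carrier frequency, Hz
F2 = 1227.60e6  # GPS L2 carrier frequency, Hz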
def read_one_station_gipsyx_results(path, savepath=None, year=None):
    """read one station (all years) consisting of many tdp files"""
    import xarray as xr
    from aux_gps import get_timedate_and_station_code_from_rinex
    from aux_gps import path_glob
    import logging
    logger = logging.getLogger('gipsyx_post_proccesser')
    if year is not None:
        year = int(year)
        logger.info('getting tdp files from year {}'.format(year))
    df_list = []
    errors = []
    dts = []
    # logger.info('reading folder:{}'.format(path))
    files = path_glob(path, '*.tdp')
    for tdp_file in files:
        rfn = tdp_file.as_posix().split('/')[-1][0:12]
        dt, station = get_timedate_and_station_code_from_rinex(rfn)
        if year is not None:
            if dt.year != year:
                continue
            logger.info('processing {} ({}, {}/{})'.format(
                rfn, dt.strftime('%Y-%m-%d'), cnt['succ'] + cnt['failed'],
                tot))
        try:
            df, meta = process_one_day_gipsyx_output(tdp_file)
            dts.append(df.index[0])
            cnt['succ'] += 1
        except TypeError:
            logger.error(
                'problem reading {}, appending to errors...'.format(rfn))
            errors.append(rfn)
            cnt['failed'] += 1
            continue
        df_list.append(df)
    # sort the daily dataframes by their first date:
    df_dict = dict(zip(dts, df_list))
    df_list = [df_dict[key] for key in sorted(df_dict)]
    dss = [df.to_xarray() for df in df_list]
    dss_new = []
    for i, ds in enumerate(dss):
        keys_to_rename = list(ds.data_vars.keys())
        keys_to_rename.append('time')
        values_to_rename = [x + '-{}'.format(i) for x in keys_to_rename]
        dict_to_rename = dict(zip(keys_to_rename, values_to_rename))
        dss_new.append(ds.rename(dict_to_rename))
    ds = xr.merge(dss_new)
    ds.attrs['station'] = station
    for key, val in meta['units'].items():
        ds.attrs[key + '>units'] = val
    for key, val in meta['desc'].items():
        ds.attrs[key + '>desc'] = val
    if savepath is not None:
        comp = dict(zlib=True, complevel=9)  # best compression
        encoding = {var: comp for var in ds.data_vars}
        filename = '{}_ppp_raw_{}.nc'.format(station, year)
        ds.to_netcdf(savepath / filename, 'w', encoding=encoding)
        logger.info('{} was saved to {}'.format(filename, savepath))
    return ds, errors
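
# Usage sketch: cnt and tot are module-level globals normally initialized by
# save_yearly_gipsyx_results before this runs (placeholder path):
from pathlib import Path
cnt = {'succ': 0, 'failed': 0}
tot = 0
ds, errors = read_one_station_gipsyx_results(Path('/data/results'), year=2019)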
Example #10
def run_gd2e_for_one_station(dr_path, staDb, tree, rewrite, date_range=None):
    """runs gd2e.py for all datarecodrs in one folder(dr_path) with staDb.
    rewrite: overwrite the results tdp in dr_path / results."""
    from pathlib import Path
    import subprocess
    from subprocess import CalledProcessError
    from subprocess import TimeoutExpired
    import logging
    from aux_gps import get_timedate_and_station_code_from_rinex
    from aux_gps import path_glob
    from aux_gps import slice_task_date_range
    import pandas as pd
    logger = logging.getLogger('gipsyx')
    logger.info('starting gd2e.py main gipsyX run.')
    logger.info('working with {} station database'.format(staDb))
    if rewrite:
        logger.warning('overwrite files mode initiated.')
    results_path = dr_path / 'results'
    if tree.as_posix().strip():
        logger.info('working with {} tree'.format(tree))
    try:
        results_path.mkdir()
    except FileExistsError:
        logger.info(
            '{} already exists, using that folder.'.format(results_path))


    cnt = {'succ': 0, 'failed': 0}
    files = path_glob(dr_path, '*.dr.gz')
    if date_range is not None:
        files = slice_task_date_range(files, date_range, 'run')
    tot = len(files)
    logger.info('found {} dr.gz files in {} to run.'.format(tot, dr_path))
    tot_final = len(path_glob(results_path, '*_smoothFinal.tdp', True))
    logger.info('found {} _smoothFinal.tdp files in {}'.format(
        tot_final, results_path))
    tot_to_run = tot - tot_final
    est_time_per_single_run = 22.0  # seconds
    dtt = pd.to_timedelta(est_time_per_single_run, unit='s') * tot_to_run
    logger.info('estimated time to completion of run: {}'.format(dtt))
    logger.info('check again in {}'.format(pd.Timestamp.now() + dtt))
    for file_and_path in files:
        rfn = file_and_path.as_posix().split('/')[-1][0:12]
        dt, station = get_timedate_and_station_code_from_rinex(rfn)
        final_tdp = '{}_smoothFinal.tdp'.format(rfn)
        logger.info('processing {} ({}, {}/{})'.format(
            rfn, dt.strftime('%Y-%m-%d'), cnt['succ'] + cnt['failed'], tot))
        if not rewrite:
            if (results_path / final_tdp).is_file():
                logger.warning('{} already exists in {}, skipping...'.format(
                    final_tdp, results_path))
                cnt['succ'] += 1
                continue
        command = 'gd2e.py -drEditedFile {} -recList {} -staDb {} -treeS {} > {}.log 2>{}.err'.format(
            file_and_path.as_posix(), station, staDb.as_posix(), tree, rfn,
            rfn)
        files_to_move = ['{}{}'.format(rfn, x) for x in ['.log', '.err']]
        more_files = ['finalResiduals.out', 'smoothFinal.tdp']
        more_files_rfn = ['{}_{}'.format(rfn, x) for x in more_files]
        try:
            subprocess.run(command, shell=True, check=True, timeout=300)
            move_files(Path().cwd(), results_path, more_files, more_files_rfn)
            move_files(Path().cwd(), results_path, 'Summary',
                       '{}_Summary.txt'.format(rfn))
            cnt['succ'] += 1
        except CalledProcessError:
            logger.error('gipsyx failed on {}, copying log files.'.format(rfn))
            cnt['failed'] += 1
        except TimeoutExpired:
            logger.error(
                'gipsyx timed out on {}, copying log files.'.format(rfn))
            cnt['failed'] += 1
            with open(Path().cwd() / files_to_move[1], 'a') as f:
                f.write('GipsyX run has timed out!')
        move_files(Path().cwd(), results_path, files_to_move)
        move_files(Path().cwd(), results_path, 'debug.tree',
                   '{}_debug.tree'.format(rfn))
    logger.info('Done!')
    total = cnt['succ'] + cnt['failed']
    logger.info('Total files: {}, success: {}, failed: {}'.format(
        total, cnt['succ'], cnt['failed']))
    return
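
# Usage sketch (placeholder paths; staDb is a GipsyX station database file
# and tree a GipsyX tree directory):
from pathlib import Path
run_gd2e_for_one_station(Path('/data/dr/tela'), Path('/data/ALL.staDb'),
                         Path('/data/my.tree'), rewrite=False)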
def single_station_rinex_using_wget(save_dir,
                                    minimum_year=None,
                                    station='tela',
                                    db='garner'):
    import subprocess
    from subprocess import CalledProcessError
    from aux_gps import datetime_to_rinex_filename
    from aux_gps import get_timedate_and_station_code_from_rinex
    import pandas as pd
    import logging
    logger = logging.getLogger('rinex_garner')
    savepath = save_dir
    cnt = 0
    logger.info(
        'Starting rinex download for station {} using wget from {} ftp site'.
        format(station, db))
    savepath.mkdir(parents=True, exist_ok=True)
    if minimum_year is not None:
        logger.info('starting search from year {}'.format(minimum_year))
        # note: the end of the search date range is hard-coded
        dts = pd.date_range('{}-01-01'.format(minimum_year),
                            '2019-10-15', freq='1D')
    else:
        today = pd.Timestamp.now().strftime('%Y-%m-%d')
        dts = pd.date_range('1988-01-01', today, freq='1D')
    rfns = [datetime_to_rinex_filename(station, x) for x in dts.to_list()]
    for rfn in rfns:
        filename = rfn + '.Z'
        if (savepath / filename).is_file():
            logger.warning('{} already exists in {}, skipping...'.format(
                filename, savepath))
            continue
        dt = get_timedate_and_station_code_from_rinex(rfn, just_dt=True)
        year = dt.year
        yrd = '{}{}'.format(str(year)[-2:], 'd')
        dayofyear = str(dt.dayofyear).zfill(3)  # zero-pad day of year to 3 digits
        if db == 'garner':
            command = 'wget -q -P {}'.format(savepath)\
                + ' http://anonymous:shlomiziskin%[email protected]'\
                + '/pub/rinex/{}/{}/{}'.format(year, dayofyear, filename)
        elif db == 'cddis':
            command = 'wget -q -P {}'.format(savepath)\
                + ' ftp://anonymous:shlomiziskin%[email protected]/gnss/data/daily/'\
                + '{}/{}/{}/{}'.format(year, dayofyear, yrd, filename)
        try:
            subprocess.run(command, shell=True, check=True)
            logger.info('Downloaded {} to {}.'.format(filename, savepath))
            cnt += 1
        except CalledProcessError:
            logger.error('File {} not found in url'.format(filename))
    logger.info('Done downloading a total of {} files to {}'.format(
        cnt, savepath))
    return
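
# Usage sketch (placeholder directory):
from pathlib import Path
single_station_rinex_using_wget(Path('/data/rinex/tela'), minimum_year=2018,
                                station='tela', db='garner')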