Example #1
def main(new_arg):
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg:
        return
    if cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(**cfg['in'],
                                      b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message)
        return ()

    # cfg = {'in': {}}
    # cfg['in']['path'] = \
    #     r'd:\workData\BalticSea\181005_ABP44\navigation\2018-10-06tracks_copy.gpx'
    # r'd:\WorkData\_experiment\_2017\tracker\170502.gpx'
    # r'd:\workData\_experiment\2016\GPS_tracker\sms_backup\sms-20160225135922.gpx'
    for ifile, nameFull in enumerate(cfg['in']['paths'], start=1):
        print('{}. {}'.format(ifile, nameFull), end=', ')
        gpx2csv(nameFull)
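
# Usage sketch (not from the source; the ini path and flags are hypothetical,
# assuming my_argparser() defines them). main() accepts an argv-style list:
#
#     main(['cfg/gpx2csv.ini', '--b_interact', '0'])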
Example #2
    def main_cfg(cfg: DictConfig):  # arg is required by hydra; not used when called directly
        """
        ----------------------------
        Add data from CSV-like files
        to Pandas HDF5 store*.h5
        ----------------------------
        """

        #print(OmegaConf.to_yaml(cfg))
        global lf
        # cfg = cfg_from_args(argparser_files(), **kwargs)
        if not cfg.program.return_:
            print('Cannot initialise')
            return cfg
        elif cfg.program.return_ == '<cfg_from_args>':  # to help testing
            return cfg

        lf = LoggingStyleAdapter(init_logging(logging, None, cfg.program.log, cfg.program.verbose))
        print('\n' + this_prog_basename(__file__), end=' started. ')
        try:
            cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
                **cfg['in'], b_interact=cfg['program']['b_interact'])
        except Ex_nothing_done as e:
            print(e.message)
            return ()

        return cfg
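
# main_cfg above is written to be driven by hydra. A minimal entry point might
# look like this (a sketch: 'conf'/'config' are hypothetical names; the
# decorator itself is the standard hydra API):
#
#     import hydra
#     from omegaconf import DictConfig
#
#     @hydra.main(config_path='conf', config_name='config')
#     def app(cfg: DictConfig) -> None:
#         main_cfg(cfg)
#
#     if __name__ == '__main__':
#         app()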
Example #3
        def main_cfg(cfg: DictConfig):
            global l

            # cfg = cfg_from_args(argparser_files(), **kwargs)
            if not cfg or not cfg['program'].get('return'):
                print('Cannot initialise')
                return cfg
            elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
                return cfg

            l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
            print('\n' + this_prog_basename(__file__), end=' started. ')
            try:
                cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
                    **cfg['in'], b_interact=cfg['program']['b_interact'])

            except Ex_nothing_done as e:
                print(e.message)
                return ()

            return cfg
Example #4
drive_d = Path('D:/' if sys.platform == 'win32' else
               '/mnt/D')  # allows running on both my Linux and Windows systems
scripts_path = drive_d.joinpath('Work/_Python3/And0K/h5toGrid/scripts')
sys.path.append(str(Path(scripts_path).parent.resolve()))  # os.getcwd()
from to_pandas_hdf5.csv2h5 import main as csv2h5
from to_pandas_hdf5.csv_specific_proc import correct_kondrashov_txt, rep_in_file, correct_baranov_txt
from to_pandas_hdf5.h5_dask_pandas import h5q_interval2coord
from inclinometer.h5inclinometer_coef import h5copy_coef
import inclinometer.incl_h5clc as incl_h5clc
import inclinometer.incl_h5spectrum as incl_h5spectrum
import veuszPropagate
from utils_time import pd_period_to_timedelta
from utils2init import path_on_drive_d, init_logging, open_csv_or_archive_of_them, st

# l = logging.getLogger(__name__)
l = init_logging(logging, None, None, 'INFO')

if True:  # set False to disable: experimental speedup but takes memory
    from dask.cache import Cache
    cache = Cache(2e9)  # Leverage two gigabytes of memory
    cache.register()  # Turn cache on globally
if False:  # set True to debug with the "synchronous" scheduler
    l.warning('using "synchronous" scheduler for debugging')
    import dask
    dask.config.set(scheduler='synchronous')
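
# To check which scheduler dask will use (standard dask API):
#     import dask
#     dask.config.get('scheduler', None)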

# Directory where inclinometer data will be stored
path_cruise = path_on_drive_d(
    r'd:\WorkData\BalticSea\200628_Pregolya,Lagoon-inclinometer')
r"""
d:\WorkData\BalticSea\200630_AI55\inclinometer
Example #5
def main(new_arg=None):
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg:
        return
    if new_arg == '<return_cfg>':  # to help testing
        return cfg
    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    if not cfg['out']['path'].is_absolute():
        # set path relative to cfg['in']['db_path']
        cfg['out']['path'] = cfg['in']['db_path'].with_name(str(cfg['out']['path']))

    l.warning('\n {}({}) is going to save gpx to ..{} dir. '.format(
        this_prog_basename(__file__), cfg['in']['db_path'], cfg['out']['path'].parent))

    if cfg['out']['select_from_tablelog_ranges'] is None:
        gpx_symbols = None
    else:
        gpx_symbols = init_gpx_symbols_fun(cfg['out'])

    global gpx_names_funs  # Shortcut for cfg['out']['gpx_names_funs']

    # Load data #################################################################
    qstr_trange_pattern = "index>=Timestamp('{}') & index<=Timestamp('{}')"
    with pd.HDFStore(cfg['in']['db_path'], mode='r') as store:
        # Find tables by pattern
        if '*' in cfg['in']['tables'][0]:
            # if 'table_prefix' in cfg['in']
            pattern_tables = cfg['in']['tables'][0]
            cfg['in']['tables'] = h5find_tables(store, pattern_tables)
            len_tables = len(cfg['in']['tables'])
            msg = 'Found {} tables with pattern {}'.format(len_tables, pattern_tables)
            if len_tables:
                l.info(msg)
            else:
                raise Ex_nothing_done(msg + '!')

            gpx_names_funs = []
            for itbl in range(len(cfg['in']['tables'])):  # same for each table
                gpx_names_funs.append(cfg['out']['gpx_names_funs'][0])
        else:  # fixed number of tables
            # initialise with defaults if needed:
            gpx_names_funs = cfg['out']['gpx_names_funs']
            for itbl in range(len(gpx_names_funs), len(cfg['in']['tables'])):
                gpx_names_funs.append('i+1')
        dfs_rnav = []
        tbl_names_all_shortened = []
        for itbl, tblD in enumerate(cfg['in']['tables']):
            print(itbl, '. ', tblD, end=': ', sep='')
            if cfg['in']['tables_log'][0]:
                tblL = tblD + '/' + cfg['in']['tables_log'][0]
                try:
                    dfL = store[tblL]
                except KeyError as e:
                    l.warning(' '.join([s for s in e.args if isinstance(s, str)]))
                    continue
            else:  # only for tables without log (usually no such tables)
                l.warning('configuration specifies to get data without use of "log..." tables')
                st_en = store[tblD].index[[0, -1]]
                if cfg['process']['period_files']:
                    t_intervals_start = pd.date_range(
                        start=st_en[0].normalize(),
                        end=max(st_en[-1], st_en[-1].normalize() + pd_period_to_timedelta(
                            cfg['process']['period_files'])),
                        freq=cfg['process']['period_files'])[1:]  # makes last t_interval_start >= all_data[-1]
                    dfL = pd.DataFrame.from_records({'DateEnd': t_intervals_start, 'fileName': tblD},
                                                    index=st_en[:1].append(t_intervals_start[:-1]))
                else:
                    dfL = pd.DataFrame.from_records({'DateEnd': st_en[-1], 'fileName': tblD}, index=st_en[:1])

            gpx_names_fun_str = "lambda i, row, t=0: '{}'.format({})".format(
                cfg['out']['gpx_names_fun_format'],
                gpx_names_funs[itbl])
            gpx_names_fun = eval(compile(gpx_names_fun_str, '', 'eval'))
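            # e.g. with gpx_names_fun_format='{}' and gpx_names_funs[itbl]='i+1'
            # the string above builds: lambda i, row, t=0: '{}'.format(i+1)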
            if cfg['out']['select_from_tablelog_ranges'] is None:
                # Use all data in the ranges specified by log rows and save tracks (not points)

                for irow, r in enumerate(dfL.itertuples()):  # iterrows()
                    qstr = qstr_trange_pattern.format(r.Index, r.DateEnd)
                    print(qstr, end='... ')
                    try:
                        dfD = store.select(cfg['in']['table_nav'] if cfg['in']['table_nav'] else tblD,
                                           qstr, columns=['Lat', 'Lon', 'DepEcho'])
                    except Exception as e:
                        l.exception('Error when querying: {}. '.format(qstr))
                        # '\n==> '.join([s for s in e.args if isinstance(s, str)])))
                        continue
                    # Keep data with period = 1s only
                    dfD = dfD[~dfD.index.round(pd.Timedelta(seconds=1)).duplicated()]
                    # dfD.drop_duplicates(['Lat', 'Lon', 'index'])

                    bGood = filterGlobal_minmax(dfD, dfD.index, cfg['filter'])
                    dfD = dfD[bGood]
                    # Add UTC time and table name to output file name
                    # Local time and table name to gpx object name
                    str_time_long = '{:%y%m%d_%H%M}'.format(r.Index)
                    r = r._replace(Index=timzone_view(r.Index, cfg['out']['dt_from_utc_in_comments']))
                    tblD_safe = file_from_tblname(tblD, cfg['in']['tables_log'][0])
                    try:
                        gpx_names_fun_result = gpx_names_fun(tblD_safe, r)  # '{:%y%m%d}'.format(timeLocal)
                    except TypeError as e:
                        raise TypeError('Cannot evaluate gpx_names_fun "{}"'.format(gpx_names_fun_str)).with_traceback(
                            e.__traceback__)

                    save_to_gpx(
                        dfD, cfg['out']['path'].with_name(f'{str_time_long}{tblD_safe}'),
                        gpx_obj_namef=gpx_names_fun_result, cfg_proc=cfg['process'])

                    if len(cfg['in']['tables']) > 1:
                        nav2add_cur = dfD if irow == 0 else nav2add_cur.append(dfD)
                if len(cfg['in']['tables']) > 1:
                    nav2add_cur = nav2add_cur.assign(itbl=itbl)  # tag the accumulated rows (not only the last dfD) with the table index

            else:
                # Use only 1 data point per log row

                if cfg['out']['select_from_tablelog_ranges'] != 0:
                    print('selecting from {} row index of log table'.format(
                        cfg['out']['select_from_tablelog_ranges']))

                try:
                    dfL.index = dfL.index.tz_convert('UTC')
                except TypeError as e:
                    print((e.msg if hasattr(e, 'msg') else str(e)) + '!\n- continuing, presuming the log index is UTC...')
                print(end='all log data ')
                time_points = (dfL.index if cfg['out']['select_from_tablelog_ranges'] == 0 else
                               dfL['DateEnd'] if cfg['out']['select_from_tablelog_ranges'] == -1 else
                               None)
                if time_points is None:
                    raise ValueError("cfg['out']['select_from_tablelog_ranges'] must be 0 or -1")
                cols_nav = ['Lat', 'Lon', 'DepEcho']
                nav2add = h5select(store, cfg['in']['table_nav'], cols_nav, time_points=time_points,
                                   dt_check_tolerance=cfg['process']['dt_search_nav_tolerance'],
                                   query_range_lims=(time_points[0], dfL['DateEnd'].iloc[-1])
                                   )[0]
                cols_nav = nav2add.columns  # not all columns may be loaded
                # Try to get non-NaN values from dfL if it has the needed columns (edges' data may have been written there with _st/_en suffixes)
                isna = nav2add.isna()
                dfL_col_suffix = 'st' if cfg['out']['select_from_tablelog_ranges'] == 0 else 'en'
                for col in cols_nav:
                    col_dat = f'{col}_{dfL_col_suffix}'
                    if isna[col].any() and col_dat in dfL.columns:
                        b_use = isna[col].values & dfL[col_dat].notna().values
                        nav2add.loc[b_use, col] = dfL.loc[b_use, col_dat].values

                nav2add.index = timzone_view(nav2add.index, dt_from_utc=cfg['out']['dt_from_utc_in_comments'])
                # tz_local= tzoffset(None, cfg['out']['dt_from_utc_in_comments'].total_seconds())
                # if nav2add.index.tz is None:
                #     # think if time zone of tz-naive Timestamp is naive then it is UTC
                #     nav2add.index = nav2add.index.tz_localize('UTC')
                # nav2add.tz_convert(tz_local, copy= False)

                # Save to gpx waypoints
                nav2add_cur = nav2add.assign(itbl=itbl)

                # if 'gpx_names_funs' in cfg['out'] and \
                #     len(cfg['out']['gpx_names_funs'])>itbl:
                #
                #     gpx_names = eval(compile('lambda i: str({})'.format(
                #         cfg['out']['gpx_names_funs'][itbl]), [], 'eval'))
                #
                save_to_gpx(nav2add_cur,
                            cfg['out']['path'] / f"stations_{file_from_tblname(tblD, cfg['in']['tables_log'][0])}",
                            gpx_obj_namef=gpx_names_fun, waypoint_symbf=gpx_symbols,
                            cfg_proc=cfg['process']
                            )
                # save_to_csv(nav2add, dfL.index, cfg['out']['path'].with_name(f'nav{tblD}.txt'))
                if False:  # Show table info
                    store.get_storer(tblD).table

                    nodes = sorted(store.root.__members__)  # , key=number_key
                    print(nodes)
                    # store.get_node('CTD_Idronaut(Redas)').logFiles        # next level nodes

            # prepare saving of combined gpx
            if tbl_names_all_shortened:
                i_new = 0
                for c_prev, c_new in zip(tbl_names_all_shortened[-1], tblD):
                    if c_new == c_prev:
                        i_new += 1
                    else:
                        break
                tbl_names_all_shortened.append(tblD[i_new:])
            else:
                tbl_names_all_shortened.append(tblD)
            dfs_rnav.append(nav2add_cur)

        if len(cfg['in']['tables']) > 1 and cfg['out']['gpx_names_funs_cobined']:
            print('combined: ', end='')  # Save combined data to gpx
            df_rnav_combined = pd.concat(dfs_rnav)
            df_rnav_combined.sort_index(inplace=True)
            # Save to gpx waypoints
            if 'gpx_names_funs' in cfg['out']['gpx_names_funs_cobined']:
                gpx_names_funs = [  # row not used; it is here only for compatibility with tracks
                    eval(compile("lambda i: " + f, '', 'eval')) for f in gpx_names_funs]
            gpx_names_fun = eval(compile(
                "lambda i,row,t: '{gpx_names_fun_format}'.format({gpx_names_funs_cobined})".format_map(
                    cfg['out']), '', 'eval'))

            # gpx_symbols = lambda row: cfg['out']['gpx_symbols'][sym_index_fun(row)]

            # gpx_names = eval(compile("lambda i,row: '{gpx_names_fun_format}'.format({gpx_names_funs_cobined})".format_map(cfg['out']), '', 'eval'))
            # gpx_names = lambda i: str(i + 1)

            save_to_gpx(
                df_rnav_combined,
                cfg['out']['path'].with_name(
                    'all_' + file_from_tblname(','.join(tbl_names_all_shortened), cfg['in']['tables_log'][0])),
                gpx_obj_namef=gpx_names_fun, waypoint_symbf=gpx_symbols, cfg_proc=cfg['process'])
    print('Ok')
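
# The qstr_trange_pattern used above is an ordinary pandas HDFStore "where"
# expression. A minimal standalone sketch (store path and table name are
# hypothetical):
#
#     import pandas as pd
#     with pd.HDFStore('nav.h5', mode='r') as store:
#         qstr = "index>=Timestamp('2020-06-28') & index<=Timestamp('2020-06-29')"
#         df = store.select('navigation', qstr, columns=['Lat', 'Lon'])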
Example #6
def main(new_arg=None, **kwargs):
    """

    :param new_arg: list of strings, command line arguments
    :param kwargs: dict of dicts (one per ini section): specified values override ini values
    """

    # global l
    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)
    cfg['in']['db_coefs'] = Path(cfg['in']['db_coefs'])
    for path_field in ['db_coefs', 'path_cruise']:
        if not cfg['in'][path_field].is_absolute():
            cfg['in'][path_field] = (
                cfg['in']['cfgFile'].parent / cfg['in'][path_field]
            ).resolve().absolute()  # cfg['in']['cfgFile'].parent /

    def constant_factory(val):
        def default_val():
            return val

        return default_val

    for lim in ('min_date', 'max_date'):
        cfg['filter'][lim] = defaultdict(
            constant_factory(cfg['filter'][lim].get(
                '0', cfg['filter'][lim].get(0))), cfg['filter'][lim])
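
    # The defaultdict above returns the probe-0 limit for any probe number
    # lacking its own entry. Illustration with hypothetical values:
    #     d = defaultdict(constant_factory('2020-01-01'), {5: '2020-06-01'})
    #     d[5], d[99]  # -> ('2020-06-01', '2020-01-01')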

    l = init_logging(logging, None, None, 'INFO')
    #l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])

    if True:  # set False to disable: experimental speedup but takes memory
        from dask.cache import Cache
        cache = Cache(2e9)  # Leverage two gigabytes of memory
        cache.register()  # Turn cache on globally
    if cfg['program']['dask_scheduler']:
        if cfg['program']['dask_scheduler'] == 'distributed':
            from dask.distributed import Client
            client = Client(
                processes=False
            )  # navigate to http://localhost:8787/status to see the diagnostic dashboard if you have Bokeh installed
            # processes=False: avoids inter-worker communication; suits computations that release the GIL (numpy, da.array); errors occur without it
        else:
            if cfg['program']['dask_scheduler'] == 'synchronous':
                l.warning('using "synchronous" scheduler for debugging')
            import dask
            dask.config.set(scheduler=cfg['program']['dask_scheduler'])

    # Run steps :
    st.start = cfg['program']['step_start']
    st.end = cfg['program']['step_end']
    st.go = True

    if not cfg['out']['db_name']:
        # set name from 'path_cruise' name (or its parent) if it starts with digits; "*inclinometer*" has priority for the name
        for p in (lambda p: [p, p.parent])(cfg['in']['path_cruise']):
            m = re.match(r'(^[\d_]*).*', p.name)
            if m:
                break
        cfg['out']['db_name'] = f"{m.group(1).strip('_')}incl.h5"
    dir_incl = next((d for d in cfg['in']['path_cruise'].glob('*inclinometer*')
                     if d.is_dir()), cfg['in']['path_cruise'])
    db_path = dir_incl / cfg['out']['db_name']

    # ---------------------------------------------------------------------------------------------
    def fs(probe, name):
        return 5
        # if 'w' in name.lower():  # Baranov's wavegauge electronic
        #     return 5  # 10
        # if probe < 20 or probe in [23, 29, 30, 32, 33]:  # 30 [4, 11, 5, 12] + [1, 7, 13, 30]
        #     return 5
        # if probe in [21, 25, 26] + list(range(28, 35)):
        #     return 8.2
        # return 4.8

    def datetime64_str(time_str: Optional[str] = None) -> np.ndarray:
        """
        Reformat time_str to ISO 8601 or to 'NaT'. Used here as input for functions that convert str to numpy.datetime64
        :param time_str: May be 'NaT'
        :return: ndarray of strings (tested for 1 element only) formatted by numpy.
        """
        return np.datetime_as_string(np.datetime64(time_str, 's'))
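    # e.g. datetime64_str('2020-09-10') -> '2020-09-10T00:00:00'; datetime64_str(None) -> 'NaT'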

    probes = cfg['in']['probes'] or range(
        1, 41)  # sets default range, specify your values before line ---
    raw_root, subs_made = re.subn('INCL_?', 'INKL_',
                                  cfg['in']['probes_prefix'].upper())
    # Note: cannot find additional uncorrected files for a probe if corrected ones already exist in the search path (move them out if needed)
    if st(1):
        i_proc_probe = 0  # counter of processed probes
        i_proc_file = 0  # counter of processed files
        # pattern to identify only this probe's raw data files that need correction: '*INKL*{:0>2}*.[tT][xX][tT]'

        raw_parent = dir_incl / '_raw'
        dir_out = raw_parent / re.sub(r'[.\\/ ]', '_', cfg['in']['raw_subdir'])
        # re.sub flattens multilevel subdirs to one level, which is all correct_fun() can create
        raw_parent /= cfg['in']['raw_subdir']
        for probe in probes:
            raw_found = []
            raw_pattern_file = cfg['in']['raw_pattern'].format(prefix=raw_root,
                                                               number=probe)
            correct_fun = partial(
                correct_kondrashov_txt if subs_made else correct_baranov_txt,
                dir_out=dir_out)
            # if not archive:
            if ('.zip' not in cfg['in']['raw_subdir'].lower()
                    and '.rar' not in cfg['in']['raw_subdir'].lower()) or raw_parent.is_dir():
                raw_found = list(raw_parent.glob(raw_pattern_file))
            if not raw_found:
                # Check if corrected files for the probe, generated by correct_kondrashov_txt(), already exist. If so, just use them
                raw_found = list(
                    raw_parent.glob(
                        f"{cfg['in']['probes_prefix']}{probe:0>2}.txt"))
                if raw_found:
                    print('corrected csv file', [r.name for r in raw_found],
                          'found')
                    correct_fun = lambda x: x
                elif not cfg['in']['raw_subdir']:
                    continue

            for file_in in (raw_found or open_csv_or_archive_of_them(
                    raw_parent, binary_mode=False, pattern=raw_pattern_file)):
                file_in = correct_fun(file_in)
                if not file_in:
                    continue
                tbl = f"{cfg['in']['probes_prefix']}{probe:0>2}"
                # tbl = re.sub('^((?P<i>inkl)|w)_0', lambda m: 'incl' if m.group('i') else 'w',  # correct name
                #              re.sub('^[\d_]*|\*', '', file_in.stem).lower()),  # remove date-prefix if in name
                csv2h5(
                    [
                        str(
                            Path(__file__).parent / 'ini' /
                            f"csv_inclin_{'Kondrashov' if subs_made else 'Baranov'}.ini"
                        ),
                        '--path',
                        str(file_in),
                        '--blocksize_int',
                        '50_000_000',  # 50 MB
                        '--table',
                        tbl,
                        '--db_path',
                        str(db_path),
                        # '--log', str(scripts_path / 'log/csv2h5_inclin_Kondrashov.log'),
                        # '--b_raise_on_err', '0',  # ?
                        '--b_interact',
                        '0',
                        '--fs_float',
                        f'{fs(probe, file_in.stem)}',
                        '--dt_from_utc_seconds',
                        str(cfg['in']['dt_from_utc'].total_seconds()),
                        '--b_del_temp_db',
                        '1',
                    ] +
                    (['--csv_specific_param_dict', 'invert_magnitometr: True']
                     if subs_made else
                     ['--cols_load_list', "yyyy,mm,dd,HH,MM,SS,P,U"]),
                    **{
                        'filter': {
                            'min_date': cfg['filter']['min_date'][probe],
                            'max_date': cfg['filter']['max_date'][probe],
                        }
                    })

                # Get coefs:
                l.info(
                    f"Adding coefficients to {db_path}/{tbl} from {cfg['in']['db_coefs']}"
                )
                try:
                    h5copy_coef(cfg['in']['db_coefs'], db_path, tbl)
                except KeyError as e:  # Unable to open object (component not found)
                    l.warning('No coefs to copy?')
                    # write some dummy coefficients so Veusz patterns can load:
                    h5copy_coef(None,
                                db_path,
                                tbl,
                                dict_matrices=dict_matrices_for_h5(tbl=tbl))
                except OSError as e:
                    l.warning('Not found DB with coefs?')
                    # write some dummy coefficients so Veusz patterns can load:
                    h5copy_coef(None,
                                db_path,
                                tbl,
                                dict_matrices=dict_matrices_for_h5(tbl=tbl))
                i_proc_file += 1
            else:  # for...else: runs when the loop completes without break
                print('no', raw_pattern_file, end=', ')
            i_proc_probe += 1
        print('Ok:', i_proc_probe, 'probes,', i_proc_file, 'files processed.')

    # Calculate velocity and average
    if st(2):
        # if aggregate_period_s is None then do not average and write to *_proc_noAvg.h5; else load from that h5 and write to *_proc.h5
        if not cfg['out']['aggregate_period_s']:
            cfg['out']['aggregate_period_s'] = [
                None, 2, 600,
                3600 if 'w' in cfg['in']['probes_prefix'] else 7200
            ]

        if cfg['in']['azimuth_add']:
            if 'Lat' in cfg['in']['azimuth_add']:
                from datetime import datetime
                # add magnetic declination (°) for the coordinates used
                # todo: get time
                azimuth_add = mag_dec(cfg['in']['azimuth_add']['Lat'],
                                      cfg['in']['azimuth_add']['Lon'],
                                      datetime(2020, 9, 10),
                                      depth=-1)
            else:
                azimuth_add = 0
            if 'constant' in cfg['in']['azimuth_add']:
                # and add a constant. For example, subtract the declination at the calibration place if it was applied
                azimuth_add += cfg['in']['azimuth_add'][
                    'constant']  # add -6.65644183° to account for calibration in Kaliningrad
        for aggregate_period_s in cfg['out']['aggregate_period_s']:
            if aggregate_period_s is None:
                db_path_in = db_path
                db_path_out = db_path.with_name(
                    f'{db_path.stem}_proc_noAvg.h5')
            else:
                db_path_in = db_path.with_name(f'{db_path.stem}_proc_noAvg.h5')
                db_path_out = f'{db_path.stem}_proc.h5'  # or separately: '_proc{aggregate_period_s}.h5'

            args = [
                Path(incl_h5clc.__file__).with_name(
                    f'incl_h5clc_{db_path.stem}.yaml'),
                # if no such file all settings are here
                '--db_path',
                str(db_path_in),
                # !   'incl.*|w\d*'  inclinometers or wavegauges w\d\d # 'incl09':
                '--tables_list',
                'incl.*' if not cfg['in']['probes'] else
                f"incl.*(?:{'|'.join('{:0>2}'.format(p) for p in cfg['in']['probes'])})",
                '--aggregate_period',
                f'{aggregate_period_s}S' if aggregate_period_s else '',
                '--out.db_path',
                str(db_path_out),
                '--table',
                f'V_incl_bin{aggregate_period_s}'
                if aggregate_period_s else 'V_incl',
                '--verbose',
                'INFO',  # 'DEBUG' produces many numba messages
                '--b_del_temp_db',
                '1',
                # '--calc_version', 'polynom(force)',  # deprecated
                # '--chunksize', '20000',
                # '--not_joined_h5_path', f'{db_path.stem}_proc.h5',
            ]
            # if aggregate_period_s <= 5:   # [s], do not need split csv for big average interval
            #     args += (['--split_period', '1D'])
            if aggregate_period_s is None:  # processing parameters (if processed data is already saved, aggregation does no reprocessing)
                args += ([
                    '--max_dict',
                    'M[xyz]:4096',
                    # Note: for Baranov's prog 4096 is not suitable
                    # '--timerange_zeroing_dict', "incl19: '2019-11-10T13:00:00', '2019-11-10T14:00:00'\n,"  # does not work - use kwarg
                    # '--timerange_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
                    '--split_period',
                    '1D'
                ] if subs_made else [
                    '--bad_p_at_bursts_starts_peroiod',
                    '1H',
                ])
            # CSV split by 1 day (default when not averaging) and monolithic CSV if aggregate_period_s == 600
            if aggregate_period_s not in cfg['out']['aggregate_period_s_not_to_text']:  # , 300, 600]:
                args += ['--text_path', str(db_path.parent / 'text_output')]
            kwarg = {
                'in': {
                    'min_date': cfg['filter']['min_date'][0],
                    'max_date': cfg['filter']['max_date'][0],
                    'timerange_zeroing': cfg['in']['timerange_zeroing'],
                    'azimuth_add': azimuth_add
                }
            }
            # If need all data to be combined one after one:
            # set_field_if_no(kwarg, 'in', {})
            # kwarg['in'].update({
            #
            #         'tables': [f'incl{i:0>2}' for i in min_date.keys() if i!=0],
            #         'dates_min': min_date.values(),  # in table list order
            #         'dates_max': max_date.values(),  #
            #         })
            # set_field_if_no(kwarg, 'out', {})
            # kwarg['out'].update({'b_all_to_one_col': 'True'})

            incl_h5clc.main(args, **kwarg)

    # Calculate spectrograms.
    if st(3):  # Can be done at any time after step 1

        def raise_ni():
            raise NotImplementedError(
                'Cannot process probes having different fs in one run: you need to do it separately')

        args = [
            Path(incl_h5clc.__file__).with_name(
                f'incl_h5spectrum{db_path.stem}.yaml'),
            # if no such file all settings are here
            '--db_path',
            str(db_path.with_name(f'{db_path.stem}_proc_noAvg.h5')),
            '--tables_list',
            f"{cfg['in']['probes_prefix']}.*",  # inclinometers or wavegauges w\d\d  ## 'w02', 'incl.*',
            # '--aggregate_period', f'{aggregate_period_s}S' if aggregate_period_s else '',
            '--min_date',
            datetime64_str(cfg['filter']['min_date'][0]),
            '--max_date',
            datetime64_str(cfg['filter']['max_date']
                           [0]),  # '2019-09-09T16:31:00',  #17:00:00
            # '--max_dict', 'M[xyz]:4096',  # use if db_path does not end with _proc_noAvg.h5, i.e. velocity needs to be calculated
            '--out.db_path',
            f"{db_path.stem.replace('incl', cfg['in']['probes_prefix'])}_proc_psd.h5",
            # '--table', f'psd{aggregate_period_s}' if aggregate_period_s else 'psd',
            '--fs_float',
            f"{fs(probes[0], cfg['in']['probes_prefix'])}",
            # (lambda x: x == x[0])(np.vectorize(fs)(probes, prefix))).all() else raise_ni()
            #
            # '--timerange_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
            # '--verbose', 'DEBUG',
            # '--chunksize', '20000',
            '--b_interact',
            '0',
        ]
        if 'w' in cfg['in']['probes_prefix']:
            args += [
                '--split_period',
                '1H',
                '--dt_interval_minutes',
                '10',  # burst mode
                '--fmin',
                '0.0001',
                '--fmax',
                '4'
            ]
        else:
            args += [
                '--split_period',
                '2H',
                '--fmin',
                '0.0004',  #0.0004
                '--fmax',
                '1.05'
            ]

        incl_h5spectrum.main(args)

    # Draw in Veusz
    if st(4):
        b_images_only = True  # False
        pattern_path = db_path.parent / r'vsz_5min\191119_0000_5m_incl19.vsz'  # r'vsz_5min\191126_0000_5m_w02.vsz'
        if not b_images_only:
            pattern_bytes_slice_old = re.escape(b'((5828756, 5830223, None),)')

        # Length of non-adjacent intervals, s (set None to disallow)
        period = '1D'
        length = '5m'  # period  # '1D'

        dt_custom_s = pd_period_to_timedelta(
            length) if length != period else None  # None  #  60 * 5

        if True:
            # Load starts and assign ends
            t_intervals_start = pd.read_csv(
                cfg['in']['path_cruise'] /
                r'vsz+h5_proc\intervals_selected.txt',
                converters={
                    'time_start': lambda x: np.datetime64(x, 'ns')
                },
                index_col=0).index
            edges = (pd.DatetimeIndex(t_intervals_start),
                     pd.DatetimeIndex(t_intervals_start + dt_custom_s)
                     )  # np.zeros_like()
        else:
            # Generate periodic intervals
            t_interval_start, t_intervals_end = intervals_from_period(
                datetime_range=np.array(
                    [
                        cfg['filter']['min_date']['0'],
                        cfg['filter']['max_date']['0']
                    ],
                    # ['2018-08-11T18:00:00', '2018-09-06T00:00:00'],
                    # ['2019-02-11T13:05:00', '2019-03-07T11:30:00'],
                    # ['2018-11-16T15:19', '2018-12-14T14:35'],
                    # ['2018-10-22T12:30', '2018-10-27T06:30:00'],
                    'datetime64[s]'),
                period=period)
            edges = (pd.DatetimeIndex([t_interval_start
                                       ]).append(t_intervals_end[:-1]),
                     pd.DatetimeIndex(t_intervals_end))

        for i, probe in enumerate(probes):
            probe_name = f"{cfg['in']['probes_prefix']}{probe:02}"  # table name in db
            l.info('Draw %s in Veusz: %d intervals...', probe_name,
                   edges[0].size)
            # for i_interval, (t_interval_start, t_interval_end) in enumerate(zip(pd.DatetimeIndex([t_interval_start]).append(t_intervals_end[:-1]), t_intervals_end), start=1):

            cfg_vp = {'veusze': None}
            for i_interval, (t_interval_start,
                             t_interval_end) in enumerate(zip(*edges),
                                                          start=1):

                # if i_interval < 23: #<= 0:  # TEMPORARY Skip this number of intervals
                #     continue
                if period != length:
                    t_interval_start = t_interval_end - pd.Timedelta(
                        dt_custom_s, 's')

                try:  # skipping absent probes
                    start_end = h5q_interval2coord(
                        db_path=str(db_path),
                        table=f'/{probe_name}',
                        t_interval=(t_interval_start, t_interval_end))
                    if not len(start_end):
                        break  # no data
                except KeyError:
                    break  # device name not in specified range, go to next name

                pattern_path_new = pattern_path.with_name(
                    f"{t_interval_start:%y%m%d_%H%M}_{length}_{probe_name}.vsz"
                )

                # Modify pattern file
                if not b_images_only:
                    probe_name_old = re.match(r'.*((?:incl|w)\d*).*',
                                              pattern_path.name).groups()[0]
                    bytes_slice = bytes(
                        '(({:d}, {:d}, None),)'.format(*(start_end +
                                                         np.int32([-1, 1]))),
                        'ascii')

                    def f_replace(line):
                        """
                        Replace in file
                        1. probe name
                        2. slice
                        """
                        # if i_interval == 1:
                        line, ok = re.subn(bytes(probe_name_old, 'ascii'),
                                           bytes(probe_name, 'ascii'), line)
                        if ok:  # can be only in same line
                            line = re.sub(pattern_bytes_slice_old, bytes_slice,
                                          line)
                        return line

                    if not rep_in_file(pattern_path,
                                       pattern_path_new,
                                       f_replace=f_replace):
                        l.warning('Veusz pattern not changed!')
                        # break
                    elif cfg_vp['veusze']:
                        cfg_vp['veusze'].Load(str(pattern_path_new))
                elif cfg_vp['veusze']:
                    cfg_vp['veusze'].Load(str(pattern_path_new))

                txt_time_range = \
                    """
                    "[['{:%Y-%m-%dT%H:%M}', '{:%Y-%m-%dT%H:%M}']]" \
                    """.format(t_interval_start, t_interval_end)
                print(f'{i_interval}. {txt_time_range}', end=' ')

                cfg_vp = veuszPropagate.main(
                    [
                        Path(veuszPropagate.__file__).parent.with_name(
                            'veuszPropagate.ini'),
                        # '--data_yield_prefix', '-',
                        '--path',
                        str(db_path),  # used for custom loading from db; some source is required
                        '--tables_list',
                        f'/{probe_name}',  # 181022inclinometers/ \d*
                        '--pattern_path',
                        str(pattern_path_new),
                        # fr'd:\workData\BalticSea\190801inclinometer_Schuka\{probe_name}_190807_1D.vsz',
                        # str(db_path.parent / dir_incl / f'{probe_name}_190211.vsz'),  # warning: creates file with short name
                        # '--before_next', 'restore_config',
                        # '--add_to_filename', f"_{t_interval_start:%y%m%d_%H%M}_{length}",
                        '--filename_fun',
                        f'lambda tbl: "{pattern_path_new.name}"',
                        '--add_custom_list',
                        'USEtime',  # nAveragePrefer',
                        '--add_custom_expressions_list',
                        txt_time_range,
                        # + """
                        # ", 5"
                        # """,
                        '--b_update_existed',
                        'True',
                        '--export_pages_int_list',
                        '1, 2',  # 0 for all '6, 7, 8',  #'1, 2, 3'
                        # '--export_dpi_int', '200',
                        '--export_format',
                        'emf',
                        '--b_interact',
                        '0',
                        '--b_images_only',
                        f'{b_images_only}',
                        '--return',
                        '<embedded_object>',  # reuse to avoid bloating memory
                    ],
                    veusze=cfg_vp['veusze'])
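
# All of the mains composed above (csv2h5, incl_h5clc.main, incl_h5spectrum.main,
# veuszPropagate.main) share one convention: the first argument is an argv-style
# list of strings, optionally followed by kwargs that override config sections.
# A hedged re-use sketch (paths are hypothetical):
#
#     csv2h5(['ini/csv_inclin_Kondrashov.ini',
#             '--path', 'raw/INKL_01.txt',
#             '--db_path', 'out/incl.h5',
#             '--b_interact', '0'])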
Example #7
def main(new_arg=None):
    """
    1. Obtains command line arguments (for description see my_argparser()) that can also be passed via new_arg and an ini file.
    2. Loads device calibration data (laboratory calibration) from cfg['in']['db_path'].
    3. Calibrates the channels configured by cfg['in']['channels'] ('accelerometer' and/or 'magnetometer'): soft iron.
    4. Wrong implementation - does not use cfg['in']['timerange_nord']! todo: rotate compass using cfg['in']['timerange_nord']
    :param new_arg: returns cfg if new_arg == '<cfg_from_args>', but it will be None if argument
     argv[1:] == '-h' or '-v' is passed to this code
    argv[1] is cfgFile. It was used with cfg files:

    :return:
    """

    global l

    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg:
        return
    if cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    l.info("%s(%s) channels: %s started. ", this_prog_basename(__file__),
           cfg['in']['tables'], cfg['in']['channels'])
    fig = None
    fig_filt = None
    channel = 'accelerometer'  # 'magnetometer'
    fig_save_dir_path = cfg['in']['db_path'].parent
    with pd.HDFStore(cfg['in']['db_path'], mode='r') as store:
        if len(cfg['in']['tables']) == 1:
            cfg['in']['tables'] = h5find_tables(store, cfg['in']['tables'][0])
        coefs = {}
        for itbl, tbl in enumerate(cfg['in']['tables'], start=1):
            probe_number = int(re.findall(r'\d+', tbl)[0])
            l.info(f'{itbl}. {tbl}: ')
            if isinstance(cfg['in']['timerange'], Mapping):  # individual interval for each table
                timerange = cfg['in']['timerange'].get(probe_number)
            else:
                timerange = cfg['in']['timerange']  # same interval for each table
            a = load_hdf5_data(store, table=tbl, t_intervals=timerange)
            # iUseTime = np.searchsorted(stime, [np.array(s, 'datetime64[s]') for s in np.array(strTimeUse)])
            coefs[tbl] = {}
            for channel in cfg['in']['channels']:
                print(f' channel "{channel}"', end=' ')
                (col_str, coef_str) = channel_cols(channel)

                # filtering # col_str == 'A'?
                if True:
                    b_ok = np.zeros(a.shape[0], bool)
                    for component in ['x', 'y', 'z']:
                        b_ok |= is_works(
                            a[col_str + component],
                            noise=cfg['filter']['no_works_noise'][channel])
                    l.info('Filtered out non-working area: %2.1f%%',
                           (b_ok.size - b_ok.sum()) * 100 / b_ok.size)
                    # vec3d = np.column_stack(
                    #     (a[col_str + 'x'], a[col_str + 'y'], a[col_str + 'z']))[:, b_ok].T  # [slice(*iUseTime.flat)]
                    vec3d = a.loc[
                        b_ok, [col_str + 'x', col_str + 'y', col_str +
                               'z']].to_numpy(float).T
                    index = a.index[b_ok]

                    vec3d, b_ok, fig_filt = filter_channes(
                        vec3d,
                        index,
                        fig_filt,
                        fig_save_prefix=f"{fig_save_dir_path / tbl}-'{channel}'",
                        blocks=cfg['filter']['blocks'],
                        offsets=cfg['filter']['offsets'],
                        std_smooth_sigma=cfg['filter']['std_smooth_sigma'])

                A, b = calibrate(vec3d)
                window_title = f"{tbl} '{channel}' channel ellipse"
                fig = calibrate_plot(vec3d,
                                     A,
                                     b,
                                     fig,
                                     window_title=window_title)
                fig.savefig(fig_save_dir_path / (window_title + '.png'),
                            dpi=300,
                            bbox_inches="tight")
                A_str, b_str = coef2str(A, b)
                l.info(
                    'Calibration coefficients calculated: \nA = \n%s\nb = \n%s',
                    A_str, b_str)
                coefs[tbl][channel] = {'A': A, 'b': b}
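
                # Applying the saved coefficients downstream is typically of
                # the form (an assumption; the exact convention lives elsewhere):
                #     corrected = A @ (vec3d - b)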

            # Zeroing Nord direction
            timerange_nord = cfg['in']['timerange_nord']
            if isinstance(timerange_nord, Mapping):
                timerange_nord = timerange_nord.get(probe_number)
            if timerange_nord:
                coefs[tbl]['M']['azimuth_shift_deg'] = zeroing_azimuth(
                    store, tbl, timerange_nord, calc_vel_flat_coef(coefs[tbl]),
                    cfg['in'])
            else:
                l.info('no zeroing Nord')
    # Write coefs
    for cfg_output in (['in', 'out'] if cfg['out'].get('db_path') else ['in']):
        l.info(f"Write to {cfg[cfg_output]['db_path']}")
        for itbl, tbl in enumerate(cfg['in']['tables'], start=1):
            # i_search = re.search('\d*$', tbl)
            # for channel in cfg['in']['channels']:
            #     (col_str, coef_str) = channel_cols(channel)
            #     dict_matrices = {f'//coef//{coef_str}//A': coefs[tbl][channel]['A'],
            #                      f'//coef//{coef_str}//C': coefs[tbl][channel]['b'],
            #                      }
            #     if channel == 'M':
            #         if coefs[tbl]['M'].get('azimuth_shift_deg'):
            #             dict_matrices[f'//coef//{coef_str}//azimuth_shift_deg'] = coefs[tbl]['M']['azimuth_shift_deg']
            #         # Copying probe number to coefficient so it can be checked manually when copying by hand
            #         if i_search:
            #             try:
            #                 dict_matrices['//coef//i'] = int(i_search.group(0))
            #             except Exception as e:
            #                 pass
            dict_matrices = dict_matrices_for_h5(coefs[tbl], tbl,
                                                 cfg['in']['channels'])
            h5copy_coef(None,
                        cfg[cfg_output]['db_path'],
                        tbl,
                        dict_matrices=dict_matrices)
Example #8
def main(new_arg=None):
    """

    :param new_arg: returns cfg if new_arg=='<cfg_from_args>' but it will be None if argument
     argv[1:] == '-h' or '-v' passed to this code
    argv[1] is cfgFile. It was used with cfg files:
        'csv2h5_nav_supervisor.ini'
        'csv2h5_IdrRedas.ini'
        'csv2h5_Idronaut.ini'
    :return:
    """

    global l
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg or not cfg['program'].get('return'):
        print('Cannot initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
            **{**cfg['in'], 'path': cfg['in']['db_path']},
            b_interact=cfg['program']['b_interact'])
        set_field_if_no(cfg['in'], 'tables_log', '{}/logFiles')  # will be filled by each table from cfg['in']['tables']
        cfg['in']['query'] = query_time_range(**cfg['in'])
        set_field_if_no(cfg['out'], 'db_path', cfg['in']['db_path'])
        # cfg['out'] = init_file_names(cfg['out'], , path_field='db_path')
    except Ex_nothing_done as e:
        print(e.message)
        return ()

    # args = parser.parse_args()
    # args.verbose= args.verbose[0]
    # try:
    #     cfg= ini2dict(args.cfgFile)
    #     cfg['in']['cfgFile']= args.cfgFile
    # except IOError as e:
    #     print('\n==> '.join([a for a in e.args if isinstance(a,str)])) #e.message
    #     raise(e)
    # Open text log
    if 'log' in cfg['program'].keys():
        dir_create_if_need(os_path.dirname(cfg['program']['log']))
        flog = open(cfg['program']['log'], 'a+', encoding='cp1251')

    cfg['out']['log'] = OrderedDict({'fileName': None, 'fileChangeTime': None})

    # Prepare saving to csv
    if 'file_names_add_fun' in cfg['out']:
        file_names_add = eval(
            compile(cfg['out']['file_names_add_fun'], '', 'eval'))
    else:
        file_names_add = lambda i: '.csv'  # f'_{i}.csv'
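
    # e.g. a valid cfg['out']['file_names_add_fun'] string could be
    # (hypothetical): "lambda i: f'_{i:02}.csv'"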

    # Prepare data for output store and open it
    if cfg['out']['tables'] == ['None']:
        # will not write new data table and its log
        cfg['out']['tables'] = None
        # cfg['out']['tables_log'] = None  # for _runs cfg will be redefined (this only None case that have sense?)

    h5init(cfg['in'], cfg['out'])
    # store, dfLogOld = h5temp_open(**cfg['out'])

    cfg_fileN = os_path.splitext(cfg['in']['cfgFile'])[0]
    out_tables_log = cfg['out'].get('tables_log')
    if cfg_fileN.endswith('_runs') or (bool(out_tables_log)
                                       and 'logRuns' in out_tables_log[0]):

        # Will calculate only after filter  # todo: calculate derived parameters before, where they are bad (or replace all of them if any are bad?)
        func_before_cycle = lambda x: None
        func_before_filter = lambda df, log_row, cfg: df
        func_after_filter = lambda df, cfg: log_runs(df, cfg, cfg['out']['log'])

        # this table will be added:
        cfg['out']['tables_log'] = [cfg['out']['tables'][0] + '/logRuns']
        cfg['out']['b_log_ready'] = True  # so as not to update the time range in h5_append()

        # Settings to not affect main data table and switch off not compatible options:
        cfg['out']['tables'] = []
        cfg['out']['b_skip_if_up_to_date'] = False  # todo: if False, check it: need to delete all previous results of CTD_calc() or set min_time > its last log time. True is not implemented?
        cfg['program']['b_log_display'] = False  # cannot display a multi-row log
        if 'b_save_images' in cfg['extract_runs']:
            cfg['extract_runs']['path_images'] = cfg['out']['db_path'].with_name('_subproduct')
            dir_create_if_need(cfg['extract_runs']['path_images'])
    else:
        if 'brown' in cfg_fileN.lower():
            func_before_cycle = load_coef
            if 'Lat' in cfg['in']:
                func_before_filter = lambda *args, **kwargs: add_ctd_params(
                    process_brown(*args, **kwargs), kwargs['cfg'])
            else:
                func_before_filter = process_brown
        else:
            func_before_cycle = lambda x: None

            def ctd_coord_and_params(df: pd.DataFrame, log_row, cfg):
                coord_data_col_ensure(df, log_row)
                return add_ctd_params(df, cfg)

            func_before_filter = ctd_coord_and_params
        func_after_filter = lambda df, cfg: df  # nothing after filter

    func_before_cycle(cfg)  # prepare: usually assign data to cfg['for']
    if cfg['out'].get('path_csv'):
        dir_create_if_need(cfg['out']['path_csv'])
    # Load data Main circle #########################################
    # Open input store and cycle through input table log records
    qstr_trange_pattern = "index>=Timestamp('{}') & index<=Timestamp('{}')"
    iSt = 1

    dfLogOld, cfg['out']['db'], cfg['out']['b_skip_if_up_to_date'] = h5temp_open(**cfg['out'])
    b_out_db_is_different = cfg['out']['db'] is not None and cfg['out']['db_path_temp'] != cfg['in']['db_path']
    # Cycle for each table, for each row in log:
    # for path_csv in gen_names_and_log(cfg['out'], dfLogOld):
    with FakeContextIfOpen(
            lambda f: pd.HDFStore(f, mode='r'), cfg['in']['db_path'],
            None if b_out_db_is_different else cfg['out']['db']
    ) as cfg['in']['db']:  # does not open ['in']['db'] if it is already opened for writing

        for tbl in cfg['in']['tables']:
            if False:  # Show table info
                nodes = sorted(
                    cfg['out']['db'].root.__members__)  # , key=number_key
                print(nodes)
            print(tbl, end='. ')

            df_log = cfg['in']['db'].select(cfg['in']['tables_log'].format(tbl) or tbl,
                                            where=cfg['in']['query'])
            if True:  # try:
                if 'log' in cfg['program'].keys():
                    nRows = df_log.shape[0]
                    flog.writelines(datetime.now().strftime(
                        '\n\n%d.%m.%Y %H:%M:%S> processed ') + f'{nRows} row' +
                                    ('s:' if nRows > 1 else ':'))

                for ifile, r in enumerate(df_log.itertuples(),
                                          start=iSt):  # name=None
                    print('.', end='')
                    sys_stdout.flush()

                    path_raw = PurePath(r.fileName)
                    cfg['out']['log'].update(fileName=path_raw.name,
                                             fileChangeTime=r.fileChangeTime)
                    # save current state
                    cfg['in']['file_stem'] = cfg['out']['log']['fileName']  # e.g. so a subprogram can extract the date
                    cfg['in']['fileChangeTime'] = cfg['out']['log']['fileChangeTime']

                    if cfg['in']['b_skip_if_up_to_date']:
                        have_older_data, have_duplicates = h5del_obsolete(
                            cfg['out'], cfg['out']['log'], dfLogOld)
                        if have_older_data:
                            continue
                        if have_duplicates:
                            cfg['out']['b_remove_duplicates'] = True
                    print('{}. {}'.format(ifile, path_raw.name), end=': ')

                    # Load data
                    qstr = qstr_trange_pattern.format(r.Index, r.DateEnd)
                    df_raw = cfg['in']['db'].select(tbl, qstr)
                    cols = df_raw.columns.tolist()

                    # cfg['in']['lat'] and ['lon'] may be needed in add_ctd_params() if Lat is not in df_raw
                    if 'Lat_en' in df_log.columns and 'Lat' not in cols:
                        cfg['in']['lat'] = np.nanmean((r.Lat_st, r.Lat_en))
                        cfg['in']['lon'] = np.nanmean((r.Lon_st, r.Lon_en))

                    df = func_before_filter(df_raw, log_row=r, cfg=cfg)

                    if df.size:  # zero size means save only the log, not the data
                        # filter, updates cfg['out']['log']['rows']
                        df, _ = set_filterGlobal_minmax(
                            df, cfg['filter'], cfg['out']['log'])
                    if 'rows' not in cfg['out']['log']:
                        l.warning('no data!')
                        continue
                    elif isinstance(cfg['out']['log']['rows'], int):
                        print('filtered out {rows_filtered}, remains {rows}'.
                              format_map(cfg['out']['log']))
                        if cfg['out']['log']['rows']:
                            print('.', end='')
                        else:
                            l.warning('no data!')
                            continue

                    df = func_after_filter(df, cfg=cfg)

                    # Append to Store
                    h5_append(cfg['out'],
                              df,
                              cfg['out']['log'],
                              log_dt_from_utc=cfg['in']['dt_from_utc'])

                    # Copy to csv
                    if cfg['out'].get('path_csv'):
                        fname = '{:%y%m%d_%H%M}-{:%d_%H%M}'.format(
                            r.Index, r.DateEnd) + file_names_add(ifile)
                        if 'data_columns' not in cfg['out']:
                            cfg['out']['data_columns'] = slice(0, -1)  # all cols
                        df.to_csv(  # [cfg['out']['data_columns']]
                            cfg['out']['path_csv'] / fname,
                            date_format=cfg['out']['text_date_format'],
                            float_format='%5.6g',
                            index_label='Time'
                        )  # to_string, line_terminator='\r\n'

                    # Log to screen (if not prohibited explicitly)
                    if cfg['out']['log'].get('Date0') is not None and (
                        ('b_log_display' not in cfg['program'])
                            or cfg['program']['b_log_display']):
                        str_log = '{fileName}:\t{Date0:%d.%m.%Y %H:%M:%S}-' \
                                  '{DateEnd:%d. %H:%M:%S%z}\t{rows}rows'.format_map(
                            cfg['out']['log'])  # \t{Lat}\t{Lon}\t{strOldVal}->\t{mag}
                        l.info(str_log)
                    else:
                        str_log = str(cfg['out']['log'].get('rows', '0'))
                    # Log to logfile
                    if 'log' in cfg['program'].keys():
                        flog.writelines('\n' + str_log)

    if b_out_db_is_different:
        try:
            if cfg['out']['tables'] is not None:
                print('')
                if cfg['out']['b_remove_duplicates']:
                    h5remove_duplicates(cfg['out'],
                                        cfg_table_keys=('tables',
                                                        'tables_log'))
                # Create full indexes. Must be done because of using ptprepack in h5move_tables() below
                l.debug('Create index')
                for tblName in (cfg['out']['tables'] +
                                cfg['out']['tables_log']):
                    try:
                        cfg['out']['db'].create_table_index(tblName,
                                                            columns=['index'],
                                                            kind='full')
                    except Exception as e:
                        l.warning(
                            'table {}. Index not created - error: {}'.format(
                                tblName, '\n==> '.join(
                                    [s for s in e.args if isinstance(s, str)])))
        except Exception as e:
            l.exception('The end. There is an error ')

            import traceback, code
            from sys import exc_info as sys_exc_info
            tb = sys_exc_info()[2]  # type, value,
            traceback.print_exc()
            last_frame = lambda tb=tb: last_frame(tb.tb_next) if tb.tb_next else tb
            frame = last_frame().tb_frame
            ns = dict(frame.f_globals)
            ns.update(frame.f_locals)
            code.interact(local=ns)
        finally:

            cfg['out']['db'].close()
            if cfg['program']['log']:
                flog.close()
            if cfg['out']['db'].is_open:
                print('Wait store is closing...')
                sleep(2)

            failed_storages = h5move_tables(cfg['out'])
            print('Finishing...' if failed_storages else 'Ok.', end=' ')
            h5index_sort(
                cfg['out'],
                out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
                in_storages=failed_storages)
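
The except branch above (repeated in a later example) is a post-mortem debugging idiom: walk the traceback to its innermost frame and open an interactive console in that frame's namespace. A minimal self-contained sketch of the same idiom, using an explicit loop instead of the recursive lambda (names here are illustrative, not from the source):

import code
import sys
import traceback


def interact_at_innermost_frame():
    """Open an interactive console in the frame where the active exception was raised."""
    tb = sys.exc_info()[2]
    traceback.print_exc()
    while tb.tb_next:  # walk to the innermost traceback frame
        tb = tb.tb_next
    frame = tb.tb_frame
    ns = dict(frame.f_globals)
    ns.update(frame.f_locals)  # locals shadow globals, as in the snippet above
    code.interact(local=ns)


if __name__ == '__main__':
    try:
        1 / 0
    except ZeroDivisionError:
        interact_at_innermost_frame()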
Example #9
0
def main(new_arg=None):
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')

    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(**cfg['in'],
                                      b_interact=cfg['program']['b_interact'],
                                      cfg_search_parent=cfg['out'])
        h5init(cfg['in'], cfg['out'])
    except Ex_nothing_done as e:
        print(e.message)
        exit()

    df_dummy = pd.DataFrame(
        np.full(1,
                np.NaN,
                dtype=np.dtype({
                    'formats': ['float64', 'float64'],
                    'names': cfg['out']['tracks_cols'][1:]
                })),
        index=(pd.NaT, ))  # used for insert separator lines
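    # (a single NaN row indexed by NaT: appended between tracks it creates a gap
    # that plotting tools typically render as a break in the line)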

    if 'routes_cols' not in cfg['in']:
        cfg['in']['routes_cols'] = cfg['in']['waypoints_cols']
    if 'routes_cols' not in cfg['out']:
        cfg['out']['routes_cols'] = cfg['out'][
            'waypoints_cols']  # cfg['in']['routes_cols']  #
    # Writing
    if True:  # try:
        l.warning('processing {} file{}'.format(
            cfg['in']['nfiles'], 's:' if cfg['in']['nfiles'] > 1 else ':'))
        cfg['out']['log'] = {}
        set_field_if_no(cfg['out'], 'table_prefix',
                        PurePath(cfg['in']['path']).stem)
        cfg['out']['table_prefix'] = cfg['out']['table_prefix'].replace(
            '-', '')
        if len([t for t in cfg['out']['tables'] if len(t)]) > 1:
            cfg['out']['tables'] = \
                [cfg['out']['table_prefix'] + '_' + s for s in cfg['out']['tables']]
            cfg['out']['tables_log'] = \
                [cfg['out']['table_prefix'] + '_' + s for s in cfg['out']['tables_log']]

        tables = dict(zip(df_names, cfg['out']['tables']))
        tables_log = dict(zip(df_names, cfg['out']['tables_log']))
        # Cannot (usefully) save the file path to the DB, so only the file name is logged; set its max length:
        set_field_if_no(cfg['out'], 'logfield_fileName_len', 50)
        cfg['out']['index_level2_cols'] = cfg['in']['routes_cols'][0]

        # ###############################################################
        # ## Accumulate all data in cfg['out']['path_temp'] ############
        ## Main loop ##############################################################
        for i1_file, path_gpx in h5_dispenser_and_names_gen(
                cfg['in'], cfg['out']):
            l.info('{}. {}: '.format(i1_file, path_gpx.name))
            # Loading data
            dfs = gpxConvert(cfg, path_gpx)
            print('write', end=': ')
            sys_stdout.flush()
            for key, df in dfs.items():
                if (not tables.get(key)) or df.empty:
                    continue
                elif key == 'tracks':
                    # Save last time so we can filter the next file
                    cfg['in']['time_last'] = df.index[-1]

                sort_time = False if key in {'waypoints', 'routes'} else None

                # monkey patching
                if 'tracker' in tables[key]:
                    # A '{}' placeholder must also be in tables[key]. todo: better: key + '_fun_tracker' in cfg['out']?
                    # Trackers processing
                    trackers_numbers = {
                        '0-3106432': '1',
                        '0-2575092': '2',
                        '0-3124620': '3',
                        '0-3125300': '4',
                        '0-3125411': '5',
                        '0-3126104': '6'
                    }
                    tables_pattern = tables[key]
                    tables_log_pattern = tables_log[key]

                    df['comment'] = df['comment'].str.split(" @",
                                                            n=1,
                                                            expand=True)[0]
                    # split data and save to multiple tables
                    df_all = df.set_index(['comment', df.index])
                    for sn, n in trackers_numbers.items():  # set(df_all.index.get_level_values(0))
                        try:
                            df = df_all.loc[sn]
                        except KeyError:
                            continue
                        # redefine saving parameters
                        cfg['out']['table'] = tables_pattern.format(
                            trackers_numbers[sn])
                        cfg['out']['table_log'] = tables_log_pattern.format(
                            trackers_numbers[sn])
                        call_with_valid_kwargs(df_filter_and_save_to_h5,
                                               df,
                                               **cfg,
                                               input=cfg['in'],
                                               sort_time=sort_time)
                else:
                    cfg['out']['table'] = tables[key]
                    cfg['out']['table_log'] = tables_log[key]
                    call_with_valid_kwargs(df_filter_and_save_to_h5,
                                           df,
                                           **cfg,
                                           input=cfg['in'],
                                           sort_time=sort_time)

    # try:
    # if cfg['out']['b_remove_duplicates']:
    #     for tbls in cfg['out']['tables_have_wrote']:
    #         for tblName in tbls:
    #             cfg['out']['db'][tblName].drop_duplicates(keep='last', inplace= True)
    # print('Create index', end=', ')

    # create_table_index calls create_table whose docs say "cannot index Time64Col() or ComplexCol"
    # so load it, index, then save
    # level2_index = None
    # df = cfg['out']['db'][tblName] # last commented
    # df.set_index([navp_all_index, level2_index])
    # df.sort_index()

    # cfg['out']['db'][tblName].sort_index(inplace=True)

    # if df is not None:  # resave
    #     df_log = cfg['out']['db'][tblName]
    #     cfg['out']['db'].remove(tbls[0])
    #     cfg['out']['db'][tbls[0]] = df
    #     cfg['out']['db'][tbls[1]] = df_log

    try:
        pass
    except Exception as e:
        print('The end. There is an error ', standard_error_info(e))

    #     import traceback, code
    #     from sys import exc_info as sys_exc_info
    #
    #     tb = sys_exc_info()[2]  # type, value,
    #     traceback.print_exc()
    #     last_frame = lambda tb=tb: last_frame(tb.tb_next) if tb.tb_next else tb
    #     frame = last_frame().tb_frame
    #     ns = dict(frame.f_globals)
    #     ns.update(frame.f_locals)
    #     code.interact(local=ns)
    # finally:
    #     cfg['out']['db'].close()
    #     failed_storages= h5move_tables(cfg['out'], cfg['out']['tables_have_wrote'])

    try:
        failed_storages = h5move_tables(cfg['out'],
                                        tbl_names=cfg['out'].get(
                                            'tables_have_wrote', set()))
        print('Finishing...' if failed_storages else 'Ok.', end=' ')
        # Sort if there is any processed data that needs it (not the case for routes and waypoints); otherwise skip, because ``ptprepack`` does not close the HDF5 source when it finds no data
        if cfg['in'].get('time_last'):
            cfg['out']['b_remove_duplicates'] = True
            h5index_sort(
                cfg['out'],
                out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
                in_storages=failed_storages,
                tables=cfg['out'].get('tables_have_wrote', set()))
    except Ex_nothing_done:
        print('ok')
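
Example #9 above splits one parsed GPX DataFrame into per-tracker tables, keyed by the serial prefix of the ``comment`` column. A minimal sketch of that split technique (tracker serials and rows below are made up for illustration):

import pandas as pd

df = pd.DataFrame(
    {'comment': ['0-3106432 @msg', '0-2575092 @msg', '0-3106432 @msg'],
     'Lat': [54.7, 54.8, 54.9]},
    index=pd.to_datetime(['2018-10-06 10:00', '2018-10-06 10:05', '2018-10-06 10:10']))

# keep only the tracker serial before ' @'
df['comment'] = df['comment'].str.split(' @', n=1, expand=True)[0]
# index by (tracker, time) so each tracker's rows are selectable with .loc
df_all = df.set_index(['comment', df.index])
for sn in ('0-3106432', '0-2575092', '0-0000000'):
    try:
        df_sn = df_all.loc[sn]
    except KeyError:
        continue  # this tracker has no rows in the file
    print(sn, ':', len(df_sn), 'rows')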
Example #10
0
def main(new_arg=None, veusze=None, **kwargs):
    """
    Initialises configuration and runs or returns routines
    cfg:
        ['program']['log'],
        'out'
        'in'
        'async'
    globals:
        load_vsz
        l

    :param new_arg:
    :param veusze: used to reuse veusz embedded object (thus to not leak memory)
    :return:
    """
    global l, load_vsz
    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    cfg['program']['log'] = l.root.handlers[0].baseFilename  # synchronize with the obtained absolute file name

    print('\n' + this_prog_basename(__file__), 'started', end=' ')
    __name__ = '__main__'  # indicate to other functions that they are called from main

    if cfg['out'].get('paths'):
        if not cfg['out']['b_images_only']:
            raise NotImplementedError(
                'Output files provided while not in "b_images_only" mode!')
        cfg['out']['nfiles'] = len(cfg['out']['paths'])
        cfg['out']['path'] = cfg['out']['paths'][0]
        print(
            end=f"\n- {cfg['out']['nfiles']} output files to export images...")
        pass
    else:
        if cfg['out']['b_images_only']:
            print(
                'in images only mode. Output pattern: ')  # todo Export path: '
        else:
            print('. Output pattern and Data: ')

        try:
            # Using cfg['out'] to store pattern information
            if not Path(cfg['in']['pattern_path']).is_absolute():
                cfg['in']['pattern_path'] = Path(cfg['in']['path']).with_name(
                    str(cfg['in']['pattern_path']))
            cfg['out']['path'] = cfg['in']['pattern_path']
            cfg['out']['paths'], cfg['out']['nfiles'], cfg['out'][
                'path'] = init_file_names(**cfg['out'], b_interact=False)
        except Ex_nothing_done as e:
            if not cfg['out']['b_images_only']:
                l.warning(
                    f'{e.message} - no pattern. Specify it or use "b_images_only" mode!'
                )
                return  # or raise FileNotFoundError?

    if (cfg['out']['b_images_only'] and cfg['out']['paths']):
        cfg['in']['paths'] = cfg['out']['paths']  # have all we need to export
    else:
        try:
            cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
                'path'] = init_file_names(**cfg['in'],
                                          b_interact=cfg['program']
                                          ['b_interact'])
        except Ex_nothing_done as e:
            print(e.message)
            return  # or raise FileNotFoundError?
        except TypeError:  # expected str, bytes or os.PathLike object, not NoneType
            # cfg['in']['path'] is None. Maybe it is not needed
            cfg['in']['paths'] = [cfg['in']['pattern_path']]  # dummy for compatibility
            cfg['in']['nfiles'] = 1

    cfg['out']['export_dir'] = dir_from_cfg(cfg['out']['path'].parent,
                                            cfg['out']['export_dir'])

    if 'restore_config' in cfg['program']['before_next']:
        cfg['in_saved'] = cfg['in'].copy()
    # Next is commented out because reloading is Ok: no need to Close()
    # if cfg['out']['b_images_only'] and not 'Close()' in cfg['program']['before_next']:
    #     cfg['program']['before_next'].append(
    #         'Close()')  # usually we need to load new file for export (not only modify previous file)
    if cfg['program']['export_timeout_s'] and export_images_timed:
        cfg['async'] = {
            'loop': asyncio.get_event_loop(),
            'export_timeout_s': cfg['program']['export_timeout_s']
        }
    else:
        cfg['async'] = {'loop': None}

    load_vsz = load_vsz_closure(cfg['program']['veusz_path'],
                                cfg['program']['load_timeout_s'],
                                cfg['program']['b_execute_vsz'])
    cfg['load_vsz'] = load_vsz
    cfg['co'] = {}
    if cfg['in']['table_log'] and cfg['in']['path'].suffix == '.h5' and not (
            cfg['out']['b_images_only'] and len(cfg['in']['paths']) > 1):
        # load data by ranges from table log rows
        cfg['in']['db_path'] = cfg['in']['path']
        in_fulls = h5log_names_gen(cfg['in'])
    elif cfg['in']['tables']:
        # tables instead of files
        in_fulls = ge_names_from_hdf5_paths(cfg)
    else:  # use found vsz files as source when only image export is needed (even with a database source)
        in_fulls = ge_names(cfg)

    cor_savings = co_savings(cfg)
    cor_savings.send(None)
    nfiles = 0
    try:  # if True:
        path_prev = os_getcwd()
        os_chdir(cfg['out']['path'].parent)
        if cfg['program']['return'] == '<corutines_in_cfg>':
            cfg['co']['savings'] = cor_savings
            cfg['co']['gen_veusz_and_logs'] = load_to_veusz(in_fulls, cfg)
            cfg['co']['send_data'] = co_send_data(load_to_veusz, cfg,
                                                  cor_savings)
            return cfg  # return with link to generator function
        elif cfg['in'].get('data_yield_prefix'):
            # Cycle with obtaining Veusz data
            cfgin_update = None
            cor_send_data = co_send_data(load_to_veusz, cfg, cor_savings)
            while True:  # for vsz_data, log in cor_send_data.send(cfgin_update):
                try:
                    vsz_data, log = cor_send_data.send(cfgin_update)
                    nfiles += 1
                except (GeneratorExit, StopIteration, Ex_nothing_done):
                    break
                if 'f_custom_in_cycle' in cfg['program']:
                    cfgin_update = cfg['program']['f_custom_in_cycle'](
                        vsz_data, log)
        else:
            # Cycle without obtaining Veusz data (or implemented by user's cfg['program']['f_custom_in_cycle'])
            for veusze, log in load_to_veusz(in_fulls, cfg, veusze):
                file_name_r = Path(log['out_vsz_full']).relative_to(
                    cfg['out']['path'].parent)
                if cfg['program'].get('f_custom_in_cycle'):
                    cfgin_update = cfg['program']['f_custom_in_cycle'](veusze,
                                                                       log)
                    veusze_commands(veusze, cfgin_update, file_name_r)
                cor_savings.send((veusze, log))
                nfiles += 1
            cor_savings.close()
            if cfg['program']['return'] != '<embedded_object>':
                veusze = None  # to note that it is closed in cor_savings.close()
        print(f'{nfiles} processed. ok>')

        pass
    except Exception as e:
        l.exception('Not good')
        return  # or raise FileNotFoundError?
    finally:
        if cfg['async']['loop']:
            cfg['async']['loop'].close()
        os_chdir(path_prev)
        if veusze and cfg['program']['return'] == '<end>':
            veusze.Close()
            veusze.WaitForClose()
            veusze = None
        elif cfg['program']['return'] == '<embedded_object>':
            cfg['veusze'] = veusze
            return cfg
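
Example #10 drives saving through a generator used as a coroutine: ``cor_savings`` is primed with ``send(None)``, fed ``(veusze, log)`` pairs via ``send()``, and finalized with ``close()``. A minimal sketch of this consumer pattern (payloads are placeholders):

def co_consumer():
    """Generator-as-coroutine: receives items via .send() until .close()."""
    received = 0
    try:
        while True:
            item, log = yield  # suspends here until the next .send()
            received += 1
            print('saving', item, log)
    except GeneratorExit:  # raised inside the generator by .close()
        print('finalizing after', received, 'items')


cor = co_consumer()
cor.send(None)  # prime: run up to the first ``yield``
cor.send(('vsz1', {'rows': 10}))
cor.send(('vsz2', {'rows': 20}))
cor.close()  # triggers the cleanup branch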
Example #11
0
def main(new_arg=None, **kwargs):
    global l

    if __package__ is None:
        from sys import path as sys_path
        from os import path as os_path
        sys_path.append(
            os_path.dirname(os_path.dirname(os_path.abspath(__file__))))

    from utils2init import prep

    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)

    # Input files
    default_input_filemask = '*.xml'
    inD, namesFE, nFiles, outD, outF, outE, bWrite2dir, msgFile = prep(
        {
            'path': cfg['in']['path'],
            'out_path': cfg['out']['path']
        }, default_input_filemask)

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    l.warning('\n' + this_prog_basename(__file__) + ' started. ')

    l.warning(msgFile)
    # set_field_if_no(cfg['out'], 'dt_between_track_segments', 99999)

    gpx = parse_smses(cfg)
    f = None
    try:
        f = open(cfg['in']['path'].with_suffix('.gpx'), 'w')
        bMissedCoordTo0 = cfg['process'].get('b_missed_coord_to_zeros', False)
        if bMissedCoordTo0:
            for p in gpx.walk(only_points=True):
                if p.latitude is None or p.longitude is None:
                    p.latitude = '0'  # float('NaN') #0
                    p.longitude = '0'  # float('NaN') #0
                # if p_prev==p:
                # p.delete
                # p_prev= p

        # gpx.add_missing_data() #remove_empty()
        f.write(gpx.to_xml())
        print('ok')
    except Ex_nothing_done as e:
        print(e.message)
    except Exception as e:
        msg_option = f'The end. There are error {standard_error_info(e)}'
        print(msg_option)
        try:
            err_msg = e.msg
            l.error(' '.join([err_msg, msg_option]))
        except AttributeError:
            l.error(msg_option)
    finally:
        if f:
            f.close()
        try:
            # if not bWrite2dir:
            #     fp_out.close()
            # l.handlers[0].flush()
            logging.shutdown()
        except:
            pass
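
Example #11 walks every point of the generated GPX and zero-fills missing coordinates before serializing. A standalone sketch of the same pass with gpxpy (the inline GPX and the artificially blanked coordinate are for illustration only):

import gpxpy

GPX_SRC = """<?xml version="1.0"?>
<gpx version="1.1" creator="demo">
<trk><trkseg>
<trkpt lat="54.7" lon="20.5"></trkpt>
<trkpt lat="54.8" lon="20.6"></trkpt>
</trkseg></trk>
</gpx>"""

gpx = gpxpy.parse(GPX_SRC)
next(gpx.walk(only_points=True)).latitude = None  # simulate a point that lacks coordinates

for p in gpx.walk(only_points=True):
    if p.latitude is None or p.longitude is None:
        p.latitude = 0.0  # zero-fill so the serialized GPX stays loadable
        p.longitude = 0.0

print(gpx.to_xml())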
Example #12
0
def main(new_arg=None, **kwargs):
    """

    :param new_arg: list of strings, command line arguments
    :kwargs: dicts for each section: to overwrite values in them (overwrites even high-priority values; other values remain)
    Note: if new_arg=='<cfg_from_args>' returns cfg, but it will be None if argument
     argv[1:] == '-h' or '-v' was passed to this code
    argv[1] is cfgFile. It was used with cfg files:
        'csv2h5_nav_supervisor.ini'
        'csv2h5_IdrRedas.ini'
        'csv2h5_Idronaut.ini'

    :return:
    """
    global l

    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(**cfg['in'],
                                      b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message)
        return ()

    bOld_FF00FF = False
    # if 'TermGrunt' in sys.argv[1] or 'FF00FF' in str(cfg['in']['path']):  # 'TermGrunt.h5'?  args.path.endswith('bin'):
    #     bOld_FF00FF = True
    #     cfg['in'].update({
    #     'header': 'TERM',
    #     'dt_from_utc': timedelta(hours=-1),
    #     'fs': 1, 'b_time_fromtimestamp': True,
    #     'b_time_fromtimestamp_source': False})
    # else:  # 'Katran.h5'
    #     cfg['in'].update({
    #     'delimiter_hex': '000000E6',
    #     'header': 'P, Temp, Cond',
    #     'dt_from_utc': timedelta(hours=0),
    #     'fs': 10, 'b_time_fromtimestamp': False,
    #     'b_time_fromtimestamp_source': False})

    set_field_if_no(
        cfg['in'], 'dtype', 'uint{:d}'.format(2**(3 + np.searchsorted(
            2**np.array([3, 4, 5, 6, 7]) > np.array(
                8 * (cfg['in']['data_word_len'] - 1)), 1))))
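    # The searchsorted expression picks the narrowest unsigned dtype whose bit width
    # (8, 16, 32, 64, 128 or 256) strictly exceeds 8*(data_word_len - 1) bits,
    # e.g. data_word_len=2 -> 'uint16', data_word_len=3 -> 'uint32'.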

    # Prepare specific format loading and writing
    set_field_if_no(cfg['in'], 'coltime', [])
    cfg['in'] = init_input_cols(cfg['in'])
    cfg['out']['names'] = np.array(cfg['in']['dtype'].names)[
        cfg['in']['cols_loaded_save_b']]
    cfg['out']['formats'] = [
        cfg['in']['dtype'].fields[n][0] for n in cfg['out']['names']
    ]
    cfg['out']['dtype'] = np.dtype({
        'formats': cfg['out']['formats'],
        'names': cfg['out']['names']
    })
    h5init(cfg['in'], cfg['out'])

    # cfg['Period'] = 1.0 / cfg['in']['fs']  # instead Second can use Milli / Micro / Nano:
    # cfg['pdPeriod'] = pd.to_timedelta(cfg['Period'], 's')
    # #pd.datetools.Second(cfg['Period'])\
    #     if 1 % cfg['in']['fs'] == 0 else\
    #     pd.datetools.Nano(cfg['Period'] * 1e9)

    # log table of loaded files. Columns: start time, file name, and its index in the array of all loaded data:
    log_item = cfg['out']['log'] = {}  # fields will have: 'fileName': None, 'fileChangeTime': None, 'rows': 0

    strLog = ''
    # from collections import namedtuple
    # type_log_files = namedtuple('type_log_files', ['label','iStart'])
    # log.sort(axis=0, order='log_item['Date0']')#sort files by time

    dfLogOld, cfg['out']['db'], cfg['out'][
        'b_skip_if_up_to_date'] = h5temp_open(**cfg['out'])
    if 'log' in cfg['program'].keys():
        f = open(PurePath(sys_argv[0]).parent / cfg['program']['log'],
                 'a',
                 encoding='cp1251')
        f.writelines(
            datetime.now().strftime('\n\n%d.%m.%Y %H:%M:%S> processed ' +
                                    str(cfg['in']['nfiles']) + ' file' +
                                    ('s:' if cfg['in']['nfiles'] > 1 else ':')))
    b_remove_duplicates = False  # normally no duplicates, but set if detected
    # Config specifically for readBinFramed
    set_field_if_no(cfg['in'], 'b_byte_order_is_big_endian', True)
    set_field_if_no(cfg['in'], 'b_baklan', False)
    set_field_if_no(cfg['in'], 'b_time_fromtimestamp_source', False)
    cfg['out']['fs'] = cfg['in']['fs']
    if True:
        ## Main loop ##############################################################
        for i1_file, path_in in h5_dispenser_and_names_gen(
                cfg['in'], cfg['out']):
            l.info('{}. {}: '.format(i1_file, path_in.name))

            # Loading data
            if bOld_FF00FF:
                V = readFF00FF(path_in, cfg)
                iFrame = np.arange(len(V))
            else:
                V, iFrame = readBinFramed(path_in, cfg['in'])
            if ('b_time_fromtimestamp' in cfg['in'] and cfg['in']['b_time_fromtimestamp']) or \
                    ('b_time_fromtimestamp_source' in cfg['in'] and cfg['in']['b_time_fromtimestamp_source']):
                path_in_rec = os_path.join(
                    'd:\\workData\\_source\\BalticSea\\151021_T1Grunt_Pregol\\_source\\not_corrected',
                    os_path.basename(path_in)[:-3] + 'txt'
                ) if cfg['in']['b_time_fromtimestamp_source'] else path_in
                log_item['Date0'] = datetime.fromtimestamp(
                    os_path.getmtime(path_in_rec))  # getctime is bad
                log_item['Date0'] -= iFrame[-1] * timedelta(
                    seconds=1 / cfg['in']['fs']
                )  # used when the computer file timestamp marks the end of recording
            else:
                log_item['Date0'] = datetime.strptime(
                    path_in.stem, cfg['in']['filename2timestart_format'])
            log_item['Date0'] += cfg['in']['dt_from_utc']
            tim = log_item['Date0'] + iFrame * timedelta(
                seconds=1 / cfg['in']['fs']
            )  # tim = pd.date_range(log_item['Date0'], periods=np.size(V, 0), freq=cfg['pdPeriod'])
            df = pd.DataFrame(
                V.view(dtype=cfg['out']['dtype']),  # np.uint16
                columns=cfg['out']['names'],
                index=tim)
            # pd.DataFrame(V, columns=cfg['out']['names'], dtype=cfg['out']['formats'], index=tim)
            if df.empty:  # log['rows']==0
                print('No data => skip file')
                continue

            df, tim = set_filterGlobal_minmax(df,
                                              cfg_filter=cfg['filter'],
                                              log=log_item,
                                              dict_to_save_last_time=cfg['in'])
            if log_item['rows_filtered']:
                print('filtered out {}, remains {}'.format(
                    log_item['rows_filtered'], log_item['rows']))
            if not log_item['rows']:
                l.warning('no data! => skip file')
                continue
            print(
                '.', end=''
            )  # , divisions=d.divisions), divisions=pd.date_range(tim[0], tim[-1], freq='1D')

            # Append to Store
            h5_append(cfg['out'], df.astype('int32'), log_item)

            if 'txt' in cfg['program'].keys():  # can be saved as text too
                np.savetxt(cfg['program']['txt'],
                           V,
                           delimiter='\t',
                           newline='\n',
                           header=cfg['in']['header'] + log_item['fileName'],
                           fmt='%d',
                           comments='')

    try:
        if b_remove_duplicates:
            for tblName in (cfg['out']['table'] +
                            cfg['out']['tableLog_names']):
                cfg['out']['db'][tblName].drop_duplicates(
                    keep='last', inplace=True)  # subset='fileName',?
        if len(strLog):
            print('Create index', end=', ')
            for tblName in (cfg['out']['table'] +
                            cfg['out']['tableLog_names']):
                cfg['out']['db'].create_table_index(tblName,
                                                    columns=['index'],
                                                    kind='full')
        else:
            print('done nothing')
    except Exception as e:
        l.exception('The end. There is an error ')

        import traceback, code
        from sys import exc_info as sys_exc_info

        tb = sys_exc_info()[2]  # type, value,
        traceback.print_exc()
        last_frame = lambda tb=tb: last_frame(tb.tb_next) if tb.tb_next else tb
        frame = last_frame().tb_frame
        ns = dict(frame.f_globals)
        ns.update(frame.f_locals)
        code.interact(local=ns)
    # Sort index if there is any processed data (needed because ``ptprepack`` does not close the HDF5 source when it finds no data)
    if cfg['in'].get('time_last'):
        failed_storages = h5move_tables(cfg['out'])
        print('Ok.', end=' ')
        h5index_sort(
            cfg['out'],
            out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
            in_storages=failed_storages)
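
Example #12 reconstructs per-sample timestamps from a start time and the frame counter: time = Date0 + iFrame * (1/fs) seconds. A minimal sketch of that timing arithmetic (fs and the start time below are illustrative):

from datetime import datetime, timedelta

import numpy as np
import pandas as pd

fs = 10.0  # sampling frequency, Hz
date0 = datetime(2015, 10, 21, 12, 0, 0)  # recording start time
iframe = np.arange(5)  # frame counter as read from the file

tim = date0 + iframe * timedelta(seconds=1 / fs)
df = pd.DataFrame({'P': np.arange(5, dtype=np.uint16)}, index=tim)
print(df.index[0], '...', df.index[-1])  # 12:00:00 ... 12:00:00.400000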
Example #13
0
def main(new_arg=None, **kwargs):
    """

    :param new_arg: list of strings, command line arguments
    :kwargs: dicts of dicts (one per ini section): specified values overwrite ini values
    """

    # global l
    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)
    if not cfg['program']:
        return  # usually an unrecognized-arguments error was already displayed
    cfg['in']['db_coefs'] = Path(cfg['in']['db_coefs'])
    for path_field in ['db_coefs', 'path_cruise']:
        if not cfg['in'][path_field].is_absolute():
            cfg['in'][path_field] = (
                cfg['in']['cfgFile'].parent / cfg['in'][path_field]
            ).resolve().absolute()  # cfg['in']['cfgFile'].parent /

    def constant_factory(val):
        def default_val():
            return val

        return default_val

    for lim in ('min_date', 'max_date'):
        # convert keys to int because they must be comparable to probes_int_list (command line argument keys are always strings; in yaml you can set string or int)
        _ = {int(k): v for k, v in cfg['filter'][lim].items()}
        cfg['filter'][lim] = defaultdict(constant_factory(_.get(0)), _)
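    # Lookup semantics: cfg['filter'][lim][probe] returns the probe's own limit when one
    # was configured; otherwise the key-0 value (_.get(0)) serves as the default for all probes.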

    l = init_logging(logging, None, None, 'INFO')
    #l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])

    if True:  # set to False to disable this experimental speedup (it uses memory)
        from dask.cache import Cache
        cache = Cache(2e9)  # Leverage two gigabytes of memory
        cache.register()  # Turn cache on globally

    #if __debug__:
    # # because there was errors on debug when default scheduler used
    # cfg['program']['dask_scheduler'] = 'synchronous'

    if cfg['program']['dask_scheduler']:
        if cfg['program']['dask_scheduler'] == 'distributed':
            from dask.distributed import Client
            # cluster = dask.distributed.LocalCluster(n_workers=2, threads_per_worker=1, memory_limit="5.5Gb")
            client = Client(processes=False)
            # navigate to http://localhost:8787/status to see the diagnostic dashboard if you have Bokeh installed
            # processes=False avoids inter-worker communication: good for computations that release the GIL (numpy, da.array); without it there was an error
        else:
            if cfg['program']['dask_scheduler'] == 'synchronous':
                l.warning('using "synchronous" scheduler for debugging')
            import dask
            dask.config.set(scheduler=cfg['program']['dask_scheduler'])

    # Run steps :
    st.start = cfg['program']['step_start']
    st.end = cfg['program']['step_end']
    st.go = True

    if not cfg['out']['db_name']:
        # set the name from the 'path_cruise' dir name, or its parent, whichever starts with digits
        # (the "*inclinometer*" subdir gets priority for dir_incl below)
        for p in (lambda p: [p, p.parent])(cfg['in']['path_cruise']):
            m = re.match(r'(^[\d_]*).*', p.name)
            if m:
                break
        cfg['out']['db_name'] = f"{m.group(1).strip('_')}incl.h5"

    dir_incl = next((d for d in cfg['in']['path_cruise'].glob('*inclinometer*')
                     if d.is_dir()), cfg['in']['path_cruise'])
    db_path = dir_incl / '_raw' / cfg['out']['db_name']

    # ---------------------------------------------------------------------------------------------
    # def fs(probe, name):
    #     if 'w' in name.lower():  # Baranov's wavegauge electronic
    #         return 10  # 5
    #     return 5
    # if probe < 20 or probe in [23, 29, 30, 32, 33]:  # 30 [4, 11, 5, 12] + [1, 7, 13, 30]
    #     return 5
    # if probe in [21, 25, 26] + list(range(28, 35)):
    #     return 8.2
    # return 4.8

    def datetime64_str(time_str: Optional[str] = None) -> np.ndarray:
        """
        Reformat time_str to ISO 8601 or to 'NaT'. Used here as input for functions that convert str to numpy.datetime64
        :param time_str: May be 'NaT'
        :return: ndarray of strings (tested for 1 element only) formatted by numpy.
        """
        return np.datetime_as_string(np.datetime64(time_str, 's'))

    probes = cfg['in']['probes'] or range(
        1, 41)  # sets default range, specify your values before line ---
    raw_root, probe_is_incl = re.subn('INCL_?', 'INKL_',
                                      cfg['in']['probes_prefix'].upper())

    # some parameters that depend on probe type (indicated by probes_prefix)
    p_type = defaultdict(
        # Baranov's format
        constant_factory({
            'correct_fun': partial(
                correct_txt,
                mod_file_name=mod_incl_name,
                sub_str_list=[
                    b'^\r?(?P<use>20\d{2}(\t\d{1,2}){5}(\t\d{5}){8}).*',
                    b'^.+'
                ]),
            'fs': 10,
            'format': 'Baranov',
        }),
        {
            (lambda x: x if x.startswith('incl') else 'incl')(cfg['in']['probes_prefix']): {
                'correct_fun': partial(
                    correct_txt,
                    mod_file_name=mod_incl_name,
                    sub_str_list=[
                        b'^(?P<use>20\d{2}(,\d{1,2}){5}(,\-?\d{1,6}){6}(,\d{1,2}\.\d{2})(,\-?\d{1,3}\.\d{2})).*',
                        b'^.+'
                    ]),
                'fs': 5,
                'format': 'Kondrashov',
            },
            'voln': {
                'correct_fun': partial(
                    correct_txt,
                    mod_file_name=mod_incl_name,
                    sub_str_list=[
                        b'^(?P<use>20\d{2}(,\d{1,2}){5}(,\-?\d{1,8})(,\-?\d{1,2}\.\d{2}){2}).*',
                        b'^.+'
                    ]),
                'fs': 5,
                # 'tbl_prefix': 'w',
                'format': 'Kondrashov',
            }
        })
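    # Any probes_prefix not matched above falls through to the defaultdict factory, i.e.
    # the Baranov-format settings; prefixes starting with 'incl' and the 'voln' prefix
    # get the Kondrashov-format entries defined explicitly.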

    if st(1, 'Save inclinometer or wavegage data from ASCII to HDF5'):
        # Note: additional uncorrected files for the same probe will not be found if corrected ones already exist in the search path (move them out if needed)

        i_proc_probe = 0  # counter of processed probes
        i_proc_file = 0  # counter of processed files
        # pattern to identify only _probe_'s raw data files that need correction: '*INKL*{:0>2}*.[tT][xX][tT]'

        raw_parent = dir_incl / '_raw'  # raw_parent /=
        if cfg['in']['raw_subdir'] is None:
            cfg['in']['raw_subdir'] = ''

        dir_out = raw_parent / re.sub(r'[.\\/ *?]', '_', cfg['in']['raw_subdir'])
        # the sub() flattens multilevel subdirs to one level, which is all that correct_fun() can create

        def dt_from_utc_2000(probe):
            """ Correct time of probes started without time setting. Raw date must start from  2000-01-01T00:00"""
            return (
                datetime(year=2000, month=1, day=1) -
                cfg['in']['time_start_utc'][probe]
            ) if cfg['in']['time_start_utc'].get(probe) else timedelta(0)

        # convert cfg['in']['dt_from_utc'] keys to int

        cfg['in']['dt_from_utc'] = {
            int(p): v
            for p, v in cfg['in']['dt_from_utc'].items()
        }
        # convert cfg['in']['time_start_utc'] to cfg['in']['dt_from_utc'] and keys to int
        cfg['in']['dt_from_utc'].update(    # overwriting 'dt_from_utc' where 'time_start_utc' exists
            {int(p): dt_from_utc_2000(p) for p, v in cfg['in']['time_start_utc'].items()}
            )
        # make cfg['in']['dt_from_utc'][0] be default value
        cfg['in']['dt_from_utc'] = defaultdict(
            constant_factory(cfg['in']['dt_from_utc'].pop(0, timedelta(0))),
            cfg['in']['dt_from_utc'])

        for probe in probes:
            raw_found = []
            raw_pattern_file = str(
                Path(glob.escape(cfg['in']['raw_subdir'])) /
                cfg['in']['raw_pattern'].format(prefix=raw_root, number=probe))
            correct_fun = p_type[cfg['in']['probes_prefix']]['correct_fun']
            # if not archive:
            if (not re.match(r'.*(\.zip|\.rar)$', cfg['in']['raw_subdir'],
                             re.IGNORECASE)) and raw_parent.is_dir():
                raw_found = list(raw_parent.glob(raw_pattern_file))
            if not raw_found:
                # Check if corrected files generated by correct_txt() already exist for this probe. If so, just use them
                raw_found = list(
                    dir_out.glob(
                        f"{cfg['in']['probes_prefix']}{probe:0>2}.txt"))
                if raw_found:
                    print('corrected csv file', [r.name for r in raw_found],
                          'found')
                    correct_fun = lambda x, dir_out: x
                elif not cfg['in']['raw_subdir']:
                    continue

            for file_in in (raw_found or open_csv_or_archive_of_them(
                    raw_parent, binary_mode=False, pattern=raw_pattern_file)):
                file_in = correct_fun(file_in, dir_out=dir_out)
                if not file_in:
                    continue
                tbl = file_in.stem  # f"{cfg['in']['probes_prefix']}{probe:0>2}"
                # tbl = re.sub('^((?P<i>inkl)|w)_0', lambda m: 'incl' if m.group('i') else 'w',  # correct name
                #              re.sub('^[\d_]*|\*', '', file_in.stem).lower()),  # remove date-prefix if in name
                csv2h5(
                    [
                        str(
                            Path(__file__).parent / 'ini' /
                            f"csv_{'inclin' if probe_is_incl else 'wavegage'}_{p_type[cfg['in']['probes_prefix']]['format']}.ini"
                        ),
                        '--path',
                        str(file_in),
                        '--blocksize_int',
                        '50_000_000',  # 50 MB
                        '--table',
                        tbl,
                        '--db_path',
                        str(db_path),
                        # '--log', str(scripts_path / 'log/csv2h5_inclin_Kondrashov.log'),
                        # '--b_raise_on_err', '0',  # ?
                        '--b_interact',
                        '0',
                        '--fs_float',
                        str(p_type[cfg['in']['probes_prefix']]
                            ['fs']),  #f'{fs(probe, file_in.stem)}',
                        '--dt_from_utc_seconds',
                        str(cfg['in']['dt_from_utc'][probe].total_seconds()),
                        '--b_del_temp_db',
                        '1',
                    ] +
                    (['--csv_specific_param_dict', 'invert_magnitometr: True']
                     if probe_is_incl else []),
                    **{
                        'filter': {
                            'min_date':
                            cfg['filter']['min_date'].get(
                                probe, np.datetime64(0, 'ns')),
                            'max_date':
                            cfg['filter']['max_date'].get(
                                probe, np.datetime64('now', 'ns')
                            ),  # a plain 'now' works in synchronous mode
                        }
                    })

                # Get coefs:
                l.info(
                    f"Adding coefficients to {db_path}/{tbl} from {cfg['in']['db_coefs']}"
                )
                try:
                    h5copy_coef(cfg['in']['db_coefs'], db_path, tbl)
                except KeyError as e:  # Unable to open object (component not found)
                    l.warning('No coefs to copy?')
                    # write some dummy coefficients so Veusz patterns can load:
                    h5copy_coef(None,
                                db_path,
                                tbl,
                                dict_matrices=dict_matrices_for_h5(tbl=tbl))
                except OSError as e:
                    l.warning('DB with coefs not found?')
                    # write some dummy coefficients so Veusz patterns can load:
                    h5copy_coef(None,
                                db_path,
                                tbl,
                                dict_matrices=dict_matrices_for_h5(tbl=tbl))
                i_proc_file += 1
            else:
                print('no', raw_pattern_file, end=', ')
            i_proc_probe += 1
        print('Ok:', i_proc_probe, 'probes,', i_proc_file, 'files processed.')

    if st(2, 'Calculate physical parameters and average'):
        kwarg = {
            'in': {
                'min_date': cfg['filter']['min_date'][0],
                'max_date': cfg['filter']['max_date'][0],
                'time_range_zeroing': cfg['in']['time_range_zeroing']
            },
            'proc': {}
        }
        # if aggregate_period_s is None: do not average, write to *_proc_noAvg.h5; else load from that h5 and write to *_proc.h5
        if not cfg['out']['aggregate_period_s']:
            cfg['out']['aggregate_period_s'] = [
                None, 2, 600, 7200 if probe_is_incl else 3600
            ]

        if cfg['in']['azimuth_add']:
            if 'Lat' in cfg['in']['azimuth_add']:
                # add magnetic declination,° for used coordinates
                # todo: get time
                kwarg['proc']['azimuth_add'] = mag_dec(
                    cfg['in']['azimuth_add']['Lat'],
                    cfg['in']['azimuth_add']['Lon'],
                    datetime(2020, 9, 10),
                    depth=-1)
            else:
                kwarg['proc']['azimuth_add'] = 0
            if 'constant' in cfg['in']['azimuth_add']:
                # and add a constant, e.g. to subtract the declination of the calibration place if it was applied there
                kwarg['proc']['azimuth_add'] += cfg['in']['azimuth_add'][
                    'constant']  # add -6.656 to account for calibration in Kaliningrad (mag deg = 6.656°)

        for aggregate_period_s in cfg['out']['aggregate_period_s']:
            if aggregate_period_s is None:
                db_path_in = db_path
                db_path_out = dir_incl / f'{db_path.stem}_proc_noAvg.h5'
            else:
                db_path_in = dir_incl / f'{db_path.stem}_proc_noAvg.h5'
                db_path_out = dir_incl / f'{db_path.stem}_proc.h5'  # or separately: '_proc{aggregate_period_s}.h5'

            # 'incl.*|w\d*'  inclinometers or wavegauges w\d\d # 'incl09':
            tables_list_regex = f"{cfg['in']['probes_prefix'].replace('voln', 'w')}.*"
            if cfg['in']['probes']:
                tables_list_regex += "(?:{})".format('|'.join(
                    '{:0>2}'.format(p) for p in cfg['in']['probes']))

            args = [
                '../../empty.yml',  # all settings are here; an existing empty file avoids the 'using default configuration' message
                '--db_path',
                str(db_path_in),
                '--tables_list',
                tables_list_regex,
                '--aggregate_period',
                f'{aggregate_period_s}S' if aggregate_period_s else '',
                '--out.db_path',
                str(db_path_out),
                '--table',
                f'V_incl_bin{aggregate_period_s}'
                if aggregate_period_s else 'V_incl',
                '--verbose',
                'INFO',  #'DEBUG' get many numba messages
                '--b_del_temp_db',
                '1',
                # '--calc_version', 'polynom(force)',  # deprecated
                # '--chunksize', '20000',
                # '--not_joined_h5_path', f'{db_path.stem}_proc.h5',
            ]

            if aggregate_period_s is None:  # processing parameters (when aggregating already-processed data, no processing is done)
                # Note: for Baranov's prog 4096 is not suited:
                args += ([
                    '--max_dict',
                    'M[xyz]:4096',
                    # '--time_range_zeroing_dict', "incl19: '2019-11-10T13:00:00', '2019-11-10T14:00:00'\n,"  # not works - use kwarg
                    # '--time_range_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
                    '--split_period',
                    '1D'
                ] if probe_is_incl else [
                    '--bad_p_at_bursts_starts_peroiod',
                    '1H',
                ])
                # csv split by 1 day (default when not averaging); otherwise the csv is monolithic
            if aggregate_period_s not in cfg['out'][
                    'aggregate_period_s_not_to_text']:  # , 300, 600]:
                args += ['--text_path', str(dir_incl / 'text_output')]
            # If need all data to be combined one after one:
            # set_field_if_no(kwarg, 'in', {})
            # kwarg['in'].update({
            #
            #         'tables': [f'incl{i:0>2}' for i in min_date.keys() if i!=0],
            #         'dates_min': min_date.values(),  # in table list order
            #         'dates_max': max_date.values(),  #
            #         })
            # set_field_if_no(kwarg, 'out', {})
            # kwarg['out'].update({'b_all_to_one_col': 'True'})

            incl_h5clc.main(args, **kwarg)

    if st(3, 'Calculate spectrograms'):  # Can be done at any time after step 1
        min_Pressure = 7

        # add a dict dates_min like {probe: parameter} to incl_clc so a parameter can be set per probe
        def raise_ni():
            raise NotImplementedError(
                'Cannot process probes having different fs in one run: do it separately')

        args = [
            Path(incl_h5clc.__file__).with_name(
                f'incl_h5spectrum{db_path.stem}.yaml'),
            # if no such file all settings are here
            '--db_path',
            str(dir_incl / f'{db_path.stem}_proc_noAvg.h5'),
            '--tables_list',
            f"{cfg['in']['probes_prefix']}.*",  # inclinometers or wavegauges w\d\d  ## 'w02', 'incl.*',
            # '--aggregate_period', f'{aggregate_period_s}S' if aggregate_period_s else '',
            '--min_date',
            datetime64_str(cfg['filter']['min_date'][0]),
            '--max_date',
            datetime64_str(cfg['filter']['max_date']
                           [0]),  # '2019-09-09T16:31:00',  #17:00:00
            '--min_Pressure',
            f'{min_Pressure}',
            # '--max_dict', 'M[xyz]:4096',  # use if db_path is not ends with _proc_noAvg.h5 i.e. need calc velocity
            '--out.db_path',
            f"{db_path.stem.replace('incl', cfg['in']['probes_prefix'])}_proc_psd.h5",
            # '--table', f'psd{aggregate_period_s}' if aggregate_period_s else 'psd',
            '--fs_float',
            str(p_type[cfg['in']['probes_prefix']]
                ['fs']),  # f"{fs(probes[0], cfg['in']['probes_prefix'])}",
            # (lambda x: x == x[0])(np.vectorize(fs)(probes, prefix))).all() else raise_ni()
            #
            # '--time_range_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
            # '--verbose', 'DEBUG',
            # '--chunksize', '20000',
            '--b_interact',
            '0',
        ]
        if probe_is_incl:
            args += [
                '--split_period',
                '2H',
                '--fmin',
                '0.0004',  #0.0004
                '--fmax',
                '1.05'
            ]
        else:
            args += [
                '--split_period',
                '1H',
                '--dt_interval_minutes',
                '15',  # set this if burst mode to the burst interval
                '--fmin',
                '0.0001',
                '--fmax',
                '4',
                #'--min_Pressure', '-1e15',  # to not load NaNs
            ]

        incl_h5spectrum.main(args)

    if st(4, 'Draw in Veusz'):
        pattern_path = dir_incl / r'processed_h5,vsz/201202-210326incl_proc#28.vsz'
        # r'\201202_1445incl_proc#03_pattern.vsz'  #'
        # db_path.parent / r'vsz_5min\191119_0000_5m_incl19.vsz'  # r'vsz_5min\191126_0000_5m_w02.vsz'

        b_images_only = False
        # old index-slice string to replace when importing in vsz:
        pattern_str_slice_old = None

        # Length of non-adjacent intervals, s (set None to disallow)
        # period between intervals and the interval to draw, as pandas offset strings ('1D')
        period_str = '0s'  # '1D'  #  dt
        dt_str = '0s'  # '5m'
        file_intervals = None
        edges_dict = None  # set only when per-probe [min, max] edges are used (see below)

        period = to_offset(period_str).delta
        dt = to_offset(dt_str).delta  # timedelta(0)  #  60 * 5

        if file_intervals and period and dt:

            # Load starts and assign ends
            t_intervals_start = pd.read_csv(
                cfg['in']['path_cruise'] /
                r'vsz+h5_proc\intervals_selected.txt',
                converters={
                    'time_start': lambda x: np.datetime64(x, 'ns')
                },
                index_col=0).index
            edges = (pd.DatetimeIndex(t_intervals_start),
                     pd.DatetimeIndex(t_intervals_start + dt_custom_s)
                     )  # np.zeros_like()
        elif period and dt:
            # Generate periodic intervals
            t_interval_start, t_intervals_end = intervals_from_period(
                datetime_range=np.array(
                    [
                        cfg['filter']['min_date']['0'],
                        cfg['filter']['max_date']['0']
                    ],
                    # ['2018-08-11T18:00:00', '2018-09-06T00:00:00'],
                    # ['2019-02-11T13:05:00', '2019-03-07T11:30:00'],
                    # ['2018-11-16T15:19', '2018-12-14T14:35'],
                    # ['2018-10-22T12:30', '2018-10-27T06:30:00'],
                    'datetime64[s]'),
                period=period)
            edges = (pd.DatetimeIndex([t_interval_start
                                       ]).append(t_intervals_end[:-1]),
                     pd.DatetimeIndex(t_intervals_end))
        else:  # [min, max] edges for each probe
            edges_dict = {
                pr:
                [cfg['filter']['min_date'][pr], cfg['filter']['max_date'][pr]]
                for pr in probes
            }

        cfg_vp = {'veusze': None}
        for i, probe in enumerate(probes):
            # cfg_vp = {'veusze': None}
            if edges_dict:  # custom edges for each probe
                edges = [pd.DatetimeIndex([t]) for t in edges_dict[probe]]

            # substring in file to replace probe_name_in_pattern (see below).
            probe_name = f"_{cfg['in']['probes_prefix'].replace('incl', 'i')}{probe:02}"
            tbl = None  # f"/{cfg['in']['probes_prefix']}{probe:02}"  # set to check that probe data exists in db; None skips the check
            l.info('Draw %s in Veusz: %d intervals...', probe_name,
                   edges[0].size)
            # for i_interval, (t_interval_start, t_interval_end) in enumerate(zip(pd.DatetimeIndex([t_interval_start]).append(t_intervals_end[:-1]), t_intervals_end), start=1):

            for i_interval, (t_interval_start,
                             t_interval_end) in enumerate(zip(*edges),
                                                          start=1):

                # if i_interval < 23: #<= 0:  # TEMPORARY Skip this number of intervals
                #     continue
                if period and period != dt:
                    t_interval_start = t_interval_end - pd.Timedelta(
                        dt_custom_s, 's')

                if tbl:
                    try:  # skipping absent probes
                        start_end = h5q_interval2coord(
                            db_path=str(db_path),
                            table=tbl,
                            t_interval=(t_interval_start, t_interval_end))
                        if not len(start_end):
                            break  # no data
                    except KeyError:
                        break  # device name not in specified range, go to next name

                pattern_path_new = pattern_path.with_name(''.join([
                    f'{t_interval_start:%y%m%d_%H%M}',
                    f'_{dt_str}' if dt else '', f'{probe_name}.vsz'
                ]))

                # Modify pattern file
                if not b_images_only:
                    pattern_type, pattern_number = re.match(
                        r'.*(incl|w)_proc?#?(\d*).*',
                        pattern_path.name).groups()
                    probe_name_in_pattern = f"_{pattern_type.replace('incl', 'i')}{pattern_number}"

                    def f_replace(line):
                        """
                        Replace in file
                        1. probe name
                        2. slice
                        """
                        # if i_interval == 1:
                        line, ok = re.subn(probe_name_in_pattern, probe_name,
                                           line)
                        if ok and pattern_str_slice_old:  # can be only in same line
                            str_slice = '(({:d}, {:d}, None),)'.format(
                                *(start_end +
                                  np.int32([-1, 1])))  # bytes(, 'ascii')
                            line = re.sub(pattern_str_slice_old, str_slice,
                                          line)
                        return line

                    if not rep_in_file(pattern_path,
                                       pattern_path_new,
                                       f_replace=f_replace,
                                       binary_mode=False):
                        l.warning('Veusz pattern not changed!')  # may be ok if we only need to draw the pattern
                        # break
                    elif cfg_vp['veusze']:
                        cfg_vp['veusze'].Load(str(pattern_path_new))
                elif cfg_vp['veusze']:
                    cfg_vp['veusze'].Load(str(pattern_path_new))

                txt_time_range = \
                    """
                    "[['{:%Y-%m-%dT%H:%M}', '{:%Y-%m-%dT%H:%M}']]" \
                    """.format(t_interval_start, t_interval_end)
                print(f'{i_interval}. {txt_time_range}', end=' ')

                cfg_vp = veuszPropagate.main(
                    [
                        Path(veuszPropagate.__file__).parent.with_name(
                            'veuszPropagate.ini'),
                        # '--data_yield_prefix', '-',

                        # '--path', str(db_path),  # if custom loading from db and some source is required
                        '--tables_list',
                        '',  # switches to search vsz-files only # f'/{probe_name}',  # 181022inclinometers/ \d*
                        '--pattern_path',
                        str(pattern_path_new),
                        # fr'd:\workData\BalticSea\190801inclinometer_Schuka\{probe_name}_190807_1D.vsz',
                        # str(dir_incl / f'{probe_name}_190211.vsz'), #warning: create file with small name
                        # '--before_next', 'restore_config',
                        # '--add_to_filename', f"_{t_interval_start:%y%m%d_%H%M}_{dt}",
                        '--filename_fun',
                        f'lambda tbl: "{pattern_path_new.name}"',
                        '--add_custom_list',
                        f'USEtime__',  # f'USEtime{probe_name}', nAveragePrefer',
                        '--add_custom_expressions_list',
                        txt_time_range,
                        # + """
                        # ", 5"
                        # """,
                        '--b_update_existed',
                        'True',
                        '--export_pages_int_list',
                        '0',  # 0 for all '6, 7, 8',  #'1, 2, 3'
                        # '--export_dpi_int', '200',
                        '--export_format',
                        'jpg',  #'emf',
                        '--b_interact',
                        '0',
                        '--b_images_only',
                        f'{b_images_only}',
                        '--return',
                        '<embedded_object>',  # reuse to not bloat memory
                        '--b_execute_vsz',
                        'True',
                        '--before_next',
                        'Close()'  # Close() is needed when b_execute_vsz runs many files
                    ],
                    veusze=cfg_vp['veusze'])
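                # Passing the returned '<embedded_object>' back in as ``veusze``
                # reuses one embedded Veusz instance across intervals instead of
                # spawning a new one per file (see '--return', '<embedded_object>').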

    if st(40, 'Draw in Veusz by loader-drawer.vsz method'):
        # save all vsz files that use separate code

        from os import chdir as os_chdir
        dt_s = 300
        cfg['in'][
            'pattern_path'] = db_path.parent / f'vsz_{dt_s:d}s' / '~pattern~.vsz'

        time_starts = pd.read_csv(
            db_path.parent / r'processed_h5,vsz' / 'intervals_selected.txt',
            index_col=0,
            parse_dates=True,
            date_parser=lambda x: pd.to_datetime(x, format='%Y-%m-%dT%H:%M:%S'
                                                 )).index
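        # note (assumption about the pandas version in use): ``date_parser`` is
        # deprecated since pandas 2.0; the equivalent there is
        # ``date_format='%Y-%m-%dT%H:%M:%S'``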

        pattern_code = cfg['in']['pattern_path'].read_bytes()  # encoding='utf-8'
        path_vsz_all = []
        for i, probe in enumerate(probes):
            probe_name = f"{cfg['in']['probes_prefix']}{probe:02}"  # table name in db
            l.info('Draw %s in Veusz: %d intervals...', probe_name,
                   time_starts.size)
            for i_interval, time_start in enumerate(time_starts, start=1):
                path_vsz = cfg['in']['pattern_path'].with_name(
                    f"{time_start:%y%m%d_%H%M}_{probe_name.replace('incl','i')}.vsz"
                )
                # copy file to path_vsz
                path_vsz.write_bytes(pattern_code)  # replaces 1st row
                path_vsz_all.append(path_vsz)

        os_chdir(cfg['in']['pattern_path'].parent)
        veuszPropagate.main(
            [
                'ini/veuszPropagate.ini',
                '--path',
                str(cfg['in']['pattern_path'].with_name(
                    '??????_????_*.vsz')),  # db_path),
                '--pattern_path',
                f"{cfg['in']['pattern_path']}_",
                # used here only to auto-detect the export dir; need not be an existing file path when ['out']['paths'] is provided
                # '--table_log', f'/{device}/logRuns',
                # '--add_custom_list', f'{device_veusz_prefix}USE_time_search_runs',  # 'i3_USE_timeRange',
                # '--add_custom_expressions',
                # """'[["{log_row[Index]:%Y-%m-%dT%H:%M:%S}", "{log_row[DateEnd]:%Y-%m-%dT%H:%M:%S}"]]'""",
                # '--export_pages_int_list', '1', #'--b_images_only', 'True'
                '--b_interact',
                '0',
                '--b_update_existed',
                'True',  # todo: delete_overlapped
                '--b_images_only',
                'True',
                '--load_timeout_s_float',
                str(cfg['program']['load_timeout_s'])
                # '--min_time', '2020-07-08T03:35:00',
            ],
            **{'out': {
                'paths': path_vsz_all
            }})
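        # The **{'out': {'paths': path_vsz_all}} kwargs apparently override the
        # file list that veuszPropagate would otherwise derive from --path /
        # --pattern_path, so exactly the vsz files written above are processed.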

    if st(50, 'Export from existed Veusz files in dir'):
        pattern_parent = db_path.parent  # r'vsz_5min\191126_0000_5m_w02.vsz'
        pattern_path = str(pattern_parent / r'processed_h5,vsz' /
                           '??????incl_proc#[1-9][0-9].vsz')  # [0-2,6-9]
        veuszPropagate.main([
            'ini/veuszPropagate.ini',
            '--path',
            pattern_path,
            '--pattern_path',
            pattern_path,
            # '--export_pages_int_list', '1', #'--b_images_only', 'True'
            '--b_interact',
            '0',
            '--b_update_existed',
            'True',  # todo: delete_overlapped
            '--b_images_only',
            'True',
            '--load_timeout_s_float',
            str(cfg['program']['load_timeout_s']),
            '--b_execute_vsz',
            'True',
            '--before_next',
            'Close()'  # Close() is needed when b_execute_vsz runs many files
        ])
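
# The steps above rely on rep_in_file() with a line-wise f_replace callback.
# A minimal sketch of such a helper is below; its signature and True-on-change
# return value are assumptions inferred from the call sites above (text mode
# only, matching binary_mode=False), not the real implementation from
# to_pandas_hdf5.csv_specific_proc.
from pathlib import Path

def rep_in_file_sketch(path_in: Path, path_out: Path, f_replace) -> bool:
    """Apply f_replace to each line of path_in, write the result to path_out;
    return True if any line changed."""
    lines = path_in.read_text(encoding='utf-8').splitlines(keepends=True)
    new_lines = [f_replace(line) for line in lines]
    path_out.write_text(''.join(new_lines), encoding='utf-8')
    return new_lines != lines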
Beispiel #14
0
def main(new_arg=None, veusze=None):
    """
    Note: if the vsz data source has an 'Ag_old_inv' variable, coefficients are not inverted; otherwise they are inverted for use in vsz files that do not invert coefficients.
    :param new_arg:
    :return:
    """
    global l
    p = veuszPropagate.my_argparser()
    p_groups = {
        g.title: g
        for g in p._action_groups if g.title.split(' ')[-1] != 'arguments'
    }  # skips special argparse groups
    p_groups['in'].add(
        '--channels_list',
        help=
        'channels that need zero calibration: "magnetometer" or "M" for the magnetometer, anything else for the accelerometer; use "M, A" for both, empty to skip'
    )
    p_groups['in'].add(
        '--widget',
        help=
        'path to Veusz widget property which contains coefficients. For example "/fitV(force)/grid1/graph/fit1/values"'
    )
    p_groups['in'].add(
        '--data_for_coef',
        default='max_incl_of_fit_t',
        help=
        'Veusz data to use as coefficients. If used together with --widget, this data is appended to the data from the widget'
    )

    p_groups['out'].add('--out.path', help='path to db where write coef')
    p_groups['out'].add(
        '--re_tbl_from_vsz_name',
        help=
        'regex to extract the hdf5 table name from the Veusz file name (last used "\D*\d*")'
        # ? why not simply specify the table name?
    )
    # todo:  "b_update_existed" arg will be used here for exported images. Check whether False works or prevent open vsz

    cfg = cfg_from_args(p, new_arg)

    if not Path(cfg['program']['log']).is_absolute():
        cfg['program']['log'] = str(
            Path(__file__).parent.joinpath(
                cfg['program']['log']))  # l.root.handlers[0].baseFilename
    if not cfg:
        return
    if new_arg == '<return_cfg>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    veuszPropagate.l = l
    print('\n' + this_prog_basename(__file__), 'started', end=' ')
    if cfg['out']['b_images_only']:
        print('in images only mode.')
    try:
        print('Output pattern ')
        # Using cfg['out'] to store pattern information
        if not Path(cfg['in']['pattern_path']).is_absolute():
            cfg['in']['pattern_path'] = str(cfg['in']['path'].parent.joinpath(
                cfg['in']['pattern_path']))
        set_field_if_no(cfg['out'], 'path', cfg['in']['pattern_path'])
        cfg['out']['paths'], cfg['out']['nfiles'], cfg['out'][
            'path'] = init_file_names(**cfg['out'],
                                      b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message, ' - no pattern')
        return  # or raise FileNotFoundError?
    try:
        print(end='Data ')
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(
                **cfg['in'], b_interact=False)  # do not bother user 2nd time
    except Ex_nothing_done as e:
        print(e.message)
        return  # or raise FileNotFoundError?
    if not cfg['out']['export_dir']:
        cfg['out']['export_dir'] = Path(cfg['out']['path']).parent
    if cfg['program']['before_next'] and 'restore_config' in cfg['program'][
            'before_next']:
        cfg['in_saved'] = cfg['in'].copy()
    # cfg['loop'] = asyncio.get_event_loop()
    # cfg['export_timeout_s'] = 600
    cfg['out']['export_dir'] = dir_from_cfg(cfg['out']['path'].parent,
                                            cfg['out']['export_dir'])

    veuszPropagate.load_vsz = veuszPropagate.load_vsz_closure(
        cfg['program']['veusz_path'],
        b_execute_vsz=cfg['program']['b_execute_vsz'])
    gen_veusz_and_logs = veuszPropagate.load_to_veusz(
        veuszPropagate.ge_names(cfg), cfg, veusze)

    names_get = ['Inclination_mean_use1', 'logVext1_m__s']  # , 'Inclination_mean_use2', 'logVext2_m__s'
    names_get_fits = ['fit']  # , 'fit2'
    vsz_data = {n: [] for n in names_get}
    for n in names_get_fits:
        vsz_data[n] = []

    # prepare collecting all coef in text also
    names_get_txt_results = ['fit1result']  # , 'fit2result'
    txt_results = {n: {} for n in names_get_txt_results}

    i_file = 0
    for veusze, log in gen_veusz_and_logs:
        if not veusze:
            continue
        i_file += 1
        print(i_file)
        if cfg['out']['re_tbl_from_vsz_name']:
            table = cfg['out']['re_tbl_from_vsz_name'].match(
                log['out_name']).group()
        else:
            table = re.sub(
                '^[\d_]*', '',
                log['out_name'])  # delete all first digits (date part)

        for n in names_get:
            vsz_data[n].append(veusze.GetData(n)[0])
        for n in [cfg['in']['data_for_coef']]:
            vsz_data[n] = list(veusze.GetData(n)[0])

        # Save velocity coefficients into //{table}//coef//Vabs{i} where i is the fit number enumerated from 0
        for i, name_out in enumerate(names_get_fits):  # ['fit1', 'fit2']
            coef = veusze.Get(
                cfg['in']['widget']
            )  # veusze.Root['fitV(inclination)']['grid1']['graph'][name_out].values.val
            if 'a' in coef:
                coef_list = [
                    coef[k] for k in ['d', 'c', 'b', 'a'] if k in coef
                ]
            else:
                coef_list = [
                    coef[k] for k in sorted(coef.keys(), key=digits_first)
                ]
            if cfg['in']['data_for_coef']:
                coef_list += vsz_data[cfg['in']['data_for_coef']]

            vsz_data[name_out].append(coef_list)
            h5copy_coef(None,
                        cfg['out']['path'],
                        table,
                        dict_matrices={
                            f'//coef//Vabs{i}':
                            coef_list,
                            f'//coef//date':
                            np.float64([
                                np.NaN,
                                np.datetime64(datetime.now()).astype(np.int64)
                            ])
                        })
            # h5savecoef(cfg['out']['path'], path=f'//{table}//coef//Vabs{i}', coef=coef_list)
            txt_results[names_get_txt_results[i]][table] = str(coef)

        # Zeroing matrix - calculated in Veusz by rotation on old0pitch old0roll
        Rcor = veusze.GetData(
            'Rcor'
        )[0]  # zeroing angles tuned by "USEcalibr0V_..." in Veusz Custom definitions

        if len(cfg['in']['channels']):
            l.info(
                'Applying zero calibration matrix of pitch = {} and roll = {} degrees'
                .format(np.rad2deg(veusze.GetData('old0pitch')[0][0]),
                        np.rad2deg(veusze.GetData('old0roll')[0][0])))
            with h5py.File(cfg['out']['path'], 'a') as h5:
                for channel in cfg['in']['channels']:
                    (col_str, coef_str) = channel_cols(channel)
                    # h5savecoef(cfg['out']['path'], path=f'//{table}//coef//Vabs{i}', coef=coef_list), dict_matrices={'//coef//' + coef_str + '//A': coefs[tbl][channel]['A'], '//coef//' + coef_str + '//C': coefs[tbl][channel]['b']})

                    # Currently used inclinometers have their electronics rotated by 180 deg. Previously we inserted an additional
                    # rotation operation in Veusz by inverting A_old. Now we want to include this information in the database coef only.
                    try:  # Checking whether A_old_inv exists
                        A_old_inv = veusze.GetData('Ag_old_inv')
                        is_old_used = True  # Rcor does not account for the rotated electronics.
                    except KeyError:
                        is_old_used = False  # Rcor accounts for the rotated electronics.

                    if is_old_used:  # The rotation is done in vsz (A_old in vsz is inverted), so rotate it back
                        # to use in vsz without such inversion

                        # Rotate by 180 deg (note: this is not an inversion)
                        A_old_inv = h5[f'//{table}//coef//{coef_str}//A'][...]
                        A_old = np.dot(A_old_inv,
                                       [[1, 0, 0], [0, -1, 0], [0, 0, -1]
                                        ])  # adds 180 deg to roll
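                        # Check: diag(1, -1, -1) equals a 180 deg rotation about
                        # the X (roll) axis, Rx(180) = [[1,0,0],[0,-1,0],[0,0,-1]],
                        # so right-multiplying adds 180 deg of roll rather than
                        # inverting the matrix.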
                    else:
                        A_old = h5[f'//{table}//coef//{coef_str}//A'][...]
                    # A_old now accounts for the rotated electronics

                    A = np.dot(Rcor, A_old)
                    h5copy_coef(None,
                                h5,
                                table,
                                dict_matrices={f'//coef//{coef_str}//A': A})

        # veusze.Root['fitV(inclination)']['grid1']['graph2'][name_out].function.val
        print(vsz_data)
        veuszPropagate.export_images(
            veusze,
            cfg['out'],
            f"_{log['out_name']}",
            b_skip_if_exists=not cfg['out']['b_update_existed'])

        # vsz_data = veusz_data(veusze, cfg['in']['data_yield_prefix'])
        # # caller do some processing of data and gives new cfg:
        # cfgin_update = yield(vsz_data, log)  # to test run veusze.Save('-.vsz')
        # cfg['in'].update(cfgin_update)  # only update of cfg.in.add_custom_expressions is tested
        # if cfg['in']['add_custom']:
        #     for n, e in zip(cfg['in']['add_custom'], cfg['in']['add_custom_expressions']):
        #         veusze.AddCustom('definition', n, e, mode='replace')
        # #cor_savings.send((veusze, log))
        #
        #
        #
        #

    # veusze.Save(str(path_vsz_save), mode='hdf5')  # veusze.Save(str(path_vsz_save)) saves time with bad resolution
    print('Ok')
    print(txt_results)
    for n in names_get:
        pd.DataFrame.from_dict(
            dict(zip(list(txt_results['fit1result'].keys()),
                     vsz_data[n]))).to_csv(
                         Path(cfg['out']['path']).with_name(
                             f'average_for_fitting-{n}.txt'),
                         sep='\t',
                         header=list(txt_results['fit1result'].keys()),
                         mode='a')
    return {**vsz_data, 'veusze': veusze}
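
# For reference: a hedged sketch of what writing the coefficient matrices with
# plain h5py could look like. The real h5copy_coef from
# inclinometer.h5inclinometer_coef likely does more (e.g. copying between open
# files, handling attributes), so treat this only as an illustration of the
# //<table>//coef//... layout used above.
import h5py
import numpy as np

def save_coef_sketch(h5path: str, table: str, dict_matrices: dict):
    """Write each matrix under /<table>/<relative path>, overwriting existing datasets."""
    with h5py.File(h5path, 'a') as h5:
        for rel_path, matrix in dict_matrices.items():
            path = f'/{table}{rel_path}'.replace('//', '/')
            if path in h5:
                del h5[path]  # datasets cannot be reshaped in place, so replace
            h5[path] = np.asarray(matrix)  # creates intermediate groups as needed

# usage mirroring the calls above (illustrative only):
# save_coef_sketch(cfg['out']['path'], table, {'//coef//Vabs0': coef_list})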
Beispiel #15
0
def main(new_arg=None, **kwargs):
    """
    Accumulates results of different source tables into 2D netCDF matrices, one per result parameter.
    :param new_arg:
    :return:
    Spectrum parameters used (taken from nitime/algorithms/spectral.py):
        NW : float, by default set to 4: this corresponds to a bandwidth of 4 times the fundamental frequency
        The normalized half-bandwidth of the data tapers, indicating a
        multiple of the fundamental frequency of the DFT (Fs/N).
        Common choices are n/2, for n >= 4. This parameter is unitless
        and more MATLAB compatible. As an alternative, set the BW
        parameter in Hz. See Notes on bandwidth.

        BW : float
        The sampling-relative bandwidth of the data tapers, in Hz.

        adaptive : {True/False}
           Use an adaptive weighting routine to combine the PSD estimates of
           different tapers.
        low_bias : {True/False}
           Rather than use 2NW tapers, only use the tapers that have better than
           90% spectral concentration within the bandwidth (still using
           a maximum of 2NW tapers)
    Notes
    -----

    The bandwidth of the windowing function will determine the number of
    tapers to use. This parameter represents the trade-off between frequency
    resolution (lower main lobe BW for the taper) and variance reduction
    (higher BW and number of averaged estimates). Typically, the number of
    tapers is calculated as 2x the bandwidth-to-fundamental-frequency
    ratio, as these eigenfunctions have the best energy concentration.

    The result file is in nc format, which is a Veusz-compatible hdf5 format. If the file exists it will be overwritten.

    todo: best may be is use DBMT: Dynamic Bayesian Multitaper (matlab code downloaded from git)
    """
    global l

    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    init_logging(logging, None, cfg['program']['log'],
                 cfg['program']['verbose'])
    l = logging.getLogger(prog)

    multitaper.warn = l.warning  # module is not installed but copied, so it cannot import this dependency

    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(**{
                **cfg['in'], 'path': cfg['in']['db_path']
            },
                                      b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message)
        return ()
    print('\n' + prog, end=' started. ')

    cfg['in']['columns'] = ['Ve', 'Vn', 'Pressure']
    # minimum time between blocks, required in filt_data_dd() for data quality control messages:
    cfg['in'][
        'dt_between_bursts'] = None  # If None, report any interval bigger than min(1st, 2nd)
    cfg['in']['dt_hole_warning'] = np.timedelta64(2, 's')

    cfg_out = cfg['out']
    if 'split_period' in cfg['out']:
        cfg['proc']['dt_interval'] = np.timedelta64(
            cfg['proc']['dt_interval'] if cfg['proc']['dt_interval'] else
            pd_period_to_timedelta(cfg['out']['split_period']))
        if (not cfg['proc']['overlap']) and \
                (cfg['proc']['dt_interval'] == np.timedelta64(pd_period_to_timedelta(cfg['out']['split_period']))):
            cfg['proc']['overlap'] = 0.5
    else:
        cfg['proc']['dt_interval'] = np.timedelta64(cfg['proc']['dt_interval'])
        # cfg['proc']['dt_interval'] = np.timedelta64('5', 'm') * 24
        cfg['proc']['time_intervals_start'] = np.array(
            cfg['proc']['time_intervals_center'],
            np.datetime64) - cfg['proc']['dt_interval'] / 2

    cfg_out['chunksize'] = cfg['in']['chunksize']
    h5init(cfg['in'], cfg_out)
    # cfg_out_table = cfg_out['table']  need? save because will need to change
    cfg_out['save_proc_tables'] = True  # False

    # cfg['proc'] = {}
    prm = cfg['proc']
    prm['adaptive'] = True  # pmtm spectrum param

    prm['fs'] = cfg['in']['fs']
    prm['bandwidth'] = 8 / cfg['proc']['dt_interval'].astype(
        'timedelta64[s]').astype(
            'float'
        )  # 8 * 2 * prm['fs']/34000  # 4 * 2 * 5/34000 ~= 4 * 2 * fs / N
    prm['low_bias'] = True

    nc_root = netCDF4.Dataset(
        Path(cfg_out['db_path']).with_suffix('.nc'), 'w', format='NETCDF4'
    )  # (some types may need 'NETCDF4_CLASSIC' to use the CLASSIC format for Veusz compatibility)
    nc_psd = nc_root.createGroup(cfg_out['table'])
    nc_psd.createDimension('time', None)
    nc_psd.createDimension('value', 1)
    nc_psd.createVariable('time_good_min', 'f8', ('value', ))
    nc_psd.createVariable('time_good_max', 'f8', ('value', ))
    nc_psd.createVariable('time_interval', 'f4', ('value', ))
    if cfg['out'].get('split_period'):
        # nv_time_interval = nc_psd.createVariable('time_interval', 'f8', ('time',), zlib=False)
        nc_psd.variables['time_interval'][:] = pd_period_to_timedelta(
            cfg['out']['split_period']).delta
    else:
        nc_psd.variables['time_interval'][:] = cfg['proc']['dt_interval']
    # DataFrame of accumulated results: result columns are added in the loop, with the source table name appended to column names
    dfs_all = None
    # Initializing variables to track the time range of the calculated data
    time_good_min = pd.Timestamp.max
    time_good_max = pd.Timestamp.min
    prm['length'] = None
    nv_vars_for_tbl = {}
    tbl_prev = ''
    itbl = 0
    for df, tbl_in, dataname in h5_velocity_by_intervals_gen(cfg, cfg_out):
        tbl = tbl_in.replace('incl', '_i')
        # _, (df, tbl, dataname) in h5_dispenser_and_names_gen(cfg['in'], cfg_out, fun_gen=h5_velocity_by_intervals_gen):

        # interpolate to regular grid
        df = df.resample(timedelta(seconds=1 / prm['fs'])).interpolate()

        len_data_cur = df.shape[0]
        if tbl_prev != tbl:
            itbl += 1
            l.info('%s: len=%s', dataname, len_data_cur)
        l.info('    %s. Writing to "%s"', itbl, tbl)

        # Prepare
        if prm['length'] is None:
            # 1st time
            prm['length'] = len_data_cur
            prm.update(
                psd_mt_params(**prm,
                              dt=float(np.median(np.diff(df.index.values))) /
                              1e9))
            nc_psd.createDimension('freq', len(prm['freqs']))
            # nv_... - variables to be used as ``NetCDF variables``
            nv_freq = nc_psd.createVariable('freq',
                                            'f4', ('freq', ),
                                            zlib=True)
            nv_freq[:] = prm['freqs']
            check_fs = 1e9 / np.median(np.diff(df.index.values)).item()
            if prm.get('fs'):
                np.testing.assert_almost_equal(prm['fs'],
                                               check_fs,
                                               decimal=7,
                                               err_msg='',
                                               verbose=True)
            else:
                prm['fs'] = check_fs
        elif prm['length'] != len_data_cur:
            prm['length'] = len_data_cur
            try:
                prm['dpss'], prm['eigvals'], prm['adaptive_if_can'] = \
                    multitaper._compute_mt_params(prm['length'], prm['fs'], prm['bandwidth'],
                                                  prm['low_bias'], prm['adaptive'])
            except (ModuleNotFoundError, ValueError) as e:
                # already reported via l.warning(), since multitaper.warn is reassigned to l.warning()
                prm['eigvals'] = np.int32([0])
            prm['weights'] = np.sqrt(prm['eigvals'])[np.newaxis, :, np.newaxis]
            # l.warning('new length (%s) is different to last (%s)', len_data_cur, prm['length'])

        if tbl not in nc_psd.groups:
            nc_tbl = nc_psd.createGroup(tbl)
            cols = set()
            if 'Pressure' in df.columns:
                cols.add('Pressure')
                nc_tbl.createVariable('Pressure',
                                      'f4', (
                                          'time',
                                          'freq',
                                      ),
                                      zlib=True)
            if 'Ve' in df.columns:
                cols.update(['Ve', 'Vn'])
                nc_tbl.createVariable('Ve',
                                      'f4', (
                                          'time',
                                          'freq',
                                      ),
                                      zlib=True)
                nc_tbl.createVariable('Vn',
                                      'f4', (
                                          'time',
                                          'freq',
                                      ),
                                      zlib=True)
            nc_tbl.createVariable('time_start', 'f8', ('time', ), zlib=True)
            nc_tbl.createVariable('time_end', 'f8', ('time', ), zlib=True)
            out_row = 0
        nc_tbl.variables['time_start'][out_row], nc_tbl.variables['time_end'][
            out_row] = df.index[[0, -1]].values

        # Calculate PSD
        if prm['eigvals'].any():
            for var_name in cols:

                nc_tbl.variables[var_name][
                    out_row, :] = call_with_valid_kwargs(
                        psd_mt, df[var_name], **prm)[0, :]
            if time_good_min.to_numpy('<M8[ns]') > df.index[0].to_numpy(
                    '<M8[ns]'
            ):  # to_numpy() gets values to avoid tz-naive/aware comparison restrictions
                time_good_min = df.index[0]
            if time_good_max.to_numpy('<M8[ns]') < df.index[-1].to_numpy(
                    '<M8[ns]'):
                time_good_max = df.index[-1]
        else:
            for var_name in cols:
                nc_tbl.variables[var_name][out_row, :] = np.NaN

        out_row += 1

        # if cfg_out['save_proc_tables']:
        #     # ds_psd.to_netcdf('d:\\WorkData\\BlackSea\\190210\\190210incl_proc-psd_test.nc', format='NETCDF4_CLASSIC')
        #     #f.to_hdf('d:\\WorkData\\BlackSea\\190210\\190210incl_proc-psd_test.h5', 'psd', format='fixed')
        #     # tables_have_write.append(tbl)
        #     try:
        #         h5_append_to(df_psd, tbl, cfg_out, msg='save (temporary)', print_ok=None)
        #     except HDF5ExtError:
        #         cfg_out['save_proc_tables'] = False
        #         l.warning('too many columns for "table" format but "fixed" is not updatable so store result in memory first')
        #
        #
        #
        # df_cur = df_psd[['PSD_Vn', 'PSD_Ve']].rename(
        #     columns={'PSD_Ve': 'PSD_Ve' + tbl[-2:], 'PSD_Vn': 'PSD_Vn' + tbl[-2:]}).compute()
        # if dfs_all is None:
        #     dfs_all = df_cur
        # else:
        #     dfs_all = dfs_all.join(df_cur, how='outer')  # , rsuffix=tbl[-2:] join not works on dask

        # if itbl == len(cfg['in']['tables']):  # after last cycle. Need incide because of actions when exit generator
        #     h5_append_to(dfs_all, cfg_out_table, cfg_out, msg='save accumulated data', print_ok='Ok.')

    # nv_time_start_query = nc_psd.createVariable('time_start_query', 'f8', ('time',), zlib=True)
    # nv_time_start_query[:] = cfg['in']['time_intervals_start'].to_numpy(dtype="datetime64[ns]") \
    #     if isinstance(cfg['in']['time_intervals_start'], pd.DatetimeIndex) else cfg['in']['time_intervals_start']

    nc_psd.variables['time_good_min'][:] = np.array(time_good_min.value,
                                                    'M8[ns]')
    nc_psd.variables['time_good_max'][:] = np.array(time_good_max.value,
                                                    'M8[ns]')
    # failed_storages = h5move_tables(cfg_out)
    print('Ok.', end=' ')
    nc_root.close()
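
# The copied ``multitaper`` module used above is not shown here. As a
# self-contained illustration of the same technique, a minimal multitaper PSD
# built on scipy's DPSS tapers might look like this (unweighted averaging over
# tapers; the real psd_mt also supports adaptive weighting, which this sketch
# omits):
import numpy as np
from scipy.signal.windows import dpss

def psd_multitaper_sketch(x, fs, nw=4.0, low_bias=True):
    """Multitaper PSD estimate: average periodograms over DPSS tapers."""
    x = np.asarray(x, dtype=float)
    n = x.size
    k = int(2 * nw) - 1                          # common taper count: 2*NW - 1
    tapers, ratios = dpss(n, nw, k, return_ratios=True)
    if low_bias:                                 # keep only well-concentrated tapers
        tapers = tapers[ratios > 0.9]
    spectra = np.abs(np.fft.rfft(tapers * (x - x.mean()), axis=-1)) ** 2
    freqs = np.fft.rfftfreq(n, d=1 / fs)
    return freqs, spectra.mean(axis=0) / fs      # unweighted average estimate

# usage with synthetic data (illustrative):
# freqs, psd = psd_multitaper_sketch(np.random.randn(4096), fs=5.0)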