def main_init(cfg: DictConfig) -> DictConfig:
    """
    Common startup initializer
    :param cfg:
    :return: cfg
    """
    # global lf
    # if cfg.search_path is not None:
    #     override_path = hydra.utils.to_absolute_path(cfg.search_path)
    #     override_conf = OmegaConf.load(override_path)
    #     cfg = OmegaConf.merge(cfg, override_conf)

    print("Working directory : {}".format(os.getcwd()))
    print(OmegaConf.to_yaml(cfg))

    # cfg = cfg_from_args(argparser_files(), **kwargs)
    if not cfg.program.return_:
        print('Can not initialise')
        return cfg
    elif cfg.program.return_ == '<cfg_from_args>':  # to help testing
        return cfg

    hydra.verbose = 1 if cfg.program.verbose == 'DEBUG' else 0  # made compatible with my old cfg

    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
            **cfg.input, b_interact=cfg.program.b_interact)
    except Ex_nothing_done as e:
        print(e.message)
        return ()
    return cfg
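# Usage sketch (an assumption, not from the original sources): main_init is written for a
# hydra entry point, so a caller would look roughly like below. The config directory 'cfg'
# and config name 'defaults' are hypothetical placeholders.
import hydra
from omegaconf import DictConfig


@hydra.main(config_path='cfg', config_name='defaults')
def run(cfg: DictConfig) -> None:
    cfg = main_init(cfg)  # returns cfg, the '<cfg_from_args>' test value, or () if nothing to do
    if cfg:
        pass  # continue processing with the initialized cfg


if __name__ == '__main__':
    run()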
def main(new_arg):
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg:
        return
    if cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
            **cfg['in'], b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message)
        return ()

    # cfg = {'in': {}}
    # cfg['in']['path'] = \
    #     r'd:\workData\BalticSea\181005_ABP44\navigation\2018-10-06tracks_copy.gpx'
    #     # r'd:\WorkData\_experiment\_2017\tracker\170502.gpx'
    #     # r'd:\workData\_experiment\2016\GPS_tracker\sms_backup\sms-20160225135922.gpx'

    for ifile, nameFull in enumerate(cfg['in']['paths'], start=1):
        print('{}. {}'.format(ifile, nameFull), end=', ')
        gpx2csv(nameFull)
def main_cfg(cfg: DictConfig):  # hydra required arg, not used when called
    """
    ----------------------------
    Add data from CSV-like files to Pandas HDF5 store*.h5
    ----------------------------
    """
    # print(OmegaConf.to_yaml(cfg))
    global lf
    # cfg = cfg_from_args(argparser_files(), **kwargs)
    if not cfg.program.return_:
        print('Can not initialise')
        return cfg
    elif cfg.program.return_ == '<cfg_from_args>':  # to help testing
        return cfg

    lf = LoggingStyleAdapter(init_logging(logging, None, cfg.program.log, cfg.program.verbose))
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
            **cfg['in'], b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message)
        return ()
    return cfg
def main_init_input_file(cfg_t, cs_store_name, in_file_field='db_path'):
    """
    - finds input file paths
    - renames cfg['input'] to cfg['in'] and fills its field 'cfgFile' with cs_store_name
    :param cfg_t:
    :param cs_store_name:
    :param in_file_field:
    :return: cfg_t
    """
    cfg_in = cfg_t.pop('input')
    cfg_in['cfgFile'] = cs_store_name
    try:  # with omegaconf.open_dict(cfg_in):
        cfg_in['paths'], cfg_in['nfiles'], cfg_in['path'] = init_file_names(
            **{**cfg_in, 'path': cfg_in[in_file_field]},
            b_interact=cfg_t['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message)
        cfg_t['in'] = cfg_in
        return cfg_t
    except FileNotFoundError as e:
        # print('Initialisation error:', e.message, 'Calling arguments:', sys.argv)
        raise
    cfg_t['in'] = cfg_in
    return cfg_t
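# Illustration of the assumed contract of init_file_names() as used above: it expands a
# (possibly glob) path into (paths, nfiles, path). A hypothetical minimal stand-in built
# only on pathlib, for understanding — not the project's actual implementation:
from pathlib import Path


def init_file_names_sketch(path, **_):
    """Hypothetical substitute: expand a glob pattern to a sorted list of file paths."""
    path = Path(path)
    paths = sorted(path.parent.glob(path.name))  # path.name may contain the glob, e.g. '*.gpx'
    if not paths:
        raise FileNotFoundError(f'No files match {path}')  # the real code raises Ex_nothing_done
    return paths, len(paths), path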
def main(new_arg=None):
    # debug override of arguments (note: clobbers any passed new_arg)
    new_arg = [
        r'.\h5toGpx_CTDs.ini',
        '--db_path', r'd:\workData\BalticSea\170614_ANS34\170614Strahov.h5',
        '--path', r'd:\workData\BalticSea\170614_ANS34\Baklan\2017*p1d5.txt',
        '--gpx_names_fun_format', '+{:02d}',
        '--gpx_symbols_list', "'Navaid, Orange'"
        ]  # '--db_path', r'd:\workData\BalticSea\171003_ANS36\171003Strahov.h5'

    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg:
        return
    if new_arg == '<return_cfg>':  # to help testing
        return cfg

    print('\n' + this_prog_basename(__file__), 'started', end=' ')
    if not cfg['out']['path'].is_absolute():
        # set relative to cfg['in']['db_path']
        cfg['out']['path'] = cfg['in']['db_path'].parent / cfg['out']['path']
    try:
        print(end='Data ')
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
            **cfg['in'])  # may interact
    except Ex_nothing_done as e:
        print(e.message)
        return  # or raise FileNotFoundError?

    itbl = 0
    # compile functions if defined in cfg or assign default
    gpx_symbols = init_gpx_symbols_fun(cfg['out'])
    gpx_names_funs = ["i+1"]
    gpx_names_fun = eval(compile(
        "lambda i, row: '{}'.format({})".format(
            cfg['out']['gpx_names_fun_format'], gpx_names_funs[itbl]),
        '', 'eval'))

    tim = filename2date([f for f in ge_names(cfg)])
    with pd.HDFStore(cfg['in']['db_path'], mode='r') as storeIn:
        # dfL = storeIn[tblD + '/logFiles']
        nav2add = h5select(storeIn, cfg['in']['table_nav'],
                           ['Lat', 'Lon', 'DepEcho'], tim)[0]
        rnav_df_join = nav2add.assign(itbl=itbl)  # copy/append on first/next cycle

    # Save to gpx waypoints
    # if 'gpx_names_funs' in cfg['out'] and len(cfg['out']['gpx_names_funs']) > itbl:
    #     gpx_names = eval(compile('lambda i: str({})'.format(
    #         cfg['out']['gpx_names_funs'][itbl]), '', 'eval'))
    save_to_gpx(rnav_df_join[-len(nav2add):],
                cfg['out']['path'].with_name('fileNames'),
                gpx_obj_namef=gpx_names_fun,
                waypoint_symbf=gpx_symbols,
                cfg_proc=cfg['process'])
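# Side note on the eval(compile(...)) pattern above: it builds a waypoint-naming function
# from two config strings. A self-contained toy with hypothetical values:
fmt = '+{:02d}'   # like cfg['out']['gpx_names_fun_format']
expr = 'i+1'      # like gpx_names_funs[itbl]
name_fun = eval(compile("lambda i, row: '{}'.format({})".format(fmt, expr), '', 'eval'))
assert name_fun(0, None) == '+01'  # waypoint numbering starts at 1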
def main_cfg(cfg: DictConfig):
    global l
    # cfg = cfg_from_args(argparser_files(), **kwargs)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
            **cfg['in'], b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message)
        return ()
    return cfg
        will be substituted with corresponding input file names.
        ''')
    parser.add_argument(
        '-verbose', nargs=1, type=str, default=['INFO'],
        choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'],
        help='Verbosity of messages in log file')

    args = parser.parse_args()
    args.verbose = args.verbose[0]
    try:
        cfg = ini2dict(args.cfgFile)
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(**cfg['in'])
    except (IOError, Ex_nothing_done) as e:
        cfg = {'in': {'paths': [args.path], 'nfiles': 1}}  # one file
        if not os_path.isfile(args.path):
            print('\n==> '.join([s for s in e.args if isinstance(s, str)]))  # e.message
            raise e

    ############################################################################
    # set_field_if_no(cfg['in'], 'types', None)
    # set_field_if_no(cfg['skiprows'], 'types')
    if cfg['in']['nfiles']:
        ## Main circle ############################################################
        for ifile, nameFull in enumerate(cfg['in']['paths'], start=1):
            nameFE = os_path.basename(nameFull)
def main(new_arg=None):
    """
    :param new_arg: returns cfg if new_arg == '<cfg_from_args>', but it will be None
        if argument argv[1:] == '-h' or '-v' is passed to this code
        argv[1] is cfgFile. It was used with cfg files:
            'csv2h5_nav_supervisor.ini'
            'csv2h5_IdrRedas.ini'
            'csv2h5_Idronaut.ini'
    :return:
    """
    global l
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
            **{**cfg['in'], 'path': cfg['in']['db_path']},
            b_interact=cfg['program']['b_interact'])
        set_field_if_no(cfg['in'], 'tables_log', '{}/logFiles')  # will be filled by each table from cfg['in']['tables']
        cfg['in']['query'] = query_time_range(**cfg['in'])
        set_field_if_no(cfg['out'], 'db_path', cfg['in']['db_path'])
        # cfg['out'] = init_file_names(cfg['out'], path_field='db_path')
    except Ex_nothing_done as e:
        print(e.message)
        return ()

    # args = parser.parse_args()
    # args.verbose = args.verbose[0]
    # try:
    #     cfg = ini2dict(args.cfgFile)
    #     cfg['in']['cfgFile'] = args.cfgFile
    # except IOError as e:
    #     print('\n==> '.join([a for a in e.args if isinstance(a, str)]))  # e.message
    #     raise e

    # Open text log
    if 'log' in cfg['program'].keys():
        dir_create_if_need(os_path.dirname(cfg['program']['log']))
        flog = open(cfg['program']['log'], 'a+', encoding='cp1251')

    cfg['out']['log'] = OrderedDict({'fileName': None, 'fileChangeTime': None})

    # Prepare saving to csv
    if 'file_names_add_fun' in cfg['out']:
        file_names_add = eval(compile(cfg['out']['file_names_add_fun'], '', 'eval'))
    else:
        file_names_add = lambda i: '.csv'  # f'_{i}.csv'

    # Prepare data for output store and open it
    if cfg['out']['tables'] == ['None']:
        # will not write new data table and its log
        cfg['out']['tables'] = None
        # cfg['out']['tables_log'] = None  # for _runs cfg will be redefined (the only None case that makes sense?)
    h5init(cfg['in'], cfg['out'])
    # store, dfLogOld = h5temp_open(**cfg['out'])
    cfg_fileN = os_path.splitext(cfg['in']['cfgFile'])[0]
    out_tables_log = cfg['out'].get('tables_log')
    if cfg_fileN.endswith('_runs') or (bool(out_tables_log) and 'logRuns' in out_tables_log[0]):
        # Will calculate only after filter
        # todo: calculate derived parameters before they become bad (or replace all of them if any are bad?)
        func_before_cycle = lambda x: None
        func_before_filter = lambda df, log_row, cfg: df
        func_after_filter = lambda df, cfg: log_runs(df, cfg, cfg['out']['log'])

        # this table will be added:
        cfg['out']['tables_log'] = [cfg['out']['tables'][0] + '/logRuns']
        cfg['out']['b_log_ready'] = True  # to not update time range in h5_append()

        # Settings to not affect main data table and to switch off incompatible options:
        cfg['out']['tables'] = []
        cfg['out']['b_skip_if_up_to_date'] = False
        # todo: If False, check it: need to delete all previous results of CTD_calc() or set min_time > its last log time. True not implemented?
        cfg['program']['b_log_display'] = False  # can not display multiple log rows

        if 'b_save_images' in cfg['extract_runs']:
            cfg['extract_runs']['path_images'] = cfg['out']['db_path'].with_name('_subproduct')
            dir_create_if_need(cfg['extract_runs']['path_images'])
    else:
        if 'brown' in cfg_fileN.lower():
            func_before_cycle = load_coef
            if 'Lat' in cfg['in']:
                func_before_filter = lambda *args, **kwargs: add_ctd_params(
                    process_brown(*args, **kwargs), kwargs['cfg'])
            else:
                func_before_filter = process_brown
        else:
            func_before_cycle = lambda x: None

            def ctd_coord_and_params(df: pd.DataFrame, log_row, cfg):
                coord_data_col_ensure(df, log_row)
                return add_ctd_params(df, cfg)

            func_before_filter = ctd_coord_and_params
        func_after_filter = lambda df, cfg: df  # nothing after filter

    func_before_cycle(cfg)  # prepare: usually assigns data to cfg['for']
    if cfg['out'].get('path_csv'):
        dir_create_if_need(cfg['out']['path_csv'])

    # Load data. Main circle #########################################
    # Open input store and cycle through input table log records
    qstr_trange_pattern = "index>=Timestamp('{}') & index<=Timestamp('{}')"
    iSt = 1

    dfLogOld, cfg['out']['db'], cfg['out']['b_skip_if_up_to_date'] = h5temp_open(**cfg['out'])
    b_out_db_is_different = cfg['out']['db'] is not None and \
        cfg['out']['db_path_temp'] != cfg['in']['db_path']
    # Cycle for each table, for each row in log:
    # for path_csv in gen_names_and_log(cfg['out'], dfLogOld):
    with FakeContextIfOpen(
            lambda f: pd.HDFStore(f, mode='r'),
            cfg['in']['db_path'],
            None if b_out_db_is_different else cfg['out']['db']
            ) as cfg['in']['db']:  # does not open ['in']['db'] if it is already opened to write
        for tbl in cfg['in']['tables']:
            if False:  # Show table info
                nodes = sorted(cfg['out']['db'].root.__members__)  # , key=number_key
                print(nodes)
            print(tbl, end='. ')

            df_log = cfg['in']['db'].select(
                cfg['in']['tables_log'].format(tbl) or tbl, where=cfg['in']['query'])
            if True:  # try:
                if 'log' in cfg['program'].keys():
                    nRows = df_log.rows.size
                    flog.writelines(datetime.now().strftime('\n\n%d.%m.%Y %H:%M:%S> processed ')
                                    + f'{nRows} row' + ('s:' if nRows > 1 else ':'))
                for ifile, r in enumerate(df_log.itertuples(), start=iSt):  # name=None
                    print('.', end='')
                    sys_stdout.flush()

                    path_raw = PurePath(r.fileName)
                    cfg['out']['log'].update(fileName=path_raw.name,
                                             fileChangeTime=r.fileChangeTime)
                    # save current state
                    cfg['in']['file_stem'] = cfg['out']['log']['fileName']  # e.g. to allow extracting date in a subprogram
                    cfg['in']['fileChangeTime'] = cfg['out']['log']['fileChangeTime']

                    if cfg['in']['b_skip_if_up_to_date']:
                        have_older_data, have_duplicates = h5del_obsolete(
                            cfg['out'], cfg['out']['log'], dfLogOld)
                        if have_older_data:
                            continue
                        if have_duplicates:
                            cfg['out']['b_remove_duplicates'] = True
                    print('{}. {}'.format(ifile, path_raw.name), end=': ')

                    # Load data
                    qstr = qstr_trange_pattern.format(r.Index, r.DateEnd)
                    df_raw = cfg['in']['db'].select(tbl, qstr)
                    cols = df_raw.columns.tolist()
                    # cfg['in']['lat'] and ['lon'] may be needed in add_ctd_params() if Lat is not in df_raw
                    if 'Lat_en' in df_log.columns and 'Lat' not in cols:
                        cfg['in']['lat'] = np.nanmean((r.Lat_st, r.Lat_en))
                        cfg['in']['lon'] = np.nanmean((r.Lon_st, r.Lon_en))

                    df = func_before_filter(df_raw, log_row=r, cfg=cfg)

                    if df.size:  # size of zero means: save only log but not data
                        # filter, updates cfg['out']['log']['rows']
                        df, _ = set_filterGlobal_minmax(df, cfg['filter'], cfg['out']['log'])
                    if 'rows' not in cfg['out']['log']:
                        l.warning('no data!')
                        continue
                    elif isinstance(cfg['out']['log']['rows'], int):
                        print('filtered out {rows_filtered}, remains {rows}'.format_map(
                            cfg['out']['log']))
                        if cfg['out']['log']['rows']:
                            print('.', end='')
                        else:
                            l.warning('no data!')
                            continue

                    df = func_after_filter(df, cfg=cfg)

                    # Append to Store
                    h5_append(cfg['out'], df, cfg['out']['log'],
                              log_dt_from_utc=cfg['in']['dt_from_utc'])

                    # Copy to csv
                    if cfg['out'].get('path_csv'):
                        fname = '{:%y%m%d_%H%M}-{:%d_%H%M}'.format(r.Index, r.DateEnd) \
                                + file_names_add(ifile)
                        if 'data_columns' not in cfg['out']:
                            cfg['out']['data_columns'] = slice(0, -1)  # all cols
                        df.to_csv(  # [cfg['out']['data_columns']]
                            cfg['out']['path_csv'] / fname,
                            date_format=cfg['out']['text_date_format'],
                            float_format='%5.6g',
                            index_label='Time')  # to_string, line_terminator='\r\n'

                    # Log to screen (if not prohibited explicitly)
                    if cfg['out']['log'].get('Date0') is not None and (
                            ('b_log_display' not in cfg['program']) or cfg['program']['b_log_display']):
                        str_log = '{fileName}:\t{Date0:%d.%m.%Y %H:%M:%S}-' \
                                  '{DateEnd:%d. %H:%M:%S%z}\t{rows}rows'.format_map(
                                      cfg['out']['log'])  # \t{Lat}\t{Lon}\t{strOldVal}->\t{mag}
                        l.info(str_log)
                    else:
                        str_log = str(cfg['out']['log'].get('rows', '0'))
                    # Log to logfile
                    if 'log' in cfg['program'].keys():
                        flog.writelines('\n' + str_log)

    if b_out_db_is_different:
        try:
            if cfg['out']['tables'] is not None:
                print('')
                if cfg['out']['b_remove_duplicates']:
                    h5remove_duplicates(cfg['out'], cfg_table_keys=('tables', 'tables_log'))
                # Create full indexes. Must be done because of using ptprepack in h5move_tables() below
                l.debug('Create index')
                for tblName in (cfg['out']['tables'] + cfg['out']['tables_log']):
                    try:
                        cfg['out']['db'].create_table_index(tblName, columns=['index'], kind='full')
                    except Exception as e:
                        l.warning('table %s. Index not created - error: %s', tblName,
                                  '\n==> '.join([s for s in e.args if isinstance(s, str)]))
        except Exception as e:
            l.exception('The end. There is an error ')

            import traceback, code
            from sys import exc_info as sys_exc_info
            tb = sys_exc_info()[2]  # type, value,
            traceback.print_exc()
            last_frame = lambda tb=tb: last_frame(tb.tb_next) if tb.tb_next else tb
            frame = last_frame().tb_frame
            ns = dict(frame.f_globals)
            ns.update(frame.f_locals)
            code.interact(local=ns)
        finally:
            cfg['out']['db'].close()
            if cfg['program']['log']:
                flog.close()
            if cfg['out']['db'].is_open:
                print('Wait store is closing...')
                sleep(2)

            failed_storages = h5move_tables(cfg['out'])
            print('Finishing...' if failed_storages else 'Ok.', end=' ')
            h5index_sort(cfg['out'],
                         out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
                         in_storages=failed_storages)
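# The qstr_trange_pattern above produces a PyTables where-clause for pd.HDFStore.select().
# A self-contained demonstration (assumes the optional 'tables' package is installed;
# writes a small table-format store to a temporary file):
import numpy as np
import pandas as pd
from pathlib import Path
from tempfile import TemporaryDirectory

with TemporaryDirectory() as d:
    df_demo = pd.DataFrame({'v': np.arange(5)},
                           index=pd.date_range('2018-10-06', periods=5, freq='H'))
    with pd.HDFStore(Path(d) / 'demo.h5', mode='w') as store:
        store.append('tbl', df_demo)  # 'table' format is required for where-queries
        qstr = "index>=Timestamp('{}') & index<=Timestamp('{}')".format(
            '2018-10-06 01:00', '2018-10-06 03:00')
        assert len(store.select('tbl', qstr)) == 3  # rows at 01:00, 02:00, 03:00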
def main(new_arg=None):
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
            **cfg['in'], b_interact=cfg['program']['b_interact'],
            cfg_search_parent=cfg['out'])
        h5init(cfg['in'], cfg['out'])
    except Ex_nothing_done as e:
        print(e.message)
        exit()

    df_dummy = pd.DataFrame(
        np.full(1, np.NaN, dtype=np.dtype({
            'formats': ['float64', 'float64'],
            'names': cfg['out']['tracks_cols'][1:]})),
        index=(pd.NaT,))  # used to insert separator lines

    if 'routes_cols' not in cfg['in']:
        cfg['in']['routes_cols'] = cfg['in']['waypoints_cols']
    if 'routes_cols' not in cfg['out']:
        cfg['out']['routes_cols'] = cfg['out']['waypoints_cols']  # cfg['in']['routes_cols']  #

    # Writing
    if True:  # try:
        l.warning('processing ' + str(cfg['in']['nfiles']) + ' file'
                  + ('s:' if cfg['in']['nfiles'] > 1 else ':'))
        cfg['out']['log'] = {}
        set_field_if_no(cfg['out'], 'table_prefix', PurePath(cfg['in']['path']).stem)
        cfg['out']['table_prefix'] = cfg['out']['table_prefix'].replace('-', '')
        if len([t for t in cfg['out']['tables'] if len(t)]) > 1:
            cfg['out']['tables'] = \
                [cfg['out']['table_prefix'] + '_' + s for s in cfg['out']['tables']]
            cfg['out']['tables_log'] = \
                [cfg['out']['table_prefix'] + '_' + s for s in cfg['out']['tables_log']]

        tables = dict(zip(df_names, cfg['out']['tables']))
        tables_log = dict(zip(df_names, cfg['out']['tables_log']))
        # Can not save path to DB (useless?) so set for this max file name length:
        set_field_if_no(cfg['out'], 'logfield_fileName_len', 50)
        cfg['out']['index_level2_cols'] = cfg['in']['routes_cols'][0]

        # ###############################################################
        # ## Cumulate all data in cfg['out']['path_temp'] ##################
        ## Main circle ############################################################
        for i1_file, path_gpx in h5_dispenser_and_names_gen(cfg['in'], cfg['out']):
            l.info('{}. {}: '.format(i1_file, path_gpx.name))

            # Loading data
            dfs = gpxConvert(cfg, path_gpx)
            print('write', end=': ')
            sys_stdout.flush()
            for key, df in dfs.items():
                if (not tables.get(key)) or df.empty:
                    continue
                elif key == 'tracks':
                    # Save last time to be able to filter the next file
                    cfg['in']['time_last'] = df.index[-1]

                sort_time = False if key in {'waypoints', 'routes'} else None

                # monkey patching
                if 'tracker' in tables[key]:
                    # Also {} must be in tables[key]. todo: better key+'_fun_tracker' in cfg['out']?
                    # Trackers processing
                    trackers_numbers = {
                        '0-3106432': '1',
                        '0-2575092': '2',
                        '0-3124620': '3',
                        '0-3125300': '4',
                        '0-3125411': '5',
                        '0-3126104': '6'}
                    tables_pattern = tables[key]
                    tables_log_pattern = tables_log[key]

                    df['comment'] = df['comment'].str.split(" @", n=1, expand=True)[0]
                    # split data and save to multiple tables
                    df_all = df.set_index(['comment', df.index])
                    for sn, n in trackers_numbers.items():  # set(df_all.index.get_level_values(0))
                        try:
                            df = df_all.loc[sn]
                        except KeyError:
                            continue
                        # redefine saving parameters
                        cfg['out']['table'] = tables_pattern.format(trackers_numbers[sn])
                        cfg['out']['table_log'] = tables_log_pattern.format(trackers_numbers[sn])
                        call_with_valid_kwargs(df_filter_and_save_to_h5, df, **cfg,
                                               input=cfg['in'], sort_time=sort_time)
                else:
                    cfg['out']['table'] = tables[key]
                    cfg['out']['table_log'] = tables_log[key]
                    call_with_valid_kwargs(df_filter_and_save_to_h5, df, **cfg,
                                           input=cfg['in'], sort_time=sort_time)

    # try:
    #     if cfg['out']['b_remove_duplicates']:
    #         for tbls in cfg['out']['tables_have_wrote']:
    #             for tblName in tbls:
    #                 cfg['out']['db'][tblName].drop_duplicates(keep='last', inplace=True)
    #     print('Create index', end=', ')
    #     # create_table_index calls create_table whose docs say it "cannot index Time64Col() or ComplexCol",
    #     # so load it, index, then save
    #     # level2_index = None
    #     # df = cfg['out']['db'][tblName]  # last commented
    #     # df.set_index([navp_all_index, level2_index])
    #     # df.sort_index()
    #     # cfg['out']['db'][tblName].sort_index(inplace=True)
    #     # if df is not None:  # resave
    #     #     df_log = cfg['out']['db'][tblName]
    #     #     cfg['out']['db'].remove(tbls[0])
    #     #     cfg['out']['db'][tbls[0]] = df
    #     #     cfg['out']['db'][tbls[1]] = df_log
    try:
        pass
    except Exception as e:
        print('The end. There is an error ', standard_error_info(e))
        # import traceback, code
        # from sys import exc_info as sys_exc_info
        # tb = sys_exc_info()[2]  # type, value,
        # traceback.print_exc()
        # last_frame = lambda tb=tb: last_frame(tb.tb_next) if tb.tb_next else tb
        # frame = last_frame().tb_frame
        # ns = dict(frame.f_globals)
        # ns.update(frame.f_locals)
        # code.interact(local=ns)
    # finally:
    #     cfg['out']['db'].close()
    #     failed_storages = h5move_tables(cfg['out'], cfg['out']['tables_have_wrote'])

    try:
        failed_storages = h5move_tables(cfg['out'],
                                        tbl_names=cfg['out'].get('tables_have_wrote', set()))
        print('Finishing...' if failed_storages else 'Ok.', end=' ')
        # Sort if there is any processed data that needs it (not the case for the routes and
        # waypoints). Else skip, because ``ptprepack`` does not close the hdf5 source if it finds no data.
        if cfg['in'].get('time_last'):
            cfg['out']['b_remove_duplicates'] = True
            h5index_sort(cfg['out'],
                         out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
                         in_storages=failed_storages,
                         tables=cfg['out'].get('tables_have_wrote', set()))
    except Ex_nothing_done:
        print('ok')
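# The tracker branch above splits one GPX dataframe into per-device tables by making
# 'comment' the outer index level and selecting each serial with .loc. A self-contained
# toy version of that split (hypothetical data):
import pandas as pd

df_toy = pd.DataFrame({'comment': ['0-3106432 @x', '0-2575092 @y', '0-3106432 @z'],
                       'Lat': [54.1, 54.2, 54.3]},
                      index=pd.date_range('2018-01-01', periods=3, freq='min'))
df_toy['comment'] = df_toy['comment'].str.split(' @', n=1, expand=True)[0]  # keep serial only
df_toy_all = df_toy.set_index(['comment', df_toy.index])
assert len(df_toy_all.loc['0-3106432']) == 2  # rows belonging to one tracker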
def main(new_arg=None, veusze=None, **kwargs):
    """
    Initialise configuration and run or return routines
    cfg:
        ['program']['log'],
        'out'
        'in'
        'async'
    globals: load_vsz, l

    :param new_arg:
    :param veusze: used to reuse Veusz embedded object (thus not to leak memory)
    :return:
    """
    global l, load_vsz

    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    cfg['program']['log'] = l.root.handlers[0].baseFilename  # synchronize obtained absolute file name
    print('\n' + this_prog_basename(__file__), 'started', end=' ')
    __name__ = '__main__'  # indicate to other functions that they are called from main

    if cfg['out'].get('paths'):
        if not cfg['out']['b_images_only']:
            raise NotImplementedError('Provided out in not "b_images_only" mode!')
        cfg['out']['nfiles'] = len(cfg['out']['paths'])
        cfg['out']['path'] = cfg['out']['paths'][0]
        print(end=f"\n- {cfg['out']['nfiles']} output files to export images...")
        pass
    else:
        if cfg['out']['b_images_only']:
            print('in images only mode. Output pattern: ')  # todo Export path: '
        else:
            print('. Output pattern and Data: ')
        try:
            # Using cfg['out'] to store pattern information
            if not Path(cfg['in']['pattern_path']).is_absolute():
                cfg['in']['pattern_path'] = Path(cfg['in']['path']).with_name(
                    str(cfg['in']['pattern_path']))
            cfg['out']['path'] = cfg['in']['pattern_path']
            cfg['out']['paths'], cfg['out']['nfiles'], cfg['out']['path'] = init_file_names(
                **cfg['out'], b_interact=False)
        except Ex_nothing_done as e:
            if not cfg['out']['b_images_only']:
                l.warning(f'{e.message} - no pattern. Specify it or use "b_images_only" mode!')
                return  # or raise FileNotFoundError?

    if cfg['out']['b_images_only'] and cfg['out']['paths']:
        cfg['in']['paths'] = cfg['out']['paths']  # have all we need to export
    else:
        try:
            cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
                **cfg['in'], b_interact=cfg['program']['b_interact'])
        except Ex_nothing_done as e:
            print(e.message)
            return  # or raise FileNotFoundError?
        except TypeError:  # expected str, bytes or os.PathLike object, not NoneType
            # cfg['in']['path'] is None. Maybe it is not needed
            cfg['in']['paths'] = [cfg['in']['pattern_path']]  # dummy for compatibility
            cfg['in']['nfiles'] = 1

    cfg['out']['export_dir'] = dir_from_cfg(cfg['out']['path'].parent,
                                            cfg['out']['export_dir'])

    if 'restore_config' in cfg['program']['before_next']:
        cfg['in_saved'] = cfg['in'].copy()
    # Next is commented because reloading is Ok: no need to Close()
    # if cfg['out']['b_images_only'] and 'Close()' not in cfg['program']['before_next']:
    #     cfg['program']['before_next'].append('Close()')
    #     # usually we need to load a new file for export (not only modify the previous file)

    if cfg['program']['export_timeout_s'] and export_images_timed:
        cfg['async'] = {'loop': asyncio.get_event_loop(),
                        'export_timeout_s': cfg['program']['export_timeout_s']}
    else:
        cfg['async'] = {'loop': None}

    load_vsz = load_vsz_closure(cfg['program']['veusz_path'],
                                cfg['program']['load_timeout_s'],
                                cfg['program']['b_execute_vsz'])
    cfg['load_vsz'] = load_vsz
    cfg['co'] = {}
    if cfg['in']['table_log'] and cfg['in']['path'].suffix == '.h5' and not (
            cfg['out']['b_images_only'] and len(cfg['in']['paths']) > 1):
        # load data by ranges from table log rows
        cfg['in']['db_path'] = cfg['in']['path']
        in_fulls = h5log_names_gen(cfg['in'])
    elif cfg['in']['tables']:
        # tables instead of files
        in_fulls = ge_names_from_hdf5_paths(cfg)
    else:
        # switch to use found vsz as source if we only need to export images (even with database source)
        in_fulls = ge_names(cfg)

    cor_savings = co_savings(cfg)
    cor_savings.send(None)
    nfiles = 0
    try:  # if True:
        path_prev = os_getcwd()
        os_chdir(cfg['out']['path'].parent)
        if cfg['program']['return'] == '<corutines_in_cfg>':
            cfg['co']['savings'] = cor_savings
            cfg['co']['gen_veusz_and_logs'] = load_to_veusz(in_fulls, cfg)
            cfg['co']['send_data'] = co_send_data(load_to_veusz, cfg, cor_savings)
            return cfg  # return with link to generator function
        elif cfg['in'].get('data_yield_prefix'):
            # Cycle with obtaining Veusz data
            cor_send_data = co_send_data(load_to_veusz, cfg, cor_savings)
            cfgin_update = None
            while True:  # for vsz_data, log in cor_send_data.send(cfgin_update):
                try:
                    vsz_data, log = cor_send_data.send(cfgin_update)
                    nfiles += 1
                except (GeneratorExit, StopIteration, Ex_nothing_done):
                    break
                if 'f_custom_in_cycle' in cfg['program']:
                    cfgin_update = cfg['program']['f_custom_in_cycle'](vsz_data, log)
        else:
            # Cycle without obtaining Veusz data (or implemented by user's cfg['program']['f_custom_in_cycle'])
            for veusze, log in load_to_veusz(in_fulls, cfg, veusze):
                file_name_r = Path(log['out_vsz_full']).relative_to(cfg['out']['path'].parent)
                if cfg['program'].get('f_custom_in_cycle'):
                    cfgin_update = cfg['program']['f_custom_in_cycle'](veusze, log)
                    veusze_commands(veusze, cfgin_update, file_name_r)
                cor_savings.send((veusze, log))
                nfiles += 1
        cor_savings.close()
        if cfg['program']['return'] != '<embedded_object>':
            veusze = None  # to note that it is closed in cor_savings.close()
        print(f'{nfiles} processed. ok>')
        pass
    except Exception as e:
        l.exception('Not good')
        return  # or raise FileNotFoundError?
    finally:
        if cfg['async']['loop']:
            cfg['async']['loop'].close()
        os_chdir(path_prev)
        if veusze and cfg['program']['return'] == '<end>':
            veusze.Close()
            veusze.WaitForClose()
            veusze = None
        elif cfg['program']['return'] == '<embedded_object>':
            cfg['veusze'] = veusze
            return cfg
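# co_savings above is used as a push-style coroutine: it is primed with send(None) and
# then receives (veusze, log) pairs through send(). A minimal self-contained analogue of
# that pattern (names here are toy stand-ins, not the project's API):
def co_sink(results: list):
    """Toy consumer coroutine in the style assumed for co_savings."""
    try:
        while True:
            item = yield      # receives what the producer send()s
            results.append(item)
    except GeneratorExit:
        pass                  # place for cleanup, e.g. closing files


collected = []
sink = co_sink(collected)
sink.send(None)               # priming: advance to the first yield
sink.send(('veusze_stub', {'out_vsz_full': 'a.vsz'}))
sink.close()
assert collected[0][1]['out_vsz_full'] == 'a.vsz'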
def main(new_arg=None, **kwargs):
    """
    :param new_arg: list of strings, command line arguments
    :kwargs: dicts for each section: to overwrite values in them (overwrites even
        high priority values; other values remain)
    Note: if new_arg == '<cfg_from_args>' returns cfg, but it will be None if argument
    argv[1:] == '-h' or '-v' is passed to this code
    argv[1] is cfgFile. It was used with cfg files:
        'csv2h5_nav_supervisor.ini'
        'csv2h5_IdrRedas.ini'
        'csv2h5_Idronaut.ini'
    :return:
    """
    global l
    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
            **cfg['in'], b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message)
        return ()

    bOld_FF00FF = False
    # if 'TermGrunt' in sys.argv[1] FF00FF' in str(cfg['in']['path']):  # 'TermGrunt.h5' ? args.path.endswith('bin'):
    #     bOld_FF00FF = True
    #     cfg['in'].update({
    #         'header': 'TERM',
    #         'dt_from_utc': timedelta(hours=-1),
    #         'fs': 1, 'b_time_fromtimestamp': True,
    #         'b_time_fromtimestamp_source': False})
    # else:  # 'Katran.h5'
    #     cfg['in'].update({
    #         'delimiter_hex': '000000E6',
    #         'header': 'P, Temp, Cond',
    #         'dt_from_utc': timedelta(hours=0),
    #         'fs': 10, 'b_time_fromtimestamp': False,
    #         'b_time_fromtimestamp_source': False})

    set_field_if_no(cfg['in'], 'dtype', 'uint{:d}'.format(2**(3 + np.searchsorted(
        2**np.array([3, 4, 5, 6, 7]) > np.array(8 * (cfg['in']['data_word_len'] - 1)), 1))))

    # Prepare specific format loading and writing
    set_field_if_no(cfg['in'], 'coltime', [])
    cfg['in'] = init_input_cols(cfg['in'])
    cfg['out']['names'] = np.array(cfg['in']['dtype'].names)[cfg['in']['cols_loaded_save_b']]
    cfg['out']['formats'] = [cfg['in']['dtype'].fields[n][0] for n in cfg['out']['names']]
    cfg['out']['dtype'] = np.dtype({'formats': cfg['out']['formats'],
                                    'names': cfg['out']['names']})
    h5init(cfg['in'], cfg['out'])

    # cfg['Period'] = 1.0 / cfg['in']['fs']  # instead of Second can use Milli / Micro / Nano:
    # cfg['pdPeriod'] = pd.to_timedelta(cfg['Period'], 's')
    # # pd.datetools.Second(cfg['Period'])\
    # #     if 1 % cfg['in']['fs'] == 0 else\
    # #     pd.datetools.Nano(cfg['Period'] * 1e9)

    # log table of loaded files. Columns: Start time, file name, and its index in array of all loaded data:
    log_item = cfg['out']['log'] = {}  # fields will have: 'fileName': None, 'fileChangeTime': None, 'rows': 0
    strLog = ''
    # from collections import namedtuple
    # type_log_files = namedtuple('type_log_files', ['label', 'iStart'])
    # log.sort(axis=0, order='log_item['Date0']')  # sort files by time

    dfLogOld, cfg['out']['db'], cfg['out']['b_skip_if_up_to_date'] = h5temp_open(**cfg['out'])
    if 'log' in cfg['program'].keys():
        f = open(PurePath(sys_argv[0]).parent / cfg['program']['log'], 'a', encoding='cp1251')
        f.writelines(datetime.now().strftime(
            '\n\n%d.%m.%Y %H:%M:%S> processed ' + str(cfg['in']['nfiles']) + ' file'
            + ('s:' if cfg['in']['nfiles'] > 1 else ':')))

    b_remove_duplicates = False  # normally no duplicates, but will be set if detected
    # Config specially for readBinFramed
    set_field_if_no(cfg['in'], 'b_byte_order_is_big_endian', True)
    set_field_if_no(cfg['in'], 'b_baklan', False)
    set_field_if_no(cfg['in'], 'b_time_fromtimestamp_source', False)
    cfg['out']['fs'] = cfg['in']['fs']

    if True:
        ## Main circle ############################################################
        for i1_file, path_in in h5_dispenser_and_names_gen(cfg['in'], cfg['out']):
            l.info('{}. {}: '.format(i1_file, path_in.name))

            # Loading data
            if bOld_FF00FF:
                V = readFF00FF(path_in, cfg)
                iFrame = np.arange(len(V))
            else:
                V, iFrame = readBinFramed(path_in, cfg['in'])
            if ('b_time_fromtimestamp' in cfg['in'] and cfg['in']['b_time_fromtimestamp']) or \
                    ('b_time_fromtimestamp_source' in cfg['in'] and cfg['in']['b_time_fromtimestamp_source']):
                path_in_rec = os_path.join(
                    'd:\\workData\\_source\\BalticSea\\151021_T1Grunt_Pregol\\_source\\not_corrected',
                    os_path.basename(path_in)[:-3] + 'txt'
                    ) if cfg['in']['b_time_fromtimestamp_source'] else path_in
                log_item['Date0'] = datetime.fromtimestamp(os_path.getmtime(path_in_rec))  # getctime is bad
                log_item['Date0'] -= iFrame[-1] * timedelta(seconds=1 / cfg['in']['fs'])
                # uses the computer file stamp at the end of recording
            else:
                log_item['Date0'] = datetime.strptime(path_in.stem,
                                                      cfg['in']['filename2timestart_format'])
            log_item['Date0'] += cfg['in']['dt_from_utc']
            tim = log_item['Date0'] + iFrame * timedelta(seconds=1 / cfg['in']['fs'])
            # tim = pd.date_range(log_item['Date0'], periods=np.size(V, 0), freq=cfg['pdPeriod'])
            df = pd.DataFrame(V.view(dtype=cfg['out']['dtype']),  # np.uint16
                              columns=cfg['out']['names'],
                              index=tim)
            # pd.DataFrame(V, columns=cfg['out']['names'], dtype=cfg['out']['formats'], index=tim)
            if df.empty:  # log['rows'] == 0
                print('No data => skip file')
                continue

            df, tim = set_filterGlobal_minmax(df, cfg_filter=cfg['filter'], log=log_item,
                                              dict_to_save_last_time=cfg['in'])
            if log_item['rows_filtered']:
                print('filtered out {}, remains {}'.format(log_item['rows_filtered'],
                                                           log_item['rows']))
            if not log_item['rows']:
                l.warning('no data! => skip file')
                continue
            elif log_item['rows']:
                print('.', end='')
                # , divisions=d.divisions), divisions=pd.date_range(tim[0], tim[-1], freq='1D')
            else:
                l.warning('no data! => skip file')
                continue

            # Append to Store
            h5_append(cfg['out'], df.astype('int32'), log_item)

            if 'txt' in cfg['program'].keys():  # can be saved as text too
                np.savetxt(cfg['program']['txt'], V, delimiter='\t', newline='\n',
                           header=cfg['in']['header'] + log_item['fileName'],
                           fmt='%d', comments='')

    try:
        if b_remove_duplicates:
            for tblName in (cfg['out']['table'] + cfg['out']['tableLog_names']):
                cfg['out']['db'][tblName].drop_duplicates(keep='last', inplace=True)  # subset='fileName',?
        if len(strLog):
            print('Create index', end=', ')
            for tblName in (cfg['out']['table'] + cfg['out']['tableLog_names']):
                cfg['out']['db'].create_table_index(tblName, columns=['index'], kind='full')
        else:
            print('done nothing')
    except Exception as e:
        l.exception('The end. There is an error ')

        import traceback, code
        from sys import exc_info as sys_exc_info
        tb = sys_exc_info()[2]  # type, value,
        traceback.print_exc()
        last_frame = lambda tb=tb: last_frame(tb.tb_next) if tb.tb_next else tb
        frame = last_frame().tb_frame
        ns = dict(frame.f_globals)
        ns.update(frame.f_locals)
        code.interact(local=ns)

    # sort index if we have any processed data (needed because ``ptprepack`` does not
    # close the hdf5 source if it finds no data)
    if cfg['in'].get('time_last'):
        failed_storages = h5move_tables(cfg['out'])
        print('Ok.', end=' ')
        h5index_sort(cfg['out'],
                     out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
                     in_storages=failed_storages)
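# The except-block above drops into an interactive console at the deepest traceback frame.
# The same pattern in isolation (standard library only; interact() is replaced by a print
# so this sketch runs non-interactively):
import sys
import traceback


def interact_at_failure_frame():
    tb = sys.exc_info()[2]
    while tb.tb_next:              # walk to the deepest frame
        tb = tb.tb_next
    frame = tb.tb_frame
    ns = dict(frame.f_globals)
    ns.update(frame.f_locals)      # locals of the failing frame become inspectable
    print(sorted(k for k in ns if not k.startswith('_')))
    # code.interact(local=ns)     # the original opens a console here instead


try:
    1 / 0
except ZeroDivisionError:
    traceback.print_exc()
    interact_at_failure_frame()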
def main(new_arg=None, veusze=None):
    """
    Note: if the vsz data source has an 'Ag_old_inv' variable then coefs are not inverted.
    Else invert them for use in vsz which does not invert coefs
    :param new_arg:
    :return:
    """
    global l
    p = veuszPropagate.my_argparser()
    p_groups = {g.title: g for g in p._action_groups
                if g.title.split(' ')[-1] != 'arguments'}  # skips special argparse groups
    p_groups['in'].add(
        '--channels_list',
        help='channels needing zero calibration: "magnetometer" or "M" for magnetometer '
             'and anything else for accelerometer; use "M, A" for both, empty to skip')
    p_groups['in'].add(
        '--widget',
        help='path to Veusz widget property which contains coefficients. '
             'For example "/fitV(force)/grid1/graph/fit1/values"')
    p_groups['in'].add(
        '--data_for_coef', default='max_incl_of_fit_t',
        help='Veusz data to use as coef. If used with widget then this data is appended '
             'to data from widget')
    p_groups['out'].add('--out.path', help='path to db where to write coef')
    p_groups['out'].add(
        '--re_tbl_from_vsz_name',
        help=r'regex to extract hdf5 table name from Veusz file name (last used "\D*\d*")'
        # ? why not simply specify table name?
        )
    # todo: "b_update_existed" arg will be used here for exported images.
    # Check whether False works or prevents opening vsz.

    cfg = cfg_from_args(p, new_arg)
    if not Path(cfg['program']['log']).is_absolute():
        cfg['program']['log'] = str(Path(__file__).parent.joinpath(
            cfg['program']['log']))  # l.root.handlers[0].baseFilename
    if not cfg:
        return
    if new_arg == '<return_cfg>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    veuszPropagate.l = l
    print('\n' + this_prog_basename(__file__), 'started', end=' ')
    if cfg['out']['b_images_only']:
        print('in images only mode.')

    try:
        print('Output pattern ')
        # Using cfg['out'] to store pattern information
        if not Path(cfg['in']['pattern_path']).is_absolute():
            cfg['in']['pattern_path'] = str(cfg['in']['path'].parent.joinpath(
                cfg['in']['pattern_path']))
        set_field_if_no(cfg['out'], 'path', cfg['in']['pattern_path'])
        cfg['out']['paths'], cfg['out']['nfiles'], cfg['out']['path'] = init_file_names(
            **cfg['out'], b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message, ' - no pattern')
        return  # or raise FileNotFoundError?

    try:
        print(end='Data ')
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
            **cfg['in'], b_interact=False)  # do not bother user a 2nd time
    except Ex_nothing_done as e:
        print(e.message)
        return  # or raise FileNotFoundError?

    if not cfg['out']['export_dir']:
        cfg['out']['export_dir'] = Path(cfg['out']['path']).parent
    if cfg['program']['before_next'] and 'restore_config' in cfg['program']['before_next']:
        cfg['in_saved'] = cfg['in'].copy()
    # cfg['loop'] = asyncio.get_event_loop()
    # cfg['export_timeout_s'] = 600
    cfg['out']['export_dir'] = dir_from_cfg(cfg['out']['path'].parent,
                                            cfg['out']['export_dir'])

    veuszPropagate.load_vsz = veuszPropagate.load_vsz_closure(
        cfg['program']['veusz_path'],
        b_execute_vsz=cfg['program']['b_execute_vsz'])
    gen_veusz_and_logs = veuszPropagate.load_to_veusz(
        veuszPropagate.ge_names(cfg), cfg, veusze)

    names_get = ['Inclination_mean_use1', 'logVext1_m__s']  # 'Inclination_mean_use2', 'logVext2_m__s'
    names_get_fits = ['fit']  # , 'fit2'
    vsz_data = {n: [] for n in names_get}
    for n in names_get_fits:
        vsz_data[n] = []

    # prepare collecting all coef in text also
    names_get_txt_results = ['fit1result']  # , 'fit2result'
    txt_results = {n: {} for n in names_get_txt_results}

    i_file = 0
    for veusze, log in gen_veusz_and_logs:
        if not veusze:
            continue
        i_file += 1
        print(i_file)
        if cfg['out']['re_tbl_from_vsz_name']:
            table = cfg['out']['re_tbl_from_vsz_name'].match(log['out_name']).group()
        else:
            table = re.sub(r'^[\d_]*', '', log['out_name'])  # delete all leading digits (date part)

        for n in names_get:
            vsz_data[n].append(veusze.GetData(n)[0])
        for n in [cfg['in']['data_for_coef']]:
            vsz_data[n] = list(veusze.GetData(n)[0])

        # Save velocity coefficients into //{table}//coef//Vabs{i} where i is the fit number enumerated from 0
        for i, name_out in enumerate(names_get_fits):  # ['fit1', 'fit2']
            coef = veusze.Get(cfg['in']['widget'])
            # veusze.Root['fitV(inclination)']['grid1']['graph'][name_out].values.val
            if 'a' in coef:
                coef_list = [coef[k] for k in ['d', 'c', 'b', 'a'] if k in coef]
            else:
                coef_list = [coef[k] for k in sorted(coef.keys(), key=digits_first)]
            if cfg['in']['data_for_coef']:
                coef_list += vsz_data[cfg['in']['data_for_coef']]

            vsz_data[name_out].append(coef_list)
            h5copy_coef(None, cfg['out']['path'], table,
                        dict_matrices={
                            f'//coef//Vabs{i}': coef_list,
                            f'//coef//date': np.float64([
                                np.NaN, np.datetime64(datetime.now()).astype(np.int64)])})
            # h5savecoef(cfg['out']['path'], path=f'//{table}//coef//Vabs{i}', coef=coef_list)
            txt_results[names_get_txt_results[i]][table] = str(coef)

        # Zeroing matrix - calculated in Veusz by rotation on old0pitch, old0roll
        Rcor = veusze.GetData('Rcor')[0]  # zeroing angles tuned by "USEcalibr0V_..." in Veusz Custom definitions

        if len(cfg['in']['channels']):
            l.info('Applying zero calibration matrix of pitch = {} and roll = {} degrees'.format(
                np.rad2deg(veusze.GetData('old0pitch')[0][0]),
                np.rad2deg(veusze.GetData('old0roll')[0][0])))
            with h5py.File(cfg['out']['path'], 'a') as h5:
                for channel in cfg['in']['channels']:
                    (col_str, coef_str) = channel_cols(channel)
                    # h5savecoef(..., dict_matrices={'//coef//' + coef_str + '//A': coefs[tbl][channel]['A'],
                    #                                '//coef//' + coef_str + '//C': coefs[tbl][channel]['b']})

                    # Currently used inclinometers have electronics rotated by 180 deg. Before, we inserted an
                    # additional rotation operation in Veusz by inverting A_old. Now we want to include this
                    # information in the database coef only.
                    try:  # Checking that Ag_old_inv exists
                        A_old_inv = veusze.GetData('Ag_old_inv')
                        is_old_used = True  # Rcor does not account for the rotated electronics.
                    except KeyError:
                        is_old_used = False  # Rcor accounts for the rotated electronics.

                    if is_old_used:
                        # The rotation is done in vsz (A_old in vsz is inverted), so we need to rotate it
                        # back to use in vsz without such inversion.
                        # Rotate by 180 deg (note: this is not inversion)
                        A_old_inv = h5[f'//{table}//coef//{coef_str}//A'][...]
                        A_old = np.dot(A_old_inv, [[1, 0, 0], [0, -1, 0], [0, 0, -1]])  # adds 180 deg to roll
                    else:
                        A_old = h5[f'//{table}//coef//{coef_str}//A'][...]
                    # A_old now accounts for the rotated electronics
                    A = np.dot(Rcor, A_old)
                    h5copy_coef(None, h5, table, dict_matrices={f'//coef//{coef_str}//A': A})

        # veusze.Root['fitV(inclination)']['grid1']['graph2'][name_out].function.val
        print(vsz_data)
        veuszPropagate.export_images(
            veusze, cfg['out'], f"_{log['out_name']}",
            b_skip_if_exists=not cfg['out']['b_update_existed'])

        # vsz_data = veusz_data(veusze, cfg['in']['data_yield_prefix'])
        # # caller does some processing of data and gives new cfg:
        # cfgin_update = yield (vsz_data, log)  # to test, run veusze.Save('-.vsz')
        # cfg['in'].update(cfgin_update)  # only update of cfg.in.add_custom_expressions is tested
        # if cfg['in']['add_custom']:
        #     for n, e in zip(cfg['in']['add_custom'], cfg['in']['add_custom_expressions']):
        #         veusze.AddCustom('definition', n, e, mode='replace')
        # # cor_savings.send((veusze, log))

    # veusze.Save(str(path_vsz_save), mode='hdf5')  # veusze.Save(str(path_vsz_save)) saves time with bad resolution
    print(f'Ok')
    print(txt_results)
    for n in names_get:
        pd.DataFrame.from_dict(
            dict(zip(list(txt_results['fit1result'].keys()), vsz_data[n]))
            ).to_csv(
            Path(cfg['out']['path']).with_name(f'average_for_fitting-{n}.txt'),
            sep='\t', header=list(txt_results['fit1result'].keys()), mode='a')
    return {**vsz_data, 'veusze': veusze}
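# Why np.dot(A_old_inv, diag(1, -1, -1)) above "adds 180 deg to roll": right-multiplying
# by a rotation of pi about the x axis flips the y and z axes; it is a proper rotation,
# not an inversion. A numeric check:
import numpy as np

R = np.diag([1., -1., -1.])                # rotation by pi about the x axis
assert np.isclose(np.linalg.det(R), 1.0)   # det = +1: proper rotation (an inversion would have det = -1)
assert np.allclose(R @ R, np.eye(3))       # applying the 180 deg flip twice restores the identity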
def main(new_arg=None, **kwargs):
    """
    Accumulates results of different source tables in 2D netcdf matrices of each result parameter.
    :param new_arg:
    :return:

    Spectrum parameters used (taken from nitime/algorithms/spectral.py):
    NW : float, by default set to 4: that corresponds to a bandwidth of 4 times the fundamental frequency.
        The normalized half-bandwidth of the data tapers, indicating a multiple of the fundamental
        frequency of the DFT (Fs/N). Common choices are n/2, for n >= 4. This parameter is unitless
        and more MATLAB compatible. As an alternative, set the BW parameter in Hz. See Notes on bandwidth.
    BW : float
        The sampling-relative bandwidth of the data tapers, in Hz.
    adaptive : {True/False}
        Use an adaptive weighting routine to combine the PSD estimates of different tapers.
    low_bias : {True/False}
        Rather than use 2NW tapers, only use the tapers that have better than 90% spectral
        concentration within the bandwidth (still using a maximum of 2NW tapers).

    Notes
    -----
    The bandwidth of the windowing function will determine the number of tapers to use. This
    parameter represents the trade-off between frequency resolution (lower main lobe BW for the
    taper) and variance reduction (higher BW and number of averaged estimates). Typically, the
    number of tapers is calculated as 2x the bandwidth-to-fundamental-frequency ratio, as these
    eigenfunctions have the best energy concentration.

    The result file is nc format, which is a Veusz compatible hdf5 format. If the file exists
    it will be overwritten.
    todo: the best may be to use DBMT: Dynamic Bayesian Multitaper (matlab code downloaded from git)
    """
    global l
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    l = logging.getLogger(prog)
    multitaper.warn = l.warning  # module is not installed but copied, so it can not import this dependency
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
            **{**cfg['in'], 'path': cfg['in']['db_path']},
            b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message)
        return ()
    print('\n' + prog, end=' started. ')

    cfg['in']['columns'] = ['Ve', 'Vn', 'Pressure']
    # minimum time between blocks, required in filt_data_dd() for data quality control messages:
    cfg['in']['dt_between_bursts'] = None  # If None, report any interval bigger than min(1st, 2nd)
    cfg['in']['dt_hole_warning'] = np.timedelta64(2, 's')

    cfg_out = cfg['out']
    if 'split_period' in cfg['out']:
        cfg['proc']['dt_interval'] = np.timedelta64(
            cfg['proc']['dt_interval'] if cfg['proc']['dt_interval'] else
            pd_period_to_timedelta(cfg['out']['split_period']))
        if (not cfg['proc']['overlap']) and \
                (cfg['proc']['dt_interval'] == np.timedelta64(
                    pd_period_to_timedelta(cfg['out']['split_period']))):
            cfg['proc']['overlap'] = 0.5
    else:
        cfg['proc']['dt_interval'] = np.timedelta64(cfg['proc']['dt_interval'])
        # cfg['proc']['dt_interval'] = np.timedelta64('5', 'm') * 24
    cfg['proc']['time_intervals_start'] = np.array(
        cfg['proc']['time_intervals_center'], np.datetime64) - cfg['proc']['dt_interval'] / 2

    cfg_out['chunksize'] = cfg['in']['chunksize']
    h5init(cfg['in'], cfg_out)
    # cfg_out_table = cfg_out['table']  # need to save because it will need to change?
    cfg_out['save_proc_tables'] = True  # False

    # cfg['proc'] = {}
    prm = cfg['proc']
    prm['adaptive'] = True  # pmtm spectrum param
    prm['fs'] = cfg['in']['fs']
    prm['bandwidth'] = 8 / cfg['proc']['dt_interval'].astype('timedelta64[s]').astype('float')
    # 8 * 2 * prm['fs']/34000  # 4 * 2 * 5/34000 ~= 4 * 2 * fs / N
    prm['low_bias'] = True

    nc_root = netCDF4.Dataset(Path(cfg_out['db_path']).with_suffix('.nc'), 'w', format='NETCDF4')
    # (for some types may need 'NETCDF4_CLASSIC' to use CLASSIC format for Veusz compatibility)
    nc_psd = nc_root.createGroup(cfg_out['table'])
    nc_psd.createDimension('time', None)
    nc_psd.createDimension('value', 1)
    nc_psd.createVariable('time_good_min', 'f8', ('value',))
    nc_psd.createVariable('time_good_max', 'f8', ('value',))
    nc_psd.createVariable('time_interval', 'f4', ('value',))
    if cfg['out'].get('split_period'):
        # nv_time_interval = nc_psd.createVariable('time_interval', 'f8', ('time',), zlib=False)
        nc_psd.variables['time_interval'][:] = pd_period_to_timedelta(
            cfg['out']['split_period']).delta
    else:
        nc_psd.variables['time_interval'][:] = cfg['proc']['dt_interval']

    # Dataframe of accumulating results: adding result columns in cycle with appending
    # source table name to column names
    dfs_all = None
    # Initializing variables to track the time range of calculated data
    time_good_min = pd.Timestamp.max
    time_good_max = pd.Timestamp.min
    prm['length'] = None
    nv_vars_for_tbl = {}
    tbl_prev = ''
    itbl = 0
    for df, tbl_in, dataname in h5_velocity_by_intervals_gen(cfg, cfg_out):
        tbl = tbl_in.replace('incl', '_i')
        # _, (df, tbl, dataname) in h5_dispenser_and_names_gen(cfg['in'], cfg_out, fun_gen=h5_velocity_by_intervals_gen):
        # interpolate to regular grid
        df = df.resample(timedelta(seconds=1 / prm['fs'])).interpolate()
        len_data_cur = df.shape[0]
        if tbl_prev != tbl:
            itbl += 1
            l.info('%s: len=%s', dataname, len_data_cur)
        l.info(' %s. Writing to "%s"', itbl, tbl)

        # Prepare
        if prm['length'] is None:
            # 1st time
            prm['length'] = len_data_cur
            prm.update(psd_mt_params(**prm, dt=float(np.median(np.diff(df.index.values))) / 1e9))
            nc_psd.createDimension('freq', len(prm['freqs']))
            # nv_... - variables to be used as ``NetCDF variables``
            nv_freq = nc_psd.createVariable('freq', 'f4', ('freq',), zlib=True)
            nv_freq[:] = prm['freqs']
            check_fs = 1e9 / np.median(np.diff(df.index.values)).item()
            if prm.get('fs'):
                np.testing.assert_almost_equal(prm['fs'], check_fs, decimal=7,
                                               err_msg='', verbose=True)
            else:
                prm['fs'] = check_fs
        elif prm['length'] != len_data_cur:
            prm['length'] = len_data_cur
            try:
                prm['dpss'], prm['eigvals'], prm['adaptive_if_can'] = \
                    multitaper._compute_mt_params(prm['length'], prm['fs'], prm['bandwidth'],
                                                  prm['low_bias'], prm['adaptive'])
            except (ModuleNotFoundError, ValueError):
                # already reported because multitaper.warn is reassigned to l.warning()
                prm['eigvals'] = np.int32([0])
            prm['weights'] = np.sqrt(prm['eigvals'])[np.newaxis, :, np.newaxis]
            # l.warning('new length (%s) is different from the last (%s)', len_data_cur, prm['length'])

        if tbl not in nc_psd.groups:
            nc_tbl = nc_psd.createGroup(tbl)
            cols = set()
            if 'Pressure' in df.columns:
                cols.add('Pressure')
                nc_tbl.createVariable('Pressure', 'f4', ('time', 'freq',), zlib=True)
            if 'Ve' in df.columns:
                cols.update(['Ve', 'Vn'])
                nc_tbl.createVariable('Ve', 'f4', ('time', 'freq',), zlib=True)
                nc_tbl.createVariable('Vn', 'f4', ('time', 'freq',), zlib=True)
            nc_tbl.createVariable('time_start', 'f8', ('time',), zlib=True)
            nc_tbl.createVariable('time_end', 'f8', ('time',), zlib=True)
            out_row = 0
        nc_tbl.variables['time_start'][out_row], nc_tbl.variables['time_end'][out_row] = \
            df.index[[0, -1]].values

        # Calculate PSD
        if prm['eigvals'].any():
            for var_name in cols:
                nc_tbl.variables[var_name][out_row, :] = call_with_valid_kwargs(
                    psd_mt, df[var_name], **prm)[0, :]
            # to_numpy() gets values to avoid tz-naive/aware comparison restrictions:
            if time_good_min.to_numpy('<M8[ns]') > df.index[0].to_numpy('<M8[ns]'):
                time_good_min = df.index[0]
            if time_good_max.to_numpy('<M8[ns]') < df.index[-1].to_numpy('<M8[ns]'):
                time_good_max = df.index[-1]
        else:
            for var_name in cols:
                nc_tbl.variables[var_name][out_row, :] = np.NaN
        out_row += 1

        # if cfg_out['save_proc_tables']:
        #     # ds_psd.to_netcdf('d:\\WorkData\\BlackSea\\190210\\190210incl_proc-psd_test.nc', format='NETCDF4_CLASSIC')
        #     # f.to_hdf('d:\\WorkData\\BlackSea\\190210\\190210incl_proc-psd_test.h5', 'psd', format='fixed')
        #     # tables_have_write.append(tbl)
        #     try:
        #         h5_append_to(df_psd, tbl, cfg_out, msg='save (temporary)', print_ok=None)
        #     except HDF5ExtError:
        #         cfg_out['save_proc_tables'] = False
        #         l.warning('too many columns for "table" format, but "fixed" is not updatable,'
        #                   ' so store result in memory first')
        #
        # df_cur = df_psd[['PSD_Vn', 'PSD_Ve']].rename(
        #     columns={'PSD_Ve': 'PSD_Ve' + tbl[-2:], 'PSD_Vn': 'PSD_Vn' + tbl[-2:]}).compute()
        # if dfs_all is None:
        #     dfs_all = df_cur
        # else:
        #     dfs_all = dfs_all.join(df_cur, how='outer')  # , rsuffix=tbl[-2:]  join does not work on dask

    # if itbl == len(cfg['in']['tables']):  # after last cycle. Needed inside because of
    #     # actions when exiting the generator:
    #     h5_append_to(dfs_all, cfg_out_table, cfg_out, msg='save accumulated data', print_ok='Ok.')

    # nv_time_start_query = nc_psd.createVariable('time_start_query', 'f8', ('time',), zlib=True)
    # nv_time_start_query[:] = cfg['in']['time_intervals_start'].to_numpy(dtype="datetime64[ns]") \
    #     if isinstance(cfg['in']['time_intervals_start'], pd.DatetimeIndex) else cfg['in']['time_intervals_start']

    nc_psd.variables['time_good_min'][:] = np.array(time_good_min.value, 'M8[ns]')
    nc_psd.variables['time_good_max'][:] = np.array(time_good_max.value, 'M8[ns]')
    # failed_storages = h5move_tables(cfg_out)
    print('Ok.', end=' ')
    nc_root.close()
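# Minimal self-contained skeleton of the netCDF layout created above (a group per table,
# an unlimited 'time' dimension, a compressed 2D PSD variable); written to a temporary
# file so it can run standalone (assumes the netCDF4 package is installed):
import numpy as np
import netCDF4
from pathlib import Path
from tempfile import TemporaryDirectory

with TemporaryDirectory() as d:
    with netCDF4.Dataset(str(Path(d) / 'psd_demo.nc'), 'w', format='NETCDF4') as nc:
        grp = nc.createGroup('psd')
        grp.createDimension('time', None)   # unlimited: one row appended per interval
        grp.createDimension('freq', 4)
        nv = grp.createVariable('Ve', 'f4', ('time', 'freq'), zlib=True)
        nv[0, :] = np.arange(4, dtype=np.float32)  # one PSD row
        assert grp.variables['Ve'].shape == (1, 4)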
            .format(*np.fromstring(txtYY_M_D_h_m_s_f, dtype=np.uint8, count=6, sep=',')))}
        cfg['in']['dt_from_utc'] = timedelta(0)
        cfg['in']['skiprows'] = 0  # cfg['in']['comments'], cfg['in']['coltime'] = 1
        cfg['in']['b_raise_on_err'] = True
    except IOError as e:
        print('\n==> '.join([s for s in e.args if isinstance(s, str)]))  # e.message
        raise e
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in']['path'] = init_file_names(
            **cfg['in'], b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message)
        exit()

    # Assign custom prep & proc based on args.cfgFile name ######################
    fun_proc_loaded = None  # Assign default proc below column assignment
    # if cfg['in']['cfgFile'].endswith('ADCP_WH'):
    #     fun_proc_loaded = proc_loaded_ADCP_WH

    # Default time postload proc
    if fun_proc_loaded is None:
        if 'coldate' not in cfg['in']:
            # Time includes Date
            fun_proc_loaded = lambda a, cfg_in: \
                a[cfg['in']['col_index_name']]
        else:
            # Time + Date
            fun_proc_loaded = lambda a, cfg_in: a['Date'] + np.array(
                np.int32(1000 * a[cfg['in']['col_index_name']]),