def h5move_and_sort(out):
    """
    Move tables listed in ``out['tables_have_wrote']`` from temporary storage,
    sort them in the destination HDF5 store, then clear the list.

    :param out: dict with fields:
        - tables_have_wrote: set of tuples of str, table names to move/sort;
        - db_path: Path of destination database (its stem names the resorted file);
        - b_remove_duplicates: will be set to True here before sorting.
    :return: None
    """
    tables_to_process = out['tables_have_wrote']
    storages_not_sorted = h5move_tables(out, tbl_names=tables_to_process)
    print('Finishing...' if storages_not_sorted else 'Ok.', end=' ')
    # Sort if have any processed data, else don't because ``ptprepack`` not closes
    # hdf5 source if it not finds data
    out['b_remove_duplicates'] = True
    h5index_sort(
        out,
        out_storage_name=f"{out['db_path'].stem}-resorted.h5",
        in_storages=storages_not_sorted,
        tables=tables_to_process,
    )
    out['tables_have_wrote'] = set()
def do(cfg):
    """
    Load NMEA navigation text files and save them to HDF5 tables.

    NOTE(review): the previous docstring here described ``new_arg`` command line
    arguments (copy-pasted from ``main``) and did not match this function.

    :param cfg: OmegaConf config with sections ``input`` (renamed to ``in`` below)
        and ``out``; ``out`` must contain ``table``, ``db_path``,
        ``tables_have_wrote`` (used by the final move/sort step).
    :return: None
    """
    h5init(cfg.input, cfg.out)
    # OmegaConf.update(cfg, "in", cfg.input, merge=False)  # error
    # to allow non primitive types (cfg.out['db']) and special words field names ('in'):
    cfg = OmegaConf.to_container(cfg)
    cfg['in'] = cfg.pop('input')
    cfg['in']['dt_from_utc'] = 0
    cfg['out']['table_log'] = f"{cfg['out']['table']}/logFiles"
    cfg['out']['b_insert_separator'] = False
    # column names as read from NMEA vs. names used in output store
    in_cols = ('datetime', 'latitude', 'longitude', 'depth_meters')
    out_cols = ('Time', 'Lat', 'Lon', 'DepEcho')

    ## Main circle ############################################################
    for i1_file, file in h5_dispenser_and_names_gen(cfg['in'], cfg['out']):
        lf.info('{}. {}: ', i1_file, file.name)
        # Loading data
        df = load_nmea(file, in_cols)
        df_rename_cols(df, in_cols, out_cols)
        lf.info('write: ')
        call_with_valid_kwargs(df_filter_and_save_to_h5, df, **cfg, input=cfg['in'])

    # Move written tables from temporary storage and (optionally) sort
    failed_storages = h5move_tables(cfg['out'], tbl_names=cfg['out']['tables_have_wrote'])
    print('Finishing...' if failed_storages else 'Ok.', end=' ')
    # Sort if have any processed data, else don't because ``ptprepack`` not closes
    # hdf5 source if it not finds data
    # NOTE(review): 'time_last' is presumably set by df_filter_and_save_to_h5 when
    # data was written — confirm (it is set explicitly in the gpx variant of main)
    if cfg['in'].get('time_last'):
        cfg['out']['b_remove_duplicates'] = True
        h5index_sort(
            cfg['out'],
            out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
            in_storages=failed_storages,
            tables=cfg['out']['tables_have_wrote'])
def main(new_arg=None):
    """
    Process CTD data from an input HDF5 store: filter, derive parameters,
    append to an output HDF5 store and optionally export to csv.

    :param new_arg: list of strings, command line arguments
    Note: if new_arg=='<cfg_from_args>' returns cfg but it will be None if
    argument argv[1:] == '-h' or '-v' passed to this code.
    argv[1] is cfgFile. It was used with cfg files:
        'csv2h5_nav_supervisor.ini'
        'csv2h5_IdrRedas.ini'
        'csv2h5_Idronaut.ini'
    :return: cfg when initialisation fails or '<cfg_from_args>' requested,
        () on Ex_nothing_done, else None
    """
    global l
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(**{
                **cfg['in'], 'path': cfg['in']['db_path']
                }, b_interact=cfg['program']['b_interact'])
        set_field_if_no(
            cfg['in'], 'tables_log', '{}/logFiles'
            )  # will be filled by each table from cfg['in']['tables']
        cfg['in']['query'] = query_time_range(**cfg['in'])
        set_field_if_no(cfg['out'], 'db_path', cfg['in']['db_path'])
        # cfg['out'] = init_file_names(cfg['out'], , path_field='db_path')
    except Ex_nothing_done as e:
        print(e.message)
        return ()

    # Open text log
    # NOTE(review): flog is only bound when 'log' is in cfg['program'] — the
    # close in the finally-block below indexes cfg['program']['log'] directly;
    # presumably the argparser always provides a 'log' key — confirm.
    if 'log' in cfg['program'].keys():
        dir_create_if_need(os_path.dirname(cfg['program']['log']))
        flog = open(cfg['program']['log'], 'a+', encoding='cp1251')

    cfg['out']['log'] = OrderedDict({'fileName': None, 'fileChangeTime': None})

    # Prepare saving to csv
    if 'file_names_add_fun' in cfg['out']:
        # evaluate a user-supplied expression producing the file-suffix function
        file_names_add = eval(
            compile(cfg['out']['file_names_add_fun'], '', 'eval'))
    else:
        file_names_add = lambda i: '.csv'  # f'_{i}.csv'

    # Prepare data for output store and open it
    if cfg['out']['tables'] == ['None']:
        # will not write new data table and its log
        cfg['out']['tables'] = None
        # cfg['out']['tables_log'] = None  # for _runs cfg will be redefined (this only None case that have sense?)
    h5init(cfg['in'], cfg['out'])
    # store, dfLogOld = h5temp_open(**cfg['out'])

    cfg_fileN = os_path.splitext(cfg['in']['cfgFile'])[0]
    out_tables_log = cfg['out'].get('tables_log')
    if cfg_fileN.endswith('_runs') or (bool(out_tables_log) and 'logRuns' in out_tables_log[0]):
        # "runs" mode: only write a logRuns table computed after filtering
        # Will calculate only after filter
        # todo: calculate derived parameters before were they are bad (or replace all of them if any bad?)
        func_before_cycle = lambda x: None
        func_before_filter = lambda df, log_row, cfg: df
        func_after_filter = lambda df, cfg: log_runs(df, cfg, cfg['out']['log'])

        # this table will be added:
        cfg['out']['tables_log'] = [cfg['out']['tables'][0] + '/logRuns']
        cfg['out']['b_log_ready'] = True  # to not apdate time range in h5_append()

        # Settings to not affect main data table and switch off not compatible options:
        cfg['out']['tables'] = []
        cfg['out']['b_skip_if_up_to_date'] = False  # todo: If False check it: need delete all previous result of CTD_calc() or set min_time > its last log time. True not implemented?
        cfg['program']['b_log_display'] = False  # can not display multiple rows log
        if 'b_save_images' in cfg['extract_runs']:
            cfg['extract_runs']['path_images'] = cfg['out']['db_path'].with_name('_subproduct')
            dir_create_if_need(cfg['extract_runs']['path_images'])
    else:
        # normal mode: choose pre-filter processing by config file name
        if 'brown' in cfg_fileN.lower():
            func_before_cycle = load_coef
            if 'Lat' in cfg['in']:
                func_before_filter = lambda *args, **kwargs: add_ctd_params(
                    process_brown(*args, **kwargs), kwargs['cfg'])
            else:
                func_before_filter = process_brown
        else:
            func_before_cycle = lambda x: None

            def ctd_coord_and_params(df: pd.DataFrame, log_row, cfg):
                # ensure coordinate columns present, then add derived CTD parameters
                coord_data_col_ensure(df, log_row)
                return add_ctd_params(df, cfg)

            func_before_filter = ctd_coord_and_params
        func_after_filter = lambda df, cfg: df  # nothing after filter

    func_before_cycle(cfg)  # prepare: usually assign data to cfg['for']
    if cfg['out'].get('path_csv'):
        dir_create_if_need(cfg['out']['path_csv'])

    # Load data Main circle #########################################
    # Open input store and cicle through input table log records
    qstr_trange_pattern = "index>=Timestamp('{}') & index<=Timestamp('{}')"
    iSt = 1

    dfLogOld, cfg['out']['db'], cfg['out'][
        'b_skip_if_up_to_date'] = h5temp_open(**cfg['out'])
    b_out_db_is_different = cfg['out']['db'] is not None and cfg['out'][
        'db_path_temp'] != cfg['in']['db_path']
    # Cycle for each table, for each row in log:
    # for path_csv in gen_names_and_log(cfg['out'], dfLogOld):
    with FakeContextIfOpen(
            lambda f: pd.HDFStore(f, mode='r'),
            cfg['in']['db_path'],
            None if b_out_db_is_different else cfg['out']['db']
            ) as cfg['in']['db']:  # not opens ['in']['db'] if already opened to write

        for tbl in cfg['in']['tables']:
            if False:  # Show table info
                nodes = sorted(cfg['out']['db'].root.__members__)  # , key=number_key
                print(nodes)
            print(tbl, end='. ')

            df_log = cfg['in']['db'].select(cfg['in']['tables_log'].format(tbl) or tbl,
                                            where=cfg['in']['query'])
            if True:  # try:
                if 'log' in cfg['program'].keys():
                    nRows = df_log.rows.size
                    flog.writelines(datetime.now().strftime(
                        '\n\n%d.%m.%Y %H:%M:%S> processed ') + f'{nRows} row' + (
                            's:' if nRows > 1 else ':'))

                for ifile, r in enumerate(df_log.itertuples(), start=iSt):  # name=None
                    print('.', end='')
                    sys_stdout.flush()

                    path_raw = PurePath(r.fileName)
                    cfg['out']['log'].update(fileName=path_raw.name,
                                             fileChangeTime=r.fileChangeTime)
                    # save current state
                    cfg['in']['file_stem'] = cfg['out']['log'][
                        'fileName']  # for exmple to can extract date in subprogram
                    cfg['in']['fileChangeTime'] = cfg['out']['log']['fileChangeTime']

                    # NOTE(review): reads cfg['in']['b_skip_if_up_to_date'] while
                    # h5temp_open() above wrote cfg['out']['b_skip_if_up_to_date']
                    # — looks inconsistent, confirm which one is intended.
                    if cfg['in']['b_skip_if_up_to_date']:
                        have_older_data, have_duplicates = h5del_obsolete(
                            cfg['out'], cfg['out']['log'], dfLogOld)
                        if have_older_data:
                            continue
                        if have_duplicates:
                            cfg['out']['b_remove_duplicates'] = True
                    print('{}. {}'.format(ifile, path_raw.name), end=': ')

                    # Load data
                    qstr = qstr_trange_pattern.format(r.Index, r.DateEnd)
                    df_raw = cfg['in']['db'].select(tbl, qstr)
                    cols = df_raw.columns.tolist()
                    # cfg['in']['lat'] and ['lon'] may be need in add_ctd_params() if Lat not in df_raw
                    if 'Lat_en' in df_log.columns and 'Lat' not in cols:
                        cfg['in']['lat'] = np.nanmean((r.Lat_st, r.Lat_en))
                        cfg['in']['lon'] = np.nanmean((r.Lon_st, r.Lon_en))
                    df = func_before_filter(df_raw, log_row=r, cfg=cfg)

                    if df.size:  # size is zero means save only log but not data
                        # filter, updates cfg['out']['log']['rows']
                        df, _ = set_filterGlobal_minmax(
                            df, cfg['filter'], cfg['out']['log'])
                    if 'rows' not in cfg['out']['log']:
                        l.warning('no data!')
                        continue
                    elif isinstance(cfg['out']['log']['rows'], int):
                        print('filtered out {rows_filtered}, remains {rows}'.format_map(
                            cfg['out']['log']))
                        if cfg['out']['log']['rows']:
                            print('.', end='')
                    else:
                        l.warning('no data!')
                        continue

                    df = func_after_filter(df, cfg=cfg)

                    # Append to Store
                    h5_append(cfg['out'], df, cfg['out']['log'],
                              log_dt_from_utc=cfg['in']['dt_from_utc'])

                    # Copy to csv
                    if cfg['out'].get('path_csv'):
                        fname = '{:%y%m%d_%H%M}-{:%d_%H%M}'.format(
                            r.Index, r.DateEnd) + file_names_add(ifile)
                        if not 'data_columns' in cfg['out']:
                            cfg['out']['data_columns'] = slice(0, -1)  # all cols
                        df.to_csv(  # [cfg['out']['data_columns']]
                            cfg['out']['path_csv'] / fname,
                            date_format=cfg['out']['text_date_format'],
                            float_format='%5.6g',
                            index_label='Time'
                            )  # to_string, line_terminator='\r\n'

                    # Log to screen (if not prohibited explicitly)
                    if cfg['out']['log'].get('Date0') is not None and (
                            ('b_log_display' not in cfg['program']) or
                            cfg['program']['b_log_display']):
                        str_log = '{fileName}:\t{Date0:%d.%m.%Y %H:%M:%S}-' \
                                  '{DateEnd:%d. %H:%M:%S%z}\t{rows}rows'.format_map(
                            cfg['out']['log'])  # \t{Lat}\t{Lon}\t{strOldVal}->\t{mag}
                        l.info(str_log)
                    else:
                        str_log = str(cfg['out']['log'].get('rows', '0'))
                    # Log to logfile
                    if 'log' in cfg['program'].keys():
                        flog.writelines('\n' + str_log)

    if b_out_db_is_different:
        try:
            if cfg['out']['tables'] is not None:
                print('')
                if cfg['out']['b_remove_duplicates']:
                    h5remove_duplicates(cfg['out'], cfg_table_keys=('tables', 'tables_log'))
                # Create full indexes. Must be done because of using ptprepack in h5move_tables() below
                l.debug('Create index')
                for tblName in (cfg['out']['tables'] + cfg['out']['tables_log']):
                    try:
                        cfg['out']['db'].create_table_index(tblName, columns=['index'], kind='full')
                    except Exception as e:
                        l.warning(
                            ': table {}. Index not created - error'.format(tblName),
                            '\n==> '.join([s for s in e.args if isinstance(s, str)]))
        except Exception as e:
            # drop into an interactive console at the innermost failing frame
            l.exception('The end. There are error ')
            import traceback, code
            from sys import exc_info as sys_exc_info
            tb = sys_exc_info()[2]  # type, value,
            traceback.print_exc()
            last_frame = lambda tb=tb: last_frame(tb.tb_next) if tb.tb_next else tb
            frame = last_frame().tb_frame
            ns = dict(frame.f_globals)
            ns.update(frame.f_locals)
            code.interact(local=ns)
        finally:
            cfg['out']['db'].close()
            if cfg['program']['log']:
                flog.close()
            if cfg['out']['db'].is_open:
                print('Wait store is closing...')
                sleep(2)

            failed_storages = h5move_tables(cfg['out'])
            print('Finishing...' if failed_storages else 'Ok.', end=' ')
            h5index_sort(
                cfg['out'],
                out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
                in_storages=failed_storages)
def main(new_arg=None):
    """
    Convert GPX files (waypoints / tracks / routes / tracker points) to HDF5
    tables, then move them from temporary storage and sort.

    :param new_arg: list of strings, command line arguments.
        If new_arg=='<cfg_from_args>' returns cfg (None if '-h'/'-v' passed).
    :return: cfg on initialisation failure or when '<cfg_from_args>' requested,
        else None
    """
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(**cfg['in'],
                                      b_interact=cfg['program']['b_interact'],
                                      cfg_search_parent=cfg['out'])
        h5init(cfg['in'], cfg['out'])
    except Ex_nothing_done as e:
        print(e.message)
        exit()

    # placeholder row used for insert separator lines
    # NOTE(review): not referenced in the visible code — possibly used via
    # commented-out logic; kept for compatibility.
    df_dummy = pd.DataFrame(
        np.full(1, np.NaN, dtype=np.dtype({
            'formats': ['float64', 'float64'],
            'names': cfg['out']['tracks_cols'][1:]})),
        index=(pd.NaT,))

    if 'routes_cols' not in cfg['in']:
        cfg['in']['routes_cols'] = cfg['in']['waypoints_cols']
    if 'routes_cols' not in cfg['out']:
        cfg['out']['routes_cols'] = cfg['out']['waypoints_cols']  # cfg['in']['routes_cols']  #
    # Writing
    if True:  # try:
        # Bug fix: previously the whole concatenation was the ternary's true
        # branch, so a single file logged just ':'. Parenthesized suffix fixes it.
        l.warning('processing ' + str(cfg['in']['nfiles']) + ' file'
                  + ('s:' if cfg['in']['nfiles'] > 1 else ':'))
        cfg['out']['log'] = {}
        set_field_if_no(cfg['out'], 'table_prefix', PurePath(cfg['in']['path']).stem)
        cfg['out']['table_prefix'] = cfg['out']['table_prefix'].replace('-', '')
        if len([t for t in cfg['out']['tables'] if len(t)]) > 1:
            cfg['out']['tables'] = \
                [cfg['out']['table_prefix'] + '_' + s for s in cfg['out']['tables']]
            cfg['out']['tables_log'] = \
                [cfg['out']['table_prefix'] + '_' + s for s in cfg['out']['tables_log']]

        # NOTE(review): df_names is not defined in this function — presumably a
        # module-level constant naming the gpx data kinds; confirm.
        tables = dict(zip(df_names, cfg['out']['tables']))
        tables_log = dict(zip(df_names, cfg['out']['tables_log']))
        # Can not save path to DB (useless?) so set for this max file name length:
        set_field_if_no(cfg['out'], 'logfield_fileName_len', 50)
        cfg['out']['index_level2_cols'] = cfg['in']['routes_cols'][0]

        ## Main circle ############################################################
        for i1_file, path_gpx in h5_dispenser_and_names_gen(cfg['in'], cfg['out']):
            l.info('{}. {}: '.format(i1_file, path_gpx.name))
            # Loading data
            dfs = gpxConvert(cfg, path_gpx)
            print('write', end=': ')
            sys_stdout.flush()
            for key, df in dfs.items():
                if (not tables.get(key)) or df.empty:
                    continue
                elif key == 'tracks':
                    # Save last time to can filter next file
                    cfg['in']['time_last'] = df.index[-1]

                sort_time = False if key in {'waypoints', 'routes'} else None

                # monkey patching
                if 'tracker' in tables[key]:
                    # Also {} must be in tables[key]. todo: better key+'_fun_tracker' in cfg['out']?
                    # Trackers processing: split one dataframe to one table per tracker
                    trackers_numbers = {
                        '0-3106432': '1',
                        '0-2575092': '2',
                        '0-3124620': '3',
                        '0-3125300': '4',
                        '0-3125411': '5',
                        '0-3126104': '6'
                        }
                    tables_pattern = tables[key]
                    tables_log_pattern = tables_log[key]

                    df['comment'] = df['comment'].str.split(" @", n=1, expand=True)[0]
                    # split data and save to multipe tables
                    df_all = df.set_index(['comment', df.index])
                    for sn, n in trackers_numbers.items():  # set(df_all.index.get_level_values(0))
                        try:
                            df = df_all.loc[sn]
                        except KeyError:
                            continue
                        # redefine saving parameters
                        cfg['out']['table'] = tables_pattern.format(trackers_numbers[sn])
                        cfg['out']['table_log'] = tables_log_pattern.format(trackers_numbers[sn])
                        # Bug fix: was ``df**cfg`` (exponentiation typo) which
                        # raised TypeError at runtime; must be ``df, **cfg`` as
                        # in the non-tracker branch below.
                        call_with_valid_kwargs(df_filter_and_save_to_h5,
                                               df, **cfg,
                                               input=cfg['in'],
                                               sort_time=sort_time)
                else:
                    cfg['out']['table'] = tables[key]
                    cfg['out']['table_log'] = tables_log[key]
                    call_with_valid_kwargs(df_filter_and_save_to_h5,
                                           df, **cfg,
                                           input=cfg['in'],
                                           sort_time=sort_time)

    # (removed a dead ``try: pass / except`` block that could never trigger)
    try:
        failed_storages = h5move_tables(cfg['out'],
                                        tbl_names=cfg['out'].get('tables_have_wrote', set()))
        print('Finishing...' if failed_storages else 'Ok.', end=' ')
        # Sort if have any processed data that needs it (not the case for the
        # routes and waypoints), else don't because ``ptprepack`` not closes
        # hdf5 source if it not finds data
        if cfg['in'].get('time_last'):
            cfg['out']['b_remove_duplicates'] = True
            h5index_sort(
                cfg['out'],
                out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
                in_storages=failed_storages,
                tables=cfg['out'].get('tables_have_wrote', set()))
    except Ex_nothing_done:
        print('ok')
nodes = parent_group.__members__ childs = [f'/{tbl}/{g}' for g in nodes if (g != 'table') and (g != '_i_table')] if childs: print('found {} childs of {}. Copying...'.format(len(nodes), tbl)) for child in childs: sr._handle.copy_node(child, newparent=sw.get_storer(tbl).group, recursive=True, overwrite=True) sw.flush() # .flush(fsync=True sleep(8) # write tables back with sorted index store_in, store_out = store_out, store_in h5move_tables({ 'db_path_temp': store_in, 'db_path': store_out, 'tables': [tbl], 'tables_log': [], 'addargs': ['--checkCSI'] }, col_sort='Time' ) #'navigation/logFiles' will be copied as child store_out = str(Path(store_out).with_name('sort_man.h5')) with pd.HDFStore(store_in, 'r') as sr, pd.HDFStore(store_out, 'w') as sw: df = sr[tbl] df.sort_index().to_hdf(sw, tbl, format='table', data_columns=True, append=False, index=False) sw.create_table_index(tbl, columns=['index'], kind='full') store_in, store_out = store_out, str(Path(store_out).with_name('sort_man_ptp.h5')) h5move_tables({ 'db_path_temp': store_in,
def main(new_arg=None, **kwargs):
    """
    Read framed binary logger files, timestamp frames, filter and append the
    result to an HDF5 store.

    :param new_arg: list of strings, command line arguments
    :kwargs: dicts for each section: to overwrite values in them (overwrites
        even high priority values, other values remains)
    Note: if new_arg=='<cfg_from_args>' returns cfg but it will be None if
    argument argv[1:] == '-h' or '-v' passed to this code.
    argv[1] is cfgFile. It was used with cfg files:
        'csv2h5_nav_supervisor.ini'
        'csv2h5_IdrRedas.ini'
        'csv2h5_Idronaut.ini'
    :return: cfg on initialisation failure or '<cfg_from_args>' requested,
        () on Ex_nothing_done, else None
    """
    global l
    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(**cfg['in'],
                                      b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message)
        return ()

    bOld_FF00FF = False
    # legacy per-device configuration kept for reference:
    # if 'TermGrunt' in sys.argv[1]  FF00FF' in str(cfg['in']['path']):  # 'TermGrunt.h5' ? args.path.endswith('bin'):
    #     bOld_FF00FF = True
    #     cfg['in'].update({
    #         'header': 'TERM',
    #         'dt_from_utc': timedelta(hours=-1),
    #         'fs': 1, 'b_time_fromtimestamp': True,
    #         'b_time_fromtimestamp_source': False})
    # else:  # 'Katran.h5'
    #     cfg['in'].update({
    #         'delimiter_hex': '000000E6',
    #         'header': 'P, Temp, Cond',
    #         'dt_from_utc': timedelta(hours=0),
    #         'fs': 10, 'b_time_fromtimestamp': False,
    #         'b_time_fromtimestamp_source': False})

    # derive default integer dtype width from data_word_len
    # NOTE(review): searchsorted is applied to a boolean array here — it works
    # because the array is monotonic (False..True), but confirm the intent.
    set_field_if_no(
        cfg['in'], 'dtype', 'uint{:d}'.format(2**(3 + np.searchsorted(
            2**np.array([3, 4, 5, 6, 7]) > np.array(
                8 * (cfg['in']['data_word_len'] - 1)), 1))))

    # Prepare cpecific format loading and writing
    set_field_if_no(cfg['in'], 'coltime', [])
    cfg['in'] = init_input_cols(cfg['in'])
    cfg['out']['names'] = np.array(cfg['in']['dtype'].names)[ \
        cfg['in']['cols_loaded_save_b']]
    cfg['out']['formats'] = [
        cfg['in']['dtype'].fields[n][0] for n in cfg['out']['names']
        ]
    cfg['out']['dtype'] = np.dtype({
        'formats': cfg['out']['formats'],
        'names': cfg['out']['names']
        })
    h5init(cfg['in'], cfg['out'])

    # cfg['Period'] = 1.0 / cfg['in']['fs']  # instead Second can use Milli / Micro / Nano:
    # cfg['pdPeriod'] = pd.to_timedelta(cfg['Period'], 's')
    # #pd.datetools.Second(cfg['Period'])\
    # if 1 % cfg['in']['fs'] == 0 else\
    # pd.datetools.Nano(cfg['Period'] * 1e9)

    # log table of loaded files. columns: Start time, file name, and its index
    # in array off all loaded data:
    log_item = cfg['out']['log'] = {
        }  # fields will have: 'fileName': None, 'fileChangeTime': None, 'rows': 0
    strLog = ''
    # from collections import namedtuple
    # type_log_files = namedtuple('type_log_files', ['label','iStart'])
    # log.sort(axis=0, order='log_item['Date0']')#sort files by time

    dfLogOld, cfg['out']['db'], cfg['out'][
        'b_skip_if_up_to_date'] = h5temp_open(**cfg['out'])
    if 'log' in cfg['program'].keys():
        f = open(PurePath(sys_argv[0]).parent / cfg['program']['log'], 'a',
                 encoding='cp1251')
        # NOTE(review): the ternary binds to the whole strftime argument, so for
        # a single file only ':' is written — looks like an operator-precedence
        # bug (suffix should be parenthesized); confirm before fixing.
        f.writelines(
            datetime.now().strftime('\n\n%d.%m.%Y %H:%M:%S> processed '
                                    + str(cfg['in']['nfiles']) + ' file'
                                    + 's:' if cfg['in']['nfiles'] > 1 else ':'))
    b_remove_duplicates = False  # normally no duplicates but will if detect
    # Config specially for readBinFramed
    set_field_if_no(cfg['in'], 'b_byte_order_is_big_endian', True)
    set_field_if_no(cfg['in'], 'b_baklan', False)
    set_field_if_no(cfg['in'], 'b_time_fromtimestamp_source', False)
    cfg['out']['fs'] = cfg['in']['fs']

    if True:
        ## Main circle ############################################################
        for i1_file, path_in in h5_dispenser_and_names_gen(cfg['in'], cfg['out']):
            l.info('{}. {}: '.format(i1_file, path_in.name))
            # Loading data
            if bOld_FF00FF:
                V = readFF00FF(path_in, cfg)
                iFrame = np.arange(len(V))
            else:
                V, iFrame = readBinFramed(path_in, cfg['in'])
            # derive frame timestamps either from file mtime (counted backwards
            # from recording end) or from the file name
            if ('b_time_fromtimestamp' in cfg['in'] and
                    cfg['in']['b_time_fromtimestamp']) or \
                    ('b_time_fromtimestamp_source' in cfg['in'] and
                     cfg['in']['b_time_fromtimestamp_source']):
                path_in_rec = os_path.join(
                    'd:\\workData\\_source\\BalticSea\\151021_T1Grunt_Pregol\\_source\\not_corrected',
                    os_path.basename(path_in)[:-3] + 'txt'
                    ) if cfg['in']['b_time_fromtimestamp_source'] else path_in
                log_item['Date0'] = datetime.fromtimestamp(
                    os_path.getmtime(path_in_rec))  # getctime is bad
                log_item['Date0'] -= iFrame[-1] * timedelta(
                    seconds=1 / cfg['in']['fs']
                    )  # use for computer filestamp at end of recording
            else:
                log_item['Date0'] = datetime.strptime(
                    path_in.stem, cfg['in']['filename2timestart_format'])
            log_item['Date0'] += cfg['in']['dt_from_utc']
            tim = log_item['Date0'] + iFrame * timedelta(
                seconds=1 / cfg['in']['fs']
                )  # tim = pd.date_range(log_item['Date0'], periods=np.size(V, 0), freq=cfg['pdPeriod'])
            df = pd.DataFrame(
                V.view(dtype=cfg['out']['dtype']),  # np.uint16
                columns=cfg['out']['names'],
                index=tim)
            # pd.DataFrame(V, columns=cfg['out']['names'], dtype=cfg['out']['formats'], index=tim)
            if df.empty:  # log['rows']==0
                print('No data => skip file')
                continue

            df, tim = set_filterGlobal_minmax(df,
                                              cfg_filter=cfg['filter'],
                                              log=log_item,
                                              dict_to_save_last_time=cfg['in'])
            if log_item['rows_filtered']:
                print('filtered out {}, remains {}'.format(
                    log_item['rows_filtered'], log_item['rows']))
            if not log_item['rows']:
                l.warning('no data! => skip file')
                continue
            elif log_item['rows']:
                print(
                    '.', end=''
                    )  # , divisions=d.divisions), divisions=pd.date_range(tim[0], tim[-1], freq='1D')
            else:
                l.warning('no data! => skip file')
                continue

            # Append to Store
            h5_append(cfg['out'], df.astype('int32'), log_item)

            if 'txt' in cfg['program'].keys():  # can be saved as text too
                np.savetxt(cfg['program']['txt'], V, delimiter='\t', newline='\n',
                           header=cfg['in']['header'] + log_item['fileName'],
                           fmt='%d', comments='')

    try:
        if b_remove_duplicates:
            # NOTE(review): list concatenation of 'table' + 'tableLog_names' —
            # presumably both are lists; confirm 'table' is not a plain string.
            for tblName in (cfg['out']['table'] + cfg['out']['tableLog_names']):
                cfg['out']['db'][tblName].drop_duplicates(
                    keep='last', inplace=True)  # subset='fileName',?
        if len(strLog):
            print('Create index', end=', ')
            for tblName in (cfg['out']['table'] + cfg['out']['tableLog_names']):
                cfg['out']['db'].create_table_index(tblName,
                                                    columns=['index'],
                                                    kind='full')
        else:
            print('done nothing')
    except Exception as e:
        # drop into an interactive console at the innermost failing frame
        l.exception('The end. There are error ')
        import traceback, code
        from sys import exc_info as sys_exc_info
        tb = sys_exc_info()[2]  # type, value,
        traceback.print_exc()
        last_frame = lambda tb=tb: last_frame(tb.tb_next) if tb.tb_next else tb
        frame = last_frame().tb_frame
        ns = dict(frame.f_globals)
        ns.update(frame.f_locals)
        code.interact(local=ns)

    # sort index if have any processed data (needed because ``ptprepack`` not
    # closses hdf5 source if it not finds data)
    if cfg['in'].get('time_last'):
        failed_storages = h5move_tables(cfg['out'])
        print('Ok.', end=' ')
        h5index_sort(
            cfg['out'],
            out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
            in_storages=failed_storages)
def main(config: ConfigType) -> None:
    """
    Calibrate a pressure-like column, filter periodic spikes per burst and save
    the result to a new compressed/sorted HDF5 store.

    NOTE(review): relies on module-level ``cfg_in`` (db_path, table, col,
    min/max_date, b_show) and ``pattern_log_dt`` not defined in this function —
    presumably configured elsewhere in the module; confirm.

    :param config: mapping with at least 'col_out', 'cols_order' and optionally
        'db_path'
    :return: None
    """
    #
    with pd.HDFStore(cfg_in['db_path'], mode='r') as store:
        df = store[cfg_in['table']][cfg_in['min_date']:cfg_in['max_date']]
        # polynomial coefficients stored as a /coef node next to the data table
        k = store.get_node(f"{cfg_in['table']}/coef")[cfg_in['col']].read()
    n_rows_before = df.shape[0]
    lf.info('Loaded data {0[0]} - {0[1]}: {1} rows. Filtering {2[col_out]}...',
            df.index[[0, -1]], n_rows_before, config)
    # print(f"Loaded data {df.index[0]} - {df.index[-1]}: {n_rows_before} rows. Filtering {cfg_in['col']}...")
    p_name = config['col_out']
    df[p_name] = np.polyval(k, df[cfg_in['col']])
    # Battery compensation
    kBat = [1.7314032932363, -11.9301097967443]
    df[p_name] -= np.polyval(kBat, df['Battery'])

    MIN_P = 6  # P filtered below: to not delete spikes that may be used to find other spikes using ~constant period

    if config['cols_order']:
        df = df.loc[:, config['cols_order']]
    else:
        df.drop(cfg_in['col'], axis='columns', inplace=True)

    i_burst, mean_burst_size, max_hole = i_bursts_starts(df.index)
    i_col = df.columns.get_loc(p_name)

    if cfg_in['b_show']:
        # optional interactive plot of the filtering
        from matplotlib import pyplot as plt
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.grid(True, alpha=.85, color='white', axis='y', linestyle='-')
        fig.subplots_adjust(top=.89)
        fig.show()
    else:
        ax = None

    # 'db': store
    cfg_out = {
        'table': cfg_in['table'],
        'table_log': f"{cfg_in['table']}/logFiles",
        'log': {},
        'db_path': Path(config['db_path']) if 'db_path' in config else (
            cfg_in['db_path'].with_name(f"{cfg_in['db_path'].stem}_filt_s.h5"))
        }

    def h5_names_gen(cfg_in, cfg_out: Mapping[str, Any], **kwargs) -> Iterator[None]:
        # single-shot generator: only fills the log's fileChangeTime from the
        # source db's mtime, then yields once for the dispenser loop below
        #cfg_out['log']['fileName'] = pname.name[-cfg_out['logfield_fileName_len']:-4]
        cfg_out['log']['fileChangeTime'] = datetime.fromtimestamp(
            cfg_in['db_path'].stat().st_mtime)
        yield None

    h5init(cfg_in, cfg_out)  # cfg_in for full path if cfg_out['db_path'] only name
    n_rows_after = 0
    #with pd.HDFStore(out_path) as store:  #, mode='w'
    for _, _ in h5_dispenser_and_names_gen(
            cfg_in, cfg_out, h5_names_gen):  # handles temporary db for h5_append()
        try:
            if h5remove_table(cfg_out['db'], cfg_in['table']):
                lf.info('previous table removed')
        except Exception as e:  # no such table?
            pass
        # process each burst [st, en) separately
        for st, en in pairwise(i_burst):
            cfg_out['log']['fileName'] = str(st)
            sl = slice(st, en)
            ind_ok = filter_periodic_spike(df.iloc[sl, i_col], ax=ax)

            # Filtering: additionally drop values below MIN_P
            bad_p = df.loc[ind_ok, p_name] < MIN_P
            n_bad = bad_p.sum()
            if n_bad:
                lf.info('filtering {} > {}: deleting {} values in frame {}',
                        p_name, MIN_P, n_bad,
                        pattern_log_dt.format(*df.index[[0, -1]]))
                ind_ok = ind_ok[~bad_p]
                if not ind_ok.size:
                    continue
            # df.loc[bad_p, p_name] = np.NaN

            # save result
            h5_append(cfg_out, df.loc[ind_ok], cfg_out['log'])
            n_rows_after += ind_ok.size

    # Temporary db to compressed db with pandas index
    if n_rows_after:  # check needed because ``ptprepack`` in h5index_sort() not closes hdf5 source if it not finds data
        failed_storages = h5move_tables(cfg_out)
        print('Ok.', end=' ')
        h5index_sort(cfg_out,
                     out_storage_name=f"{cfg_out['db_path'].stem}-resorted.h5",
                     in_storages=failed_storages)

    lf.info(
        f'Removed {n_rows_before - n_rows_after} rows. Saved {n_rows_after} rows to {cfg_out["db_path"]}...'
        )
with pd.HDFStore(path_db.with_name('_not_sorted.h5')) as store_tmp: try: del store_tmp[tbl_log] except KeyError: pass df_log.to_hdf(store_tmp, tbl_log, append=True, data_columns=True, format='table', dropna=True, index=False) h5move_tables({ 'db_path_temp': path_db.with_name('_not_sorted.h5'), 'db_path': path_db, 'tables': [tbl_log], 'tables_log': [], 'addargs': ['--checkCSI', '--verbose'] }) # Now run step 30 with veuszPropagate seting: '--b_update_existed', 'False' to save only modified vsz/images. After that delete old vsz and its images if False: #st(40) # may not comment always because can not delete same time more than once # Deletng bad runs from DB: import pandas as pd # find bad runs that have time: time_in_bad_run_any = ['2018-10-16T19:35:41+00:00'] tbl = f'/{device}' tbl_log = tbl + '/logRuns' print('Deletng bad runs from DB: tables: {}, {} run with time {}'.format(
sw[child_w] = sr[child_r] tables_have_wrote_cur.append(child_w) else: print('found {} cr of {}. Copying...'.format(len(nodes_cr), tbl)) for child_r in cr: st._handle.copy_node(child_r, newparent=sw.get_storer(tbl).group, recursive=True, overwrite=True) tables_have_wrote.add(tuple(tables_have_wrote_cur)) st.flush() # .flush(fsync=True sleep(8) # write tables with sorted index try: failed_storages = h5move_tables({ 'db_path_temp': store_out_temp, 'db_path': store_out, 'addargs': ['--overwrite'] # '--checkCSI' }, tbl_names=tables_have_wrote, # col_sort='Time' # must exist ) # 'navigation/logFiles' will be copied as child except Ex_nothing_done as e: print('Tables not moved') except RuntimeError: # not captured raise if False: store_out = str(Path(store_out).with_name('sort_man.h5')) with pd.HDFStore(store_in, 'r') as sr, pd.HDFStore(store_out, 'w') as sw: df = sr[tbl] df.sort_index().to_hdf(sw, tbl, format='table', data_columns=True, append=False, index=False) sw.create_table_index(tbl, columns=['index'], kind='full')
# Append to Store if df.empty: # log['rows']==0 print('No data => skip file') h5_append(cfg['out'], df, log) b_appended = True except Exception as e: b_appended = False finally: store.close() if b_appended: if store.is_open: print('Wait store is closing...') # from time import sleep # sleep(2) failed_storages = h5move_tables(cfg['out']) print('Ok.', end=' ') h5index_sort(cfg['out'], out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5", in_storages=failed_storages) # @+node:korzh.20180520131532.4: ** garbage # def main_gabage(): # print('\n' + this_prog_basename(__file__), end=' started. ') # try: # cfg['in']= init_file_names(cfg['in']) # except Ex_nothing_done as e: # print(e.message) # return() # # fGi = lambda Ax,Ay,Az,Ag,Cg,i: np.dot(Ag.T, (np.column_stack((Ax, Ay, Az))[ # slice(*i)] - Cg[0,:]).T)