def main(config: ConfigType) -> None:
    """
    ----------------------------
    Calculates coefficients from data of Pandas HDF5 store*.h5
    and saves them back
    ----------------------------
    1. Obtains command line arguments (for description see my_argparser()) that can be
       passed from new_arg and ini.file also.
    2. Loads device data of calibration in laboratory from hdf5 database (cfg['in']['db_path'])
    3. Calibrates channels configured by cfg['in']['channels'] ('accelerometer' and/or
       'magnetometer'): soft iron
    4. Wrong implementation - not use cfg['in']['timerange_nord']!
       todo: Rotate compass using cfg['in']['timerange_nord']

    :param config: hydra/dataclass config; returns cfg if new_arg=='<cfg_from_args>' but it
        will be None if argument argv[1:] == '-h' or '-v' passed to this code.
        argv[1] is cfgFile.
    :return: None. Side effects: saves figures near the input DB, writes coefficient
        matrices into the input (and optionally output) HDF5 store.
    """
    global cfg, l
    cfg = main_init(config, cs_store_name, __file__=None)
    cfg = main_init_input_file(cfg, cs_store_name)

    # Input data tables may be defined by 'probes_prefix' and 'probes' fields of cfg['in']
    if cfg['in']['probes'] or not len(cfg['in']['tables']):
        if cfg['in']['probes']:
            # Explicit probe numbers: build zero-padded table names like "incl01"
            cfg['in']['tables'] = [
                f"{cfg['in']['probes_prefix']}{probe:0>2}" for probe in cfg['in']['probes']
            ]
        elif cfg['in']['probes_prefix']:
            # No explicit probes: use a regex pattern matching all tables with the prefix
            cfg['in']['tables'] = [f"{cfg['in']['probes_prefix']}.*"]

    lf.info("{:s}({:s}) for channels: {} started. ",
            this_prog_basename(__file__), ', '.join(cfg['in']['tables']), cfg['in']['channels'])
    fig = None
    fig_filt = None
    fig_save_dir_path = cfg['in']['db_path'].parent  # figures are saved next to the input DB
    with pd.HDFStore(cfg['in']['db_path'], mode='r') as store:
        if len(cfg['in']['tables']) == 1:
            # A single entry may be a regex pattern matching several store tables
            cfg['in']['tables'] = h5find_tables(store, cfg['in']['tables'][0])
        coefs = {}
        for itbl, tbl in enumerate(cfg['in']['tables'], start=1):
            # NOTE(review): assumes the table name contains digits (the probe number);
            # raises IndexError otherwise - TODO confirm all table names carry a number.
            # Fix: raw string for the regex (was '\d+' - invalid escape warning in new Python)
            probe_number = int(re.findall(r'\d+', tbl)[0])
            lf.info(f'{itbl}. {tbl}: ')
            if isinstance(cfg['in']['timerange'], Mapping):  # individual interval for each table
                timerange = cfg['in']['timerange'].get(probe_number)
            else:
                timerange = cfg['in']['timerange']  # same interval for each table
            a = load_hdf5_data(store, table=tbl, t_intervals=timerange)

            # Calibrate channels of 'accelerometer' or/and 'magnetometer'
            coefs[tbl] = {}
            for channel in cfg['in']['channels']:
                print(f' channel "{channel}"', end=' ')
                (col_str, coef_str) = channel_cols(channel)

                # Filtering: mark samples where the sensor was actually working
                # (per-component, OR-ed - a sample is kept if any component "works")
                b_ok = np.zeros(a.shape[0], bool)
                for component in ['x', 'y', 'z']:
                    b_ok |= is_works(
                        a[col_str + component],
                        noise=cfg['filter']['no_works_noise'][channel])
                lf.info('Filtered not working area: {:2.1f}%',
                        (b_ok.size - b_ok.sum()) * 100 / b_ok.size)
                # 3 x N array of the kept channel components
                vec3d = a.loc[
                    b_ok, [col_str + 'x', col_str + 'y', col_str + 'z']].to_numpy(float).T
                index = a.index[b_ok]

                vec3d, b_ok, fig_filt = filter_channes(
                    vec3d, index, fig_filt,
                    fig_save_prefix=f"{fig_save_dir_path / tbl}-'{channel}'",
                    blocks=cfg['filter']['blocks'],
                    offsets=cfg['filter']['offsets'],
                    std_smooth_sigma=cfg['filter']['std_smooth_sigma'])

                # Soft-iron ellipse fit: A is the shape matrix, b the offset vector
                A, b = calibrate(vec3d)
                window_title = f"{tbl} '{channel}' channel ellipse"
                fig = calibrate_plot(vec3d, A, b, fig, window_title=window_title)
                fig.savefig(fig_save_dir_path / (window_title + '.png'),
                            dpi=300, bbox_inches="tight")
                A_str, b_str = coef2str(A, b)
                lf.info('Calibration coefficients calculated: \nA = \n{:s}\nb = \n{:s}',
                        A_str, b_str)
                coefs[tbl][channel] = {'A': A, 'b': b}

            # Zeroing North direction
            timerange_nord = cfg['in']['timerange_nord']
            if isinstance(timerange_nord, Mapping):
                timerange_nord = timerange_nord.get(probe_number)
            if timerange_nord:
                # NOTE(review): requires magnetometer channel 'M' to have been calibrated
                # above, else KeyError - confirm channels always include 'M' when
                # timerange_nord is set.
                coefs[tbl]['M']['azimuth_shift_deg'] = zeroing_azimuth(
                    store, tbl, timerange_nord, calc_vel_flat_coef(coefs[tbl]), cfg['in'])
            else:
                lf.info('not zeroing North')

    # Write coefs to each of output tables named same as input
    for cfg_output in (['in', 'out'] if cfg['out'].get('db_path') else ['in']):
        lf.info('Writing to {}', cfg[cfg_output]['db_path'])
        for itbl, tbl in enumerate(cfg['in']['tables'], start=1):
            dict_matrices = dict_matrices_for_h5(coefs[tbl], tbl, cfg['in']['channels'])
            h5copy_coef(None, cfg[cfg_output]['db_path'], tbl, dict_matrices=dict_matrices)
    print('Ok>', end=' ')
'--fs_float', f'{fs(probe, in_file.stem)}', # '--dt_from_utc_seconds', "{}".format(int((np.datetime64('00', 'Y') - np.datetime64(dt_from_utc[probe] # # ['19-06-24T10:19:00', '19-06-24T10:21:30'][i_proc_probe] # ))/np.timedelta64(1,'s'))) ] + (['--csv_specific_param_dict', 'invert_magnitometr: True'] if prefix == 'incl' else ['--cols_load_list', "yyyy,mm,dd,HH,MM,SS,P,U"])) # Get coefs: db_coefs = r'd:\WorkData\~configuration~\inclinometr\190710incl.h5' try: tbl = f'{prefix}{probe:0>2}' l.info( f"Adding coefficients to {db_path}/{tbl} from {db_coefs}") h5copy_coef(db_coefs, db_path, tbl) except KeyError as e: # Unable to open object (component not found) l.warning('Coef is not copied!') # todo write some dummy coefficients to can load Veusz patterns i_proc_file += 1 else: print(probe, end=': no, ') i_proc_probe += 1 print('Ok:', i_proc_probe, 'probes,', i_proc_file, 'files processed.') # Calculate velocity and average if st(2): # if aggregate_period_s is None then not average and write to *_proc_noAvg.h5 else loading from that h5 and writing to _proc.h5 for aggregate_period_s in [ None, 2, 600, 3600 if 'w' in prefix else 7200 ]: # 2,, 7200 # 300, 600, [None], [None, 2, 600, 3600 if 'w' in prefix else 7200], [3600]
'--pattern_path', str(vsz_path), '--widget', '/fitV(incl)/grid1/graph/fit_t/values', # '/fitV(force)/grid1/graph/fit1/values', '--data_for_coef', 'max_incl_of_fit_t', '--out.path', str(db_path_tank), '--re_tbl_from_vsz_name', '\D*\d*', '--channels_list', 'M,A', '--b_update_existed', 'True', # to not skip. '--export_pages_int_list', '', #4 0 = all '--b_interact', 'False', '--b_execute_vsz', 'True', '--return', '<embedded_object>', # reuse to not bloat memory ], veusze=vsz_data['veusze']) if vsz_data is not None: # if step == 3: # to 1st db too # l = init_logging(logging, None) print(f"Adding coefficients to {db_path_calibr_scalling}/{tbl} from {db_path_tank}") h5copy_coef(db_path_tank, db_path_calibr_scalling, tbl, ok_to_replace_group=True) vsz_data['veusze'].Close() try: vsz_data['veusze'].WaitForClose() except AttributeError: # already 'NoneType' => closed ok pass else: vsz_data = {'veusze': None}
def main(new_arg=None, **kwargs):
    """
    Run the inclinometer / wavegauge processing pipeline. Steps (selected through
    cfg['program']['step_start'] / 'step_end' via the module-level st() helper):
    1. Correct raw ASCII files and load them to a PyTables HDF5 DB, copying coefficients.
    2. Calculate velocity and average (delegates to incl_h5clc.main).
    3. Calculate spectrograms (delegates to incl_h5spectrum.main).
    4. Draw figures in Veusz (delegates to veuszPropagate.main).

    :param new_arg: list of strings, command line arguments
    :param kwargs: dicts of dicts (for each ini section): specified values overwrite ini values
    :return: None. Side effects: creates/updates HDF5 databases, text exports and figures.
    """
    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)
    cfg['in']['db_coefs'] = Path(cfg['in']['db_coefs'])
    # Resolve relative input paths against the config file location
    for path_field in ['db_coefs', 'path_cruise']:
        if not cfg['in'][path_field].is_absolute():
            cfg['in'][path_field] = (
                cfg['in']['cfgFile'].parent / cfg['in'][path_field]).resolve().absolute()

    def constant_factory(val):
        """Return a zero-argument callable yielding val (default for defaultdict)"""
        def default_val():
            return val
        return default_val

    # Per-probe date limits: key '0' (or 0) holds the default applied to any probe
    for lim in ('min_date', 'max_date'):
        cfg['filter'][lim] = defaultdict(
            constant_factory(cfg['filter'][lim].get('0', cfg['filter'][lim].get(0))),
            cfg['filter'][lim])

    l = init_logging(logging, None, None, 'INFO')

    if True:  # False. Experimental speedup but takes memory
        from dask.cache import Cache
        cache = Cache(2e9)  # Leverage two gigabytes of memory
        cache.register()  # Turn cache on globally

    if cfg['program']['dask_scheduler']:
        if cfg['program']['dask_scheduler'] == 'distributed':
            from dask.distributed import Client
            # processes=False avoids inter-worker communication; computations that release
            # the GIL (numpy, da.array) still parallelize. Dashboard: http://localhost:8787/status
            client = Client(processes=False)
        else:
            if cfg['program']['dask_scheduler'] == 'synchronous':
                l.warning('using "synchronous" scheduler for debugging')
            import dask
            dask.config.set(scheduler=cfg['program']['dask_scheduler'])

    # Run steps:
    st.start = cfg['program']['step_start']
    st.end = cfg['program']['step_end']
    st.go = True

    if not cfg['out']['db_name']:
        # Set DB name from 'path_cruise' dir name (or its parent) if it starts with digits.
        # Fix: raw string for the regex (was '(^[\d_]*).*')
        for p in (lambda p: [p, p.parent])(cfg['in']['path_cruise']):
            m = re.match(r'(^[\d_]*).*', p.name)
            if m:
                break
        cfg['out']['db_name'] = f"{m.group(1).strip('_')}incl.h5"
    # Fix: removed no-op statement `cfg['in']['path_cruise'].glob('*inclinometer*')`
    # whose generator was never consumed.
    dir_incl = next((d for d in cfg['in']['path_cruise'].glob('*inclinometer*') if d.is_dir()),
                    cfg['in']['path_cruise'])
    db_path = dir_incl / cfg['out']['db_name']
    # ---------------------------------------------------------------------------------------------

    def fs(probe, name):
        """Sampling frequency, Hz, for probe (currently a constant for all probes)"""
        return 5
        # if 'w' in name.lower():  # Baranov's wavegauge electronic
        #     return 5  # 10
        # if probe < 20 or probe in [23, 29, 30, 32, 33]:  # 30 [4, 11, 5, 12] + [1, 7, 13, 30]
        #     return 5
        # if probe in [21, 25, 26] + list(range(28, 35)):
        #     return 8.2
        # return 4.8

    def datetime64_str(time_str: Optional[str] = None) -> np.ndarray:
        """
        Reformat time_str to ISO 8601 or to 'NaT'. Used here for input in funcs that
        convert str to numpy.datetime64
        :param time_str: May be 'NaT'
        :return: ndarray of strings (tested for 1 element only) formatted by numpy.
        """
        return np.datetime_as_string(np.datetime64(time_str, 's'))

    probes = cfg['in']['probes'] or range(1, 41)  # sets default range, specify your values before line ---
    # subs_made > 0 means Kondrashov probe naming ('INCL...'), else Baranov format
    raw_root, subs_made = re.subn('INCL_?', 'INKL_', cfg['in']['probes_prefix'].upper())

    if st(1):
        # Note: can not find additional not-corrected files for same probe if already
        # have any corrected ones in the search path (move them out if needed)
        i_proc_probe = 0  # counter of processed probes
        i_proc_file = 0  # counter of processed files
        # pattern to identify only _probe_'s raw data files that need correction '*INKL*{:0>2}*.[tT][xX][tT]':
        raw_parent = dir_incl / '_raw'
        # sub replaces multilevel subdirs with 1 level that correct_fun() can create
        dir_out = raw_parent / re.sub(r'[.\\/ ]', '_', cfg['in']['raw_subdir'])
        raw_parent /= cfg['in']['raw_subdir']
        for probe in probes:
            raw_found = []
            raw_pattern_file = cfg['in']['raw_pattern'].format(prefix=raw_root, number=probe)
            correct_fun = partial(
                correct_kondrashov_txt if subs_made else correct_baranov_txt,
                dir_out=dir_out)
            # Search plain files only when raw_subdir is not an archive (or dir exists anyway)
            if (not '.zip' in cfg['in']['raw_subdir'].lower() and
                    not '.rar' in cfg['in']['raw_subdir'].lower()) or raw_parent.is_dir():
                raw_found = list(raw_parent.glob(raw_pattern_file))
            if not raw_found:
                # Check if we already have corrected files for the probe generated by
                # correct_kondrashov_txt(). If so then just use them.
                raw_found = list(
                    raw_parent.glob(f"{cfg['in']['probes_prefix']}{probe:0>2}.txt"))
                if raw_found:
                    print('corrected csv file', [r.name for r in raw_found], 'found')
                    correct_fun = lambda x: x  # already corrected: pass file through
                elif not cfg['in']['raw_subdir']:
                    continue

            for file_in in (raw_found or open_csv_or_archive_of_them(
                    raw_parent, binary_mode=False, pattern=raw_pattern_file)):
                file_in = correct_fun(file_in)
                if not file_in:
                    continue
                tbl = f"{cfg['in']['probes_prefix']}{probe:0>2}"
                csv2h5(
                    [
                        str(Path(__file__).parent / 'ini' /
                            f"csv_inclin_{'Kondrashov' if subs_made else 'Baranov'}.ini"),
                        '--path', str(file_in),
                        '--blocksize_int', '50_000_000',  # 50Mbt
                        '--table', tbl,
                        '--db_path', str(db_path),
                        # '--log', str(scripts_path / 'log/csv2h5_inclin_Kondrashov.log'),
                        # '--b_raise_on_err', '0',  # ?
                        '--b_interact', '0',
                        '--fs_float', f'{fs(probe, file_in.stem)}',
                        '--dt_from_utc_seconds',
                        str(cfg['in']['dt_from_utc'].total_seconds()),
                        '--b_del_temp_db', '1',
                    ] +
                    (['--csv_specific_param_dict', 'invert_magnitometr: True']
                     if subs_made else
                     ['--cols_load_list', "yyyy,mm,dd,HH,MM,SS,P,U"]),
                    **{
                        'filter': {
                            'min_date': cfg['filter']['min_date'][probe],
                            'max_date': cfg['filter']['max_date'][probe],
                        }
                    })

                # Get coefs:
                l.info(
                    f"Adding coefficients to {db_path}/{tbl} from {cfg['in']['db_coefs']}")
                try:
                    h5copy_coef(cfg['in']['db_coefs'], db_path, tbl)
                except KeyError as e:  # Unable to open object (component not found)
                    # Write some dummy coefficients so Veusz patterns can still load:
                    l.warning('No coefs to copy?')
                    h5copy_coef(None, db_path, tbl,
                                dict_matrices=dict_matrices_for_h5(tbl=tbl))
                except OSError as e:
                    # Write some dummy coefficients so Veusz patterns can still load:
                    l.warning('Not found DB with coefs?')
                    h5copy_coef(None, db_path, tbl,
                                dict_matrices=dict_matrices_for_h5(tbl=tbl))
                i_proc_file += 1
            else:
                # NOTE(review): for-else with no `break` in the loop body - this branch
                # runs after every completed loop, so the message prints even when files
                # were processed. Kept as-is to preserve observable behavior; confirm intent.
                print('no', raw_pattern_file, end=', ')
            i_proc_probe += 1
        print('Ok:', i_proc_probe, 'probes,', i_proc_file, 'files processed.')

    # Calculate velocity and average
    if st(2):
        # If aggregate_period_s is None then do not average and write to *_proc_noAvg.h5,
        # else load from that h5 and write to *_proc.h5
        if not cfg['out']['aggregate_period_s']:
            cfg['out']['aggregate_period_s'] = [
                None, 2, 600, 3600 if 'w' in cfg['in']['probes_prefix'] else 7200
            ]

        # Fix: default must exist because azimuth_add is always referenced in kwarg below;
        # previously it was unbound (NameError) when cfg['in']['azimuth_add'] was empty.
        azimuth_add = 0
        if cfg['in']['azimuth_add']:
            if 'Lat' in cfg['in']['azimuth_add']:
                from datetime import datetime
                # add magnetic declination,° for used coordinates
                # todo: get time
                azimuth_add = mag_dec(cfg['in']['azimuth_add']['Lat'],
                                      cfg['in']['azimuth_add']['Lon'],
                                      datetime(2020, 9, 10), depth=-1)
            if 'constant' in cfg['in']['azimuth_add']:
                # and add constant. For example, subtract declination at the calibration
                # place if it was applied
                azimuth_add += cfg['in']['azimuth_add']['constant']  # add -6.65644183° to account for calibration in Kaliningrad

        for aggregate_period_s in cfg['out']['aggregate_period_s']:
            if aggregate_period_s is None:
                db_path_in = db_path
                db_path_out = db_path.with_name(f'{db_path.stem}_proc_noAvg.h5')
            else:
                db_path_in = db_path.with_name(f'{db_path.stem}_proc_noAvg.h5')
                db_path_out = f'{db_path.stem}_proc.h5'  # or separately: '_proc{aggregate_period_s}.h5'

            args = [
                Path(incl_h5clc.__file__).with_name(f'incl_h5clc_{db_path.stem}.yaml'),
                # if no such file all settings are here
                '--db_path', str(db_path_in),
                # ! 'incl.*|w\d*' inclinometers or wavegauges w\d\d  # 'incl09':
                '--tables_list',
                'incl.*' if not cfg['in']['probes'] else
                f"incl.*(?:{'|'.join('{:0>2}'.format(p) for p in cfg['in']['probes'])})",
                '--aggregate_period',
                f'{aggregate_period_s}S' if aggregate_period_s else '',
                '--out.db_path', str(db_path_out),
                '--table',
                f'V_incl_bin{aggregate_period_s}' if aggregate_period_s else 'V_incl',
                '--verbose', 'INFO',  # 'DEBUG' gets many numba messages
                '--b_del_temp_db', '1',
                # '--calc_version', 'polynom(force)',  # deprecated
                # '--chunksize', '20000',
                # '--not_joined_h5_path', f'{db_path.stem}_proc.h5',
            ]
            # if aggregate_period_s <= 5:   # [s], do not need split csv for big average interval
            #     args += (['--split_period', '1D'])
            if aggregate_period_s is None:
                # proc. parameters (if we have saved proc. data then when aggregating we
                # are not processing)
                args += ([
                    '--max_dict', 'M[xyz]:4096',
                    # Note: for Baranov's prog 4096 is not suited
                    # '--timerange_zeroing_dict', "incl19: '2019-11-10T13:00:00', '2019-11-10T14:00:00'\n,"  # not works - use kwarg
                    # '--timerange_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
                    '--split_period', '1D'
                ] if subs_made else [
                    '--bad_p_at_bursts_starts_peroiod', '1H',
                ])
            # csv split by 1 day (default for no avg) and monolith csv if aggregate_period_s==600
            if aggregate_period_s not in cfg['out']['aggregate_period_s_not_to_text']:
                args += ['--text_path', str(db_path.parent / 'text_output')]
            kwarg = {
                'in': {
                    'min_date': cfg['filter']['min_date'][0],
                    'max_date': cfg['filter']['max_date'][0],
                    'timerange_zeroing': cfg['in']['timerange_zeroing'],
                    'azimuth_add': azimuth_add
                }
            }
            incl_h5clc.main(args, **kwarg)

    # Calculate spectrograms.
    if st(3):  # Can be done at any time after step 1
        def raise_ni():
            """Guard for mixed sampling frequencies (not supported in one run)"""
            raise NotImplementedError(
                'Can not proc probes having different fs in one run: you need to do it separately'
            )

        args = [
            Path(incl_h5clc.__file__).with_name(f'incl_h5spectrum{db_path.stem}.yaml'),
            # if no such file all settings are here
            '--db_path', str(db_path.with_name(f'{db_path.stem}_proc_noAvg.h5')),
            '--tables_list', f"{cfg['in']['probes_prefix']}.*",  # inclinometers or wavegauges w\d\d  ## 'w02', 'incl.*',
            '--min_date', datetime64_str(cfg['filter']['min_date'][0]),
            '--max_date', datetime64_str(cfg['filter']['max_date'][0]),  # '2019-09-09T16:31:00', #17:00:00
            # '--max_dict', 'M[xyz]:4096',  # use if db_path does not end with _proc_noAvg.h5 i.e. need calc velocity
            '--out.db_path',
            f"{db_path.stem.replace('incl', cfg['in']['probes_prefix'])}_proc_psd.h5",
            # '--table', f'psd{aggregate_period_s}' if aggregate_period_s else 'psd',
            '--fs_float', f"{fs(probes[0], cfg['in']['probes_prefix'])}",
            # '--verbose', 'DEBUG',
            # '--chunksize', '20000',
            '--b_interact', '0',
        ]
        if 'w' in cfg['in']['probes_prefix']:
            args += [
                '--split_period', '1H',
                '--dt_interval_minutes', '10',  # burst mode
                '--fmin', '0.0001',
                '--fmax', '4'
            ]
        else:
            args += [
                '--split_period', '2H',
                '--fmin', '0.0004',
                '--fmax', '1.05'
            ]
        incl_h5spectrum.main(args)

    # Draw in Veusz
    if st(4):
        b_images_only = True  # False
        pattern_path = db_path.parent / r'vsz_5min\191119_0000_5m_incl19.vsz'  # r'vsz_5min\191126_0000_5m_w02.vsz'
        if not b_images_only:
            pattern_bytes_slice_old = re.escape(b'((5828756, 5830223, None),)')

        # Length of not adjacent intervals, s (set None to not allow)
        period = '1D'
        length = '5m'  # period  # '1D'
        dt_custom_s = pd_period_to_timedelta(length) if length != period else None  # None  # 60 * 5

        if True:  # Load starts and assign ends
            t_intervals_start = pd.read_csv(
                cfg['in']['path_cruise'] / r'vsz+h5_proc\intervals_selected.txt',
                converters={'time_start': lambda x: np.datetime64(x, 'ns')},
                index_col=0).index
            edges = (pd.DatetimeIndex(t_intervals_start),
                     pd.DatetimeIndex(t_intervals_start + dt_custom_s))
        else:  # Generate periodic intervals
            t_interval_start, t_intervals_end = intervals_from_period(
                datetime_range=np.array(
                    [cfg['filter']['min_date']['0'], cfg['filter']['max_date']['0']],
                    'datetime64[s]'),
                period=period)
            edges = (pd.DatetimeIndex([t_interval_start]).append(t_intervals_end[:-1]),
                     pd.DatetimeIndex(t_intervals_end))

        for i, probe in enumerate(probes):
            probe_name = f"{cfg['in']['probes_prefix']}{probe:02}"  # table name in db
            l.info('Draw %s in Veusz: %d intervals...', probe_name, edges[0].size)
            cfg_vp = {'veusze': None}
            for i_interval, (t_interval_start, t_interval_end) in enumerate(
                    zip(*edges), start=1):
                # if i_interval < 23:  # <= 0:  # TEMPORARY skip this number of intervals
                #     continue
                if period != length:
                    t_interval_start = t_interval_end - pd.Timedelta(dt_custom_s, 's')
                try:  # skipping absent probes
                    start_end = h5q_interval2coord(
                        db_path=str(db_path),
                        table=f'/{probe_name}',
                        t_interval=(t_interval_start, t_interval_end))
                    if not len(start_end):
                        break  # no data
                except KeyError:
                    break  # device name not in specified range, go to next name

                pattern_path_new = pattern_path.with_name(
                    f"{t_interval_start:%y%m%d_%H%M}_{length}_{probe_name}.vsz")

                # Modify pattern file
                if not b_images_only:
                    # Fix: raw string for the regex (was '.*((?:incl|w)\d*).*')
                    probe_name_old = re.match(
                        r'.*((?:incl|w)\d*).*', pattern_path.name).groups()[0]
                    bytes_slice = bytes(
                        '(({:d}, {:d}, None),)'.format(*(start_end + np.int32([-1, 1]))),
                        'ascii')

                    def f_replace(line):
                        """
                        Replace in file
                        1. probe name
                        2. slice
                        """
                        line, ok = re.subn(bytes(probe_name_old, 'ascii'),
                                           bytes(probe_name, 'ascii'), line)
                        if ok:  # can be only in same line
                            line = re.sub(pattern_bytes_slice_old, bytes_slice, line)
                        return line

                    if not rep_in_file(pattern_path, pattern_path_new,
                                       f_replace=f_replace):
                        l.warning('Veusz pattern not changed!')
                    elif cfg_vp['veusze']:
                        cfg_vp['veusze'].Load(str(pattern_path_new))
                elif cfg_vp['veusze']:
                    cfg_vp['veusze'].Load(str(pattern_path_new))

                txt_time_range = \
                    """
                    "[['{:%Y-%m-%dT%H:%M}', '{:%Y-%m-%dT%H:%M}']]" \
                    """.format(t_interval_start, t_interval_end)
                print(f'{i_interval}. {txt_time_range}', end=' ')

                cfg_vp = veuszPropagate.main(
                    [
                        Path(veuszPropagate.__file__).parent.with_name('veuszPropagate.ini'),
                        # '--data_yield_prefix', '-',
                        '--path', str(db_path),  # use for custom loading from db and some source is required
                        '--tables_list', f'/{probe_name}',  # 181022inclinometers/ \d*
                        '--pattern_path', str(pattern_path_new),
                        # '--before_next', 'restore_config',
                        # '--add_to_filename', f"_{t_interval_start:%y%m%d_%H%M}_{length}",
                        '--filename_fun', f'lambda tbl: "{pattern_path_new.name}"',
                        '--add_custom_list', 'USEtime',  # nAveragePrefer',
                        '--add_custom_expressions_list', txt_time_range,
                        '--b_update_existed', 'True',
                        '--export_pages_int_list', '1, 2',  # 0 for all '6, 7, 8', #'1, 2, 3'
                        # '--export_dpi_int', '200',
                        '--export_format', 'emf',
                        '--b_interact', '0',
                        '--b_images_only', f'{b_images_only}',
                        '--return', '<embedded_object>',  # reuse to not bloat memory
                    ],
                    veusze=cfg_vp['veusze'])
def main(config: ConfigType) -> None: """ ---------------------------- Save data to Pandas HDF5 store*.h5 ---------------------------- The store contains tables for each device and each device table contains log with metadata of recording sessions :param config: with fields: - in - mapping with fields: - tables_log: - log table name or pattern str for it: in pattern '{}' will be replaced by data table name - cols_good_data: - ['dt_from_utc', 'db', 'db_path', 'table_nav'] - out - mapping with fields: - cols: can use i - data row number and i_log_row - log row number that is used to load data range - cols_log: can use i - log row number - text_date_format - file_name_fun, file_name_fun_log - {fun} part of "lambda rec_num, t_st, t_en: {fun}" string to compile function for name of data and log text files - sep """ global cfg cfg = to_vaex_hdf5.cfg_dataclasses.main_init(config, cs_store_name) cfg_in = cfg.pop('input') cfg_in['cfgFile'] = cs_store_name cfg['in'] = cfg_in # try: # cfg = to_vaex_hdf5.cfg_dataclasses.main_init_input_file(cfg, cs_store_name, ) # except Ex_nothing_done: # pass # existed db is not mandatory device_path, cfg['out']['db_path'] = device_in_out_paths( db_path=cfg['out'].get('db_path'), path_cruise=cfg['in']['path_cruise'], device_short_name=cfg['in']['probes_prefix'], device_dir_pattern='*inclinometer*') out = cfg['out'] # h5init(cfg['in'], out) probes = cfg['in']['probes'] or range( 1, 41) # sets default range, specify your values before line --- raw_root, probe_is_incl = re.subn('INCL_?', 'INKL_', cfg['in']['probes_prefix'].upper()) # some parameters that depends of probe type (indicated by probes_prefix) p_type = defaultdict( # baranov's format constant_factory({ 'correct_fun': partial(correct_txt, mod_file_name=mod_incl_name, sub_str_list=[ b'^\r?(?P<use>20\d{2}(\t\d{1,2}){5}(\t\d{5}){8}).*', b'^.+' ]), 'fs': 10, 'format': 'Baranov', }), { 'incl': { 'correct_fun': partial( correct_txt, mod_file_name=mod_incl_name, sub_str_list=[ 
b'^(?P<use>20\d{2}(,\d{1,2}){5}(,\-?\d{1,6}){6}(,\d{1,2}\.\d{2})(,\-?\d{1,3}\.\d{2})).*', b'^.+' ]), 'fs': 5, 'format': 'Kondrashov', }, 'voln': { 'correct_fun': partial( correct_txt, mod_file_name=mod_incl_name, sub_str_list=[ b'^(?P<use>20\d{2}(,\d{1,2}){5}(,\-?\d{1,8})(,\-?\d{1,2}\.\d{2}){2}).*', b'^.+' ]), 'fs': 5, #'tbl_prefix': 'w', 'format': 'Kondrashov', } }) if st(1, 'Save inclinometer or wavegage data from ASCII to HDF5'): # Note: Can not find additional not corrected files for same probe if already have any corrected in search path (move them out if need) i_proc_probe = 0 # counter of processed probes i_proc_file = 0 # counter of processed files # patten to identify only _probe_'s raw data files that need to correct '*INKL*{:0>2}*.[tT][xX][tT]': raw_parent = dir_incl / '_raw' # raw_parent /= if cfg['in']['raw_subdir'] is None: cfg['in']['raw_subdir'] = '' dir_out = raw_parent / re.sub(r'[.\\/ *?]', '_', cfg['in']['raw_subdir']) # sub replaces multilevel subdirs to 1 level that correct_fun() can only make def dt_from_utc_2000(probe): """ Correct time of probes started without time setting. 
Raw date must start from 2000-01-01T00:00""" return ( datetime(year=2000, month=1, day=1) - cfg['in']['time_start_utc'][probe] ) if cfg['in']['time_start_utc'].get(probe) else timedelta(0) # convert cfg['in']['dt_from_utc'] keys to int cfg['in']['dt_from_utc'] = { int(p): v for p, v in cfg['in']['dt_from_utc'].items() } # convert cfg['in']['t_start_utc'] to cfg['in']['dt_from_utc'] and keys to int cfg['in']['dt_from_utc'].update( # overwriting the 'time_start_utc' where already exist {int(p): dt_from_utc_2000(p) for p, v in cfg['in']['time_start_utc'].items()} ) # make cfg['in']['dt_from_utc'][0] be default value cfg['in']['dt_from_utc'] = defaultdict( constant_factory(cfg['in']['dt_from_utc'].pop(0, timedelta(0))), cfg['in']['dt_from_utc']) for probe in probes: raw_found = [] raw_pattern_file = str( Path(glob.escape(cfg['in']['raw_subdir'])) / cfg['in']['raw_pattern'].format(prefix=raw_root, number=probe)) correct_fun = p_type[cfg['in']['probes_prefix']]['correct_fun'] # if not archive: if (not re.match(r'.*(\.zip|\.rar)$', cfg['in']['raw_subdir'], re.IGNORECASE)) and raw_parent.is_dir(): raw_found = list(raw_parent.glob(raw_pattern_file)) if not raw_found: # Check if already have corrected files for probe generated by correct_txt(). 
If so then just use them raw_found = list( dir_out.glob( f"{cfg['in']['probes_prefix']}{probe:0>2}.txt")) if raw_found: print('corrected csv file', [r.name for r in raw_found], 'found') correct_fun = lambda x, dir_out: x elif not cfg['in']['raw_subdir']: continue for file_in in (raw_found or open_csv_or_archive_of_them( raw_parent, binary_mode=False, pattern=raw_pattern_file)): file_in = correct_fun(file_in, dir_out=dir_out) if not file_in: continue tbl = file_in.stem # f"{cfg['in']['probes_prefix']}{probe:0>2}" # tbl = re.sub('^((?P<i>inkl)|w)_0', lambda m: 'incl' if m.group('i') else 'w', # correct name # re.sub('^[\d_]*|\*', '', file_in.stem).lower()), # remove date-prefix if in name csv2h5( [ str( Path(__file__).parent / 'ini' / f"csv_{'inclin' if probe_is_incl else 'wavegage'}_{p_type[cfg['in']['probes_prefix']]['format']}.ini" ), '--path', str(file_in), '--blocksize_int', '50_000_000', # 50Mbt '--table', tbl, '--db_path', str(db_path), # '--log', str(scripts_path / 'log/csv2h5_inclin_Kondrashov.log'), # '--b_raise_on_err', '0', # ? '--b_interact', '0', '--fs_float', str(p_type[cfg['in']['probes_prefix']] ['fs']), #f'{fs(probe, file_in.stem)}', '--dt_from_utc_seconds', str(cfg['in']['dt_from_utc'][probe].total_seconds()), '--b_del_temp_db', '1', ] + (['--csv_specific_param_dict', 'invert_magnitometr: True'] if probe_is_incl else []), **{ 'filter': { 'min_date': cfg['filter']['min_date'].get( probe, np.datetime64(0, 'ns')), 'max_date': cfg['filter']['max_date'].get( probe, np.datetime64('now', 'ns') ), # simple 'now' works in sinchronious mode } }) # Get coefs: l.info( f"Adding coefficients to {db_path}/{tbl} from {cfg['in']['db_coefs']}" ) try: h5copy_coef(cfg['in']['db_coefs'], db_path, tbl) except KeyError as e: # Unable to open object (component not found) l.warning( 'No coefs to copy?' 
) # write some dummy coefficients to can load Veusz patterns: h5copy_coef(None, db_path, tbl, dict_matrices=dict_matrices_for_h5(tbl=tbl)) except OSError as e: l.warning( 'Not found DB with coefs?' ) # write some dummy coefficients to can load Veusz patterns: h5copy_coef(None, db_path, tbl, dict_matrices=dict_matrices_for_h5(tbl=tbl)) i_proc_file += 1 else: print('no', raw_pattern_file, end=', ') i_proc_probe += 1 print('Ok:', i_proc_probe, 'probes,', i_proc_file, 'files processed.') cfg_in['tables'] = ['incl30'] from inclinometer.incl_h5clc import h5_names_gen from inclinometer.h5inclinometer_coef import rot_matrix_x, rot_matrix_y #rotate_x, rotate_y # R*[xyz]. As we next will need apply coefs Ag = Rz*Ry*Rx we can incorporate this # operation by precalculate it adding known angles on each axes to Rz,Ry,Rx. # If rotation is 180 deg, then we can add it only to Rx. Modified coef: Ag_new = Rz*Ry*R(x+180) # R(x+180) = Rx*Rx180 equivalent to rotate Ag.T in opposite direction: # Ag_new = rotate_x() # inclinometer changed so that applying coefs returns rotated data fiels vectors: # Out_rotated = Ag * In # We rotate it back: # Out = rotate(Out_rotated) = # after angle after calibration to some angle P so determine angle relative to vertical # by rotate data vector in opposite dir: Out = Ag * R_back * In. 
This equivalent to have new coef by apply rotation to Ag: # Ag_new = Ag * R_back = (R_back.T * Ag.T).T = rotate_forward(Ag.T).T = # Applying calibration coef will get data in inverted basis so we need rotate it after: # # coefs['Ag'] = rotate_x(coefs['Ag'], angle_degrees=180) # coefs['Ah'] = rotate_x(coefs['Ah'], angle_degrees=180) # dfLogOld, cfg_out['db'], cfg_out['b_skip_if_up_to_date'] = h5temp_open(**cfg_out) for i1, (tbl, coefs) in enumerate(h5_names_gen(cfg_in), start=1): # using property of rotation around same axis: R(x, θ1)@R(x, θ2) = R(x, θ1 + θ2) coefs['Ag'] = coefs['Ag'] @ rot_matrix_x(np.cos(np.pi), np.sin(np.pi)) coefs['Ah'] = coefs['Ah'] @ rot_matrix_x(np.cos(np.pi), np.sin(np.pi)) coefs['azimuth_shift_deg'] = 180 h5copy_coef(None, cfg['out']['db_path'], tbl, dict_matrices=dict_matrices_for_h5(coefs, tbl, to_nested_keys=True)) # Calculate velocity and average if st(2): # if aggregate_period_s is None then not average and write to *_proc_noAvg.h5 else loading from that h5 and writing to _proc.h5 if not cfg['out']['aggregate_period_s']: cfg['out']['aggregate_period_s'] = [ None, 2, 600, 3600 if 'w' in cfg['in']['probes_prefix'] else 7200 ] if cfg['in']['azimuth_add']: if 'Lat' in cfg['in']['azimuth_add']: from datetime import datetime # add magnetic declination,° for used coordinates # todo: get time azimuth_add = mag_dec(cfg['in']['azimuth_add']['Lat'], cfg['in']['azimuth_add']['Lon'], datetime(2020, 9, 10), depth=-1) else: azimuth_add = 0 if 'constant' in cfg['in']['azimuth_add']: # and add constant. 
For example, subtruct declination at the calibration place if it was applied azimuth_add += cfg['in']['azimuth_add'][ 'constant'] # add -6.65644183° to account for calibration in Kaliningrad for aggregate_period_s in cfg['out']['aggregate_period_s']: if aggregate_period_s is None: db_path_in = db_path db_path_out = db_path.with_name( f'{db_path.stem}_proc_noAvg.h5') else: db_path_in = db_path.with_name(f'{db_path.stem}_proc_noAvg.h5') db_path_out = f'{db_path.stem}_proc.h5' # or separately: '_proc{aggregate_period_s}.h5' args = [ Path(incl_h5clc.__file__).with_name( f'incl_h5clc_{db_path.stem}.yaml'), # if no such file all settings are here '--db_path', str(db_path_in), # ! 'incl.*|w\d*' inclinometers or wavegauges w\d\d # 'incl09': '--tables_list', 'incl.*' if not cfg['in']['probes'] else f"incl.*(?:{'|'.join('{:0>2}'.format(p) for p in cfg['in']['probes'])})", '--aggregate_period', f'{aggregate_period_s}S' if aggregate_period_s else '', '--out.db_path', str(db_path_out), '--table', f'V_incl_bin{aggregate_period_s}' if aggregate_period_s else 'V_incl', '--verbose', 'INFO', #'DEBUG' get many numba messages '--b_del_temp_db', '1', # '--calc_version', 'polynom(force)', # depreshiated # '--chunksize', '20000', # '--not_joined_h5_path', f'{db_path.stem}_proc.h5', ] # if aggregate_period_s <= 5: # [s], do not need split csv for big average interval # args += (['--split_period', '1D']) if aggregate_period_s is None: # proc. parameters (if we have saved proc. 
data then when aggregating we are not processing) args += ([ '--max_dict', 'M[xyz]:4096', # Note: for Baranov's prog 4096 is not suited # '--time_range_zeroing_dict', "incl19: '2019-11-10T13:00:00', '2019-11-10T14:00:00'\n," # not works - use kwarg # '--time_range_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00' '--split_period', '1D' ] if subs_made else [ '--bad_p_at_bursts_starts_peroiod', '1H', ]) # csv splitted by 1day (default for no avg) and monolith csv if aggregate_period_s==600 if aggregate_period_s not in cfg['out'][ 'aggregate_period_s_not_to_text']: # , 300, 600]: args += ['--text_path', str(db_path.parent / 'text_output')] kwarg = { 'in': { 'min_date': cfg['filter']['min_date'][0], 'max_date': cfg['filter']['max_date'][0], 'time_range_zeroing': cfg['in']['time_range_zeroing'], 'azimuth_add': azimuth_add } } # If need all data to be combined one after one: # set_field_if_no(kwarg, 'in', {}) # kwarg['in'].update({ # # 'tables': [f'incl{i:0>2}' for i in min_date.keys() if i!=0], # 'dates_min': min_date.values(), # in table list order # 'dates_max': max_date.values(), # # }) # set_field_if_no(kwarg, 'out', {}) # kwarg['out'].update({'b_all_to_one_col': 'True'}) incl_h5clc.main(args, **kwarg) # Calculate spectrograms. 
if st(3): # Can be done at any time after step 1 def raise_ni(): raise NotImplementedError( 'Can not proc probes having different fs in one run: you need to do it separately' ) args = [ Path(incl_h5clc.__file__).with_name( f'incl_h5spectrum{db_path.stem}.yaml'), # if no such file all settings are here '--db_path', str(db_path.with_name(f'{db_path.stem}_proc_noAvg.h5')), '--tables_list', f"{cfg['in']['probes_prefix']}.*", # inclinometers or wavegauges w\d\d ## 'w02', 'incl.*', # '--aggregate_period', f'{aggregate_period_s}S' if aggregate_period_s else '', '--min_date', datetime64_str(cfg['filter']['min_date'][0]), '--max_date', datetime64_str(cfg['filter']['max_date'] [0]), # '2019-09-09T16:31:00', #17:00:00 # '--max_dict', 'M[xyz]:4096', # use if db_path is not ends with _proc_noAvg.h5 i.e. need calc velocity '--out.db_path', f"{db_path.stem.replace('incl', cfg['in']['probes_prefix'])}_proc_psd.h5", # '--table', f'psd{aggregate_period_s}' if aggregate_period_s else 'psd', '--fs_float', f"{fs(probes[0], cfg['in']['probes_prefix'])}", # (lambda x: x == x[0])(np.vectorize(fs)(probes, prefix))).all() else raise_ni() # # '--time_range_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00' # '--verbose', 'DEBUG', # '--chunksize', '20000', '--b_interact', '0', ] if 'w' in cfg['in']['probes_prefix']: args += [ '--split_period', '1H', '--dt_interval_minutes', '10', # burst mode '--fmin', '0.0001', '--fmax', '4' ] else: args += [ '--split_period', '2H', '--fmin', '0.0004', #0.0004 '--fmax', '1.05' ] incl_h5spectrum.main(args) # Draw in Veusz if st(4): b_images_only = True # False pattern_path = db_path.parent / r'vsz_5min\191119_0000_5m_incl19.vsz' # r'vsz_5min\191126_0000_5m_w02.vsz' if not b_images_only: pattern_bytes_slice_old = re.escape(b'((5828756, 5830223, None),)') # Length of not adjacent intervals, s (set None to not allow) period = '1D' length = '5m' # period # '1D' dt_custom_s = pd_period_to_timedelta( length) if length != period else None # None # 60 * 5 if 
True: # Load starts and assign ends t_intervals_start = pd.read_csv( cfg['in']['path_cruise'] / r'vsz+h5_proc\intervals_selected.txt', converters={ 'time_start': lambda x: np.datetime64(x, 'ns') }, index_col=0).index edges = (pd.DatetimeIndex(t_intervals_start), pd.DatetimeIndex(t_intervals_start + dt_custom_s) ) # np.zeros_like() else: # Generate periodic intervals t_interval_start, t_intervals_end = intervals_from_period( datetime_range=np.array( [ cfg['filter']['min_date']['0'], cfg['filter']['max_date']['0'] ], # ['2018-08-11T18:00:00', '2018-09-06T00:00:00'], # ['2019-02-11T13:05:00', '2019-03-07T11:30:00'], # ['2018-11-16T15:19', '2018-12-14T14:35'], # ['2018-10-22T12:30', '2018-10-27T06:30:00'], 'datetime64[s]'), period=period) edges = (pd.DatetimeIndex([t_interval_start ]).append(t_intervals_end[:-1]), pd.DatetimeIndex(t_intervals_end)) for i, probe in enumerate(probes): probe_name = f"{cfg['in']['probes_prefix']}{probe:02}" # table name in db l.info('Draw %s in Veusz: %d intervals...', probe_name, edges[0].size) # for i_interval, (t_interval_start, t_interval_end) in enumerate(zip(pd.DatetimeIndex([t_interval_start]).append(t_intervals_end[:-1]), t_intervals_end), start=1): cfg_vp = {'veusze': None} for i_interval, (t_interval_start, t_interval_end) in enumerate(zip(*edges), start=1): # if i_interval < 23: #<= 0: # TEMPORARY Skip this number of intervals # continue if period != length: t_interval_start = t_interval_end - pd.Timedelta( dt_custom_s, 's') try: # skipping absent probes start_end = h5q_interval2coord( db_path=str(db_path), table=f'/{probe_name}', t_interval=(t_interval_start, t_interval_end)) if not len(start_end): break # no data except KeyError: break # device name not in specified range, go to next name pattern_path_new = pattern_path.with_name( f"{t_interval_start:%y%m%d_%H%M}_{length}_{probe_name}.vsz" ) # Modify pattern file if not b_images_only: probe_name_old = re.match('.*((?:incl|w)\d*).*', pattern_path.name).groups()[0] bytes_slice = 
bytes( '(({:d}, {:d}, None),)'.format(*(start_end + np.int32([-1, 1]))), 'ascii') def f_replace(line): """ Replace in file 1. probe name 2. slice """ # if i_interval == 1: line, ok = re.subn(bytes(probe_name_old, 'ascii'), bytes(probe_name, 'ascii'), line) if ok: # can be only in same line line = re.sub(pattern_bytes_slice_old, bytes_slice, line) return line if not rep_in_file(pattern_path, pattern_path_new, f_replace=f_replace): l.warning('Veusz pattern not changed!') # break elif cfg_vp['veusze']: cfg_vp['veusze'].Load(str(pattern_path_new)) elif cfg_vp['veusze']: cfg_vp['veusze'].Load(str(pattern_path_new)) txt_time_range = \ """ "[['{:%Y-%m-%dT%H:%M}', '{:%Y-%m-%dT%H:%M}']]" \ """.format(t_interval_start, t_interval_end) print(f'{i_interval}. {txt_time_range}', end=' ') cfg_vp = veuszPropagate.main( [ Path(veuszPropagate.__file__).parent.with_name( 'veuszPropagate.ini'), # '--data_yield_prefix', '-', '--path', str( db_path ), # use for custom loading from db and some source is required '--tables_list', f'/{probe_name}', # 181022inclinometers/ \d* '--pattern_path', str(pattern_path_new), # fr'd:\workData\BalticSea\190801inclinometer_Schuka\{probe_name}_190807_1D.vsz', # str(db_path.parent / dir_incl / f'{probe_name}_190211.vsz'), #warning: create file with small name # '--before_next', 'restore_config', # '--add_to_filename', f"_{t_interval_start:%y%m%d_%H%M}_{length}", '--filename_fun', f'lambda tbl: "{pattern_path_new.name}"', '--add_custom_list', 'USEtime', # nAveragePrefer', '--add_custom_expressions_list', txt_time_range, # + """ # ", 5" # """, '--b_update_existed', 'True', '--export_pages_int_list', '1, 2', # 0 for all '6, 7, 8', #'1, 2, 3' # '--export_dpi_int', '200', '--export_format', 'emf', '--b_interact', '0', '--b_images_only', f'{b_images_only}', '--return', '<embedded_object>', # reuse to not bloat memory ], veusze=cfg_vp['veusze'])
'M,A', '--b_update_existed', 'True', # to not skip. '--export_pages_int_list', '0', # 0 = all '--b_interact', 'False' ]) # if step == 3: # to 1st db too # l = init_logging(logging, None) l.info( f"Adding coefficients to {db_path_calibr_scalling}/{tbl} from {db_path_tank}" ) h5copy_coef(db_path_tank, db_path_calibr_scalling, tbl, ok_to_replace_group=True) if step == 3: time_ranges_nord = { 1: ['2019-07-11T18:48:35', '2019-07-11T18:49:20'], # 7: ['2019-07-11T16:53:40', '2019-07-11T16:54:10'], ??? # 30: ['2019-07-09T17:54:50', '2019-07-09T17:55:22'], 4: ['2019-07-11T17:22:15', '2019-07-11T17:23:08'], 5: ['2019-07-11T18:27:10', '2019-07-11T18:27:48'], 9: ['2019-12-20T16:58:30', '2019-12-20T16:59:15'], 10: ['2019-12-23T17:32:35', '2019-12-23T17:33:27'], 11: ['2019-07-11T17:41:44', '2019-07-11T18:42:48'], 12: ['2019-07-11T18:04:46', '2019-07-11T18:05:36'], 14: ['2019-09-02T14:01:41', '2019-09-02T14:02:15'], # todo 16: ['2019-09-03T19:22:20', '2019-09-03T19:22:54'],
def main(new_arg=None, **kwargs):
    """
    Workflow runner for inclinometer / wavegauge data. Executes numbered steps selected
    by cfg['program']['step_start'/'step_end'] via the st() gate:
      1  - correct raw ASCII files and load them to HDF5 (csv2h5), copy calibration coefs
      2  - calculate physical parameters and average (incl_h5clc)
      3  - calculate spectrograms (incl_h5spectrum)
      4  - draw intervals in Veusz from a pattern file (veuszPropagate)
      40 - draw in Veusz by loader-drawer.vsz method (saves many .vsz copies)
      50 - export images from existed Veusz files in dir

    :param new_arg: list of strings, command line arguments
    :kwargs: dicts of dicts (for each ini section): specified values overwrites ini values
    """
    # global l
    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)
    if not cfg['program']:
        return  # usually error of unrecognized arguments displayed

    # Make configured input paths absolute, resolving relative ones against the config file dir
    cfg['in']['db_coefs'] = Path(cfg['in']['db_coefs'])
    for path_field in ['db_coefs', 'path_cruise']:
        if not cfg['in'][path_field].is_absolute():
            cfg['in'][path_field] = (
                cfg['in']['cfgFile'].parent / cfg['in'][path_field]
            ).resolve().absolute()  # cfg['in']['cfgFile'].parent /

    def constant_factory(val):
        # Returns a zero-argument callable yielding val — used as defaultdict default_factory
        def default_val():
            return val
        return default_val

    for lim in ('min_date', 'max_date'):
        # convert keys to int because they must be comparable to probes_int_list (for command line arguments keys are allways strings, in yaml you can set string or int)
        _ = {int(k): v for k, v in cfg['filter'][lim].items()}
        # key 0 (if present) becomes the default for probes without an explicit entry
        cfg['filter'][lim] = defaultdict(constant_factory(_.get(0)), _)

    l = init_logging(logging, None, None, 'INFO')
    #l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])

    if True:  # False. Experimental speedup but takes memory
        from dask.cache import Cache
        cache = Cache(2e9)  # Leverage two gigabytes of memory
        cache.register()  # Turn cache on globally
    #if __debug__:
    #    # because there was errors on debug when default scheduler used
    #    cfg['program']['dask_scheduler'] = 'synchronous'

    if cfg['program']['dask_scheduler']:
        if cfg['program']['dask_scheduler'] == 'distributed':
            from dask.distributed import Client
            # cluster = dask.distributed.LocalCluster(n_workers=2, threads_per_worker=1, memory_limit="5.5Gb")
            client = Client(processes=False)
            # navigate to http://localhost:8787/status to see the diagnostic dashboard if you have Bokeh installed
            # processes=False: avoide inter-worker communication for computations releases the GIL (numpy, da.array)
            # without is error
        else:
            if cfg['program']['dask_scheduler'] == 'synchronous':
                l.warning('using "synchronous" scheduler for debugging')
            import dask
            dask.config.set(scheduler=cfg['program']['dask_scheduler'])

    # Run steps :
    st.start = cfg['program']['step_start']
    st.end = cfg['program']['step_end']
    st.go = True

    if not cfg['out']['db_name']:
        # set name by 'path_cruise' name or parent if it has digits at start. priority for name is "*inclinometer*"
        # NOTE(review): '(^[\d_]*).*' matches any name (the group may be empty), so this
        # always breaks on the first candidate p — confirm whether the parent fallback is intended
        for p in (lambda p: [p, p.parent])(cfg['in']['path_cruise']):
            m = re.match('(^[\d_]*).*', p.name)
            if m:
                break
        cfg['out']['db_name'] = f"{m.group(1).strip('_')}incl.h5"
    dir_incl = next((d for d in cfg['in']['path_cruise'].glob('*inclinometer*') if d.is_dir()),
                    cfg['in']['path_cruise'])
    db_path = dir_incl / '_raw' / cfg['out']['db_name']
    # ---------------------------------------------------------------------------------------------
    # def fs(probe, name):
    #     if 'w' in name.lower():  # Baranov's wavegauge electronic
    #         return 10  # 5
    #     return 5
    #     # if probe < 20 or probe in [23, 29, 30, 32, 33]:  # 30 [4, 11, 5, 12] + [1, 7, 13, 30]
    #     #     return 5
    #     # if probe in [21, 25, 26] + list(range(28, 35)):
    #     #     return 8.2
    #     # return 4.8

    def datetime64_str(time_str: Optional[str] = None) -> np.ndarray:
        """
        Reformat time_str to ISO 8601 or to 'NaT'. Used here for input in funcs that converts str to numpy.datetime64
        :param time_str: May be 'NaT'
        :return: ndarray of strings (tested for 1 element only) formatted by numpy.
        """
        return np.datetime_as_string(np.datetime64(time_str, 's'))

    probes = cfg['in']['probes'] or range(1, 41)  # sets default range, specify your values before line ---
    # raw file prefixes use 'INKL'; probe_is_incl is the substitution count (truthy if prefix was incl*)
    raw_root, probe_is_incl = re.subn('INCL_?', 'INKL_',
                                      cfg['in']['probes_prefix'].upper())

    # some parameters that depends of probe type (indicated by probes_prefix)
    p_type = defaultdict(
        # baranov's format
        constant_factory({
            'correct_fun': partial(correct_txt,
                                   mod_file_name=mod_incl_name,
                                   sub_str_list=[
                                       b'^\r?(?P<use>20\d{2}(\t\d{1,2}){5}(\t\d{5}){8}).*',
                                       b'^.+'
                                   ]),
            'fs': 10,
            'format': 'Baranov',
        }),
        {
            (lambda x: x if x.startswith('incl') else 'incl')(cfg['in']['probes_prefix']): {
                'correct_fun': partial(
                    correct_txt,
                    mod_file_name=mod_incl_name,
                    sub_str_list=[
                        b'^(?P<use>20\d{2}(,\d{1,2}){5}(,\-?\d{1,6}){6}(,\d{1,2}\.\d{2})(,\-?\d{1,3}\.\d{2})).*',
                        b'^.+'
                    ]),
                'fs': 5,
                'format': 'Kondrashov',
            },
            'voln': {
                'correct_fun': partial(
                    correct_txt,
                    mod_file_name=mod_incl_name,
                    sub_str_list=[
                        b'^(?P<use>20\d{2}(,\d{1,2}){5}(,\-?\d{1,8})(,\-?\d{1,2}\.\d{2}){2}).*',
                        b'^.+'
                    ]),
                'fs': 5,
                #'tbl_prefix': 'w',
                'format': 'Kondrashov',
            }
        })

    if st(1, 'Save inclinometer or wavegage data from ASCII to HDF5'):
        # Note: Can not find additional not corrected files for same probe if already have any corrected in search path (move them out if need)
        i_proc_probe = 0  # counter of processed probes
        i_proc_file = 0  # counter of processed files
        # patten to identify only _probe_'s raw data files that need to correct '*INKL*{:0>2}*.[tT][xX][tT]':

        raw_parent = dir_incl / '_raw'  # raw_parent /=
        if cfg['in']['raw_subdir'] is None:
            cfg['in']['raw_subdir'] = ''

        dir_out = raw_parent / re.sub(r'[.\\/ *?]', '_', cfg['in']['raw_subdir'])
        # sub replaces multilevel subdirs to 1 level that correct_fun() can only make

        def dt_from_utc_2000(probe):
            """ Correct time of probes started without time setting. Raw date must start from 2000-01-01T00:00"""
            return (
                datetime(year=2000, month=1, day=1) - cfg['in']['time_start_utc'][probe]
            ) if cfg['in']['time_start_utc'].get(probe) else timedelta(0)

        # convert cfg['in']['dt_from_utc'] keys to int
        cfg['in']['dt_from_utc'] = {int(p): v for p, v in cfg['in']['dt_from_utc'].items()}
        # convert cfg['in']['t_start_utc'] to cfg['in']['dt_from_utc'] and keys to int
        cfg['in']['dt_from_utc'].update(  # overwriting the 'time_start_utc' where already exist
            {int(p): dt_from_utc_2000(p) for p, v in cfg['in']['time_start_utc'].items()}
        )
        # make cfg['in']['dt_from_utc'][0] be default value
        cfg['in']['dt_from_utc'] = defaultdict(
            constant_factory(cfg['in']['dt_from_utc'].pop(0, timedelta(0))),
            cfg['in']['dt_from_utc'])

        for probe in probes:
            raw_found = []
            raw_pattern_file = str(
                Path(glob.escape(cfg['in']['raw_subdir'])) /
                cfg['in']['raw_pattern'].format(prefix=raw_root, number=probe))
            correct_fun = p_type[cfg['in']['probes_prefix']]['correct_fun']
            # if not archive:
            if (not re.match(r'.*(\.zip|\.rar)$', cfg['in']['raw_subdir'], re.IGNORECASE)) and raw_parent.is_dir():
                raw_found = list(raw_parent.glob(raw_pattern_file))
            if not raw_found:
                # Check if already have corrected files for probe generated by correct_txt(). If so then just use them
                raw_found = list(
                    dir_out.glob(f"{cfg['in']['probes_prefix']}{probe:0>2}.txt"))
                if raw_found:
                    print('corrected csv file', [r.name for r in raw_found], 'found')
                    correct_fun = lambda x, dir_out: x  # already corrected: pass file through unchanged
                elif not cfg['in']['raw_subdir']:
                    continue

            for file_in in (raw_found or open_csv_or_archive_of_them(
                    raw_parent, binary_mode=False, pattern=raw_pattern_file)):
                file_in = correct_fun(file_in, dir_out=dir_out)
                if not file_in:
                    continue
                tbl = file_in.stem  # f"{cfg['in']['probes_prefix']}{probe:0>2}"
                # tbl = re.sub('^((?P<i>inkl)|w)_0', lambda m: 'incl' if m.group('i') else 'w',  # correct name
                #              re.sub('^[\d_]*|\*', '', file_in.stem).lower()),  # remove date-prefix if in name
                csv2h5(
                    [
                        str(
                            Path(__file__).parent / 'ini' /
                            f"csv_{'inclin' if probe_is_incl else 'wavegage'}_{p_type[cfg['in']['probes_prefix']]['format']}.ini"
                        ),
                        '--path', str(file_in),
                        '--blocksize_int', '50_000_000',  # 50Mbt
                        '--table', tbl,
                        '--db_path', str(db_path),
                        # '--log', str(scripts_path / 'log/csv2h5_inclin_Kondrashov.log'),
                        # '--b_raise_on_err', '0',  # ?
                        '--b_interact', '0',
                        '--fs_float', str(p_type[cfg['in']['probes_prefix']]['fs']),  #f'{fs(probe, file_in.stem)}',
                        '--dt_from_utc_seconds', str(cfg['in']['dt_from_utc'][probe].total_seconds()),
                        '--b_del_temp_db', '1',
                    ] +
                    (['--csv_specific_param_dict', 'invert_magnitometr: True']
                     if probe_is_incl else []),
                    **{
                        'filter': {
                            'min_date': cfg['filter']['min_date'].get(probe, np.datetime64(0, 'ns')),
                            'max_date': cfg['filter']['max_date'].get(probe, np.datetime64('now', 'ns')),  # simple 'now' works in sinchronious mode
                        }
                    })

                # Get coefs:
                l.info(f"Adding coefficients to {db_path}/{tbl} from {cfg['in']['db_coefs']}")
                try:
                    h5copy_coef(cfg['in']['db_coefs'], db_path, tbl)
                except KeyError as e:  # Unable to open object (component not found)
                    l.warning('No coefs to copy?')
                    # write some dummy coefficients to can load Veusz patterns:
                    h5copy_coef(None, db_path, tbl, dict_matrices=dict_matrices_for_h5(tbl=tbl))
                except OSError as e:
                    l.warning('Not found DB with coefs?')
                    # write some dummy coefficients to can load Veusz patterns:
                    h5copy_coef(None, db_path, tbl, dict_matrices=dict_matrices_for_h5(tbl=tbl))
                i_proc_file += 1
            else:
                # NOTE(review): this is a for-else with no `break` in the loop, so it runs
                # after EVERY probe, even successfully processed ones — confirm intent
                print('no', raw_pattern_file, end=', ')
            i_proc_probe += 1
        print('Ok:', i_proc_probe, 'probes,', i_proc_file, 'files processed.')

    if st(2, 'Calculate physical parameters and average'):
        kwarg = {
            'in': {
                'min_date': cfg['filter']['min_date'][0],
                'max_date': cfg['filter']['max_date'][0],
                'time_range_zeroing': cfg['in']['time_range_zeroing']
            },
            'proc': {}
        }
        # if aggregate_period_s is None then not average and write to *_proc_noAvg.h5 else loading from that h5 and writing to _proc.h5
        if not cfg['out']['aggregate_period_s']:
            cfg['out']['aggregate_period_s'] = [None, 2, 600, 7200 if probe_is_incl else 3600]

        if cfg['in']['azimuth_add']:
            if 'Lat' in cfg['in']['azimuth_add']:
                # add magnetic declination,° for used coordinates
                # todo: get time
                kwarg['proc']['azimuth_add'] = mag_dec(
                    cfg['in']['azimuth_add']['Lat'], cfg['in']['azimuth_add']['Lon'],
                    datetime(2020, 9, 10), depth=-1)
            else:
                kwarg['proc']['azimuth_add'] = 0
            if 'constant' in cfg['in']['azimuth_add']:
                # and add constant. For example, subtract declination at the calibration place if it was applied
                kwarg['proc']['azimuth_add'] += cfg['in']['azimuth_add']['constant']  # add -6.656 to account for calibration in Kaliningrad (mag deg = 6.656°)

        for aggregate_period_s in cfg['out']['aggregate_period_s']:
            if aggregate_period_s is None:
                db_path_in = db_path
                db_path_out = dir_incl / f'{db_path.stem}_proc_noAvg.h5'
            else:
                db_path_in = dir_incl / f'{db_path.stem}_proc_noAvg.h5'
                db_path_out = dir_incl / f'{db_path.stem}_proc.h5'  # or separately: '_proc{aggregate_period_s}.h5'

            # 'incl.*|w\d*' inclinometers or wavegauges w\d\d  # 'incl09':
            tables_list_regex = f"{cfg['in']['probes_prefix'].replace('voln', 'w')}.*"
            if cfg['in']['probes']:
                tables_list_regex += "(?:{})".format(
                    '|'.join('{:0>2}'.format(p) for p in cfg['in']['probes']))

            args = [
                '../../empty.yml',  # all settings are here, so to not print 'using default configuration' we use some existed empty file
                '--db_path', str(db_path_in),
                '--tables_list', tables_list_regex,
                '--aggregate_period', f'{aggregate_period_s}S' if aggregate_period_s else '',
                '--out.db_path', str(db_path_out),
                '--table', f'V_incl_bin{aggregate_period_s}' if aggregate_period_s else 'V_incl',
                '--verbose', 'INFO',  #'DEBUG' get many numba messages
                '--b_del_temp_db', '1',
                # '--calc_version', 'polynom(force)',  # depreshiated
                # '--chunksize', '20000',
                # '--not_joined_h5_path', f'{db_path.stem}_proc.h5',
            ]

            if aggregate_period_s is None:  # proc. parameters (if we have saved proc. data then when aggregating we are not processing)
                # Note: for Baranov's prog 4096 is not suited:
                args += ([
                    '--max_dict', 'M[xyz]:4096',
                    # '--time_range_zeroing_dict', "incl19: '2019-11-10T13:00:00', '2019-11-10T14:00:00'\n,"  # not works - use kwarg
                    # '--time_range_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
                    '--split_period', '1D'
                ] if probe_is_incl else [
                    '--bad_p_at_bursts_starts_peroiod', '1H',
                ])
            # csv splitted by 1day (default for no avg) else csv is monolith
            if aggregate_period_s not in cfg['out']['aggregate_period_s_not_to_text']:  # , 300, 600]:
                args += ['--text_path', str(dir_incl / 'text_output')]
            # If need all data to be combined one after one:
            # set_field_if_no(kwarg, 'in', {})
            # kwarg['in'].update({
            #     # 'tables': [f'incl{i:0>2}' for i in min_date.keys() if i!=0],
            #     'dates_min': min_date.values(),  # in table list order
            #     'dates_max': max_date.values(),  #
            # })
            # set_field_if_no(kwarg, 'out', {})
            # kwarg['out'].update({'b_all_to_one_col': 'True'})

            incl_h5clc.main(args, **kwarg)

    if st(3, 'Calculate spectrograms'):  # Can be done at any time after step 1
        min_Pressure = 7

        # add dict dates_min like {probe: parameter} of incl_clc to can specify param to each probe
        def raise_ni():
            raise NotImplementedError(
                'Can not proc probes having different fs in one run: you need to do it separately'
            )

        args = [
            Path(incl_h5clc.__file__).with_name(f'incl_h5spectrum{db_path.stem}.yaml'),
            # if no such file all settings are here
            '--db_path', str(dir_incl / f'{db_path.stem}_proc_noAvg.h5'),
            '--tables_list', f"{cfg['in']['probes_prefix']}.*",  # inclinometers or wavegauges w\d\d  ## 'w02', 'incl.*',
            # '--aggregate_period', f'{aggregate_period_s}S' if aggregate_period_s else '',
            '--min_date', datetime64_str(cfg['filter']['min_date'][0]),
            '--max_date', datetime64_str(cfg['filter']['max_date'][0]),  # '2019-09-09T16:31:00', #17:00:00
            '--min_Pressure', f'{min_Pressure}',
            # '--max_dict', 'M[xyz]:4096',  # use if db_path is not ends with _proc_noAvg.h5 i.e. need calc velocity
            '--out.db_path', f"{db_path.stem.replace('incl', cfg['in']['probes_prefix'])}_proc_psd.h5",
            # '--table', f'psd{aggregate_period_s}' if aggregate_period_s else 'psd',
            '--fs_float', str(p_type[cfg['in']['probes_prefix']]['fs']),
            # f"{fs(probes[0], cfg['in']['probes_prefix'])}",
            # (lambda x: x == x[0])(np.vectorize(fs)(probes, prefix))).all() else raise_ni()
            #
            # '--time_range_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
            # '--verbose', 'DEBUG',
            # '--chunksize', '20000',
            '--b_interact', '0',
        ]
        if probe_is_incl:
            args += [
                '--split_period', '2H',
                '--fmin', '0.0004',  #0.0004
                '--fmax', '1.05'
            ]
        else:
            args += [
                '--split_period', '1H',
                '--dt_interval_minutes', '15',  # set this if burst mode to the burst interval
                '--fmin', '0.0001',
                '--fmax', '4',
                #'--min_Pressure', '-1e15',  # to not load NaNs
            ]
        incl_h5spectrum.main(args)

    if st(4, 'Draw in Veusz'):
        pattern_path = dir_incl / r'processed_h5,vsz/201202-210326incl_proc#28.vsz'
        # r'\201202_1445incl_proc#03_pattern.vsz'  #'
        # db_path.parent / r'vsz_5min\191119_0000_5m_incl19.vsz'  # r'vsz_5min\191126_0000_5m_w02.vsz'
        b_images_only = False
        # importing in vsz index slices replacing:
        pattern_str_slice_old = None

        # Length of not adjacent intervals, s (set None to not allow)
        # pandas interval in string or tuple representation '1D' of period between intervals and interval to draw
        period_str = '0s'  # '1D'  # dt
        dt_str = '0s'  # '5m'
        file_intervals = None

        period = to_offset(period_str).delta
        dt = to_offset(dt_str).delta  # timedelta(0)  #  60 * 5

        if file_intervals and period and dt:
            # Load starts and assign ends
            # NOTE(review): dt_custom_s is not assigned anywhere in this function —
            # these branches would raise NameError if taken; confirm/restore its definition
            t_intervals_start = pd.read_csv(
                cfg['in']['path_cruise'] / r'vsz+h5_proc\intervals_selected.txt',
                converters={'time_start': lambda x: np.datetime64(x, 'ns')},
                index_col=0).index
            edges = (pd.DatetimeIndex(t_intervals_start),
                     pd.DatetimeIndex(t_intervals_start + dt_custom_s))  # np.zeros_like()
        elif period and dt:
            # Generate periodic intervals
            t_interval_start, t_intervals_end = intervals_from_period(
                datetime_range=np.array(
                    [cfg['filter']['min_date']['0'], cfg['filter']['max_date']['0']],
                    # ['2018-08-11T18:00:00', '2018-09-06T00:00:00'],
                    # ['2019-02-11T13:05:00', '2019-03-07T11:30:00'],
                    # ['2018-11-16T15:19', '2018-12-14T14:35'],
                    # ['2018-10-22T12:30', '2018-10-27T06:30:00'],
                    'datetime64[s]'),
                period=period)
            edges = (pd.DatetimeIndex([t_interval_start]).append(t_intervals_end[:-1]),
                     pd.DatetimeIndex(t_intervals_end))
        else:  # [min, max] edges for each probe
            edges_dict = {
                pr: [cfg['filter']['min_date'][pr], cfg['filter']['max_date'][pr]]
                for pr in probes
            }

        cfg_vp = {'veusze': None}
        for i, probe in enumerate(probes):
            # cfg_vp = {'veusze': None}
            # NOTE(review): edges_dict is only bound in the `else` branch above —
            # with default period_str/dt_str ('0s') that branch runs, otherwise NameError
            if edges_dict:  # custom edges for each probe
                edges = [pd.DatetimeIndex([t]) for t in edges_dict[probe]]

            # substr in file to rerplace probe_name_in_pattern (see below).
            probe_name = f"_{cfg['in']['probes_prefix'].replace('incl', 'i')}{probe:02}"
            tbl = None  # f"/{cfg['in']['probes_prefix']}{probe:02}"  # to check probe data exist in db else will not check
            l.info('Draw %s in Veusz: %d intervals...', probe_name, edges[0].size)
            # for i_interval, (t_interval_start, t_interval_end) in enumerate(zip(pd.DatetimeIndex([t_interval_start]).append(t_intervals_end[:-1]), t_intervals_end), start=1):

            for i_interval, (t_interval_start, t_interval_end) in enumerate(zip(*edges), start=1):
                # if i_interval < 23: #<= 0:  # TEMPORARY Skip this number of intervals
                #     continue
                if period and period != dt:
                    t_interval_start = t_interval_end - pd.Timedelta(dt_custom_s, 's')

                if tbl:
                    try:  # skipping absent probes
                        start_end = h5q_interval2coord(
                            db_path=str(db_path), table=tbl,
                            t_interval=(t_interval_start, t_interval_end))
                        if not len(start_end):
                            break  # no data
                    except KeyError:
                        break  # device name not in specified range, go to next name

                pattern_path_new = pattern_path.with_name(''.join([
                    f'{t_interval_start:%y%m%d_%H%M}',
                    f'_{dt_str}' if dt else '',
                    f'{probe_name}.vsz'
                ]))

                # Modify pattern file
                if not b_images_only:
                    pattern_type, pattern_number = re.match(
                        r'.*(incl|w)_proc?#?(\d*).*', pattern_path.name).groups()
                    probe_name_in_pattern = f"_{pattern_type.replace('incl', 'i')}{pattern_number}"

                    def f_replace(line):
                        """
                        Replace in file
                        1. probe name
                        2. slice
                        """
                        # if i_interval == 1:
                        line, ok = re.subn(probe_name_in_pattern, probe_name, line)
                        if ok and pattern_str_slice_old:  # can be only in same line
                            str_slice = '(({:d}, {:d}, None),)'.format(
                                *(start_end + np.int32([-1, 1])))  # bytes(, 'ascii')
                            line = re.sub(pattern_str_slice_old, str_slice, line)
                        return line

                    if not rep_in_file(pattern_path, pattern_path_new,
                                       f_replace=f_replace, binary_mode=False):
                        l.warning('Veusz pattern not changed!')  # may be ok if we need draw pattern
                        # break
                    elif cfg_vp['veusze']:
                        cfg_vp['veusze'].Load(str(pattern_path_new))
                elif cfg_vp['veusze']:
                    cfg_vp['veusze'].Load(str(pattern_path_new))

                txt_time_range = \
                    """ "[['{:%Y-%m-%dT%H:%M}', '{:%Y-%m-%dT%H:%M}']]" \
                    """.format(t_interval_start, t_interval_end)
                print(f'{i_interval}. {txt_time_range}', end=' ')

                cfg_vp = veuszPropagate.main(
                    [
                        Path(veuszPropagate.__file__).parent.with_name('veuszPropagate.ini'),
                        # '--data_yield_prefix', '-',
                        # '--path', str(db_path),  # if custom loading from db and some source is required
                        '--tables_list', '',  # switches to search vsz-files only  # f'/{probe_name}',  # 181022inclinometers/ \d*
                        '--pattern_path', str(pattern_path_new),
                        # fr'd:\workData\BalticSea\190801inclinometer_Schuka\{probe_name}_190807_1D.vsz',  #
                        # str(dir_incl / f'{probe_name}_190211.vsz'),  #warning: create file with small name
                        # '--before_next', 'restore_config',
                        # '--add_to_filename', f"_{t_interval_start:%y%m%d_%H%M}_{dt}",
                        '--filename_fun', f'lambda tbl: "{pattern_path_new.name}"',
                        '--add_custom_list', f'USEtime__',  # f'USEtime{probe_name}', nAveragePrefer',
                        '--add_custom_expressions_list',
                        txt_time_range,
                        # + """
                        # ", 5"
                        # """,
                        '--b_update_existed', 'True',
                        '--export_pages_int_list', '0',  # 0 for all '6, 7, 8',  #'1, 2, 3'
                        # '--export_dpi_int', '200',
                        '--export_format', 'jpg',  #'emf',
                        '--b_interact', '0',
                        '--b_images_only', f'{b_images_only}',
                        '--return', '<embedded_object>',  # reuse to not bloat memory
                        '--b_execute_vsz', 'True',
                        '--before_next', 'Close()'  # Close() need if b_execute_vsz many files
                    ],
                    veusze=cfg_vp['veusze'])

    if st(40, f'Draw in Veusz by loader-drawer.vsz method'):
        # save all vsz files that uses separate code
        from os import chdir as os_chdir
        dt_s = 300
        cfg['in']['pattern_path'] = db_path.parent / f'vsz_{dt_s:d}s' / '~pattern~.vsz'

        time_starts = pd.read_csv(
            db_path.parent / r'processed_h5,vsz' / 'intervals_selected.txt',
            index_col=0,
            parse_dates=True,
            date_parser=lambda x: pd.to_datetime(x, format='%Y-%m-%dT%H:%M:%S')).index

        pattern_code = cfg['in']['pattern_path'].read_bytes()  # encoding='utf-8'
        path_vsz_all = []
        for i, probe in enumerate(probes):
            probe_name = f"{cfg['in']['probes_prefix']}{probe:02}"  # table name in db
            l.info('Draw %s in Veusz: %d intervals...', probe_name, time_starts.size)
            for i_interval, time_start in enumerate(time_starts, start=1):
                path_vsz = cfg['in']['pattern_path'].with_name(
                    f"{time_start:%y%m%d_%H%M}_{probe_name.replace('incl','i')}.vsz")
                # copy file to path_vsz
                path_vsz.write_bytes(pattern_code)  # replaces 1st row
                path_vsz_all.append(path_vsz)

        os_chdir(cfg['in']['pattern_path'].parent)
        veuszPropagate.main(
            [
                'ini/veuszPropagate.ini',
                '--path', str(cfg['in']['pattern_path'].with_name('??????_????_*.vsz')),  # db_path),
                '--pattern_path', f"{cfg['in']['pattern_path']}_",
                # here used to auto get export dir only. may not be _not existed file path_ if ['out']['paths'] is provided
                # '--table_log', f'/{device}/logRuns',
                # '--add_custom_list', f'{device_veusz_prefix}USE_time_search_runs',  # 'i3_USE_timeRange',
                # '--add_custom_expressions',
                # """'[["{log_row[Index]:%Y-%m-%dT%H:%M:%S}", "{log_row[DateEnd]:%Y-%m-%dT%H:%M:%S}"]]'""",
                # '--export_pages_int_list', '1',  #'--b_images_only', 'True'
                '--b_interact', '0',
                '--b_update_existed', 'True',  # todo: delete_overlapped
                '--b_images_only', 'True',
                '--load_timeout_s_float', str(cfg['program']['load_timeout_s'])
                # '--min_time', '2020-07-08T03:35:00',
            ],
            **{'out': {
                'paths': path_vsz_all
            }})

    if st(50, 'Export from existed Veusz files in dir'):
        pattern_parent = db_path.parent  # r'vsz_5min\191126_0000_5m_w02.vsz''
        pattern_path = str(pattern_parent / r'processed_h5,vsz' / '??????incl_proc#[1-9][0-9].vsz')  # [0-2,6-9]
        veuszPropagate.main([
            'ini/veuszPropagate.ini',
            '--path', pattern_path,
            '--pattern_path', pattern_path,
            # '--export_pages_int_list', '1',  #'--b_images_only', 'True'
            '--b_interact', '0',
            '--b_update_existed', 'True',  # todo: delete_overlapped
            '--b_images_only', 'True',
            '--load_timeout_s_float', str(cfg['program']['load_timeout_s']),
            '--b_execute_vsz', 'True',
            '--before_next', 'Close()'  # Close() need if b_execute_vsz many files
        ])
def main(new_arg=None, veusze=None):
    """
    Extract fit coefficients and zeroing matrices from Veusz files and save them to an HDF5 coefficients database.

    Workflow (as implemented below):
    1. Extends veuszPropagate's argparser with input/output options and parses ``new_arg``.
    2. Resolves the log path, pattern path and input/output file lists.
    3. Iterates over Veusz files (via ``veuszPropagate.load_to_veusz``); for each one:
       - reads fit coefficients from the configured widget and writes them to
         ``//{table}//coef//Vabs{i}`` via ``h5copy_coef``;
       - reads the zeroing rotation matrix ``Rcor`` and applies it to each channel's
         ``A`` matrix stored in the HDF5 db (optionally undoing a 180-deg roll first);
       - exports images of the Veusz pages.
    4. Appends per-table text results to ``average_for_fitting-{n}.txt`` files.

    Note: if vsz data source have 'Ag_old_inv' variable then not invert coef. Else invert
    to use in vsz which not invert coefs.

    :param new_arg: argument list for the argparser (same convention as veuszPropagate);
        the special value '<return_cfg>' makes the function return the parsed cfg (for tests).
    :param veusze: optional already-open Veusz embedded interface to reuse.
    :return: dict of collected Veusz data plus the last 'veusze' handle,
        or cfg when new_arg == '<return_cfg>', or None on early exit.
    """
    global l
    # Build the CLI parser on top of veuszPropagate's one; keep only the named
    # option groups (skip argparse's special "...arguments" groups).
    p = veuszPropagate.my_argparser()
    p_groups = {
        g.title: g
        for g in p._action_groups
        if g.title.split(' ')[-1] != 'arguments'
    }  # skips special argparse groups
    p_groups['in'].add(
        '--channels_list',
        help=
        'channels needed zero calibration: "magnetometer" or "M" for magnetometer and any else for accelerometer, use "M, A" for both, empty to skip '
    )
    p_groups['in'].add(
        '--widget',
        help=
        'path to Veusz widget property which contains coefficients. For example "/fitV(force)/grid1/graph/fit1/values"'
    )
    p_groups['in'].add(
        '--data_for_coef',
        default='max_incl_of_fit_t',
        help=
        'Veusz data to use as coef. If used with widget then this data is appended to data from widget'
    )
    p_groups['out'].add('--out.path', help='path to db where write coef')
    p_groups['out'].add(
        '--re_tbl_from_vsz_name',
        help=
        'regex to extract hdf5 table name from to Veusz file name (last used "\D*\d*")'
        # ? why not simly specify table name?
    )
    # todo: "b_update_existed" arg will be used here for exported images. Check whether False works or prevent open vsz

    cfg = cfg_from_args(p, new_arg)
    # Make the log path absolute relative to this script's directory.
    if not Path(cfg['program']['log']).is_absolute():
        cfg['program']['log'] = str(
            Path(__file__).parent.joinpath(
                cfg['program']['log']))  # l.root.handlers[0].baseFilename
    # NOTE(review): this emptiness guard comes AFTER cfg has already been indexed
    # above — if cfg_from_args() can return a falsy value, the code above would
    # raise first. Looks like the guard should precede the log-path block; confirm.
    if not cfg:
        return
    if new_arg == '<return_cfg>':  # to help testing
        return cfg
    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    veuszPropagate.l = l
    print('\n' + this_prog_basename(__file__), 'started', end=' ')
    if cfg['out']['b_images_only']:
        print('in images only mode.')
    try:
        print('Output pattern ')
        # Using cfg['out'] to store pattern information
        if not Path(cfg['in']['pattern_path']).is_absolute():
            cfg['in']['pattern_path'] = str(cfg['in']['path'].parent.joinpath(
                cfg['in']['pattern_path']))
        set_field_if_no(cfg['out'], 'path', cfg['in']['pattern_path'])
        # Resolve the list of output (pattern) files; may interact with the user.
        cfg['out']['paths'], cfg['out']['nfiles'], cfg['out'][
            'path'] = init_file_names(**cfg['out'],
                                      b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message, ' - no pattern')
        return  # or raise FileNotFoundError?
    try:
        print(end='Data ')
        # Resolve the list of input data files.
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(
                **cfg['in'], b_interact=False)  # do not bother user 2nd time
    except Ex_nothing_done as e:
        print(e.message)
        return  # or raise FileNotFoundError?
    if not cfg['out']['export_dir']:
        cfg['out']['export_dir'] = Path(cfg['out']['path']).parent
    # Keep a copy of the input config if it must be restored between files.
    if cfg['program']['before_next'] and 'restore_config' in cfg['program'][
            'before_next']:
        cfg['in_saved'] = cfg['in'].copy()
    # cfg['loop'] = asyncio.get_event_loop()
    # cfg['export_timeout_s'] = 600
    cfg['out']['export_dir'] = dir_from_cfg(cfg['out']['path'].parent,
                                            cfg['out']['export_dir'])

    # Prepare the Veusz loader (optionally executing vsz code) and the generator
    # that yields an embedded-Veusz handle + log record per input file.
    veuszPropagate.load_vsz = veuszPropagate.load_vsz_closure(
        cfg['program']['veusz_path'],
        b_execute_vsz=cfg['program']['b_execute_vsz'])
    gen_veusz_and_logs = veuszPropagate.load_to_veusz(
        veuszPropagate.ge_names(cfg), cfg, veusze)

    # Veusz dataset names to collect for every file.
    names_get = ['Inclination_mean_use1', 'logVext1_m__s'
                 ]  # \, 'Inclination_mean_use2', 'logVext2_m__s'
    names_get_fits = ['fit']  # , 'fit2'
    vsz_data = {n: [] for n in names_get}
    for n in names_get_fits:
        vsz_data[n] = []

    # prepare collecting all coef in text also
    names_get_txt_results = ['fit1result']  # , 'fit2result'
    txt_results = {n: {} for n in names_get_txt_results}

    i_file = 0
    for veusze, log in gen_veusz_and_logs:
        if not veusze:
            continue
        i_file += 1
        print(i_file)
        # Derive the HDF5 table name from the Veusz file name.
        if cfg['out']['re_tbl_from_vsz_name']:
            table = cfg['out']['re_tbl_from_vsz_name'].match(
                log['out_name']).group()
        else:
            table = re.sub(
                '^[\d_]*', '',
                log['out_name'])  # delete all first digits (date part)

        for n in names_get:
            vsz_data[n].append(veusze.GetData(n)[0])
        # NOTE(review): unlike names_get (appended per file), data_for_coef is
        # overwritten each iteration — presumably only the current file's value
        # is needed for the coef list below; confirm.
        for n in [cfg['in']['data_for_coef']]:
            vsz_data[n] = list(veusze.GetData(n)[0])

        # Save velocity coefficients into //{table}//coef//Vabs{i} where i - fit number enumeretad from 0
        for i, name_out in enumerate(names_get_fits):  # ['fit1', 'fit2']
            coef = veusze.Get(
                cfg['in']['widget']
            )  # veusze.Root['fitV(inclination)']['grid1']['graph'][name_out].values.val
            # Order polynomial coefficients highest-degree first: either the
            # fixed 'd','c','b','a' order or a digits-aware sort of the keys.
            if 'a' in coef:
                coef_list = [
                    coef[k] for k in ['d', 'c', 'b', 'a'] if k in coef
                ]
            else:
                coef_list = [
                    coef[k] for k in sorted(coef.keys(), key=digits_first)
                ]
            if cfg['in']['data_for_coef']:
                coef_list += vsz_data[cfg['in']['data_for_coef']]
            vsz_data[name_out].append(coef_list)
            # Write coefficients and a [NaN, now-as-int64] date stamp to the db.
            h5copy_coef(None,
                        cfg['out']['path'],
                        table,
                        dict_matrices={
                            f'//coef//Vabs{i}': coef_list,
                            f'//coef//date': np.float64([
                                np.NaN,
                                np.datetime64(datetime.now()).astype(np.int64)
                            ])
                        })
            # h5savecoef(cfg['out']['path'], path=f'//{table}//coef//Vabs{i}', coef=coef_list)
            txt_results[names_get_txt_results[i]][table] = str(coef)

        # Zeroing matrix - calculated in Veusz by rotation on old0pitch old0roll
        Rcor = veusze.GetData(
            'Rcor'
        )[0]  # zeroing angles tuned by "USEcalibr0V_..." in Veusz Custom definitions

        if len(cfg['in']['channels']):
            l.info(
                'Applying zero calibration matrix of peach = {} and roll = {} degrees'
                .format(np.rad2deg(veusze.GetData('old0pitch')[0][0]),
                        np.rad2deg(veusze.GetData('old0roll')[0][0])))
            with h5py.File(cfg['out']['path'], 'a') as h5:
                for channel in cfg['in']['channels']:
                    (col_str, coef_str) = channel_cols(channel)
                    # h5savecoef(cfg['out']['path'], path=f'//{table}//coef//Vabs{i}', coef=coef_list),
                    #     dict_matrices={'//coef//' + coef_str + '//A': coefs[tbl][channel]['A'],
                    #                    '//coef//' + coef_str + '//C': coefs[tbl][channel]['b']})

                    # Currently used inclinometers have electronics rotated on 180deg. Before we inserted additional
                    # rotation operation in Veusz by inverting A_old. Now we want iclude this information in database coef only.
                    try:  # Checking that A_old_inv exist
                        A_old_inv = veusze.GetData('Ag_old_inv')
                        is_old_used = True  # Rcor is not account for electronic is rotated.
                    except KeyError:
                        is_old_used = False  # Rcor is account for rotated electronic.
                    if is_old_used:
                        # The rotation is done in vsz (A_old in vsz is inverted) so need rotate it back to
                        # use in vsz without such invertion
                        # Rotate on 180 deg (note: this is not inversion)
                        A_old_inv = h5[f'//{table}//coef//{coef_str}//A'][...]
                        A_old = np.dot(A_old_inv,
                                       [[1, 0, 0], [0, -1, 0], [0, 0, -1]
                                        ])  # adds 180 deg to roll
                    else:
                        A_old = h5[f'//{table}//coef//{coef_str}//A'][...]
                    # A_old now accounts for rotated electronic
                    A = np.dot(Rcor, A_old)
                    h5copy_coef(None,
                                h5,
                                table,
                                dict_matrices={f'//coef//{coef_str}//A': A})

        # veusze.Root['fitV(inclination)']['grid1']['graph2'][name_out].function.val
        print(vsz_data)
        veuszPropagate.export_images(
            veusze,
            cfg['out'],
            f"_{log['out_name']}",
            b_skip_if_exists=not cfg['out']['b_update_existed'])

        # vsz_data = veusz_data(veusze, cfg['in']['data_yield_prefix'])
        # # caller do some processing of data and gives new cfg:
        # cfgin_update = yield(vsz_data, log)  # to test run veusze.Save('-.vsz')
        # cfg['in'].update(cfgin_update)  # only update of cfg.in.add_custom_expressions is tested
        # if cfg['in']['add_custom']:
        #     for n, e in zip(cfg['in']['add_custom'], cfg['in']['add_custom_expressions']):
        #         veusze.AddCustom('definition', n, e, mode='replace')
        # #cor_savings.send((veusze, log))
        #
        # veusze.Save(str(path_vsz_save), mode='hdf5')  # veusze.Save(str(path_vsz_save)) saves time with bad resolution
        print(f'Ok')
    print(txt_results)
    # Append collected data per table to tab-separated text files.
    # NOTE(review): ``header=txt_results['fit1result'].keys`` passes the bound
    # method object, not its result — pandas will likely treat it as truthy;
    # probably ``list(txt_results['fit1result'].keys())`` was intended; confirm.
    for n in names_get:
        pd.DataFrame.from_dict(
            dict(zip(list(txt_results['fit1result'].keys()),
                     vsz_data[n]))).to_csv(
                         Path(cfg['out']['path']).with_name(
                             f'average_for_fitting-{n}.txt'),
                         sep='\t',
                         header=txt_results['fit1result'].keys,
                         mode='a')
    return {**vsz_data, 'veusze': veusze}