def my_argparser(): """ Configuration parser - add here common options for different inputs - add help strings for them :return p: configargparse object of parameters """ p = my_argparser_common_part({ 'description': """ ---------------------------- Convert data from GPX files to CSV ----------------------------""" }) # Configuration sections # All argumets of type str (default for add_argument...), because of # custom postprocessing based of args names in ini2dict s = p.add_argument_group('in', 'all about input files') s.add( '--path', default='.', # nargs=?, help='path to source file(s) to parse. Use patterns in Unix shell style' )
def my_argparser(): """ Configuration parser options and its description """ p = my_argparser_common_part({'description': 'Extract data from Pandas HDF5 ' 'store*.h5 files to GPX'}) # 'gpx2h5.ini' s = p.add_argument_group('in', 'data from hdf5 store') s.add('--db_path', help='hdf5 store file path') # '*.h5' s.add('--tables_log_list', help='hdf5 log files name', default='logFiles') s.add('--table_nav', help='table where to search coordinates. If empty then use data tables', default='navigation') s = p.add_argument_group('out', 'Output files: paths, formats... ' \ ' - not calculation intensive affecting parameters') s.add('--select_from_tablelog_ranges_index', help='if set to 0 (or -1) then use only 1 data point per log row and retrieve navigation at data points only (to calc dist) else if None then use all data for ranges specified in log rows and saves tracks (not points)') s.add('--gpx_names_funs_list', default='i+1', help='list of functions to name of tracks/waypoints, each item for each table. Use arguments of current indexes: i - waypoint. will be converted to string, to duplicates will be added letter in alphabet order. Functions arguments are i: row index, row: pandas series with fields: Index (datetime), Lat, Lon') s.add('--gpx_names_funs_cobined', default='gpx_names_funs[row.itbl](i)', help='tracks/waypoints names of combined gpx. Possibilites are the same as for gpx_names_funs_list item. Default function will keep combined values same as individual') s.add('--gpx_names_fun_format', default='{}', help='name\'s format to display gpx_names_funs_(list/combined) result') s.add('--path', default='', help='directory to place output files') s = p.add_argument_group('process', 'calculation parameters') s.add_argument('--b_missed_coord_to_zeros', help='out all points even if no coordinates, but replace them to zeros') s.add_argument('--simplify_tracks_error_m_float', help='does Ramer-Douglas-Peucker algorithm for simplification of tracks if set') s.add('--dt_search_nav_tolerance_seconds', default='1', help='start interpolte nav when not found exact data time') s.add('--period_files', default='', help='pandas offset strings as D, 5D, H, ... (most useful: D), exporrt data in intervals') s.add('--period_segments', default='', help='pandas offset strings as D, 5D, H, ... to divide track on segments') s = p.add_argument_group('program', 'Program behaviour') s.add_argument('--log', help='write log if path to existed file is specified') return p
def my_argparser(): """ Configuration parser - add here common options for different inputs - add help strings for them :return p: configargparse object of parameters All p argumets are of type str (default for add_argument...), because of custom postprocessing based of args names in ini2dict """ p = my_argparser_common_part( { 'description': 'incl_load version {}'.format(version) + """ --------------------------------------------------- Processing raw inclinometer data, Saving result to indexed Pandas HDF5 store (*.h5) and *.csv -----------------------------------------------------------""" }, version) # Configuration sections s = p.add_argument_group('in', 'All about input files') s.add( '--path_cruise', default='.', # nargs=?, help='Directory where inclinometer data will be stored, subdirs:' '"_raw": required, with raw file(s)') s.add( '--raw_subdir', default='', help= 'Optional zip/rar arhive name (data will be unpacked) or subdir in "path_cruise/_raw"' ) s.add( '--raw_pattern', default="*{prefix:}{number:0>3}*.[tT][xX][tT]", help= 'Pattern to find raw files: Python "format" command pattern to format prefix and probe number.' '"prefix" is a --probes_prefix arg that is in UPPER case and INCL replaced with INKL.' ) s.add('--probes_int_list', help='Note: Not affects steps 2, 3, set empty list to load all') s.add( '--probes_prefix', default='incl', help= '''Table name prefix in DB (and in raw files with modification described in --raw_pattern help). I have used "incl" for inclinometers, "w" for wavegauges. Note: only if "incl" in probes_prefix the Kondrashov format is used else it must be in Baranov's format''' ) s.add( '--db_coefs', default=r'd:\WorkData\~configuration~\inclinometr\190710incl.h5', help='coefs will be copied from this hdf5 store to output hdf5 store') s.add('--timerange_zeroing_list', help='See incl_h5clc') s.add( '--timerange_zeroing_dict', help= 'See incl_h5clc. Example: incl14: [2020-07-10T21:31:00, 2020-07-10T21:39:00]' ) s.add( '--dt_from_utc_seconds', default='0', help= 'add this correction to loading datetime data. Can use other suffixes instead of "seconds"' ) s.add( '--dt_from_utc_hours', default='0', help= 'add this correction to loading datetime data. 
Can use other suffixes instead of "hours"' ) s.add( '--azimuth_add_float_dict', help= 'degrees, adds this value to velocity direction (will sum with _azimuth_shift_deg_ coef)' ) s = p.add_argument_group('filter', 'Filter all data based on min/max of parameters') s.add( '--min_date_dict', default='0: 2020-06-28T18:00', help= 'minimum time for each probe, use probe number 0 to set default value') s.add( '--max_date_dict', default='0: now', help= 'maximum time for each probe, use probe number 0 to set default value') s = p.add_argument_group('out', 'All about output files') s.add('--db_name', help='output hdf5 file name, do not set for auto using dir name') s.add( '--aggregate_period_s_int_list', default='', help= 'bin average data in this intervals will be placed in separate section in output DB and csv for ' '[None, 300, 600], default: None, 2, 600, 3600 if "w" in [in][probes_prefix] else 7200' ) s.add('--aggregate_period_s_not_to_text_int_list', default='None', help='do not save text files for this aggregate periods') s = p.add_argument_group('program', 'Program behaviour') s.add('--step_start_int', default='1', choices=['1', '2', '3', '4'], help='step to start') s.add( '--step_end_int', default='2', choices=['1', '2', '3', '4'], help= 'step to end (inclusive, or if less than start then will run one start step only)' ) s.add('--dask_scheduler', help='can be "synchronous" (for help debugging) or "distributed"') return (p)
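
# Illustration (not part of the original module): how the --raw_pattern default could be
# expanded for one probe, following the help text above (prefix is --probes_prefix in UPPER
# case with INCL replaced by INKL). The probe number here is made up.
raw_pattern = "*{prefix:}{number:0>3}*.[tT][xX][tT]"
prefix = 'incl'.upper().replace('INCL', 'INKL')           # 'INKL'
glob_mask = raw_pattern.format(prefix=prefix, number=14)  # '*INKL014*.[tT][xX][tT]'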
def my_argparser(): """ Configuration parser options and its description :return p: configargparse object of parameters """ from utils2init import my_argparser_common_part p = my_argparser_common_part({ 'description': 'Grid data from Pandas HDF5, VSZ files ' 'and Pandas HDF5 store*.h5' }) s = p.add_argument_group('in', 'data from hdf5 store') s.add( '--db_path', help= 'hdf5 store file path where to load source data and write resulting coef' ) # '*.h5' s.add('--tables_list', help='tables names list or pattern to find tables to load data') s.add( '--channels_list', help= 'channel can be "magnetometer" or "M" for magnetometer and any else for accelerometer', default='M, A') s.add('--chunksize_int', help='limit loading data in memory', default='50000') s.add('--timerange_list', help='time range to use') s.add( '--timerange_dict', help= 'time range to use for each inclinometer number (consisted of digits in table name)' ) s.add('--timerange_nord_list', help='time range to zeroing nord. Not zeroing Nord if not used') s.add( '--timerange_nord_dict', help= 'time range to zeroing nord for each inclinometer number (consisted of digits in table name)' ) s = p.add_argument_group('filter', 'excludes some data') s.add( '--no_works_noise_float_dict', default='M:10, A:100', help= 'is_works() noise argument for each channel: excludes data if too small changes' ) s.add('--blocks_int_list', default='21, 7', help='despike() argument') s.add('--offsets_float_list', default='1.5, 2', help='despike() argument') s.add('--std_smooth_sigma_float', default='4', help='despike() argument') s = p.add_argument_group('out', 'where write resulting coef (additionally)') s.add( '--out.db_path', help= 'hdf5 store file path where to write resulting coef. Writes to tables that names configured for input data (cfg[in].tables) in this file' ) s = p.add_argument_group('program', 'program behaviour') s.add( '--return', default='<end>', # nargs=1, choices=['<cfg_from_args>', '<gen_names_and_log>', '<end>'], help= '<cfg_from_args>: returns cfg based on input args only and exit, <gen_names_and_log>: execute init_input_cols() and returns... - see main()' ) return (p)
def my_argparser(): """ Configuration parser - add here common options for different inputs - add help strings for them :return p: configargparse object of parameters All p argumets are of type str (default for add_argument...), because of custom postprocessing based of args names in ini2dict """ p = my_argparser_common_part( { 'description': 'csv2h5 version {}'.format(version) + """ ---------------------------- Extract data from Pandas HDF5 store*.h5 files, process it and save HDF5/CSV ----------------------------""" }, version) # , 'default_config_files': [os_path.join(os_path.dirname(__file__), name) for name in # ('CTD_calc.ini', 'CTD_calc.json')] # Configuration sections s = p.add_argument_group('in', 'all about input files') s.add( '--db_path', default='.', # nargs=?, help= 'path to pytables hdf5 store to load data. May use patterns in Unix shell style' ) s.add( '--tables_list', help= 'table name in hdf5 store to read data. If not specified then will be generated on base of path of input files' ) s.add( '--tables_log', help= 'table name in hdf5 store to read data intervals. If not specified then will be "{}/logFiles" where {} will be replaced by current data table name' ) s.add( '--table_nav', default='navigation', help= 'table name in hdf5 store to add data from it to log table when in "find runs" mode. Use empty strng to not add' ) s.add( '--dt_from_utc_hours', default='0', help= 'add this correction to loading datetime data. Can use other suffixes instead of "hours"' ) s.add( '--b_skip_if_up_to_date', default='True', help= 'exclude processing of files with same name and which time change is not bigger than recorded in database (only prints ">" if detected). If finds updated version of same file then deletes all data which corresponds old file and after it brfore procesing of next files' ) s.add( '--b_temp_on_its90', default='True', help= 'When calc CTD parameters treat Temp have red on ITS-90 scale. (i.e. same as "temp90")' ) s.add( '--path_coef', help= 'path to file with coefficients. Used for processing of Neil Brown CTD data' ) s.add('--lat_float', help='Latitude used to calc SA if no such data column') s.add('--lon_float', help='Longitude used to calc SA if no such data column') s = p.add_argument_group('out', 'all about output files') info_default_path = '[in] path from *.ini' s.add('--out.db_path', help='hdf5 store file path') s.add( '--out.tables_list', help= 'table name in hdf5 store to write data. If not specified then it is same as input tables_list (only new subtable will created here), else it will be generated on base of path of input files' ) s.add( '--out.tables_log_list', help= 'table name in hdf5 store to save intervals. If contains "logRuns" then runs will be found first' ) s.add('--path_csv', help='path to output directory of csv file(s)') s.add('--data_columns_list', help='list of columns names used in output csv file(s)') s.add('--b_insert_separator', default='True', help='insert NaNs row in table after each file data end') s.add('--b_remove_duplicates', default='False', help='Set True if you see warnings about') s.add( '--text_date_format', default='%Y-%m-%d %H:%M:%S.%f', help= 'Format of date column in csv files. 
Can use float or string representations' ) s = p.add_argument_group('extract_runs', 'program behaviour') s.add( '--cols_list', default='Pres', help= 'column for extract_runs (other common variant besides default is "Depth")' ) s.add('--dt_between_min_minutes', default='1', help='') s.add('--min_dp', default='20', help='') s.add('--min_samples', default='200', help='100 use small value (10) for binned (averaged) samples') s.add( '--b_keep_minmax_of_bad_files', default='False', help= 'keep 1 min before max and max of separated parts of data where movements insufficient to be runs' ) s.add('--b_save_images', default='True', help='to review split result') s = p.add_argument_group('filter', 'filter all data based on min/max of parameters') s.add( '--min_dict', help= 'List with items in "key:value" format. Sets to NaN data of ``key`` columns if it is below ``value``' ) s.add( '--max_dict', help= 'List with items in "key:value" format. Sets to NaN data of ``key`` columns if it is above ``value``' ) s = p.add_argument_group('program', 'program behaviour') s.add( '--return', default='<end>', # nargs=1, choices=['<cfg_from_args>', '<gen_names_and_log>', '<end>'], help= '<cfg_from_args>: returns cfg based on input args only and exit, <gen_names_and_log>: execute init_input_cols() and also returns fun_proc_loaded function... - see main()' ) return (p)
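
# Illustration (not part of the original module): a hypothetical sketch of how the
# "key:value" items of --min_dict / --max_dict could be applied to a DataFrame, following
# the help text above (values outside the bound are set to NaN). Column names are examples.
import numpy as np
import pandas as pd

def _apply_min_max(df: pd.DataFrame, min_dict=None, max_dict=None) -> pd.DataFrame:
    """Set out-of-range values of the listed columns to NaN."""
    for col, bound in (min_dict or {}).items():
        df.loc[df[col] < float(bound), col] = np.nan
    for col, bound in (max_dict or {}).items():
        df.loc[df[col] > float(bound), col] = np.nan
    return df

# Example: _apply_min_max(df, min_dict={'Pres': '0'}, max_dict={'Temp': '40'})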
def my_argparser():
    p = my_argparser_common_part(
        {'description': 'Add data from *.gpx to *.h5'})

    s = p.add_argument_group('in', 'data')
    s.add('--path', help='path/mask to GPX file(s) to parse')
    s.add('--b_search_in_subdirs', default='False',
          help='used if mask or only dir in path (not a full path): search in subdirectories')
    s.add('--ext', default='gpx',
          help='used if only dir in path - extension of gpx files')
    s.add('--dt_from_utc_hours', default='0',
          help='add this correction to loaded datetime data. May use other suffixes instead of "hours"')
    s.add('--b_skip_if_up_to_date', default='True',
          help='exclude processing of files with the same name whose modification time is not newer than '
               'recorded in the database (only prints ">" if detected). If an updated version of the same '
               'file is found then all data corresponding to the old file is deleted before processing '
               'the next files')
    s.add('--sort', default='False',  # 'correct', 'sort_rows'
          help='if time is not sorted then correct it, trying to affect the minimum number of values. '
               'Used here for tracks/segments only. This differs from sorting rows, which is performed '
               'at the last step after checking the table in the database')
    # Parameters specific to gpx
    s.add('--waypoints_cols_list', default='time, latitude, longitude, name, symbol, description',
          help='column names (comma separated) of gpxpy fields of gpx waypoints to load '
               '(symbol=sym, description=cmt), first will be the index. Their number and order must '
               'match out.waypoints_cols_list')
    s.add('--routes_cols_list', default='time, latitude, longitude, name, symbol, description',
          help='same as waypoints_cols_list but for routes')
    s.add('--tracks_cols_list', default='time, latitude, longitude',
          help='same as waypoints_cols_list but for tracks')
    s.add('--segments_cols_list', default='time, latitude, longitude',
          help='same as waypoints_cols_list but for segments')

    s = p.add_argument_group('out', 'all about output files')
    s.add('--db_path', help='hdf5 store file path')
    s.add('--table_prefix',
          help='prepend this string to the table names used to save data (Note: _waypoints or _routes '
               'or ... suffix will be added automatically)')
    s.add('--tables_list', default='waypoints, tracks, tracks/segments, routes',
          help='table names (comma separated) in hdf5 store to write data. '
               'Keep them in logical order: [waypoints, tracks, tracks sections, routes]')
    s.add('--out.waypoints_cols_list', default='time, Lat, Lon, name, sym, cmt',
          help='column names (comma separated) in hdf5 table to write data, '
               'their number and order must match in.waypoints_cols_list')
    s.add('--out.tracks_cols_list', default='time, Lat, Lon',
          help='same as waypoints_cols_list but for tracks')
    s.add('--out.segments_cols_list', default='time, Lat, Lon',
          help='same as waypoints_cols_list but for segments')
    s.add('--b_insert_separator', default='False',
          help="insert a NaNs row in the table after each file's data end")
    s.add('--b_use_old_temporary_tables', default='False',
          help='Warning! Set True only if the temporary storage already has good data! '
               'If True and b_skip_if_up_to_date=True then do not replace the temporary storage with the '
               'current storage before adding data to the temporary storage')
    # candidates to move out to common part
    s.add('--exclude_dirs_endswith_list', default='-, bad, test, TEST, toDel-',
          help='exclude dirs which end with these strings. This and the next option are especially useful '
               'when searching recursively in many dirs')
    s.add('--exclude_files_endswith_list', default='coef.txt, -.txt, test.txt',
          help='exclude files which end with these strings')

    s = p.add_argument_group('filter', 'filter all data based on min/max of parameters')
    s.add('--min_date', help='minimum time')
    s.add('--max_date', help='maximum time')
    s.add('--min_dict',
          help='List with items in "key:value" format. Sets data of ``key`` columns to NaN if below ``value``')
    s.add('--max_dict',
          help='List with items in "key:value" format. Sets data of ``key`` columns to NaN if above ``value``')

    s = p.add_argument_group('program', 'program behaviour')
    s.add('--return', default='<end>',  # nargs=1,
          choices=['<cfg_from_args>', '<gen_names_and_log>', '<end>'],
          help='<cfg_from_args>: returns cfg based on input args only and exits, '
               '<gen_names_and_log>: executes init_input_cols() and also returns the fun_proc_loaded '
               'function... - see main()')
    return p
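
# Illustration (not part of the original module): the help above requires in.*_cols_list and
# out.*_cols_list to match in number and order. A hypothetical rename map built from the two
# comma-separated defaults could look like this:
in_cols = [c.strip() for c in 'time, latitude, longitude, name, symbol, description'.split(',')]
out_cols = [c.strip() for c in 'time, Lat, Lon, name, sym, cmt'.split(',')]
rename_map = dict(zip(in_cols, out_cols))
# {'time': 'time', 'latitude': 'Lat', 'longitude': 'Lon', 'name': 'name', 'symbol': 'sym', 'description': 'cmt'}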
def my_argparser(): """ Configuration parser - add here common options for different inputs - add help strings for them :return p: configargparse object of parameters """ version = '0.1.0' p = my_argparser_common_part( { 'description': 'veuszPropagate version {}'.format(version) + """ ---------------------------- Create vsz file for each source file based on vsz pattern ----------------------------""" }, version) # Configuration sections s = p.add_argument_group('in', 'all about input files') s.add( '--path', help= 'path to source file(s) to generate list of their names (usually *.csv or *.txt) or pytables hdf5 store' ) s.add('--pattern_path', help='path to ".vsz" file to use as pattern') # '*.h5' s.add('--import_method', help='Veusz method to imort data in ".vsz" pattern' ) # todo: read it from pattern s.add('--start_file_index', default="0", help='indexes begins from 0') s.add( '--add_custom_list', help= 'custom definitions names for evaluation of expressions defined in add_custom_expressions_list' ) s.add('--add_custom_expressions_list', help='custom_expressions_list to add by Veusz AddCustom() function') s.add('--eval_list', help='string represented Veusz.Embed function call to eval') s.add('--data_yield_prefix', help='used to get data from Vieusz which names started from this') s.add('--tables_list', help='path to tables in db to find instead files') s.add( '--table_log', help= 'name of log table - path to hdf5 table having intervals ("index" of type pd.DatetimeIndex and "DateEnd" of type pd.Datetime)' ) s.add( '--min_time', help='%%Y-%%m-%%dT%%H:%%M:%%S, optional, allows range table_log rows') s.add( '--max_time', help='%%Y-%%m-%%dT%%H:%%M:%%S, optional, allows range table_log rows') s = p.add_argument_group('out', 'all about output files') s.add( '--export_pages_int_list', default='0', help='pages numbers to export, comma separated (1 is first), 0 = all') s.add( '--b_images_only', default='False', help= 'export only. If true then all output vsz must exist, they will be loaded and vsz not be updated' ) s.add( '--b_update_existed', default='False', help= 'replace all existed vsz files else skip existed files. In b_images_only mode - skip exporting vsz files for wich any files in "export_dir/*{vsz file stem}*. {export_format}" exist' ) s.add( '--export_dir', default='images(vsz)', help='subdir relative to input path or absolute path to export images') s.add('--export_format', default='jpg', help='extention of images to export which defines format') s.add( '--export_dpi_int_list', default='300', help= 'resolution (dpi) of images to export for all pages, defined in `export_pages_int_list`' ) s.add( '--filename_fun', default='lambda tbl: tbl', help= 'function to modify output file name. Argument is input table name in hdf5' ) s.add( '--add_to_filename', default='', help= 'string will be appended to output filenames. If input is from hdf5 table then filename is name of table, this will be added to it' ) # candidates to move out to common part s.add( '--exclude_dirs_endswith_list', default='-, bad, test, TEST, toDel-', help= 'exclude dirs which ends with this srings. 
This and next option especially useful when search recursively in many dirs' ) s.add('--exclude_files_endswith_list', default='coef.txt, -.txt, test.txt', help='exclude files which ends with this srings') s = p.add_argument_group('program', 'program behaviour') s.add('--export_timeout_s_float', default='0', help='export asyncroniously with this timeout, s (tried 600s?)') s.add('--load_timeout_s_float', default='180', help='export asyncroniously with this timeout, s (tried 600s?)') s.add( '--b_execute_vsz', default='False', help='instead of Load() read vsz and execute its content line by line') s.add( '--veusz_path', default=default_veusz_path, help= 'directory of Veusz like /usr/lib64/python3.6/site-packages/veusz-2.1.1-py3.6-linux-x86_64.egg/veusz' ) s.add('--before_next_list', default=',', help=''' "Close()" - each time reopens pattern, "restore_config" - saves and restores initial configuration (may be changed in data_yield mode: see data_yield_prefix argument)''' ) s.add( '--f_custom_in_cycle', help='''function evaluated in cycle: not implemented over command line''' ) # todo: implement s.add( '--return', default='<end>', # nargs=1, choices=[ '<cfg_from_args>', '<gen_names_and_log>', '<embedded_object>', '<end>' ], help= '<cfg_from_args>: returns cfg based on input args only and exit, <gen_names_and_log>: execute init_input_cols() and also returns fun_proc_loaded function... - see main()' ) return p
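
# Illustration (not part of the original module): a hypothetical sketch of how the string
# options --filename_fun and --add_to_filename could be combined to build an output file
# name from an hdf5 table name, as their help texts above describe.
def _output_stem(table_name: str, filename_fun: str = 'lambda tbl: tbl', add_to_filename: str = '') -> str:
    """Apply the user-supplied lambda (given as text) to the table name and append the suffix."""
    fun = eval(filename_fun)  # e.g. 'lambda tbl: tbl.replace("/", "_")'
    return fun(table_name) + add_to_filename

# Example: _output_stem('incl09', add_to_filename='_200m') -> 'incl09_200m'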
def my_argparser(): """ Configuration parser - add here common options for different inputs - add help strings for them :return p: configargparse object of parameters All p argumets are of type str (default for add_argument...), because of custom postprocessing based of args names in ini2dict """ p = my_argparser_common_part( { 'description': 'SMS2GPX version {}'.format(version) + """ ---------------------------- Convert SMS *.xml to *.gpx ----------------------------""" }, version) # config_file_paths=r'SMS2GPX.ini' s = p.add_argument_group('in', 'XML files') s.add('--path', help='Path to XML or directory with XML files to parse') s.add( '--dt_from_utc_hours', default='0', help= 'add this correction to loading datetime data. Can use other suffixes instead of "hours"' ) s.add('--contacts_names', help= 'list of contacts names to use like "tracker 3, Трекер 0, Трекер 1"') s = p.add_argument_group('out', 'XML files') s.add('--out.path', default='./<filename>.gpx', help='''Output dir/path. Join data from all found input files to single output if extension provided. If "<filename>" found it will be sabstituted with [1st file name]+, if "<dir>" - with last directory name. Else, if no extension provided then ".gpx" will be used, "<filename>" string will be sabstituted with correspondng input file names. ''') s.add( '--dt_between_track_segments_hours', default='99999', help= '''dt between track segments. also can try other valid time interval suffixes - all different suffix options will be summed''' ) s = p.add_argument_group('process', 'calculation parameters') # s.add_argument('--b_missed_coord_to_zeros', # help='out all points even if no coordinates, but replace them to zeros') s.add_argument( '--min_date', help= 'UTC, not output data with < min_date (if date is smaller then treat it as bad, so tries get from stamp if b_all_time_from_stamp is False. If it smaller too then data discard, format like in 13.05.2017 09:00:00' ) s = p.add_argument_group('program', 'Program behaviour') s.add_argument('--log', help='write log if path to existed file is specified') return p
def my_argparser(): """ Configuration parser - add here common options for different inputs - add help strings for them :return p: configargparse object of parameters All p argumets are of type str (default for add_argument...), because of custom postprocessing based of args names in ini2dict """ version = '0.0.1' p = my_argparser_common_part( { 'description': 'csv2h5 version {}'.format(version) + """ ---------------------------- Add data from bin files to Pandas HDF5 store*.h5 ----------------------------""" }, version=version) # Configuration sections s = p.add_argument_group('in', 'all about input files') s.add( '--path', default='.', # nargs=?, help='path to source file(s) to parse. Use patterns in Unix shell style' ) s.add( '--data_word_len_integer', default='2', # nargs=?, help='[bytes] => data type is int16') s.add( '--filename2timestart_format', default='%Y%m%d_%H%M', help= 'Time from file name. For example for RealTerm v3+ writes names formatted %Y-%m-%d_%H%M' ) s = p.add_argument_group('out', 'all about output files') s.add('--db_path', help='hdf5 store file path') s.add( '--table', help= 'table name in hdf5 store to write data. If not specified then will be generated on base of path of input files. Note: "*" is used to write blocks in autonumbered locations (see dask to_hdf())' ) # s.add('--tables_list', # help='tables names in hdf5 store to write data (comma separated)') s.add('--b_insert_separator', default='True', help='insert NaNs row in table after each file data end') s.add( '--b_use_old_temporary_tables', default='False', help= 'Warning! Set True only if temporary storage already have good data!' 'if True and b_skip_if_up_to_date= True then not replace temporary storage with current storage before adding data to the temporary storage' ) s.add('--b_remove_duplicates', default='False', help='Set True if you see warnings about') # 'logfield_filename_len': 255, s = p.add_argument_group('filter', 'filter all data based on min/max of parameters') s.add('--min_date', help='minimum time') # todo: set to filt_min.key and filt_min.value s.add('--max_date', help='maximum time') # todo: set to filt_max.key and filt_max.value s.add( '--min_dict', help= 'List with items in "key:value" format. Filter out (set to NaN) data of ``key`` columns if it is below ``value``' ) s.add( '--max_dict', help= 'List with items in "key:value" format. Filter out data of ``key`` columns if it is above ``value``' ) s = p.add_argument_group('program', 'program behaviour') s.add( '--return', default='<end>', # nargs=1, choices=['<cfg_from_args>', '<gen_names_and_log>', '<end>'], help= '<cfg_from_args>: returns cfg based on input args only and exit, <gen_names_and_log>: execute init_input_cols() and also returns fun_proc_loaded function... - see main()' ) return (p)
def my_argparser(): """ Configuration parser - add here common options for different inputs - add help strings for them :return p: configargparse object of parameters All p argumets are of type str (default for add_argument...), because of custom postprocessing based of args names in ini2dict """ p = my_argparser_common_part( { 'description': 'incl_load version {}'.format(version) + """ --------------------------------------------------- Processing raw inclinometer data, Saving result to indexed Pandas HDF5 store (*.h5) and *.csv saves loading log in inclinometer\scripts\log\csv2h5_inclin_Kondrashov.log -----------------------------------------------------------""" }, version) # Configuration sections s = p.add_argument_group('in', 'All about input files') s.add( '--path_cruise', default='.', # nargs=?, help='Directory where inclinometer data will be stored, subdirs:' '"_raw": required, with raw file(s)') s.add( '--raw_subdir', default='', help= 'Optional zip/rar arhive name (data will be unpacked) or subdir in "path_cruise/_raw"' ) s.add( '--raw_pattern', default="*{prefix:}{number:0>3}*.[tT][xX][tT]", help= 'Pattern to find raw files: Python "format" command pattern to format prefix and probe number.' 'where "prefix" is a --probes_prefix arg. that is in UPPER case and INCL replaced with INKL.' ) s.add('--probes_int_list', help='Note: Not affects steps 2, 3, set empty list to load all') s.add( '--probes_prefix', default='incl', help= '''Table name prefix in DB (and in raw files with modification described in --raw_pattern help). I have used "incl" for inclinometers, "w" for wavegauges. Note (at step 1): only if probes_prefix starts with "incl" or is "voln" in then raw data must be in Kondrashov format else Baranov's format. For "voln" we replace "voln_v" with "w" when saving corrected raw files and use it to name tables so only "w" in outputs and we replace "voln" with "w" to search tables''' ) s.add( '--db_coefs', default=r'd:\WorkData\~configuration~\inclinometr\190710incl.h5', help='coefs will be copied from this hdf5 store to output hdf5 store') s.add('--time_range_zeroing_list', help='See incl_h5clc') s.add( '--time_range_zeroing_dict', help= 'See incl_h5clc. Example: incl14: [2020-07-10T21:31:00, 2020-07-10T21:39:00]' ) s.add( '--dt_from_utc_seconds_dict', help= 'add this correction to loading datetime data. Can use other suffixes instead of "seconds"' ) s.add( '--dt_from_utc_hours_dict', help= 'add this correction to loading datetime data. Can use other suffixes instead of "hours"' ) s.add( '--dt_from_utc_days_dict', help= 'add this correction to loading datetime data. Can use other suffixes instead of "days"' ) s.add( '--time_start_utc_dict', help= 'Start time of probes started without time setting: when raw date start is 2000-01-01T00:00' ) s.add( '--azimuth_add_dict', help= 'degrees, adds this value to velocity direction (will sum with _azimuth_shift_deg_ coef)' ) s = p.add_argument_group('filter', 'Filter all data based on min/max of parameters') s.add( '--min_date_dict', default='0: 2020-06-28T18:00', help= 'minimum time for each probe, use probe number 0 to set default value. For step 2 only number 0 is used' ) s.add( '--max_date_dict', default='0: now', help= 'maximum time for each probe, use probe number 0 to set default value') s = p.add_argument_group('out', 'All about output files') s.add( '--db_name', help= 'output hdf5 file name, if not set then dir name will be used. 
As next steps use DB saved on previous steps do not change between steps or you will need rename source DB accordingly' ) s.add( '--aggregate_period_s_int_list', default='', help= 'bin average data in this intervals [s]. Default [None, 300, 600, 3600] if "w" in [in][probes_prefix]' ' else last in list is replaced to 7200. None means do not average. Output with result data for' ' None will be placed in hdf5 store with suffix "proc_noAvg" in separate sections for each probe. For other' ' values in list result will be placed in hdf5 store with suffix "proc" in tables named "bin{average_value}"' ' in columns named by parameter suffixed by probe number. Also result will be saved to text files with' ' names having date and suffixes for each probe and average value') s.add('--aggregate_period_s_not_to_text_int_list', default='None', help='do not save text files for this aggregate periods') s = p.add_argument_group('program', 'Program behaviour') s.add('--step_start_int', default='1', choices=[str(i) for i in [1, 2, 3, 4, 40, 50]], help='step to start') s.add( '--step_end_int', default='2', choices=['1', '2', '3', '4', '40'], help= 'step to end (inclusive, or if less than start then will run one start step only)' ) s.add('--dask_scheduler', help='can be "synchronous" (for help debugging) or "distributed"') s.add( '--load_timeout_s_float', default='180', help= 'For step 4: export asynchronously with this timeout, s (tried 600s?)') return p
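
# Illustration (not part of the original module): a hypothetical sketch of the output naming
# scheme described in the --aggregate_period_s_int_list help above (suffix "proc_noAvg" for
# non-averaged data, "proc" with tables "bin{period}" otherwise). db_name is an example.
def _out_location(db_name: str, aggregate_period_s, probe_table: str):
    """Return (hdf5 store name, table name) for one probe and one averaging period."""
    if aggregate_period_s is None:
        return f'{db_name}_proc_noAvg.h5', probe_table
    return f'{db_name}_proc.h5', f'bin{aggregate_period_s}'

# Example: _out_location('200628_cruise', 300, 'incl09') -> ('200628_cruise_proc.h5', 'bin300')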
def my_argparser(varargs=None):
    """
    todo: implement
    :return p: configargparse object of parameters
    """
    if not varargs:
        varargs = {}
    varargs.setdefault('description', '{} version {}'.format(prog, version) + """
---------------------------------
Load data from hdf5 table (or group of tables)
Calculate new data (averaging by specified interval)
Combine this data to new specified table
---------------------------------
""")
    p = my_argparser_common_part(varargs, version)

    # Fill configuration sections.
    # All arguments are of type str (the default for add_argument...) because of
    # custom postprocessing based on my_argparser names in ini2dict.
    s = p.add_argument_group('in', 'Parameters of input files')
    s.add('--db_path', default='*.h5',  # nargs=?,
          help='path to pytables hdf5 store to load data. May use patterns in Unix shell style')
    s.add('--tables_list',
          help='table names in hdf5 store to get data. Uses regexp')
    s.add('--chunksize_int', default='50000', help='limit on data loaded in memory')
    s.add('--min_date', default='2019-01-01T00:00:00', help='time range min to use')
    s.add('--max_date', help='time range max to use')
    s.add('--fs_float', help='sampling frequency of input data, Hz')

    s = p.add_argument_group('filter', 'Filter all data based on min/max of parameters')
    s.add('--min_dict',
          help='List with items in "key:value" format. Filter out (do not load) data of ``key`` columns '
               'if below ``value``')
    s.add('--max_dict',
          help='List with items in "key:value" format. Filter out data of ``key`` columns if above ``value``')
    s.add('--min_Pressure', default='-1e15',
          help='min value of Pressure range to use. Note: to filter {parameter} NaNs use some value in '
               'min_{parameter} or max_{parameter}. Example of filtering out only NaNs of Pressure using a '
               'very low min value: "--min_Pressure -1e15"')
    s.add('--max_Pressure', help='max value of Pressure range to use')

    s = p.add_argument_group('out', 'Parameters of output files')
    s.add('--out.db_path', help='hdf5 store file path')
    s.add('--table', default='psd',
          help='table name in hdf5 store to write data. If not specified then it will be generated based '
               'on the path of input files. Note: "*" is used to write blocks in autonumbered locations '
               '(see dask to_hdf())')
    s.add('--split_period', default='100Y',
          help='pandas offset string (5D, H, ...) to process and output in separate blocks. The number of '
               'spectra is split_period/overlap_float. Use big values to not split')

    s = p.add_argument_group('proc', 'Processing parameters')
    s.add('--overlap_float',
          help='period overlap ratio [0, 1): 0 - no overlap. 0.5 for default dt_interval')
    s.add('--time_intervals_center_list',
          help='list of interval centers to process. Used only if period is not used')
    s.add('--dt_interval_hours',
          help='time range of each interval. By default it will be set to the split_period in units of the '
               'suffix (hours+minutes)')
    s.add('--dt_interval_minutes')
    s.add('--fmin_float',  # todo: separate limits for different parameters
          help='min output frequency to calc')
    s.add('--fmax_float', help='max output frequency to calc')
    s.add('--calc_version', default='trigonometric(incl)',
          choices=['trigonometric(incl)', 'polynom(force)'],
          help='string: variant of processing Vabs(inclination):')
    s.add('--max_incl_of_fit_deg_float',
          help='Overwrites the last coefficient of the trigonometric version of g: Vabs = g(Inclination). '
               'It corresponds to the point where g(x) = Vabs(inclination) starts to bend down. To prevent '
               'this, g is replaced with a line after this point, so {\Delta}^{2}y ≥ 0 for '
               'x > max_incl_of_fit_deg')

    s = p.add_argument_group('program', 'Program behaviour')
    s.add('--return', default='<end>',
          choices=['<return_cfg>', '<return_cfg_with_options>'],
          help='executes part of the code and returns parameters after skipping some code')
    return p
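
# Illustration (not part of the original module): the --dt_interval_* help above says the
# interval length defaults to --split_period, with the hours and minutes suffix options summed.
# A hypothetical way to express both with pandas; offset values here are examples.
import pandas as pd

split_period = '5D'
dt_interval = pd.Timedelta(split_period)  # Timedelta('5 days 00:00:00')
# Suffixed options summed, e.g. dt_interval_hours='1', dt_interval_minutes='30':
dt_interval_from_suffixes = pd.Timedelta(hours=1) + pd.Timedelta(minutes=30)  # Timedelta('0 days 01:30:00')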