Example #1
def filter_local(
        d: Union[pd.DataFrame, dd.DataFrame, Mapping[str, pd.Series],
                 Mapping[str, dd.Series]],
        cfg_filter: Mapping[str, Any]) -> Union[pd.DataFrame, dd.DataFrame]:
    """
    Filter values without changing output size: set values exceeding the limits to NaN
    :param d: DataFrame
    :param cfg_filter: dict with sub-dicts "min" and "max" whose items have:
     - keys equal to column names to filter, or regex strings to select columns ("*" or "[" must be present
    for the key to be treated as a regex),
     - values equal to the min and max limits respectively.
    :return: filtered d with bad values replaced by NaN

    """
    for limit, f_compare in [('min', lambda x, v: x > v),
                             ('max', lambda x, v: x < v)]:
        # todo: check if it is better to use between(left, right, inclusive=True)
        if not cfg_filter.get(limit):
            continue
        for key, v in cfg_filter[limit].items():
            if ('*' in key) or ('[' in key):  # get multiple keys by regex
                keys = [c for c in d.columns if re.fullmatch(key, c)]
                d[keys] = d[keys].where(f_compare(d[keys], v))
                key = ', '.join(keys)  # for logging only
            else:
                try:
                    d[key] = d[key].where(f_compare(d[key], v))
                except KeyError as e:  # allow redundant parameters in config
                    l.warning('Can not filter this parameter %s',
                              standard_error_info(e))
            l.debug('filtering %s(%s) = %g', limit, key, v)
    return d
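A minimal usage sketch of the expected cfg_filter structure (column names and limits are illustrative assumptions; assumes filter_local and its module-level logger/imports are available):

import pandas as pd

df = pd.DataFrame({'Temp': [5.0, -90.0, 20.0], 'Sal': [7.0, 8.0, 99.0]})
cfg_filter = {
    'min': {'Temp': -2.0},                 # keep Temp > -2, else NaN
    'max': {'Temp': 35.0, 'Sal': 40.0},    # keep Temp < 35 and Sal < 40, else NaN
}
df = filter_local(df, cfg_filter)          # -90.0 and 99.0 become NaN, shape unchanged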
Example #2
def filter_local_arr(
        d: Mapping[str, Sequence],
        cfg_filter: Mapping[str, Any]) -> Mapping[str, np.ndarray]:
    """
    Same as filter_local but for a dict of arrays.
    Filter values without changing output size: set values exceeding the limits to NaN
    :param d: dict of arrays
    :param cfg_filter: dict with sub-dicts "min" and "max" whose items have:
     - keys equal to column names to filter, or regex strings to select columns ("*" or "[" must be present
    for the key to be treated as a regex),
     - values equal to the min and max limits respectively.
    :return: filtered input where filtered values are np.ndarrays with bad values replaced by NaN

    """
    for limit, f_compare in [('min', lambda x, v: x < v),
                             ('max', lambda x, v: x > v)]:
        # todo: check if it is better to use between(left, right, inclusive=True)
        if not cfg_filter.get(limit):
            continue
        for key, v in cfg_filter[limit].items():
            if ('*' in key) or ('[' in key):  # get multiple keys by regex
                # Filter multiple matching keys at once
                keys = [c for c in d if re.fullmatch(key, c)]
                for k in keys:
                    d[k][f_compare(d[k], v)] = np.NaN
                key = ', '.join(keys)  # for logging only
            else:
                try:
                    d[key][f_compare(d[key], v)] = np.NaN
                except KeyError as e:  # allow redundant parameters in config
                    l.warning('Can not filter this parameter %s',
                              standard_error_info(e))
            l.debug('filtering %s(%s) = %g', limit, key, v)
    return d
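The same idea for a plain dict of numpy arrays, filtered in place by boolean-mask assignment (names and limits are illustrative; assumes filter_local_arr and its module-level logger/imports are available):

import numpy as np

d = {'Temp': np.array([5.0, -90.0, 20.0])}
d = filter_local_arr(d, {'min': {'Temp': -2.0}})   # d['Temp'] -> array([ 5., nan, 20.])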
Example #3
def main_init(
    cfg,
    cs_store_name,
    __file__=None,
):
    """
    - prints parameters
    - prints message that program (__file__ or cs_store_name) started
    - converts cfg parameters to types according to their name prefixes/suffixes (see ini2dict())

    :param cfg:
    :param cs_store_name:
    :param __file__:
    :return:
    """

    # global lf
    # if cfg.search_path is not None:
    #     override_path = hydra.utils.to_absolute_path(cfg.search_path)
    #     override_conf = OmegaConf.load(override_path)
    #     cfg = OmegaConf.merge(cfg, override_conf)

    print("Working directory : {}".format(os.getcwd()))
    # todo: print only if config changed
    print(OmegaConf.to_yaml(cfg))

    # cfg = cfg_from_args(argparser_files(), **kwargs)
    if not cfg.program.return_:
        print('Can not initialise')
        return cfg
    elif cfg.program.return_ == '<cfg_from_args>':  # to help testing
        return cfg

    hydra.verbose = 1 if cfg.program.verbose == 'DEBUG' else 0  # made compatible to my old cfg

    print('\n' + (this_prog_basename(__file__) if __file__ else cs_store_name),
          end=' started. ')
    try:
        cfg_t = ini2dict(
            cfg)  # fields named with type pre/suffixes are converted
    except MissingMandatoryValue as e:
        lf.error(standard_error_info(e))
        raise Ex_nothing_done()
    except Exception:
        lf.exception('startup error')
        raise

    # OmegaConf.update(cfg, "in", cfg.input, merge=False)  # error
    # to allow non primitive types (cfg.out['db']) and special words field names ('in'):
    # cfg = omegaconf.OmegaConf.to_container(cfg)
    return cfg_t
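A tiny self-contained sketch of the OmegaConf printing step used above (the config content is an illustrative assumption):

from omegaconf import OmegaConf

cfg = OmegaConf.create({'program': {'verbose': 'INFO', 'return_': True}})
print(OmegaConf.to_yaml(cfg))      # prints the config as YAML, as main_init() does on startup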
Example #4
def filename2date(inF):
    """

    :param inF: base name of source data file (20171015_090558p1) yyyymmdd_HHMMSS*
    :param cfg: dict with keys:
        out:
            dir, paths - pattern path
            b_images_only, b_update_existed - see command line arguments,
        in:
            import_method, header if import_method == 'ImportFile',
            add_custom
    :param g:
    :return:
    """
    a = np.array(inF,
                 dtype={
                     'yyyy': ('a4', 0),
                     'mm': ('a2', 4),
                     'dd': ('a2', 6),
                     'HH': ('a2', 9),
                     'MM': ('a2', 11),
                     'SS': ('a2', 13)
                 })
    try:
        date = np.array(
            a['yyyy'].astype(object) + b'-' + a['mm'].astype(object) +
            b'-' + a['dd'].astype(object) + b'T' +
            a['HH'].astype(object) + b':' + a['MM'].astype(object) +
            b':' + a['SS'].astype(object),
            '|S19',
            ndmin=1)
        # date = b'%(yyyy)b-%(mm)b-%(dd)bT%(HH)02b-%(MM)02b-%(SS)02b' % a
    except Exception as e:
        print('Can not convert date: ', standard_error_info(e))
        raise e
    return convertNumpyArrayOfStrings(date, 'datetime64[ns]')
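A minimal self-contained sketch of the fixed-width structured-dtype trick used above (the file name is the example from the docstring):

import numpy as np

dt = np.dtype({'yyyy': ('S4', 0), 'mm': ('S2', 4), 'dd': ('S2', 6),
               'HH': ('S2', 9), 'MM': ('S2', 11), 'SS': ('S2', 13)})
name = b'20171015_090558p1'
a = np.frombuffer(name[:dt.itemsize], dtype=dt)     # view byte slices at the given offsets
iso = (a['yyyy'][0] + b'-' + a['mm'][0] + b'-' + a['dd'][0] + b'T' +
       a['HH'][0] + b':' + a['MM'][0] + b':' + a['SS'][0]).decode()
print(np.datetime64(iso))                           # 2017-10-15T09:05:58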
Example #5
def main(new_arg=None):
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')

    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(**cfg['in'],
                                      b_interact=cfg['program']['b_interact'],
                                      cfg_search_parent=cfg['out'])
        h5init(cfg['in'], cfg['out'])
    except Ex_nothing_done as e:
        print(e.message)
        exit()

    df_dummy = pd.DataFrame(
        np.full(1,
                np.NaN,
                dtype=np.dtype({
                    'formats': ['float64', 'float64'],
                    'names': cfg['out']['tracks_cols'][1:]
                })),
        index=(pd.NaT, ))  # used for insert separator lines

    if 'routes_cols' not in cfg['in']:
        cfg['in']['routes_cols'] = cfg['in']['waypoints_cols']
    if 'routes_cols' not in cfg['out']:
        cfg['out']['routes_cols'] = cfg['out'][
            'waypoints_cols']  # cfg['in']['routes_cols']  #
    # Writing
    if True:  # try:
        l.warning('processing ' + str(cfg['in']['nfiles']) + ' file' +
                  ('s:' if cfg['in']['nfiles'] > 1 else ':'))
        cfg['out']['log'] = {}
        set_field_if_no(cfg['out'], 'table_prefix',
                        PurePath(cfg['in']['path']).stem)
        cfg['out']['table_prefix'] = cfg['out']['table_prefix'].replace(
            '-', '')
        if len([t for t in cfg['out']['tables'] if len(t)]) > 1:
            cfg['out']['tables'] = \
                [cfg['out']['table_prefix'] + '_' + s for s in cfg['out']['tables']]
            cfg['out']['tables_log'] = \
                [cfg['out']['table_prefix'] + '_' + s for s in cfg['out']['tables_log']]

        tables = dict(zip(df_names, cfg['out']['tables']))
        tables_log = dict(zip(df_names, cfg['out']['tables_log']))
        # Can not save path to DB (useless?) so set  for this max file name length:
        set_field_if_no(cfg['out'], 'logfield_fileName_len', 50)
        cfg['out']['index_level2_cols'] = cfg['in']['routes_cols'][0]

        # ###############################################################
        # ## Cumulate all data in cfg['out']['path_temp'] ##################
        ## Main circle ############################################################
        for i1_file, path_gpx in h5_dispenser_and_names_gen(
                cfg['in'], cfg['out']):
            l.info('{}. {}: '.format(i1_file, path_gpx.name))
            # Loading data
            dfs = gpxConvert(cfg, path_gpx)
            print('write', end=': ')
            sys_stdout.flush()
            for key, df in dfs.items():
                if (not tables.get(key)) or df.empty:
                    continue
                elif key == 'tracks':
                    # Save last time to be able to filter the next file
                    cfg['in']['time_last'] = df.index[-1]

                sort_time = False if key in {'waypoints', 'routes'} else None

                # monkey patching
                if 'tracker' in tables[key]:
                    # Also {} must be in tables[key]. todo: better key+'_fun_tracker' in cfg['out']?
                    # Trackers processing
                    trackers_numbers = {
                        '0-3106432': '1',
                        '0-2575092': '2',
                        '0-3124620': '3',
                        '0-3125300': '4',
                        '0-3125411': '5',
                        '0-3126104': '6'
                    }
                    tables_pattern = tables[key]
                    tables_log_pattern = tables_log[key]

                    df['comment'] = df['comment'].str.split(" @",
                                                            n=1,
                                                            expand=True)[0]
                    # split data and save to multiple tables
                    df_all = df.set_index(['comment', df.index])
                    for sn, n in trackers_numbers.items(
                    ):  # set(df_all.index.get_level_values(0))
                        try:
                            df = df_all.loc[sn]
                        except KeyError:
                            continue
                        # redefine saving parameters
                        cfg['out']['table'] = tables_pattern.format(
                            trackers_numbers[sn])
                        cfg['out']['table_log'] = tables_log_pattern.format(
                            trackers_numbers[sn])
                        call_with_valid_kwargs(df_filter_and_save_to_h5,
                                               df,
                                               **cfg,
                                               input=cfg['in'],
                                               sort_time=sort_time)
                else:
                    cfg['out']['table'] = tables[key]
                    cfg['out']['table_log'] = tables_log[key]
                    call_with_valid_kwargs(df_filter_and_save_to_h5,
                                           df,
                                           **cfg,
                                           input=cfg['in'],
                                           sort_time=sort_time)

    # try:
    # if cfg['out']['b_remove_duplicates']:
    #     for tbls in cfg['out']['tables_have_wrote']:
    #         for tblName in tbls:
    #             cfg['out']['db'][tblName].drop_duplicates(keep='last', inplace= True)
    # print('Create index', end=', ')

    # create_table_index calls create_table whose docs say "cannot index Time64Col() or ComplexCol",
    # so load it, index, then save
    # level2_index = None
    # df = cfg['out']['db'][tblName] # last commented
    # df.set_index([navp_all_index, level2_index])
    # df.sort_index()

    # cfg['out']['db'][tblName].sort_index(inplace=True)

    # if df is not None:  # resave
    #     df_log = cfg['out']['db'][tblName]
    #     cfg['out']['db'].remove(tbls[0])
    #     cfg['out']['db'][tbls[0]] = df
    #     cfg['out']['db'][tbls[1]] = df_log

    try:
        pass
    except Exception as e:
        print('The end. There is an error ', standard_error_info(e))

    #     import traceback, code
    #     from sys import exc_info as sys_exc_info
    #
    #     tb = sys_exc_info()[2]  # type, value,
    #     traceback.print_exc()
    #     last_frame = lambda tb=tb: last_frame(tb.tb_next) if tb.tb_next else tb
    #     frame = last_frame().tb_frame
    #     ns = dict(frame.f_globals)
    #     ns.update(frame.f_locals)
    #     code.interact(local=ns)
    # finally:
    #     cfg['out']['db'].close()
    #     failed_storages= h5move_tables(cfg['out'], cfg['out']['tables_have_wrote'])

    try:
        failed_storages = h5move_tables(cfg['out'],
                                        tbl_names=cfg['out'].get(
                                            'tables_have_wrote', set()))
        print('Finishing...' if failed_storages else 'Ok.', end=' ')
        # Sort if we have any processed data that needs it (not the case for routes and waypoints), else don't, because ``ptprepack`` does not close the hdf5 source if it finds no data
        if cfg['in'].get('time_last'):
            cfg['out']['b_remove_duplicates'] = True
            h5index_sort(
                cfg['out'],
                out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
                in_storages=failed_storages,
                tables=cfg['out'].get('tables_have_wrote', set()))
    except Ex_nothing_done:
        print('ok')
Example #6
def h5_append(cfg_out: Dict[str, Any],
              df: Union[pd.DataFrame, dd.DataFrame],
              log,
              log_dt_from_utc=pd.Timedelta(0),
              tim=None):
    '''
    Append dataframe to Store: df to cfg_out['table'] ``table`` node and
    append child table with 1 row of metadata including 'index' and 'DateEnd' which
    are calculated as the first and last elements of df.index

    :param df: pandas or dask dataframe to append. If dask then log_dt_from_utc must be None (not assign log metadata here)
    :param log: dict which will be appended to child tables, cfg_out['tables_log']
    :param cfg_out: dict with fields:
        db: opened hdf5 store in write mode
        table: name of table to update (or tables: list, then used only 1st element)
        table_log: name of child table (or tables_log: list, then used only 1st element)
        tables: None - to return with done nothing!
                list of str - to assign cfg_out['table'] = cfg_out['tables'][0]
        tables_log: list of str - to assign cfg_out['table_log'] = cfg_out['tables_log'][0]
        b_insert_separator: (optional), freq (optional)
        data_columns: optional, list of column names to write.
        chunksize: may be None, but then chunksize_percent must be set to calculate:
            chunksize = len(df) * chunksize_percent / 100
    :param log_dt_from_utc: 0 or pd.Timedelta - to correct start and end time: index and DateEnd.
        Note: if log_dt_from_utc is None then start and end time: 'Date0' and 'DateEnd' fields of log must be filled right already
    :return: None
    :updates:
        log:
            'Date0' and 'DateEnd'
        cfg_out: only if not defined already:
            cfg_out['table_log'] = cfg_out['tables_log'][0]
            table_log
            tables_have_wrote set updated (or created) with tuple `(table, table_log)`
    '''

    df_len = len(df) if tim is None else len(
        tim)  # use computed values if possible for faster dask
    if df_len:  # dask.dataframe.empty is not implemented
        if cfg_out.get('b_insert_separator'):
            # Add separation row of NaN
            msg_func = f'{df_len}rows+1dummy'
            cfg_out.setdefault('fs')
            df = h5_append_dummy_row(df, cfg_out['fs'], tim)
            df_len += 1
        else:
            msg_func = f'{df_len}rows'

        # Save to store
        # check/set tables names
        if 'tables' in cfg_out:
            if cfg_out['tables'] is None:
                l.info('selected(%s)... ', msg_func)
                return
            set_field_if_no(cfg_out, 'table', cfg_out['tables'][0])

        l.info('h5_append(%s)... ', msg_func)
        set_field_if_no(cfg_out, 'nfiles', 1)

        if 'chunksize' in cfg_out and cfg_out['chunksize'] is None:
            if ('chunksize_percent' in cfg_out):  # based on first file
                cfg_out['chunksize'] = int(
                    df_len * cfg_out['chunksize_percent'] / 1000) * 10
                if cfg_out['chunksize'] < 10000:
                    cfg_out['chunksize'] = 10000
            else:
                cfg_out['chunksize'] = 10000

                if df_len <= 10000 and isinstance(df, dd.DataFrame):
                    df = df.compute()  # dask not writes "all NaN" rows

            # , compute=False
            # cfg_out['db'].append(cfg_out['table'], df, data_columns=True, index=False,
            #              chunksize=cfg_out['chunksize'])
        table = None
        try:
            table = df_data_append_fun(df, cfg_out['table'], cfg_out)
        except ValueError as e:
            table = h5append_on_inconsistent_index(cfg_out, cfg_out['table'],
                                                   df, df_data_append_fun, e,
                                                   msg_func)
        except TypeError as e:  # (, AttributeError)?
            if isinstance(df, dd.DataFrame):
                last_nan_row = df.loc[df.index.compute()[-1]].compute()
                # df.compute().query("index >= Timestamp('{}')".format(df.index.compute()[-1].tz_convert(None))) ??? works
                # df.query("index > Timestamp('{}')".format(t_end.tz_convert(None)), meta) #df.query(f"index > {t_end}").compute()
                if all(last_nan_row.isna()):
                    l.exception(
                        f'{msg_func}: dask does not write the separator? Retrying using pandas'
                    )
                    table = df_data_append_fun(
                        last_nan_row,
                        cfg_out['table'],
                        cfg_out,
                        min_itemsize={
                            c: 1
                            for c in (cfg_out['data_columns'] if cfg_out.
                                      get('data_columns', True
                                          ) is not True else df.columns)
                        })
                    # sometimes pandas/dask has a bug (thinks int is a str?): when adding a row of NaNs it tries to find ``min_itemsize`` and obtains NaN (for float too, why?) which leads to an error
                else:
                    l.exception(msg_func)
            else:
                l.error('%s: Can not write to store. %s', msg_func,
                        standard_error_info(e))
                raise (e)
        except Exception as e:
            l.error('%s: Can not write to store. %s', msg_func,
                    standard_error_info(e))
            raise (e)

    # run even if df is empty because we may still need to write the log
    table_log = h5add_log(cfg_out, df, log, tim, log_dt_from_utc)

    _t = (table, table_log)
    if 'tables_have_wrote' in cfg_out:
        cfg_out['tables_have_wrote'].add(_t)
    else:
        cfg_out['tables_have_wrote'] = {_t}
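A minimal self-contained sketch (assumed file and table names) of the "insert separator row" idea that h5_append implements via h5_append_dummy_row before appending to the pandas HDF5 store:

import numpy as np
import pandas as pd

df = pd.DataFrame({'v': [1.0, 2.0]},
                  index=pd.date_range('2017-10-15 09:05:58', periods=2, freq='s'))
sep = pd.DataFrame({'v': [np.nan]}, index=[df.index[-1] + pd.Timedelta('1s')])  # dummy NaN row
with pd.HDFStore('demo.h5', mode='w') as store:
    store.append('tbl', pd.concat([df, sep]), format='table', data_columns=True)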
Example #7
def h5append_on_inconsistent_index(cfg_out, tbl_parent, df, df_append_fun, e,
                                   msg_func):
    """

    :param cfg_out:
    :param tbl_parent:
    :param df:
    :param df_append_fun:
    :param e:
    :param msg_func:
    :return:
    """

    if tbl_parent is None:
        tbl_parent = cfg_out['table']

    error_info_list = [s for s in e.args if isinstance(s, str)]
    msg = msg_func + ' Error ({}): '.format(
        e.__class__) + '\n==> '.join(error_info_list)
    if not error_info_list:
        l.error(msg)
        raise e
    b_correct_time = False
    b_correct_str = False
    b_correct_cols = False
    str_check = 'invalid info for [index] for [tz]'
    if error_info_list[0].startswith(
            str_check) or error_info_list[0] == 'Not consistent index':
        if error_info_list[0] == 'Not consistent index':
            msg += 'Not consistent index detected'
        l.error(
            msg +
            'Not consistent index time zone? Changing index to standard UTC')
        b_correct_time = True
    elif error_info_list[0].startswith('Trying to store a string with len'):
        b_correct_str = True
        l.error(msg + error_info_list[0])  # ?
    elif error_info_list[0].startswith(
            'cannot match existing table structure'):
        b_correct_cols = True
        l.error(f'{msg} => Adding columns...')
        # raise e #?
    elif error_info_list[0].startswith('invalid combination of [values_axes] on appending data') or \
            error_info_list[0].startswith('invalid combination of [non_index_axes] on appending data'):
        # old pandas versions have the word "combinate" instead of "combination"!
        b_correct_cols = True
        l.error(f'{msg} => Adding columns/converting type...')
    else:  # Can only append to Tables - need resave?
        l.error(f'{msg} => Can not handle this error!')
        raise e

    # Align types
    # -----------
    # Make index to be UTC
    df_cor = cfg_out['db'][tbl_parent]
    b_df_cor_changed = False

    def align_columns(df, df_ref, columns=None):
        """

        :param df: dataframe to change. Will be updated implicitly!
        :param df_ref: reference dataframe
        :param columns:
        :return: updated df
        """
        if columns is None:
            columns = df.columns
        df = df.reindex(df_ref.columns, axis="columns", copy=False)
        for col, typ in df_ref[columns].dtypes.items():
            fill_value = np.array(
                0 if np.issubdtype(typ, np.integer) else
                np.NaN if np.issubdtype(typ, np.floating) else '',
                dtype=typ)
            df[col] = fill_value
        return df

    if b_correct_time:
        # change stored to UTC
        df_cor.index = pd.DatetimeIndex(df_cor.index.tz_convert(tz=tzUTC))
        b_df_cor_changed = True

    elif b_correct_cols:
        new_cols = list(set(df.columns).difference(df_cor.columns))
        if new_cols:
            df_cor = align_columns(df_cor, df, columns=new_cols)
            b_df_cor_changed = True
            # df_cor = df_cor.reindex(columns=df.columns, copy=False)
        # add columns to df same as in store
        new_cols = list(set(df_cor.columns).difference(df.columns))
        if new_cols:
            if isinstance(df, dd.DataFrame):
                df = df.compute()
            df = align_columns(df, df_cor, columns=new_cols)

    for col, dtype in zip(df_cor.columns, df_cor.dtypes):
        d = df_cor[col]
        # if isinstance(d[0], pd.datetime):
        if dtype != df[col].dtype:
            if b_correct_time:
                if isinstance(d.iloc[0], pd.Timestamp):
                    df_cor[col] = d.dt.tz_convert(tz=df[col].dt.tz)
                    b_df_cor_changed = True
            elif b_correct_str:
                # todo: correct str length
                pass
            else:
                try:
                    dtype_max = np.result_type(df_cor[col].dtype,
                                               df[col].dtype)
                    if df[col].dtype != dtype_max:
                        df[col] = df[col].astype(dtype_max)
                    if df_cor[col].dtype != dtype_max:
                        df_cor[col] = df_cor[col].astype(dtype_max)
                        b_df_cor_changed = True
                except Exception:
                    l.exception('Col "%s" has no numpy dtype?', col)
                    df_cor[col] = df_cor[col].astype(df[col].dtype)
                    b_df_cor_changed = True
                # pd.api.types.infer_dtype(df_cor.loc[df_cor.index[0], col], df.loc[df.index[0], col])
    if b_df_cor_changed:
        # Update all cfg_out['db'] store data
        try:
            with ReplaceTableKeepingChilds([df_cor, df], tbl_parent, cfg_out,
                                           df_append_fun):
                pass
            return tbl_parent
        except HDF5ExtError as e:
            l.exception(e)
            raise
        except Exception as e:
            l.error('%s Can not write to store. May be data corrupted. %s',
                    msg_func, standard_error_info(e))
            raise
    else:
        # Append corrected data to cfg_out['db'] store
        try:
            return df_append_fun(df, tbl_parent, cfg_out)
        except HDF5ExtError as e:
            l.exception(e)
            raise
        except Exception as e:
            l.error('%s Can not write to store. May be data corrupted. %s',
                    msg_func, standard_error_info(e))
            raise
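A hedged, self-contained sketch of the dtype-promotion step above in isolation (the series contents are illustrative):

import numpy as np
import pandas as pd

stored = pd.Series([1, 2, 3], dtype='int32')       # data already in the store
new = pd.Series([1.5, 2.5], dtype='float64')       # data to append
dtype_max = np.result_type(stored.dtype, new.dtype)
if stored.dtype != dtype_max:
    stored = stored.astype(dtype_max)              # rewrite the stored data with the promoted dtype
if new.dtype != dtype_max:
    new = new.astype(dtype_max)
print(dtype_max)                                   # float64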
Example #8
Load or set default
Ag_old, Cg: scaling coefficients for inclinometer
Ah_old, Ch: scaling coefficients for magnetometer
"""
try:

    with h5py.File(cfg['out']['db_path_coef' if 'db_path_coef' in cfg['out'] else 'db_path']
            , "r") as h5source:
        tblD = cfg['out']['table_coef' if 'table_coef' in cfg['out'] else 'table']
        print(f'loading coefficient from {h5source.file.name}/{tblD}')
        Ag_old = h5source[tblD + '//coef//G//A'][()]
        Cg = h5source[tblD + '//coef//G//C'][()]
        Ah_old = h5source[tblD + '//coef//H//A'][()]
        Ch = h5source[tblD + '//coef//H//C'][()]
except Exception as e:
    print(standard_error_info(e), '- Can not load coef. Using default!\n')
    Ah_old = np.float64([[1, 0, 0], [0, 1, 0], [0, 0, 1]])  # /500.0
    Ag_old = np.float64([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) / 16384.0
    Cg = np.float64([[0, 0, 0]])
    Ch = np.float64([[0, 0, 0]])
# @-others

# @-<<loading>>
# @+others
# @+node:korzh.20180603123200.1: ** zeroing
try:

    if True:
        # zeroing
        start_end = h5q_interval2coord(
            db_path=cfg['out']['db_path'],
Example #9
def griddata_by_surfer(
        ctd, path_stem_pattern: Union[str, Path] = r'%TEMP%\xyz{}',
        margins: Union[bool, Tuple[float, float], None] = True,
        xCol: str='Lon', yCol: str='Lat', zCols: Sequence[str]= None,
        SearchEnable=True, BlankOutsideHull=1,
        DupMethod=15, ShowReport=False,  # DupMethod=15=constants.srfDupAvg
        **kwargs):
    """
    Grid by Surfer
    :param ctd: numpy structured array with coordinate and data columns
    :param path_stem_pattern:
    :param margins: extend grid size in all directions:
      - True - use 10% of the limits (xMin...) if they are passed, else use the InflateHull value
      - Tuple[float, float] - add these values to the edges. Note: limits (xMin...) args must be provided in this case
    :param zCols: z column names in ctd
    :param xCol: x column name in ctd
    :param yCol: y column name in ctd
    :param kwargs: other Surfer.GridData4() arguments
    :return:
    """
    global Surfer
    if not Surfer:
        gencache.EnsureModule('{54C3F9A2-980B-1068-83F9-0000C02A351C}', 0, 1, 4)
        try:
            Surfer = Dispatch("Surfer.Application")
        except pywintypes.com_error as e:
            print("Open Surfer! ", standard_error_info(e))
            try:
                Surfer = Dispatch("Surfer.Application")
            except pywintypes.com_error as e:
                print("Open Surfer! ", standard_error_info(e))
                raise
    try:
        tmpF = f"{path_stem_pattern.format('~temp')}.csv"
    except AttributeError:
        path_stem_pattern = str(path_stem_pattern)
        tmpF = f"{path_stem_pattern.format('~temp')}.csv"
    kwargs['xCol'] = ctd.dtype.names.index(xCol) + 1
    kwargs['yCol'] = ctd.dtype.names.index(yCol) + 1
    if zCols is None:
        zCols = list(ctd.dtype.names)
        zCols.remove(xCol)
        zCols.remove(yCol)

    izCols = [ctd.dtype.names.index(zCol) + 1 for zCol in zCols]  # ctd.columns.get_indexer(zCols) + 1
    np.savetxt(tmpF, ctd, header=','.join(ctd.dtype.names), delimiter=',', comments='')

    if margins:
        if isinstance(margins, bool):
            margins = [0, 0]
            for i, coord in enumerate(('x', 'y')):
                if (f'{coord}Min' in kwargs) and (f'{coord}Max' in kwargs):
                    margins[i] = (kwargs[f'{coord}Max'] - kwargs[f'{coord}Min']) / 10
                elif kwargs.get('InflateHull'):
                    margins[i] = kwargs['InflateHull']

        kwargs['xMin'] -= margins[0]
        kwargs['xMax'] += margins[0]
        kwargs['yMin'] -= margins[1]
        kwargs['yMax'] += margins[1]

        if kwargs.get('SearchRad1') is None:
            kwargs['SearchRad1'] = margins[1] * 3

        if kwargs.get('InflateHull') is None:
            kwargs['InflateHull'] = margins[1]


        # const={'srfDupAvg': 15, 'srfGridFmtS7': 3}
    # gdal_geotransform = (x_min, cfg['out']['x_resolution'], 0, -y_min, 0, -cfg['y_resolution_use'])
    for i, kwargs['zCol'] in enumerate(izCols):
        outGrd = path_stem_pattern.format(zCols[i]) + '.grd'
        try:
            Surfer.GridData4(
                Algorithm=constants.srfKriging, DataFile=tmpF, OutGrid=outGrd,
                SearchEnable=(SearchEnable and ctd.size > 3), BlankOutsideHull=BlankOutsideHull, DupMethod=DupMethod,
                ShowReport=ShowReport, **kwargs)
        except pywintypes.com_error as e:
            print(standard_error_info(e))
            if i >= 0:  # always True, but in debug mode you can set i = -1 to continue without raising (no side effects)
                raise
    return margins
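A self-contained sketch of the margins logic above in isolation (limit values are illustrative; the kwargs keys match the Surfer.GridData4 arguments used in the function):

kwargs = {'xMin': 10.0, 'xMax': 20.0, 'yMin': 54.0, 'yMax': 56.0}
margins = [0.0, 0.0]
for i, coord in enumerate(('x', 'y')):
    margins[i] = (kwargs[f'{coord}Max'] - kwargs[f'{coord}Min']) / 10
kwargs['xMin'] -= margins[0]; kwargs['xMax'] += margins[0]
kwargs['yMin'] -= margins[1]; kwargs['yMax'] += margins[1]
print(kwargs)   # {'xMin': 9.0, 'xMax': 21.0, 'yMin': 53.8, 'yMax': 56.2}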
def main():
    print('\n' + this_prog_basename(__file__), end=' started. ')
    # try:
    #     cfg['in']= init_file_names(cfg['in'])
    # except Ex_nothing_done as e:
    #     print(e.message)
    #     return()

    # gen_names = ge_names(cfg)

    cfg = veuszPropagate([
        os_path.join(os_path.dirname(file_veuszPropagate),
                     'veuszPropagate_incl.ini'),
        # '--path', r'd:\workData\BalticSea\171003_ANS36\inclinometr\171017\171017#??.TXT',  # windows
        '--path',
        r'/mnt/D/workData/BalticSea/171003Strahov/inclinometr/171017/171017#??.TXT',
        # in Linux match the case is important
        '--pattern_path',
        '171017#01.vsz',
        '--log',
        os_path.join(os_path.dirname(file_veuszPropagate),
                     'logs/viewsPropagate.log'),
        '--data_yield_prefix',
        'Hxyz',
        '--eval_list',
        """
        "ImportFileCSV(u'{nameRFE}', blanksaredata=True, encoding='ascii', headermode='1st', linked=True, dsprefix='counts', rowsignore=2, skipwhitespace=True)", 
        "TagDatasets(u'source', [u'countsAx', u'countsAy', u'countsAz', u'countsBattery', u'countsDay', u'countsHour', u'countsMinute', u'countsMonth', u'countsMx', u'countsMy', u'countsMz', u'countsSecond', u'countsTemp', u'countsYear'])"
        """,
        # '--import_method', 'ImportFileCSV',
        '--add_custom_list',
        'Ag_old',  # , Ch',
        '--add_custom_expressions_list',
        # """
        # 'float64([[1,0,0],[0,1,0],[0,0,1]])/16384.0'
        # """
        None,
        '--before_next_list',
        'restore_config',  # 'Close(), '
        '--export_pages_int_list',
        '1,3,4,5',  #
        '--veusz_path',
        '/usr/lib64/python3.6/site-packages/veusz-2.1.1-py3.6-linux-x86_64.egg/veusz',
        # '/home/korzh/Python/other_sources/veusz/veusz',
        '-V',
        'DEBUG'
    ])
    # os_path.dirname( #
    if not cfg:
        exit(0)

    file_cal_pattern = cfg['in']['path'].with_name(
        '171121zeroing/INKL_{:03}_data.txt')
    iFile = cfg['in']['start_file']  # inclinometers are numbered from 1
    cfgin_update = None
    while True:
        iFile += 1
        try:
            d, log = cfg['co_send_data'].send(cfgin_update)

        except (GeneratorExit, StopIteration):
            print('ok>')
            break
        except Exception as e:
            print('There is an error ', standard_error_info(e))
            continue

        i = int(log['fileName'].split('#')[1])

        Hxyz = d['Hxyz']
        # Hxyz = np.column_stack((a['Mx'], a['My'], a['Mz']))[slice(*iUseTime.flat)].T
        if len(Hxyz) < 3 or not np.prod(
                Hxyz.shape):  # 3 is ok but may be empty
            print('\nNo data from Veusz!\n')
            bBad = True
        else:
            file_data = file_cal_pattern.format(i)
            with open(file_data) as f:
                Ag_str = f.read()
            Ag_str = re.sub(r'((?<=\d)([ ]+))|(?=\n)', r',\1', Ag_str)
            Ag = np.float64(eval(Ag_str))
            Ag_str = 'float64({})'.format(Ag_str)
            if Ag[0, 0] > 10:
                print('\nBad calibration!\n')
                bBad = True
            else:
                bBad = False

        if bBad:
            print('using default coef!')
            Ag_str = 'float64([[1,0,0],[0,1,0],[0,0,1]])/16384.0'
        """
        AddCustom('definition', u'Ch', u'float64([[60,-160,-650]])')
        AddCustom('definition', u'Ah_old', u'float64([[50,0,0],\n[0,65,0],\n[0,0,90]])*1e-4')
        """

        # calibrate_plot(Hxyz, Ah, b)
        # A_str, b_str = coef2str(Ah, b)
        # b_str= 'float64([{}])'.format(b_str)
        if not bBad:
            print(
                'calibration coefficient loaded ({}): '.format(
                    os_path.basename(file_data)), 'A = \n', Ag_str)
        cfgin_update = {'add_custom_expressions': [Ag_str]}
Example #11
def csv_to_h5(
        read_csv_args,
        to_hdf_args,
        dates_formats: Mapping[str, str],
        correct_fun: Tuple[None, bool, Callable[[pd.DataFrame], None]] = None,
        processing: Optional[Mapping[Tuple[Tuple[str], Tuple[str]], Callable[[Any], Any]]] = None,
        out_cols: Optional[Sequence] = None,
        continue_row=False,
        vaex_format: Optional[bool]=None
        ):
    """
    Read csv and write to hdf5
    :param read_csv_args: dict, must have keys:
        filepath_or_buffer, chunksize
    :param to_hdf_args:
        path_or_buf: default = read_csv_args['filepath_or_buffer'].with_suffix('.vaex.h5' if vaex_format else '.h5')
        mode: default = 'w' if not continue_row else 'a',
        key: hdf5 group name in hdf5 file where store data
        ...
    :param dates_formats:
        column: csv column name which needs to be converted from str to DateTime,
        date_format: date formats
    :param processing: dict with
        keys: ((_input cols_), (_output cols_)) and
        values: function(_input cols_) that will return _output cols_
    :param out_cols: default is all columns, excluding those that are in the inputs but not in the outputs of param:processing
    :param continue_row: csv row number (excluding header) to start with, shifting the index.
    If the output file exists and continue_row is True then continue converting starting from the row equal to the last index in it,
    useful to continue after program interruption or csv appending. If it does not exist then start from row 0, giving it index 0.
    If continue_row is an integer then start from this row, giving starting index = continue_row
    :param correct_fun: function applied to each chunk returned by read_csv() which is a frame of column data of type str
    :param vaex_format: bool, how to write chunks:
    - True: to many vaex hdf5 files which at the end are combined into a single vaex hdf5 file
    - False: appending to a single pandas hdf5 table
    - None: evaluates to True if to_hdf_args['path_or_buf'] has ".vaex" as its next to last suffix, else to False

    :return:
    """
    if to_hdf_args.get('path_or_buf'):
        if vaex_format is None:
            vaex_format = Path(str(to_hdf_args['path_or_buf']).strip()).suffixes[:-1] == ['.vaex']
    else:  # give default name to output file
        to_hdf_args['path_or_buf'] = Path(read_csv_args['filepath_or_buffer']).with_suffix(
            f'{".vaex" if vaex_format else ""}.h5'
            )

    # Deal with vaex/pandas storing difference
    if vaex_format:
        open_for_pandas_to_hdf = None
        tmp_save_pattern, tmp_search_pattern = h5pandas_to_vaex_file_names(
            path_out_str=str(to_hdf_args['path_or_buf'])
            )
        ichunk = None
    else:
        def open_for_pandas_to_hdf(path_or_buf):
            return pd.HDFStore(
                to_hdf_args['path_or_buf'],
                to_hdf_args.get('mode', 'a' if continue_row else 'w')
                )

    # Find csv row to start
    msg_start = f'Converting in chunks of {read_csv_args["chunksize"]} rows.'
    if continue_row is True:  # isinstance(continue_same_csv, bool)
        try:
            if vaex_format:

                hdf5_list = glob.glob(tmp_search_pattern)
                if len(hdf5_list):      # continue interrupted csv_to_h5()
                    hdf5_list.sort()
                    file_last = hdf5_list[-1]
                    lf.info('Found {:d} temporary files, continue from index found in last file', len(hdf5_list))
                    "table/columns/index"
                else:                   # add next csv data
                    file_last = to_hdf_args['path_or_buf']
                with h5py.File(file_last, mode='r') as to_hdf_buf:
                    continue_row = to_hdf_buf['table/columns/index/data'][-1] + 1
            else:
                with pd.HDFStore(to_hdf_args['path_or_buf'], mode='r') as to_hdf_buf:
                    continue_row = to_hdf_buf.select(to_hdf_args['key'], columns=[], start=-1).index[-1] + 1
        except (OSError) as e:
            msg_start += ' No output file.'
            continue_row = None
        except KeyError as e:
            msg_start += ' No data in output file.'
            continue_row = None
        else:
            msg_start += ' Starting from next to last loaded csv row:'
    elif continue_row:
        msg_start += ' Starting from specified csv data row:'
    if continue_row:
        lf.info('{:s} {:d}...', msg_start, continue_row)
        read_csv_args['skiprows'] = read_csv_args.get('skiprows', 0) + continue_row
    else:
        lf.info('{:s} beginning from csv row 0, giving it index 0...', msg_start)

    dtypes = read_csv_args['dtype']

    # Set default output cols if not set
    if out_cols is None and processing:
        # output all columns we will have, except processing inputs not mentioned in processing outputs
        cols_in_used = set()
        cols_out_used = set()
        for (c_in, c_out) in processing.keys():
            cols_in_used.update(c_in)
            cols_out_used.update(c_out)
        cols2del = cols_in_used.difference(cols_out_used)
        out_cols = [col for col in dtypes if col not in cols2del]
    cols_out_used = set(out_cols if out_cols is not None else dtypes.keys())

    # Group cols for conversion by types specified
    str_cols = []
    int_and_nans_cols = []
    other_cols = []
    for col, typ in dtypes.items():
        if out_cols and col not in cols_out_used:
            continue
        kind = typ[0]
        (str_cols if kind == 'S' else
         int_and_nans_cols if kind == 'I' else
         other_cols).append(col)

    str_not_dates = list(set(str_cols).difference(dates_formats.keys()))
    min_itemsize = {col: int(dtypes[col][1:]) for col in str_not_dates}

    # Read csv, process, write hdf5
    with open(read_csv_args['filepath_or_buffer'], 'r') as read_csv_buf, \
            FakeContextIfOpen(open_for_pandas_to_hdf, to_hdf_args['path_or_buf']) as to_hdf_buf:
        read_csv_args.update({
            'filepath_or_buffer': read_csv_buf,
            'memory_map': True,
            'dtype': 'string'  # switch off read_csv dtype conversion (if it fails it is hard to correct:
            })                 # we could not re-read the same csv place with pandas)
        to_hdf_args.update({
            'path_or_buf': to_hdf_buf,
            'format': 'table',
            'data_columns': True,
            'append': True,
            'min_itemsize': min_itemsize
            })
        # rows_processed = 0
        # rows_in_chunk = read_csv_args['chunksize']

        for ichunk, chunk in enumerate(pd.read_csv(**read_csv_args)):
            if continue_row:
                if chunk.size == 0:
                    ichunk = np.ceil(continue_row / read_csv_args['chunksize']).astype(int) - 1
                    break  # continue_row is > data rows
                else:
                    chunk.index += continue_row

            lf.extra['id'] = f'chunk start row {chunk.index[0]:d}'
            if ichunk % 10 == 0:
                print(f'{ichunk}', end=' ')
            else:
                print('.', end='')

            if correct_fun:
                correct_fun(chunk)

            # Convert to user specified types

            # 1. dates str to DateTime
            for col, f in dates_formats.items():
                # the conversion of 'bytes' to 'strings' is needed for pd.to_datetime()
                try:
                    chunk[col] = pd.to_datetime(chunk[col], format=f)
                except ValueError as e:
                    lf.error(
                        'Conversion to datetime("{:s}" formatted as "{:s}") {:s} -> '
                        'Replacing malformed strings by NaT...', col, f, standard_error_info(e))
                    chunk[col] = pd.to_datetime(chunk[col], format=f, exact=False, errors='coerce')

            # 2. str to numeric for other_cols and int_and_nans_cols (the latter have limited support as pandas extension dtypes)
            # but we use numpy types instead, replacing NaNs by -1 to be able to write to hdf5
            chunk[other_cols] = chunk[other_cols].fillna('NaN')  # replace <NA> with a string numpy recognizes as NaN
            chunk[int_and_nans_cols] = chunk[int_and_nans_cols].fillna('-1')
            for col in (int_and_nans_cols + other_cols):  # for col, typ in zip(nans.columns, chunk[nans.columns].dtypes):
                typ = dtypes[col]
                if col in int_and_nans_cols:
                    is_integer = True
                    typ = f'i{typ[1:]}'  # typ.numpy_dtype
                else:
                    is_integer = np.dtype(typ).kind == 'i'
                try:
                    chunk[col] = chunk[col].astype(typ)
                    continue
                except (ValueError, OverflowError) as e:
                    # Cleaning. In case of OverflowError we do it here to prevent ValueError while handling OverflowError below.
                    pattern_match = r'^\d+$' if is_integer else r'^-?[\d.]+$'
                    ibad = ~chunk[col].str.match(pattern_match)
                    rep_val = '-1' if is_integer else 'NaN'
                    # ibad = np.flatnonzero(chunk[col] == re.search(r'(?:")(.*)(?:")', e.args[0]).group(1), 'ascii')
                    lf.error('Conversion {:s}("{:s}") {:s} -> replacing {:d} values not matching pattern "{:s}" with "{'
                             ':s}" and again...', typ, col, standard_error_info(e), ibad.sum(), pattern_match, rep_val)
                    chunk.loc[ibad, col] = rep_val
                    # astype(str).replace(regex=True, to_replace=r'^.*[^\d.].*$', value=
                try:
                    chunk[col] = chunk[col].astype(typ)
                except (OverflowError,
                        ValueError) as e:  # May be bad value from good symbols: r'^\d*\.\d*\.+\d*$' but instead checking it we do coerce_to_exact_dtype() on ValueError here too
                    lf.error('Conversion {:s}("{:s}") {:s} -> Replacing malformed strings and big numbers'
                    ' by NaN ...', typ, col, standard_error_info(e))
                    chunk[col] = coerce_to_exact_dtype(chunk[col], dtype=typ)

            # Limit big strings' length and convert StringDtype to str so they can be saved by to_hdf()
            for col, max_len in min_itemsize.items():  # for col, typ in zip(nans.columns, chunk[nans.columns].dtypes):
                chunk[col] = chunk[col].str.slice(stop=max_len)  # apply(lambda x: x[:max_len]) does not handle <NA>
            chunk[str_not_dates] = chunk[str_not_dates].astype(str)

            # Apply specified data processing
            if processing:
                for (cols_in, c_out), fun in processing.items():
                    cnv_result = fun(chunk[list(cols_in)])
                    chunk[list(c_out)] = cnv_result

            # # Bad rows check
            # is_different = chunk['wlaWID'].fillna('') != chunk['wlaAPIHartStandard'].fillna('')
            # if is_different.any():
            #     i_bad = np.flatnonzero(is_different.values)
            #     lf.debug('have wlaWID != wlaAPIHartStandard in rows {:s}', chunk.index[i_bad])
            #     # chunk= chunk.drop(chunk.index[i_bad])   # - deleting
            #     pass

            # Check unique index
            # if chunk['wlaWID'].duplicated()

            try:
                if vaex_format:
                    df = vaex.from_pandas(chunk if out_cols is None else chunk[out_cols])
                    df.export_hdf5(tmp_save_pattern.format(ichunk))
                else:  # better to move this command upper and proc. by vaex instead of pandas
                    (chunk if out_cols is None else chunk[out_cols]).to_hdf(**to_hdf_args)
                #rows_processed += rows_in_chunk  # think we always read the same length except the last chunk, whose length value will not be used

            except Exception as e:
                lf.exception('write error')
                pass
        try:
            del lf.extra['id']
        except KeyError:
            lf.info('there were no more data rows to read')

    # If vaex store was specified then we have chunk files that we combine now by export_hdf5():
    if vaex_format:
        h5pandas_to_vaex_combine(tmp_search_pattern, str(to_hdf_args['path_or_buf']), check_files_number=ichunk+1)
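A minimal self-contained sketch (the file names 'data.csv' and 'out.h5' are assumptions) of the chunked csv -> pandas HDF5 pattern that csv_to_h5 wraps when vaex_format is False:

import pandas as pd

with pd.HDFStore('out.h5', mode='w') as store:
    for chunk in pd.read_csv('data.csv', dtype=str, chunksize=100_000):
        # here csv_to_h5 would convert dates, numbers and strings per column before writing
        chunk.to_hdf(store, key='table', format='table', append=True, data_columns=True)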
Example #12
def main():
    print('\n' + this_prog_basename(__file__), end=' started. ')
    # try:
    #     cfg['in']= init_file_names(cfg['in'])
    # except Ex_nothing_done as e:
    #     print(e.message)
    #     return()

    # gen_names = ge_names(cfg)
    gen_data = veuszPropagate([os_path.join(os_path.dirname(
        file_veuszPropagate), 'veuszPropagate.ini'),
        '--data_yield_prefix', 'Hxyz', '--path', cfg['in']['path'], '--pattern_path',
        r'd:\workData\BalticSea\171003_ANS36\inclinometr\171015_intercal_on_board\~pattern~.vsz',
        '--eval_list',
        """
        'ImportFileCSV(u"{nameRFE}", blanksaredata=True, encoding="ascii", headermode="1st", linked=True, dsprefix=u"counts", rowsignore=2, skipwhitespace=True)',
        "TagDatasets(u'source', [u'countsAx', u'countsAy', u'countsAz', u'countsBattery', u'countsDay', u'countsHour', u'countsMinute', u'countsMonth', u'countsMx', u'countsMy', u'countsMz', u'countsSecond', u'countsTemp', u'countsYear'])"
        """,
        # '--import_method', 'ImportFileCSV',
        '--add_custom_list', 'Ah_old, Ch',
        '--add_custom_expressions_list',
        """
        'float64([[ 1, 0, 0],\
          [0, 1, 0],\
          [0, 0, 1]])\
        ',
        
        'float64([[0, 0, 0]])'
        """, '--before_next', 'restore_config',
        '--export_pages_int_list', '1'])

    cfgin_update = None
    while True:
        try:
            d = gen_data.send(cfgin_update)
        except (GeneratorExit, StopIteration):
            print('ok>')
            break
        except Exception as e:
            print('There is an error ', standard_error_info(e))
            continue

        Hxyz = d['Hxyz']
        # Hxyz = np.column_stack((a['Mx'], a['My'], a['Mz']))[slice(*iUseTime.flat)].T
        if len(Hxyz) < 3 or not np.prod(Hxyz.shape):  # 3 is ok but may be empty
            print('\nNo data from Veusz!\n')
            bBad = True
        else:
            Ah, b = calibrate(Hxyz)
            if Ah[0, 0] > 10:
                print('\nBad calibration!\n')
                bBad = True
            else:
                bBad = False
        if bBad:
            print('use 1st coef!')
            b = np.float64([[46, -166, -599]])
            Ah = np.float64([[0.0054, -0.0001, -0.0001],
                             [-0.0001, 0.0069, -0.0001],
                             [-0.0001, -0.0001, 0.0089]])
        """
        AddCustom('definition', u'Ch', u'float64([[60,-160,-650]])')
        AddCustom('definition', u'Ah_old', u'float64([[50,0,0],\n[0,65,0],\n[0,0,90]])*1e-4')
        """

        # calibrate_plot(Hxyz, Ah, b)
        A_str, b_str = coef2str(Ah, b)
        # b_str= 'float64([{}])'.format(b_str)
        if not bBad:
            print('calibration coefficients calculated:',
                  '\nA = \n', A_str, '\nb = \n', b_str)
        cfgin_update = {'add_custom_expressions': [A_str, b_str]}
Example #13
        return wrap


    @with_in_config
    def main(cfg):
        """
        :param new_arg: list of strings, command line arguments

        Note: if new_arg == '<cfg_from_args>' then cfg is returned, but it will be None if argument
         '-h' or '-v' is passed in argv[1:] to this code
        argv[1] is cfgFile. It was used with cfg files:
            'csv2h5_nav_supervisor.ini'
            'csv2h5_IdrRedas.ini'
            'csv2h5_Idronaut.ini'

        :return:
        """


    def version():
        """Show the version"""
        return 'version {0}'.format(VERSION)


    if __name__ == '__main__':
        run(main, alt=version)

except ModuleNotFoundError as e:
    print(standard_error_info(e))
Example #14
def parse_csv(filename, cfg_in):
    """
    Guess csv structure

    :param filename:
    :param cfg_in:
    :param known_structure: list of format strings in order of columns, from the start
    but possibly not all (the rest are auto-detected)
    :return: lst_types, offset, headers 


    * quotechar - specifies a one-character string to use as the
        quoting character.  It defaults to '"'.
    * delimiter - specifies a one-character string to use as the
        field separator.  It defaults to ','.
    * skipinitialspace - specifies how to interpret whitespace which
        immediately follows a delimiter.  It defaults to False, which
        means that whitespace immediately following a delimiter is part
        of the following field.
    * lineterminator -  specifies the character sequence which should
        terminate rows.
    * quoting - controls when quotes should be generated by the writer.
        It can take on any of the following module constants:

        csv.QUOTE_MINIMAL means only when required, for example, when a
            field contains either the quotechar or the delimiter
        csv.QUOTE_ALL means that quotes are always placed around fields.
        csv.QUOTE_NONNUMERIC means that quotes are always placed around
            fields which do not parse as integers or floating point
            numbers.
        csv.QUOTE_NONE means that quotes are never placed around fields.
    * escapechar - specifies a one-character string used to escape
        the delimiter when quoting is set to QUOTE_NONE.
    * doublequote - controls the handling of quotes inside fields.  When
        True, two consecutive quotes are interpreted as one during read,
        and when writing, each quote character embedded in the data is
        written as two quotes
    Example:
    parse_csv(filename, ['%H:%M:%S'])
    """
    set_field_if_no(cfg_in, 'types', [])
    set_field_if_no(cfg_in, 'delimiter')
    with open(filename, 'rb') as fh:
        ext = os_path.splitext(filename)[1]
        # Load a file object:
        try:
            # If you are sure that file is csv use CSVTableSet(fh)
            from magic import MagicException  # because any_tableset uses libmagic
            table_set = any_tableset(fh,
                                     mimetype=None,
                                     extension=ext,
                                     delimiter=cfg_in['delimiter'])
        except (ImportError, MagicException) as e:
            print('There is an error ', standard_error_info(e),
                  '\n=> Loading file as csv without trying other formats')
            table_set = CSVTableSet(fh, delimiter=cfg_in['delimiter'])

        # A table set is a collection of tables:
        row_set = table_set.tables[0]
        # A row set is an iterator over the table, but it can only
        # be run once. To peek, a sample is provided:

        # guess header names and the offset of the header:
        offset, headers = headers_guess(row_set.sample)  # tolerance=1
        row_set.register_processor(headers_processor(headers))
        # add one to begin with content, not the header:
        row_set.register_processor(offset_processor(offset + 1))
        # guess column types:
        lst_types = type_guess(row_set.sample, strict=True)
        row_sample = next(row_set.sample)

        # check not detected types
        def formats2types(formats_str):
            for f in formats_str:
                if f:
                    if is_date_format(f):
                        yield (types.DateType(f))
                    else:
                        yield (TimeType())
                else:
                    yield (None)

        known_types = formats2types(cfg_in['types'])

        for n, (t, s, kt) in enumerate(zip(lst_types, row_sample,
                                           known_types)):
            if t.result_type == types.StringType.result_type:
                # not auto detected? -> check known_types
                if kt.test(s.value):
                    lst_types[n] = kt  # t= kt
                else:  # provided known type does not fit the element
                    print(
                        "col's #{:d} value \"{}\" type does not match provided type {}"
                        .format(n, s.value, type(kt)))
                    # kt = types.DateType('mm/dd/yyyy')
                    # kt.test('0'+s.value)
                    # detect?
            else:
                pass
        # does not work for time type:
        # print(jts.headers_and_typed_as_jts(headers,
        #       list(map(jts.celltype_as_string, lst_types))).as_json())
        return lst_types, offset, headers
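A hedged sketch (the file name 'data.csv' is an assumption) of the bare messytables flow that parse_csv wraps, without the known-type checking extras:

from messytables import (CSVTableSet, headers_guess, headers_processor,
                         offset_processor, type_guess)

with open('data.csv', 'rb') as fh:
    row_set = CSVTableSet(fh).tables[0]
    offset, headers = headers_guess(row_set.sample)           # guess header names and their offset
    row_set.register_processor(headers_processor(headers))
    row_set.register_processor(offset_processor(offset + 1))  # start from content, not the header
    print(headers, type_guess(row_set.sample, strict=True))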
Example #15
def load_to_veusz(in_fulls, cfg, veusze=None):
    """
    Generate Veusz embedded instances by opening vsz-file(s) and modify it by executing commands specified in cfg
    :param in_fulls: iterable of full names of source data files to load into the veusz pattern (usually csv)
    :param cfg: dict with keys:
        out:
            path, paths - pattern paths
            b_images_only, b_update_existed - command line arguments - see my_argparser()
        in:
            before_next - modify Veusz pattern data by execute Veusz commands if have fields:
                'Close()' - reopen same pattern each cycle

            import_method, header if import_method == 'ImportFile',
            add_custom, add_custom_expressions - Veusz Custom Definitions
            eval - any Veusz command
    :param veusze: Veusz embedded object. If it is None creates new Veusz embedded object loading pattern path
    :yields (veusze, log)
        veusze: Veusz embedded object.
        log: dict, {'out_name': inF, 'out_vsz_full': out_vsz_full}

    Note 1: Uses the global load_vsz function (defined by calling load_vsz_closure()) if veusze is None
    or cfg['out']['b_images_only'] is set
    Note 2: If 'restore_config' in cfg['program']['before_next'] then sets cfg['in'] = cfg['in_saved']
    """
    global load_vsz

    filename_fun = eval(compile(cfg['out']['filename_fun'], '', 'eval'))
    ifile = 0
    for in_full in in_fulls:
        ifile += 1
        in_full = Path(in_full)

        out_name = filename_fun(in_full.stem) + cfg['out']['add_to_filename']
        out_vsz_full = Path(
            cfg['out']['path']).with_name(out_name).with_suffix('.vsz')

        # if ifile < cfg['in']['start_file']:
        #     continue

        # do not update existed vsz/images if not specified 'b_update_existed'
        if not cfg['out']['b_update_existed']:
            if cfg['out']['b_images_only']:
                glob = Path(cfg['out']['export_dir']).glob(
                    f"*{out_name}*.{cfg['out']['export_format']}")
                if any(glob):
                    continue
            elif out_vsz_full.is_file():
                continue
        # yield (None, None)

        if in_full.stem != out_vsz_full.stem:
            l.info('%d. %s -> %s, ', ifile, in_full.name, out_vsz_full.name)
        else:
            l.info('%d. %s, ', ifile, in_full.name)
        sys_stdout.flush()
        log = {'out_name': out_name, 'out_vsz_full': out_vsz_full}

        if veusze:
            try:
                b_closed = veusze.IsClosed()
            except Exception as e:
                l.error('IsClosed() error', exc_info=True)
                b_closed = True
            if b_closed:
                veusze = None
            elif 'Close()' in cfg['program'][
                    'before_next']:  # or in_ext == '.vsz'
                veusze.Close()
                veusze = None

        def do_load_vsz(in_full, veusze, load_vsz):
            in_ext = in_full.suffix.lower()
            # load same filePattern (last in list) if data file not "vsz":
            vsz_load = in_full if in_ext == '.vsz' else cfg['out']['paths'][-1]
            if cfg['out']['b_images_only']:
                veusze = load_vsz(
                    vsz_load,
                    veusze)[0]  # veusze.Load(in_full.with_suffix('.vsz'))
            else:
                if 'restore_config' in cfg['program']['before_next']:
                    cfg['in'] = cfg['in_saved'].copy(
                    )  # useful if need restore add_custom_expressions?
                if not veusze:
                    veusze = load_vsz(vsz_load)[0]  # , veusze=veusze
                    if cfg['program']['verbose'] == 'DEBUG':
                        veusze.SetVerbose()  # nothing changes
                # Relative path from new vsz to data, such as u'txt/160813_0010.txt'
                try:
                    file_name_r = in_full.relative_to(
                        cfg['out']['path'].parent)
                except ValueError as e:
                    # l.exception('path not related to pattern')
                    file_name_r = in_full
                veusze_commands(veusze, cfg['in'], file_name_r)
            return veusze

        try:
            veusze = do_load_vsz(in_full, veusze, load_vsz)
        except asyncio.TimeoutError as e:
            l.warning('Recreating window because of %s',
                      standard_error_info(e))
            veusze.remote.terminate()
            veusze.remote = None

            # veusze.Close()  # veusze.exitQt()                                              # not works
            # import socket
            # veusze.serv_socket.shutdown(socket.SHUT_RDWR)
            # veusze.serv_socket.close()
            # veusze.serv_socket, veusze.from_pipe = -1, -1

            # veusze.serv_socket.shutdown(socket.SHUT_RDWR); veusze.serv_socket.close()
            # veusze.startRemote()                                                           # no effect

            load_vsz = load_vsz_closure(
                cfg['program']['veusz_path'], cfg['program']['load_timeout_s'],
                cfg['program']
                ['b_execute_vsz'])  # this is only unfreezes Veusz
            veusze = do_load_vsz(in_full, None, load_vsz)

        yield (veusze, log)
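A small sketch of the filename_fun mechanism above: cfg['out']['filename_fun'] is a string holding a lambda (the example value and suffix below are assumptions) that maps the input file stem to the output name:

filename_fun = eval(compile("lambda file_stem: file_stem", '', 'eval'))
out_name = filename_fun('171017#01') + '_abs'      # '_abs' stands in for cfg['out']['add_to_filename']
print(out_name)                                    # 171017#01_abs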
Example #16
                            plt.clabel(CS, fmt='%g', fontsize=9, inline=1)
                            # if Dep_filt is not None:
                            #     plt.plot(nav_dist, -nav_dep, color='m', alpha=0.5, label='Depth')  # bot
                            if bFirst:
                                plt.plot(ctd['Lon'],
                                         ctd['Lat'],
                                         color='m',
                                         alpha=0.5,
                                         label='run path')
                            CBI = plt.colorbar(im, shrink=0.8)
                            # CB = plt.colorbar(CS, shrink=0.8, extend='both')
                            # plt.gcf().set_size_inches(9, 3)
                            plt.savefig(
                                path_stem.with_suffix('.png'))  # , dpi= 200
                            # plt.show()
                            plt.close()
                            pass
                        except Exception as e:
                            print('\nCan not draw contour! ',
                                  standard_error_info(e))
                    # gdal_drv_grid.Register()

                    write_grd_this_geotransform(path_stem.with_suffix('.grd'),
                                                z)
                if bFirst:
                    bFirst = False

except Exception as e:
    print('\nError! ', standard_error_info(e))
    raise e
Example #17
            str = fdata.readline()
        fdata = BytesIO(str.encode())

        if 'b_raise_on_err' in cfg['in'] and not cfg['in']['b_raise_on_err']:
            try:
                a = np.genfromtxt(fdata,
                                  dtype=cfg['in']['dtype'],
                                  delimiter=cfg['in']['delimiter'],
                                  usecols=cfg['in']['cols_load'],
                                  converters=cfg['in']['converters'],
                                  skip_header=cfg['in']['skiprows'],
                                  comments=cfg['in']['comments'],
                                  invalid_raise=False)  # ,autostrip= True
                # warnings.warn("Mean of empty slice.", RuntimeWarning)
            except Exception as e:
                print(*standard_error_info(e), ' - Bad file. skip!\n')
                continue
        else:
            try:
                a = np.loadtxt(fdata,
                               dtype=cfg['in']['dtype'],
                               delimiter=cfg['in']['delimiter'],
                               usecols=cfg['in']['cols_load'],
                               converters=cfg['in']['converters'],
                               skiprows=cfg['in']['skiprows'])
            except Exception as e:
                print('{}\n Try setting [in].b_raise_on_err = False'.format(e))
                raise (e)
        # Process a and get date in ISO standard format
        try:
            date = fun_proc_loaded(a, cfg['in'])