Example #1
import logging

import numpy as np

l = logging.getLogger(__name__)
# load_hdf5_data() and incl_calc_velocity_nodask() are project-internal helpers
# defined in the surrounding module.
def zeroing_azimuth(store, tbl, timerange_nord, coefs=None, cfg_in=None):
    """
    azimuth_shift_deg by calculating velocity (Ve, Vn) in cfg_in['timerange_nord'] interval of tbl data:
     taking median, calculating direction, multipling by -1
    :param timerange_nord:
    :param store:
    :param tbl:
    :param coefs: dict with fields having values of array type with sizes:
    'Ag': (3, 3), 'Cg': (3, 1), 'Ah': (3, 3), 'Ch': array(3, 1), 'azimuth_shift_deg': (1,), 'kVabs': (n,)
    :param cfg_in: dict with fields:
        - timerange_nord
        - other, needed in load_hdf5_data() and optionally in incl_calc_velocity_nodask()
    :return: azimuth_shift_deg
    """
    l.debug('Zeroing North direction')
    df = load_hdf5_data(store, table=tbl, t_intervals=timerange_nord)
    if df.empty:
        l.info('Zero calibration range is outside the data scope')
        return
    dfv = incl_calc_velocity_nodask(df,
                                    **coefs,
                                    cfg_filter=cfg_in,
                                    cfg_proc={
                                        'calc_version': 'trigonometric(incl)',
                                        'max_incl_of_fit_deg': 70
                                    })
    dfv.query('10 < inclination & inclination < 170', inplace=True)
    dfv_mean = dfv.loc[:, ['Ve', 'Vn']].median()
    # or df.apply(lambda x: [np.mean(x)], result_type='expand', raw=True)
    # df = incl_calc_velocity_nodask(dfv_mean, **calc_vel_flat_coef(coefs), cfg_in=cfg_in)

    # coefs['M']['A'] = rotate_z(coefs['M']['A'], dfv_mean.Vdir[0])
    azimuth_shift_deg = -np.degrees(np.arctan2(*dfv_mean.to_numpy()))
    l.info('North azimuth shift coefficient found: %s degrees', azimuth_shift_deg)
    return azimuth_shift_deg
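
A minimal usage sketch for zeroing_azimuth() follows. Everything concrete here (file name, table name, coefficient values, time range format) is an illustrative assumption, not taken from the source, and the module's own helpers must be importable:

import numpy as np
import pandas as pd

# Hypothetical identity coefficients with the shapes listed in the docstring
coefs_example = {
    'Ag': np.eye(3), 'Cg': np.zeros((3, 1)),
    'Ah': np.eye(3), 'Ch': np.zeros((3, 1)),
    'azimuth_shift_deg': np.zeros(1), 'kVabs': np.ones(2),
}
timerange_nord = ['2020-01-01T00:00:00', '2020-01-01T01:00:00']  # assumed format

with pd.HDFStore('inclinometer.h5', mode='r') as store:  # hypothetical file
    shift = zeroing_azimuth(store, 'incl01', timerange_nord, coefs=coefs_example)
# shift = -degrees(arctan2(median(Ve), median(Vn))), i.e. minus the median velocity
# direction; presumably the caller applies it to the 'azimuth_shift_deg' coefficient
# (cf. the commented-out rotate_z() line in the function body).
if shift is not None:
    coefs_example['azimuth_shift_deg'] += shift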
Example #2
import logging
import re
from typing import Any, Iterator, Mapping, Tuple

import numpy as np
import pandas as pd

l = logging.getLogger(__name__)
# intervals_from_period(), pd_period_to_timedelta(), h5q_starts2coord(),
# h5_load_range_by_coord(), filt_data_dd(), filter_local(), h5_names_gen() and
# incl_calc_velocity_nodask() are project-internal helpers from the surrounding module.
def h5_velocity_by_intervals_gen(
        cfg: Mapping[str, Any],
        cfg_out: Mapping[str, Any]) -> Iterator[Tuple[pd.DataFrame, str, str]]:
    """
    Loads data and calculates velocity: many intervals from many of hdf5 tables sequentially.
    :param cfg: dict with fields:
        ['proc']['dt_interval'] - numpy.timedelta64 time interval of loading data
        one group of fields:
            1.  'split_period', pandas interval str, as required by intervals_from_period() to cover all data by it
                'overlap'

            2.  'time_intervals_start' - manually specified starts of intercals

    :param cfg_out: fields must be provided:
        - see h5_names_gen(cfg_in, cfg_out) requirements
    :return:
    """
    # Prepare the loop
    if cfg_out.get('split_period'):

        def gen_loaded(tbl):
            """
            Variant 1. Generate regular intervals (may be with overlap)
            :param tbl:
            :return:
            """
            cfg['in']['table'] = tbl
            # To obtain ``t_intervals_start`` as used in the query inside gen_data_on_intervals(cfg_out, cfg),
            # we copy that code here:
            t_prev_interval_start, t_intervals_start = intervals_from_period(
                **cfg['in'], period=cfg_out['split_period'])
            if cfg['proc']['overlap']:
                dt_shifts = np.arange(
                    0, 1,
                    (1 - cfg['proc']['overlap'])) * pd_period_to_timedelta(
                        cfg_out['split_period'])
                t_intervals_start = (t_intervals_start.to_numpy(
                    dtype="datetime64[ns]")[np.newaxis].T +
                                     dt_shifts).flatten()
                if cfg['in']['max_date']:
                    idel = t_intervals_start.searchsorted(
                        np.datetime64(
                            cfg['in']['max_date'] -
                            pd_period_to_timedelta(cfg_out['split_period'])))
                    t_intervals_start = t_intervals_start[:idel]
                cfg['in']['time_intervals_start'] = t_intervals_start  # to save queried time - see main()
            cfg_filter = None
            cfg_in_columns_saved = cfg['in']['columns']
            for start_end in h5q_starts2coord(
                    cfg['in']['db_path'],
                    cfg['in']['table'],
                    t_intervals_start,
                    dt_interval=cfg['proc']['dt_interval']):
                a = h5_load_range_by_coord(**cfg['in'],
                                           range_coordinates=start_end)
                if cfg_filter is None:  # only on the first iteration
                    # correct columns if they are not an exact match, to speed up h5_load_range_by_coord() next time
                    cfg['in']['columns'] = a.columns  # temporary
                    # and exclude absent fields to avoid "no such column" warnings in filt_data_dd()
                    detect_filt = f"m(ax|in)_({'|'.join(cfg['in']['columns'])})"
                    cfg_filter = {
                        k: v
                        for k, v in cfg['filter'].items()
                        if re.match(detect_filt, k)
                    }
                d, i_burst = filt_data_dd(a, cfg['in']['dt_between_bursts'],
                                          cfg['in']['dt_hole_warning'],
                                          cfg_filter)

                n_bursts = len(i_burst)
                if n_bursts > 1:  # the first element is always 0
                    l.info('gaps found: (%s)! at %s', n_bursts - 1,
                           i_burst[1:] - 1)
                df0 = d.compute()
                if not len(df0):
                    continue
                start_end = df0.index[[0, -1]].values
                yield df0, start_end
            cfg['in']['columns'] = cfg_in_columns_saved  # restore so as not to affect the next file

    else:
        query_range_pattern = "index>=Timestamp('{}') & index<=Timestamp('{}')"

        def gen_loaded(tbl):
            """
            Variant 2. Generate intervals at specified start values with same width cfg['proc']['dt_interval']
            :param tbl:
            :return:
            """
            for start_end in zip(
                    cfg['in']['time_intervals_start'],
                    cfg['in']['time_intervals_start'] +
                    cfg['proc']['dt_interval']):
                query_range_lims = pd.to_datetime(start_end)
                qstr = query_range_pattern.format(*query_range_lims)
                l.info('query:\n%s...', qstr)
                df0 = store.select(tbl, where=qstr, columns=None)  # ``store`` is bound in the with-block below
                yield df0, start_end

    dt_interval_in_its_units = cfg['proc']['dt_interval'].astype(int)
    dt_interval_units = np.datetime_data(cfg['proc']['dt_interval'])[0]
    data_name_suffix = f'{dt_interval_in_its_units}{dt_interval_units}'

    # Main loop
    with pd.HDFStore(cfg['in']['db_path'], mode='r') as store:
        for (tbl, coefs) in h5_names_gen(cfg['in'], cfg_out):
            # Get data in ranges
            for df0, start_end in gen_loaded(tbl):
                if cfg['in']['db_path'].stem.endswith('proc_noAvg'):  # data is already processed
                    df = df0
                else:  # source data must be processed to calculate velocity
                    df0 = filter_local(df0, cfg['filter'])
                    df = incl_calc_velocity_nodask(df0,
                                                   **coefs,
                                                   cfg_filter=cfg['in'],
                                                   cfg_proc=cfg['proc'])

                data_name = f'{tbl}/PSD_{start_end[0]}{data_name_suffix}'
                yield (df, tbl, data_name)
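
A minimal driver sketch for h5_velocity_by_intervals_gen(), exercising Variant 2 (manually specified interval starts). Every concrete value below (file name, interval start, dt_interval) is an illustrative assumption, and cfg['in']/cfg_out must additionally contain whatever h5_names_gen() and the loading helpers require:

from pathlib import Path

import numpy as np

cfg = {
    'in': {
        'db_path': Path('inclinometers_proc_noAvg.h5'),  # hypothetical file; the
        # 'proc_noAvg' stem suffix makes the generator yield the data as loaded
        'time_intervals_start': np.array(['2020-01-01T00:00'], dtype='datetime64[ns]'),
        # ... other fields required by h5_names_gen()
    },
    'filter': {},
    'proc': {'dt_interval': np.timedelta64(1, 'h')},
}
cfg_out = {}  # no 'split_period', so Variant 2 (fixed-width intervals) is used

for df, tbl, data_name in h5_velocity_by_intervals_gen(cfg, cfg_out):
    print(data_name, len(df))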