Esempio n. 1
0
def clean_outliers_moving_average(data_series, length, limit):
    """
    Get a moving average of the ``data_series`` over ``length`` entries, by means of
    :func:`outlier filter <omc3.utils.outliers.get_filter_mask>`.
    The values are shifted, so that the averaged value takes ceil((length-1)/2) values previous
    and floor((length-1)/2) following values into account.

    The outlier filter is run on every window of ``length`` consecutive entries
    (including the one ending on the final entry). The resulting mask starts out
    as "entry is not NaN" and is AND-ed with the filter result of every window
    an entry belongs to.

    Args:
        data_series: `Series` of data.
        length: length of the averaging window.
        limit: cleaning limit, forwarded as ``limit`` to the outlier filter for
            each window (see the filter's documentation for its exact meaning).

    Returns:
        Tuple ``(data_mav, std_mav, mask)``: the two values returned by
        ``_get_interpolated_moving_average`` (called with the inverted mask)
        and the combined boolean mask itself.
    """
    LOG.debug(
        "Filtering and calculating moving average of length {:d}.".format(
            length))
    init_mask = ~data_series.isna()
    mask = init_mask.copy()

    # A series of n entries holds n - length + 1 full windows. Without the
    # "+ 1" the last window is skipped, so the final entry would never be
    # checked for outliers (and nothing at all if n == length).
    n_windows = len(data_series) - length + 1
    for start in range(n_windows):
        window = slice(start, start + length)
        mask[window] &= get_filter_mask(data_series[window],
                                        limit=limit,
                                        mask=init_mask[window])

    _is_almost_empty_mask(mask, length)
    data_mav, std_mav = _get_interpolated_moving_average(
        data_series, ~mask, length)
    return data_mav, std_mav, mask
Esempio n. 2
0
def clean_by_tune(tunes: pd.Series, tune_clean_limit):
    """
    Find the outliers in the ``tunes`` Series and return their index labels.

    Args:
        tunes (pd.Series): Pandas series with the tunes per BPM and the BPM names as index.
        tune_clean_limit: Limit forwarded to the outlier filter. No BPM is
            flagged as an outlier if its distance to the average is lower
            than this limit.

    Returns:
        The index of ``tunes`` restricted to the entries for which the
        outlier filter mask is ``False``.
    """
    # The filter mask is True for the entries to keep, so invert it to select outliers.
    keep = outliers.get_filter_mask(tunes, limit=tune_clean_limit)
    outlier_tunes = tunes[~keep]
    return outlier_tunes.index
Esempio n. 3
0
def _filter_by_column(df: pd.DataFrame, column: str,
                      limit: Number) -> pd.DataFrame:
    """Return ``df`` restricted to the rows kept by the outlier filter on ``column``.

    If ``column`` is not present in ``df`` the frame is returned unchanged
    and a message is logged instead.
    """
    if column in df.columns:
        keep = get_filter_mask(data=df[column], limit=limit)
        n_total = len(keep)
        n_good = sum(keep)  # True entries are the rows that remain
        n_removed = n_total - n_good
        LOG.info(
            f"Cleaned {n_removed:d} of {n_total:d} elements in {column} ({n_good:d} remaining)."
        )
        return df.loc[keep, :]

    LOG.info(f"{column} not in current file. Skipping cleaning.")
    return df
Esempio n. 4
0
def clean_data(magnet_df, no_autoclean):
    """
    Add one ``{CLEANED}{plane}`` column per plane to ``magnet_df`` and return it.

    Args:
        magnet_df: frame holding the ``{TUNE}{plane}`` columns and the ``K`` column;
            it is modified in place.
        no_autoclean: if truthy, no cleaning is performed and every entry of
            the new columns is set to ``True``. Otherwise the columns hold the
            outlier filter mask of the tune values against the ``K`` values.

    Returns:
        The same ``magnet_df`` object with the added columns.
    """
    if no_autoclean:
        LOG.info(
            'Manual cleaning is not yet implemented, no cleaning was performed'
        )
        for plane in PLANES:
            cleaned_column = f"{CLEANED}{plane}"
            magnet_df[cleaned_column] = True
        return magnet_df

    LOG.debug('Automatic Tune cleaning')
    for plane in PLANES:
        cleaned_column = f"{CLEANED}{plane}"
        tune_values = magnet_df[f"{TUNE}{plane}"].values
        magnet_df[cleaned_column] = outliers.get_filter_mask(
            tune_values,
            x_data=magnet_df[K].values,
            limit=1e-5)
    return magnet_df