Beispiel #1
0
    def detect_change_points(self, ys: np.ndarray, **kwargs) -> Sequence[int]:
        """
        Estimate change points in ``ys`` with ruptures' bottom-up search.

        :param ys: signal handed to ``ruptures.BottomUp(...).fit``
        :param model: optional keyword naming the ruptures cost model,
            e.g. "l1", "rbf", "linear", "normal", "ar" (default is "l2")
        :return: list of estimated change points, predicted with a fixed
            penalty of 3
        """
        cost_model = kwargs.get("model", "l2")
        detector = ruptures.BottomUp(model=cost_model)
        detector = detector.fit(ys)
        return detector.predict(pen=3)
Beispiel #2
0
    def calculate_segments(self, X, times, start_time, end_time):
        """
        Split ``X`` into segments and record the newly fitted ones.

        The previously recorded segments are loaded through
        ``check_recorded_segments``; everything from ``start_fit_index`` on is
        re-segmented with a bottom-up change point search, once per stretch
        between the "down" positions reported by ``self.get_down_times``.

        Returns a 4-tuple ``(segments, segment_times, cam_segments,
        fixed_time)`` where

        * ``segments`` is the recorded segments followed by the newly
          computed ``(start, end)`` index pairs (``end`` is inclusive),
        * ``segment_times`` holds the matching ``(times[start], times[end])``,
        * ``cam_segments`` are ``[start, end]`` pairs delimited only by the
          down positions over the whole of ``times``,
        * ``fixed_time`` is ``times[last_fixed_index]``.
        """
        min_fit_size = 1000
        recorded_segments, last_fixed_index, start_fit_index = check_recorded_segments(
            times, self.username, start_time, end_time, min_fit_size)

        # TODO inefficient repetition: the down positions are computed twice,
        # once over all of ``times`` and once over the part that is refitted.
        cam_cuts = (np.where(self.get_down_times(times))[0] + 1).tolist()
        cam_segments = [
            [lo, hi] for lo, hi in zip([0] + cam_cuts, cam_cuts + [len(times)])
        ]

        fit_cuts = (np.where(self.get_down_times(times[start_fit_index:]))[0]
                    + 1 + start_fit_index).tolist()
        fit_starts = [start_fit_index] + fit_cuts
        fit_ends = fit_cuts + [len(times)]

        sigma = 2
        segments = []
        for seg_start, seg_end in zip(fit_starts, fit_ends):
            if seg_end - seg_start <= 1:
                # Stretch too short to search; store it with an inclusive
                # end so it can be used as an index into ``times``.
                segments.append((seg_start, seg_end - 1))
                continue

            chunk = X[seg_start:seg_end]
            detector = rpt.BottomUp(model="l1", min_size=1, jump=1).fit(chunk)
            penalty = np.log(chunk.shape[0]) * chunk.shape[1] * sigma**2
            cuts = detector.predict(pen=penalty)
            # Shift the chunk-relative break points to absolute positions.
            cuts = (np.array(cuts) + seg_start).tolist()
            # The last break point equals ``seg_end``; step back by one to
            # avoid an index out of range.
            cuts[-1] -= 1
            segments.extend(
                [lo, hi] for lo, hi in zip([seg_start] + cuts[:-1], cuts))

        # Drop segments that end at or before the fixed index and clip the
        # remaining starts so they never reach back before it.
        clipped = []
        for lo, hi in segments:
            if hi > last_fixed_index:
                clipped.append((max(lo, last_fixed_index), hi))
        segments = clipped
        segment_times = [(times[lo], times[hi]) for lo, hi in segments]

        fix_threshold = max(len(times) - min_fit_size // 2, 0)

        record_segments(self.username, segments, segment_times,
                        times[last_fixed_index], times[fix_threshold])

        segments = recorded_segments + segments
        segment_times = [(times[lo], times[hi]) for lo, hi in segments]

        return segments, segment_times, cam_segments, times[last_fixed_index]
def adaptative_sampling(x: pd.Series, std_dev: float = None) -> np.ndarray:
    """
    Apply Bottom-up segmentation and "findpeaks" for robustness.
    This algorithm will retrieve the change-point location.

    Parameters
    ----------
    x : pd.Series
        initial series in which to find the change-points
    std_dev : float, optional
        standard deviation of the series, can be a local or global value;
        by default None, in which case ``x.std()`` is used

    Returns
    -------
    np.ndarray
        boolean array with the same shape as ``x`` which is True at the
        detected change-points (bottom-up break points and positive or
        negative peaks); an empty boolean array when ``x`` is empty
    """
    # Nothing to segment: return an empty boolean mask so that the result
    # type is the same as for non-empty input.
    if x.shape[0] == 0:
        return np.zeros(x.shape, dtype=bool)

    if std_dev is None:
        std_dev = x.std()

    # piecewise-segmentation with BottomUp algorithm; the sample position is
    # stacked next to the values as the second column of the signal
    positions = list(range(len(x)))
    signal = np.column_stack((x.to_numpy().reshape(-1, 1), positions))
    bottom_up = rpt.BottomUp(model='linear', jump=10)\
        .fit_predict(signal, pen=std_dev*np.log(len(x)))

    # add the peaks (both directions) to be robust
    peaks_p = find_peaks(x, prominence=std_dev)[0]
    peaks_n = find_peaks(-x, prominence=std_dev)[0]

    # merge, de-duplicate and sort all candidate positions
    segments = sorted({*peaks_p, *peaks_n, *bottom_up})

    # convert from position to boolean
    cond = np.zeros(x.shape, dtype=bool)
    # last value in "segments" is the length of the series, not a valid index
    cond[segments[:-1]] = True
    return cond
Beispiel #4
0
def changePointDetection(glacier, attr, startdate=None, enddate=None,
                         n_breakpoints=1, method='window', model='l1',
                         wwidth=5):
    """Use ruptures package to identify change points in glacier time series.

    Acceptable methods are 'window' (sliding window), 'binseg' (binary
    segmentation), and 'bottomup' (bottom-up). See
    https://centre-borelli.github.io/ruptures-docs/user-guide for further
    information.

    Parameters
    ----------
    glacier : object providing ``filterDates(attr, startdate, enddate)``,
        which must return an ``(attrs, dates)`` pair.
    attr : attribute identifier forwarded to ``glacier.filterDates``.
    startdate, enddate : optional bounds forwarded to ``glacier.filterDates``.
    n_breakpoints : number of break points requested from ruptures.
    method : one of 'window', 'binseg', 'bottomup'.
    model : ruptures cost model name.
    wwidth : window width, only used when ``method == 'window'``.

    Returns
    -------
    tuple
        ``(breakpoint_dates, signal, breakpoints)``: the entries of ``dates``
        selected by the break points, the raw signal values, and the break
        point list itself.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    # Fail early with a clear message instead of an unbound ``algo`` later on.
    if method not in ('window', 'binseg', 'bottomup'):
        raise ValueError(
            "Unknown method {!r}; expected 'window', 'binseg' or "
            "'bottomup'".format(method))

    attrs, dates = glacier.filterDates(attr, startdate, enddate)
    signal = attrs.values
    if method == 'window':
        algo = rpt.Window(width=wwidth, model=model).fit(signal)
    elif method == 'binseg':
        algo = rpt.Binseg(model=model).fit(signal)
    else:  # 'bottomup'
        algo = rpt.BottomUp(model=model).fit(signal)
    breakpoints = algo.predict(n_bkps=n_breakpoints)
    # remove breakpoints at beginning/end of time series
    # NOTE(review): ruptures break points are positions in ``signal`` while
    # the values compared here are index labels of ``dates`` - confirm that
    # the two line up for the indexes ``filterDates`` produces.
    if dates.index[0] - 1 in breakpoints:
        breakpoints.remove(dates.index[0] - 1)
    if dates.index[-1] in breakpoints:
        breakpoints.remove(dates.index[-1])
    breakpoint_dates = dates[breakpoints]
    return breakpoint_dates, signal, breakpoints
Beispiel #5
0
def get_change_point(series, jump=5, n_bkps=5, pen=10):
    """
    Return the change points on which the most detectors agree.

    Several ruptures detectors (Dynp, Pelt, Binseg, BottomUp, Window) and
    ``change_point_detection`` are run on the same data; every reported
    position counts as one vote.

    series: pandas object (its ``.values`` is used) or a numpy array
    jump: sampling step passed to every ruptures detector
    n_bkps: number of break points requested from the detectors that take it
    pen: penalty for Pelt

    Returns the list of positions with the highest vote count, in order of
    first appearance. Positions ``0`` and ``len(series)`` are never returned.
    The list is empty when no other position received a vote.
    """
    from collections import Counter

    # Accept pandas objects as well as plain arrays.
    series = getattr(series, "values", series)
    alg_dynp = rpt.Dynp(jump=jump).fit_predict(series, n_bkps=n_bkps)

    alg_pelt = rpt.Pelt(jump=jump).fit_predict(series, pen=pen)

    alg_bin = rpt.Binseg(jump=jump).fit_predict(series, n_bkps=n_bkps)

    alg_bot = rpt.BottomUp(jump=jump).fit_predict(series, n_bkps=n_bkps)

    alg_win = rpt.Window(jump=jump).fit_predict(series, n_bkps=n_bkps)

    alg_cumsum = change_point_detection(series.tolist())

    # We now have the change points from several algorithms; count how many
    # times each position was predicted.
    votes = Counter(
        alg_dynp + alg_pelt + alg_bin + alg_bot + alg_win + alg_cumsum)

    # The series boundaries are not real change points. ``pop`` with a
    # default avoids a KeyError when a boundary received no vote.
    votes.pop(0, None)
    votes.pop(len(series), None)

    if not votes:
        return []

    top_count = max(votes.values())
    return [point for point, count in votes.items() if count == top_count]
Beispiel #6
0
def detect_data_shifts(series,
                       filtering=True,
                       use_default_models=True,
                       method=None,
                       cost=None,
                       penalty=40):
    """
    Detect data shifts in a time series of daily values.

    .. warning:: If the passed time series is less than 2 years in length,
        it will not be corrected for seasonality. Data shift detection will
        be run on the min-max normalized time series with no seasonality
        correction.

    Parameters
    ----------
    series : Pandas series with datetime index.
        Time series of daily PV data values, which can include irradiance
        and power data.
    filtering : Boolean, default True.
        Whether or not to filter out outliers and stale data from the time
        series. If True, then this data is filtered out before running the
        data shift detection sequence. If False, this data is not filtered
        out. Default set to True.
    use_default_models: Boolean, default True
        If True, then default change point detection search parameters are
        used. For time series shorter than 2 years in length, the search
        function is `rpt.Window`  with `model='rbf'`, `width=50` and
        `penalty=30`. For time series 2 years or longer in length, the
        search function is `rpt.BottomUp` with `model='rbf'`
        and `penalty=40`. In that case `method`, `cost` and `penalty`
        are ignored.
    method: ruptures search method instance or None, default None.
        Ruptures search method instance. See
        https://centre-borelli.github.io/ruptures-docs/user-guide/.
        Required when `use_default_models` is False.
    cost: str or None, default None
        Cost function passed to the ruptures changepoint search instance.
        See https://centre-borelli.github.io/ruptures-docs/user-guide/
    penalty: int, default 40
        Penalty value passed to the ruptures changepoint detection method.
        Default set to 40.

    Returns
    -------
    Pandas Series
        Series of boolean values with the input Series' datetime index, where
        detected changepoints are labeled as True, and all other values are
        labeled as False.

    Raises
    ------
    ImportError
        If the ruptures package is not installed.
    ValueError
        If `use_default_models` is False and no `method` is given.

    References
    ----------
    .. [1] Perry K., and Muller, M. "Automated shift detection in sensor-based
       PV power and irradiance time series", 2022 IEEE 48th Photovoltaic
       Specialists Conference (PVSC).
    """
    try:
        import ruptures as rpt
    except ImportError as err:
        raise ImportError("data_shifts() requires ruptures.") from err
    # Run data checks on cleaned data to make sure that the data can be run
    # successfully through the routine
    _run_data_checks(series)
    # Run the filtering sequence, if marked as True; otherwise continue with
    # the series as passed
    if filtering:
        series_filtered = _erroneous_filter(series)
    else:
        series_filtered = series
    # Drop any duplicated data from the time series
    series_filtered = series_filtered.drop_duplicates()
    # Check if the time series is more than 2 years long. If so, remove
    # seasonality. If not, run analysis on the normalized time series
    span_days = (series_filtered.index.max()
                 - series_filtered.index.min()).days
    seasonality_rmv = span_days > 730
    series_processed = _preprocess_data(series_filtered,
                                        remove_seasonality=seasonality_rmv)
    # The search runs on the non-NaN values only; keep the matching index so
    # that detected positions can be mapped back to timestamps
    series_valid = series_processed.dropna()
    points = np.array(series_valid)
    if use_default_models and seasonality_rmv:
        # Seasonality has been removed and default model is used: run the
        # BottomUp method
        algo = rpt.BottomUp(model='rbf').fit(points)
        result = algo.predict(pen=40)
    elif use_default_models:
        # Seasonality was not removed but default model is used: run the
        # Window-based method
        algo = rpt.Window(model='rbf', width=50).fit(points)
        result = algo.predict(pen=30)
    else:
        # Otherwise run changepoint detection with the passed parameters
        if method is None:
            raise ValueError(
                "A ruptures search method must be passed via `method` when "
                "`use_default_models` is False.")
        algo = method(model=cost).fit(points)
        result = algo.predict(pen=penalty)
    # Remove the last index of the time series, if present
    if len(points) in result:
        result.remove(len(points))
    series_processed.index.name = "datetime"
    # Positions in `result` refer to `points`, i.e. to the NaN-free series,
    # so the mask is built on that index
    mask = pd.Series(False, index=series_valid.index)
    mask.iloc[result] = True
    # Re-index the mask to include any timestamps that were
    # filtered out as outliers
    mask = mask.reindex(series.index, fill_value=False)
    return mask
Beispiel #7
0
    def calculate_breakpoints(self, X):
        """
        Run a bottom-up change point search with the "l1" cost on ``X``.

        The penalty is ``log(X.shape[0]) * X.shape[1] * 2**2``. Returns
        the break point list produced by ruptures' ``predict``.
        """
        sigma = 2
        detector = rpt.BottomUp(model="l1", min_size=1, jump=1).fit(X)
        penalty = np.log(X.shape[0]) * X.shape[1] * sigma**2
        return detector.predict(pen=penalty)
Beispiel #8
0
    # need ResultsTrend 
    ofn = "./ResultsCPD/" + os.path.basename(fn).split(".")[0]

    no_cases = len(df['category_column'].unique())
    df[df.columns[0]] = df[df.columns[0]].apply(pd.to_numeric)
    df[df.columns[1]] = pd.to_datetime(df[df.columns[1]], format="%Y-%m-%d %H:%M:%S")
    df[df.columns[2]] = pd.to_datetime(df[df.columns[2]], format="%Y-%m-%d %H:%M:%S")
    df[df.columns[3:]] = df[df.columns[3:]].apply(pd.to_numeric)
  
    df = df.set_index(df[df.columns[0]])
    for category in df['category_column'].unique():
        if category == 1:
            X = df.loc[df['category_column'] == category]['p1_current'].values
            indexes = df.loc[df['category_column'] == category].index.values
            
            algo = rpt.BottomUp(model="l2")
            result = algo.fit_predict(X, n_bkps=n_bkps)
            x = []
            for idx in result[:-1]:
                x.append(indexes[idx])
            y = []
            for idx in x:
                y.append(df.loc[df.index == idx]['p1_current'].values[0]) 

            plt.plot(df.loc[df['category_column'] == category].index, df.loc[df['category_column'] == category]['p1_current'], label='normal')
            plt.scatter(x, y, label='outlier', color='red', marker='o')
            plt.title("Change Finder Bottom Up p1_current")
            plt.xlabel('Date Time')
            plt.ylabel('p1_current')
            plt.savefig( ofn + "_BottomUp_p1_current.png")
            plt.show()