def detect_change_points(self, ys: np.ndarray, **kwargs) -> Sequence[int]:
    """Estimate change points in ``ys`` using bottom-up segmentation.

    @param ys: signal passed to the ``ruptures.BottomUp`` estimator
    @param model: optional keyword naming the ruptures cost model, e.g.
        "l1", "rbf", "linear", "normal", "ar" (default is "l2")
    :return: list of estimated change points
    """
    cost_model = kwargs.get("model", "l2")
    fitted = ruptures.BottomUp(model=cost_model).fit(ys)
    # The penalty is fixed at 3 for every call.
    return fitted.predict(pen=3)
def calculate_segments(self, X, times, start_time, end_time):
    """Split ``X`` into segments using down-times and bottom-up change points.

    The series is first cut at the positions reported by
    ``self.get_down_times``; every resulting stretch longer than one sample
    is then further segmented with ``rpt.BottomUp``. Newly computed segments
    are handed to ``record_segments`` and appended to the segments returned
    by ``check_recorded_segments``.

    :param X: data indexed like ``times`` along its first axis; sliced as
        ``X[start:end]`` and read via ``.shape[0]`` / ``.shape[1]``
        (presumably a 2-D array of samples x features -- TODO confirm)
    :param times: sequence of time stamps, indexed by segment boundaries
    :param start_time: forwarded to ``check_recorded_segments``
    :param end_time: forwarded to ``check_recorded_segments``
    :return: tuple ``(segments, segment_times, cam_segments,
        times[last_fixed_index])`` where ``segments`` holds index pairs,
        ``segment_times`` the matching ``(times[s], times[e])`` pairs and
        ``cam_segments`` the ``[start, end]`` pairs obtained from the
        down-times of the whole series
    """
    min_fit_size = 1000
    # NOTE(review): the exact meaning of the three returned values is defined
    # by check_recorded_segments, which is not visible here; from their use
    # below they look like (already stored segments, index up to which
    # segments are final, index from which to re-fit) -- confirm.
    recorded_segments, last_fixed_index, start_fit_index = \
        check_recorded_segments(times, self.username, start_time, end_time, min_fit_size)
    # TODO inefficient repetition
    # Cut points over the full series: one position past every truthy entry
    # of get_down_times.
    downs = (np.where(self.get_down_times(times))[0] + 1).tolist()
    starts = [0] + downs
    ends = downs + [len(times)]
    cam_segments = [list(a) for a in zip(starts, ends)]
    # Same computation restricted to the tail that has to be (re)fitted;
    # the offset converts positions back to indices into the full series.
    downs = (np.where(self.get_down_times(times[start_fit_index:]))[0] + 1 + start_fit_index).tolist()
    starts = [start_fit_index] + downs
    ends = downs + [len(times)]
    segments = []
    #logger.debug(str(recorded_segments[-1]))
    #logger.debug("calculating breakpoints, {}".format(list(zip(starts, ends))))
    for start, end in zip(starts, ends):
        #logger.debug("{} -> {}".format(start, end))
        if end - start > 1:
            part = X[start:end]
            model = "l1"  # "l2", "rbf"
            algo = rpt.BottomUp(model=model, min_size=1, jump=1).fit(part)
            sigma = 2
            # Penalty grows with log(#samples) * #columns * sigma^2.
            breaks = algo.predict(pen=np.log(part.shape[0]) * part.shape[1] * sigma**2)
            # Shift breakpoints from positions within `part` to positions in X.
            breaks = (np.array(breaks) + start).tolist()
            breaks[-1] -= 1  # avoid index out of range
            part_intervals = [
                list(a) for a in zip([start] + breaks[:-1], breaks)
            ]
            segments.extend(part_intervals)
        else:
            # avoid index out of range
            end -= 1
            segments.append((start, end))
    # Keep only segments that end after the fixed prefix and clip their start
    # to it, so already-fixed history is not recorded again.
    segments = [(max(s, last_fixed_index), e) for s, e in segments if e > last_fixed_index]
    segment_times = [(times[s], times[e]) for s, e in segments]
    # Index at most min_fit_size // 2 samples before the end of the series.
    fix_threshold = max(len(times) - min_fit_size // 2, 0)
    record_segments(self.username, segments, segment_times, times[last_fixed_index], times[fix_threshold])
    segments = recorded_segments + segments
    segment_times = [(times[s], times[e]) for s, e in segments]
    return segments, segment_times, cam_segments, times[last_fixed_index]
def adaptative_sampling(x: pd.Series, std_dev: float = None) -> np.ndarray:
    """
    Apply Bottom-up segmentation and "findpeaks" for robustness.

    This algorithm will retrieve the change-point location.

    Parameters
    ----------
    x : pd.Series
        initial series in which to find the change-points
    std_dev : float, optional
        standard deviation of the series, can be a local or global value,
        by default None - revert to the standard deviation of ``x``

    Returns
    -------
    np.ndarray
        boolean array with the same shape as ``x`` which is True at the
        change-point positions; an empty boolean array for empty input
    """
    # Empty input: return an empty boolean mask so the return type is the
    # same on every path.
    if x.shape[0] == 0:
        return np.zeros(x.shape, dtype=bool)

    if std_dev is None:
        std_dev = x.std()

    n_samples = len(x)

    # piecewise-segmentation with BottomUp algorithm on (value, position)
    signal = np.column_stack(
        (x.to_numpy().reshape(-1, 1), np.arange(n_samples))
    )
    bottom_up = rpt.BottomUp(model='linear', jump=10).fit_predict(
        signal, pen=std_dev * np.log(n_samples)
    )

    # add the positive and negative peaks to be robust
    peaks_p = find_peaks(x, prominence=std_dev)[0]
    peaks_n = find_peaks(-x, prominence=std_dev)[0]

    # merge, de-duplicate and sort all candidate positions
    segments = sorted({*peaks_p, *peaks_n, *bottom_up})

    # convert from positions to a boolean mask
    cond = np.zeros(x.shape, dtype=bool)
    # the last value in "segments" is the series length reported by the
    # segmentation, which is not a valid position
    cond[segments[:-1]] = True
    return cond
def changePointDetection(glacier, attr, startdate=None, enddate=None,
                         n_breakpoints=1, method='window', model='l1', wwidth=5):
    """Use ruptures package to identify change points in glacier time series.

    Acceptable methods are 'window' (sliding window), 'binseg' (binary
    segmentation), and 'bottomup' (bottom-up). See
    https://centre-borelli.github.io/ruptures-docs/user-guide for further
    information.

    Parameters
    ----------
    glacier : object providing ``filterDates(attr, startdate, enddate)``
        which returns ``(attrs, dates)``.
    attr : attribute identifier forwarded to ``glacier.filterDates``.
    startdate, enddate : optional bounds forwarded to ``filterDates``.
    n_breakpoints : number of breakpoints requested from ruptures.
    method : one of 'window', 'binseg', 'bottomup'.
    model : ruptures cost model name.
    wwidth : window width, only used when ``method == 'window'``.

    Returns
    -------
    tuple
        ``(breakpoint_dates, signal, breakpoints)``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    valid_methods = ('window', 'binseg', 'bottomup')
    # Reject unknown methods up front; previously `algo` was simply left
    # unbound and the function failed later with an UnboundLocalError.
    if method not in valid_methods:
        raise ValueError(
            "Unknown method {!r}; expected one of {}".format(
                method, ", ".join(valid_methods)))

    attrs, dates = glacier.filterDates(attr, startdate, enddate)
    signal = attrs.values

    if method == 'window':
        algo = rpt.Window(width=wwidth, model=model).fit(signal)
    elif method == 'binseg':
        algo = rpt.Binseg(model=model).fit(signal)
    else:  # 'bottomup'
        algo = rpt.BottomUp(model=model).fit(signal)
    breakpoints = algo.predict(n_bkps=n_breakpoints)

    # remove breakpoints at beginning/end of time series
    # NOTE(review): ruptures reports positions within `signal`, while these
    # checks and `dates[breakpoints]` use the index labels of `dates`; this
    # appears to assume a 1-based integer index -- confirm with filterDates.
    if dates.index[0] - 1 in breakpoints:
        breakpoints.remove(dates.index[0] - 1)
    if dates.index[-1] in breakpoints:
        breakpoints.remove(dates.index[-1])

    breakpoint_dates = dates[breakpoints]
    return breakpoint_dates, signal, breakpoints
def get_change_point(series, jump=5, n_bkps=5, pen=10):
    """Return the change points most often agreed on by several detectors.

    Runs ruptures' Dynp, Pelt, Binseg, BottomUp and Window searches plus
    ``change_point_detection`` and counts how many times each position is
    reported. The positions with the highest count are returned.

    series: pandas Series (its ``.values`` are used) or a numpy array
    jump: subsampling step passed to every ruptures search
    n_bkps: number of breakpoints requested from each search
    pen: penalty for Pelt

    Returns a list of positions, in order of first appearance; an empty list
    when no detector reports an interior change point.
    """
    from collections import Counter

    # Accept both a pandas Series and a plain array.
    series = getattr(series, "values", series)

    alg_dynp = rpt.Dynp(jump=jump).fit_predict(series, n_bkps=n_bkps)
    alg_pelt = rpt.Pelt(jump=jump).fit_predict(series, pen=pen)
    alg_bin = rpt.Binseg(jump=jump).fit_predict(series, n_bkps=n_bkps)
    alg_bot = rpt.BottomUp(jump=jump).fit_predict(series, n_bkps=n_bkps)
    alg_win = rpt.Window(jump=jump).fit_predict(series, n_bkps=n_bkps)
    alg_cumsum = change_point_detection(series.tolist())

    # We now have change points from several algorithms; find the points
    # that were predicted by the most of them.
    votes = Counter(
        alg_dynp + alg_pelt + alg_bin + alg_bot + alg_win + alg_cumsum
    )

    # The series boundaries are not real change points. Use pop() with a
    # default: a boundary that no detector reported must not raise KeyError.
    votes.pop(0, None)
    votes.pop(len(series), None)

    if not votes:
        return []

    top_count = max(votes.values())
    return [point for point, count in votes.items() if count == top_count]
def detect_data_shifts(series, filtering=True, use_default_models=True,
                       method=None, cost=None, penalty=40):
    """
    Detect data shifts in a time series of daily values.

    .. warning:: If the passed time series is less than 2 years in length,
        it will not be corrected for seasonality. Data shift detection will
        be run on the min-max normalized time series with no seasonality
        correction.

    Parameters
    ----------
    series : Pandas series with datetime index.
        Time series of daily PV data values, which can include irradiance
        and power data.
    filtering : Boolean, default True.
        Whether or not to filter out outliers and stale data from the time
        series. If True, then this data is filtered out before running the
        data shift detection sequence. If False, this data is not filtered
        out. Default set to True.
    use_default_models: Boolean, default True
        If True, then default change point detection search parameters are
        used. For time series shorter than 2 years in length, the search
        function is `rpt.Window` with `model='rbf'`, `width=50` and
        `penalty=30`. For time series 2 years or longer in length, the
        search function is `rpt.BottomUp` with `model='rbf'`
        and `penalty=40`.
    method: ruptures search method instance or None, default None.
        Ruptures search method instance. Required when
        `use_default_models` is False. See
        https://centre-borelli.github.io/ruptures-docs/user-guide/.
    cost: str or None, default None
        Cost function passed to the ruptures changepoint search instance.
        Only used when `use_default_models` is False. See
        https://centre-borelli.github.io/ruptures-docs/user-guide/
    penalty: int, default 40
        Penalty value passed to the ruptures changepoint detection method.
        Only used when `use_default_models` is False. Default set to 40.

    Returns
    -------
    Pandas Series
        Series of boolean values with the input Series' datetime index,
        where detected changepoints are labeled as True, and all other
        values are labeled as False.

    Raises
    ------
    ImportError
        If ruptures is not installed.
    ValueError
        If `use_default_models` is False and no `method` is given.

    References
    ----------
    .. [1] Perry K., and Muller, M. "Automated shift detection in sensor-based
       PV power and irradiance time series", 2022 IEEE 48th Photovoltaic
       Specialists Conference (PVSC).
    """
    try:
        import ruptures as rpt
    except ImportError as err:
        raise ImportError("data_shifts() requires ruptures.") from err
    # Run data checks on cleaned data to make sure that the data can be run
    # successfully through the routine
    _run_data_checks(series)
    # Run the filtering sequence, if marked as True. Without filtering the
    # raw series is used as-is (previously this path left the variable
    # unbound and crashed).
    if filtering:
        series_filtered = _erroneous_filter(series)
    else:
        series_filtered = series
    # Drop any duplicated data from the time series
    series_filtered = series_filtered.drop_duplicates()
    # Check if the time series is more than 2 years long. If so, remove
    # seasonality. If not, run analysis on the normalized time series
    span_days = (series_filtered.index.max() -
                 series_filtered.index.min()).days
    seasonality_rmv = span_days > 730
    series_processed = _preprocess_data(series_filtered,
                                        remove_seasonality=seasonality_rmv)
    series_processed.index.name = "datetime"
    # Breakpoint positions returned by ruptures refer to the NaN-free
    # values, so keep the matching NaN-free index for building the mask.
    series_valid = series_processed.dropna()
    points = np.array(series_valid)
    if use_default_models:
        if seasonality_rmv:
            # Seasonality removed: run the BottomUp method
            algo = rpt.BottomUp(model='rbf').fit(points)
            result = algo.predict(pen=40)
        else:
            # No seasonality removal: run the Window-based method
            algo = rpt.Window(model='rbf', width=50).fit(points)
            result = algo.predict(pen=30)
    else:
        # Otherwise run changepoint detection with the passed parameters
        if method is None:
            raise ValueError("A ruptures search method must be passed via "
                             "'method' when use_default_models is False.")
        algo = method(model=cost).fit(points)
        result = algo.predict(pen=penalty)
    # Remove the last index of the time series, if present
    if len(points) in result:
        result.remove(len(points))
    # Flag the positions where changepoints are detected
    mask = pd.Series(False, index=series_valid.index)
    mask.iloc[result] = True
    # Re-index the mask to include any timestamps that were
    # filtered out as outliers
    mask = mask.reindex(series.index, fill_value=False)
    return mask
def calculate_breakpoints(self, X):
    """Return the bottom-up ("l1" cost) breakpoints of ``X``.

    ``X`` must expose ``shape[0]`` and ``shape[1]``; the penalty passed to
    ruptures is ``log(shape[0]) * shape[1] * 2**2``.
    """
    # Alternative cost models: "l2", "rbf"
    fitted = rpt.BottomUp(model="l1", min_size=1, jump=1).fit(X)
    sigma = 2
    penalty = np.log(X.shape[0]) * X.shape[1] * sigma**2
    return fitted.predict(pen=penalty)
# need ResultsTrend
# Output file stem: "./ResultsCPD/" + the input file's base name up to its
# first dot.
ofn = "./ResultsCPD/" + os.path.basename(fn).split(".")[0]
no_cases = len(df['category_column'].unique())

# Column 0 is numeric, columns 1 and 2 are timestamps, the rest are numeric.
df[df.columns[0]] = df[df.columns[0]].apply(pd.to_numeric)
for _time_col in (df.columns[1], df.columns[2]):
    df[_time_col] = pd.to_datetime(df[_time_col], format="%Y-%m-%d %H:%M:%S")
df[df.columns[3:]] = df[df.columns[3:]].apply(pd.to_numeric)
df = df.set_index(df[df.columns[0]])

for category in df['category_column'].unique():
    # Only category 1 is analysed and plotted.
    if category != 1:
        continue

    category_rows = df.loc[df['category_column'] == category]
    X = category_rows['p1_current'].values
    indexes = category_rows.index.values

    algo = rpt.BottomUp(model="l2")
    result = algo.fit_predict(X, n_bkps=n_bkps)

    # Map breakpoint positions (the final entry is skipped) to index labels,
    # then look up the value at each label for the scatter overlay.
    x = [indexes[idx] for idx in result[:-1]]
    y = [df.loc[df.index == idx]['p1_current'].values[0] for idx in x]

    plt.plot(category_rows.index, category_rows['p1_current'], label='normal')
    plt.scatter(x, y, label='outlier', color='red', marker='o')
    plt.title("Change Finder Bottom Up p1_current")
    plt.xlabel('Date Time')
    plt.ylabel('p1_current')
    plt.savefig(ofn + "_BottomUp_p1_current.png")
    plt.show()