def membership_mat(bkps): """Return membership matrix for the given segmentation.""" n_samples = bkps[-1] m_mat = np.zeros((n_samples, n_samples)) for start, end in pairwise([0] + bkps): m_mat[start:end, start:end] = 1 return m_mat
def _seg(self, n_bkps=None, pen=None, epsilon=None): """Computes the binary segmentation. The stopping rule depends on the parameter passed to the function. Args: n_bkps (int): number of breakpoints to find before stopping. penalty (float): penalty value (>0) epsilon (float): reconstruction budget (>0) Returns: dict: partition dict {(start, end): cost value,...} """ # initialization bkps = [self.n_samples] stop = False while not stop: stop = True new_bkps = [ self.single_bkp(start, end) for start, end in pairwise([0] + bkps) ] bkp, gain = max(new_bkps, key=lambda x: x[1]) if bkp is None: # all possible configuration have been explored. break stop = True if n_bkps is not None: if len(bkps) - 1 < n_bkps: stop = False elif pen is not None: if gain > pen: stop = False elif epsilon is not None: error = self.cost.sum_of_costs(bkps) if error > epsilon: stop = False if not stop: bkps.append(bkp) bkps.sort() partition = {(start, end): self.cost.error(start, end) for start, end in pairwise([0] + bkps)} return partition
def sum_of_costs(self, bkps): """Returns the sum of segments cost for the given segmentation. Args: bkps (list): list of change points. By convention, bkps[-1]==n_samples. Returns: float: sum of costs """ soc = sum(self.error(start, end) for start, end in pairwise([0] + bkps)) return soc
def _seg(self, n_bkps=None, pen=None, epsilon=None): """Computes the binary segmentation. The stopping rule depends on the parameter passed to the function. Args: n_bkps (int): number of breakpoints to find before stopping. penalty (float): penalty value (>0) epsilon (float): reconstruction budget Returns: list: list of breakpoint indexes """ stop = False bkps = [self.n_samples] residual = self.signal inds = np.arange(1, self.n_samples) correction = 1 / inds + 1 / inds[::-1] while not stop: res_norm = norm(residual) # greedy search raw_corr = np.sum(residual.cumsum(axis=0)**2, axis=1) correlation = raw_corr[:-1].flatten() * correction bkp_opt, _ = max(enumerate(correlation, start=1), key=lambda x: x[1]) # orthogonal projection proj = np.zeros(self.signal.shape) for (start, end) in pairwise(sorted([0, bkp_opt] + bkps)): proj[start:end] = self.signal[start:end].mean(axis=0) residual = self.signal - proj # stopping criterion stop = True if n_bkps is not None: if len(bkps) - 1 < n_bkps: stop = False elif pen is not None: if res_norm - norm(residual) > pen: stop = False elif epsilon is not None: if norm(residual) > epsilon: stop = False # update if not stop: res_norm = norm(residual) bkps.append(bkp_opt) bkps.sort() return bkps
def plot_cp(df_signal, lst_cp_true, lst_cp_det=None, title=False):
    '''
    Plot a signal together with its change points.

    True change points are rendered as alternating background colors,
    detected ones as dashed vertical lines. Adapted from ruptures.show.

    Parameters
    ----------
    df_signal (pd.DataFrame): signal in df form - index can be timestamp
    lst_cp_true (list): list of ints representing true cp-locations; the
        last entry is shifted back by one so that it indexes the final row
    lst_cp_det (list): optional list of ints representing detected
        cp-locations; its last entry is not drawn
    title (string): optional figure title (only applied when a str is given)

    Returns
    -------
    none
    '''
    plt.rc('font', size=12)
    shading = cycle(["silver", [.65, 0, 0]])

    fig, ax = plt.subplots(figsize=(10, 2.25))
    if type(title) is str:
        ax.set_title(title)
    ax.plot(df_signal, c='k')
    ax.set_xlim(df_signal.index[0], df_signal.index[-1])

    # true regimes as shaded spans; the end marker is pulled back onto the last row
    edges = [0] + lst_cp_true
    edges[-1] -= 1
    timeline = df_signal.index
    for (left, right), shade in zip(pairwise(edges), shading):
        ax.axvspan(timeline[left], timeline[right], facecolor=shade, alpha=0.2)

    # detected change points as dashed lines (the closing index is skipped)
    if type(lst_cp_det) is list:
        for detected in lst_cp_det[:-1]:
            ax.axvline(timeline[detected], c='darkred', lw=3, ls='--')

    plt.legend(['signal (pre-processed)', 'CP detected'])
def _seg(self, n_bkps=None, pen=None, epsilon=None): """Compute the bottom-up segmentation. The stopping rule depends on the parameter passed to the function. Args: n_bkps (int): number of breakpoints to find before stopping. penalty (float): penalty value (>0) epsilon (float): reconstruction budget (>0) Returns: dict: partition dict {(start, end): cost value,...} """ leaves = list(self.leaves) # bottom up fusion stop = False while not stop: stop = True leaves.sort(key=lambda n: n.start) merged = (self.merge(left, right) for left, right in pairwise(leaves)) # find segment to merge try: leaf = min(merged, key=lambda n: n.gain) except ValueError: # if merged is empty (all nodes have been merged). break if n_bkps is not None: if len(leaves) > n_bkps + 1: stop = False elif pen is not None: if leaf.gain < pen: stop = False elif epsilon is not None: if sum(leaf_tmp.val for leaf_tmp in leaves) < epsilon: stop = False if not stop: leaves.remove(leaf.left) leaves.remove(leaf.right) leaves += [leaf] partition = {(leaf.start, leaf.end): leaf.val for leaf in leaves} return partition
def _seg(self, n_bkps=None, pen=None, epsilon=None): """Computes the binary segmentation. The stopping rule depends on the parameter passed to the function. Args: n_bkps (int): number of breakpoints to find before stopping. penalty (float): penalty value (>0) epsilon (float): reconstruction budget Returns: list: list of breakpoint indexes """ stop = False bkps = [self.n_samples] inds = np.arange(1, self.n_samples + 1) csum = self.gram.cumsum(axis=0).cumsum(axis=1) residual = csum[-1, -1] while not stop: # greedy search correlation = np.diag(csum) * self.n_samples * self.n_samples correlation += inds**2 * csum[-1, -1] correlation -= 2 * self.n_samples * inds * csum[-1] correlation /= inds * inds[::-1] bkp = np.argmax(correlation) + 1 # orthogonal projection (matrix form) # adj = np.zeros(self.gram.shape) # adjacency matrix # for start, end in pairwise(sorted([0, bkp] + bkps)): # duree = end - start # adj[start:end, start:end] = np.ones(duree, duree) / duree # gram_new = self.gram + adj @ self.gram @ adj - adj @ self.gram - self.gram @ adj # csum = gram_new.cumsum(axis=0).cumsum(axis=1) # orthogonal projection (vectorized form) gram_new = self.gram.copy() # cross product cross_g = np.zeros(self.gram.shape) for start, end in pairwise(sorted([0, bkp] + bkps)): val = self.gram[:, start:end].mean(axis=1).reshape(-1, 1) cross_g[:, start:end] = val gram_new -= cross_g + cross_g.T # products of segment means for p, q in product(pairwise(sorted([0, bkp] + bkps)), repeat=2): start1, end1 = p start2, end2 = q gram_new[start1:end1, start2:end2] += self.gram[start1:end1, start2:end2].mean() csum = gram_new.cumsum(axis=0).cumsum(axis=1) # stopping criterion stop = True if n_bkps is not None: if len(bkps) - 1 < n_bkps: stop = False elif pen is not None: if residual - csum[-1, -1] > pen: stop = False elif epsilon is not None: if csum[-1, -1] > epsilon: stop = False # update if not stop: residual = csum[-1, -1] bkps.append(bkp) bkps.sort() return bkps
def _seg(self, n_bkps=None, pen=None, epsilon=None): """Compute the bottom-up segmentation. The stopping rule depends on the parameter passed to the function. Args: n_bkps (int): number of breakpoints to find before stopping. penalty (float): penalty value (>0) epsilon (float): reconstruction budget (>0) Returns: dict: partition dict {(start, end): cost value,...} """ leaves = sorted(self.leaves) removed = set() merged = [] for left, right in pairwise(leaves): candidate = self.merge(left, right) heapq.heappush(merged, (candidate.gain, candidate)) # bottom up fusion stop = False while not stop: stop = True try: gain, leaf = heapq.heappop(merged) # Ignore any merge candidates whose left or right children # no longer exist (because they were merged with another node). # It's cheaper to do this here than during the initial merge. while leaf.left in removed or leaf.right in removed: gain, leaf = heapq.heappop(merged) # if merged is empty (all nodes have been merged). except IndexError: break if n_bkps is not None: if len(leaves) > n_bkps + 1: stop = False elif pen is not None: if gain < pen: stop = False elif epsilon is not None: if sum(leaf_tmp.val for leaf_tmp in leaves) < epsilon: stop = False if not stop: # updates the list of leaves (i.e. segments of the partitions) # find the merged segments indexes keys = [leaf.start for leaf in leaves] left_idx = bisect_left(keys, leaf.left.start) leaves[left_idx] = leaf # replace leaf.left del leaves[left_idx + 1] # remove leaf.right # add to the set of removed segments. removed.add(leaf.left) removed.add(leaf.right) # add new merge candidates if left_idx > 0: left_candidate = self.merge(leaves[left_idx - 1], leaf) heapq.heappush(merged, (left_candidate.gain, left_candidate)) if left_idx < len(leaves) - 1: right_candidate = self.merge(leaf, leaves[left_idx + 1]) heapq.heappush(merged, (right_candidate.gain, right_candidate)) partition = {(leaf.start, leaf.end): leaf.val for leaf in leaves} return partition
def display(signal, true_chg_pts, computed_chg_pts=None, **kwargs):
    """
    Display a signal and the change points provided in alternating colors. If another set of change
    point is provided, they are displayed with dashed vertical lines.

    Args:
        signal (array): signal array, shape (n_samples,) or (n_samples, n_features).
            Objects that are not numpy arrays must expose their data through a
            ``values`` attribute (e.g. a pandas DataFrame).
        true_chg_pts (list): list of change point indexes.
        computed_chg_pts (list, optional): list of change point indexes.
        **kwargs: optional display settings. Recognised keys are
            ``figsize`` (default ``(10, 2 * n_features)``),
            ``alpha`` (background transparency, default 0.2),
            ``color`` (default "k"), ``linewidth`` (default 3) and
            ``linestyle`` (default "--"); the last three style the lines
            marking ``computed_chg_pts``. Other keys are ignored.

    Returns:
        tuple: (figure, axarr) with a :class:`matplotlib.figure.Figure` object and an array of Axes objects.

    Raises:
        MatplotlibMissingError: if matplotlib cannot be imported.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as err:
        raise MatplotlibMissingError(
            'This feature requires the optional dependency matplotlib, you can install it using `pip install matplotlib`.'
        ) from err

    # isinstance (rather than an exact type match) keeps ndarray subclasses,
    # which have no ``values`` attribute, on the array path
    if not isinstance(signal, np.ndarray):
        # Try to get array from Pandas dataframe
        signal = signal.values

    if signal.ndim == 1:
        signal = signal.reshape(-1, 1)
    n_samples, n_features = signal.shape

    # display options, each overridable through kwargs
    figsize = kwargs.get("figsize", (10, 2 * n_features))  # figure size
    alpha = kwargs.get("alpha", 0.2)  # transparency of the colored background
    color = kwargs.get("color", "k")  # color of the computed_chg_pts lines
    linewidth = kwargs.get("linewidth", 3)  # width of the computed_chg_pts lines
    linestyle = kwargs.get("linestyle", "--")  # style of the computed_chg_pts lines

    fig, axarr = plt.subplots(n_features, figsize=figsize, sharex=True)
    if n_features == 1:
        # plt.subplots returns a bare Axes for a single subplot
        axarr = [axarr]

    # regime boundaries are identical for every feature
    bkps = [0] + sorted(true_chg_pts)

    for axe, sig in zip(axarr, signal.T):
        color_cycle = cycle(COLOR_CYCLE)
        # plot s
        axe.plot(range(n_samples), sig)

        # color each (true) regime; spans are shifted by half a sample so
        # that boundaries fall between samples
        for (start, end), col in zip(pairwise(bkps), color_cycle):
            axe.axvspan(max(0, start - 0.5), end - 0.5, facecolor=col, alpha=alpha)

        # vertical lines to mark the computed_chg_pts
        if computed_chg_pts is not None:
            for bkp in computed_chg_pts:
                if bkp != 0 and bkp < n_samples:
                    axe.axvline(
                        x=bkp - 0.5,
                        color=color,
                        linewidth=linewidth,
                        linestyle=linestyle,
                    )

    fig.tight_layout()

    return fig, axarr
def display(signal, true_chg_pts, computed_chg_pts=None, **kwargs):
    """
    Display a signal and the change points provided in alternating colors. If another set of change
    point is provided, they are displayed with dashed vertical lines.
    The following matplotlib subplots options are set by default, but can be changed when calling `display`:
    - "figsize": (10, 2 * n_features),  # figure size
    - "sharex": True,  # all subplots share the same x axis

    Args:
        signal (array): signal array, shape (n_samples,) or (n_samples, n_features).
            Objects that are not numpy arrays must expose their data through a
            ``values`` attribute (e.g. a pandas DataFrame).
        true_chg_pts (list): list of change point indexes.
        computed_chg_pts (list, optional): list of change point indexes.
        **kwargs : all additional keyword arguments are passed to the plt.subplots call.

    Returns:
        tuple: (figure, axarr) with a :class:`matplotlib.figure.Figure` object and an array of Axes objects.

    Raises:
        MatplotlibMissingError: if matplotlib cannot be imported.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as err:
        raise MatplotlibMissingError(
            'This feature requires the optional dependency matplotlib, you can install it using `pip install matplotlib`.'
        ) from err

    # isinstance (rather than an exact type match) keeps ndarray subclasses,
    # which have no ``values`` attribute, on the array path
    if not isinstance(signal, np.ndarray):
        # Try to get array from Pandas dataframe
        signal = signal.values

    if signal.ndim == 1:
        signal = signal.reshape(-1, 1)
    n_samples, n_features = signal.shape

    # sensible defaults for the subplots; ``sharex`` lives here rather than in
    # the call so that a user-supplied value overrides it instead of raising
    # a duplicate-keyword TypeError
    matplotlib_options = {
        "figsize": (10, 2 * n_features),  # figure size
        "sharex": True,
    }
    # add/update the options given by the user
    matplotlib_options.update(kwargs)

    # create plots
    fig, axarr = plt.subplots(n_features, **matplotlib_options)
    if n_features == 1:
        # plt.subplots returns a bare Axes for a single subplot
        axarr = [axarr]

    # styling shared by every subplot
    alpha = 0.2  # transparency of the colored background
    color = "k"  # color of the lines indicating the computed_chg_pts
    linewidth = 3  # linewidth of the lines indicating the computed_chg_pts
    linestyle = "--"  # linestyle of the lines indicating the computed_chg_pts

    # regime boundaries are identical for every feature
    bkps = [0] + sorted(true_chg_pts)

    for axe, sig in zip(axarr, signal.T):
        color_cycle = cycle(COLOR_CYCLE)
        # plot s
        axe.plot(range(n_samples), sig)

        # color each (true) regime; spans are shifted by half a sample so
        # that boundaries fall between samples
        for (start, end), col in zip(pairwise(bkps), color_cycle):
            axe.axvspan(max(0, start - 0.5), end - 0.5, facecolor=col, alpha=alpha)

        # vertical lines to mark the computed_chg_pts
        if computed_chg_pts is not None:
            for bkp in computed_chg_pts:
                if bkp != 0 and bkp < n_samples:
                    axe.axvline(
                        x=bkp - 0.5,
                        color=color,
                        linewidth=linewidth,
                        linestyle=linestyle,
                    )

    fig.tight_layout()

    return fig, axarr
def seg(self, n_bkps=None, pen=None, epsilon=None): """Computes the greedy segmentation. The stopping rule depends on the parameter passed to the function. Args: n_bkps (int): number of breakpoints to find before stopping. penalty (float): penalty value (>0) epsilon (float): reconstruction budget Returns: list: list of breakpoint indexes """ stop = False bkps = [self.n_samples] inds = np.arange(self.jump, self.n_samples - self.jump, self.jump) residual = self.signal res_norm = residual.var() * self.n_samples while not stop: # greedy search res_list = list() for ind in inds: # greedy search res_tmp = 0 y_left, y_right = residual[:ind], residual[ind:] x_left, x_right = self.covariates[:ind], self.covariates[ind:] for x, y in zip((x_left, x_right), (y_left, y_right)): # linear fit _, res_sub, _, _ = lstsq(x, y) # error on sub-signal res_tmp += res_sub res_list.append(res_tmp) # find best index _, bkp_opt = min(zip(res_list, inds)) # orthogonal projection proj = np.zeros(self.signal.shape) for start, end in pairwise(sorted([0, bkp_opt] + bkps)): y = self.signal[start:end] x = self.covariates[start:end] coef, _, _, _ = lstsq(x, y) proj[start:end] = x.dot(coef).reshape(-1, 1) residual = self.signal - proj # stopping criterion stop = True if n_bkps is not None: if len(bkps) - 1 < n_bkps: stop = False elif pen is not None: if res_norm - residual.var() * self.n_samples > pen: stop = False elif epsilon is not None: if residual.var() * self.n_samples > epsilon: stop = False # update if not stop: res_norm = residual.var() * self.n_samples bkps.append(bkp_opt) bkps.sort() return bkps