Beispiel #1
0
def membership_mat(bkps):
    """Return membership matrix for the given segmentation."""
    n_samples = bkps[-1]
    m_mat = np.zeros((n_samples, n_samples))
    for start, end in pairwise([0] + bkps):
        m_mat[start:end, start:end] = 1
    return m_mat
Beispiel #2
0
    def _seg(self, n_bkps=None, pen=None, epsilon=None):
        """Computes the binary segmentation.

        The stopping rule depends on the parameter passed to the function.

        Args:
            n_bkps (int): number of breakpoints to find before stopping.
            penalty (float): penalty value (>0)
            epsilon (float): reconstruction budget (>0)

        Returns:
            dict: partition dict {(start, end): cost value,...}
        """

        # initialization
        bkps = [self.n_samples]
        stop = False
        while not stop:
            stop = True
            new_bkps = [
                self.single_bkp(start, end)
                for start, end in pairwise([0] + bkps)
            ]
            bkp, gain = max(new_bkps, key=lambda x: x[1])

            if bkp is None:  # all possible configuration have been explored.
                break

            stop = True
            if n_bkps is not None:
                if len(bkps) - 1 < n_bkps:
                    stop = False
            elif pen is not None:
                if gain > pen:
                    stop = False
            elif epsilon is not None:
                error = self.cost.sum_of_costs(bkps)
                if error > epsilon:
                    stop = False

            if not stop:
                bkps.append(bkp)
                bkps.sort()
        partition = {(start, end): self.cost.error(start, end)
                     for start, end in pairwise([0] + bkps)}
        return partition
Beispiel #3
0
    def sum_of_costs(self, bkps):
        """Returns the sum of segments cost for the given segmentation.

        Args:
            bkps (list): list of change points. By convention, bkps[-1]==n_samples.

        Returns:
            float: sum of costs
        """
        soc = sum(self.error(start, end) for start, end in pairwise([0] + bkps))
        return soc
Beispiel #4
0
    def _seg(self, n_bkps=None, pen=None, epsilon=None):
        """Computes the binary segmentation.

        The stopping rule depends on the parameter passed to the function.

        Args:
            n_bkps (int): number of breakpoints to find before stopping.
            penalty (float): penalty value (>0)
            epsilon (float): reconstruction budget

        Returns:
            list: list of breakpoint indexes
        """
        stop = False
        bkps = [self.n_samples]
        residual = self.signal
        inds = np.arange(1, self.n_samples)
        correction = 1 / inds + 1 / inds[::-1]

        while not stop:
            res_norm = norm(residual)
            # greedy search
            raw_corr = np.sum(residual.cumsum(axis=0)**2, axis=1)
            correlation = raw_corr[:-1].flatten() * correction
            bkp_opt, _ = max(enumerate(correlation, start=1),
                             key=lambda x: x[1])

            # orthogonal projection
            proj = np.zeros(self.signal.shape)
            for (start, end) in pairwise(sorted([0, bkp_opt] + bkps)):
                proj[start:end] = self.signal[start:end].mean(axis=0)
            residual = self.signal - proj

            # stopping criterion
            stop = True
            if n_bkps is not None:
                if len(bkps) - 1 < n_bkps:
                    stop = False
            elif pen is not None:
                if res_norm - norm(residual) > pen:
                    stop = False
            elif epsilon is not None:
                if norm(residual) > epsilon:
                    stop = False
            # update
            if not stop:
                res_norm = norm(residual)
                bkps.append(bkp_opt)

        bkps.sort()
        return bkps
Beispiel #5
0
def plot_cp(df_signal, lst_cp_true, lst_cp_det=None, title=False):
    '''
    Visualisation of signal and cps. True cps are represented by change in
    background color, detected ones as dashed lines. Visualisation was created
    by altering ruptures.show.

    Parameters
    ----------
    df_signal (pd.DataFrame): signal in df form - index can be timestamp
    lst_cp_true (sequence): ints representing true cp-locations; the last
        entry is shifted back by one before being used as an index position
    lst_cp_det (list): optional list of ints representing detected
        cp-locations; its last entry is not drawn
    title (string): optional figure title, ignored unless it is a string

    Returns
    -------
    none

    '''

    font = {'size': 12}
    plt.rc('font', **font)
    color_cycle = cycle(["silver", [.65, 0, 0]])
    alpha = 0.2

    _, ax = plt.subplots(figsize=(10, 2.25))

    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses
    if isinstance(title, str):
        ax.set_title(title)

    index = df_signal.index
    signal_lines = ax.plot(df_signal, c='k')
    ax.set_xlim(index[0], index[-1])

    # copy into a fresh list so the caller's sequence is never modified
    bkps = [0] + list(lst_cp_true)
    # keep the last boundary a valid position of `index`
    bkps[-1] = bkps[-1] - 1

    for (start, end), col in zip(pairwise(bkps), color_cycle):
        ax.axvspan(index[start], index[end], facecolor=col, alpha=alpha)

    if isinstance(lst_cp_det, list):
        det_lines = [
            ax.axvline(index[bkp_det], c='darkred', lw=3, ls='--')
            for bkp_det in lst_cp_det[:-1]
        ]

        # Explicit handles: a labels-only legend call assigns labels
        # positionally to the artists of the axes, so 'CP detected' could end
        # up on a background span or on a second signal column.
        handles = signal_lines[:1] + det_lines[:1]
        labels = ['signal (pre-processed)', 'CP detected'][:len(handles)]
        ax.legend(handles, labels)
Beispiel #6
0
    def _seg(self, n_bkps=None, pen=None, epsilon=None):
        """Compute the bottom-up segmentation.

        The stopping rule depends on the parameter passed to the function.

        Args:
            n_bkps (int): number of breakpoints to find before stopping.
            penalty (float): penalty value (>0)
            epsilon (float): reconstruction budget (>0)

        Returns:
            dict: partition dict {(start, end): cost value,...}
        """
        leaves = list(self.leaves)
        # bottom up fusion
        stop = False
        while not stop:
            stop = True
            leaves.sort(key=lambda n: n.start)
            merged = (self.merge(left, right)
                      for left, right in pairwise(leaves))
            # find segment to merge
            try:
                leaf = min(merged, key=lambda n: n.gain)
            except ValueError:  # if merged is empty (all nodes have been merged).
                break

            if n_bkps is not None:
                if len(leaves) > n_bkps + 1:
                    stop = False
            elif pen is not None:
                if leaf.gain < pen:
                    stop = False
            elif epsilon is not None:
                if sum(leaf_tmp.val for leaf_tmp in leaves) < epsilon:
                    stop = False

            if not stop:
                leaves.remove(leaf.left)
                leaves.remove(leaf.right)
                leaves += [leaf]

        partition = {(leaf.start, leaf.end): leaf.val for leaf in leaves}
        return partition
Beispiel #7
0
    def _seg(self, n_bkps=None, pen=None, epsilon=None):
        """Computes the binary segmentation.

        The stopping rule depends on the parameter passed to the function.

        Args:
            n_bkps (int): number of breakpoints to find before stopping.
            penalty (float): penalty value (>0)
            epsilon (float): reconstruction budget

        Returns:
            list: list of breakpoint indexes
        """
        stop = False
        bkps = [self.n_samples]
        inds = np.arange(1, self.n_samples + 1)
        csum = self.gram.cumsum(axis=0).cumsum(axis=1)
        residual = csum[-1, -1]

        while not stop:
            # greedy search
            correlation = np.diag(csum) * self.n_samples * self.n_samples
            correlation += inds**2 * csum[-1, -1]
            correlation -= 2 * self.n_samples * inds * csum[-1]
            correlation /= inds * inds[::-1]
            bkp = np.argmax(correlation) + 1

            # orthogonal projection (matrix form)
            # adj = np.zeros(self.gram.shape)  # adjacency matrix
            # for start, end in pairwise(sorted([0, bkp] + bkps)):
            #     duree = end - start
            #     adj[start:end, start:end] = np.ones(duree, duree) / duree
            # gram_new = self.gram + adj @ self.gram @ adj - adj @ self.gram - self.gram @ adj
            # csum = gram_new.cumsum(axis=0).cumsum(axis=1)

            # orthogonal projection (vectorized form)
            gram_new = self.gram.copy()
            # cross product
            cross_g = np.zeros(self.gram.shape)
            for start, end in pairwise(sorted([0, bkp] + bkps)):
                val = self.gram[:, start:end].mean(axis=1).reshape(-1, 1)
                cross_g[:, start:end] = val
            gram_new -= cross_g + cross_g.T
            # products of segment means
            for p, q in product(pairwise(sorted([0, bkp] + bkps)), repeat=2):
                start1, end1 = p
                start2, end2 = q
                gram_new[start1:end1,
                         start2:end2] += self.gram[start1:end1,
                                                   start2:end2].mean()
            csum = gram_new.cumsum(axis=0).cumsum(axis=1)

            # stopping criterion
            stop = True
            if n_bkps is not None:
                if len(bkps) - 1 < n_bkps:
                    stop = False
            elif pen is not None:
                if residual - csum[-1, -1] > pen:
                    stop = False
            elif epsilon is not None:
                if csum[-1, -1] > epsilon:
                    stop = False
            # update
            if not stop:
                residual = csum[-1, -1]
                bkps.append(bkp)

        bkps.sort()
        return bkps
Beispiel #8
0
    def _seg(self, n_bkps=None, pen=None, epsilon=None):
        """Compute the bottom-up segmentation.

        The stopping rule depends on the parameter passed to the function.

        Args:
            n_bkps (int): number of breakpoints to find before stopping.
            penalty (float): penalty value (>0)
            epsilon (float): reconstruction budget (>0)

        Returns:
            dict: partition dict {(start, end): cost value,...}
        """
        leaves = sorted(self.leaves)
        removed = set()
        merged = []
        for left, right in pairwise(leaves):
            candidate = self.merge(left, right)
            heapq.heappush(merged, (candidate.gain, candidate))
        # bottom up fusion
        stop = False
        while not stop:
            stop = True

            try:
                gain, leaf = heapq.heappop(merged)
                # Ignore any merge candidates whose left or right children
                # no longer exist (because they were merged with another node).
                # It's cheaper to do this here than during the initial merge.
                while leaf.left in removed or leaf.right in removed:
                    gain, leaf = heapq.heappop(merged)
            # if merged is empty (all nodes have been merged).
            except IndexError:
                break

            if n_bkps is not None:
                if len(leaves) > n_bkps + 1:
                    stop = False
            elif pen is not None:
                if gain < pen:
                    stop = False
            elif epsilon is not None:
                if sum(leaf_tmp.val for leaf_tmp in leaves) < epsilon:
                    stop = False

            if not stop:
                # updates the list of leaves (i.e. segments of the partitions)
                # find the merged segments indexes
                keys = [leaf.start for leaf in leaves]
                left_idx = bisect_left(keys, leaf.left.start)
                leaves[left_idx] = leaf  # replace leaf.left
                del leaves[left_idx + 1]  # remove leaf.right
                # add to the set of removed segments.
                removed.add(leaf.left)
                removed.add(leaf.right)
                # add new merge candidates
                if left_idx > 0:
                    left_candidate = self.merge(leaves[left_idx - 1], leaf)
                    heapq.heappush(merged,
                                   (left_candidate.gain, left_candidate))
                if left_idx < len(leaves) - 1:
                    right_candidate = self.merge(leaf, leaves[left_idx + 1])
                    heapq.heappush(merged,
                                   (right_candidate.gain, right_candidate))

        partition = {(leaf.start, leaf.end): leaf.val for leaf in leaves}
        return partition
Beispiel #9
0
def display(signal, true_chg_pts, computed_chg_pts=None, **kwargs):
    """
    Display a signal and the change points provided in alternating colors. If another set of change
    points is provided, they are displayed with dashed vertical lines.

    Args:
        signal (array): signal array, shape (n_samples,) or (n_samples, n_features). Anything that is
            not a numpy array is read through its `.values` attribute (e.g. a Pandas dataframe).
        true_chg_pts (list): list of change point indexes.
        computed_chg_pts (list, optional): list of change point indexes.
        **kwargs: optional display settings. Recognised keys are `figsize` (default
            `(10, 2 * n_features)`), `alpha` (default 0.2), `color` (default "k"), `linewidth`
            (default 3) and `linestyle` (default "--"); other keys are ignored.

    Returns:
        tuple: (figure, axarr) with a :class:`matplotlib.figure.Figure` object and an array of Axes objects.

    Raises:
        MatplotlibMissingError: if matplotlib cannot be imported.

    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as err:
        # chain the original error so the real import failure stays visible
        raise MatplotlibMissingError(
            'This feature requires the optional dependency matplotlib, you can install it using `pip install matplotlib`.'
        ) from err

    # isinstance (not an exact type comparison) so that ndarray subclasses,
    # which have no `.values`, are used as they are
    if not isinstance(signal, np.ndarray):
        # Try to get array from Pandas dataframe
        signal = signal.values

    if signal.ndim == 1:
        signal = signal.reshape(-1, 1)
    n_samples, n_features = signal.shape

    # display options, each one overridable through kwargs
    figsize = kwargs.get("figsize", (10, 2 * n_features))  # figure size
    alpha = kwargs.get("alpha", 0.2)  # transparency of the colored background
    # style of the lines indicating the computed_chg_pts
    color = kwargs.get("color", "k")
    linewidth = kwargs.get("linewidth", 3)
    linestyle = kwargs.get("linestyle", "--")

    fig, axarr = plt.subplots(n_features, figsize=figsize, sharex=True)
    if n_features == 1:
        # plt.subplots returns a bare Axes in that case
        axarr = [axarr]

    # boundaries of the (true) regimes, identical for every feature
    bkps = [0] + sorted(true_chg_pts)

    for axe, sig in zip(axarr, signal.T):
        # restart the colors on every subplot
        color_cycle = cycle(COLOR_CYCLE)
        # plot s
        axe.plot(range(n_samples), sig)

        # color each (true) regime; spans are shifted by half a sample so
        # that they change between two samples
        for (start, end), col in zip(pairwise(bkps), color_cycle):
            axe.axvspan(max(0, start - 0.5),
                        end - 0.5,
                        facecolor=col,
                        alpha=alpha)

        # vertical lines to mark the computed_chg_pts
        if computed_chg_pts is not None:
            for bkp in computed_chg_pts:
                # the signal boundaries are not change points
                if bkp != 0 and bkp < n_samples:
                    axe.axvline(x=bkp - 0.5,
                                color=color,
                                linewidth=linewidth,
                                linestyle=linestyle)

    fig.tight_layout()

    return fig, axarr
Beispiel #10
0
def display(signal, true_chg_pts, computed_chg_pts=None, **kwargs):
    """
    Display a signal and the change points provided in alternating colors. If another set of change
    points is provided, they are displayed with dashed vertical lines.
    The following matplotlib subplots option is set by default, but can be changed when calling `display`:
    - "figsize": (10, 2 * n_features),  # figure size

    Args:
        signal (array): signal array, shape (n_samples,) or (n_samples, n_features). Anything that is
            not a numpy array is read through its `.values` attribute (e.g. a Pandas dataframe).
        true_chg_pts (list): list of change point indexes.
        computed_chg_pts (list, optional): list of change point indexes.
        **kwargs : all additional keyword arguments are passed to the plt.subplots call.

    Returns:
        tuple: (figure, axarr) with a :class:`matplotlib.figure.Figure` object and an array of Axes objects.

    Raises:
        MatplotlibMissingError: if matplotlib cannot be imported.

    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as err:
        # chain the original error so the real import failure stays visible
        raise MatplotlibMissingError(
            'This feature requires the optional dependency matplotlib, you can install it using `pip install matplotlib`.'
        ) from err

    # isinstance (not an exact type comparison) so that ndarray subclasses,
    # which have no `.values`, are used as they are
    if not isinstance(signal, np.ndarray):
        # Try to get array from Pandas dataframe
        signal = signal.values

    if signal.ndim == 1:
        signal = signal.reshape(-1, 1)
    n_samples, n_features = signal.shape

    # let's set a sensible default size for the subplots
    matplotlib_options = {
        "figsize": (10, 2 * n_features),  # figure size
    }
    # add/update the options given by the user
    matplotlib_options.update(kwargs)

    # create plots
    fig, axarr = plt.subplots(n_features, sharex=True, **matplotlib_options)
    if n_features == 1:
        # plt.subplots returns a bare Axes in that case
        axarr = [axarr]

    # drawing constants, identical for every subplot
    alpha = 0.2  # transparency of the colored background
    color = "k"  # color of the lines indicating the computed_chg_pts
    linewidth = 3  # linewidth of the lines indicating the computed_chg_pts
    linestyle = "--"  # linestyle of the lines indicating the computed_chg_pts
    # boundaries of the (true) regimes
    bkps = [0] + sorted(true_chg_pts)

    for axe, sig in zip(axarr, signal.T):
        # restart the colors on every subplot
        color_cycle = cycle(COLOR_CYCLE)
        # plot s
        axe.plot(range(n_samples), sig)

        # color each (true) regime; spans are shifted by half a sample so
        # that they change between two samples
        for (start, end), col in zip(pairwise(bkps), color_cycle):
            axe.axvspan(max(0, start - 0.5),
                        end - 0.5,
                        facecolor=col,
                        alpha=alpha)

        # vertical lines to mark the computed_chg_pts
        if computed_chg_pts is not None:
            for bkp in computed_chg_pts:
                # the signal boundaries are not change points
                if bkp != 0 and bkp < n_samples:
                    axe.axvline(x=bkp - 0.5,
                                color=color,
                                linewidth=linewidth,
                                linestyle=linestyle)

    fig.tight_layout()

    return fig, axarr
Beispiel #11
0
    def seg(self, n_bkps=None, pen=None, epsilon=None):
        """Computes the greedy segmentation.

        The stopping rule depends on the parameter passed to the function.

        Args:
            n_bkps (int): number of breakpoints to find before stopping.
            penalty (float): penalty value (>0)
            epsilon (float): reconstruction budget

        Returns:
            list: list of breakpoint indexes
        """
        stop = False
        bkps = [self.n_samples]
        inds = np.arange(self.jump, self.n_samples - self.jump, self.jump)
        residual = self.signal
        res_norm = residual.var() * self.n_samples

        while not stop:
            # greedy search
            res_list = list()
            for ind in inds:  # greedy search
                res_tmp = 0
                y_left, y_right = residual[:ind], residual[ind:]
                x_left, x_right = self.covariates[:ind], self.covariates[ind:]
                for x, y in zip((x_left, x_right), (y_left, y_right)):
                    # linear fit
                    _, res_sub, _, _ = lstsq(x, y)
                    # error on sub-signal
                    res_tmp += res_sub
                res_list.append(res_tmp)
            # find best index
            _, bkp_opt = min(zip(res_list, inds))

            # orthogonal projection
            proj = np.zeros(self.signal.shape)
            for start, end in pairwise(sorted([0, bkp_opt] + bkps)):
                y = self.signal[start:end]
                x = self.covariates[start:end]
                coef, _, _, _ = lstsq(x, y)
                proj[start:end] = x.dot(coef).reshape(-1, 1)
            residual = self.signal - proj

            # stopping criterion
            stop = True
            if n_bkps is not None:
                if len(bkps) - 1 < n_bkps:
                    stop = False
            elif pen is not None:
                if res_norm - residual.var() * self.n_samples > pen:
                    stop = False
            elif epsilon is not None:
                if residual.var() * self.n_samples > epsilon:
                    stop = False
            # update
            if not stop:
                res_norm = residual.var() * self.n_samples
                bkps.append(bkp_opt)

            bkps.sort()
        return bkps