Example #1
0
def test_paa():
    """Check 'paa' on the ramp 0..29 for several non-overlapping windows."""

    n_points = 30
    series = np.arange(n_points)

    # Each case is (window_size, first expected value); the remaining
    # expected values advance in steps of window_size up to n_points.
    cases = ((2, 0.5), (3, 1), (5, 2))

    for width, first_value in cases:
        observed = paa(series, n_points, window_size=width, overlapping=0.)
        expected = np.arange(first_value, n_points, width)
        np.testing.assert_allclose(observed, expected, atol=1e-5, rtol=0.)
Example #2
0
def plot_paa(ts,
             window_size=None,
             output_size=None,
             overlapping=True,
             output_file=None,
             **kwargs):
    """Draw a time series together with the segments returned by ``paa``.

    The raw series is plotted as a line, every segment is overlaid as a
    horizontal red line at its value, and dashed vertical lines mark the
    start, the boundaries between consecutive segments and the end.

    Parameters
    ----------
    ts : np.array, shape = [n_features]
        one-dimensional time series to plot

    window_size : int or None (default = None)
        size of the sliding window. Exactly one of ``window_size`` and
        ``output_size`` must be given.

    output_size : int or None (default = None)
        size of the returned time series. When given, ``window_size`` is
        derived as ``ts.size / output_size`` rounded up.

    overlapping : bool (default = True)
        when output_size is specified, the window_size is fixed
        if overlapping is True and may vary if overlapping is False.
        Ignored if window_size is specified.
        Note: the validation below accepts any int or float.

    output_file : str or None (default = None)
        if not None, the figure is saved there before being shown.

    kwargs : keyword arguments
        kwargs for matplotlib.pyplot.plot (applied to the raw series only)

    Raises
    ------
    ValueError
        if ``ts`` is not a 1-dimensional np.ndarray, if both or neither of
        ``window_size`` / ``output_size`` are given, or if the given size
        is outside ``[1, ts.size]``.
    TypeError
        if ``overlapping`` is neither int nor float, or if the given size
        is not an int.
    """

    # Only a 1-D numpy array is accepted
    if not isinstance(ts, np.ndarray) or ts.ndim != 1:
        raise ValueError("'ts' must be a 1-dimensional np.ndarray.")

    n_points = ts.size

    # Exactly one of the two size parameters has to be provided
    if (window_size is None) == (output_size is None):
        raise ValueError("'window_size' xor 'output_size' must be specified.")

    # Checked before the size parameter in both modes
    if not isinstance(overlapping, (float, int)):
        raise TypeError("'overlapping' must be a boolean.")

    if window_size is not None:
        if not isinstance(window_size, int):
            raise TypeError("'window_size' must be an integer.")
        if window_size < 1:
            raise ValueError("'window_size' must be greater or equal than 1.")
        if window_size > n_points:
            raise ValueError(
                "'window_size' must be lower or equal than the size of each time series."
            )
    else:
        if not isinstance(output_size, int):
            raise TypeError("'output_size' must be an integer.")
        if output_size < 1:
            raise ValueError("'output_size' must be greater or equal than 1.")
        if output_size > n_points:
            raise ValueError(
                "'output_size' must be lower or equal than the size of each time series."
            )
        # Ceiling division: one extra point per window when the split is uneven
        quotient, remainder = divmod(n_points, output_size)
        window_size = quotient + 1 if remainder else quotient

    # With plot=True, paa hands back the sample positions of every segment
    # along with the value to draw for it
    indices, mean = paa(ts, ts.size, window_size, overlapping, output_size,
                        plot=True)
    n_segments = len(indices)

    # Raw series, then one flat red line per segment
    plt.plot(ts, color='#1f77b4', **kwargs)
    for i, positions in enumerate(indices):
        plt.plot(positions, np.repeat(mean[i], positions.size), 'r-')

    # Dashed separators: first sample, midpoints between neighbouring
    # segments, last sample
    separator_style = dict(ls='--', linewidth=1, color='k')
    plt.axvline(x=indices[0][0], **separator_style)
    for i in range(n_segments - 1):
        left_end = indices[i][-1]
        right_start = indices[i + 1][0]
        plt.axvline(x=(left_end + right_start) / 2, **separator_style)
    plt.axvline(x=indices[n_segments - 1][-1], **separator_style)

    if output_file is not None:
        plt.savefig(output_file)

    # Show plot
    plt.show()
Example #3
0
def plot_paa_sax(ts,
                 window_size=None,
                 output_size=None,
                 overlapping=True,
                 n_bins=8,
                 quantiles='gaussian',
                 output_file=None,
                 **kwargs):
    """Plot the original time series, the time series after PAA
    transformation and the time series after PAA and SAX transformations.

    The raw series is drawn as a line, every PAA segment as a horizontal
    red line, segment boundaries as dashed black vertical lines, the bin
    edges as dashed green horizontal lines, and the SAX symbol of each
    segment as a text label above the middle of the segment.

    Parameters
    ----------
    ts : np.array, shape = [n_features]
        one-dimensional time series to plot

    window_size : int or None (default = None)
        size of the sliding window. Exactly one of ``window_size`` and
        ``output_size`` must be given.

    output_size : int or None (default = None)
        size of the returned time series. When given, ``window_size`` is
        derived as ``ts.size / output_size`` rounded up.

    overlapping : bool (default = True)
        when output_size is specified, the window_size is fixed
        if overlapping is True and may vary if overlapping is False.
        Ignored if window_size is specified.
        Note: the validation below accepts any int or float.

    n_bins : int (default = 8)
        number of bins (also known as the size of the alphabet),
        between 2 and 52 inclusive

    quantiles : str (default = 'gaussian')
        the way to compute quantiles. Possible values:

            - 'gaussian' : quantiles from a gaussian distribution N(0,1)
            - 'empirical' : empirical quantiles

    output_file : str or None (default = None)
        if str, save the figure.

    kwargs : keyword arguments
        kwargs for matplotlib.pyplot.plot (applied to the raw series only)

    Raises
    ------
    ValueError
        if ``ts`` is not a 1-dimensional np.ndarray, if both or neither of
        ``window_size`` / ``output_size`` are given, if the given size is
        outside ``[1, ts.size]``, if ``n_bins`` is outside ``[2, 52]`` or if
        ``quantiles`` is not one of the two supported strings.
    TypeError
        if ``overlapping`` is neither int nor float, or if the given size
        or ``n_bins`` is not an int.
    """

    # Check input data
    if not (isinstance(ts, np.ndarray) and ts.ndim == 1):
        raise ValueError("'ts' must be a 1-dimensional np.ndarray.")

    # Size of ts
    ts_size = ts.size

    # Check parameters for PAA and compute window_size if output_size is given
    if (window_size is None) == (output_size is None):
        raise ValueError("'window_size' xor 'output_size' must be specified.")
    if not isinstance(overlapping, (float, int)):
        raise TypeError("'overlapping' must be a boolean.")
    if window_size is not None:
        if not isinstance(window_size, int):
            raise TypeError("'window_size' must be an integer.")
        if window_size < 1:
            raise ValueError("'window_size' must be greater or equal than 1.")
        if window_size > ts_size:
            raise ValueError(
                "'window_size' must be lower or equal than the size of 'ts'.")
    else:
        if not isinstance(output_size, int):
            raise TypeError("'output_size' must be an integer.")
        if output_size < 1:
            raise ValueError("'output_size' must be greater or equal than 1.")
        if output_size > ts_size:
            raise ValueError(
                "'output_size' must be lower or equal than the size of 'ts'.")
        # Ceiling division: one extra point per window when the split is uneven
        window_size = ts_size // output_size
        window_size += 0 if ts_size % output_size == 0 else 1

    # Alphabet: one symbol per bin, which is why n_bins is capped at 52
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    # Check parameters for SAX
    if not isinstance(n_bins, int):
        raise TypeError("'n_bins' must be an integer")
    if n_bins < 2:
        raise ValueError("'n_bins' must be greater or equal than 2")
    if n_bins > 52:
        raise ValueError("'n_bins' must be lower or equal than 52")
    if quantiles not in ['gaussian', 'empirical']:
        raise ValueError(
            "'quantiles' must be either 'gaussian' or 'empirical'")

    # With plot=True, paa hands back the sample positions of every segment
    # along with the value to draw for it
    indices, ts_paa = paa(ts,
                          ts.size,
                          window_size,
                          overlapping,
                          output_size,
                          plot=True)
    indices_len = len(indices)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Raw series, then one flat red line per PAA segment
    plt.plot(ts, color='#1f77b4', **kwargs)
    for i in range(indices_len):
        plt.plot(indices[i], np.repeat(ts_paa[i], indices[i].size), 'r-')

    # Dashed separators: first sample, midpoints between neighbouring
    # segments, last sample
    separator_style = dict(ls='--', linewidth=1, color='k')
    plt.axvline(x=indices[0][0], **separator_style)
    for i in range(indices_len - 1):
        plt.axvline(x=(indices[i][-1] + indices[i + 1][0]) / 2,
                    **separator_style)
    plt.axvline(x=indices[indices_len - 1][-1], **separator_style)

    # Compute gaussian quantiles if quantiles == 'gaussian'; in the
    # 'empirical' case sax returns the quantiles it used along with the symbols
    if quantiles == 'gaussian':
        quantiles = scipy.stats.norm.ppf(np.linspace(0, 1, num=n_bins + 1)[1:])
        ts_sax = sax(ts_paa, n_bins, quantiles, alphabet, plot=True)
    else:
        quantiles, ts_sax = sax(ts_paa, n_bins, quantiles, alphabet, plot=True)

    # Only the first n_bins - 1 values are drawn as bin edges
    for i in range(n_bins - 1):
        plt.axhline(y=quantiles[i], ls='--', lw=1, color='g')

    # Place each symbol in axes-fraction coordinates, so read the limits
    # only after everything else has been drawn
    x_lim = ax.get_xlim()
    y_lim = ax.get_ylim()
    x_span = x_lim[1] - x_lim[0]
    y_span = y_lim[1] - y_lim[0]
    for i in range(indices_len):
        # Scalar percentile (q=50, not [50]): a 1-element array as a text
        # coordinate relies on the deprecated array-to-scalar conversion
        x_pos = (np.percentile(indices[i], 50) - x_lim[0]) / x_span
        y_pos = (ts_paa[i] - y_lim[0]) / y_span
        ax.text(x_pos,
                y_pos,
                ts_sax[i],
                horizontalalignment='center',
                verticalalignment='bottom',
                transform=ax.transAxes,
                color='m',
                fontsize=25)

    if output_file is not None:
        plt.savefig(output_file)

    # Show plot
    plt.show()