Exemple #1
0
def test_sax():
    """Check the string returned by 'sax' on two hand-built series."""

    # Case 1: the pattern 0, 1, 2 repeated five times, binned with empirical
    # quantiles into three bins, is expected to give "abc" five times.
    series = np.tile(np.arange(3), 5)
    observed = sax(series, n_bins=3, quantiles='empirical', alphabet='abc')
    expected = "abc" * 5
    np.testing.assert_string_equal(observed, expected)

    # Case 2: the values -0.75, -0.25, 0.25, 0.75, each repeated three times,
    # binned with gaussian quantiles into four bins, are expected to give
    # each letter of "abcd" three times in a row.
    series = np.repeat(np.arange(-0.75, 1, 0.5), 3)
    observed = sax(series, n_bins=4, quantiles='gaussian', alphabet='abcd')
    expected = ''.join(letter * 3 for letter in "abcd")
    np.testing.assert_string_equal(observed, expected)
Exemple #2
0
def plot_sax(ts, n_bins=8, quantiles='gaussian', output_file=None, **kwargs):
    """Plot the time series before and after SAX transformation.

    The raw series is drawn as a line, the bin edges are drawn as dashed
    horizontal lines, and the SAX symbol of every point is written next to
    that point.

    Parameters
    ----------
    ts : np.array, shape = [n_features]
        time series to plot

    n_bins : int (default = 8)
        number of bins (also known as the size of the alphabet).
        Must lie between 2 and 52 (the alphabet is a-z followed by A-Z).

    quantiles : str (default = 'gaussian')
        the way to compute quantiles. Possible values:

            - 'gaussian' : quantiles from a gaussian distribution N(0,1)
            - 'empirical' : empirical quantiles

    output_file : str or None (default = None)
        if str, save the figure.

    kwargs : keyword arguments
        kwargs for matplotlib.pyplot.plot. The line is red by default;
        passing ``color=...`` overrides that default.

    Raises
    ------
    ValueError
        if 'ts' is not a 1-dimensional np.ndarray, if 'n_bins' is outside
        [2, 52] or if 'quantiles' is not one of the two supported values.

    TypeError
        if 'n_bins' is not an integer.
    """

    # Check input data
    if not (isinstance(ts, np.ndarray) and ts.ndim == 1):
        raise ValueError("'ts' must be a 1-dimensional np.ndarray.")

    # Check parameters
    if not isinstance(n_bins, int):
        raise TypeError("'n_bins' must be an integer.")
    if n_bins < 2:
        raise ValueError("'n_bins' must be greater or equal than 2.")
    if n_bins > 52:
        raise ValueError("'n_bins' must be lower or equal than 52.")
    if quantiles not in ['gaussian', 'empirical']:
        raise ValueError(
            "'quantiles' must be either 'gaussian' or 'empirical'.")

    # Alphabet: 52 symbols, hence the upper bound on n_bins
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    if quantiles == 'gaussian':
        # Bin edges of N(0, 1). The last edge is ppf(1) = +inf; it is never
        # drawn because only the first n_bins - 1 edges are plotted below.
        quantiles = scipy.stats.norm.ppf(np.linspace(0, 1, num=n_bins + 1)[1:])
        ts_sax = sax(ts, n_bins, quantiles, alphabet, plot=True)
    else:
        # In the empirical case 'sax' is relied upon to hand back the bin
        # edges it computed along with the symbols.
        quantiles, ts_sax = sax(ts, n_bins, quantiles, alphabet, plot=True)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Default colour first, so that a caller-supplied 'color' wins instead
    # of raising "got multiple values for keyword argument 'color'".
    line_kwargs = {'color': 'r'}
    line_kwargs.update(kwargs)
    ax.plot(ts, **line_kwargs)

    for edge in quantiles[:n_bins - 1]:
        ax.axhline(y=edge, ls='--', lw=1, color='g')

    # Symbols are placed in axes-fraction coordinates, so the data position
    # of each point is rescaled with the current axis limits.
    x_min, x_max = ax.get_xlim()
    y_min, y_max = ax.get_ylim()
    x_span = x_max - x_min
    y_span = y_max - y_min
    for i, symbol in enumerate(ts_sax):
        ax.text((i - x_min) / x_span,
                (ts[i] - y_min) / y_span,
                symbol,
                horizontalalignment='center',
                verticalalignment='bottom',
                transform=ax.transAxes,
                color='m',
                fontsize=25)

    if output_file is not None:
        fig.savefig(output_file)

    # Show plot
    plt.show()
Exemple #3
0
def plot_paa_sax(ts,
                 window_size=None,
                 output_size=None,
                 overlapping=True,
                 n_bins=8,
                 quantiles='gaussian',
                 output_file=None,
                 **kwargs):
    """Plot the original time series, the time series after PAA
    transformation and the time series after PAA and SAX transformations.

    The raw series is drawn as a line, every PAA segment as a red horizontal
    stroke bounded by dashed vertical lines, the SAX bin edges as dashed
    horizontal lines, and the SAX symbol of every segment is written above
    the middle of that segment.

    Parameters
    ----------
    ts : np.array, shape = [n_features]
        time series to plot

    window_size : int or None (default = None)
        size of the sliding window. Exactly one of 'window_size' and
        'output_size' must be given.

    output_size : int or None (default = None)
        size of the returned time series. Exactly one of 'window_size' and
        'output_size' must be given.

    overlapping : bool (default = True)
        when output_size is specified, the window_size is fixed
        if overlapping is True and may vary if overlapping is False.
        Ignored if window_size is specified.

    n_bins : int (default = 8)
        number of bins (also known as the size of the alphabet).
        Must lie between 2 and 52 (the alphabet is a-z followed by A-Z).

    quantiles : str (default = 'gaussian')
        the way to compute quantiles. Possible values:

            - 'gaussian' : quantiles from a gaussian distribution N(0,1)
            - 'empirical' : empirical quantiles

    output_file : str or None (default = None)
        if str, save the figure.

    kwargs : keyword arguments
        kwargs for matplotlib.pyplot.plot. The line uses '#1f77b4' by
        default; passing ``color=...`` overrides that default.

    Raises
    ------
    ValueError
        if 'ts' is not a 1-dimensional np.ndarray, if not exactly one of
        'window_size' / 'output_size' is given, if the given size is outside
        [1, ts.size], if 'n_bins' is outside [2, 52] or if 'quantiles' is not
        one of the two supported values.

    TypeError
        if 'overlapping', 'window_size', 'output_size' or 'n_bins' has the
        wrong type.
    """

    # Check input data
    if not (isinstance(ts, np.ndarray) and ts.ndim == 1):
        raise ValueError("'ts' must be a 1-dimensional np.ndarray.")

    # Size of ts
    ts_size = ts.size

    # Check parameters for PAA: exactly one of the two sizes must be given
    if (window_size is None) == (output_size is None):
        raise ValueError("'window_size' xor 'output_size' must be specified.")

    # NOTE: numeric flags (e.g. 0 / 1) are accepted on purpose, as before,
    # even though the message only mentions booleans.
    if not isinstance(overlapping, (float, int)):
        raise TypeError("'overlapping' must be a boolean.")

    if window_size is not None:
        if not isinstance(window_size, int):
            raise TypeError("'window_size' must be an integer.")
        if window_size < 1:
            raise ValueError("'window_size' must be greater or equal than 1.")
        if window_size > ts_size:
            raise ValueError(
                "'window_size' must be lower or equal than the size of 'ts'.")
    else:
        if not isinstance(output_size, int):
            raise TypeError("'output_size' must be an integer.")
        if output_size < 1:
            raise ValueError("'output_size' must be greater or equal than 1.")
        if output_size > ts_size:
            raise ValueError(
                "'output_size' must be lower or equal than the size of 'ts'.")
        # Window size is ts_size / output_size rounded up
        window_size, remainder = divmod(ts_size, output_size)
        if remainder:
            window_size += 1

    # Check parameters for SAX
    if not isinstance(n_bins, int):
        raise TypeError("'n_bins' must be an integer")
    if n_bins < 2:
        raise ValueError("'n_bins' must be greater or equal than 2")
    if n_bins > 52:
        raise ValueError("'n_bins' must be lower or equal than 52")
    if quantiles not in ['gaussian', 'empirical']:
        raise ValueError(
            "'quantiles' must be either 'gaussian' or 'empirical'")

    # 'indices[i]' holds the positions covered by segment i and 'ts_paa[i]'
    # the value plotted for that segment.
    indices, ts_paa = paa(ts,
                          ts_size,
                          window_size,
                          overlapping,
                          output_size,
                          plot=True)
    n_segments = len(indices)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Default colour first, so that a caller-supplied 'color' wins instead
    # of raising "got multiple values for keyword argument 'color'".
    line_kwargs = {'color': '#1f77b4'}
    line_kwargs.update(kwargs)
    ax.plot(ts, **line_kwargs)

    # One horizontal stroke per PAA segment
    for i in range(n_segments):
        ax.plot(indices[i], np.repeat(ts_paa[i], indices[i].size), 'r-')

    # Segment boundaries: start of the first segment, midpoints between
    # consecutive segments, end of the last segment.
    boundaries = [indices[0][0]]
    for i in range(n_segments - 1):
        boundaries.append((indices[i][-1] + indices[i + 1][0]) / 2)
    boundaries.append(indices[n_segments - 1][-1])
    for x_boundary in boundaries:
        ax.axvline(x=x_boundary, ls='--', linewidth=1, color='k')

    # Alphabet: 52 symbols, hence the upper bound on n_bins
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    if quantiles == 'gaussian':
        # Bin edges of N(0, 1). The last edge is ppf(1) = +inf; it is never
        # drawn because only the first n_bins - 1 edges are plotted below.
        quantiles = scipy.stats.norm.ppf(np.linspace(0, 1, num=n_bins + 1)[1:])
        ts_sax = sax(ts_paa, n_bins, quantiles, alphabet, plot=True)
    else:
        # In the empirical case 'sax' is relied upon to hand back the bin
        # edges it computed along with the symbols.
        quantiles, ts_sax = sax(ts_paa, n_bins, quantiles, alphabet, plot=True)

    for edge in quantiles[:n_bins - 1]:
        ax.axhline(y=edge, ls='--', lw=1, color='g')

    # Symbols are placed in axes-fraction coordinates, so the data position
    # of each segment is rescaled with the current axis limits.
    x_min, x_max = ax.get_xlim()
    y_min, y_max = ax.get_ylim()
    x_span = x_max - x_min
    y_span = y_max - y_min
    for i in range(n_segments):
        # np.median returns a scalar, unlike np.percentile(..., [50]) which
        # returns a 1-element array that had to be implicitly converted.
        x_center = float(np.median(indices[i]))
        ax.text((x_center - x_min) / x_span,
                (ts_paa[i] - y_min) / y_span,
                ts_sax[i],
                horizontalalignment='center',
                verticalalignment='bottom',
                transform=ax.transAxes,
                color='m',
                fontsize=25)

    if output_file is not None:
        fig.savefig(output_file)

    # Show plot
    plt.show()
# Overlay the SAX breakpoints on the plots of the PAA-transformed series.

n_bins = 8
tss = [Digi_PAA_n8[0], Maxis_PAA_n8[0]]
colors = ["y", "r", "b", "c", "m"]
labels = ["Digi", "Maxis"]
alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

fig = plt.figure(figsize=(20, 18))

for ts, color, label, e in zip(tss, colors, labels, range(5)):
    ax = fig.add_subplot(3, 2, 1 + e)
    #for the break point formula
    quantiles = scipy.stats.norm.ppf(np.linspace(0, 1, num=n_bins + 1)[1:])
    ts_sax = sax(ts, n_bins, quantiles, alphabet, plot=True)
    plt.plot(ts, color=color, label=label)
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.legend(bbox_to_anchor=(1.05, 1), loc=5, borderaxespad=0.)

    #for the break point visualization
    for i in range(n_bins - 1):
        plt.axhline(y=quantiles[i], ls='--', lw=1, color='g')

    if 1:
        x_lim = ax.get_xlim()
        y_lim = ax.get_ylim()
        for i in range(len(ts_sax)):
            x_pos = (i - x_lim[0]) / (x_lim[1] - x_lim[0])
            y_pos = (ts[i] - y_lim[0]) / (y_lim[1] - y_lim[0])
            ax.text(x_pos,