def test_sax():
    """Check that 'sax' maps two known inputs to the expected strings."""

    # Case 1: the pattern 0, 1, 2 tiled five times, empirical quantiles,
    # three bins -> the pattern "abc" five times.
    series = np.tile(np.arange(3), 5)
    expected = "abc" * 5
    observed = sax(series, n_bins=3, quantiles='empirical', alphabet='abc')
    np.testing.assert_string_equal(observed, expected)

    # Case 2: -0.75, -0.25, 0.25, 0.75, each repeated three times, gaussian
    # quantiles, four bins -> each letter of "abcd" three times in a row.
    series = np.repeat(np.arange(-0.75, 1, 0.5), 3)
    expected = ''.join(letter * 3 for letter in "abcd")
    observed = sax(series, n_bins=4, quantiles='gaussian', alphabet='abcd')
    np.testing.assert_string_equal(observed, expected)
def plot_sax(ts, n_bins, quantiles='gaussian', output_file=None, **kwargs):
    """Plot a time series together with its SAX symbols and bin edges.

    The series is drawn as a line, the first ``n_bins - 1`` quantiles are
    drawn as dashed horizontal lines, and the i-th symbol of the ``sax``
    output is written at the position of the i-th point of the series.
    The figure is optionally saved and then shown.

    Parameters
    ----------
    ts : np.ndarray, shape = [n_features]
        time series to plot

    n_bins : int
        number of bins (also known as the size of the alphabet).
        Must be between 2 and 52 (inclusive).

    quantiles : str (default = 'gaussian')
        the way to compute quantiles. Possible values:

            - 'gaussian' : quantiles from a gaussian distribution N(0,1)
            - 'empirical' : empirical quantiles

    output_file : str or None (default = None)
        if str, save the figure.

    kwargs : keyword arguments
        kwargs for matplotlib.pyplot.plot. The series is drawn in red
        unless a ``color`` entry is supplied here.

    Raises
    ------
    ValueError
        If ``ts`` is not a 1-dimensional np.ndarray, if ``n_bins`` is
        outside [2, 52], or if ``quantiles`` is not a supported value.
    TypeError
        If ``n_bins`` is not an integer.
    """

    # Check input data
    if not (isinstance(ts, np.ndarray) and ts.ndim == 1):
        raise ValueError("'ts' must be a 1-dimensional np.ndarray.")

    # Check parameters
    if not isinstance(n_bins, int):
        raise TypeError("'n_bins' must be an integer.")
    if n_bins < 2:
        raise ValueError("'n_bins' must be greater or equal than 2.")
    if n_bins > 52:
        raise ValueError("'n_bins' must be lower or equal than 52.")
    if quantiles not in ['gaussian', 'empirical']:
        raise ValueError(
            "'quantiles' must be either 'gaussian' or 'empirical'.")

    # Alphabet (52 symbols, hence the upper bound on n_bins)
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    # With plot=True, the result of 'sax' is used as the symbols alone when
    # explicit (gaussian) quantiles are passed, and is unpacked as
    # (quantiles, symbols) when 'empirical' is passed.
    if quantiles == 'gaussian':
        quantiles = scipy.stats.norm.ppf(
            np.linspace(0, 1, num=n_bins + 1)[1:])
        ts_sax = sax(ts, n_bins, quantiles, alphabet, plot=True)
    else:
        quantiles, ts_sax = sax(ts, n_bins, quantiles, alphabet, plot=True)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Caller-supplied kwargs take precedence over the default colour, so
    # passing color=... does not raise a duplicate-keyword TypeError.
    plot_kwargs = dict(color='r')
    plot_kwargs.update(kwargs)
    ax.plot(ts, **plot_kwargs)

    # Only the first n_bins - 1 quantiles are drawn.
    for threshold in quantiles[:n_bins - 1]:
        ax.axhline(y=threshold, ls='--', lw=1, color='g')

    # Symbols are placed in axes-fraction coordinates derived from the
    # current view limits; the spans do not change inside the loop.
    x_min, x_max = ax.get_xlim()
    y_min, y_max = ax.get_ylim()
    x_span = x_max - x_min
    y_span = y_max - y_min
    for i, symbol in enumerate(ts_sax):
        ax.text((i - x_min) / x_span,
                (ts[i] - y_min) / y_span,
                symbol,
                horizontalalignment='center',
                verticalalignment='bottom',
                transform=ax.transAxes,
                color='m',
                fontsize=25)

    if output_file is not None:
        plt.savefig(output_file)

    # Show plot
    plt.show()
def plot_paa_sax(ts, window_size=None, output_size=None, overlapping=True,
                 n_bins=8, quantiles='gaussian', output_file=None, **kwargs):
    """Plot the original time series, the time series after PAA
    transformation and the time series after PAA and SAX transformations.

    The raw series is drawn as a line, each PAA segment as a red horizontal
    segment delimited by dashed vertical lines, the first ``n_bins - 1``
    quantiles as dashed horizontal lines, and the i-th symbol of the ``sax``
    output is written at the middle of the i-th PAA segment. The figure is
    optionally saved and then shown.

    Parameters
    ----------
    ts : np.ndarray, shape = [n_features]
        time series to plot

    window_size : int or None (default = None)
        size of the sliding window

    output_size : int or None (default = None)
        size of the returned time series

    overlapping : bool (default = True)
        when output_size is specified, the window_size is fixed
        if overlapping is True and may vary if overlapping is False.
        Ignored if window_size is specified (its type is still checked).

    n_bins : int (default = 8)
        number of bins (also known as the size of the alphabet).
        Must be between 2 and 52 (inclusive).

    quantiles : str (default = 'gaussian')
        the way to compute quantiles. Possible values:

            - 'gaussian' : quantiles from a gaussian distribution N(0,1)
            - 'empirical' : empirical quantiles

    output_file : str or None (default = None)
        if str, save the figure.

    kwargs : keyword arguments
        kwargs for matplotlib.pyplot.plot. The raw series is drawn in
        '#1f77b4' unless a ``color`` entry is supplied here.

    Raises
    ------
    ValueError
        If ``ts`` is not a 1-dimensional np.ndarray, if not exactly one of
        ``window_size`` / ``output_size`` is given, if the given size is
        outside [1, ts.size], if ``n_bins`` is outside [2, 52], or if
        ``quantiles`` is not a supported value.
    TypeError
        If ``overlapping`` is not a number/boolean, or if ``window_size``,
        ``output_size`` or ``n_bins`` is not an integer.
    """

    # Check input data
    if not (isinstance(ts, np.ndarray) and ts.ndim == 1):
        raise ValueError("'ts' must be a 1-dimensional np.ndarray.")

    # Size of ts
    ts_size = ts.size

    # Check parameters for PAA: exactly one of the two sizes must be given.
    if (window_size is None) == (output_size is None):
        raise ValueError("'window_size' xor 'output_size' must be specified.")

    # NOTE: plain numbers (e.g. 0 / 1) are accepted as well as booleans;
    # this check is kept as is for backward compatibility.
    if not isinstance(overlapping, (float, int)):
        raise TypeError("'overlapping' must be a boolean.")

    if window_size is not None:
        if not isinstance(window_size, int):
            raise TypeError("'window_size' must be an integer.")
        if window_size < 1:
            raise ValueError("'window_size' must be greater or equal than 1.")
        if window_size > ts_size:
            raise ValueError(
                "'window_size' must be lower or equal than the size 'ts'.")
    else:
        if not isinstance(output_size, int):
            raise TypeError("'output_size' must be an integer.")
        if output_size < 1:
            raise ValueError("'output_size' must be greater or equal than 1.")
        if output_size > ts_size:
            raise ValueError(
                "'output_size' must be lower or equal than the size of 'ts'.")
        # window_size = ceil(ts_size / output_size)
        window_size, remainder = divmod(ts_size, output_size)
        if remainder != 0:
            window_size += 1

    # Check parameters for SAX
    if not isinstance(n_bins, int):
        raise TypeError("'n_bins' must be an integer")
    if n_bins < 2:
        raise ValueError("'n_bins' must be greater or equal than 2")
    if n_bins > 52:
        raise ValueError("'n_bins' must be lower or equal than 52")
    if quantiles not in ['gaussian', 'empirical']:
        raise ValueError(
            "'quantiles' must be either 'gaussian' or 'empirical'")

    # With plot=True the result of 'paa' is unpacked as (indices, values);
    # indices[i] is used below as the x positions of the i-th segment.
    indices, ts_paa = paa(ts, ts_size, window_size,
                          overlapping, output_size, plot=True)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Caller-supplied kwargs take precedence over the default colour, so
    # passing color=... does not raise a duplicate-keyword TypeError.
    plot_kwargs = dict(color='#1f77b4')
    plot_kwargs.update(kwargs)
    ax.plot(ts, **plot_kwargs)

    # One horizontal red segment per PAA window
    for segment, level in zip(indices, ts_paa):
        ax.plot(segment, np.repeat(level, segment.size), 'r-')

    # Dashed vertical lines: start of the first window, midpoints between
    # consecutive windows, end of the last window.
    boundary_style = dict(ls='--', linewidth=1, color='k')
    ax.axvline(x=indices[0][0], **boundary_style)
    for left, right in zip(indices[:-1], indices[1:]):
        ax.axvline(x=(left[-1] + right[0]) / 2, **boundary_style)
    ax.axvline(x=indices[-1][-1], **boundary_style)

    # Alphabet (52 symbols, hence the upper bound on n_bins)
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    # With plot=True, the result of 'sax' is used as the symbols alone when
    # explicit (gaussian) quantiles are passed, and is unpacked as
    # (quantiles, symbols) when 'empirical' is passed.
    if quantiles == 'gaussian':
        quantiles = scipy.stats.norm.ppf(
            np.linspace(0, 1, num=n_bins + 1)[1:])
        ts_sax = sax(ts_paa, n_bins, quantiles, alphabet, plot=True)
    else:
        quantiles, ts_sax = sax(ts_paa, n_bins, quantiles, alphabet,
                                plot=True)

    # Only the first n_bins - 1 quantiles are drawn.
    for threshold in quantiles[:n_bins - 1]:
        ax.axhline(y=threshold, ls='--', lw=1, color='g')

    # Symbols are placed in axes-fraction coordinates derived from the
    # current view limits; the spans do not change inside the loop.
    x_min, x_max = ax.get_xlim()
    y_min, y_max = ax.get_ylim()
    x_span = x_max - x_min
    y_span = y_max - y_min
    for segment, level, symbol in zip(indices, ts_paa, ts_sax):
        # A scalar q gives a scalar median; q=[50] would give a length-1
        # array, which is not a valid scalar text coordinate.
        x_mid = np.percentile(segment, 50)
        ax.text((x_mid - x_min) / x_span,
                (level - y_min) / y_span,
                symbol,
                horizontalalignment='center',
                verticalalignment='bottom',
                transform=ax.transAxes,
                color='m',
                fontsize=25)

    if output_file is not None:
        plt.savefig(output_file)

    # Show plot
    plt.show()
# Putting breakpoints to PAA and SAX visualization. n_bins = 8 tss = [Digi_PAA_n8[0], Maxis_PAA_n8[0]] colors = ["y", "r", "b", "c", "m"] labels = ["Digi", "Maxis"] alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" fig = plt.figure(figsize=(20, 18)) for ts, color, label, e in zip(tss, colors, labels, range(5)): ax = fig.add_subplot(3, 2, 1 + e) #for the break point formula quantiles = scipy.stats.norm.ppf(np.linspace(0, 1, num=n_bins + 1)[1:]) ts_sax = sax(ts, n_bins, quantiles, alphabet, plot=True) plt.plot(ts, color=color, label=label) plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) plt.legend(bbox_to_anchor=(1.05, 1), loc=5, borderaxespad=0.) #for the break point visualization for i in range(n_bins - 1): plt.axhline(y=quantiles[i], ls='--', lw=1, color='g') if 1: x_lim = ax.get_xlim() y_lim = ax.get_ylim() for i in range(len(ts_sax)): x_pos = (i - x_lim[0]) / (x_lim[1] - x_lim[0]) y_pos = (ts[i] - y_lim[0]) / (y_lim[1] - y_lim[0]) ax.text(x_pos,