def saa_pax(dataset, title):
    """
    Show the graph of PAA, SAX and 1d-SAX approximations of time series data.

    The dataset is rescaled with ``TimeSeriesScalerMeanVariance(mu=0., std=1.)``,
    then approximated with PAA, SAX and 1d-SAX (10 segments each; 8 symbols
    for SAX, 8 average symbols x 8 slope symbols for 1d-SAX).  Each
    approximation is passed back through ``inverse_transform`` so it can be
    drawn over the rescaled series.  Only the first series (``dataset[0]``)
    is plotted, in a 2x2 grid, and the figure is displayed with ``plt.show()``.

    :param dataset: time series of a stock
    :param title: text appended to the "Raw time series" and "PAA" subplot
        titles
    :return: None
    """
    n_paa_segments = 10
    n_sax_symbols = 8
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8

    # Rescale time series
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    plt.figure()

    # First, raw time series
    plt.subplot(2, 2, 1)
    plt.plot(dataset[0].ravel(), "b-")
    plt.title("Raw time series " + title)

    # Second, PAA (rescaled series drawn faintly underneath)
    plt.subplot(2, 2, 2)
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(paa_dataset_inv[0].ravel(), "b-")
    plt.title("PAA " + title)

    # Then SAX
    plt.subplot(2, 2, 3)
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(sax_dataset_inv[0].ravel(), "b-")
    plt.title("SAX, %d symbols" % n_sax_symbols)

    # Finally, 1d-SAX
    plt.subplot(2, 2, 4)
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-")
    plt.title("1d-SAX, %d symbols (%dx%d)" % (n_sax_symbols_avg * n_sax_symbols_slope,
                                              n_sax_symbols_avg,
                                              n_sax_symbols_slope))

    plt.tight_layout()
    plt.show()
def sax(self, data):
    """
    Approximate ``data`` with 1d-SAX and return the reconstruction as 2-D.

    The estimator uses 10 segments and an alphabet of 8 symbols for both
    segment averages and segment slopes.  The data is transformed and
    immediately inverse-transformed, then reshaped to the first two axes of
    the reconstructed array.

    :param data: time series dataset handed to ``fit_transform``
    :return: array of shape ``(reconstructed.shape[0], reconstructed.shape[1])``
    """
    encoder = OneD_SymbolicAggregateApproximation(
        n_segments=10,
        alphabet_size_avg=8,
        alphabet_size_slope=8)
    encoded = encoder.fit_transform(data)
    reconstructed = encoder.inverse_transform(encoded)
    # Keep only the first two axes; np.reshape needs the total size to match.
    n_rows, n_cols = reconstructed.shape[0], reconstructed.shape[1]
    return np.reshape(reconstructed, (n_rows, n_cols))
def test_1dsax():
    """Check that ``distance`` on an unfitted 1d-SAX estimator raises ``ValueError``."""
    estimator = OneD_SymbolicAggregateApproximation(
        n_segments=3, alphabet_size_avg=2, alphabet_size_slope=2)
    first_series = [-1., 2., 0.1, -1., 1., -1.]
    second_series = [1., 3.2, -1., -3., 1., -1.]

    # The estimator was never fitted, so computing a distance must fail.
    with np.testing.assert_raises(ValueError):
        estimator.distance(first_series, second_series)
def build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=50,
                            n_sax_symbols_slope=4):
    """
    Build a wrapped tslearn 1d-SAX transformer.

    :param n_paa_segments: value passed to the estimator as ``n_segments``
    :param n_sax_symbols: alphabet size for segment averages
        (``alphabet_size_avg``)
    :param n_sax_symbols_slope: alphabet size for segment slopes
        (``alphabet_size_slope``); defaults to 4, the value that used to be
        hard-coded, so existing callers are unaffected
    :return: ``TSLearnTransformerWrapper`` around the estimator, created
        with ``supports_approximation=False``
    """
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols,
        alphabet_size_slope=n_sax_symbols_slope)
    return TSLearnTransformerWrapper(one_d_sax, supports_approximation=False)
def genList1D_SAX(instances_nor, windowSize, timestamp,
                  n_sax_symbols_avg=5, n_sax_symbols_slope=5):
    """
    Build a 1d-SAX sketch of ``instances_nor`` tagged with ``timestamp``.

    :param instances_nor: time series data handed to ``fit_transform``
    :param windowSize: value passed to the estimator as ``n_segments``
    :param timestamp: returned unchanged under the ``"timestamp"`` key
    :param n_sax_symbols_avg: alphabet size for segment averages
    :param n_sax_symbols_slope: alphabet size for segment slopes
    :return: dict with ``"sketchInstances"`` (the flattened first series of
        the inverse-transformed data, as a list) and ``"timestamp"``
    """
    encoder = OneD_SymbolicAggregateApproximation(
        n_segments=windowSize,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    symbols = encoder.fit_transform(instances_nor)
    reconstruction = encoder.inverse_transform(symbols)

    # Only the first series of the reconstruction is reported.
    sketch = list(reconstruction[0].ravel())
    return {"sketchInstances": sketch, "timestamp": timestamp}
def test_serialize_1dsax():
    """
    Run the shared not-fitted / params checks against a 1d-SAX estimator.

    The estimator (10 segments, 8x8 alphabet) is first passed to
    ``_check_not_fitted``, then fitted on the output of ``_get_random_walk``
    and passed to ``_check_params_predict`` with the ``'transform'`` method.
    """
    estimator = OneD_SymbolicAggregateApproximation(
        n_segments=10,
        alphabet_size_avg=8,
        alphabet_size_slope=8)

    # Must be checked before any call to fit().
    _check_not_fitted(estimator)

    random_walk = _get_random_walk()
    estimator.fit(random_walk)

    _check_params_predict(estimator, random_walk, ['transform'])
def get_sax_transformation(df, features_to_compute='probability', segments=10, symbols=8):
    """
    Add a ``sax`` column holding a per-station 1d-SAX approximation.

    The dataframe is re-sorted by station / ts and the time series of each
    station is approximated on its own (Symbolic Aggregate approXimation,
    1d variant: transform followed by inverse transform).
    Only the first ``n_paa_segments * segments`` values of a station are
    approximated; if the time series cannot be divided evenly, the remaining
    trailing values are copied unchanged.

    df : DataFrame - must expose ``station``, ``ts`` and the
        ``features_to_compute`` column once ``reset_index()`` has been applied
    features_to_compute : string - column's name of the features we want to agg
    segments : int - number of points we want to agg; the number of PAA
        segments of a station is ``floor(len(station) * segments / 100)``
    symbols : int - drives the number of SAX symbols; both the average and
        the slope alphabet sizes are ``floor(len(station) * symbols / 100)``

    Returns the dataframe sorted by ts / station, indexed by ``ts``, with the
    extra float column ``sax``.
    """
    df = df.reset_index()
    df = df.sort_values(['station', 'ts'])

    sax_list_result = []
    for station in df.station.unique():
        values = df[df.station == station][features_to_compute].values
        n_rows = len(values)

        # Explicit floor. The former ``round(x - 0.5)`` rounds halves to the
        # nearest even integer on Python 3, so an exact odd result such as
        # 11.0 came out as 10.
        n_paa_segments = int(n_rows * segments / 100)
        n_sax_symbols = int(n_rows * symbols / 100)

        one_d_sax = OneD_SymbolicAggregateApproximation(
            n_segments=n_paa_segments,
            alphabet_size_avg=n_sax_symbols,
            alphabet_size_slope=n_sax_symbols)

        covered = n_paa_segments * segments
        approximation = one_d_sax.inverse_transform(
            one_d_sax.fit_transform(values[0:covered]))
        sax_list_result.extend(approximation.ravel())

        # Copy the values left over after the last full segment.  The slice
        # is empty when the series was fully covered, so no length check is
        # needed (the former check compared the running total across all
        # stations with the current station's length).
        sax_list_result.extend(values[covered:])

    df['sax'] = sax_list_result
    df['sax'] = df['sax'].astype('float')
    df = df.sort_values(['ts', 'station'])
    df = df.set_index('ts')
    return df
def test_1dsax():
    """
    Check 1d-SAX distance behaviour before and after fitting.

    An unfitted estimator must raise ``NotFittedError`` from ``distance``.
    After fitting, with and without ``scale=True``, ``distance`` on two raw
    series must match ``distance_1d_sax`` on their transformed
    representations.
    """
    plain_estimator = OneD_SymbolicAggregateApproximation(
        n_segments=3, alphabet_size_avg=2, alphabet_size_slope=2)
    data = [[-1., 2., 0.1, -1., 1., -1.],
            [1., 3.2, -1., -3., 1., -1.]]
    with np.testing.assert_raises(NotFittedError):
        plain_estimator.distance(data[0], data[1])

    # Same configuration, but with scaling switched on.
    scaled_estimator = clone(plain_estimator)
    scaled_estimator.set_params(scale=True)

    # 2 series of length 10 with 3 dimensions, from a fixed seed.
    rng = np.random.RandomState(0)
    X = rng.randn(2, 10, 3)

    for estimator in (plain_estimator, scaled_estimator):
        encoded = estimator.fit_transform(X)
        raw_distance = estimator.distance(X[0], X[1])
        encoded_distance = estimator.distance_1d_sax(encoded[0], encoded[1])
        np.testing.assert_allclose(raw_distance, encoded_distance)
# PAA: approximate the data, then map it back onto the original time axis
n_paa_segments = 10
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_symbols = paa.fit_transform(dataset)
paa_dataset_inv = paa.inverse_transform(paa_symbols)

# SAX, sharing the PAA segment count
n_sax_symbols = 8
sax = SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols)
sax_symbols = sax.fit_transform(dataset)
sax_dataset_inv = sax.inverse_transform(sax_symbols)

# 1d-SAX, with separate alphabets for segment averages and slopes
n_sax_symbols_avg = 8
n_sax_symbols_slope = 8
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope)
one_d_sax_symbols = one_d_sax.fit_transform(dataset)
one_d_sax_dataset_inv = one_d_sax.inverse_transform(one_d_sax_symbols)

# Next cell of the grid (one row per related stock, 4 columns): raw series
graph_idx += 1
plt.subplot(len(pos_relatedStock), 4, graph_idx)
plt.plot(dataset[0].ravel(), "b-")
plt.title("Raw time series: " + stockCode)

# Next cell: PAA reconstruction drawn over the faded raw series
graph_idx += 1
plt.subplot(len(pos_relatedStock), 4, graph_idx)
plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
plt.plot(paa_dataset_inv[0].ravel(), "b-")
plt.title("PAA: " + stockCode)
# Generate a random walk, then centre it and scale it by its sample
# standard deviation (ddof=1)
ts = np.cumsum(np.random.normal(size=700))
ts -= np.mean(ts)
ts /= np.std(ts, ddof=1)
print('length of ts', len(ts))

n_paa_segments = 10
n_sax_symbols_avg = 8
n_sax_symbols_slope = 8

# 1d-SAX transform (tslearn); sigma_l is derived from the per-segment length
points_per_segment = np.floor(len(ts) / n_paa_segments)
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope,
    sigma_l=np.sqrt(0.03 / points_per_segment))
one_d_sax_symbols = one_d_sax.fit_transform(ts)
one_d_sax_dataset_inv = one_d_sax.inverse_transform(one_d_sax_symbols)

# Our oneD_SAX, configured with the matching window width and alphabets
width = len(ts) // n_paa_segments
onedsax = oneD_SAX(
    w=width,
    k_slope=n_sax_symbols_slope,
    k_intercept=n_sax_symbols_avg)
onedsax_ts = onedsax.transform(ts)
recon_onedsax = onedsax.inverse_transform(onedsax_ts)

# plot
plt.figure()