def saa_pax(dataset, title):
    """
    Show the graph of PAA and SAX of time series data

    The data is first rescaled to zero mean and unit variance, then
    approximated with PAA, SAX and 1d-SAX (10 segments, 8 symbols each).
    Only the first series of the dataset (``dataset[0]``) is drawn, in a
    2x2 grid: raw, PAA, SAX and 1d-SAX reconstructions.

    :param dataset: time series of a stock
    :param title: text appended to the titles of the raw and PAA subplots
    :return: None, the figure is displayed through ``plt.show()``
    """
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    n_paa_segments = 10
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    # Every subplot draws the first series only; the raw curve is repeated
    # faintly behind each reconstruction for visual comparison.
    raw_series = dataset[0].ravel()

    plt.figure()
    plt.subplot(2, 2, 1)  # First, raw time series
    plt.plot(raw_series, "b-")
    plt.title("Raw time series " + title)

    plt.subplot(2, 2, 2)  # Second, PAA
    plt.plot(raw_series, "b-", alpha=0.4)
    plt.plot(paa_dataset_inv[0].ravel(), "b-")
    plt.title("PAA " + title)

    plt.subplot(2, 2, 3)  # Then SAX
    plt.plot(raw_series, "b-", alpha=0.4)
    plt.plot(sax_dataset_inv[0].ravel(), "b-")
    plt.title("SAX, %d symbols" % n_sax_symbols)

    plt.subplot(2, 2, 4)  # Finally, 1d-SAX
    plt.plot(raw_series, "b-", alpha=0.4)
    plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-")
    plt.title("1d-SAX, %d symbols (%dx%d)" %
              (n_sax_symbols_avg * n_sax_symbols_slope, n_sax_symbols_avg,
               n_sax_symbols_slope))

    plt.tight_layout()
    plt.show()
Beispiel #2
0
 def sax(self, data):
     """Return the 1d-SAX reconstruction of ``data`` as a 2-D array.

     A 1d-SAX transformer (10 segments, 8 symbols for averages, 8 for
     slopes) is fitted on ``data``; the symbolic representation is mapped
     back to a series and reshaped to its first two dimensions.
     NOTE(review): the reshape only succeeds when any further dimension of
     the reconstruction has size 1 - confirm against callers.
     """
     transformer = OneD_SymbolicAggregateApproximation(
         n_segments=10,
         alphabet_size_avg=8,
         alphabet_size_slope=8)
     symbols = transformer.fit_transform(data)
     reconstructed = transformer.inverse_transform(symbols)
     n_series, n_timestamps = reconstructed.shape[0], reconstructed.shape[1]
     return np.reshape(reconstructed, (n_series, n_timestamps))
def test_1dsax():
    """Check that ``distance`` on an unfitted 1d-SAX estimator raises ValueError."""
    estimator = OneD_SymbolicAggregateApproximation(
        n_segments=3, alphabet_size_avg=2, alphabet_size_slope=2)
    first = [-1., 2., 0.1, -1., 1., -1.]
    second = [1., 3.2, -1., -3., 1., -1.]
    # No fit() has been called, so the distance computation must fail.
    with np.testing.assert_raises(ValueError):
        estimator.distance(first, second)
Beispiel #4
0
 def build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=50):
     """Build a wrapped tslearn 1d-SAX transformer.

     ``n_paa_segments`` sets the number of segments and ``n_sax_symbols``
     the alphabet size for averages; the slope alphabet size is fixed at 4.
     The transformer is returned inside a ``TSLearnTransformerWrapper``
     created with ``supports_approximation=False``.
     """
     transformer = OneD_SymbolicAggregateApproximation(
         alphabet_size_slope=4,
         alphabet_size_avg=n_sax_symbols,
         n_segments=n_paa_segments)
     wrapped = TSLearnTransformerWrapper(transformer,
                                         supports_approximation=False)
     return wrapped
def genList1D_SAX(instances_nor,
                  windowSize,
                  timestamp,
                  n_sax_symbols_avg=5,
                  n_sax_symbols_slope=5):
    """Build a sketch record from the 1d-SAX reconstruction of the input.

    ``instances_nor`` is encoded with 1d-SAX using ``windowSize`` segments
    and the given alphabet sizes, then decoded back. The first
    reconstructed series is flattened into a list and returned together
    with ``timestamp`` (passed through unchanged) in a dict with the keys
    ``"sketchInstances"`` and ``"timestamp"``.
    """
    encoder = OneD_SymbolicAggregateApproximation(
        n_segments=windowSize,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    reconstruction = encoder.inverse_transform(
        encoder.fit_transform(instances_nor))
    # Only the first series of the reconstruction goes into the sketch.
    first_series = reconstruction[0].ravel()

    sketch = {"sketchInstances": list(first_series)}
    sketch["timestamp"] = timestamp
    return sketch
Beispiel #6
0
def test_serialize_1dsax():
    """Run the shared fitted/unfitted checks on a 1d-SAX estimator.

    The estimator (10 segments, 8 average symbols, 8 slope symbols) is
    passed to ``_check_not_fitted`` before fitting, then fitted on the
    output of ``_get_random_walk`` and handed to ``_check_params_predict``
    for its ``transform`` method. What those helpers verify is defined
    outside this snippet.
    """
    estimator = OneD_SymbolicAggregateApproximation(
        n_segments=10,
        alphabet_size_avg=8,
        alphabet_size_slope=8)

    # Must run before fit(): exercises the estimator in its unfitted state.
    _check_not_fitted(estimator)

    walk = _get_random_walk()
    estimator.fit(walk)

    _check_params_predict(estimator, walk, ['transform'])
Beispiel #7
0
def get_sax_transformation(df,
                           features_to_compute='probability',
                           segments=10,
                           symbols=8):
    """
    Add a 'sax' column holding a 1d-SAX reconstruction of one feature.

    The frame is re-sorted by (station, ts) and processed station by
    station. For each station the leading ``n_paa_segments * segments``
    values of the feature go through a 1d-SAX transform and its inverse;
    any remaining trailing values are appended unchanged, so every station
    contributes exactly as many values as it has rows.

    df : DataFrame - needs a 'station' column and a 'ts' column (or an
        index named 'ts', since the index is reset first)
    features_to_compute : string - column's name of the feature to aggregate
    segments : int - the number of PAA segments per station is
        ``round(len(station rows) * segments / 100 - 0.5)``; the same value
        is also the multiplier used to size the transformed slice
    symbols : int - the alphabet size is
        ``round(len(station rows) * symbols / 100 - 0.5)`` and is used for
        both the averages and the slopes

    Returns the DataFrame sorted by (ts, station), indexed by 'ts', with the
    float column 'sax' added. An empty frame yields an empty 'sax' column.
    """
    df = df.reset_index()
    df = df.sort_values(['station', 'ts'])

    sax_values = []
    for station in df.station.unique():
        feature = df.loc[df.station == station, features_to_compute].values
        n_rows = len(feature)
        n_paa_segments = round((n_rows * segments / 100) - 0.5)
        n_sax_symbols = round((n_rows * symbols / 100) - 0.5)
        one_d_sax = OneD_SymbolicAggregateApproximation(
            n_segments=n_paa_segments,
            alphabet_size_avg=n_sax_symbols,
            alphabet_size_slope=n_sax_symbols)

        split = n_paa_segments * segments
        sax_values.extend(
            one_d_sax.inverse_transform(
                one_d_sax.fit_transform(feature[:split])).ravel())
        # Always append the un-transformed tail (empty when the slice
        # already covered the station). Guarding this with the length of
        # the list accumulated over all stations could skip a needed tail.
        sax_values.extend(feature[split:])

    # Assign from the accumulator itself so a frame without any station
    # does not hit an unbound name.
    df['sax'] = sax_values
    df['sax'] = df['sax'].astype('float')
    df = df.sort_values(['ts', 'station'])
    df = df.set_index('ts')
    return df
Beispiel #8
0
def test_1dsax():
    """Check the unfitted error and the distance consistency of 1d-SAX.

    First, ``distance`` on an unfitted estimator must raise
    ``NotFittedError``. Then, for the estimator as built and for a clone
    with ``scale=True``, the distance computed from two raw series must
    match ``distance_1d_sax`` computed from their 1d-SAX representations.
    """
    base_estimator = OneD_SymbolicAggregateApproximation(
        n_segments=3, alphabet_size_avg=2, alphabet_size_slope=2)
    first = [-1., 2., 0.1, -1., 1., -1.]
    second = [1., 3.2, -1., -3., 1., -1.]
    with np.testing.assert_raises(NotFittedError):
        base_estimator.distance(first, second)

    scaled_estimator = clone(base_estimator)
    scaled_estimator.set_params(scale=True)

    # Two random series of length 10 with 3 dimensions, fixed seed.
    X = np.random.RandomState(0).randn(2, 10, 3)
    for estimator in (base_estimator, scaled_estimator):
        encoded = estimator.fit_transform(X)
        from_raw = estimator.distance(X[0], X[1])
        from_encoded = estimator.distance_1d_sax(encoded[0], encoded[1])
        np.testing.assert_allclose(from_raw, from_encoded)
    # NOTE(review): the statements below read `dataset`, `graph_idx`,
    # `pos_relatedStock` and `stockCode`, none of which are defined in the
    # visible code - this looks like a detached excerpt of a per-stock
    # plotting routine; confirm where it belongs before running it.

    # PAA transform (and inverse transform) of the data
    n_paa_segments = 10
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    # Not drawn by the visible statements below.
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    # Not drawn by the visible statements below.
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    # The subplot grid has one row per entry of pos_relatedStock and four
    # columns; graph_idx is advanced before each subplot to select the cell.
    graph_idx = graph_idx + 1
    plt.subplot(len(pos_relatedStock), 4, graph_idx)  # First, raw time series
    plt.plot(dataset[0].ravel(), "b-")
    plt.title("Raw time series: " + stockCode)

    graph_idx = graph_idx + 1
    plt.subplot(len(pos_relatedStock), 4, graph_idx)  # Second, PAA
    # Raw series drawn faintly behind the PAA reconstruction.
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(paa_dataset_inv[0].ravel(), "b-")
    plt.title("PAA: " + stockCode)
Beispiel #10
0
    # Generate a random walk
    ts = np.random.normal(size=700)
    ts = np.cumsum(ts)
    ts = ts - np.mean(ts)
    ts /= np.std(ts, ddof=1)
    print('length of ts', len(ts))

    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    n_paa_segments = 10

    # 1d-SAX transform
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope,
        sigma_l=np.sqrt(0.03 / (np.floor(len(ts) / n_paa_segments))))
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(ts))

    # Our oneD_SAX
    width = len(ts) // n_paa_segments
    onedsax = oneD_SAX(w=width,
                       k_slope=n_sax_symbols_slope,
                       k_intercept=n_sax_symbols_avg)
    onedsax_ts = onedsax.transform(ts)
    recon_onedsax = onedsax.inverse_transform(onedsax_ts)

    # plot
    plt.figure()