def saa_pax(dataset, title):
    """
    Show a time series next to its PAA, SAX and 1d-SAX approximations.

    The data is rescaled to zero mean and unit variance, each approximation
    is computed with 10 segments and inverse-transformed, and the results are
    drawn in a 2x2 figure on top of the rescaled series. Only the first
    series of ``dataset`` is plotted.

    :param dataset: time series of a stock
    :param title: text appended to the "Raw time series " and "PAA " titles
    :return: None; the figure is displayed with ``plt.show()``
    """
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    n_paa_segments = 10
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    # Panels 2-4 share one layout: the faded rescaled series underneath and
    # the reconstructed approximation on top.
    overlays = [
        ("PAA " + title, paa_dataset_inv),
        ("SAX, %d symbols" % n_sax_symbols, sax_dataset_inv),
        ("1d-SAX, %d symbols (%dx%d)" %
         (n_sax_symbols_avg * n_sax_symbols_slope, n_sax_symbols_avg,
          n_sax_symbols_slope), one_d_sax_dataset_inv),
    ]

    plt.figure()
    plt.subplot(2, 2, 1)  # First, raw time series
    plt.plot(dataset[0].ravel(), "b-")
    plt.title("Raw time series " + title)

    for position, (panel_title, approximation) in enumerate(overlays, start=2):
        plt.subplot(2, 2, position)
        plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
        plt.plot(approximation[0].ravel(), "b-")
        plt.title(panel_title)

    plt.tight_layout()
    plt.show()
Exemple #2
0
 def sax(self, data):
     """Return the 1d-SAX reconstruction of ``data`` as a 2-D array.

     ``data`` is encoded with 10 segments and an alphabet of 8 symbols for
     both the segment averages and the segment slopes, then
     inverse-transformed. The reconstruction is reshaped to its first two
     dimensions before being returned.
     """
     encoder = OneD_SymbolicAggregateApproximation(
         n_segments=10,
         alphabet_size_avg=8,
         alphabet_size_slope=8)
     encoded = encoder.fit_transform(data)
     reconstructed = encoder.inverse_transform(encoded)
     n_rows = reconstructed.shape[0]
     n_cols = reconstructed.shape[1]
     return np.reshape(reconstructed, (n_rows, n_cols))
def genList1D_SAX(instances_nor,
                  windowSize,
                  timestamp,
                  n_sax_symbols_avg=5,
                  n_sax_symbols_slope=5):
    """Build a 1d-SAX sketch of the first series in ``instances_nor``.

    The data is encoded with ``windowSize`` segments and the given alphabet
    sizes for averages and slopes, then inverse-transformed.

    Returns a dict with the flattened reconstruction of the first series
    under ``"sketchInstances"`` (as a list) and ``timestamp`` passed through
    unchanged under ``"timestamp"``.
    """
    encoder = OneD_SymbolicAggregateApproximation(
        n_segments=windowSize,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    reconstructed = encoder.inverse_transform(
        encoder.fit_transform(instances_nor))

    sketch = list(reconstructed[0].ravel())
    return {"sketchInstances": sketch, "timestamp": timestamp}
Exemple #4
0
def get_sax_transformation(df,
                           features_to_compute='probability',
                           segments=10,
                           symbols=8):
    """
    Add a ``sax`` column holding the 1d-SAX approximation of a feature.

    The frame is re-sorted by station then timestamp, and each station's
    series is approximated independently with 1d-SAX (Symbolic Aggregate
    approXimation). Only the first ``n_paa_segments * segments`` values of a
    station are approximated; any values left over at the end of the series
    are copied into the result unchanged.

    df : DataFrame - must expose ``station`` and ``ts`` columns after
        ``reset_index()``, plus the ``features_to_compute`` column
    features_to_compute : string - name of the column to approximate
    segments : int - percentage of a station's row count used as the number
        of PAA segments
    symbols : int - percentage of a station's row count used as the alphabet
        size for both segment averages and segment slopes

    Returns the frame sorted by ``ts`` then ``station`` and indexed by
    ``ts``, with the extra float column ``sax``. An empty frame yields an
    empty ``sax`` column.
    """
    df = df.reset_index()
    df = df.sort_values(['station', 'ts'])

    # Accumulated in the same (station, ts) order as the sorted frame so the
    # list can be assigned positionally as a column afterwards.
    sax_values = []

    for station in df.station.unique():
        series = df.loc[df.station == station, features_to_compute].values
        n_rows = len(series)

        # NOTE(review): round(x - 0.5) is not a true floor -- Python rounds
        # exact halves to the nearest even integer. Kept as-is to preserve
        # the existing sizing; confirm whether a floor was intended.
        n_paa_segments = round((n_rows * segments / 100) - 0.5)
        n_sax_symbols = round((n_rows * symbols / 100) - 0.5)
        one_d_sax = OneD_SymbolicAggregateApproximation(
            n_segments=n_paa_segments,
            alphabet_size_avg=n_sax_symbols,
            alphabet_size_slope=n_sax_symbols)

        head_len = n_paa_segments * segments
        sax_values.extend(
            one_d_sax.inverse_transform(
                one_d_sax.fit_transform(series[0:head_len])).ravel())
        # Always append this station's own tail (an empty slice when the
        # head already covers the series). The decision must not depend on
        # the length accumulated from earlier stations.
        sax_values.extend(series[head_len:n_rows])

    df['sax'] = sax_values
    df['sax'] = df['sax'].astype('float')
    df = df.sort_values(['ts', 'station'])
    df = df.set_index('ts')
    return df
    # NOTE(review): this excerpt starts mid-function. `paa`, `dataset`,
    # `graph_idx`, `pos_relatedStock` and `stockCode` are not defined in the
    # visible code -- confirm against the full source. As laid out here the
    # statements follow a `return` at the same indentation, so they would
    # never execute.

    # PAA round trip: encode, then inverse-transform for plotting below.
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    # The subplot grid has len(pos_relatedStock) rows and 4 columns;
    # graph_idx is advanced by one before each panel is selected.
    graph_idx = graph_idx + 1
    plt.subplot(len(pos_relatedStock), 4, graph_idx)  # First, raw time series
    plt.plot(dataset[0].ravel(), "b-")
    plt.title("Raw time series: " + stockCode)

    graph_idx = graph_idx + 1
    plt.subplot(len(pos_relatedStock), 4, graph_idx)  # Second, PAA
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(paa_dataset_inv[0].ravel(), "b-")
    plt.title("PAA: " + stockCode)

    graph_idx = graph_idx + 1
    plt.subplot(len(pos_relatedStock), 4, graph_idx)  # Then SAX
    # NOTE(review): only the faded series is drawn for this panel in the
    # visible code; the excerpt appears to be cut off here.
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
Exemple #6
0
# NOTE(review): script excerpt -- `n_paa_segments`, `dataset` and
# `paa_dataset_inv` are used below but not defined in the visible code.

# SAX transform (encode, then inverse-transform for plotting)
n_sax_symbols = 256
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                     alphabet_size_avg=n_sax_symbols)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))
# NOTE(review): looks like a leftover progress/debug print -- confirm.
print("a")

# 1d-SAX transform
n_sax_symbols_avg = 8
n_sax_symbols_slope = 8
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope)
# The encoded form is kept in its own variable before being inverted.
transformed_data = one_d_sax.fit_transform(dataset)
one_d_sax_dataset_inv = one_d_sax.inverse_transform(transformed_data)

# 2x2 figure; every panel draws only the first series of the dataset.
plt.figure()
plt.subplot(2, 2, 1)  # First, raw time series
plt.plot(dataset[0].ravel(), "b-")
plt.title("Raw time series")

plt.subplot(2, 2, 2)  # Second, PAA
plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
plt.plot(paa_dataset_inv[0].ravel(), "b-")
plt.title("PAA")

plt.subplot(2, 2, 3)  # Then SAX
plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
plt.plot(sax_dataset_inv[0].ravel(), "b-")
plt.title("SAX, %d symbols" % n_sax_symbols)
Exemple #7
0
from numpy import fft

# Idea 4: use techniques such as PAA,
# then apply libraries such as sklearn
from tslearn.piecewise import PiecewiseAggregateApproximation
from tslearn.piecewise import SymbolicAggregateApproximation, OneD_SymbolicAggregateApproximation
import time
# 1d-SAX: 40 segments, 30 symbols each for segment averages and slopes.
# NOTE(review): `stdData` is not defined in the visible code -- presumably
# the standardized input series; confirm against the full script.
n_paa_segments = 40
n_sax_symbols_avg = 30
n_sax_symbols_slope = 30
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope)
# Despite its name, this holds the inverse-transformed reconstruction of the
# 1d-SAX encoding, not the encoding itself.
transformed_data = one_d_sax.inverse_transform(
    one_d_sax.fit_transform(stdData))

from sklearn.cluster import MiniBatchKMeans, KMeans, DBSCAN, SpectralClustering, Birch
from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score

# Number of clusters requested from KMeans below.
n_cluster = 100

# KMeans results
# Hyperparameter: the value of k
# NOTE(review): `transformed_data` comes straight from a tslearn
# inverse_transform; if that is a 3-D array it may need reshaping to 2-D
# before being passed to scikit-learn -- confirm.
s = time.time()
km = KMeans(n_clusters=n_cluster, random_state=0)
y_pre = km.fit_predict(transformed_data)
e = time.time()
# Report the wall-clock time of fitting + prediction, in seconds.
print(e - s, "s")
# Davies-Bouldin score of the resulting labelling.
print(davies_bouldin_score(transformed_data, y_pre))
Exemple #8
0
 # NOTE(review): excerpt from an indented body whose header is not visible
 # (presumably a loop over `i`). `df_red`, `scaler`, `n_paa_segments`,
 # `n_sax_symbols`, `n_sax_symbols_avg` and `n_sax_symbols_slope` are not
 # defined in the visible code -- confirm against the full source.
 # `records` is not read again in the visible excerpt.
 records = len(df_red[[i]])
 print("stockname" + str(i))
 # Fit the scaler on the single-column frame selected by `i`.
 scaleddata = scaler.fit_transform(df_red[[i]])
 #print(scaleddata)
 # PAA round trip: encode, then inverse-transform for plotting.
 paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
 paa_dataset_inv = paa.inverse_transform(paa.fit_transform(scaleddata))
 # SAX transform
 sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                      alphabet_size_avg=n_sax_symbols)
 sax_dataset_inv = sax.inverse_transform(sax.fit_transform(scaleddata))
 # 1d-SAX transform
 one_d_sax = OneD_SymbolicAggregateApproximation(
     n_segments=n_paa_segments,
     alphabet_size_avg=n_sax_symbols_avg,
     alphabet_size_slope=n_sax_symbols_slope)
 one_d_sax_dataset_inv = one_d_sax.inverse_transform(
     one_d_sax.fit_transform(scaleddata))
 # A new 2x2 figure is opened each time this body runs; every panel draws
 # only the first entry of the scaled data.
 plt.figure()
 # First, raw time series
 plt.subplot(2, 2, 1)
 plt.plot(scaleddata[0].ravel(), "b-")
 plt.title("Raw time series")
 # Second, PAA
 plt.subplot(2, 2, 2)
 plt.plot(scaleddata[0].ravel(), "b-", alpha=0.4)
 plt.plot(paa_dataset_inv[0].ravel(), "b-")
 plt.title("PAA")
 # SAX plot
 plt.subplot(2, 2, 3)  # Then SAX
 plt.plot(scaleddata[0].ravel(), "b-", alpha=0.4)
 plt.plot(sax_dataset_inv[0].ravel(), "b-")
 plt.title("SAX, %d symbols" % n_sax_symbols)