def transform(self, data=None):
    sax = SymbolicAggregateApproximation(n_segments=self.n_paa, alphabet_size_avg=self.n_sax)
    self.trans_dataset = sax.fit_transform(self.norm_dataset)
    if data is None:
        self.invTrans_dataset = sax.inverse_transform(self.trans_dataset)
    else:
        self.invTrans_dataset = sax.inverse_transform(data)
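For context, a self-contained round trip in the spirit of this method could look like the sketch below; the dataset shape and the n_paa / n_sax values are illustrative assumptions, not values taken from the original class.

import numpy as np
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import SymbolicAggregateApproximation

# Hypothetical parameters standing in for self.n_paa / self.n_sax
n_paa, n_sax = 10, 8

rng = np.random.RandomState(0)
dataset = rng.randn(2, 100, 1)  # tslearn dataset layout: (n_ts, sz, d)
dataset = TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform(dataset)

sax = SymbolicAggregateApproximation(n_segments=n_paa, alphabet_size_avg=n_sax)
symbols = sax.fit_transform(dataset)             # one integer symbol per segment, shape (2, n_paa, 1)
reconstruction = sax.inverse_transform(symbols)  # piecewise-constant series of (roughly) the original length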
Example n. 2
def perform_sax(dataset, gram_number, symbols, segments):
    scaler = TimeSeriesScalerMeanVariance(
        mu=0., std=np.std(dataset))  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # SAX transform
    sax = SymbolicAggregateApproximation(n_segments=segments,
                                         alphabet_size_avg=symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))
    # print(pd.DataFrame(sax_dataset_inv[0])[0].value_counts())
    #     sax_dataset_inv = sax.fit_transform(dataset)
    #     print(len(sax_dataset_inv[0]))

    # Convert result to strings
    df_sax = pd.DataFrame(sax_dataset_inv[0])
    sax_series = df_sax[0]

    # Convert sax from numeric to characters
    sax_values = sax_series.unique()
    alphabet = 'abcdefghijklmnopqrstuvwxyz'  # full 26-letter alphabet so every symbol value can be mapped
    sax_dict = {x: alphabet[i] for i, x in enumerate(sax_values)}
    sax_list = [sax_dict[x] for x in sax_series]

    # Convert the list of characters to n_grams based on input parameter
    tri = n_grams(gram_number, sax_list)
    #     print(Counter(tri))
    return tri
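The n_grams helper used above is not part of tslearn and is not shown in this example; a minimal sketch of what such a helper could look like (the project's actual version may differ) is:

def n_grams(n, tokens):
    """Return consecutive n-grams (joined into strings) over a list of single-character tokens."""
    return ["".join(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]

# e.g. n_grams(3, list("aabbc")) -> ['aab', 'abb', 'bbc'], which Counter(...) can then tally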
Example n. 3
def genListSAX(instances_nor, windowSize, timestamp, n_sax_symbols=25):
    sax = SymbolicAggregateApproximation(n_segments=windowSize,
                                         alphabet_size_avg=n_sax_symbols)
    sax_result = sax.fit_transform(instances_nor)
    sax_dataset_inv = sax.inverse_transform(sax_result)
    return {
        "sketchInstances": list(sax_dataset_inv[0].ravel()),
        "timestamp": timestamp
    }
def saa_pax(dataset, title):
    """
    Show the graph of PAA and SAX of time series data
    :param dataset: time series of a stock
    :return:
    """
    n_ts, sz, d = 1, 100, 1
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    n_paa_segments = 10
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    plt.figure()
    plt.subplot(2, 2, 1)  # First, raw time series
    plt.plot(dataset[0].ravel(), "b-")
    plt.title("Raw time series " + title)

    plt.subplot(2, 2, 2)  # Second, PAA
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(paa_dataset_inv[0].ravel(), "b-")
    plt.title("PAA " + title)

    plt.subplot(2, 2, 3)  # Then SAX
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(sax_dataset_inv[0].ravel(), "b-")
    plt.title("SAX, %d symbols" % n_sax_symbols)

    plt.subplot(2, 2, 4)  # Finally, 1d-SAX
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-")
    plt.title("1d-SAX, %d symbols (%dx%d)" %
              (n_sax_symbols_avg * n_sax_symbols_slope, n_sax_symbols_avg,
               n_sax_symbols_slope))

    plt.tight_layout()
    plt.show()
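A possible way to call this function, using a synthetic random walk in place of real stock data; the shape (1, sz, 1) matches the tslearn dataset layout used by the scaler inside:

import numpy as np

prices = np.cumsum(np.random.randn(100)).reshape(1, -1, 1)  # synthetic stand-in for a stock series
saa_pax(prices, title="synthetic walk")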
Example n. 5
def discretize(raw_signal, window_size, paa_segments, alphabet_size):
    sax = SymbolicAggregateApproximation(n_segments=paa_segments, alphabet_size_avg=alphabet_size)
    discrete_signal = []
    num = len(raw_signal)//window_size

    for i in range(num):
        raw_data = raw_signal[i*window_size : (i+1)*window_size]
        disc = sax.inverse_transform(sax.fit_transform(raw_data))
        discrete_signal.append(np.squeeze(disc))
    discrete_signal = [x for sublist in discrete_signal for x in sublist]

    return discrete_signal
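A hypothetical call on a synthetic signal, assuming the signal length is a multiple of window_size so no samples are dropped:

import numpy as np

signal = np.sin(np.linspace(0, 20 * np.pi, 600)) + 0.1 * np.random.randn(600)
# 600 samples -> 10 windows of 60 samples, each summarized by 10 PAA segments over an 8-letter alphabet
flat = discretize(signal, window_size=60, paa_segments=10, alphabet_size=8)
print(len(flat))  # 600: every window is reconstructed to its original length and concatenated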
Example n. 6
class SAXStateRecognition(BaseMLModelTemplate):
    def build_model(self, **kwargs):
        self.his_len = kwargs['his_len']
        self.segment_dim = kwargs['segment_dim']
        self.model_obj = SymbolicAggregateApproximation(
            n_segments=self.his_len, alphabet_size_avg=self.param.n_state)

    def fit(self, x, y=None):
        self.store(self.param.model_save_path)

    def predict(self, x):
        self.restore(self.param.model_save_path)

        sax_dataset_inv = self.model_obj.inverse_transform(
            self.model_obj.fit_transform(x))
        uniques = sorted(np.unique(sax_dataset_inv))
        print('sax numbers:', len(uniques))
        state_pattern = np.eye(len(uniques))

        state_proba = np.zeros(
            [x.shape[0], self.his_len, len(uniques)], dtype=np.float64)
        tmpstates = np.reshape(sax_dataset_inv,
                               [-1, self.his_len, self.segment_dim])
        for i in range(tmpstates.shape[0]):
            for j in range(tmpstates.shape[1]):
                index = uniques.index(tmpstates[i, j, 0])
                state_proba[i, j, index] = tmpstates[i, j, 0]

        return np.reshape(state_proba,
                          [-1, self.his_len, self.param.n_state]).astype(
                              np.float32), np.array(state_pattern,
                                                    dtype=np.float32)

    def store(self, path, **kwargs):
        save_model_name = "sax_{}_{}.state_model".format(
            self.param.data_name, self.param.n_state)
        joblib.dump(self.model_obj, os.path.join(path, save_model_name))

    def restore(self, path, **kwargs):
        save_model_name = "sax_{}_{}.state_model".format(
            self.param.data_name, self.param.n_state)
        self.model_obj = joblib.load(os.path.join(path, save_model_name))
for stockCode in pos_relatedStock:

    dataset = dfpivot['v_updownpercent'][stockCode]
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    n_paa_segments = 10
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    graph_idx = graph_idx + 1
    plt.subplot(len(pos_relatedStock), 4, graph_idx)  # First, raw time series
    plt.plot(dataset[0].ravel(), "b-")
    plt.title("Raw time series: " + stockCode)
for i in range(len(y_test)):
    for j in range(len(y_train)):
        dist4 = paa.distance(Xtest_paa[i,:],Xtrain_paa[j,:])
        PAADist_test.append(dist4)   

PAADist_train = np.array(PAADist_train)
PAADist_train.resize(y_train.shape[0],int(len(PAADist_train)/y_train.shape[0]))
PAADist_test = np.array(PAADist_test)
PAADist_test.resize(y_test.shape[0],int(len(PAADist_test)/y_test.shape[0]))
# SAX transform + SAX feature extraction
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                     alphabet_size_avg=n_sax_symbols)
Xtrain_sax = sax.inverse_transform(sax.fit_transform(X_train))
Xtest_sax = sax.inverse_transform(sax.fit_transform(X_test))

SAX_test = Xtest_sax[:, :, 0]
SAX_train = Xtrain_sax[:, :, 0]
#SAX distance calculation
SAXDist_train = []
SAXDist_test = []

for i in range(len(y_train)):
    for j in range(len(y_train)):
        dist3 = sax.distance(Xtrain_sax[i, :], Xtrain_sax[j, :])
        SAXDist_train.append(dist3)

for i in range(len(y_test)):
        dataset = []
        with open('output.ou') as f:  # just load the dataset values here
            for linha in f:
                linha = linha.strip()
                if linha:
                    valores = linha.split(',')
                    a, b = int(valores[0]), float(valores[1])
                    dataset.append(b)
        self.dataset = dataset[:]  # welsu's workaround so the list is not passed by reference

        # exit = transform(300,300,dataset)
        # print(exit)
        # print(len(exit[0]))
np.set_printoptions(threshold=np.inf)
trans = Transform(300, 300)
trans.read()
# trans.norm()
# trans.transform()
# trans.norm(0)
# print(trans.dataset)
# print(trans.invTrans_dataset[0])

sax = SymbolicAggregateApproximation(n_segments=300, alphabet_size_avg=300)
trans.norm()
aux = sax.fit_transform(trans.dataset)
aux1 = sax.inverse_transform(aux)
print(trans.dataset)
print(aux)
print(aux1)

Example n. 10
scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
n_paa_segments = 10
n_sax_symbols = 10
n_sax_symbols_avg = 10
n_sax_symbols_slope = 6
for i in listnew:
    records = len(df_red[[i]])
    print("stockname" + str(i))
    scaleddata = scaler.fit_transform(df_red[[i]])
    #print(scaleddata)
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(scaleddata))
    # SAX transform
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(scaleddata))
    # 1d-SAX transform
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(scaleddata))
    plt.figure()
    # First, raw time series
    plt.subplot(2, 2, 1)
    plt.plot(scaleddata[0].ravel(), "b-")
    plt.title("Raw time series")
    # Second, PAA
    plt.subplot(2, 2, 2)
    plt.plot(scaleddata[0].ravel(), "b-", alpha=0.4)
Example n. 11
def main():
    x = []  # x that will be fed to the knn
    y = []  # y that will be fed to the knn
    y_aux = []  # original values from the dataset
    x_aux = []  # original dates from the dataset
    # 80% of y_saida holds the original stock values; the remaining 20% will come from the prediction
    y_saida = []
    with open('output.ou') as f:  # just load the dataset values here
        for linha in f:
            linha = linha.strip()
            if linha:
                valores = linha.split(',')
                a, b = trataValores(valores)
                x_aux.append(a)
                y_aux.append(b)

    x_aux, y_aux = ndtw.suavizacao(x_aux, y_aux)  # function that smooths the curves
    # maior = max(y_aux)       # this and the next two lines normalize the data,
    # y_aux = np.array(y_aux)  # since the values must lie between 0 and 1
    # y_aux = y_aux/maior      # for PAA (and hence SAX) to work

    y_aux = ndtw.sigmoid(y_aux, 1)
    sax = SymbolicAggregateApproximation(n_segments=N_PAA,
                                         alphabet_size_avg=N_SAX)
    temp = sax.fit_transform(y_aux)
    classes_sax = []
    for i in temp[0]:
        classes_sax.append(i[0])

    count = 0
    cel = []
    # Loop over the first 80% of the list, building the x and y that will be fed to the knn.
    # Here x = [val1, val2, val3, ..., valn] and y = val; the window has length WIN_SIZE.
    # Basically this builds the knn input dataset with a sliding window.
    for i in classes_sax[0:int(len(classes_sax) * 0.8)]:
        count += 1
        y_saida.append(i)
        if (count % (WIN_SIZE + 1) == 0 and count != 0):
            cel.append(i)
            # cel = ndtw.sliding_window_normalizations([],cel,1)  # normalize with mean and standard deviation
            y.append(cel[-1:])  # the last value is my y
            x.append(cel[:WIN_SIZE])  # the first WIN_SIZE values are my x
            cel = []
        else:
            cel.append(i)

    obj = KNeighborsClassifier(metric=dtw, n_neighbors=1)

    # print("\n")
    # print(y_saida)

    obj.fit(x, y)

    # for i in range(int(len(y_aux)*0.2)+1):  # slicing lists like a BALLLSS
    #     passar = np.array(y_saida[-WIN_SIZE:]).reshape(1,-1)  # turn the window into a numpy array and reshape because the knn complains
    #     volta = np.copy(passar)  # copy of passar that keeps the original values before the mean/std normalization, so the normalization can be reverted later to present the data
    #     passar = ndtw.sliding_window_normalizations([],passar,1)  # normalize with mean and standard deviation
    #     pred = obj.predict(passar)[0]  # take the normalized prediction
    #     passar = np.append(passar,pred)  # append it to the values the prediction was made from (values and prediction are normalized)
    #     passar = ndtw.sliding_window_normalizations(volta,passar,0)  # undo the normalization before putting it in the output list
    #     y_saida.append(passar[-1:])  # put the obtained value in the output list

    for i in range(int(len(classes_sax) * 0.2) + 1):  # slicing lists like a BALLLSS
        # turn the window into a numpy array and reshape because the knn complains otherwise
        passar = np.array(y_saida[-WIN_SIZE:]).reshape(1, -1)
        pred = obj.predict(passar)[0]  # take the normalized prediction
        # append it to the values the prediction was made from (values and prediction are normalized)
        passar = np.append(passar, pred)
        y_saida.append(passar[-1:][0])  # put the obtained value in the output list

    saida = []
    saida.append([])

    for i in y_saida:  # workaround because I don't know how to use reshape
        saida[0].append([i])

    y_saida = sax.inverse_transform(saida)
    y_saida = np.array(y_saida)

    saida = []  # if takashi sees this he'll hit me (pray for dave)
    for i in y_saida:  # double workaround because I don't know how to use reshape
        for j in i:
            for k in j:
                saida.append(k)

    y_aux = ndtw.sigmoid(y_aux, 0)
    return x_aux, y_aux, saida
Example n. 12
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tslearn.piecewise import PiecewiseAggregateApproximation
from tslearn.piecewise import SymbolicAggregateApproximation


url ="C:/Users/Βασίλης/IdeaProjects/MyThesisApp/Data sets/Total_Vehicle_Sales.csv"

df = pd.read_csv(url)
series = np.array(df.Value)
print(series)

n_paa_segments = 4
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_dataset_inv = paa.inverse_transform(paa.fit_transform(series))
plt.plot(series.ravel(), "b-", alpha=0.4)
plt.plot(paa_dataset_inv.ravel(), "r-")



n_sax_symbols = 4
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
print(sax)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(series))
print(sax_dataset_inv.ravel())

plt.plot(sax_dataset_inv.ravel(), "y-")
plt.title("SAX, %d symbols" % n_sax_symbols)
plt.show()
Example n. 13
# data = ut_mdf.getDataFromFile("light_curve_Gaia-DR2_51856511715955968_date20191130")
# data = ut_mdf.getDataFromFile("light_curve_Gaia-DR2_602712283908074752_date20200130")
# lc_nor = TimeSeriesScalerMeanVariance(mu=0.,std=1.).fit_transform([data['instances']])
timestamps = data["timestamp"]


# PAA transform (and inverse transform) of the data
n_paa_segments = 8
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_dataset_inv = paa.inverse_transform(paa.fit_transform(lc_nor))

# SAX transform
n_sax_symbols = 25
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                     alphabet_size_avg=n_sax_symbols)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(lc_nor))

# 1d-SAX transform
n_sax_symbols_avg = 5
n_sax_symbols_slope = 5
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope)
transformed_data = one_d_sax.fit_transform(lc_nor)
one_d_sax_dataset_inv = one_d_sax.inverse_transform(transformed_data)

#dynamic binning
lc_nor_list = list(lc_nor[0].ravel())
corePlot = sketchDyBinService(windowSize=n_paa_segments,
                              initialBin=3, isOnline=False)
def step_run(self, data):
    sax = SymbolicAggregateApproximation(n_segments=self.nb_segment,
                                         alphabet_size_avg=self.nb_symbol)
    sax_dataset = sax.fit_transform(data)
    sax_dataset_inv = sax.inverse_transform(sax_dataset)
    return sax_dataset, sax_dataset_inv
Example n. 15
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    from tslearn.piecewise import SymbolicAggregateApproximation

    # Generate a random walk
    ts = np.random.normal(size = 700)
    ts = np.cumsum(ts)
    ts = ts - np.mean(ts)
    ts /= np.std(ts, ddof=1)

    n_sax_symbols = 8
    n_paa_segments = 10

    # tslearn SAX implementation
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(ts))

    # Our SAX implementation
    width = len(ts) // n_paa_segments
    sax = SAX(w = width, k = n_sax_symbols)
    sax_ts = sax.transform(ts)
    recon_ts = sax.inverse_transform(sax_ts)

    plt.figure()
    plt.plot(ts, "b-", alpha=0.4)
    plt.plot(sax_dataset_inv[0].ravel(), "b-")
    plt.plot(recon_ts, 'r--')
    plt.legend(['original', 'tslearn SAX implementation', 'our SAX implementation'])
    plt.title("SAX, %d symbols" % n_sax_symbols)
    plt.show()
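The custom SAX class compared against tslearn above is not included in the snippet. A minimal sketch of a compatible implementation, as an assumption rather than the project's actual code (the breakpoints are standard-normal quantiles, so a z-normalized input is assumed):

import numpy as np
from scipy.stats import norm


class SAX:
    """Minimal SAX: PAA with segment width w, then quantization into k symbols."""

    def __init__(self, w, k):
        self.w = w  # samples per PAA segment
        self.k = k  # alphabet size
        # Breakpoints that split a standard normal into k equiprobable bins
        self.breakpoints = norm.ppf(np.linspace(0, 1, k + 1)[1:-1])

    def transform(self, ts):
        ts = np.asarray(ts, dtype=float)
        n_segments = len(ts) // self.w
        paa = ts[:n_segments * self.w].reshape(n_segments, self.w).mean(axis=1)
        return np.digitize(paa, self.breakpoints)  # one integer symbol per segment

    def inverse_transform(self, symbols):
        # Map each symbol back to the midpoint of its bin and repeat it w times
        edges = np.concatenate(([-3.0], self.breakpoints, [3.0]))  # crude outer bounds
        midpoints = (edges[:-1] + edges[1:]) / 2.0
        return np.repeat(midpoints[symbols], self.w)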