Example #1
def genListPAA(instances_nor, windowSize, timestamp):
    # Note: despite its name, windowSize is used as the number of PAA segments.
    paa = PiecewiseAggregateApproximation(n_segments=windowSize)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(instances_nor))

    return {
        "sketchInstances": list(paa_dataset_inv[0].ravel()),
        "timestamp": timestamp
    }
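A minimal usage sketch for genListPAA; the synthetic series and timestamps below are invented for illustration, and the imports are the ones the snippet assumes:

import numpy as np
from tslearn.piecewise import PiecewiseAggregateApproximation

series = np.sin(np.linspace(0, 6, 60)).reshape(1, -1, 1)  # one series, 60 points
sketch = genListPAA(series, windowSize=10, timestamp=list(range(60)))
print(len(sketch["sketchInstances"]))  # 60: inverse_transform restores the original length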
Example #2
def saa_pax(dataset, title):
    """
    Show the graph of PAA and SAX of time series data
    :param dataset: time series of a stock
    :return:
    """
    n_ts, sz, d = 1, 100, 1
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    n_paa_segments = 10
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    plt.figure()
    plt.subplot(2, 2, 1)  # First, raw time series
    plt.plot(dataset[0].ravel(), "b-")
    plt.title("Raw time series " + title)

    plt.subplot(2, 2, 2)  # Second, PAA
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(paa_dataset_inv[0].ravel(), "b-")
    plt.title("PAA " + title)

    plt.subplot(2, 2, 3)  # Then SAX
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(sax_dataset_inv[0].ravel(), "b-")
    plt.title("SAX, %d symbols" % n_sax_symbols)

    plt.subplot(2, 2, 4)  # Finally, 1d-SAX
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-")
    plt.title("1d-SAX, %d symbols (%dx%d)" %
              (n_sax_symbols_avg * n_sax_symbols_slope, n_sax_symbols_avg,
               n_sax_symbols_slope))

    plt.tight_layout()
    plt.show()
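A hedged usage sketch for saa_pax; the random-walk input is synthetic, and the imports listed are the ones the function body relies on:

import numpy as np
import matplotlib.pyplot as plt
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import (PiecewiseAggregateApproximation,
                               SymbolicAggregateApproximation,
                               OneD_SymbolicAggregateApproximation)

fake_prices = np.cumsum(np.random.randn(100)).reshape(1, -1, 1)  # synthetic stock series
saa_pax(fake_prices, title="(synthetic)")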
Example #3
def ApplyPaa(n_paa_segments, df, ckt):
    circuito = ckt  # kept from the source; not used below
    print("Number of PAA segments: {}".format(n_paa_segments))
    paa = PiecewiseAggregateApproximation(n_paa_segments)
    scaler = TimeSeriesScalerMeanVariance()
    dadosPaa = df
    for i in range(0, len(df)):
        dataset = scaler.fit_transform(df[i])
        dadosPaa[i] = paa.inverse_transform(paa.fit_transform(dataset))[0]
    dadosPaa = dadosPaa.T

    return dadosPaa
def ApplyPaa(n_paa_segments,df):
    '''
    Applies PAA to the given dataframe.

    :param n_paa_segments: number of PAA segments to use for data reduction
    :param df: dataframe containing the data to which PAA should be applied
    :return: df after PAA has been applied
    '''
    df = df.values.T.tolist()
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    dadosPaa = scaler.fit_transform(df)
    print("Quantidade de segmentos de PAA: {}".format(n_paa_segments))
    paa = PiecewiseAggregateApproximation(n_paa_segments)
    dadosPaa = paa.inverse_transform(paa.fit_transform(dadosPaa))

    df = pd.DataFrame()

    for i in range(len(dadosPaa.T)):
        for j in range(len(dadosPaa.T[0])):
            df[j] = dadosPaa.T[i][j]

    return df
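A small sketch exercising the second ApplyPaa variant on a toy DataFrame (the column names are invented; assumes pandas, numpy, and the tslearn imports used above are in scope):

import numpy as np
import pandas as pd

toy = pd.DataFrame(np.random.randn(40, 3), columns=["a", "b", "c"])  # 3 signals of length 40
reduced = ApplyPaa(8, toy)
print(reduced.shape)  # (3, 40): one row per original column; inverse_transform keeps the length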
# Plot the graphs
plt.figure()
graph_idx = 0

# Apply the PAA, SAX, and 1d-SAX transforms to each related stock
for stockCode in pos_relatedStock:

    dataset = dfpivot['v_updownpercent'][stockCode]
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    n_paa_segments = 10
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))
Example #6
# Then run hierarchical clustering on top; we could implement it ourselves
# Problem: no essential difference
# Speed-up idea 2: rank-base one day's data, run KMeans directly into 100 clusters, then cluster each class over intervals
#################################################################
# Initialization: compute the new data from the raw data,
# starting from the PAA step

# The training data must be fully split before clustering. ratio must be chosen carefully so the PAA segment length comes out an integer

ratio = 0.9
n_paa_segments = 18
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
originData = stdData[:, :int(ratio * stdData.shape[1])]  # raw data already known for the training portion
paa_mid = paa.fit_transform(originData)
paa_mid = paa_mid.reshape(paa_mid.shape[0], paa_mid.shape[1])
baseData = paa.inverse_transform(paa_mid)  # extract the baseline
restData = originData - baseData  # compute the residuals

# Pattern extraction (sum and average, then rank-base; or standardize again and apply SAX)
# Initial idea: stack and average each day's 24-hour traffic, rank-base it, then run 100-way KMeans on MSE
# Idea 2: ...
# For each of the 100 clusters, run hierarchical clustering to subdivide, down to a minimum of 1; measure the result by the maximum intra-cluster similarity (how dissimilar members are)
# dayPattern based on SAX

# Approach 01: use SAX to extract the residual information of the first three days and cluster into 20 groups. Within each cluster run complete-linkage hierarchical clustering at threshold 0.5. Since ~500 items take 3 minutes, the total averages about an hour.
from sklearn.cluster import AgglomerativeClustering
import time

dayPattern = []
for index in range(restData.shape[0]):
    cuData = restData[index].ravel()
EDist_train = []
for i in range(len(y_train)):
    for j in range(len(y_train)):
        dist1 = np.sqrt(
            np.sum((np.array(X_train[i, :]) - np.array(X_train[j, :]))**2))
        EDist_train.append(dist1)

EDist_train = np.array(EDist_train)
EDist_train.resize(y_train.shape[0], int(len(EDist_train) / y_train.shape[0]))
EDist_test = np.array(EDist_test)
EDist_test.resize(y_test.shape[0], int(len(EDist_test) / y_test.shape[0]))
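The nested loops above fill the pairwise Euclidean distance matrices one entry at a time; assuming X_train and X_test are 2-D arrays (as the slicing suggests), the same matrices can be built in one vectorized call, e.g.:

from scipy.spatial.distance import cdist

EDist_train = cdist(X_train, X_train)  # (n_train, n_train) Euclidean distances
EDist_test = cdist(X_test, X_train)    # assuming EDist_test pairs test rows with train rows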

# PAA transform + PAA feature extraction

paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
Xtrain_paa = paa.inverse_transform(paa.fit_transform(X_train))
Xtest_paa = paa.inverse_transform(paa.fit_transform(X_test))

PAA_test = Xtest_paa[:, :, 0]
PAA_train = Xtrain_paa[:, :, 0]
'''
#PAA distance calculation

PAADist_train = []
PAADist_test = []

for i in range(len(y_train)):
    for j in range(len(y_train)):
        dist3 = paa.distance(Xtrain_paa[i,:],Xtest_paa[j,:])
        PAADist_train.append(dist3)
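tslearn's PAA object does expose a distance helper, so the commented-out idea above is workable; a minimal sketch with invented toy series:

import numpy as np
from tslearn.piecewise import PiecewiseAggregateApproximation

toy_paa = PiecewiseAggregateApproximation(n_segments=4)
a, b = np.random.randn(16), np.random.randn(16)
toy_paa.fit_transform([a, b])   # fit before computing distances
print(toy_paa.distance(a, b))   # PAA distance between the two raw series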
Example #8
print(list_new)
df_red = df_new.set_index(['name', 'day']).dif.dropna()
print(df_red)

scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
n_paa_segments = 10
n_sax_symbols = 10
n_sax_symbols_avg = 10
n_sax_symbols_slope = 6
for i in list_new:
    records = len(df_red[[i]])
    print("stockname " + str(i))
    scaleddata = scaler.fit_transform(df_red[[i]])
    #print(scaleddata)
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(scaleddata))
    # SAX transform
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(scaleddata))
    # 1d-SAX transform
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(scaleddata))
    plt.figure()
    # First, raw time series
    plt.subplot(2, 2, 1)
    plt.plot(scaleddata[0].ravel(), "b-")
# add column names
df_price = pd.DataFrame(df_price, columns = day_features)

dataset = df_price.values
print("price feature sample: ")
print(df_price.head())


# PAA transform (and inverse transform) of the data
n_paa_segments = 3
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_list = []
for item in df_price.values:
    item = item.reshape((1, 5, 1))  # one series of length 5 (the daily price features)
    paa_price_inv = paa.inverse_transform(paa.fit_transform(item))
    paa_list.append(paa_price_inv)
paa_array = np.array(paa_list)

paa_data = paa_array.reshape(1904, 5)
paa_df = pd.DataFrame(paa_data, columns = day_features)
print("save time series data after PAA")
paa_df.to_csv("./paa_stock_data_time_series.csv", sep=',', encoding='utf-8')
print("PAA sample: ")
print(paa_df.head())


n_sax_symbols = 3
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))
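If the symbolic codes themselves are wanted rather than the reconstructed curve, fit_transform already returns them; a short sketch reusing the names above:

sax_symbols = sax.fit_transform(dataset)  # integer symbol indices, shape (n_ts, n_segments, 1)
print(sax_symbols[0].ravel())             # each value lies in [0, n_sax_symbols)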
Example #10
                stdData[index][vi] = maxNum


# 2. Normalize the data after the removal step
# (normalize once more)
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
originStdData = stdData  # keep a copy so it can be restored later
stdData = scaler.fit_transform(stdData)

# 3. Apply PAA to obtain the baseline and the residual values
from tslearn.piecewise import PiecewiseAggregateApproximation
n_paa_segments = 20
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_mid = paa.fit_transform(stdData)
paa_inv = paa.inverse_transform(paa_mid)
paa_inv = paa_inv.reshape(paa_inv.shape[0],paa_inv.shape[1])

# 4. Run a simple k-means on the PAA output with at most 10 clusters, scored by the CH score; keep the best
# then apply rank-base processing and do simple clustering
from sklearn.cluster import MiniBatchKMeans, KMeans, DBSCAN, SpectralClustering, Birch
from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score
import time  # needed for the timing below

n_cluster = 1000
s = time.time()
km = KMeans(n_clusters = n_cluster,random_state = 0)
y_pre = km.fit_predict(paa_inv)
e = time.time()
print(e-s,"s")
print(calinski_harabasz_score(paa_inv,y_pre))
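The step-4 comment describes choosing at most 10 clusters by the CH score, while the code above hard-codes n_cluster = 1000; a minimal sketch of the selection loop the comment describes:

best_k, best_score = None, float("-inf")
for k in range(2, 11):  # at most 10 clusters, as the comment says
    labels = KMeans(n_clusters=k, random_state=0).fit_predict(paa_inv)
    score = calinski_harabasz_score(paa_inv, labels)
    if score > best_score:
        best_k, best_score = k, score
print("best k:", best_k, "CH score:", best_score)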
Example #11
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tslearn.piecewise import PiecewiseAggregateApproximation
from tslearn.piecewise import SymbolicAggregateApproximation


url ="C:/Users/Βασίλης/IdeaProjects/MyThesisApp/Data sets/Total_Vehicle_Sales.csv"

df = pd.read_csv(url)
series = np.array(df.Value)
print(series)

n_paa_segments = 4
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_dataset_inv = paa.inverse_transform(paa.fit_transform(series))
plt.plot(series.ravel(), "b-", alpha=0.4)
plt.plot(paa_dataset_inv.ravel(), "r-")



n_sax_symbols = 4
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
print(sax)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(series))
print(sax_dataset_inv.ravel())

plt.plot(sax_dataset_inv.ravel(), "y-")
plt.title("SAX, %d symbols" % n_sax_symbols)
plt.show()
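Passing the 1-D series straight into the PAA works because tslearn promotes it to a dataset of one series; making that explicit with tslearn's own utility:

from tslearn.utils import to_time_series_dataset

dataset = to_time_series_dataset(series)  # shape (1, len(series), 1)
print(dataset.shape)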
Example #12
duration = 60

listFile = ut_lc.getListLight(height=height,duration=duration)
data = ut_lc.getDataFromFile(fileName=listFile[339],height=height,duration=duration)
lc_nor = TimeSeriesScalerMeanVariance(mu=0.,std=1.).fit_transform([data['instances']])

# data = ut_mdf.getDataFromFile("light_curve_Gaia-DR2_51856511715955968_date20191130")
# data = ut_mdf.getDataFromFile("light_curve_Gaia-DR2_602712283908074752_date20200130")
# lc_nor = TimeSeriesScalerMeanVariance(mu=0.,std=1.).fit_transform([data['instances']])
timestamps = data["timestamp"]


# PAA transform (and inverse transform) of the data
n_paa_segments = 8
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_dataset_inv = paa.inverse_transform(paa.fit_transform(lc_nor))

# SAX transform
n_sax_symbols = 25
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                     alphabet_size_avg=n_sax_symbols)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(lc_nor))

# 1d-SAX transform
n_sax_symbols_avg = 5
n_sax_symbols_slope = 5
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope)
transformed_data = one_d_sax.fit_transform(lc_nor)
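The excerpt ends at the 1d-SAX transform; following the pattern of the earlier examples, the reconstruction step would be (a sketch):

one_d_sax_dataset_inv = one_d_sax.inverse_transform(transformed_data)
print(one_d_sax_dataset_inv[0].ravel())  # reconstructed series, same length as lc_nor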