def genListPAA(instances_nor, windowSize, timestamp):
    paa = PiecewiseAggregateApproximation(n_segments=windowSize)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(instances_nor))
    return {
        "sketchInstances": list(paa_dataset_inv[0].ravel()),
        "timestamp": timestamp
    }
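# A minimal usage sketch (not from the original): genListPAA expects an
# already-normalized dataset in tslearn's (n_ts, sz, d) layout. The toy series
# and timestamps below are invented for illustration.
import numpy as np
from tslearn.piecewise import PiecewiseAggregateApproximation

instances_nor = np.linspace(-1.0, 1.0, 8).reshape(1, 8, 1)  # one series, length 8
timestamps = list(range(8))
sketch = genListPAA(instances_nor, windowSize=4, timestamp=timestamps)
print(sketch["sketchInstances"])  # 8 values: the 4 segment means, each repeated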
def saa_pax(dataset, title):
    """
    Plot the raw series together with its PAA, SAX, and 1d-SAX representations.

    :param dataset: time series of a stock
    :param title: title suffix for the subplots
    :return: None
    """
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    n_paa_segments = 10
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    plt.figure()
    plt.subplot(2, 2, 1)  # First, raw time series
    plt.plot(dataset[0].ravel(), "b-")
    plt.title("Raw time series " + title)
    plt.subplot(2, 2, 2)  # Second, PAA
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(paa_dataset_inv[0].ravel(), "b-")
    plt.title("PAA " + title)
    plt.subplot(2, 2, 3)  # Then SAX
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(sax_dataset_inv[0].ravel(), "b-")
    plt.title("SAX, %d symbols" % n_sax_symbols)
    plt.subplot(2, 2, 4)  # Finally, 1d-SAX
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-")
    plt.title("1d-SAX, %d symbols (%dx%d)" % (n_sax_symbols_avg * n_sax_symbols_slope,
                                              n_sax_symbols_avg,
                                              n_sax_symbols_slope))
    plt.tight_layout()
    plt.show()
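# A minimal call sketch (an assumption, not part of the original): the function
# rescales its input itself, so any dataset in tslearn layout works, e.g. a
# random walk from tslearn's generators.
import matplotlib.pyplot as plt
from tslearn.generators import random_walks
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import (PiecewiseAggregateApproximation,
                               SymbolicAggregateApproximation,
                               OneD_SymbolicAggregateApproximation)

dataset = random_walks(n_ts=1, sz=100, d=1)  # one series of length 100
saa_pax(dataset, title="(random walk)")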
def ApplyPaa(n_paa_segments, df, ckt):
    circuito = ckt
    print("Number of PAA segments: {}".format(n_paa_segments))
    paa = PiecewiseAggregateApproximation(n_paa_segments)
    scaler = TimeSeriesScalerMeanVariance()
    dadosPaa = df.copy()  # copy so the caller's data is not mutated in place
    for i in range(0, len(df)):
        dataset = scaler.fit_transform(df[i])
        dadosPaa[i] = paa.inverse_transform(paa.fit_transform(dataset))[0]
    dadosPaa = dadosPaa.T
    return dadosPaa
def get_paa_transformation(df, features_to_compute='probability', segments=10):
    """
    Re-sort the dataframe by station / ts and aggregate the time series of each
    station, taking the mean of each segment. If the time series cannot be
    divided evenly into segments, the trailing points are replaced by their
    own mean.

    df : DataFrame
    features_to_compute : string - name of the column to aggregate
    segments : int - number of points to aggregate per segment
    """
    paa_list_result = []
    df = df.reset_index()
    df = df.sort_values(['station', 'ts'])
    for station in df.station.unique():
        data = df[df.station == station]
        n_paa_segments = round((len(data) * segments / 100) - 0.5)
        paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
        paa_inv_transf = np.repeat(
            paa.fit_transform(data[features_to_compute].values)[0],
            segments, axis=0)
        if len(data) != len(paa_inv_transf):
            nb_to_add = len(data) - len(paa_inv_transf)
            value_to_add = np.repeat(
                np.mean(data[features_to_compute].values[-nb_to_add:]),
                nb_to_add, axis=0)  # Take the last nb_to_add points and average them
            result = np.append(
                paa_inv_transf,
                value_to_add)  # Append the regular PAA and the last-segment mean
            paa_list_result.extend(result)
        else:
            result = paa_inv_transf
            paa_list_result.extend(result)
    df['paa'] = paa_list_result
    df['paa'] = df['paa'].astype('float')
    df = df.sort_values(['ts', 'station'])
    df = df.set_index('ts')
    return df
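# A worked instance of the padding arithmetic above (numbers invented), under
# the default segments=10: 95 rows give 9 segments, the 9 repeated means cover
# 90 rows, and the last 5 rows are filled with their own mean.
import numpy as np

length = 95                                             # rows for one station
segments = 10
n_paa_segments = round(length * segments / 100 - 0.5)   # round(9.5 - 0.5) -> 9
means = np.zeros(n_paa_segments)                        # stand-in for the PAA segment means
approx = np.repeat(means, segments)                     # 9 means x 10 -> covers 90 rows
nb_to_add = length - len(approx)                        # 5 trailing rows get their own mean
print(n_paa_segments, len(approx), nb_to_add)           # 9 90 5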
def ApplyPaa(n_paa_segments, df):
    '''
    Apply PAA to the given dataframe.

    :param n_paa_segments: number of PAA segments used for the data reduction
    :param df: dataframe holding the data to which PAA should be applied
    :return: df after PAA has been applied
    '''
    df = df.values.T.tolist()
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    dadosPaa = scaler.fit_transform(df)
    print("Number of PAA segments: {}".format(n_paa_segments))
    paa = PiecewiseAggregateApproximation(n_paa_segments)
    dadosPaa = paa.inverse_transform(paa.fit_transform(dadosPaa))
    df = pd.DataFrame()
    for i in range(len(dadosPaa.T)):
        for j in range(len(dadosPaa.T[0])):
            df[j] = dadosPaa.T[i][j]
    return df
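# A minimal usage sketch for this variant (an assumption, not from the
# original): each DataFrame column is treated as one series, and the series
# length should be divisible by n_paa_segments.
import numpy as np
import pandas as pd
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation

df_in = pd.DataFrame({"s1": np.sin(np.arange(12.)),
                      "s2": np.cos(np.arange(12.))})  # two series of length 12
df_paa = ApplyPaa(4, df_in)
print(df_paa.shape)  # (2, 12): one row per input series, one column per step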
# Plotting Graph
plt.figure()
graph_idx = 0
# Transform PAA, SAX, 1d-SAX,
for stockCode in pos_relatedStock:
    dataset = dfpivot['v_updownpercent'][stockCode]
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    n_paa_segments = 10
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))
    # Tail of the rank-based transform (the function's opening lines are not
    # part of this snippet):
    for i in range(len(ans)):
        ans[i] = np.where(ele == ans[i])[0][0]
    del ele
    return ans

#################################################################
# Initial step: compute new data from the raw data, starting from PAA.
# The training data must be split before clustering; ratio must be chosen
# carefully so that the PAA segment length comes out as an integer.
ratio = 0.9
n_paa_segments = 18
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_mid = paa.fit_transform(stdData[:, :int(ratio * stdData.shape[1])])
paa_mid = paa_mid.reshape(paa_mid.shape[0], paa_mid.shape[1])
first_clus = paa_mid.copy()
for i in range(len(first_clus)):
    first_clus[i] = rankbased(paa_mid[i])

#################################################################
# First clustering pass: Birch produces an initial partition, then KMeans
# refines it. The input is the rank-based representation.
# Improvement idea: use the raw data directly and tune Birch's threshold.
data = first_clus
s = time.time()
y_pre = Birch(n_clusters=None, threshold=getEpsilon(data, 0.8)).fit_predict(data)
y_pre = KMeans(n_clusters=max(y_pre) + 1, random_state=0).fit_predict(data)
e = time.time()
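# The body of rankbased is only partially visible above. Below is a
# hypothetical, self-contained reconstruction consistent with that tail,
# assuming `ele` holds the sorted distinct values of the input row; it is a
# sketch, not the original implementation.
import numpy as np

def rankbased(row):
    # Hypothetical opening (assumption): ans is a working copy of the row and
    # ele its sorted distinct values, so each entry is replaced by its rank.
    ans = row.copy()
    ele = np.unique(row)  # np.unique returns the sorted unique values
    # Visible tail from the original snippet:
    for i in range(len(ans)):
        ans[i] = np.where(ele == ans[i])[0][0]
    del ele
    return ans

print(rankbased(np.array([3.0, 1.0, 2.0, 1.0])))  # [2. 0. 1. 0.]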
# Step 1: preliminary screening. Because of how the clustering behaves, much
# of the data never needs to be considered at all.
# Starting from each element, traverse every element and split them into
# multiple sets (elements already in a set are not visited again); all
# reachable elements end up in the same set.
# Then run hierarchical clustering; implementing it by hand is an option.
# Problem: no essential difference.
# Speed-up idea 2: apply the rank-based transform to one day of data, run
# KMeans directly into 100 clusters, then run interval clustering inside each.
#################################################################
# Initial step: compute new data from the raw data, starting from PAA.
# The training data must be split before clustering; ratio must be chosen
# carefully so that the PAA segment length comes out as an integer.
ratio = 0.9
n_paa_segments = 18
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
originData = stdData[:, :int(ratio * stdData.shape[1])]  # raw data already known in the training part
paa_mid = paa.fit_transform(originData)
paa_mid = paa_mid.reshape(paa_mid.shape[0], paa_mid.shape[1])
baseData = paa.inverse_transform(paa_mid)  # extract the baseline data
restData = originData - baseData  # compute the residual data

# Pattern extraction (sum and average, then apply the rank-based transform; or
# standardize again and apply SAX).
# Initial idea: stack and average each day's 24-hour traffic, apply the
# rank-based transform, then run KMeans (MSE) into 100 clusters.
# Idea 2: ...
# For each of the 100 clusters, run hierarchical clustering to refine further,
# down to a minimum of 1. Measure the result by the maximum intra-cluster
# similarity (how dissimilar the members are).
# dayPattern via SAX.
# Approach 01: use SAX to extract the residual information of the first three
# days and cluster into 20 groups. Inside each group, run hierarchical
# clustering with complete linkage and threshold 0.5. Since ~500 items take
# about 3 minutes, the whole run averages about an hour.
from sklearn.cluster import AgglomerativeClustering
import time

dayPattern = []
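# A toy illustration (numbers invented) of the baseline/residual split the
# comments describe: the repeated PAA segment means serve as the baseline, and
# what remains is the residual.
import numpy as np
from tslearn.piecewise import PiecewiseAggregateApproximation

X = np.array([1., 3., 2., 4., 5., 9.]).reshape(1, 6, 1)  # 1 series, length 6
paa_demo = PiecewiseAggregateApproximation(n_segments=3)
baseline = paa_demo.inverse_transform(paa_demo.fit_transform(X))
residual = X - baseline
print(baseline.ravel())  # [2. 2. 3. 3. 7. 7.] segment means, repeated
print(residual.ravel())  # [-1.  1. -1.  1. -2.  2.]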
class ForestISAX:
    """
    ForestISAX class containing one or more trees and pretreatment functions
    on the data contained in these trees

    :param int size_word: The size of the SAX words
    :param int threshold: The maximum threshold of nodes
    :param numpy.ndarray data_ts: The sequences to be inserted, used to extract the stats
    :param int base_cardinality: The smallest cardinality for encoding *i*\ SAX
    :param int number_tree: The number of TreeISAX trees in the forest
    :param list indices_partition: A list of index lists where, for each tree,
        the indices of the sequences to be inserted are specified
    :param int max_card_alphabet: if ``boolean_card_max == True``, the maximum
        cardinality of the *i*\ SAX encoding in each of the trees
    :param boolean boolean_card_max: if ``== True``, defines a maximum
        cardinality for the *i*\ SAX encoding of sequences in each of the trees

    :ivar list length_partition: The length of the SAX words in each tree
        (``== [size_word]`` if ``number_tree == 1``)
    """

    def __init__(self, size_word: int, threshold: int, data_ts: np_ndarray,
                 base_cardinality: int = 2, number_tree: int = 1,
                 indices_partition: list = None,
                 max_card_alphabet: int = 128, boolean_card_max: bool = True):
        """
        Initialization function of the ForestISAX class

        :returns: a forest pointing to one or more iSAX trees
        :rtype: ForestISAX
        """

        # Number of letters contained in the SAX word
        self.size_word = size_word
        # Threshold before a node splits
        self.threshold = threshold
        # Cardinality of each letter at level 1 of the tree
        self.base_cardinality = base_cardinality
        # Max cardinality
        self.max_cardinality = base_cardinality

        self._paa = PiecewiseAggregateApproximation(self.size_word)

        self.forest = {}
        self.number_tree = number_tree

        self.indices_partition = indices_partition

        self._init_trees(data_ts, max_card_alphabet, boolean_card_max)

    def _init_trees(self, data_ts: np_ndarray, max_card_alphabet: int,
                    boolean_card_max: bool):
        """
        Initializes the tree(s) when a ForestISAX object is created

        :param numpy.ndarray data_ts: The sequences to be inserted, used to extract the stats
        :param int max_card_alphabet: if ``boolean_card_max == True``, the
            maximum cardinality of the *i*\ SAX encoding in each of the trees
        :param boolean boolean_card_max: if ``boolean_card_max == True``,
            defines a maximum cardinality for the *i*\ SAX encoding of the
            sequences in each tree
        """

        if self.number_tree == 1:
            # If there is only one tree
            self.forest[0] = TreeISAX(
                size_word=self.size_word,
                threshold=self.threshold, data_ts=data_ts,
                base_cardinality=self.base_cardinality,
                max_card_alphabet=max_card_alphabet,
                boolean_card_max=boolean_card_max)
            self.length_partition = [self.size_word]
            self.indices_partition = [list(range(self.size_word))]
        elif self.indices_partition is None:
            # If there are several trees and the indices are not defined
            self.length_partition = [int(self.size_word / self.number_tree)
                                     ] * self.number_tree
            for reste in range(self.size_word - sum(self.length_partition)):
                self.length_partition[reste] += 1

            self.indices_partition = []
            for i in range(self.number_tree):
                self.forest[i] = TreeISAX(
                    size_word=self.length_partition[i],
                    threshold=self.threshold,
                    data_ts=data_ts[:, i:self.size_word:self.number_tree],
                    base_cardinality=2,
                    max_card_alphabet=max_card_alphabet,
                    boolean_card_max=boolean_card_max)
                self.indices_partition.append(
                    list(range(i, self.size_word, self.number_tree)))
        else:
            # Number of letters in each tree
            self.length_partition = []
            for part_tmp in self.indices_partition:
                self.length_partition.append(len(part_tmp))

            for i in range(self.number_tree):
                self.forest[i] = TreeISAX(
                    size_word=self.length_partition[i],
                    threshold=self.threshold,
                    data_ts=data_ts[:, self.indices_partition[i]],
                    base_cardinality=2,
                    max_card_alphabet=max_card_alphabet,
                    boolean_card_max=boolean_card_max)

    def index_data(self, new_sequences: np_ndarray):
        """
        Inserts a (possibly large) number of sequences into the forest

        :param numpy.ndarray new_sequences: The sequences to be inserted

        :returns: The number of sequences (sub-sequences) inserted into each tree
        :rtype: numpy.array
        """

        # Convert the time series to PAA
        if new_sequences.shape[-1] > 1:
            # add a dim to avoid a tslearn warning
            new_sequences = new_sequences.reshape(new_sequences.shape + (1, ))
        npaa = self._paa.fit_transform(new_sequences)

        # Counts the number of objects inserted into each tree
        cmpt_insert = np_zeros(shape=self.number_tree)

        for i, tree in self.forest.items():
            # Retrieves the indices of the tree, in the multi-tree case
            npaa_tmp = npaa[:, self.indices_partition[i]]
            npaa_tmp = npaa_tmp.reshape(npaa_tmp.shape[:-1])

            for npa_tp in npaa_tmp:
                tree.insert_paa(npa_tp)
                cmpt_insert[i] += 1

        # Returns array[tree_index] with the number of objects inserted per tree
        return cmpt_insert

    def _count_nodes(self, id_tree: int):
        """
        Returns the number of nodes and leaf nodes for a given tree.
        Uses :func:`~pyCFOFiSAX.tree_iSAX.TreeISAX.count_nodes_by_tree`.

        :param int id_tree: The ID of the tree to be analyzed

        :returns: the number of internal nodes, the number of leaf nodes
        :rtype: int, int
        """

        tree = self.forest[id_tree]
        return tree.count_nodes_by_tree()

    def list_nodes(self, id_tree: int, bool_print: bool = False):
        """
        Returns the lists of nodes and barycentres of the tree ``id_tree``.
        Displays statistics on standard output if ``bool_print == True``.
        Uses :func:`~pyCFOFiSAX.tree_iSAX.TreeISAX.get_list_nodes_and_barycentre`.

        :param int id_tree: The ID of the tree to be analyzed
        :param boolean bool_print: Displays the node stats on the standard output

        :returns: The list of nodes, the list of internal nodes, the list of barycentres
        :rtype: list, list, list
        """

        tree = self.forest[id_tree]
        node_list, node_list_leaf, node_leaf_ndarray_mean = tree.get_list_nodes_and_barycentre()
        if bool_print:
            print(
                f"{len(node_list)} nodes of which {len(node_list_leaf)} leaves in tree {id_tree}"
            )

        return node_list, node_list_leaf, node_leaf_ndarray_mean

    def preprocessing_forest_for_icfof(self, ntss: np_ndarray,
                                       bool_print: bool = False,
                                       count_num_node: bool = False):
        """
        Calls, for each tree ``id_tree``, the preprocessing required for the
        *i*\ CFOF computation

        :param ntss: The reference sequences
        :param boolean bool_print: if True, displays the time taken by each preprocessing step
        :param boolean count_num_node: if True, counts the number of nodes

        :returns: if ``count_num_node``, returns the number of nodes contained in each tree
        :rtype: numpy.array
        """

        total_num_node = np_zeros(self.number_tree)
        for id_tree, tmp_tree in self.forest.items():
            ntss_tmp = ntss[:, self.indices_partition[id_tree]]
            total_num_node[id_tree] = tmp_tree.preprocessing_for_icfof(
                ntss_tmp, bool_print=bool_print, count_num_node=count_num_node)

        if count_num_node:
            return total_num_node

    def number_nodes_visited(self, query: np_array, ntss: np_ndarray):
        """
        Counts the average number of nodes visited in each tree when computing
        the approximation.

        :param numpy.array query: The sequence to be evaluated
        :param numpy.ndarray ntss: The reference sequences

        :returns: the number of nodes visited in each tree for the *i*\ CFOF approximation
        :rtype: numpy.array
        """

        total_num_node = np_zeros(self.number_tree * 2)

        for id_tree, tmp_tree in self.forest.items():
            sub_query = query[self.indices_partition[id_tree]]
            ntss_tmp = np_array(ntss)[:, self.indices_partition[id_tree]]
            total_num_node[id_tree], total_num_node[self.number_tree + id_tree] = \
                tmp_tree.number_nodes_visited(sub_query, ntss_tmp)

        return total_num_node
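# A hypothetical usage sketch (assumes the TreeISAX class and the NumPy
# aliases np_ndarray/np_zeros/np_array from the same package are importable;
# the data shape is invented).
import numpy as np

rng = np.random.default_rng(0)
data_ts = rng.standard_normal((100, 16))  # 100 sequences of length 16

forest = ForestISAX(size_word=16, threshold=30, data_ts=data_ts, number_tree=2)
inserted_per_tree = forest.index_data(data_ts)
print(inserted_per_tree)  # e.g. array([100., 100.]): insertions per tree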
EDist_train = []
for i in range(len(y_train)):
    for j in range(len(y_train)):
        dist1 = np.sqrt(
            np.sum((np.array(X_train[i, :]) - np.array(X_train[j, :]))**2))
        EDist_train.append(dist1)

EDist_train = np.array(EDist_train)
EDist_train.resize(y_train.shape[0], int(len(EDist_train) / y_train.shape[0]))
# Note: the loop that fills EDist_test is not part of this snippet.
EDist_test = np.array(EDist_test)
EDist_test.resize(y_test.shape[0], int(len(EDist_test) / y_test.shape[0]))

# PAA transform + PAA feature extraction
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
Xtrain_paa = paa.inverse_transform(paa.fit_transform(X_train))
Xtest_paa = paa.inverse_transform(paa.fit_transform(X_test))
PAA_test = Xtest_paa[:, :, 0]
PAA_train = Xtrain_paa[:, :, 0]
'''
# PAA distance calculation
PAADist_train = []
PAADist_test = []
for i in range(len(y_train)):
    for j in range(len(y_train)):
        dist3 = paa.distance(Xtrain_paa[i, :], Xtest_paa[j, :])
        PAADist_train.append(dist3)
'''
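# The commented-out block above hints at tslearn's PAA-based distance. A
# minimal sketch (toy series, numbers invented) of computing it directly on
# two raw series:
import numpy as np
from tslearn.piecewise import PiecewiseAggregateApproximation

paa_demo = PiecewiseAggregateApproximation(n_segments=4)
ts1 = np.sin(np.linspace(0, 2 * np.pi, 16))
ts2 = np.cos(np.linspace(0, 2 * np.pi, 16))
paa_demo.fit(np.stack([ts1, ts2]).reshape(2, 16, 1))
print(paa_demo.distance(ts1, ts2))  # distance between the PAA representations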
print(list_new)
df_red = df_new.set_index(['name', 'day']).dif.dropna()
print(df_red)

scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
n_paa_segments = 10
n_sax_symbols = 10
n_sax_symbols_avg = 10
n_sax_symbols_slope = 6

for i in list_new:  # assuming `listnew` in the original is the `list_new` printed above
    records = len(df_red[[i]])
    print("stockname" + str(i))
    scaleddata = scaler.fit_transform(df_red[[i]])
    # print(scaleddata)

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(scaleddata))

    # SAX transform
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(scaleddata))

    # 1d-SAX transform
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(scaleddata))

    plt.figure()
    # First, raw time series
    plt.subplot(2, 2, 1)
    plt.plot(scaleddata[0].ravel(), "b-")
# add column names
df_price = pd.DataFrame(df_price, columns=day_features)
dataset = df_price.values
print("price feature sample: ")
print(df_price.head())

# PAA transform (and inverse transform) of the data
n_paa_segments = 3
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_list = []
for item in df_price.values:
    item = item.reshape((1, 5, 1))  # one series of 5 daily features
    paa_price_inv = paa.inverse_transform(paa.fit_transform(item))
    paa_list.append(paa_price_inv)
paa_array = np.array(paa_list)
paa_data = paa_array.reshape(1904, 5)  # 1904 rows in this dataset
paa_df = pd.DataFrame(paa_data, columns=day_features)
print("save time series data after PAA")
paa_df.to_csv("./paa_stock_data_time_series.csv", sep=',', encoding='utf-8')
print("PAA sample: ")
print(paa_df.head())

n_sax_symbols = 3
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                     alphabet_size_avg=n_sax_symbols)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))
    # Tail of the outlier-clipping loop (the opening lines are not part of
    # this snippet):
    else:
        stdData[index][vi] = maxNum

# 2. Normalize the data after outlier removal
# (normalize one more time)
from tslearn.preprocessing import TimeSeriesScalerMeanVariance

scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
originStdData = stdData  # keep a reference so the data can be restored later
stdData = scaler.fit_transform(stdData)

# 3. Apply PAA to obtain the baseline and the residual values
from tslearn.piecewise import PiecewiseAggregateApproximation

n_paa_segments = 20
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_mid = paa.fit_transform(stdData)
paa_inv = paa.inverse_transform(paa_mid)
paa_inv = paa_inv.reshape(paa_inv.shape[0], paa_inv.shape[1])

# 4. Run a simple k-means on the PAA output, with no more than 10 clusters;
# judge by the CH (Calinski-Harabasz) score and keep the best result.
# Then apply the rank-based transform and do a simple clustering.
from sklearn.cluster import MiniBatchKMeans, KMeans, DBSCAN, SpectralClustering, Birch
from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score

n_cluster = 1000
s = time.time()
km = KMeans(n_clusters=n_cluster, random_state=0)
y_pre = km.fit_predict(paa_inv)
e = time.time()
print(e - s, "s")
print(calinski_harabasz_score(paa_inv, y_pre))
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tslearn.piecewise import PiecewiseAggregateApproximation
from tslearn.piecewise import SymbolicAggregateApproximation

url = "C:/Users/Βασίλης/IdeaProjects/MyThesisApp/Data sets/Total_Vehicle_Sales.csv"
df = pd.read_csv(url)
series = np.array(df.Value)
print(series)

n_paa_segments = 4
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_dataset_inv = paa.inverse_transform(paa.fit_transform(series))
plt.plot(series.ravel(), "b-", alpha=0.4)
plt.plot(paa_dataset_inv.ravel(), "r-")

n_sax_symbols = 4
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                     alphabet_size_avg=n_sax_symbols)
print(sax)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(series))
print(sax_dataset_inv.ravel())
plt.plot(sax_dataset_inv.ravel(), "y-")
plt.title("SAX, %d symbols" % n_sax_symbols)
plt.show()
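# For reference, the intermediate SAX representation (before
# inverse_transform) is a grid of integer symbol indices. A short sketch
# continuing the script above; the printed values are illustrative:
sax_symbols = sax.fit_transform(series)
print(sax_symbols.shape)    # (1, 4, 1): one series, n_paa_segments symbols
print(sax_symbols.ravel())  # integers in {0, ..., n_sax_symbols - 1}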
duration = 60
listFile = ut_lc.getListLight(height=height, duration=duration)
data = ut_lc.getDataFromFile(fileName=listFile[339], height=height, duration=duration)
lc_nor = TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform([data['instances']])
# data = ut_mdf.getDataFromFile("light_curve_Gaia-DR2_51856511715955968_date20191130")
# data = ut_mdf.getDataFromFile("light_curve_Gaia-DR2_602712283908074752_date20200130")
# lc_nor = TimeSeriesScalerMeanVariance(mu=0.,std=1.).fit_transform([data['instances']])
timestamps = data["timestamp"]

# PAA transform (and inverse transform) of the data
n_paa_segments = 8
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_dataset_inv = paa.inverse_transform(paa.fit_transform(lc_nor))

# SAX transform
n_sax_symbols = 25
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                     alphabet_size_avg=n_sax_symbols)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(lc_nor))

# 1d-SAX transform
n_sax_symbols_avg = 5
n_sax_symbols_slope = 5
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope)
transformed_data = one_d_sax.fit_transform(lc_nor)
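# The snippet stops right after the 1d-SAX transform. A plausible continuation
# (an assumption, mirroring the PAA and SAX steps above) reconstructs the
# series from that representation:
one_d_sax_dataset_inv = one_d_sax.inverse_transform(transformed_data)
print(one_d_sax_dataset_inv[0].ravel())  # reconstructed light-curve values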
import numpy as np
import matplotlib.pyplot as plt
from tslearn.generators import random_walks
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation
from tslearn.piecewise import SymbolicAggregateApproximation, \
    OneD_SymbolicAggregateApproximation

np.random.seed(0)

# Generate a random walk time series
n_ts, sz, d = 1, 100, 1
dataset = random_walks(n_ts=n_ts, sz=sz, d=d)
scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
dataset = scaler.fit_transform(dataset)

# PAA transform (and inverse transform) of the data
n_paa_segments = 5
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
a = paa.fit_transform(dataset)
# inverse_transform expects the PAA representation, not the raw dataset
paa_dataset_inv = paa.inverse_transform(a)

plt.figure()
plt.subplot(2, 1, 1)  # First, raw time series
plt.plot(dataset[0].ravel(), "b-")
plt.title("Raw time series")
plt.subplot(2, 1, 2)  # Second, PAA
plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
plt.plot(paa_dataset_inv[0].ravel(), "b-")
plt.title("PAA")