def _get_random_walk():
    """Return one standardized random-walk time series.

    Seeds numpy's global RNG for reproducibility, draws a single
    univariate walk of length 100, then rescales it to zero mean and
    unit variance.
    """
    numpy.random.seed(0)
    # Generate a random walk time series: one series, 100 samples, 1 dim.
    walk = random_walks(n_ts=1, sz=100, d=1)
    # Standardize each series to mu=0, std=1.
    return TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform(walk)
def __generateRefPrice(self, curPrice, seedPrice, priceRange):
    """Generate a synthetic price series anchored at the current price.

    Builds a [priceMin, priceMax] band around ``seedPrice`` (randomly
    perturbed, scaled by ``priceRange``) that always contains
    ``curPrice``, then draws random walks rescaled into that band until
    one passes within 0.1% of ``curPrice``. The returned series is the
    window starting at that point, so it begins (approximately) at the
    current price.

    Returns:
        list[float]: the generated price path (random length,
        10000-30000 points).

    Side effect: dumps the series to ``price.txt``, one price per line
    (appears to be a debug/audit trace).
    """
    # Band edges: +/-5% around seedPrice, widened by a uniform random
    # factor scaled by priceRange; min/max with curPrice guarantees the
    # band contains the current price.
    priceMin = min(curPrice, seedPrice / 1.05 *
                   (1 + numpy.random.uniform(-priceRange * 0.1, priceRange * 0.4)))
    priceMax = max(curPrice, seedPrice * 1.05 *
                   (1 + numpy.random.uniform(-priceRange * 0.4, priceRange * 0.1)))
    data_len = numpy.random.randint(10000, 30000)
    # NOTE(review): unbounded retry loop — terminates only when a walk
    # sample lands within 0.1% of curPrice; in practice the MinMax
    # rescaling makes this very likely on each draw, but there is no
    # hard cap. Confirm acceptable for the caller.
    while True:
        # Draw twice the needed length so a window of data_len points
        # always remains after any accepted start index i < data_len.
        dataset = random_walks(n_ts=1, sz=data_len * 2)
        scaler = TimeSeriesScalerMinMax(min=float(priceMin), max=float(priceMax))
        dataset_scaled = scaler.fit_transform(dataset)[0, :, 0]
        for i in range(data_len):
            # Accept the first sample within 0.1% of the current price.
            if abs(dataset_scaled[i] - curPrice) / curPrice < 0.001:
                window = dataset_scaled[i:i + data_len]
                with open('price.txt', 'w+') as f:
                    f.writelines([f'{p}\n' for p in window])
                return list(window)
# Demo script: PAA and SAX approximations of a standardized random walk.
import numpy
import matplotlib.pyplot as plt
from tslearn.generators import random_walks
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation
from tslearn.piecewise import SymbolicAggregateApproximation, \
    OneD_SymbolicAggregateApproximation

numpy.random.seed(0)  # reproducible demo
# Generate a random walk time series: one series, 100 samples, 1 dim.
n_ts, sz, d = 1, 100, 1
dataset = random_walks(n_ts=n_ts, sz=sz, d=d)
scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
# Rescale time series to zero mean / unit variance.
dataset = scaler.fit_transform(dataset)

# PAA transform (and inverse transform) of the data:
# 5 segments, each replaced by its mean, then expanded back.
n_paa_segments = 5
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

# SAX transform: same segmentation, values quantized to a 256-symbol
# alphabet, then reconstructed.
n_sax_symbols = 256
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                     alphabet_size_avg=n_sax_symbols)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))
print("a")  # NOTE(review): leftover debug print — consider removing

# 1d-SAX transform — these parameters are presumably consumed by code
# past this chunk boundary (not visible here).
n_sax_symbols_avg = 8
n_sax_symbols_slope = 8
#%% from tslearn.clustering import TimeSeriesKMeans from tslearn.generators import random_walks from tslearn.utils import to_time_series_dataset #%% X = random_walks(n_ts=50, sz=32, d=1) km = TimeSeriesKMeans(n_clusters=3, metric="euclidean", max_iter=5, random_state=0).fit(X) km.cluster_centers_.shape #%% km_dba = TimeSeriesKMeans(n_clusters=3, metric="dtw", max_iter=5, max_iter_barycenter=5, random_state=0).fit(X) km_dba.cluster_centers_.shape #%% km_sdtw = TimeSeriesKMeans(n_clusters=3, metric="softdtw", max_iter=5, max_iter_barycenter=5, metric_params={ "gamma": .5 }, random_state=0).fit(X) km_sdtw.cluster_centers_.shape #%%
# Example 1 : Length of the arc between two angles on a circle def arc_length(angle_1, angle_2, r=1.): """Length of the arc between two angles (in rad) on a circle of radius r. """ # Compute the angle between the two inputs between 0 and 2*pi. theta = np.mod(angle_2 - angle_1, 2 * pi) if theta > pi: theta = theta - 2 * pi # Return the length of the arc L = r * np.abs(theta) return (L) dataset_1 = random_walks(n_ts=n_ts, sz=sz, d=1) scaler = TimeSeriesScalerMeanVariance(mu=0., std=pi) # Rescale the time series dataset_scaled_1 = scaler.fit_transform(dataset_1) # DTW using a function as the metric argument path_1, sim_1 = metrics.dtw_path_from_metric(dataset_scaled_1[0], dataset_scaled_1[1], metric=arc_length) # Example 2 : Hamming distance between 2 multi-dimensional boolean time series rw = random_walks(n_ts=n_ts, sz=sz, d=15, std=.3) dataset_2 = np.mod(np.floor(rw), 4) == 0 # DTW using one of the options of sklearn.metrics.pairwise_distances path_2, sim_2 = metrics.dtw_path_from_metric(dataset_2[0], dataset_2[1],
# In[14]: from tslearn.generators import random_walks from tslearn.clustering import TimeSeriesKMeans import pandas as pd import numpy as np import matplotlib.pyplot as plt # In[8]: X = random_walks(n_ts=50, sz=32, d=1, random_state= 0) # In[9]: km5 = TimeSeriesKMeans(n_clusters=5, metric="euclidean", max_iter=5,random_state=0).fit(X) km3 = TimeSeriesKMeans(n_clusters=3, metric="euclidean", max_iter=5,random_state=0).fit(X) # In[10]: km_dba3 = TimeSeriesKMeans(n_clusters=3, metric="dtw", max_iter=5, max_iter_barycenter=5,random_state=0).fit(X) km_dba4 = TimeSeriesKMeans(n_clusters=4, metric="dtw", max_iter=5, max_iter_barycenter=5,random_state=0).fit(X) km_dba5 = TimeSeriesKMeans(n_clusters=5, metric="dtw", max_iter=5, max_iter_barycenter=5,random_state=0).fit(X)
Conference on Data Engineering (ICDE '02). IEEE Computer Society, USA, 673.
"""
# Author: Daniela Duarte
# License: BSD 3 clause
# (Module docstring above is truncated — it starts before this chunk.)
import numpy
import matplotlib.pyplot as plt
from tslearn.generators import random_walks
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn import metrics

numpy.random.seed(0)
# Two standardized random-walk series of length 100, 1 dimension.
n_ts, sz, d = 2, 100, 1
dataset = random_walks(n_ts=n_ts, sz=sz, d=d, random_state=5)
scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
# Rescale time series
dataset_scaled = scaler.fit_transform(dataset)

# LCSS alignment between the full first series and the first 40 points
# of the second; eps is the matching tolerance.
lcss_path, sim_lcss = metrics.lcss_path(dataset_scaled[0, :, 0],
                                        dataset_scaled[1, :40, 0],
                                        eps=1.5)
# DTW alignment on the same truncated pair, for comparison.
dtw_path, sim_dtw = metrics.dtw_path(dataset_scaled[0, :, 0],
                                     dataset_scaled[1, :40, 0])

plt.figure(1, figsize=(8, 8))
plt.plot(dataset_scaled[0, :, 0], "b-", label='First time series')
plt.plot(dataset_scaled[1, :40, 0], "g-", label='Second time series')

# Draw the LCSS-matched index pairs; the loop body continues past this
# chunk boundary.
for positions in lcss_path:
# code to test runtime of kshape
from tslearn.clustering import KShape
from tslearn.generators import random_walks  # fix: used below but was never imported
from tslearn.preprocessing import TimeSeriesScalerMeanVariance  # fix: was never imported
import matplotlib.pyplot as plt
import time
import numpy as np

# note on data: we will need to downsample and trim
# lets assume we downsample to 1 hz and get the 100s containing the event
# NOTE(review): the comment says 1 Hz but fs is 2 — confirm which is intended.
fs = 2           # sampling rate (Hz)
snipLen = 300    # snippet length in seconds (5 minutes)
numCluster = 10  # fixed number of KShape clusters

# generate n 5 minute (300*fs) long time series for each dataset size
# and time how long KShape clustering takes.
n_vect = [10, 100, 1000, 10000]
# 1-D array (was (n, 1)) so runtimes[n] prints as a scalar, not "[0.5]".
runtimes = np.zeros(len(n_vect))
for n, n_ts in enumerate(n_vect):
    timer = time.time()
    X = random_walks(n_ts=n_ts, sz=snipLen * fs, d=1)
    X = TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform(X)
    ks = KShape(n_clusters=numCluster, n_init=1, random_state=0).fit(X)
    runtimes[n] = time.time() - timer
    print("Finished clustering (" + str(numCluster) + " clusters) for "
          + str(n_vect[n]) + " events in " + str(runtimes[n]) + " seconds")

# Runtime vs. dataset size on a log-x scatter plot.
plt.scatter(n_vect, runtimes)
plt.xscale('log')
plt.xlabel("Number of waveforms")
plt.ylabel("Runtime")
plt.show()