# Our pipeline consists of two phases. First, data will be normalized using
# min-max normalization. Afterwards, it is fed to a KNN classifier. For the
# KNN classifier, we tune the n_neighbors and weights hyper-parameters.
n_splits = 3
pipeline = GridSearchCV(
    Pipeline([('normalize', TimeSeriesScalerMinMax()),
              ('knn', KNeighborsTimeSeriesClassifier())]),
    {'knn__n_neighbors': [5, 25],
     'knn__weights': ['uniform', 'distance']},
    # shuffle=True with a fixed random_state keeps the folds reproducible.
    cv=StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42))

X_train, y_train, _, _ = CachedDatasets().load_dataset("Trace")

# Keep only timeseries of classes 1, 2 and 3.
# BUG FIX: the original filter was `y_train > 0`, which keeps *all* classes
# (the Trace dataset labels are 1-4), contradicting this comment and making
# classes 1 and 4 share the color 'r' in the plot below (index -1 and index 2
# of `colors` respectively). `y_train < 4` keeps exactly classes 1-3, matching
# the equivalent filter used elsewhere in this file.
X_train = X_train[y_train < 4]
y_train = y_train[y_train < 4]

# Keep only the first 50 timeseries, and retain only a small window
# (time steps 50-149) of each of them.
X_train, y_train = X_train[:50, 50:150], y_train[:50]

# Plot our timeseries, one color per class.
colors = ['g', 'b', 'r']
plt.figure()
for ts, label in zip(X_train, y_train):
    # Labels are in {1, 2, 3}; shift to a 0-based index into `colors`.
    # (The original used `label - 2`, which only worked by accident via
    # Python's negative indexing.)
    plt.plot(ts, c=colors[label - 1], alpha=0.5)
plt.title('The timeseries in the dataset')
""" # Author: Romain Tavenard # License: BSD 3 clause import numpy import matplotlib.pyplot as plt from tslearn_cuda.not_used.tslearn.clustering import GlobalAlignmentKernelKMeans from tslearn_cuda.not_used.tslearn import sigma_gak from tslearn_cuda.not_used.tslearn.datasets import CachedDatasets from tslearn_cuda.not_used.tslearn.preprocessing import TimeSeriesScalerMeanVariance seed = 0 numpy.random.seed(seed) X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace") # Keep first 3 classes X_train = X_train[y_train < 4] numpy.random.shuffle(X_train) # Keep only 50 time series X_train = TimeSeriesScalerMeanVariance().fit_transform(X_train[:50]) sz = X_train.shape[1] gak_km = GlobalAlignmentKernelKMeans(n_clusters=3, sigma=sigma_gak(X_train), n_init=20, verbose=True, random_state=seed) y_pred = gak_km.fit_predict(X_train) plt.figure()