def test_serialize_knn():
    n, sz, d = 15, 10, 3
    rng = numpy.random.RandomState(0)
    X = rng.randn(n, sz, d)
    y = rng.randint(low=0, high=3, size=n)
    n_neighbors = 3

    knn = KNeighborsTimeSeries(n_neighbors=n_neighbors)
    _check_not_fitted(knn)
    knn.fit(X, y)
    _check_params_predict(knn, X, ['kneighbors'])
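# _check_not_fitted and _check_params_predict are helpers from tslearn's test
# suite and are not shown here. A minimal, self-contained sketch of the kind of
# behaviour the test exercises -- pickling a fitted KNeighborsTimeSeries and
# checking that the reloaded estimator returns the same neighbours -- assuming
# only numpy, pickle and tslearn (this is an illustration, not the helper's
# actual implementation):
import pickle

import numpy
from tslearn.neighbors import KNeighborsTimeSeries

rng = numpy.random.RandomState(0)
X = rng.randn(15, 10, 3)

knn = KNeighborsTimeSeries(n_neighbors=3).fit(X)
knn_restored = pickle.loads(pickle.dumps(knn))  # serialization round-trip
assert numpy.array_equal(
    knn.kneighbors(X, return_distance=False),
    knn_restored.kneighbors(X, return_distance=False),
)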
def main():
    number_of_actions = len(mock_timestamps[0].values())

    print("MUSCLE 1")
    # CONSTRUCTS THE MODEL
    X1 = to_time_series_dataset(mock_dataset_muscle1)
    X_train1 = np.array(X1[:-1])
    X_test1 = np.array([X1[-1]])
    clf1 = KNeighborsTimeSeries(n_neighbors=3, metric="dtw")

    # IDENTIFIES THE NEIGHBOURS
    # Makes the row in question always the last row
    X_all1 = np.concatenate((X_train1, X_test1))
    clf1.fit(X_all1)  # the model must be fitted before kneighbors can be called
    nbrs_indices1 = clf1.kneighbors(X_all1, return_distance=False)
    nbrs_indices1 = nbrs_indices1[-1][1:]  # closest neighbours of the test row, excluding itself at [0]
    print("NEIGHBOURS INDICES")
    print(nbrs_indices1)

    # CALCULATES THE AVERAGE TIMESTAMPS
    avg_timestamps_1 = np.zeros(number_of_actions)
    for nbrs_ind in nbrs_indices1:
        avg_timestamps_1 += np.array(list(mock_timestamps[nbrs_ind].values()))
    avg_timestamps_1 /= len(nbrs_indices1)
    print("AVERAGE TIMESTAMPS")
    print(avg_timestamps_1)
    print("\n")

    print("MUSCLE 2")
    # CONSTRUCTS THE MODEL
    X2 = to_time_series_dataset(mock_dataset_muscle2)
    X_train2 = np.array(X2[:-1])
    X_test2 = np.array([X2[-1]])
    clf2 = KNeighborsTimeSeries(n_neighbors=5, metric="dtw")

    # IDENTIFIES THE NEIGHBOURS
    # Makes the row in question always the last row
    X_all2 = np.concatenate((X_train2, X_test2))
    clf2.fit(X_all2)
    nbrs_indices2 = clf2.kneighbors(X_all2, return_distance=False)
    nbrs_indices2 = nbrs_indices2[-1][1:]  # closest neighbours of the test row, excluding itself at [0]
    print("NEIGHBOURS INDICES")
    print(nbrs_indices2)

    # CALCULATES THE AVERAGE TIMESTAMPS
    avg_timestamps_2 = np.zeros(number_of_actions)
    for nbrs_ind in nbrs_indices2:
        avg_timestamps_2 += np.array(list(mock_timestamps[nbrs_ind].values()))
    avg_timestamps_2 /= len(nbrs_indices2)
    print("AVERAGE TIMESTAMPS")
    print(avg_timestamps_2)
    print("\n")

    # CALCULATE THE PREDICTION
    pred_avg_timestamp = (avg_timestamps_1 + avg_timestamps_2) / 2
    print("PREDICTED AVERAGE TIMESTAMP")
    print(pred_avg_timestamp)

    # CALCULATE THE DIFFERENCE FROM PREDICTION
    goal_timestamp = np.array(list(mock_timestamps[-1].values()))
    diff = np.sum(np.abs(goal_timestamp - pred_avg_timestamp))  # sum of absolute errors
    print("ACTUAL")
    print(goal_timestamp)
    print("DIFFERENCE FROM ACTUAL")
    print(diff)
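# An equivalent query pattern, sketched under the same mock data assumptions as
# the function above: fit the model on the training rows only and query with the
# held-out row, so the test row can never appear among its own neighbours and
# the [1:] slicing is not needed. Variable names follow main().
clf = KNeighborsTimeSeries(n_neighbors=3, metric="dtw")
clf.fit(X_train1)  # returned indices refer to rows of X_train1
nbrs_indices = clf.kneighbors(X_test1, return_distance=False)[0]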
                       d=d, n_blobs=n_blobs)
scaler = TimeSeriesScalerMinMax(min=0., max=1.)  # Rescale time series
X_scaled = scaler.fit_transform(X)

indices_shuffle = numpy.random.permutation(n_ts_per_blob * n_blobs)
X_shuffle = X_scaled[indices_shuffle]
y_shuffle = y[indices_shuffle]

X_train = X_shuffle[:n_ts_per_blob * n_blobs // 2]
X_test = X_shuffle[n_ts_per_blob * n_blobs // 2:]
y_train = y_shuffle[:n_ts_per_blob * n_blobs // 2]
y_test = y_shuffle[n_ts_per_blob * n_blobs // 2:]

# Nearest neighbor search
knn = KNeighborsTimeSeries(n_neighbors=3, metric="dtw")
knn.fit(X_train, y_train)
dists, ind = knn.kneighbors(X_test)
print("1. Nearest neighbour search")
print("Computed nearest neighbor indices (wrt DTW)\n", ind)
print("First nearest neighbor class:", y_train[ind[:, 0]])  # indices refer to the fitted X_train

# Nearest neighbor classification
knn_clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, metric="dtw")
knn_clf.fit(X_train, y_train)
predicted_labels = knn_clf.predict(X_test)
print("\n2. Nearest neighbor classification using DTW")
print("Correct classification rate:", accuracy_score(y_test, predicted_labels))

# Nearest neighbor classification with a different metric (Euclidean distance)
knn_clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, metric="euclidean")
import numpy
import matplotlib.pyplot as plt

from tslearn.neighbors import KNeighborsTimeSeries
from tslearn.datasets import CachedDatasets

seed = 0
numpy.random.seed(seed)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
print(X_train, y_train)

n_queries = 2
n_neighbors = 4

knn = KNeighborsTimeSeries(n_neighbors=n_neighbors)
knn.fit(X_train)
ind = knn.kneighbors(X_test[:n_queries], return_distance=False)

plt.figure()
for idx_ts in range(n_queries):
    plt.subplot(n_neighbors + 1, n_queries, idx_ts + 1)
    plt.plot(X_test[idx_ts].ravel(), "k-")
    plt.xticks([])
    for rank_nn in range(n_neighbors):
        plt.subplot(n_neighbors + 1, n_queries,
                    idx_ts + (n_queries * (rank_nn + 1)) + 1)
        plt.plot(X_train[ind[idx_ts, rank_nn]].ravel(), "r-")
        plt.xticks([])

plt.suptitle("Queries (in black) and their nearest neighbors (red)")
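# Optional follow-up, assuming the same fitted model and an interactive
# matplotlib backend: retrieve the DTW distances alongside the neighbour
# indices and render the figure.
dists, ind = knn.kneighbors(X_test[:n_queries], return_distance=True)
print("DTW distances to nearest neighbors:\n", dists)
plt.show()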
def main(args):
    if args.data == 'simulation':
        window_size = 50
        path = './data/simulated_data/'
        n_cluster = 4
        augment = 5
    if args.data == 'wf':
        window_size = 2500
        path = './data/waveform_data/processed'
        n_cluster = 4
        augment = 500
    if args.data == 'har':
        window_size = 5
        path = './data/HAR_data/'
        n_cluster = 6
        augment = 100

    with open(os.path.join(path, 'x_train.pkl'), 'rb') as f:
        x = pickle.load(f)
    with open(os.path.join(path, 'state_train.pkl'), 'rb') as f:
        y = pickle.load(f)
    with open(os.path.join(path, 'x_test.pkl'), 'rb') as f:
        x_test = pickle.load(f)
    with open(os.path.join(path, 'state_test.pkl'), 'rb') as f:
        y_test = pickle.load(f)

    # Sample random windows from the training series and label each window
    # with the (rounded) mean state over that window
    T = x.shape[-1]
    t = np.random.randint(window_size, T - window_size, len(x) * augment)
    x_window = np.array([
        x[i // augment, :, tt - window_size // 2:tt + window_size // 2]
        for i, tt in enumerate(t)
    ])
    y_window = np.round(
        np.mean(
            np.array([
                y[i // augment, tt - window_size // 2:tt + window_size // 2]
                for i, tt in enumerate(t)
            ]), -1))
    if args.data == 'wf':
        minority_index = np.logical_or(y_window == 1, y_window == 2)
        rand_index = np.random.randint(0, len(y_window), 200)
        y_window = np.concatenate(
            [y_window[minority_index], y_window[rand_index]], 0)
        x_window = np.concatenate(
            [x_window[minority_index], x_window[rand_index]], 0)
        x_window = x_window.transpose((0, 2, 1))  # shape: [n_samples, t_len, d]
        x_window = x_window[:, ::2, :]  # Decimate measurements for efficiency
    else:
        x_window = x_window.transpose((0, 2, 1))  # shape: [n_samples, t_len, d]

    # Same windowing for the test series
    t = np.random.randint(window_size, T - window_size, len(x_test) * augment)
    x_test_window = np.array([
        x_test[i // augment, :, tt - window_size // 2:tt + window_size // 2]
        for i, tt in enumerate(t)
    ])
    y_test_window = np.round(
        np.mean(
            np.array([
                y_test[i // augment, tt - window_size // 2:tt + window_size // 2]
                for i, tt in enumerate(t)
            ]), -1))
    if 0:  # args.data == 'wf':
        minority_index = np.logical_or(y_test_window == 1, y_test_window == 2)
        rand_index = np.random.randint(0, len(y_test_window), 150)
        y_test = np.concatenate(
            [y_test_window[minority_index], y_test_window[rand_index]], 0)
        x_test = np.concatenate(
            [x_test_window[minority_index], x_test_window[rand_index]], 0)
        x_test_window = x_test.transpose((0, 2, 1))  # shape: [n_samples, t_len, d]
        x_test = x_test_window[:, ::2, :]  # Decimate measurements for efficiency
    else:
        y_test = y_test_window
        x_test = x_test_window
        x_test = x_test.transpose((0, 2, 1))  # shape: [n_samples, t_len, d]

    accuracy, s_score, db_score, auc, auprc = [], [], [], [], []
    for cv in range(3):
        shuffled_inds = list(range(len(x_window)))
        random.shuffle(shuffled_inds)
        x_window = x_window[shuffled_inds]
        y_window = y_window[shuffled_inds]
        if args.data == 'wf':
            n_train = int(0.7 * len(x_window))
            x_train = x_window[:n_train]
            y_train = y_window[:n_train]
            x_test = x_window[n_train:]
            y_test = y_window[n_train:]
        else:
            x_train = x_window
            y_train = y_window

        knn = KNeighborsTimeSeries(n_neighbors=args.K, metric='dtw').fit(x_train)
        kmeans = TimeSeriesKMeans(n_clusters=n_cluster, metric='dtw')
        cluster_labels = kmeans.fit_predict(x_test)
        dist, ind = knn.kneighbors(x_test, return_distance=True)
        # Majority vote over the labels of the K nearest training windows
        predictions = np.array(
            [np.bincount(y_train[preds].astype(int)).argmax() for preds in ind])
        y_onehot = np.zeros((len(y_test), n_cluster))
        y_onehot[np.arange(len(y_onehot)), y_test.astype(int)] = 1
        prediction_onehot = np.zeros((len(y_test), n_cluster))
        prediction_onehot[np.arange(len(prediction_onehot)),
                          predictions.astype(int)] = 1

        accuracy.append(accuracy_score(y_test, predictions))
        auc.append(roc_auc_score(y_onehot, prediction_onehot))
        auprc.append(average_precision_score(y_onehot, prediction_onehot))
        s_score.append(
            silhouette_score(x_test.reshape((len(x_test), -1)), cluster_labels))
        db_score.append(
            davies_bouldin_score(x_test.reshape((len(x_test), -1)),
                                 cluster_labels))

    print('\nSummary performance:')
    print('Accuracy: ', np.mean(accuracy) * 100, '+-', np.std(accuracy) * 100)
    print('AUC: ', np.mean(auc), '+-', np.std(auc))
    print('AUPRC: ', np.mean(auprc), '+-', np.std(auprc))
    print('Silhouette score: ', np.mean(s_score), '+-', np.std(s_score))
    print('Davies Bouldin score: ', np.mean(db_score), '+-', np.std(db_score))
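# main() expects an argparse-style namespace exposing `data` and `K`; the parser
# itself is not part of this snippet. A minimal entry point, assuming the flag
# names below (hypothetical, for illustration only):
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='KNN (DTW) baseline')
    parser.add_argument('--data', type=str, default='simulation',
                        choices=['simulation', 'wf', 'har'])
    parser.add_argument('--K', type=int, default=1,
                        help='number of neighbours used for the vote')
    main(parser.parse_args())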
def softdtw_augment_train_set(x_train, y_train, classes, num_synthetic_ts,
                              max_neighbors=5):
    from tslearn.neighbors import KNeighborsTimeSeries
    from tslearn.barycenters import softdtw_barycenter
    from tslearn.metrics import gamma_soft_dtw

    # synthetic train set and labels
    synthetic_x_train = []
    synthetic_y_train = []
    # loop through each class
    for c in classes:
        # get the MTS for this class
        c_x_train = x_train[np.where(y_train == c)[0]]
        if len(c_x_train) == 1:
            # skip if there is only one time series per set
            continue
        # compute appropriate gamma for softdtw for the entire class
        class_gamma = gamma_soft_dtw(c_x_train)
        # loop through the number of synthetic examples needed
        generated_samples = 0
        while generated_samples < num_synthetic_ts:
            # Choose a random representative for the class
            representative_indices = np.arange(len(c_x_train))
            random_representative_index = np.random.choice(
                representative_indices, size=1, replace=False)
            random_representative = c_x_train[random_representative_index]
            # Choose a random number of neighbors
            # (between 1 and one minus the total number of class representatives)
            random_number_of_neighbors = int(
                np.random.uniform(1, max_neighbors, size=1))
            knn = KNeighborsTimeSeries(n_neighbors=random_number_of_neighbors + 1,
                                       metric='softdtw',
                                       metric_params={
                                           'gamma': class_gamma
                                       }).fit(c_x_train)
            random_neighbor_distances, random_neighbor_indices = knn.kneighbors(
                X=random_representative, return_distance=True)
            random_neighbor_indices = random_neighbor_indices[0]
            random_neighbor_distances = random_neighbor_distances[0]
            nearest_neighbor_distance = np.sort(random_neighbor_distances)[1]
            # random_neighbors = np.zeros((random_number_of_neighbors + 1,
            #                              c_x_train.shape[1]), dtype=float)
            random_neighbors = np.zeros(
                (random_number_of_neighbors + 1, c_x_train.shape[1],
                 c_x_train.shape[2]),
                dtype=float)
            for j, neighbor_index in enumerate(random_neighbor_indices):
                random_neighbors[j, :] = c_x_train[neighbor_index]
            # Compute the weight vector from neighbor distances
            # (then normalize it)
            weights = np.exp(
                np.log(0.5) * random_neighbor_distances /
                nearest_neighbor_distance)
            weights /= np.sum(weights)
            # Compute tslearn.barycenters.softdtw_barycenter with the weights
            # above and a gamma value specific to the selected neighbors
            random_neighbors_gamma = gamma_soft_dtw(random_neighbors)
            generated_sample = softdtw_barycenter(random_neighbors,
                                                  weights=weights,
                                                  gamma=random_neighbors_gamma)
            synthetic_x_train.append(generated_sample)
            synthetic_y_train.append(c)
            # Repeat until the desired number of synthetic samples per class
            # has been generated
            generated_samples += 1
    # return the synthetic set
    return np.array(synthetic_x_train), np.array(synthetic_y_train)
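# A usage sketch for the function above; the data shapes and names here are
# illustrative assumptions, not taken from the original code. It augments a
# small multivariate training set with 10 synthetic series per class and stacks
# them onto the real data.
import numpy as np

x_train = np.random.randn(30, 60, 2)        # 30 series, length 60, 2 dimensions
y_train = np.repeat(np.arange(3), 10)       # 3 balanced classes
classes = np.unique(y_train)

x_syn, y_syn = softdtw_augment_train_set(x_train, y_train, classes,
                                         num_synthetic_ts=10)
x_augmented = np.concatenate([x_train, x_syn], axis=0)
y_augmented = np.concatenate([y_train, y_syn], axis=0)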