Example no. 1
import numpy

from tslearn.neighbors import KNeighborsTimeSeries

# _check_not_fitted and _check_params_predict are serialization-test helpers
# from tslearn's test suite.
def test_serialize_knn():
    n, sz, d = 15, 10, 3
    rng = numpy.random.RandomState(0)
    X = rng.randn(n, sz, d)
    y = rng.randint(low=0, high=3, size=n)

    n_neighbors = 3

    knn = KNeighborsTimeSeries(n_neighbors=n_neighbors)

    _check_not_fitted(knn)

    knn.fit(X, y)

    _check_params_predict(knn, X, ['kneighbors'])
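
The two helpers above check that an unfitted model refuses to predict and that a fitted one survives a save/reload round-trip. A minimal sketch of that round-trip, assuming tslearn's model-serialization API (to_json/from_json):

knn = KNeighborsTimeSeries(n_neighbors=3).fit(X)
knn.to_json("knn.json")  # persist the fitted model to disk
knn_reloaded = KNeighborsTimeSeries.from_json("knn.json")
assert numpy.array_equal(
    knn.kneighbors(X, return_distance=False),
    knn_reloaded.kneighbors(X, return_distance=False))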
Example no. 2
import numpy as np

from tslearn.neighbors import KNeighborsTimeSeries
from tslearn.utils import to_time_series_dataset

# mock_dataset_muscle1, mock_dataset_muscle2 and mock_timestamps are
# fixtures defined elsewhere in the original script.
def main():
    number_of_actions = len(mock_timestamps[0].values())

    print("MUSCLE 1")

    # CONSTRUCTS THE MODEL
    X1 = to_time_series_dataset(mock_dataset_muscle1)
    X_train1 = np.array(X1[:-1])
    X_test1 = np.array([X1[-1]])
    clf1 = KNeighborsTimeSeries(n_neighbors=3, metric="dtw")
    X_all1 = np.concatenate((X_train1, X_test1))
    clf1.fit(X_all1)  # kneighbors requires a fitted estimator

    # IDENTIFIES THE NEIGHBOURS
    # Makes the row in question always the last row
    nbrs_indices1 = clf1.kneighbors(X_all1, return_distance=False)
    # Closest neighbours of the test row; entry [0] is the row itself, so drop it
    nbrs_indices1 = nbrs_indices1[-1][1:]
    print("NEIGHBOURS INDICES")
    print(nbrs_indices1)

    # CALCULATES THE AVERAGE TIMESTAMPS
    avg_timestamps_1 = np.zeros(number_of_actions)
    for nbrs_ind in nbrs_indices1:
        avg_timestamps_1 += np.array(list(mock_timestamps[nbrs_ind].values()))
    avg_timestamps_1 /= len(nbrs_indices1)
    print("AVERAGE TIMESTAMPS")
    print(avg_timestamps_1)

    print("\n")

    print("MUSCLE 2")

    # CONSTRUCTS THE MODEL
    X2 = to_time_series_dataset(mock_dataset_muscle2)
    X_train2 = np.array(X2[:-1])
    X_test2 = np.array([X2[-1]])
    clf2 = KNeighborsTimeSeries(n_neighbors=5, metric="dtw")
    X_all2 = np.concatenate((X_train2, X_test2))
    clf2.fit(X_all2)  # kneighbors requires a fitted estimator

    # IDENTIFIES THE NEIGHBOURS
    # Makes the row in question always the last row
    nbrs_indices2 = clf2.kneighbors(X_all2, return_distance=False)
    # Closest neighbours of the test row; entry [0] is the row itself, so drop it
    nbrs_indices2 = nbrs_indices2[-1][1:]
    print("NEIGHBOURS INDICES")
    print(nbrs_indices2)

    # CALCULATES THE AVERAGE TIMESTAMPS
    avg_timestamps_2 = np.zeros(number_of_actions)
    for nbrs_ind in nbrs_indices2:
        avg_timestamps_2 += np.array(list(mock_timestamps[nbrs_ind].values()))
    avg_timestamps_2 /= len(nbrs_indices2)
    print("AVERAGE TIMESTAMPS")
    print(avg_timestamps_2)

    print("\n")

    # CALCULATE THE PREDICTION
    pred_avg_timestamp = (avg_timestamps_1 + avg_timestamps_2) / 2
    print("PREDICTED AVERAGE TIMESTAMP")
    print(pred_avg_timestamp)

    # CALCULATE THE DIFFERENCE FROM PREDICTION
    goal_timestamp = np.array(list(mock_timestamps[-1].values()))
    diff = np.sum(np.abs(goal_timestamp - pred_avg_timestamp))  # total absolute error
    print("ACTUAL")
    print(goal_timestamp)
    print("DIFFERENCE FROM ACTUAL")
    print(diff)
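
Both muscle blocks repeat the same fit/query/drop-self pattern. A hypothetical helper (the name and signature are mine, not part of the original script) that captures it:

def neighbors_of_last_row(dataset, n_neighbors, metric="dtw"):
    # Indices of the nearest neighbours of the last series in `dataset`,
    # excluding the series itself (returned first, at distance 0).
    X = to_time_series_dataset(dataset)
    clf = KNeighborsTimeSeries(n_neighbors=n_neighbors, metric=metric).fit(X)
    return clf.kneighbors(X, return_distance=False)[-1][1:]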
Example no. 3

import numpy
from sklearn.metrics import accuracy_score

from tslearn.generators import random_walk_blobs
from tslearn.preprocessing import TimeSeriesScalerMinMax
from tslearn.neighbors import KNeighborsTimeSeries, \
    KNeighborsTimeSeriesClassifier

numpy.random.seed(0)
# The snippet was truncated here; the constants below are illustrative values
n_ts_per_blob, sz, d, n_blobs = 20, 100, 1, 2

X, y = random_walk_blobs(n_ts_per_blob=n_ts_per_blob,
                         sz=sz,
                         d=d,
                         n_blobs=n_blobs)
scaler = TimeSeriesScalerMinMax(min=0., max=1.)  # Rescale time series
X_scaled = scaler.fit_transform(X)

indices_shuffle = numpy.random.permutation(n_ts_per_blob * n_blobs)
X_shuffle = X_scaled[indices_shuffle]
y_shuffle = y[indices_shuffle]

X_train = X_shuffle[:n_ts_per_blob * n_blobs // 2]
X_test = X_shuffle[n_ts_per_blob * n_blobs // 2:]
y_train = y_shuffle[:n_ts_per_blob * n_blobs // 2]
y_test = y_shuffle[n_ts_per_blob * n_blobs // 2:]

# Nearest neighbor search
knn = KNeighborsTimeSeries(n_neighbors=3, metric="dtw")
knn.fit(X_train, y_train)
dists, ind = knn.kneighbors(X_test)
print("1. Nearest neighbour search")
print("Computed nearest neighbor indices (wrt DTW)\n", ind)
print("First nearest neighbor class:", y_test[ind[:, 0]])

# Nearest neighbor classification
knn_clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, metric="dtw")
knn_clf.fit(X_train, y_train)
predicted_labels = knn_clf.predict(X_test)
print("\n2. Nearest neighbor classification using DTW")
print("Correct classification rate:", accuracy_score(y_test, predicted_labels))

# Nearest neighbor classification with a different metric (Euclidean distance)
knn_clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, metric="euclidean")
Example no. 4
import numpy
import matplotlib.pyplot as plt

from tslearn.neighbors import KNeighborsTimeSeries
from tslearn.datasets import CachedDatasets

seed = 0
numpy.random.seed(seed)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")

print(X_train, y_train)

n_queries = 2
n_neighbors = 4

knn = KNeighborsTimeSeries(n_neighbors=n_neighbors)
knn.fit(X_train)
ind = knn.kneighbors(X_test[:n_queries], return_distance=False)

plt.figure()
for idx_ts in range(n_queries):
    plt.subplot(n_neighbors + 1, n_queries, idx_ts + 1)
    plt.plot(X_test[idx_ts].ravel(), "k-")
    plt.xticks([])
    for rank_nn in range(n_neighbors):
        plt.subplot(n_neighbors + 1, n_queries,
                    idx_ts + (n_queries * (rank_nn + 1)) + 1)
        plt.plot(X_train[ind[idx_ts, rank_nn]].ravel(), "r-")
        plt.xticks([])

plt.suptitle("Queries (in black) and their nearest neighbors (red)")
plt.show()
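
Note that knn.fit(X_train) is called here without labels: KNeighborsTimeSeries performs pure retrieval, so labels are only needed when classifying, as KNeighborsTimeSeriesClassifier does in the previous example.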
Example no. 5
import os
import pickle
import random

import numpy as np
from sklearn.metrics import (accuracy_score, roc_auc_score,
                             average_precision_score, silhouette_score,
                             davies_bouldin_score)

from tslearn.neighbors import KNeighborsTimeSeries
from tslearn.clustering import TimeSeriesKMeans


def main(args):
    if args.data == 'simulation':
        window_size = 50
        path = './data/simulated_data/'
        n_cluster = 4
        augment = 5
    elif args.data == 'wf':
        window_size = 2500
        path = './data/waveform_data/processed'
        n_cluster = 4
        augment = 500
    elif args.data == 'har':
        window_size = 5
        path = './data/HAR_data/'
        n_cluster = 6
        augment = 100

    with open(os.path.join(path, 'x_train.pkl'), 'rb') as f:
        x = pickle.load(f)
    with open(os.path.join(path, 'state_train.pkl'), 'rb') as f:
        y = pickle.load(f)
    with open(os.path.join(path, 'x_test.pkl'), 'rb') as f:
        x_test = pickle.load(f)
    with open(os.path.join(path, 'state_test.pkl'), 'rb') as f:
        y_test = pickle.load(f)

    T = x.shape[-1]
    t = np.random.randint(window_size, T - window_size, len(x) * augment)
    x_window = np.array([
        x[i // augment, :, tt - window_size // 2:tt + window_size // 2]
        for i, tt in enumerate(t)
    ])
    y_window = np.round(
        np.mean(
            np.array([
                y[i // augment, tt - window_size // 2:tt + window_size // 2]
                for i, tt in enumerate(t)
            ]), -1))
    if args.data == 'wf':
        minority_index = np.logical_or(y_window == 1, y_window == 2)
        rand_index = np.random.randint(0, len(y_window), 200)
        y_window = np.concatenate(
            [y_window[minority_index], y_window[rand_index]], 0)
        x_window = np.concatenate(
            [x_window[minority_index], x_window[rand_index]], 0)
        x_window = x_window.transpose((0, 2, 1))  # shape:[n_samples, t_len, d]
        x_window = x_window[:, ::2, :]  # Decimate measurements for efficiency
    else:
        x_window = x_window.transpose((0, 2, 1))  # shape:[n_samples, t_len, d]

    t = np.random.randint(window_size, T - window_size, len(x_test) * augment)
    x_test_window = np.array([
        x_test[i // augment, :, tt - window_size // 2:tt + window_size // 2]
        for i, tt in enumerate(t)
    ])
    y_test_window = np.round(
        np.mean(
            np.array([
                y_test[i // augment,
                       tt - window_size // 2:tt + window_size // 2]
                for i, tt in enumerate(t)
            ]), -1))
    if False:  # disabled branch; originally gated on args.data == 'wf'
        minority_index = np.logical_or(y_test_window == 1, y_test_window == 2)
        rand_index = np.random.randint(0, len(y_test_window), 150)
        y_test = np.concatenate(
            [y_test_window[minority_index], y_test_window[rand_index]], 0)
        x_test = np.concatenate(
            [x_test_window[minority_index], x_test_window[rand_index]], 0)
        x_test_window = x_test.transpose((0, 2, 1))  # shape: [n_samples, t_len, d]
        x_test = x_test_window[:, ::2, :]  # Decimate measurements for efficiency
    else:
        y_test = y_test_window
        x_test = x_test_window
        x_test = x_test.transpose((0, 2, 1))  # shape:[n_samples, t_len, d]

    accuracy, s_score, db_score, auc, auprc = [], [], [], [], []
    for cv in range(3):
        shuffled_inds = list(range(len(x_window)))
        random.shuffle(shuffled_inds)
        x_window = x_window[shuffled_inds]
        y_window = y_window[shuffled_inds]
        if args.data == 'wf':
            n_train = int(0.7 * len(x_window))
            x_train = x_window[:n_train]
            y_train = y_window[:n_train]
            x_test = x_window[n_train:]
            y_test = y_window[n_train:]
        else:
            x_train = x_window
            y_train = y_window

        knn = KNeighborsTimeSeries(n_neighbors=args.K,
                                   metric='dtw').fit(x_train)
        kmeans = TimeSeriesKMeans(n_clusters=n_cluster, metric='dtw')
        cluster_labels = kmeans.fit_predict(x_test)

        dist, ind = knn.kneighbors(x_test, return_distance=True)
        # Majority vote over the labels of the K nearest training series
        predictions = np.array(
            [np.bincount(y_train[preds].astype(int)).argmax() for preds in ind])
        y_onehot = np.zeros((len(y_test), n_cluster))
        y_onehot[np.arange(len(y_onehot)), y_test.astype(int)] = 1
        prediction_onehot = np.zeros((len(y_test), n_cluster))
        prediction_onehot[np.arange(len(prediction_onehot)),
                          predictions.astype(int)] = 1

        accuracy.append(accuracy_score(y_test, predictions))
        auc.append(roc_auc_score(y_onehot, prediction_onehot))
        auprc.append(average_precision_score(y_onehot, prediction_onehot))
        s_score.append(
            silhouette_score(x_test.reshape((len(x_test), -1)),
                             cluster_labels))
        db_score.append(
            davies_bouldin_score(x_test.reshape((len(x_test), -1)),
                                 cluster_labels))

    print('\nSummary performance:')
    print('Accuracy: ', np.mean(accuracy) * 100, '+-', np.std(accuracy) * 100)
    print('AUC: ', np.mean(auc), '+-', np.std(auc))
    print('AUPRC: ', np.mean(auprc), '+-', np.std(auprc))
    print('Silhouette score: ', np.mean(s_score), '+-', np.std(s_score))
    print('Davies Bouldin score: ', np.mean(db_score), '+-', np.std(db_score))
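
main() only reads args.data and args.K, so a hypothetical entry point compatible with those attributes (the defaults are illustrative) could be:

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', default='simulation',
                        choices=['simulation', 'wf', 'har'])
    parser.add_argument('--K', type=int, default=1,
                        help='number of neighbors for the KNN baseline')
    main(parser.parse_args())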
Example no. 6
def softdtw_augment_train_set(x_train,
                              y_train,
                              classes,
                              num_synthetic_ts,
                              max_neighbors=5):
    import numpy as np
    from tslearn.neighbors import KNeighborsTimeSeries
    from tslearn.barycenters import softdtw_barycenter
    from tslearn.metrics import gamma_soft_dtw

    # synthetic train set and labels
    synthetic_x_train = []
    synthetic_y_train = []
    # loop through each class
    for c in classes:
        # get the MTS for this class
        c_x_train = x_train[np.where(y_train == c)[0]]
        if len(c_x_train) == 1:
            # skip if there is only one time series for this class
            continue
        # compute appropriate gamma for softdtw for the entire class
        class_gamma = gamma_soft_dtw(c_x_train)
        # loop through the number of synthetic examples needed
        generated_samples = 0
        while generated_samples < num_synthetic_ts:
            # Choose a random representative for the class
            representative_indices = np.arange(len(c_x_train))
            random_representative_index = np.random.choice(
                representative_indices, size=1, replace=False)
            random_representative = c_x_train[random_representative_index]
            # Choose a random number of neighbors (between 1 and max_neighbors,
            # never more than the number of other series in the class)
            random_number_of_neighbors = int(
                np.random.uniform(1, min(max_neighbors, len(c_x_train) - 1),
                                  size=1))
            knn = KNeighborsTimeSeries(
                n_neighbors=random_number_of_neighbors + 1,
                metric='softdtw',
                metric_params={'gamma': class_gamma}).fit(c_x_train)
            random_neighbor_distances, random_neighbor_indices = knn.kneighbors(
                X=random_representative, return_distance=True)
            random_neighbor_indices = random_neighbor_indices[0]
            random_neighbor_distances = random_neighbor_distances[0]
            nearest_neighbor_distance = np.sort(random_neighbor_distances)[1]
            random_neighbors = np.zeros(
                (random_number_of_neighbors + 1, c_x_train.shape[1],
                 c_x_train.shape[2]),
                dtype=float)

            for j, neighbor_index in enumerate(random_neighbor_indices):
                random_neighbors[j, :] = c_x_train[neighbor_index]
            # Choose a random weight vector (and then normalize it)
            weights = np.exp(
                np.log(0.5) * random_neighbor_distances /
                nearest_neighbor_distance)
            weights /= np.sum(weights)
            # Compute tslearn.barycenters.softdtw_barycenter with weights=random weights and gamma value specific to neighbors
            random_neighbors_gamma = gamma_soft_dtw(random_neighbors)
            generated_sample = softdtw_barycenter(random_neighbors,
                                                  weights=weights,
                                                  gamma=random_neighbors_gamma)
            synthetic_x_train.append(generated_sample)
            synthetic_y_train.append(c)
            # Repeat until you have the desired number of synthetic samples for each class
            generated_samples += 1
    # return the synthetic set
    return np.array(synthetic_x_train), np.array(synthetic_y_train)
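
A hedged usage sketch (the array names and the synthetic-sample count are illustrative): given x_train of shape (n_ts, sz, d) and integer labels y_train, augment the training set and stack the synthetic series onto the originals:

classes = np.unique(y_train)
syn_x, syn_y = softdtw_augment_train_set(x_train, y_train, classes,
                                         num_synthetic_ts=10)
x_train_aug = np.concatenate([x_train, syn_x], axis=0)
y_train_aug = np.concatenate([y_train, syn_y], axis=0)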