Example #1
import numpy as np
from sklearn.linear_model import SGDClassifier
from tslearn.clustering import TimeSeriesKMeans
from tslearn.utils import to_time_series_dataset


# NOTE: mock_dataset_muscle1, mock_dataset_muscle2, mock_labels and mock_times
# are assumed to be defined elsewhere (see the sketch after this example).
def main():
    X1 = to_time_series_dataset(mock_dataset_muscle1)
    y1 = mock_labels
    X_train1 = X1[:-2]
    y_train1 = y1[:-2]
    X_test1 = X1[-2:]
    y_test1 = y1[-2:]
    # clf1 = KNeighborsTimeSeriesClassifier(n_neighbors=5, metric="dtw")
    clf1 = TimeSeriesKMeans(metric="dtw")
    clf1.fit(X_train1, y_train1)  # y is ignored: TimeSeriesKMeans is unsupervised
    pred_train1 = clf1.predict(X_train1)
    pred_test1 = clf1.predict(X_test1)
    print("TRAINING SET 1")
    print("Prediction: " + str(pred_test1))
    print("Actual: " + str(y_test1))

    print("\n")

    X2 = to_time_series_dataset(mock_dataset_muscle2)
    y2 = mock_labels
    X_train2 = X2[:-2]
    y_train2 = y2[:-2]
    X_test2 = X2[-2:]
    y_test2 = y2[-2:]
    clf2 = TimeSeriesKMeans(metric="dtw")
    # clf2 = KNeighborsTimeSeriesClassifier(n_neighbors=5, metric="dtw")
    clf2.fit(X_train2, y_train2)
    pred_train2 = clf2.predict(X_train2)
    pred_test2 = clf2.predict(X_test2)
    print("TRAINING SET 2")
    print("Prediction: " + str(pred_test2))
    print("Actual: " + str(y_test2))

    print("\n")

    times_train = mock_times[:-2]
    times_test = mock_times[-2:]
    X_train = np.stack((pred_train1, pred_train2, times_train)).transpose()
    X_test = np.stack((pred_test1, pred_test2, times_test)).transpose()
    y_train = np.array(mock_labels[:-2]).reshape((len(X_train), ))
    y_test = mock_labels[-2:]
    # Ensemble: the two models' cluster ids plus the time stamp become the
    # feature vector for a linear classifier.
    sgd = SGDClassifier()
    sgd.fit(X_train, y_train)
    pred = sgd.predict(X_test)
    print("ENSEMBLE")
    print("Prediction: " + str(pred))
    print("Actual: " + str(y_test))
    print("Score: " + str(sgd.score(X_test, y_test)))
Example #2
# Assumes module-level imports (matplotlib.pyplot as plt, xarray as xr) and a
# project helper get_ds_for_dtw_kmeans() that is not shown on this page.
def get_dds_km(cl_lab, ds, z='LEV0', h0=0, h1=24, nc=4):
    # %%
    dds = get_ds_for_dtw_kmeans(cl_lab, ds, z, h0=h0, h1=h1)
    from tslearn.clustering import TimeSeriesKMeans
    km = TimeSeriesKMeans(nc,
                          metric='dtw',
                          metric_params={'sakoe_chiba_radius': 4},
                          random_state=789)
    km.fit(dds.values)
    for c in km.cluster_centers_:
        plt.plot(c)
    plt.show()
    # %%
    labs = km.predict(dds.values)
    lb = xr.zeros_like(dds['date'], dtype=int) + labs
    # %%
    dds['labs'] = lb
    # %%
    dds['labs'].reset_coords(drop=True). \
        to_dataframe()['labs'].value_counts(). \
        sort_index(). \
        plot.bar()
    plt.show()
    # %%
    # dds['hour'] = xr.zeros_like(dds['time'], dtype=float) + \
    #               np.arange(0, 24, .5)
    # dds['nday'] = xr.zeros_like(
    #     dds['date'], dtype=int) + \
    #               np.arange(len(dds['date']))
    # dds = dds.swap_dims({'date': 'nday'})
    # dds = dds.swap_dims({'time': 'hour'})
    return dds, km
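A hypothetical call, assuming cl_lab and ds come from the surrounding analysis code:

# Hypothetical usage; arguments follow the signature above.
# dds, km = get_dds_km(cl_lab, ds, z='LEV0', h0=0, h1=24, nc=4)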
Example #3
def get_cluster_labels(actions, x, n_clusters):
    # Fit k-means with DTW on the training series, then label every split.
    km = TimeSeriesKMeans(n_clusters=n_clusters, metric='dtw').fit(x['train'])
    actions_split = {}
    for split in ['train', 'dev', 'test']:  # 'split' avoids shadowing type()
        # .copy() avoids pandas' SettingWithCopyWarning on the assignment below
        actions_split[split] = actions[actions['type'] == split].copy()
        actions_split[split].loc[:, 'label'] = km.predict(x[split])
    actions = pd.concat(
        [actions_split[split] for split in ['train', 'dev', 'test']])
    return actions
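A minimal usage sketch; the frame layout and array shapes are assumptions inferred from how the function indexes its arguments:

import numpy as np
import pandas as pd
from tslearn.clustering import TimeSeriesKMeans

# Hypothetical inputs: one row per series, grouped by split.
actions = pd.DataFrame({'type': ['train'] * 6 + ['dev'] * 2 + ['test'] * 2})
rng = np.random.RandomState(0)
x = {'train': rng.randn(6, 20, 1),
     'dev': rng.randn(2, 20, 1),
     'test': rng.randn(2, 20, 1)}
labeled = get_cluster_labels(actions, x, n_clusters=2)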
Example #4
# Assumes a cli_parser() helper (sketched below) plus nilearn and tslearn.
from nilearn import image, input_data
from tslearn.clustering import TimeSeriesKMeans
from tslearn.utils import to_time_series_dataset


def run():
    parser = cli_parser()
    args = parser.parse_args()

    # Fit on a subsample: the first 30 volumes, every 80th voxel time series.
    nii = image.index_img(args.input, slice(0, 30))
    masker = input_data.NiftiMasker()
    data = masker.fit_transform(nii)
    ds = to_time_series_dataset(data.T[::80, :])

    model = TimeSeriesKMeans(n_clusters=2, metric="dtw", max_iter=15)
    model.fit(ds)

    # Label every voxel's time series, then write the labels back as an image.
    all_series = to_time_series_dataset(data.T)  # 'all' shadowed a builtin
    mask = model.predict(all_series)
    mask_nii = masker.inverse_transform(mask)
    mask_nii.to_filename(args.output)  # was: mask.nii.to_filename(...)
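cli_parser() is not shown on this page. A plausible sketch, assuming positional input/output paths (only .input and .output are used above):

import argparse

def cli_parser():
    # Hypothetical reconstruction of the missing helper.
    parser = argparse.ArgumentParser(description="DTW k-means voxel clustering")
    parser.add_argument("input", help="path to a 4D NIfTI image")
    parser.add_argument("output", help="path for the cluster-label NIfTI image")
    return parser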
Example #5
# clustering
from tslearn.clustering import TimeSeriesKMeans, KernelKMeans, silhouette_score

# fit the algorithm on the training data;
# metrics available for tuning: "euclidean", "dtw", "softdtw"
km_dba = TimeSeriesKMeans(n_clusters=4,
                          metric="softdtw",
                          max_iter=5,
                          max_iter_barycenter=5,
                          random_state=0).fit(multivariate_time_series_train)
km_dba.cluster_centers_.shape

# predictions on the training data
# (predict() reuses the fitted model; fit_predict() would refit it from scratch)
prediction_train = km_dba.predict(multivariate_time_series_train)
len(prediction_train)

# predictions on the test data
prediction_test = km_dba.predict(multivariate_time_series_test)
len(prediction_test)
prediction_test

# silhouette score (a cluster-validity measure, not accuracy) on the training data
silhouette_score(multivariate_time_series_train,
                 prediction_train,
                 metric="softdtw")
# silhouette score on the test data
silhouette_score(multivariate_time_series_test,
                 prediction_test,
                 metric="softdtw")

############################################ k=2 #########################################
#select randomly time series from first cluster
Example #6
def subsequence_clustering(sequence, changepoints, y_label='y', norm=False):
    """
    Clusters subsequences of a time series, split at the given changepoints.
    Uses the silhouette score to determine the number of clusters.
    :param sequence: np.array of the time series
    :param changepoints: detected changepoints at which the subsequences are built
    :param y_label: name of the y-axis label in the plot
    :param norm: normalise the data using MinMaxScaler
    :return: dict mapping each cluster label to its subsequence ids
    """
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from tslearn.clustering import TimeSeriesKMeans, silhouette_score
    from tslearn.utils import to_time_series_dataset
    from tslearn.preprocessing import TimeSeriesScalerMinMax

    sub_ids = []
    x_index = []
    X = []
    i = 0
    end_p = [len(sequence) - 1]
    for cp in changepoints + end_p:
        X.append(sequence[i:cp])
        index = 'sub_' + str(i) + '_' + str(cp)
        sub_ids.append(index)
        x_index.append([x_id for x_id in range(i, cp + 1)])
        i = cp

    # Normalize the data (y = (x - min) / (max - min))
    if norm:
        X = TimeSeriesScalerMinMax().fit_transform(X)
    X = to_time_series_dataset(X)
    #  Find optimal # clusters by
    #  looping through different configurations for # of clusters and store the respective values for silhouette:
    sil_scores = {}
    for n in range(2, len(changepoints)):
        model_tst = TimeSeriesKMeans(n_clusters=n, metric="dtw", n_init=10)
        model_tst.fit(X)
        sil_scores[n] = (silhouette_score(X,
                                          model_tst.predict(X),
                                          metric="dtw"))

    opt_k = max(sil_scores, key=sil_scores.get)
    print('Number of Clusters in subsequence clustering: ' + str(opt_k))
    model = TimeSeriesKMeans(n_clusters=opt_k, metric="dtw", n_init=10)
    labels = model.fit_predict(X)
    print(labels)

    # build helper df to map metrics to their cluster labels
    df_cluster = pd.DataFrame(list(zip(sub_ids, x_index, model.labels_)),
                              columns=['metric', 'x_index', 'cluster'])
    cluster_metrics_dict = df_cluster.groupby(
        'cluster')['metric'].apply(list).to_dict()

    print('Plotting Clusters')
    #  plot changepoints as vertical lines
    for cp in changepoints:
        plt.axvline(x=cp, ls=':', lw=2, c='0.65')
    #  preprocessing for plotting cluster based
    x_scat = []
    y_scat = []
    cluster = []
    for index, row in df_cluster.iterrows():
        x_seq = row['x_index']
        x_scat.extend(x_seq)
        y_seq = sequence[x_seq[0]:x_seq[-1] + 1]
        y_scat.extend(y_seq)
        label_seq = [row['cluster']]
        cluster.extend(label_seq * len(x_seq))
        # plt.scatter(x_seq, y_seq, label=label_seq)
    # plotting cluster based
    x_scat = np.array(x_scat)
    y_scat = np.array(y_scat)
    cluster = np.array(cluster)  # array, so (cluster == c) is a boolean mask
    for c in np.unique(cluster):
        i = np.where(cluster == c)
        plt.scatter(x_scat[i], y_scat[i], label=c)
    plt.legend()
    plt.title('Subsequence k-means Clustering')
    plt.xlabel('Time index')
    plt.ylabel(y_label)
    plt.show()

    return cluster_metrics_dict
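A minimal usage sketch on synthetic data (series and changepoints invented for illustration; at least four changepoints are needed so the silhouette loop has candidate cluster counts to try):

import numpy as np

# Five alternating flat segments with four detected changepoints.
seq = np.concatenate([np.zeros(20), np.ones(20) * 5, np.zeros(20),
                      np.ones(20) * 5, np.zeros(20)])
clusters = subsequence_clustering(seq, changepoints=[20, 40, 60, 80])
print(clusters)  # cluster ids are arbitrary, e.g. {0: [...], 1: [...]}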
Example #7
def test_kmeans():
    # Module-level imports assumed by this test file:
    # numpy as np, scipy.spatial.distance.cdist,
    # tslearn.clustering.TimeSeriesKMeans,
    # tslearn.metrics.{cdist_dtw, cdist_soft_dtw},
    # tslearn.utils.{to_time_series_dataset, ts_size}
    n, sz, d = 15, 10, 3
    rng = np.random.RandomState(0)
    time_series = rng.randn(n, sz, d)

    km = TimeSeriesKMeans(n_clusters=3, metric="euclidean", max_iter=5,
                          verbose=False, random_state=rng).fit(time_series)
    dists = cdist(time_series.reshape((n, -1)),
                  km.cluster_centers_.reshape((3, -1)))
    np.testing.assert_allclose(km.labels_, dists.argmin(axis=1))
    np.testing.assert_allclose(km.labels_, km.predict(time_series))

    km_dba = TimeSeriesKMeans(n_clusters=3,
                              metric="dtw",
                              max_iter=5,
                              verbose=False,
                              random_state=rng).fit(time_series)
    dists = cdist_dtw(time_series, km_dba.cluster_centers_)
    np.testing.assert_allclose(km_dba.labels_, dists.argmin(axis=1))
    np.testing.assert_allclose(km_dba.labels_, km_dba.predict(time_series))

    km_sdtw = TimeSeriesKMeans(n_clusters=3,
                               metric="softdtw",
                               max_iter=5,
                               verbose=False,
                               random_state=rng).fit(time_series)
    dists = cdist_soft_dtw(time_series, km_sdtw.cluster_centers_)
    np.testing.assert_allclose(km_sdtw.labels_, dists.argmin(axis=1))
    np.testing.assert_allclose(km_sdtw.labels_, km_sdtw.predict(time_series))

    km_nofit = TimeSeriesKMeans(n_clusters=101,
                                verbose=False,
                                random_state=rng).fit(time_series)
    assert(km_nofit._X_fit is None)

    X_bis = to_time_series_dataset([[1, 2, 3, 4],
                                    [1, 2, 3],
                                    [2, 5, 6, 7, 8, 9]])
    TimeSeriesKMeans(n_clusters=2, verbose=False, max_iter=5,
                     metric="softdtw", random_state=0).fit(X_bis)
    TimeSeriesKMeans(n_clusters=2, verbose=False, max_iter=5,
                     metric="dtw", random_state=0,
                     init="random").fit(X_bis)
    TimeSeriesKMeans(n_clusters=2, verbose=False, max_iter=5,
                     metric="dtw", random_state=0,
                     init="k-means++").fit(X_bis)
    TimeSeriesKMeans(n_clusters=2, verbose=False, max_iter=5,
                     metric="dtw", init=X_bis[:2]).fit(X_bis)

    # Barycenter size (nb of timestamps)
    # Case 1. kmeans++ / random init
    n, sz, d = 15, 10, 1
    n_clusters = 3
    time_series = rng.randn(n, sz, d)

    sizes_all_same_series = [sz] * n_clusters
    km_euc = TimeSeriesKMeans(n_clusters=3,
                              metric="euclidean",
                              max_iter=5,
                              verbose=False,
                              init="k-means++",
                              random_state=rng).fit(time_series)
    np.testing.assert_equal(sizes_all_same_series,
                            [ts_size(b) for b in km_euc.cluster_centers_])
    km_dba = TimeSeriesKMeans(n_clusters=3,
                              metric="dtw",
                              max_iter=5,
                              verbose=False,
                              init="random",
                              random_state=rng).fit(time_series)
    np.testing.assert_equal(sizes_all_same_series,
                            [ts_size(b) for b in km_dba.cluster_centers_])

    # Case 2. forced init
    barys = to_time_series_dataset([[1., 2., 3.],
                                    [1., 2., 2., 3., 4.],
                                    [3., 2., 1.]])
    sizes_all_same_bary = [barys.shape[1]] * n_clusters
    # If Euclidean is used, barycenters size should be that of the input series
    km_euc = TimeSeriesKMeans(n_clusters=3,
                              metric="euclidean",
                              max_iter=5,
                              verbose=False,
                              init=barys,
                              random_state=rng)
    np.testing.assert_raises(ValueError, km_euc.fit, time_series)

    km_dba = TimeSeriesKMeans(n_clusters=3,
                              metric="dtw",
                              max_iter=5,
                              verbose=False,
                              init=barys,
                              random_state=rng).fit(time_series)
    np.testing.assert_equal(sizes_all_same_bary,
                            [ts_size(b) for b in km_dba.cluster_centers_])
    km_sdtw = TimeSeriesKMeans(n_clusters=3,
                               metric="softdtw",
                               max_iter=5,
                               verbose=False,
                               init=barys,
                               random_state=rng).fit(time_series)
    np.testing.assert_equal(sizes_all_same_bary,
                            [ts_size(b) for b in km_sdtw.cluster_centers_])

    # A simple dataset, can we extract the correct number of clusters?
    time_series = to_time_series_dataset([[1, 2, 3],
                                          [7, 8, 9, 11],
                                          [.1, .2, 2.],
                                          [1, 1, 1, 9],
                                          [10, 20, 30, 1000]])
    preds = TimeSeriesKMeans(n_clusters=3, metric="dtw", max_iter=5,
                             random_state=rng).fit_predict(time_series)
    np.testing.assert_equal(set(preds), set(range(3)))
    preds = TimeSeriesKMeans(n_clusters=4, metric="dtw", max_iter=5,
                             random_state=rng).fit_predict(time_series)
    np.testing.assert_equal(set(preds), set(range(4)))
Example #8
def k_means_clustering(sd_log):
    """
    k-means clustering of all features using DTW for multivariate time series
    :param sd_log: sd_log object
    :return: cluster_metrics_dict: dict with clusters as keys and features as values
    """
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from tslearn.clustering import TimeSeriesKMeans, silhouette_score
    from tslearn.utils import to_time_series_dataset
    from tslearn.preprocessing import TimeSeriesScalerMinMax

    data = sd_log.data
    # TODO handle outliers
    tmp = sd_log.waiting_time
    data.drop(columns=[sd_log.waiting_time], inplace=True)
    X = []
    # Get data as numpy array
    for col in data.columns:
        X.append(sd_log.get_points(col))

    # Normalize the data (y = (x - min) / (max - min))
    data_norm = data.copy()
    for column in data_norm.columns:
        data_norm[column] = (data_norm[column] - data_norm[column].min()) / (
            data_norm[column].max() - data_norm[column].min())

    X = TimeSeriesScalerMinMax().fit_transform(X)
    X = to_time_series_dataset(X)

    #  Find optimal # clusters by
    #  looping through different configurations for # of clusters and store the respective values for silhouette:
    sil_scores = {}
    for n in range(2, len(data.columns)):
        model_tst = TimeSeriesKMeans(n_clusters=n, metric="dtw", n_init=10)
        model_tst.fit(X)
        sil_scores[n] = (silhouette_score(X,
                                          model_tst.predict(X),
                                          metric="dtw"))

    opt_k = max(sil_scores, key=sil_scores.get)
    model = TimeSeriesKMeans(n_clusters=opt_k, metric="dtw", n_init=10)
    labels = model.fit_predict(X)
    print(labels)

    # build helper df to map metrics to their cluster labels
    df_cluster = pd.DataFrame(list(zip(data.columns, model.labels_)),
                              columns=['metric', 'cluster'])

    # make some helper dictionaries and lists
    cluster_metrics_dict = df_cluster.groupby(
        'cluster')['metric'].apply(list).to_dict()
    cluster_len_dict = df_cluster['cluster'].value_counts().to_dict()
    clusters_dropped = [
        cluster for cluster in cluster_len_dict
        if cluster_len_dict[cluster] == 1
    ]
    clusters_final = [
        cluster for cluster in cluster_len_dict
        if cluster_len_dict[cluster] > 1
    ]

    print('Plotting Clusters')

    fig, axs = plt.subplots(opt_k)  # , figsize=(10, 5))
    # fig.suptitle('Clusters')
    row_i = 0
    # column_j = 0
    # For each label there is,
    # plots every series with that label
    for cluster in cluster_metrics_dict:
        for feat in cluster_metrics_dict[cluster]:
            axs[row_i].plot(data_norm[feat], label=feat, alpha=0.4)
            axs[row_i].legend(loc="best")
        if len(cluster_metrics_dict[cluster]) > 1:
            # draw the mean of the member series in red (assumed intent of
            # the TODO: average the member series, not the cluster id)
            tmp = np.nanmean(
                np.vstack([data_norm[f] for f in cluster_metrics_dict[cluster]]),
                axis=0)
            axs[row_i].plot(tmp, c="red")
        axs[row_i].set_title("Cluster " + str(cluster))
        row_i += 1
        # column_j += 1
        # if column_j % k == 0:
        #    row_i += 1
        #    column_j = 0
    plt.show()

    # return dict {cluster_id: features}
    return cluster_metrics_dict
Example #9
        # Fragment from a larger experiment loop: `i`, `f` (an open log file),
        # `timer`, `kMeansDF` and the data_train/data_test arrays are defined
        # in code not shown on this page.
        X_train = TimeSeriesScalerMeanVariance(mu=0., std=1.) \
            .fit_transform(X_train)
        X_test = TimeSeriesScalerMeanVariance(mu=0., std=1.) \
            .fit_transform(X_test)
        classes = len(np.unique(data_train[:, 0]))
        km = TimeSeriesKMeans(n_clusters=5,
                              max_iter=10,
                              n_init=10,
                              metric="euclidean",
                              verbose=0,
                              random_state=2019)
        km.fit(X_train)

        print(i, file=f)
        preds = km.predict(X_train)
        ars = adjusted_rand_score(data_train[:, 0], preds)
        print("Adjusted Rand Index on Training Set:", ars, file=f)
        kMeansDF.loc[i, "Train ARS"] = ars

        preds_test = km.predict(X_test)
        ars = adjusted_rand_score(data_test[:, 0], preds_test)
        print("Adjusted Rand Index on Test Set:", ars, file=f)
        kMeansDF.loc[i, "Test ARS"] = ars
        print(file=f)
    kMeansTime = timer.elapsed_time()

    print("Time to Run k-Means Experiment in Minutes:",
          kMeansTime / 60,
          file=f)
    kMeansDF.to_pickle(
Example #10
# Find the smallest number of leading principal components whose variance
# percentages sum to at least 90%.
for i in range(len(variance_perc)):
    if sum(variance_perc[:i + 1]) >= 90:
        break
print("Components accounting for >=90% of variance: " + str(i + 1))
components = i + 1

##--------------------------------------Cluster analysis----------------------------------------
##for theory, see https://scikit-learn.org/stable/modules/clustering.html
##for parameters setting, https://tslearn.readthedocs.io/en/stable/gen_modules/clustering/tslearn.clustering.TimeSeriesKMeans.html
# Euclidean k-means
print("Euclidean k-means")
km = TimeSeriesKMeans(n_clusters=components, max_iter=5,
                      metric='euclidean', random_state=0).fit(df_array)
cluster_centre = km.cluster_centers_.shape
#time_series_class=km.predict(df_array_std)
time_series_class=km.predict(df_array)
labels = km.labels_
count_labels=list(Counter(labels).values())
inertia=km.inertia_


## plot the share of points per cluster
labels_for_plot = list(Counter(labels).keys())
fig1, ax1 = plt.subplots()
ax1.pie(count_labels, labels=labels_for_plot, autopct='%1.1f%%',
        shadow=True, startangle=90)
ax1.axis('equal')  # equal aspect ratio ensures the pie is drawn as a circle
plt.title("% of points distribution per cluster")
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
plt.text(0.75, -1, 'no_of_samples=' + str(len(labels)),
         verticalalignment='top', bbox=props)
plt.savefig('Clusters_%_distribution.png')  # save before show() clears the figure
plt.show()


Example #11
# In[6]:


# from scipy.spatial.distance import cdist
from tslearn.clustering import TimeSeriesKMeans
km = TimeSeriesKMeans(n_clusters=3, metric="dtw", max_iter=900,
                      tol=1e-08, random_state=3)
km.fit(X_train)


# In[7]:


predictions = km.predict(X_train)
# print the size of each of the three clusters
for c in range(3):
    c_0 = np.argwhere(predictions == c)
    print(c_0.shape[0], end=' ')
# map each of the 32 series to its cluster id
c_assign = np.zeros(32)
for k in range(3):
    c_0 = np.argwhere(predictions == k)
    c_assign[c_0] = k
#     print(k, c_0)
print(c_assign)


# In[8]:


import matplotlib.pyplot as plt 
Example #13
km_dba4 = TimeSeriesKMeans(n_clusters=4, metric="dtw", max_iter=5,
                           max_iter_barycenter=5, random_state=0).fit(X)
km_dba5 = TimeSeriesKMeans(n_clusters=5, metric="dtw", max_iter=5,
                           max_iter_barycenter=5, random_state=0).fit(X)


# In[11]:


km_sdtw3 = TimeSeriesKMeans(n_clusters=3, metric="softdtw", max_iter=5,
                            max_iter_barycenter=5, metric_params={"gamma": .5},
                            random_state=0).fit(X)
km_sdtw4 = TimeSeriesKMeans(n_clusters=4, metric="softdtw", max_iter=5,
                            max_iter_barycenter=5, metric_params={"gamma": .5},
                            random_state=0).fit(X)
km_sdtw5 = TimeSeriesKMeans(n_clusters=5, metric="softdtw", max_iter=5,
                            max_iter_barycenter=5, metric_params={"gamma": .5},
                            random_state=0).fit(X)


# In[12]:


# km3, km5 and km_dba3 are fitted in earlier notebook cells not shown here
km5_p = km5.predict(X)
km3_p = km3.predict(X)

km_dba3_p = km_dba3.predict(X)
km_dba4_p = km_dba4.predict(X)
km_dba5_p = km_dba5.predict(X)

km_sdtw3_p = km_sdtw3.predict(X)
km_sdtw4_p = km_sdtw4.predict(X)
km_sdtw5_p = km_sdtw5.predict(X)


# In[15]:


l0 = X[np.where(km5_p == 0)]
Example #14
# Assumes earlier code defines: test_windows, train_data_without_attacks,
# test_data, kVal, distanceMatrix and EPS; average_filter() is sketched below.
import itertools
import numpy as np
import pandas as pd
from numpy.fft import fft  # the original may import fft from scipy instead
from tslearn.clustering import TimeSeriesKMeans

test_result = test_windows['result']

if distanceMatrix == 'eucl':
    model = TimeSeriesKMeans(n_clusters=kVal,
                             n_init=10).fit(train_data_without_attacks.values)
elif distanceMatrix == 'dtw':
    model = TimeSeriesKMeans(n_clusters=kVal, metric='dtw',
                             n_init=10).fit(train_data_without_attacks.values)

df = pd.DataFrame()
df['result'] = test_result
df['sr_value'] = -1
df['prediction'] = -1

for i in range(len(test_data)):
    pred = model.predict([test_data.loc[i].values])[0]
    closest_centroid = list(itertools.chain(*model.cluster_centers_[pred]))

    residual = test_data.loc[i].values - closest_centroid
    trans = fft(residual)
    magnitudes = np.sqrt(trans.real**2 + trans.imag**2)
    eps_index = np.where(magnitudes <= EPS)[0]
    magnitudes[eps_index] = EPS

    mag_log = np.log(magnitudes)
    mag_log[eps_index] = 0

    spectral = np.exp(mag_log - average_filter(mag_log, n=48))

    trans.real = trans.real * spectral / magnitudes
    trans.imag = trans.imag * spectral / magnitudes
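average_filter() is not shown on this page. A sketch of the usual helper in this spectral-residual pattern, assuming a trailing moving average (the project's real helper may differ):

import numpy as np

def average_filter(values, n=3):
    # Hypothetical helper: trailing moving average over the last n points,
    # with a proportionally shorter window at the start of the array.
    if n >= len(values):
        n = len(values)
    res = np.cumsum(values, dtype=float)
    res[n:] = res[n:] - res[:-n]
    res[n:] = res[n:] / n
    for i in range(1, n):
        res[i] /= (i + 1)
    return res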
Example #15
        # Fragment of a loop over recordings; `data`, `train`, Z and O come
        # from code not shown here. The first slice is assumed to be a typo
        # for 0:1024, matching the other 1024-sample windows.
        test.append(data[0:1024].to_numpy().reshape(-1, 1))
        test.append(data[1024:2048].to_numpy().reshape(-1, 1))
        test.append(data[2048:3072].to_numpy().reshape(-1, 1))
        test.append(data[3072:4096].to_numpy().reshape(-1, 1))
# DWTed_test = random.sample(DWTed_test, len(DWTed_test))
# test = random.sample(test, len(test))

"""EEG signals classification using the K-means clustering and a multilayer
perceptron neural network model (Umut Orhan 2011)
"""

# K-means clustering:
model = TimeSeriesKMeans(n_clusters=2, metric="softdtw", max_iter=5)
model.fit(np.array(train))

pred = model.predict(np.array(test))
pred

a = np.zeros((320,), dtype=int)
b = np.ones((80,), dtype=int)
true = np.concatenate([a, b])

# NOTE: k-means cluster ids are arbitrary, so this confusion matrix may come
# out row-swapped relative to the true labels.
confusion_matrix(true, pred)

centers = model.cluster_centers_
centers = np.array([centers[0].flatten(), centers[1].flatten()])
centers

plt.plot(centers[0], color='red')
for dataset in [Z, O]:
    for i in range(1):
Example #16
# print(my_array[500, 3])
# print(centroids)
# print(len(labels))

no_clust = 10
# NOTE: to_time_series() builds a single series; the 3-d indexing below
# (t_series[:, 0, 1]) needs a dataset, so to_time_series_dataset() is
# assumed to be the intended call.
t_series = to_time_series_dataset(my_array)
kmeans = TimeSeriesKMeans(n_clusters=no_clust,
                          metric="euclidean",
                          max_iter=8,
                          random_state=0)
kmeans.fit(t_series)
print("The cluster centers are:", kmeans.cluster_centers_)
print("Each time series belongs to:", kmeans.labels_)
labels = kmeans.labels_

y_kmeans = kmeans.predict(t_series)
plt.scatter(t_series[:, 0, 1], [2 for _ in range(length)],
            c=y_kmeans,
            s=30,
            cmap='viridis')
plt.scatter(t_series[:, 182, 1], [1.5 for _ in range(length)],
            c=y_kmeans,
            s=30,
            cmap='viridis')
plt.scatter(t_series[:, 364, 1], [1 for _ in range(length)],
            c=y_kmeans,
            s=30,
            cmap='viridis')
plt.show()

plt.scatter([i for i in range(3 * 365)], t_series[0, :, 3],