Esempio n. 1
0
        def merge_crossover(ind1, ind2):
            """Merge each shapelet with its most similar shapelet of the other set.

            A pairwise GAK similarity matrix between ``ind1`` (rows) and
            ``ind2`` (columns) is computed once.  Every shapelet of ``ind1``
            is then replaced by the Euclidean barycenter of itself and its
            most similar, non-identical (similarity != 1.0) shapelet of
            ``ind2``; afterwards the same is done for every shapelet of
            ``ind2``.  NaN entries of each barycenter are dropped.

            Both inputs are modified in place and returned as a tuple
            ``(ind1, ind2)``.
            """
            # Construct a pairwise similarity matrix using GAK
            _all = list(ind1) + list(ind2)
            similarity_matrix = cdist_gak(ind1, ind2, sigma=sigma_gak(_all))

            # Iterate over shapelets in `ind1` and merge them with shapelets
            # from `ind2`
            for row_idx in range(similarity_matrix.shape[0]):
                row = similarity_matrix[row_idx, :]
                # Column indices of all elements that are not equal to 1.0
                candidates = np.flatnonzero(row != 1.0)
                if len(candidates):
                    # Get the timeseries most similar to the one at row_idx.
                    # argmax is a position inside the filtered values, so it
                    # is mapped back to a column index of the full matrix.
                    max_col_idx = candidates[np.argmax(row[candidates])]
                    ts1 = list(ind1[row_idx])
                    ts2 = list(ind2[max_col_idx])
                    # Merge them and remove nans
                    merged = euclidean_barycenter([ts1, ts2])
                    ind1[row_idx] = merged[~np.isnan(merged)]

            # Apply the same for the elements in ind2.  The similarity matrix
            # is not recomputed, while `ind1` already holds the merged
            # shapelets produced by the loop above.
            for col_idx in range(similarity_matrix.shape[1]):
                column = similarity_matrix[:, col_idx]
                candidates = np.flatnonzero(column != 1.0)
                if len(candidates):
                    max_row_idx = candidates[np.argmax(column[candidates])]
                    ts1 = list(ind1[max_row_idx])
                    ts2 = list(ind2[col_idx])
                    merged = euclidean_barycenter([ts1, ts2])
                    ind2[col_idx] = merged[~np.isnan(merged)]

            return ind1, ind2
Esempio n. 2
0
def random_merge_crossover(ind1, ind2, p=0.25):
    """Merge random segments of shapelets from one set with the other set.

    Both inputs are shuffled in place and then paired up with ``zip`` (so
    surplus shapelets of the longer set are dropped).  For each pair whose
    two shapelets are both longer than 4, a merge happens with probability
    ``p``: a segment of random length is picked at a random position in each
    shapelet, and both segments are replaced by their Euclidean barycenter.

    Parameters
    ----------
    ind1, ind2 : mutable sequences of shapelets
        Shuffled in place by this function.
    p : float, default 0.25
        Probability of merging an eligible pair.

    Returns
    -------
    (new_ind1, new_ind2) : tuple of lists
        The paired shapelets, merged where a merge was drawn.
    """
    new_ind1, new_ind2 = [], []
    np.random.shuffle(ind1)
    np.random.shuffle(ind2)
    for shap1, shap2 in zip(ind1, ind2):
        if len(shap1) > 4 and len(shap2) > 4 and np.random.random() < p:
            max_size = min(len(shap1), len(shap2))
            merge_len = np.random.randint(1, max_size)
            shap1_start = np.random.randint(len(shap1) - merge_len)
            shap2_start = np.random.randint(len(shap2) - merge_len)
            shap1_stop = shap1_start + merge_len
            shap2_stop = shap2_start + merge_len

            # Compute the merged segment once, from the ORIGINAL segments, so
            # that both children receive the same barycenter.  Rebuilding
            # shap1 first and then averaging again would feed an
            # already-merged segment into the second barycenter.
            merged = euclidean_barycenter([
                shap1[shap1_start:shap1_stop],
                shap2[shap2_start:shap2_stop],
            ]).flatten()

            shap1 = np.concatenate((
                shap1[:shap1_start].flatten(),
                merged,
                shap1[shap1_stop:].flatten(),
            ))
            shap2 = np.concatenate((
                shap2[:shap2_start].flatten(),
                merged,
                shap2[shap2_stop:].flatten(),
            ))

        new_ind1.append(shap1)
        new_ind2.append(shap2)

    return new_ind1, new_ind2
Esempio n. 3
0
def merge(ts1, ts2):
    """Average two series, aligning the shorter one at a random offset.

    When the lengths differ, a window with the length of the shorter series
    is cut from the longer one at a random start position and averaged with
    the shorter series via ``euclidean_barycenter``.  When the lengths are
    equal, the two series are averaged directly.

    Returns
    -------
    (centroid, start) : tuple
        The flattened barycenter and the start offset of the window inside
        the longer series (0 for equal lengths).
    """
    len1, len2 = len(ts1), len(ts2)

    if len1 == len2:
        return euclidean_barycenter([ts1, ts2]).flatten(), 0

    if len1 > len2:
        # ts1 is the longer series: cut a window of len(ts2) out of it.
        offset = np.random.randint(len1 - len2)
        pair = [ts1[offset:offset + len2], ts2]
    else:
        # ts2 is the longer series: cut a window of len(ts1) out of it.
        offset = np.random.randint(len2 - len1)
        pair = [ts2[offset:offset + len1], ts1]

    return euclidean_barycenter(pair).flatten(), offset
Esempio n. 4
0
def caculate_mid_centers(class_num, iter_num, input_len):
    """Compute and save one Euclidean barycenter per class and iteration.

    For every class index ``i`` in ``range(class_num)`` and every iteration
    index ``j`` in ``range(iter_num)``, the file
    ``number<j>_class<i>_train.csv`` is loaded with ``load_data``, cut to its
    first ``input_len`` columns, averaged with ``euclidean_barycenter`` and
    written as a single-row CSV to ``number<j>_class<i>_centers.csv``.

    Parameters
    ----------
    class_num : int
        Number of classes to process.
    iter_num : int
        Number of iterations per class.
    input_len : int
        Number of leading columns of each loaded array that are kept.

    Returns
    -------
    None
        Results are written to disk; the shape of each loaded slice is
        printed.
    """
    # Raw strings keep the Windows-style backslashes literal without relying
    # on invalid escape sequences such as "\s" or "\L".
    result_prefix = r"....\src\LSTM\result\number"

    for i in range(class_num):
        first_filename = (r"....\src\LSTM\start_point_0\class" + str(i) +
                          "_train.csv")
        # Only needed by a cumulative variant (stack the initial data with all
        # iterations so far, then average) that is not active here; the load
        # is kept so a missing file still fails in the same place.
        first_train_data = load_data(first_filename)

        # Compute the cluster center of each iteration's data separately.
        for j in range(iter_num):
            inter_filename = (result_prefix + str(j) + "_class" + str(i) +
                              "_train.csv")
            inter_data = load_data(inter_filename)[:, :input_len]
            print(inter_data.shape)
            centers = euclidean_barycenter(inter_data)
            # Store the barycenter as a single row.
            centers = pd.DataFrame(np.reshape(centers, (1, len(centers))))
            centers.to_csv(result_prefix + str(j) + "_class" + str(i) +
                           "_centers.csv")
Esempio n. 5
0
 def _update_centroids(self, X):
     """Recompute every cluster center from the series assigned to it.

     A copy of ``self.metric_params`` (or an empty dict when it is ``None``)
     is used, with the key ``"gamma_sdtw"`` renamed to ``"gamma"`` if
     present.  For each cluster ``k`` the members ``X[self.labels_ == k]``
     are averaged with the barycenter routine matching ``self.metric``
     ("dtw", "softdtw", or Euclidean for anything else) and the result is
     stored in ``self.cluster_centers_[k]``.
     """
     # Work on a copy so the estimator's own parameter dict is not modified.
     if self.metric_params is not None:
         metric_params = self.metric_params.copy()
     else:
         metric_params = {}
     if "gamma_sdtw" in metric_params:
         metric_params["gamma"] = metric_params.pop("gamma_sdtw")

     for k in range(self.n_clusters):
         members = X[self.labels_ == k]
         if self.metric == "dtw":
             # The current center is passed as the starting barycenter.
             new_center = dtw_barycenter_averaging(
                 X=members,
                 barycenter_size=None,
                 init_barycenter=self.cluster_centers_[k],
                 metric_params=metric_params,
                 verbose=False)
         elif self.metric == "softdtw":
             new_center = softdtw_barycenter(
                 X=members,
                 max_iter=self.max_iter_barycenter,
                 init=self.cluster_centers_[k],
                 **metric_params)
         else:
             new_center = euclidean_barycenter(X=members)
         self.cluster_centers_[k] = new_center
Esempio n. 6
0
def calculate_test_mse_center(class_num):
    """Plot the Euclidean barycenter of each class's test-MSE file.

    For every class index ``i`` in ``range(class_num)`` the file
    ``mse_test_<i>.csv`` is read from the result directory, its first column
    is dropped, and the Euclidean barycenter of the remaining values is
    plotted in a figure titled ``Cluster<i>``.

    Parameters
    ----------
    class_num : int
        Number of classes to process.

    Returns
    -------
    None
        Each figure is shown with ``plt.show()``.
    """
    # Raw string: keeps the backslashes literal without invalid escapes
    # such as "\s" or "\L".
    root_path = r'....\src\LSTM\test_result'
    for i in range(class_num):
        filename = 'mse_test_' + str(i) + '.csv'
        frame = pd.read_csv(os.path.join(root_path, filename))
        # Drop the first column (presumably a saved index -- confirm against
        # the code that writes these files).
        data = frame.values[:, 1:]
        center = euclidean_barycenter(data)
        plt.plot(center)
        plt.title('Cluster' + str(i))
        plt.show()
Esempio n. 7
0
 def _update_centroids(self, X):
     """Refresh ``self.cluster_centers_`` from the current label assignment.

     For each cluster index, the series of ``X`` whose label equals that
     index are averaged with the barycenter routine selected by
     ``self.metric``: DBA for "dtw", soft-DTW barycenter for "softdtw",
     and the Euclidean barycenter otherwise.  Metric parameters come from
     ``self._get_metric_params()``.
     """
     params = self._get_metric_params()
     for cluster_id in range(self.n_clusters):
         assigned = X[self.labels_ == cluster_id]

         if self.metric == "dtw":
             # Start DBA from the current center.
             self.cluster_centers_[cluster_id] = dtw_barycenter_averaging(
                 X=assigned,
                 barycenter_size=None,
                 init_barycenter=self.cluster_centers_[cluster_id],
                 metric_params=params,
                 verbose=False)
             continue

         if self.metric == "softdtw":
             self.cluster_centers_[cluster_id] = softdtw_barycenter(
                 X=assigned,
                 max_iter=self.max_iter_barycenter,
                 init=self.cluster_centers_[cluster_id],
                 **params)
             continue

         # Any other metric falls back to the Euclidean barycenter.
         self.cluster_centers_[cluster_id] = euclidean_barycenter(X=assigned)
Esempio n. 8
0
import numpy
import matplotlib.pyplot as plt

from tslearn.barycenters import euclidean_barycenter, dtw_barycenter_averaging, softdtw_barycenter
from tslearn.datasets import CachedDatasets

# Seed NumPy's global RNG, load the cached "Trace" dataset and keep only the
# training series whose label is 2.
numpy.random.seed(0)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
X = X_train[y_train == 2]

plt.figure()
plt.rcParams['font.sans-serif'] = ['SimHei']  # needed to render the Chinese titles
plt.rcParams['axes.unicode_minus'] = False  # needed to render the minus sign

# Barycenters to display: the Euclidean (arithmetic) mean and the soft-DTW
# barycenter.
mean_bar = euclidean_barycenter(X)
sdtw_bar = softdtw_barycenter(X, gamma=1., max_iter=100)

# NOTE(review): the second title says "DBA" but the curve comes from
# softdtw_barycenter -- confirm which of the two is intended.
# NOTE(review): the grid has 3 rows but only slots 1 and 2 are drawn.
panels = (
    (mean_bar, "算数平均序列求解"),
    (sdtw_bar, "DBA平均序列求解"),
)
for slot, (barycenter, title) in enumerate(panels, start=1):
    plt.subplot(3, 1, slot)
    # All series in faint black, the barycenter on top in red.
    for ts in X:
        plt.plot(ts.ravel(), "k-", alpha=.2)
    plt.plot(barycenter.ravel(), "r-", linewidth=2)
    plt.title(title)

plt.tight_layout()
plt.show()
Esempio n. 9
0
# Size of X's second axis (presumably the number of time steps per series --
# X is defined outside this excerpt); used below as the upper x-axis limit.
length_of_sequence = X.shape[1]


def plot_helper(barycenter):
    """Draw every series of the module-level ``X`` and ``barycenter`` on top.

    The series are drawn as faint black lines and the barycenter as a thick
    red line, all on the current matplotlib axes.
    """
    # Background: each series of the data set, flattened to 1-D.
    for member in X:
        flat_member = member.ravel()
        plt.plot(flat_member, "k-", alpha=.2)

    # Foreground: the barycenter passed in by the caller.
    flat_center = barycenter.ravel()
    plt.plot(flat_center, "r-", linewidth=2)


# Plot the four barycenter variants in a 4-row grid that shares one x-axis.
# The iterative variants use the same number of iterations (50) and a
# tolerance of 1e-3.
ax1 = plt.subplot(4, 1, 1)
plt.title("Euclidean barycenter")
plot_helper(euclidean_barycenter(X))

plt.subplot(4, 1, 2, sharex=ax1)
plt.title("DBA (vectorized version of Petitjean's EM)")
plot_helper(dtw_barycenter_averaging(X, max_iter=50, tol=1e-3))

plt.subplot(4, 1, 3, sharex=ax1)
plt.title("DBA (subgradient descent approach)")
plot_helper(dtw_barycenter_averaging_subgradient(X, max_iter=50, tol=1e-3))

plt.subplot(4, 1, 4, sharex=ax1)
# Raw string: "\g" is not a valid escape sequence, and the backslash must
# reach matplotlib's mathtext unchanged.
plt.title(r"Soft-DTW barycenter ($\gamma$=1.0)")
plot_helper(softdtw_barycenter(X, gamma=1., max_iter=50, tol=1e-3))

# clip the axes for better readability
ax1.set_xlim([0, length_of_sequence])