def merge_crossover(ind1, ind2):
    """Merge shapelets from one set with shapelets from the other.

    For every shapelet in ``ind1``, find its most GAK-similar (but not
    identical, i.e. similarity != 1.0) shapelet in ``ind2`` and replace it
    by their Euclidean barycenter, dropping NaN positions introduced by
    unequal lengths.  The same is then applied to every shapelet of
    ``ind2``.  Both individuals are modified in place.

    Args:
        ind1: sequence of shapelets (each an array-like time series).
        ind2: sequence of shapelets.

    Returns:
        tuple: the two mutated individuals ``(ind1, ind2)``.
    """
    # Construct a pairwise similarity matrix using GAK; the bandwidth sigma
    # is estimated from the union of both shapelet sets.
    _all = list(ind1) + list(ind2)
    similarity_matrix = cdist_gak(ind1, ind2, sigma=sigma_gak(_all))

    # Iterate over shapelets in `ind1` and merge them with shapelets
    # from `ind2`
    for row_idx in range(similarity_matrix.shape[0]):
        # Ignore entries equal to 1.0 (GAK similarity of identical series)
        mask = similarity_matrix[row_idx, :] != 1.0
        candidate_cols = np.where(mask)[0]
        if len(candidate_cols):
            # BUG FIX: argmax over the filtered values indexes the *masked*
            # array, not the original row; map it back to the original
            # column index so the correct shapelet of `ind2` is selected.
            max_col_idx = candidate_cols[
                np.argmax(similarity_matrix[row_idx, candidate_cols])
            ]
            ts1 = list(ind1[row_idx]).copy()
            ts2 = list(ind2[max_col_idx]).copy()
            # Merge them and remove nans
            ind1[row_idx] = euclidean_barycenter([ts1, ts2])
            ind1[row_idx] = ind1[row_idx][~np.isnan(ind1[row_idx])]

    # Apply the same for the elements in ind2
    for col_idx in range(similarity_matrix.shape[1]):
        mask = similarity_matrix[:, col_idx] != 1.0
        candidate_rows = np.where(mask)[0]
        if len(candidate_rows):
            # Same index-mapping fix as above, per column.
            max_row_idx = candidate_rows[
                np.argmax(similarity_matrix[candidate_rows, col_idx])
            ]
            ts1 = list(ind1[max_row_idx]).copy()
            ts2 = list(ind2[col_idx]).copy()
            ind2[col_idx] = euclidean_barycenter([ts1, ts2])
            ind2[col_idx] = ind2[col_idx][~np.isnan(ind2[col_idx])]

    return ind1, ind2
def random_merge_crossover(ind1, ind2, p=0.25):
    """Merge shapelets from one set with shapelets from the other.

    With probability ``p`` per aligned pair (and only for shapelets longer
    than 4 points), a random window of equal length is cut from each
    shapelet and both windows are replaced by their Euclidean barycenter.

    NOTE: ``ind1`` and ``ind2`` are shuffled in place before pairing.

    Args:
        ind1: list of shapelets (1-D/2-D numpy arrays).
        ind2: list of shapelets.
        p: per-pair probability of performing the merge.

    Returns:
        tuple: two new lists of shapelets ``(new_ind1, new_ind2)``.
    """
    new_ind1, new_ind2 = [], []
    np.random.shuffle(ind1)
    np.random.shuffle(ind2)
    for shap1, shap2 in zip(ind1, ind2):
        if len(shap1) > 4 and len(shap2) > 4 and np.random.random() < p:
            max_size = min(len(shap1), len(shap2))
            merge_len = np.random.randint(1, max_size)
            shap1_start = np.random.randint(len(shap1) - merge_len)
            shap2_start = np.random.randint(len(shap2) - merge_len)
            # BUG FIX: compute the barycenter of the two ORIGINAL windows
            # exactly once.  The old code rebuilt shap1 first and then
            # averaged the already-merged segment into shap2, so shap2
            # received barycenter(barycenter(a, b), b) instead of
            # barycenter(a, b).
            merged = euclidean_barycenter([
                shap1[shap1_start:shap1_start + merge_len],
                shap2[shap2_start:shap2_start + merge_len]
            ]).flatten()
            shap1 = np.concatenate(
                (shap1[:shap1_start].flatten(),
                 merged,
                 shap1[shap1_start + merge_len:].flatten()))
            shap2 = np.concatenate(
                (shap2[:shap2_start].flatten(),
                 merged,
                 shap2[shap2_start + merge_len:].flatten()))
        new_ind1.append(shap1)
        new_ind2.append(shap2)
    return new_ind1, new_ind2
def merge(ts1, ts2):
    """Merge two time series into their Euclidean barycenter.

    When the lengths differ, a random window of the longer series (matching
    the length of the shorter one) is averaged with the shorter series.

    Args:
        ts1: first time series (array-like).
        ts2: second time series (array-like).

    Returns:
        tuple: ``(centroid, start)`` where ``centroid`` is the flattened
        barycenter and ``start`` is the window offset into the longer
        series (0 when both have equal length).
    """
    len1, len2 = len(ts1), len(ts2)
    if len1 > len2:
        start = np.random.randint(len1 - len2)
        pair = [ts1[start:start + len2], ts2]
    elif len2 > len1:
        start = np.random.randint(len2 - len1)
        pair = [ts2[start:start + len1], ts1]
    else:
        # Equal lengths: no window needed, average the full series.
        start = 0
        pair = [ts1, ts2]
    return euclidean_barycenter(pair).flatten(), start
def caculate_mid_centers(class_num, iter_num, input_len):
    """Compute and save per-iteration cluster centers for each class.

    For every class ``i`` and iteration ``j``, loads the iteration's
    training CSV, truncates each series to ``input_len`` columns, computes
    the Euclidean barycenter and writes it to a ``*_centers.csv`` file.

    NOTE(review): the function name has a typo ("caculate") — kept
    unchanged for caller compatibility.  The path literals contain
    unescaped backslashes (e.g. ``\s``); presumably Windows paths —
    TODO confirm they resolve as intended.

    Args:
        class_num: number of classes (clusters) to process.
        iter_num: number of iterations per class.
        input_len: number of leading columns of each series to keep.
    """
    for i in range(class_num):
        first_filename = "....\src\LSTM\start_point_0\class" + str(
            i) + "_train.csv"
        # NOTE(review): only used by the disabled block below.
        first_train_data = load_data(first_filename)
        # Disabled variant: concatenate the data of all iterations and
        # compute a single cluster center from the merged data.
        # concat_data = []
        # for j in range(iter_num):
        #     inter_filename = "....\src\LSTM\\result\\number"+str(j)+"_class"+str(i)+"_train.csv"
        #     inter_data = load_data(inter_filename)
        #     if j == 0:
        #         concat_data = np.vstack((first_train_data, inter_data))
        #         # concat_data = inter_data
        #     else:
        #         concat_data = np.vstack((concat_data, inter_data))
        # # centers = softdtw_barycenter(concat_data, gamma=1.0, max_iter=5)
        # centers = euclidean_barycenter(concat_data)
        # centers = pd.DataFrame(np.reshape(centers, (1, len(centers))))
        # centers.to_csv("D:\研究生\实验室\云环境下时间预测\代码\时间序列聚类\src\LSTM\\result\\number"+str(j)+"_class"+str(i)+"_centers.csv")

        # Compute a cluster center separately for each iteration's data.
        for j in range(iter_num):
            inter_filename = "....\src\LSTM\\result\\number" + str(
                j) + "_class" + str(i) + "_train.csv"
            inter_data = load_data(inter_filename)[:, :input_len]
            print(inter_data.shape)
            # centers = softdtw_barycenter(inter_data, gamma=1.0, max_iter=5)
            centers = euclidean_barycenter(inter_data)
            # Reshape to a single row so the CSV holds one center per file.
            centers = pd.DataFrame(np.reshape(centers, (1, len(centers))))
            centers.to_csv("....\src\LSTM\\result\\number" + str(j) +
                           "_class" + str(i) + "_centers.csv")
def _update_centroids(self, X):
    """Recompute every cluster center from its currently assigned series.

    The barycenter routine is chosen by ``self.metric``: DBA for "dtw",
    soft-DTW for "softdtw", and the plain Euclidean mean otherwise.

    Args:
        X: dataset of time series; rows are selected via ``self.labels_``.
    """
    params = {} if self.metric_params is None else self.metric_params.copy()
    # Legacy alias: forward "gamma_sdtw" to the soft-DTW "gamma" parameter.
    if "gamma_sdtw" in params:
        params["gamma"] = params.pop("gamma_sdtw")
    for k in range(self.n_clusters):
        members = X[self.labels_ == k]
        if self.metric == "dtw":
            self.cluster_centers_[k] = dtw_barycenter_averaging(
                X=members,
                barycenter_size=None,
                init_barycenter=self.cluster_centers_[k],
                metric_params=params,
                verbose=False)
        elif self.metric == "softdtw":
            self.cluster_centers_[k] = softdtw_barycenter(
                X=members,
                max_iter=self.max_iter_barycenter,
                init=self.cluster_centers_[k],
                **params)
        else:
            self.cluster_centers_[k] = euclidean_barycenter(X=members)
def calculate_test_mse_center(class_num):
    """Plot the Euclidean barycenter of each cluster's test-MSE curves.

    Reads ``mse_test_<i>.csv`` for every cluster ``i``, drops the first
    (index) column, computes the barycenter of the remaining rows and
    shows it in a titled matplotlib figure.

    Args:
        class_num: number of clusters whose result files are plotted.
    """
    root_path = '....\src\LSTM\\test_result'
    for cluster_id in range(class_num):
        csv_name = 'mse_test_' + str(cluster_id) + '.csv'
        frame = pd.read_csv(os.path.join(root_path, csv_name))
        # Drop the leading index column written by to_csv.
        values = pd.DataFrame(frame).values[:, 1:]
        # center = softdtw_barycenter(values, gamma=1.0, max_iter=5)
        center = euclidean_barycenter(values)
        plt.plot(center)
        plt.title('Cluster' + str(cluster_id))
        plt.show()
def _update_centroids(self, X):
    """Refresh each cluster center with the metric-specific barycenter.

    Uses DBA for ``metric == "dtw"``, soft-DTW for ``metric == "softdtw"``
    and the Euclidean mean for any other metric.

    Args:
        X: dataset of time series; members of cluster ``k`` are the rows
           where ``self.labels_ == k``.
    """
    metric_params = self._get_metric_params()
    for cluster in range(self.n_clusters):
        members = X[self.labels_ == cluster]
        current = self.cluster_centers_[cluster]
        if self.metric == "dtw":
            center = dtw_barycenter_averaging(
                X=members,
                barycenter_size=None,
                init_barycenter=current,
                metric_params=metric_params,
                verbose=False)
        elif self.metric == "softdtw":
            center = softdtw_barycenter(
                X=members,
                max_iter=self.max_iter_barycenter,
                init=current,
                **metric_params)
        else:
            center = euclidean_barycenter(X=members)
        self.cluster_centers_[cluster] = center
import numpy
import matplotlib.pyplot as plt
from tslearn.barycenters import euclidean_barycenter, dtw_barycenter_averaging, softdtw_barycenter
from tslearn.datasets import CachedDatasets

# Demo script: plot barycenters of all "Trace" training series with label 2.
numpy.random.seed(0)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
X = X_train[y_train == 2]
plt.figure()
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
# Subplot 1: all series (grey) with their Euclidean (arithmetic-mean)
# barycenter in red.  Title is a runtime string, left untranslated.
plt.subplot(3, 1, 1)
for ts in X:
    plt.plot(ts.ravel(), "k-", alpha=.2)
plt.plot(euclidean_barycenter(X).ravel(), "r-", linewidth=2)
plt.title("算数平均序列求解")
# Subplot 2: soft-DTW barycenter overlay.
# NOTE(review): this computes the soft-DTW barycenter, but the title below
# says "DBA" — confirm which method was intended.  Also only 2 of the 3
# subplot rows are used.
plt.subplot(3, 1, 2)
sdtw_bar = softdtw_barycenter(X, gamma=1., max_iter=100)
for ts in X:
    plt.plot(ts.ravel(), "k-", alpha=.2)
plt.plot(sdtw_bar.ravel(), "r-", linewidth=2)
plt.title("DBA平均序列求解")
plt.tight_layout()
plt.show()
# Demo chunk: compare four barycenter variants on the dataset `X`
# (defined earlier in the file).
length_of_sequence = X.shape[1]


def plot_helper(barycenter):
    """Overlay `barycenter` (red) on every series of the data set (grey)."""
    # plot all points of the data set
    for series in X:
        plt.plot(series.ravel(), "k-", alpha=.2)
    # plot the given barycenter of them
    plt.plot(barycenter.ravel(), "r-", linewidth=2)


# plot the four variants with the same number of iterations and a tolerance of
# 1e-3 where applicable
ax1 = plt.subplot(4, 1, 1)
plt.title("Euclidean barycenter")
plot_helper(euclidean_barycenter(X))

plt.subplot(4, 1, 2, sharex=ax1)
plt.title("DBA (vectorized version of Petitjean's EM)")
plot_helper(dtw_barycenter_averaging(X, max_iter=50, tol=1e-3))

plt.subplot(4, 1, 3, sharex=ax1)
plt.title("DBA (subgradient descent approach)")
plot_helper(dtw_barycenter_averaging_subgradient(X, max_iter=50, tol=1e-3))

plt.subplot(4, 1, 4, sharex=ax1)
# BUG FIX: raw string so "\g" is not an invalid escape sequence
# (SyntaxWarning since Python 3.12); the rendered title is unchanged.
plt.title(r"Soft-DTW barycenter ($\gamma$=1.0)")
plot_helper(softdtw_barycenter(X, gamma=1., max_iter=50, tol=1e-3))

# clip the axes for better readability
ax1.set_xlim([0, length_of_sequence])