コード例 #1
0
 def _update_centroids(self, X):
     """Recompute each cluster's center as the barycenter of its members."""
     # Copy so the caller-supplied dict is never mutated.
     params = {} if self.metric_params is None else dict(self.metric_params)
     # Accept "gamma_sdtw" as a legacy alias for "gamma".
     if "gamma_sdtw" in params:
         params["gamma"] = params.pop("gamma_sdtw")
     for cluster_idx in range(self.n_clusters):
         members = X[self.labels_ == cluster_idx]
         if self.metric == "dtw":
             self.cluster_centers_[cluster_idx] = dtw_barycenter_averaging(
                 X=members,
                 barycenter_size=None,
                 init_barycenter=self.cluster_centers_[cluster_idx],
                 metric_params=params,
                 verbose=False)
         elif self.metric == "softdtw":
             self.cluster_centers_[cluster_idx] = softdtw_barycenter(
                 X=members,
                 max_iter=self.max_iter_barycenter,
                 init=self.cluster_centers_[cluster_idx],
                 **params)
         else:
             self.cluster_centers_[cluster_idx] = euclidean_barycenter(
                 X=members)
コード例 #2
0
def create_model_number_of_people(files_path):
    """Build (or load from cache) a per-station model of people counts.

    For every ``<station>.csv`` under *files_path*, the daily 15-minute
    profiles of October 2018 are averaged with a soft-DTW barycenter and a
    per-slot standard deviation is computed.  The result is pickled under
    ``endfolder`` and reloaded on subsequent calls.

    Returns a dict ``{station: [barycenter_values, std_devs]}``.
    """
    if not os.path.exists(endfolder + 'number_of_people.pickle'):
        result = dict()  # {station: [barycenter, std_devs], ...}

        files = glob.glob(files_path + "*.csv")
        for file in files:
            station = os.path.splitext(os.path.basename(file))[0]

            # Bug fix: read_csv(squeeze=True) was removed in pandas 2.0;
            # DataFrame.squeeze("columns") is the supported equivalent.
            series = pd.read_csv(file,
                                 header=0,
                                 parse_dates=[0],
                                 index_col=0).squeeze("columns")
            # '15min' replaces the deprecated '15T' alias (same behavior).
            df_series = pd.DataFrame(series).sort_index().asfreq('15min',
                                                                 fill_value=0)

            start_date = datetime.datetime(2018, 10, 1)
            end_date = datetime.datetime(2018, 11, 1)

            # One array of counts per day (Nov 1 included, as before).
            final_ts = []
            while start_date <= end_date:
                values_by_day = df_series[str(start_date)[:10]]
                final_ts.append(values_by_day.to_numpy())
                start_date = start_date + datetime.timedelta(days=1)

            barycenter = softdtw_barycenter(final_ts, max_iter=50,
                                            tol=1e-3)  # array of 96 arrays

            # Compute the stdev for each 15-minute slot of the day.
            start_time = datetime.time(0, 0, 0)
            end_time = datetime.time(23, 45, 0)

            std_devs = []
            while start_time < end_time:
                # NOTE(review): assumes the CSV has a 'count' column — confirm.
                std = df_series.between_time(
                    str(start_time),
                    str(start_time))['count'].to_numpy().std()
                std_devs.append(std)
                # datetime.time cannot be shifted directly; go via datetime.
                start_time = (datetime.datetime.combine(
                    datetime.date(1, 1, 1), start_time) +
                              datetime.timedelta(minutes=15)).time()
            # Last slot (23:45) is handled outside the loop.
            std = df_series.between_time(
                str(end_time), str(end_time))['count'].to_numpy().std()
            std_devs.append(std)

            result[station] = [
                list(chain.from_iterable(barycenter.tolist())), std_devs
            ]

        with open(endfolder + 'number_of_people.pickle', 'wb') as handle:
            pickle.dump(result, handle, protocol=pickle.HIGHEST_PROTOCOL)

        return result
    else:
        with open(endfolder + 'number_of_people.pickle', 'rb') as handle:
            result = pickle.load(handle)
        return result
コード例 #3
0
def plotAllRepetitions(preprocessed_data, gamma):
    """Plot every repetition in grey plus the arithmetic and soft-DTW means."""
    plt.figure()
    for repetition in preprocessed_data:
        plt.plot(repetition, linewidth=0.5, color='grey')
    arithmetic_mean = np.mean(preprocessed_data, axis=0)
    plt.plot(arithmetic_mean, color='r', label='Arithmetic barycentre')
    sdtw_mean = softdtw_barycenter(preprocessed_data, gamma=gamma)
    plt.plot(sdtw_mean, color='b', label='Soft-DTW barycentre')
    plt.ylabel(r'')
    plt.xlabel(r'')
    plt.legend()
    plt.xlim([0, preprocessed_data.shape[1]])
コード例 #4
0
def CalcCentroid(StackedDF, ClusterResults, gamma=1):
    """Compute one soft-DTW centroid per cluster.

    :param StackedDF: np.array containing cycles stacked row wise
    :param ClusterResults: dataframe with a 'Clusters' column holding
        1-based cluster ids
    :param gamma: soft-DTW smoothing parameter
    :return: list of 2D centroid arrays, one per cluster
    """
    print("Computing Centroids")
    # Soft-DTW barycenter per cluster, iterating the 1-based cluster ids.
    cycles = np.array(StackedDF)
    n_clusters = len(ClusterResults['Clusters'].unique())
    centroids = []
    for cluster_id in range(1, n_clusters + 1):
        members = cycles[ClusterResults['Clusters'] == cluster_id]
        centroids.append(softdtw_barycenter(members, gamma=gamma, max_iter=5))
    return centroids
コード例 #5
0
 def _update_centroids(self, X):
     """Refresh every cluster center using the metric-specific barycenter."""
     params = self._get_metric_params()
     for idx in range(self.n_clusters):
         assigned = X[self.labels_ == idx]
         if self.metric == "dtw":
             self.cluster_centers_[idx] = dtw_barycenter_averaging(
                 X=assigned,
                 barycenter_size=None,
                 init_barycenter=self.cluster_centers_[idx],
                 metric_params=params,
                 verbose=False)
         elif self.metric == "softdtw":
             self.cluster_centers_[idx] = softdtw_barycenter(
                 X=assigned,
                 max_iter=self.max_iter_barycenter,
                 init=self.cluster_centers_[idx],
                 **params)
         else:
             self.cluster_centers_[idx] = euclidean_barycenter(
                 X=assigned)
コード例 #6
0
ファイル: utils.py プロジェクト: hadifawaz1999/Soft_dtw_draw
def barrycenters(xtrain, ytrain, labels, num_in_each_label):
    """Compute one soft-DTW barycenter per class label.

    Parameters
    ----------
    xtrain : array of shape (n_series, series_length)
    ytrain : array of per-series labels, aligned with xtrain
    labels : array of the distinct label values
    num_in_each_label : unused; kept for backward compatibility

    Returns
    -------
    np.ndarray of shape (labels.size, xtrain.shape[1])
    """
    centers = []
    for label in labels:
        # Boolean mask replaces the original element-by-element filtering
        # into reused scratch lists.
        members = np.asarray(xtrain[ytrain == label], dtype=np.float64)
        centers.append(softdtw_barycenter(members,
                                          gamma=1.0,
                                          weights=None,
                                          method='L-BFGS-B',
                                          tol=0.001,
                                          max_iter=50,
                                          init=None))
    # Each barycenter comes back as (length, 1); flatten to one row per label.
    return np.asarray(centers,
                      dtype=np.float64).reshape(labels.size, xtrain.shape[1])
コード例 #7
0
import numpy
import matplotlib.pyplot as plt

from tslearn.barycenters import euclidean_barycenter, dtw_barycenter_averaging, softdtw_barycenter
from tslearn.datasets import CachedDatasets

numpy.random.seed(0)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
X = X_train[y_train == 2]

plt.figure()
plt.rcParams['font.sans-serif'] = ['SimHei']  # render CJK titles correctly
plt.rcParams['axes.unicode_minus'] = False  # show minus signs with CJK fonts
plt.subplot(3, 1, 1)
for ts in X:
    plt.plot(ts.ravel(), "k-", alpha=.2)
plt.plot(euclidean_barycenter(X).ravel(), "r-", linewidth=2)
plt.title("算数平均序列求解")

plt.subplot(3, 1, 2)
sdtw_bar = softdtw_barycenter(X, gamma=1., max_iter=100)
for ts in X:
    plt.plot(ts.ravel(), "k-", alpha=.2)
plt.plot(sdtw_bar.ravel(), "r-", linewidth=2)
# Bug fix: this panel shows the soft-DTW barycenter, not DBA.
plt.title("Soft-DTW平均序列求解")

plt.tight_layout()
plt.show()
コード例 #8
0
 def multi_clustering(self, numbers, model, seq_train, sax_flag):
     """Cluster the training data several times and keep the best run.

     Samples 60% of the saved training set, runs ``single_clustering``
     ``numbers`` times, keeps the run with the smallest inertia, and
     writes the per-cluster series plus the centroids to CSV files under
     ``start_point_<n>/``.

     numbers: how many clustering runs to perform
     model: 'DTW', 'K-Shape', 'K-Means' or 'Kernel-KMeans'
     seq_train: length used when reshaping the saved centroids
     sax_flag: when True, apply SAX discretization to the training data
     """
     out = {}  # best run: cluster id -> member series
     out_cent = []  # best run: centroid series
     min_inertia = float('inf')
     raw_data = pd.read_csv('start_point_' + str(self.start_point) +
                            '\\train.csv')
     df_data = pd.DataFrame(raw_data)
     print(len(raw_data))
     # Randomly sample 60% of the rows (with replacement) for clustering.
     df_data = df_data.sample(frac=0.6,
                              replace=True,
                              random_state=0,
                              axis=0)
     data_raw = df_data.iloc[:, 1:].values
     data_new = df_data.iloc[:, 1:self.seq_train + 1].values
     if sax_flag:
         data_new = self.sax(data_new)
     # NOTE(review): the DBI/elbow search for the optimal cluster count was
     # dead commented-out code and has been removed; self.Centroid must be
     # set before this method is called.
     for i in range(numbers):
         start = time.time()  # time each clustering run
         inertia, output, centers = self.single_clustering(
             data_raw, data_new, self.Centroid, model)
         end = time.time()
         print('聚类耗时:%.10f' % (end - start))
         print(inertia)
         if inertia < min_inertia:
             min_inertia = inertia
             out = output.copy()
             print("Here:" + str(len(out)))
             out_cent = centers.copy()
     # Persist the members of each cluster of the best run.
     for i in range(len(out)):
         data = pd.DataFrame(out[i])
         data.to_csv('start_point_' + str(self.start_point) + '\\class' +
                     str(i) + '.csv')
     if model in ('DTW', 'K-Shape'):
         out_cent = pd.DataFrame(
             np.reshape(out_cent, (self.Centroid, seq_train)))
     elif model == 'K-Means':
         out_cent = pd.DataFrame(out_cent)
     elif model == 'Kernel-KMeans':
         # Kernel k-means yields no centroids; derive them per cluster.
         for i in range(len(out)):
             out_cent.append(
                 softdtw_barycenter(out[i], max_iter=5, tol=1e-3))
         # Bug fix: 'seq_length' was undefined here (NameError); use the
         # 'seq_train' argument as in the DTW/K-Shape branch.
         out_cent = pd.DataFrame(
             np.reshape(out_cent, (self.Centroid, seq_train)))
     out_cent.to_csv('start_point_' + str(self.start_point) +
                     '\\centers.csv')
コード例 #9
0
    for series in X:
        plt.plot(series.ravel(), "k-", alpha=.2)
    # plot the given barycenter of them
    plt.plot(barycenter.ravel(), "r-", linewidth=2)


# plot the four variants with the same number of iterations and a tolerance of
# 1e-3 where applicable
ax1 = plt.subplot(4, 1, 1)
plt.title("Euclidean barycenter")
plot_helper(euclidean_barycenter(X))

plt.subplot(4, 1, 2, sharex=ax1)
plt.title("DBA (vectorized version of Petitjean's EM)")
plot_helper(dtw_barycenter_averaging(X, max_iter=50, tol=1e-3))

plt.subplot(4, 1, 3, sharex=ax1)
plt.title("DBA (subgradient descent approach)")
plot_helper(dtw_barycenter_averaging_subgradient(X, max_iter=50, tol=1e-3))

plt.subplot(4, 1, 4, sharex=ax1)
# Raw string fixes the invalid "\g" escape warning; rendered text unchanged.
plt.title(r"Soft-DTW barycenter ($\gamma$=1.0)")
plot_helper(softdtw_barycenter(X, gamma=1., max_iter=50, tol=1e-3))

# clip the axes for better readability
ax1.set_xlim([0, length_of_sequence])

# show the plot(s)
plt.tight_layout()
plt.show()
コード例 #10
0
    plt.plot(X_out[i].ravel(),
             color=matplotlib.colors.rgb2hex(get_color(w)),
             linewidth=2)
    plt.text(X_out[i].shape[0],
             0.,
             "$X_%d$" % i,
             horizontalalignment="right",
             verticalalignment="baseline",
             fontsize=24)
    plt.xticks([])
    plt.yticks([])

for pos in range(2, 25):
    if pos in [1, 5, 21, 25]:
        # Corner cells are reserved for the input series themselves.
        continue
    plt.subplot(5, 5, pos)
    idxr, idxc = row_col(pos, 5)
    # Bilinear interpolation weights over the four corner series.
    w = numpy.array([(4 - idxr) * (4 - idxc),
                     (4 - idxr) * idxc,
                     idxr * (4 - idxc),
                     idxr * idxc]) / 16
    weighted_bar = softdtw_barycenter(X=X_out, weights=w)
    plt.plot(weighted_bar.ravel(),
             color=matplotlib.colors.rgb2hex(get_color(w)),
             linewidth=2)
    plt.xticks([])
    plt.yticks([])

plt.tight_layout()
plt.show()
コード例 #11
0
def soft_dtw_avg(class_x):
    """Return the soft-DTW barycenter of each class.

    class_x: sequence where class_x[c] holds the series of class c.
    Returns an np.array stacking one barycenter per class.
    """
    # Comprehension replaces the preallocate-then-assign pattern.
    return np.array([softdtw_barycenter(series, gamma=1., max_iter=100)
                     for series in class_x])
コード例 #12
0
def softdtw_augment_train_set(x_train,
                              y_train,
                              classes,
                              num_synthetic_ts,
                              max_neighbors=5):
    """Augment a train set with soft-DTW weighted-barycenter interpolations.

    For each class, repeatedly pick a random representative, find a random
    number of its soft-DTW nearest neighbors, and average them with
    distance-decayed weights to create a synthetic series.

    x_train: array of shape (n_series, length, dims)
    y_train: per-series class labels aligned with x_train
    classes: iterable of class labels to augment
    num_synthetic_ts: synthetic series to generate per class
    max_neighbors: upper bound on the neighbor count drawn per sample

    Returns (synthetic_x_train, synthetic_y_train) as numpy arrays.
    """
    from tslearn.neighbors import KNeighborsTimeSeries
    from tslearn.barycenters import softdtw_barycenter
    from tslearn.metrics import gamma_soft_dtw

    # synthetic train set and labels
    synthetic_x_train = []
    synthetic_y_train = []
    # loop through each class
    for c in classes:
        # Bug fix: select the series of the CURRENT class; the original
        # compared against the constant 0, so every iteration augmented
        # class 0's data regardless of c.
        c_x_train = x_train[np.where(y_train == c)[0]]
        if len(c_x_train) == 1:
            # skip if there is only one time series per set
            continue
        # compute appropriate gamma for softdtw for the entire class
        class_gamma = gamma_soft_dtw(c_x_train)
        # loop through the number of synthetic examples needed
        generated_samples = 0
        while generated_samples < num_synthetic_ts:
            # Choose a random representative for the class
            representative_indices = np.arange(len(c_x_train))
            random_representative_index = np.random.choice(
                representative_indices, size=1, replace=False)
            random_representative = c_x_train[random_representative_index]
            # Choose a random number of neighbors (between 1 and one minus
            # the total number of class representatives)
            random_number_of_neighbors = int(
                np.random.uniform(1, max_neighbors, size=1))
            # +1 because the representative itself is its own 0th neighbor.
            knn = KNeighborsTimeSeries(n_neighbors=random_number_of_neighbors +
                                       1,
                                       metric='softdtw',
                                       metric_params={
                                           'gamma': class_gamma
                                       }).fit(c_x_train)
            random_neighbor_distances, random_neighbor_indices = knn.kneighbors(
                X=random_representative, return_distance=True)
            random_neighbor_indices = random_neighbor_indices[0]
            random_neighbor_distances = random_neighbor_distances[0]
            # Index 1 skips the zero self-distance.
            nearest_neighbor_distance = np.sort(random_neighbor_distances)[1]
            random_neighbors = np.zeros(
                (random_number_of_neighbors + 1, c_x_train.shape[1],
                 c_x_train.shape[2]),
                dtype=float)
            for j, neighbor_index in enumerate(random_neighbor_indices):
                random_neighbors[j, :] = c_x_train[neighbor_index]
            # Exponentially decay each neighbor's weight with its distance,
            # normalized by the nearest non-self neighbor, then normalize.
            weights = np.exp(
                np.log(0.5) * random_neighbor_distances /
                nearest_neighbor_distance)
            weights /= np.sum(weights)
            # Barycenter with random weights and a gamma fitted to the
            # selected neighbors.
            random_neighbors_gamma = gamma_soft_dtw(random_neighbors)
            generated_sample = softdtw_barycenter(random_neighbors,
                                                  weights=weights,
                                                  gamma=random_neighbors_gamma)
            synthetic_x_train.append(generated_sample)
            synthetic_y_train.append(c)
            # Repeat until the desired number of synthetic samples is reached
            generated_samples += 1
    # return the synthetic set
    return np.array(synthetic_x_train), np.array(synthetic_y_train)