def test_distance_matrix1_b():
    """Parallel pure-Python distance matrix yields sqrt(2) for the first pair."""
    with util_numpy.test_uses_numpy() as np:
        raw_series = [
            [0, 0, 1, 2, 1, 0, 1, 0, 0],
            [0, 1, 2, 0, 0, 0, 0, 0, 0],
        ]
        arrays = list(map(np.array, raw_series))
        distances = dtw.distance_matrix(arrays, parallel=True, use_c=False)
        expected = math.sqrt(2)
        assert distances[0, 1] == pytest.approx(expected)
def classify(timeseries, hiddens, labels, test_size=0.6, knn=3, n_splits=100):
    """Compare k-NN accuracy on raw series versus hidden representations.

    Two DTW distance matrices are computed once up front: one over
    ``hiddens`` with ``dtw_ndim`` and one over ``timeseries`` with the C
    implementation of ``dtw``. Each matrix is then evaluated with a
    brute-force k-NN classifier on ``n_splits`` random train/test splits of
    the sample indices; both classifiers see the same split in every round.

    :param timeseries: raw series handed to ``dtw.distance_matrix``.
    :param hiddens: hidden representations handed to
        ``dtw_ndim.distance_matrix``; ``len(hiddens)`` is the sample count.
    :param labels: object exposing ``.cpu().numpy()``
        (presumably a torch tensor -- confirm with the callers).
    :param test_size: forwarded to ``train_test_split``.
    :param knn: number of neighbours used by the classifier.
    :param n_splits: number of random splits to evaluate (defaults to the
        previously hard-coded value of 100).
    :return: tuple ``(scores_ts, scores_hidden)`` with one accuracy per split.
    """
    # The classifiers are trained on sample *indices*; the metric built by
    # get_metric() receives the precomputed matrix those indices refer to.
    idxs = np.arange(len(hiddens)).reshape(-1, 1)

    t = time()
    matrix_hidden = dtw_ndim.distance_matrix(hiddens)
    print("hidden_ts: {:.3f}".format(time() - t))

    t = time()
    matrix_ts = dtw.distance_matrix(timeseries, use_c=True)
    print("raw_ts: {:.3f}".format(time() - t))

    # Loop-invariant: convert the labels once instead of once per split.
    y = labels.cpu().numpy()

    scores_ts = []
    scores_hidden = []
    for _ in range(n_splits):
        X_train, X_test, y_train, y_test = train_test_split(idxs, y, test_size=test_size)
        # Raw series first, hidden representation second (same order as before).
        for matrix, scores in ((matrix_ts, scores_ts), (matrix_hidden, scores_hidden)):
            clf = KNeighborsClassifier(metric=get_metric(matrix), algorithm="brute", n_neighbors=knn)
            clf.fit(X_train, y_train)
            scores.append(clf.score(X_test, y_test))

    print("Raw ts score: {:.3f} +- {:.3f}".format(np.mean(scores_ts), np.std(scores_ts)))
    print("Hidden ts score: {:.3f} +- {:.3f}".format(np.mean(scores_hidden), np.std(scores_hidden)))
    return scores_ts, scores_hidden
def test_bug3():
    """Regression test: cluster series of unequal length with a LinkageTree.

    Computes and prints the DTW distance matrix, fits a ``LinkageTree`` and,
    unless visualization is disabled, plots the tree to ``bug3.png`` in
    ``directory`` (or next to a temporary file when ``directory`` is unset).
    """
    with util_numpy.test_uses_numpy() as np:
        # Keep the ragged collection in a plain list: np.array() over arrays
        # of different lengths (without dtype=object) raises ValueError on
        # NumPy >= 1.24.
        series = [
            np.array([1, 2, 1]),
            np.array([0., 1, 2, 0, 0, 0, 0, 0, 0]),
            np.array([1., 2, 0, 0, 0, 0, 0, 1, 1, 3, 4, 5]),
            np.array([0., 0, 1, 2, 1, 0, 1]),
            np.array([0., 1, 2, 0, 0, 0, 0, 0]),
            np.array([1., 2, 0, 0, 0, 0, 0, 1, 1]),
        ]
        ds = dtw.distance_matrix(series)
        print(ds)

        model = clustering.LinkageTree(dtw.distance_matrix, {})
        cluster_idx = model.fit(series)
        print(cluster_idx)

        if directory:
            fn = directory / "bug3.png"
        else:
            # Only the unique temporary name is used; the plot goes to a
            # sibling path with the "_bug3.png" suffix.
            file = tempfile.NamedTemporaryFile()
            fn = Path(file.name + "_bug3.png")

        if not dtwvis.test_without_visualization():
            model.plot(fn, show_ts_label=True)
def test_numpymatrix_transpose():
    """A transposed matrix is a view rather than the original buffer; both
    the nogil C routine and the generic entry point must handle it."""
    columns = [
        [0., 0., 1.],
        [0, 1, 2],
        [1, 2, 0],
        [2, 0, 0],
        [1, 0, 0],
        [0, 0, 0],
        [1, 0, 0],
        [0, 0, 1],
        [0, 0, 0],
    ]
    s = np.array(columns).T

    condensed = dtw_c.distance_matrix_nogil(s)
    m = dtw.distances_array_to_matrix(condensed, len(s))
    m2 = dtw.distance_matrix(s)

    inf = np.inf
    correct = np.array([
        [inf, 1.41421356, 1.73205081],
        [inf, inf, 1.41421356],
        [inf, inf, inf],
    ])
    root2 = math.sqrt(2)
    for result in (m, m2):
        assert result[0, 1] == pytest.approx(root2)
    for result in (m, m2):
        np.testing.assert_almost_equal(correct, result, decimal=4)
def _find_shape_templates(self, patterns):
    """Find the shape templates for the given patterns.

    Every pattern is z-normalised (patterns with zero standard deviation are
    kept unchanged). When there are more patterns than ``self.n_clusters``
    they are grouped by spectral clustering on a DTW-based affinity matrix
    and one representative pattern per cluster is returned; otherwise every
    normalised pattern is returned as its own template.

    :param patterns: sequence of numeric patterns.
    :returns shape_templates : array of arrays
        Found shape templates.
    """
    # normalize the patterns
    norm_patterns = []
    for p in patterns:
        m, s = np.mean(p), np.std(p)
        # a constant pattern has zero spread: keep it as is instead of dividing by zero
        norm_patterns.append(p if s == 0.0 else (p - m) / s)
    norm_patterns = np.array(norm_patterns)

    # not more patterns than clusters: nothing to cluster
    if len(patterns) <= self.n_clusters:
        return norm_patterns

    # DTW distance matrix
    dists = dtw.distance_matrix(norm_patterns, use_nogil=True,
                                window=int(self.warping_width * self.w_size))
    # replace the inf entries by 0 and mirror the matrix so it is symmetric
    dists[dists == np.inf] = 0
    dists = dists + dists.T - np.diag(np.diag(dists))
    affinities = np.exp(-dists * self.alpha)

    # spectral clustering
    spec = SpectralClustering(n_clusters=self.n_clusters, affinity='precomputed')
    spec.fit(affinities)
    # builtin int: the np.int alias was removed in NumPy 1.24
    split_labels = spec.labels_.astype(int)

    # find medoids
    centers = []
    for label in np.unique(split_labels):
        ix = np.where(split_labels == label)[0]
        if len(ix) <= 2:
            # one or two patterns in the cluster: take the first one
            c = ix[0]
        else:
            # more than 2 patterns: pick the member with the smallest summed distance
            # NOTE(review): the sum runs over all columns, not only the
            # members of this cluster -- confirm this is intended.
            c = ix[np.argmin(np.sum(dists[ix, :], axis=1))]
        centers.append(norm_patterns[c])
    return np.array(centers)
def test_bug1_psi():
    """The Python and fast compact distance matrices must agree for psi=1."""
    s = [
        np.array([0., 0, 1, 2, 1, 0, 1, 0, 0]),
        np.array([9., 0, 1, 2, 1, 0, 1, 0, 9]),
    ]
    options = {'compact': True, 'psi': 1}
    res1 = dtw.distance_matrix(s, **options)
    res2 = dtw.distance_matrix_fast(s, **options)
    for result in (res1, res2):
        print(result)
    assert res1 == pytest.approx(res2)
def test_distance_matrix2_e():
    """Parallel nogil C distance matrices return sqrt(2) * n for the first pair.

    The matrix is computed twice: directly through ``dtw_c`` (then expanded
    from its condensed form) and through the generic ``dtw.distance_matrix``.
    """
    n = 1
    nn = 1
    s = [[0., 0, 1, 2, 1, 0, 1, 0, 0] * n,
         [0., 1, 2, 0, 0, 0, 0, 0, 0] * n,
         [1., 2, 0, 0, 0, 0, 0, 1] * n] * nn
    s = [np.array(si) for si in s]
    m1 = dtw_c.distance_matrix_nogil(s, is_parallel=True)
    m1 = dtw.distances_array_to_matrix(m1, len(s))
    m2 = dtw.distance_matrix(s, parallel=True, use_c=True, use_nogil=True)
    expected = math.sqrt(2) * n
    # Compare with a tolerance, like the sibling tests: exact float equality
    # between independently computed square roots is fragile.
    assert m1[0, 1] == pytest.approx(expected), "m1[0,1]={} != {}".format(m1[0, 1], expected)
    assert m2[0, 1] == pytest.approx(expected), "m2[0,1]={} != {}".format(m2[0, 1], expected)
def test_numpymatrix_compact():
    """A 2-D numpy matrix (instead of a list of series) must give the
    expected compact distance array."""
    rows = [
        [0., 0, 1, 2, 1, 0, 1, 0, 0],
        [0., 1, 2, 0, 0, 0, 0, 0, 0],
        [1., 2, 0, 0, 0, 0, 0, 1, 0],
    ]
    s = np.array(rows)
    m = dtw_c.distance_matrix_nogil(s)
    m2 = dtw.distance_matrix(s, compact=True)
    correct = np.array([1.41421356, 1.73205081, 1.41421356])
    root2 = math.sqrt(2)
    for condensed in (m, m2):
        assert condensed[0] == pytest.approx(root2)
    for condensed in (m, m2):
        np.testing.assert_almost_equal(correct, condensed, decimal=4)
def exec_dtw(data_for_dtw):
    """Compute the pairwise DTW distance matrix between indicator columns.

    input: source DataFrame; every column except ``'date'`` is treated as
        one indicator series
    output: DataFrame holding the computed DTW distances, indexed and
        labelled by indicator name
    """
    # Exact comparison: the former ``i not in 'date'`` was a substring test
    # and also dropped columns named e.g. 'a', 'at' or 'te'.
    indicators = [col for col in data_for_dtw.columns if col != 'date']
    values = data_for_dtw[indicators].values
    zscored = stats.zscore(values)
    # Transpose so that each row is one indicator, matching the labels of
    # the returned frame.
    ds = dtw.distance_matrix(zscored.T)
    return pd.DataFrame(ds, index=indicators, columns=indicators)
def test_distance_matrix2_e():
    """Parallel C distance matrices return sqrt(2) * n for the first pair.

    The same nine series (three base series repeated three times) are run
    through ``dtw.distance_matrix_fast`` and through ``dtw.distance_matrix``
    with ``use_c=True``.
    """
    with util_numpy.test_uses_numpy() as np:
        n = 1
        s = [[0., 0, 1, 2, 1, 0, 1, 0, 0] * n,
             [0., 1, 2, 0, 0, 0, 0, 0, 0] * n,
             [1., 2, 0, 0, 0, 0, 0, 1] * n] * 3
        s = [np.array(si) for si in s]
        m1 = dtw.distance_matrix_fast(s, parallel=True)
        m2 = dtw.distance_matrix(s, parallel=True, use_c=True)
        expected = math.sqrt(2) * n
        # Compare with a tolerance, like the sibling tests: exact float
        # equality between independently computed square roots is fragile.
        assert m1[0, 1] == pytest.approx(expected), "m1[0,1]={} != {}".format(m1[0, 1], expected)
        assert m2[0, 1] == pytest.approx(expected), "m2[0,1]={} != {}".format(m2[0, 1], expected)
def run_distance_matrix_block(parallel=False, use_c=False, compact=False):
    """Compute only the block rows 1-4 x columns 3-5 of the distance matrix.

    For the square (non-compact) result, check one entry inside the block
    and one entry outside it, which must be infinite.
    """
    with util_numpy.test_uses_numpy() as np:
        rows = [
            [0., 0, 1, 2, 1, 0, 1, 0, 0],
            [0., 1, 2, 0, 0, 0, 0, 0, 0],
            [1., 2, 0, 0, 0, 0, 0, 1, 1],
            [0., 0, 1, 2, 1, 0, 1, 0, 0],
            [0., 1, 2, 0, 0, 0, 0, 0, 0],
            [1., 2, 0, 0, 0, 0, 0, 1, 1],
        ]
        s = np.array(rows)
        block = ((1, 4), (3, 5))
        m = dtw.distance_matrix(s, block=block, parallel=parallel,
                                use_c=use_c, compact=compact)
        if compact:
            # the compact result is not indexed by (row, column)
            return
        assert m[1, 3] == pytest.approx(math.sqrt(2))
        assert np.isinf(m[1, 2])
def run_distance_matrix_block(parallel=False, use_c=False, use_nogil=False):
    """Compute only the block rows 1-4 x columns 3-5 of the distance matrix.

    Prints the matrix, then checks one entry inside the block and one entry
    outside it, which must be infinite.
    """
    base = [
        [0., 0, 1, 2, 1, 0, 1, 0, 0],
        [0., 1, 2, 0, 0, 0, 0, 0, 0],
        [1., 2, 0, 0, 0, 0, 0, 1, 1],
    ]
    # the six series are the three base series, twice
    s = np.array(base + base)
    options = dict(parallel=parallel, use_c=use_c, use_nogil=use_nogil)
    m = dtw.distance_matrix(s, block=((1, 4), (3, 5)), **options)
    print(m)
    assert m[1, 3] == pytest.approx(math.sqrt(2))
    assert np.isinf(m[1, 2])
def compute_dtw_distance_matrix(ts_list, **kwargs):
    """
    Build the symmetric pairwise DTW distance matrix of a list of time-series,
    using the dtaidistance package, and print how long it took.

    :param ts_list: list of time-series to compare pairwise
    :param kwargs: extra arguments for the dtaidistance.dtw.distance_matrix() function
    :return: dist_matrix
    """
    started = time.time()
    # keep the upper triangle only, then mirror it below the diagonal
    upper = np.triu(dtw.distance_matrix(ts_list, **kwargs))
    dist_matrix = upper + upper.T
    np.fill_diagonal(dist_matrix, 0)
    elapsed_minutes = round((time.time() - started) / 60, 1)
    print("Distance matrix computed in {} minutes".format(elapsed_minutes))
    return dist_matrix
def dynamic_time_warping(input_file):
    """Compute a symmetric DTW distance matrix for the rows of a CSV file.

    The file must have an ``Id`` column; it is split off and every remaining
    column is used as series data (one series per row).

    :param input_file: path (or buffer) accepted by ``pandas.read_csv``.
    :return: tuple ``(id_list, distance_mat)`` -- the ``Id`` column and the
        symmetric distance matrix with a zero diagonal.
    """
    print("***********inside DTw****************")
    data = pd.read_csv(input_file)
    # removing ids for dtw processing
    id_list = data['Id']
    del data['Id']
    values = np.array(data)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start_time = time.perf_counter()
    distance_mat = dtw.distance_matrix(values)
    print ("Distance Calc Took ",time.perf_counter() - start_time, "seconds")

    distance_mat[distance_mat == np.inf] = 0
    # Mirror the strict upper triangle. For a matrix whose lower triangle and
    # diagonal were inf this equals the former ``distance_mat += distance_mat.T``;
    # unlike that form it does not double the values if the library returns
    # an already-symmetric matrix.
    upper = np.triu(distance_mat, k=1)
    distance_mat = upper + upper.T
    return id_list, distance_mat
def test_numpymatrix():
    """A 2-D numpy matrix (instead of a list of series) must give the
    expected upper-triangular distance matrix."""
    with util_numpy.test_uses_numpy() as np:
        rows = [
            [0., 0, 1, 2, 1, 0, 1, 0, 0],
            [0., 1, 2, 0, 0, 0, 0, 0, 0],
            [1., 2, 0, 0, 0, 0, 0, 1, 0],
        ]
        s = np.array(rows)
        m = dtw.distance_matrix_fast(s, only_triu=True)
        m2 = dtw.distance_matrix(s, only_triu=True)

        inf = np.inf
        correct = np.array([
            [inf, 1.41421356, 1.73205081],
            [inf, inf, 1.41421356],
            [inf, inf, inf],
        ])
        root2 = math.sqrt(2)
        for result in (m, m2):
            assert result[0, 1] == pytest.approx(root2)
        for result in (m, m2):
            np.testing.assert_almost_equal(correct, result, decimal=4)
def compute_dtw_dist_mat(ts_list, r=None):
    """
    Build the symmetric pairwise DTW distance matrix of a list of time-series,
    using the dtaidistance package (computed in parallel).

    :param ts_list: list of time-series to compare pairwise
    :type ts_list: list of 1D array
    :param r: distance upper bound, forwarded as ``max_dist`` - if a distance
        is higher than r, the computation stops and the distance is set to
        inf; this parameter only serves to speed up the computation
    :type r: float
    :return: distance matrix
    :rtype: 2D array
    """
    raw = dtw.distance_matrix(ts_list, parallel=True, max_dist=r)
    # keep the upper triangle only, then mirror it below the diagonal
    upper = np.triu(raw)
    dist_matrix = upper + upper.T
    np.fill_diagonal(dist_matrix, 0)
    return dist_matrix
def h_clust():
    """
    Cluster the cryptocurrency series hierarchically on their DTW distances.

    The first 20 samples of every series are skipped (beginning range
    buffer). The distance matrix is printed, flattened row by row into a
    condensed vector, linked with complete linkage, and the series plus the
    resulting dendrogram are shown in one figure.
    """
    skip = 20
    full_series = [
        AMPy, ARDRy, BCNy, BCYy, BELAy, BLKy, BTCDy, BTMy, BTSy,
        BURSTy, CLAMy, DASHy, DCRy, DGBy, DOGEy, ETCy, ETHy, EXPy,
        FLDCy, FLOy, GAMEy, GRCy, HUCy, LBCy, LSKy, LTCy, MAIDy,
        NAVy, NEOSy, NMCy, NXTy, OMNIy, PINKy, POTy, PPCy, RADSy,
        REPy, RICy, SBDy, SCy, STEEMy, STRy, SYSy, VIAy, VRCy,
        VTCy, XBCy, XCPy, XEMy, XMRy, XPMy, XRPy, XVCy, ZECy,
    ]
    series = [values[skip:] for values in full_series]
    series_labels = [
        'AMPy', 'ARDRy', 'BCNy', 'BCYy', 'BELAy', 'BLKy', 'BTCDy', 'BTMy', 'BTSy',
        'BURSTy', 'CLAMy', 'DASHy', 'DCRy', 'DGBy', 'DOGEy', 'ETCy', 'ETHy', 'EXPy',
        'FLDCy', 'FLOy', 'GAMEy', 'GRCy', 'HUCy', 'LBCy', 'LSKy', 'LTCy', 'MAIDy',
        'NAVy', 'NEOSy', 'NMCy', 'NXTy', 'OMNIy', 'PINKy', 'POTy', 'PPCy', 'RADSy',
        'REPy', 'RICy', 'SBDy', 'SCy', 'STEEMy', 'STRy', 'SYSy', 'VIAy', 'VRCy',
        'VTCy', 'XBCy', 'XCPy', 'XEMy', 'XMRy', 'XPMy', 'XRPy', 'XVCy', 'ZECy',
    ]

    dists = dtw.distance_matrix(series)
    print("Distance matrix:\n{}".format(dists))

    # Copy the part of every row right of the diagonal into one flat vector.
    # size_cond() presumably returns the condensed length n*(n-1)/2 -- confirm.
    n_series = len(series)
    dists_cond = np.zeros(size_cond(n_series))
    offset = 0
    for row in range(n_series - 1):
        width = n_series - row - 1
        dists_cond[offset:offset + width] = dists[row, row + 1:]
        offset += width

    z = linkage(dists_cond, method='complete', metric='euclidean')

    fig, axes = plt.subplots(2, 1, figsize=(8, 3))
    for position, serie in enumerate(series):
        axes[0].plot(serie, label=str(series_labels[position]))
    dendrogram(z, ax=axes[1])
    plt.show()
def test_bug3():
    """Regression test: cluster series of unequal length with a LinkageTree.

    Computes and prints the DTW distance matrix, fits a ``LinkageTree`` and
    plots the tree to ``bug3.png`` in ``directory`` (or next to a temporary
    file when ``directory`` is unset).
    """
    # Keep the ragged collection in a plain list: np.array() over arrays of
    # different lengths (without dtype=object) raises ValueError on
    # NumPy >= 1.24.
    series = [
        np.array([1, 2, 1]),
        np.array([0., 1, 2, 0, 0, 0, 0, 0, 0]),
        np.array([1., 2, 0, 0, 0, 0, 0, 1, 1, 3, 4, 5]),
        np.array([0., 0, 1, 2, 1, 0, 1]),
        np.array([0., 1, 2, 0, 0, 0, 0, 0]),
        np.array([1., 2, 0, 0, 0, 0, 0, 1, 1]),
    ]
    ds = dtw.distance_matrix(series)
    print(ds)

    model = clustering.LinkageTree(dtw.distance_matrix, {})
    cluster_idx = model.fit(series)
    print(cluster_idx)

    if directory:
        fn = directory / "bug3.png"
    else:
        # Only the unique temporary name is used; the plot goes to a sibling
        # path with the "_bug3.png" suffix.
        file = tempfile.NamedTemporaryFile()
        fn = Path(file.name + "_bug3.png")

    model.plot(fn, show_ts_label=True)
N_OBS = df.shape[1] - 1

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: MARKDOWN
# # Clustering

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Prepare the clustering input: the first column of df holds the labels and
# the remaining columns hold the series (our FRFs) whose pairwise DTW
# distances are computed below.
val = np.matrix(df.to_numpy()[:, 1:], dtype=np.double)
labels = df.to_numpy()[:, 0]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Square distance matrix (the form that can be fed to sklearn functions).
# Its lower triangle is empty.
ds = dtw.distance_matrix(val)
ds

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Compact 1-D distance array (the form used by SciPy).
ds_compact = dtw.distance_matrix(val, compact=True)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Turn the distance matrix into a full distance matrix: the lower triangle
# becomes the transpose of the upper one and the remaining inf entries
# become 0.
# NOTE(review): ds_full is the same object as ds, so ds is modified as well.
i_lower = np.tril_indices(val.shape[0], -1)
ds_full = ds
ds_full[i_lower] = ds_full.T[i_lower]
ds_full[ds_full == inf] = 0
ds_full
# Plain k-means on the rows of ret_var, coloured by cluster in a scatter plot
X = ret_var.values
kmeans = KMeans(n_clusters=8).fit(X)
centroids = kmeans.cluster_centers_
pl.scatter(X[:, 0], X[:, 1], c=kmeans.labels_, cmap="rainbow")
pl.show()

# One row per index entry of ret_var, next to its cluster label
Company = pd.DataFrame(ret_var.index)
cluster_labels = pd.DataFrame(kmeans.labels_)
df = pd.concat([Company, cluster_labels], axis=1)

# DTW illustration for hierarchical clustering
from dtaidistance import dtw

ds = dtw.distance_matrix(series_listH)
print(type(ds))

# Symmetric distance matrix: element-wise minimum of the matrix and its
# transpose, with a zero diagonal
dsC = np.minimum(ds, ds.T)
np.fill_diagonal(dsC, 0)

import scipy.spatial.distance as ssd

# Convert the redundant n*n square form into a condensed nC2 array
# (squareform needs the symmetric matrix)
distArray = ssd.squareform(dsC)
print(distArray)

# Nested-list form of the symmetric matrix,
# e.g. [[0,0.8,0.9],[0.8,0,0.2],[0.9,0.2,0]]
distList = dsC.tolist()

# K-means with DTW
def test_distance_matrix1_c():
    """Sequential C distance matrix yields sqrt(2) for the first pair."""
    raw_series = [
        [0., 0, 1, 2, 1, 0, 1, 0, 0],
        [0., 1, 2, 0, 0, 0, 0, 0, 0],
    ]
    arrays = list(map(np.array, raw_series))
    distances = dtw.distance_matrix(arrays, parallel=False, use_c=True)
    expected = math.sqrt(2)
    assert distances[0, 1] == pytest.approx(expected)
if __name__ == '__main__': start_time = time.time() np.random.seed(0) '''get data from multiple csv files and merge them into one pandas dataframe''' path = r'data' data = dp.get_multiple_csvs(path) data.fillna(method='ffill') comp_symbols = data.Name.unique() print(data) '''convert DataFrame columns into numpy arrays''' adj_close_series = dp.col_as_ts(data, comp_symbols, col='Adj Close', normalization='minmax') dm = dtw.distance_matrix(adj_close_series) print(dm) print("--- DTW distance matrix %s seconds ---" % (time.time() - start_time)) sys.exit() print("--- nparray transform in %s seconds ---\n" % (time.time() - start_time)) linkage_types = ["single", "complete", "ward"] for lt in linkage_types: plt.figure() hclust = AgglomerativeClustering(n_clusters=8, affinity='euclidean', compute_full_tree=True,
linkage_matrix = np.column_stack([model.children_, model.distances_, counts]).astype(float) # Plot the corresponding dendrogram dendrogram( linkage_matrix, **kwargs, labels=main_df.columns # company symbols ) # hierarchical with DTW # dtw illustration for hierarchical from dtaidistance import dtw ds = dtw.distance_matrix(series_listH) #euclidean method dsC = np.minimum(ds, ds.transpose()) ## create the summetrix distance matrix np.fill_diagonal(dsC, 0) import scipy.spatial.distance as ssd # convert the redundant n*n square matrix form into a condensed nC2 array distArray = ssd.squareform(dsC) # symmetric matrix distList = dsC.tolist() model = AgglomerativeClustering(distance_threshold=0, affinity='precomputed', n_clusters=None, linkage='complete') model = model.fit(distList) plt.title('Hierarchical Clustering Dendrogram with DTW')
def d():
    """Return the distance matrix of ``s``: parallel, pure Python, no nogil."""
    options = dict(parallel=True, use_c=False, use_nogil=False)
    return dtw.distance_matrix(s, **options)
def distance(c_series, ic, jc, subim, S, m, rmin, cmin,
             window=None, max_dist=None, max_step=None,
             max_diff=None, penalty=None, psi=None, pruning=False):
    """This function computes the spatial-temporal distance between \
    two pixels using the DTW distance.

    The DTW options below are forwarded to
    ``dtaidistance.dtw.distance_matrix``; with their defaults the call is
    the same as when no option is given.

    :param c_series: average time series of cluster.
    :type c_series: numpy.ndarray

    :param ic: X coordinate of cluster center.
    :type ic: int

    :param jc: Y coordinate of cluster center.
    :type jc: int

    :param subim: Block of image from the cluster under analysis, indexed
        as (time, row, column).
    :type subim: numpy.ndarray

    :param S: Pattern spacing value.
    :type S: int

    :param m: Compactness value.
    :type m: float

    :param rmin: Minimum row (not used by this function).
    :type rmin: int

    :param cmin: Minimum column (not used by this function).
    :type cmin: int

    :param window: Only allow for maximal shifts from the two diagonals \
    smaller than this number. It includes the diagonal, meaning that an \
    Euclidean distance is obtained by setting window=1.

    :param max_dist: Stop if the returned values will be larger than \
    this value.

    :param max_step: Do not allow steps larger than this value.

    :param max_diff: Return infinity if length of two series is larger \
    (forwarded as ``max_length_diff``).

    :param penalty: Penalty to add if compression or expansion is applied.

    :param psi: Psi relaxation parameter (ignore start and end of matching).
        Useful for cyclical series.

    :param pruning: Prune values based on Euclidean distance \
    (forwarded as ``use_pruning``).

    :returns D: numpy.ndarray distance.
    """
    from dtaidistance import dtw

    # Normalizing factor
    m = m / 10

    n_rows, n_cols = subim.shape[1], subim.shape[2]

    # Reshape to one time series per pixel (pixels x time) and append the
    # cluster series as the last row, so a single distance_matrix call
    # compares every pixel with the cluster centre.
    linear = subim.transpose(1, 2, 0).reshape(n_rows * n_cols, subim.shape[0])
    merge = numpy.vstack((linear, c_series)).astype(numpy.double)

    # Restrict the computation to the last column: pixel i versus the
    # cluster series.
    last = merge.shape[0]
    c = dtw.distance_matrix(merge,
                            block=((0, last), (last - 1, last)),
                            compact=True, use_c=True, parallel=True,
                            use_mp=True,
                            window=window, max_dist=max_dist,
                            max_step=max_step, max_length_diff=max_diff,
                            penalty=penalty, psi=psi, use_pruning=pruning)
    dc = numpy.array(c).reshape(n_rows, n_cols)

    x = numpy.arange(n_rows)
    y = numpy.arange(n_cols)
    xx, yy = numpy.meshgrid(x, y, sparse=True, indexing='ij')

    # Calculate Spatial Distance
    ds = (((xx - ic)**2 + (yy - jc)**2)**0.5)

    # Calculate Spatial-temporal distance
    D = (dc) / m + (ds / S)

    return D
# One array per cluster
x = np.array(nc1)
y = np.array(nc2)
c = np.array(nc3)
d = np.array(nc4)

# Draw every cluster as a line in one figure
plt.figure(figsize=(15, 15))
plt.title("Clusters")
for curve, colour, name in ((x, 'r', 'x'), (y, 'g', 'y'), (c, 'b', 'c'), (d, 'y', 'd')):
    plt.plot(curve, colour, label=name)
plt.show()

# Matrix of the messages appearing in each cluster at a particular time.
# After transposing, each row is a cluster holding 'message ID' elements and
# the columns are time values in chronological order.
print("-----Date matrix-----")
dateMatrix = np.asarray(dtwInput)
# Transpose to get clusters as rows instead of columns
print(dateMatrix.T)
print()

# Distance matrix from the DTW algorithm. The transpose is used because we
# want the distance between clusters (rows).
ds = dtw.distance_matrix(dateMatrix.T)
print('-----Distance matrix from DTW-----')
print(ds)
print()
def d():
    """Return the compact distance matrix of ``s``: parallel, pure Python."""
    options = dict(parallel=True, use_c=False, compact=True)
    return dtw.distance_matrix(s, **options)
# os.path.join is used below; the import is harmless if os is already
# imported earlier in the file.
import os

import numpy as np
from dtaidistance import dtw
from sklearn import metrics

from cobras_ts.cobras_dtw import COBRAS_DTW
from cobras_ts.labelquerier import LabelQuerier

# Location of the UCR archive and the data set to cluster
ucr_path = '/home/toon/Downloads/UCR_TS_Archive_2015'
dataset = 'ECG200'
budget = 100
alpha = 0.5
window = 10

# The first column of the TEST file holds the labels, the rest the series
data = np.loadtxt(os.path.join(ucr_path, dataset, dataset + '_TEST'), delimiter=',')
series = data[:, 1:]
labels = data[:, 0]

# DTW distances with a warping window of `window` percent of the series length
dists = dtw.distance_matrix(series, window=int(0.01 * window * series.shape[1]))
# Replace the inf entries by 0 and mirror the matrix so it is symmetric
dists[dists == np.inf] = 0
dists = dists + dists.T - np.diag(np.diag(dists))
# Turn distances into affinities
affinities = np.exp(-dists * alpha)

clusterer = COBRAS_DTW(affinities, LabelQuerier(labels), budget)
clustering, intermediate_clusterings, runtimes, ml, cl = clusterer.cluster()

# Agreement between the found clustering and the true labels
print(
    metrics.adjusted_rand_score(clustering.construct_cluster_labeling(), labels))