import math

import numpy as np
import pandas as pd


def DTWDistance(s1, s2, w: float = math.inf):
    """Return the dynamic-time-warping distance between two sequences.

    The point-wise cost is the squared difference ``(s1[i] - s2[j]) ** 2``;
    the result is the square root of the cheapest accumulated cost over all
    warping paths that stay inside the window.

    Args:
        s1: First sequence; must support ``len()`` and integer indexing, and
            its items must support subtraction and ``** 2``.
        s2: Second sequence, same requirements as ``s1``.
        w: Half-width of the warping window: ``s1[i]`` may only be matched
            with ``s2[j]`` when ``|i - j| <= w``. Infinite (the default)
            means unconstrained. Fractional values are truncated, negative
            values are treated as 0, and the window is always widened to at
            least ``abs(len(s1) - len(s2))`` so that a path can exist.

    Returns:
        The DTW distance. ``0.0`` for two empty sequences, ``inf`` when
        exactly one of them is empty.
    """
    n, m = len(s1), len(s2)
    longest = max(n, m)

    # range() needs integer bounds, so an infinite or oversized window is
    # replaced by the longest length (which already covers every cell) and
    # a finite one is truncated to an int.
    window = longest if w >= longest else int(max(w, 0))
    # A band narrower than the length difference can never reach the corner.
    window = max(window, abs(n - m))

    # cost[i + 1][j + 1] is the cheapest accumulated cost of aligning
    # s1[:i + 1] with s2[:j + 1]; row/column 0 is the "nothing consumed yet"
    # border, with cost[0][0] as the only finite starting point.
    cost = [[math.inf] * (m + 1) for _ in range(n + 1)]
    cost[0][0] = 0

    for i in range(n):
        first = max(0, i - window)
        # "+ 1" because range() excludes its stop value: without it the band
        # is one cell short on the right and the corner may stay at inf.
        stop = min(m, i + window + 1)
        for j in range(first, stop):
            dist = (s1[i] - s2[j]) ** 2
            cost[i + 1][j + 1] = dist + min(
                cost[i][j + 1],  # advance in s1 only
                cost[i + 1][j],  # advance in s2 only
                cost[i][j],  # advance in both
            )

    return math.sqrt(cost[n][m])


if __name__ == "__main__":
    # Take the first ten CSV rows, keep six of them by position and drop the
    # first column.
    frame = pd.read_csv("data/data1.csv").head(10)
    data = np.array(frame)[(0, 1, 3, 5, 6, 8), 1:]
    print(data)
    print(data.shape)

    # NOTE(review): Cluster is neither defined nor imported in the visible
    # source; it has to be brought into scope for this script to run.
    clust = Cluster(data, metric=DTWDistance)
    clust.print(2)
    clust.dendogram()
def basic_features_extract(data):
    """Run ``extract_features`` on ``data`` keyed by the "id" and "time" columns.

    Args:
        data: Passed straight through to ``extract_features``.

    Returns:
        Whatever ``extract_features`` returns for
        ``column_id="id", column_sort="time"``.
    """
    # NOTE(review): extract_features is not imported in the visible source;
    # presumably it is tsfresh's -- confirm.
    feature_table = extract_features(data, column_id="id", column_sort="time")
    return feature_table


def extract_features_from_TS(Data, y):
    """Compute both the full and the relevance-filtered feature sets.

    Args:
        Data: Input handed to ``basic_features_extract`` and to
            ``extract_relevant_features``.
        y: Target passed to ``extract_relevant_features``.

    Returns:
        A tuple ``(extracted_features, features_filtered_direct)``: the
        result of ``basic_features_extract`` after ``impute`` was called on
        it, and the result of ``extract_relevant_features``.
    """
    extracted_features = basic_features_extract(Data)
    # The return value is discarded, so this call only matters if impute
    # modifies its argument in place -- TODO confirm against the library.
    impute(extracted_features)

    relevant_options = {"column_id": "id", "column_sort": "time"}
    features_filtered_direct = extract_relevant_features(Data, y, **relevant_options)
    return extracted_features, features_filtered_direct


if __name__ == "__main__":
    n_series = 10
    n_clust = 4

    # Stack the three feature files row-wise and keep the first n_series rows.
    parts = [np.loadtxt(f"data/f{idx}.csv") for idx in (1, 2, 3)]
    features = np.concatenate(parts, axis=0)[:n_series]
    # Keep six of those rows by position, with all columns.
    features = features[(0, 1, 3, 5, 6, 8), :]
    print(f"Data recive : {features.shape}")

    # NOTE(review): Cluster and plt are not imported in the visible source.
    clust = Cluster(features)
    print("Cluster initialized :)")

    # Plot the length of each item returned by clust.get(n_series).
    lengths = [len(group) for group in clust.get(n_series)]
    positions = list(range(len(lengths)))
    plt.plot(positions, lengths)
    plt.show()

    clust.print(n_clust)
    clust.dendogram()