def test_controlchart():
    """Cluster a subsample of the synthetic-control data and plot the tree.

    Loads ``rsrc/synthetic_control.data`` (located next to this file) into a
    600x60 array, keeps every 20th row, fits a ``LinkageTree`` using
    ``dtw.distance_matrix_fast`` and plots it on a 1x2 matplotlib figure.

    ``directory`` is not a parameter here; it is looked up from the enclosing
    scope. When it is falsy the output name is derived from a temporary file.
    """
    import matplotlib.pyplot as plt

    here = os.path.abspath(os.path.dirname(__file__))
    data_path = os.path.join(here, 'rsrc', 'synthetic_control.data')

    table = np.zeros((600, 60))
    with open(data_path, 'r') as handle:
        for row, text in enumerate(handle):
            table[row, :] = text.split()
    subsample = [table[row, :] for row in range(0, 600, 20)]

    tree = clustering.LinkageTree(dtw.distance_matrix_fast, {})
    tree.fit(subsample)

    if not directory:
        # The temporary file only supplies a unique name prefix.
        tmp = tempfile.NamedTemporaryFile()
        figure_path = tmp.name + "_hierarchy.png"
    else:
        figure_path = os.path.join(directory, "hierarchy.png")

    _, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 10))

    def label_for(index):
        return "ts-" + str(index)

    tree.plot(
        figure_path,
        axes=axes,
        show_ts_label=label_for,
        show_tr_label=True,
        ts_label_margin=-10,
        ts_left_margin=10,
        ts_sample_length=1,
    )
    print("Figure saved to", figure_path)
def test_bug3():
    """Regression test: cluster and plot six series of unequal length.

    Prints the DTW distance matrix and the result of ``LinkageTree.fit``,
    then plots the tree unless ``dtwvis.test_without_visualization()`` is
    truthy. ``directory`` is looked up from the enclosing scope; when it is
    falsy the output path is derived from a temporary file name.
    """
    with util_numpy.test_uses_numpy() as np:
        # The series have different lengths, so the outer container has to be
        # an explicit object array: NumPy >= 1.24 raises ValueError when asked
        # to build a ragged array implicitly (earlier versions only warned).
        series = np.array([
            np.array([1, 2, 1]),
            np.array([0., 1, 2, 0, 0, 0, 0, 0, 0]),
            np.array([1., 2, 0, 0, 0, 0, 0, 1, 1, 3, 4, 5]),
            np.array([0., 0, 1, 2, 1, 0, 1]),
            np.array([0., 1, 2, 0, 0, 0, 0, 0]),
            np.array([1., 2, 0, 0, 0, 0, 0, 1, 1]),
        ], dtype=object)
        ds = dtw.distance_matrix(series)
        print(ds)

        model = clustering.LinkageTree(dtw.distance_matrix, {})
        cluster_idx = model.fit(series)
        print(cluster_idx)

        if directory:
            fn = directory / "bug3.png"
        else:
            # The temporary file only supplies a unique name prefix.
            file = tempfile.NamedTemporaryFile()
            fn = Path(file.name + "_bug3.png")

        if not dtwvis.test_without_visualization():
            model.plot(fn, show_ts_label=True)
def test_linkage_tree():
    """Fit a ``LinkageTree`` on seven equal-length series; save plot and dot output.

    The figure goes to ``hierarchy.png`` and the text returned by
    ``model.to_dot()`` to ``hierarchy.dot``. Both land in ``directory`` (looked
    up from the enclosing scope) when it is truthy, otherwise next to a
    temporary file whose name is used as a prefix.
    """
    observations = np.array([
        [0., 0, 1, 2, 1, 0, 1, 0, 0],
        [0., 1, 2, 0, 0, 0, 0, 0, 0],
        [1., 2, 0, 0, 0, 0, 0, 1, 1],
        [0., 0, 1, 2, 1, 0, 1, 0, 0],
        [0., 1, 2, 0, 0, 0, 0, 0, 0],
        [1., 2, 0, 0, 0, 0, 0, 1, 1],
        [1., 2, 0, 0, 0, 0, 0, 1, 1]])
    tree = clustering.LinkageTree(dtw.distance_matrix_fast, {})
    tree.fit(observations)

    if not directory:
        tmp = tempfile.NamedTemporaryFile()
        figure_path = tmp.name + "_hierarchy.png"
        dot_path = tmp.name + "_hierarchy.dot"
    else:
        figure_path = os.path.join(directory, "hierarchy.png")
        dot_path = os.path.join(directory, "hierarchy.dot")

    tree.plot(figure_path)
    print("Figure saved to", figure_path)

    with open(dot_path, "w") as dot_file:
        print(tree.to_dot(), file=dot_file)
    print("Dot saved to", dot_path)
def linkage_tree(self, df):
    """Fit a DTW linkage tree on ``df``, persist the model and optionally plot it.

    The fitted model is stored on ``self.model`` and pickled to ``model.pkl``
    in the current working directory. When ``run_plots`` (looked up from the
    enclosing scope) is truthy, the tree is plotted and the figure is resized
    to 17x20 inches.

    The ``return`` statement used to sit directly after ``fit``, which made
    the pickling and plotting code unreachable; it now comes last.

    :param df: Collection of series, passed unchanged to ``LinkageTree.fit``.
    :return: The value returned by ``LinkageTree.fit`` for ``df``.
    """
    print('Producing linkage Tree')
    self.model = clustering.LinkageTree(dtw.distance_matrix_fast, {})
    clusters_dtw = self.model.fit(df)

    # Context manager guarantees the pickle file is flushed and closed.
    with open('model.pkl', 'wb') as model_file:
        pickle.dump(self.model, model_file)

    if run_plots:
        f, ax = self.model.plot()
        f.set_size_inches(17, 20)

    return clusters_dtw
def dtai_dendogram(series, dir_name):
    """Fit a DTW ``LinkageTree`` on ``series`` and save its plot under ``dir_name``.

    ``window_size``, ``psi_size``, ``num_series`` and ``num_pts_per_series``
    are not parameters; they are looked up from the enclosing scope. The first
    two are forwarded as the ``window`` and ``psi`` distance options, the last
    two are used in the output file name and the sample-length scaling.

    :param series: Collection of series passed to ``LinkageTree.fit``.
    :param dir_name: Directory prefix (string) for the output PNG.
    """
    from dtaidistance import clustering

    distance_options = {'window': window_size, 'psi': psi_size}
    tree = clustering.LinkageTree(dtw.distance_matrix_fast, distance_options)
    tree.fit(series)

    target = (dir_name + "/dendogram_dtai_" + str(num_series)
              + "_" + str(num_pts_per_series) + ".png")

    def label_for(x):
        return "ts-" + str(x)

    tree.plot(
        filename=target,
        axes=None,
        ts_height=.5,
        bottom_margin=.4,
        top_margin=.4,
        ts_left_margin=.2,
        ts_sample_length=1/num_pts_per_series,
        tr_label_margin=.1,
        tr_left_margin=0,
        ts_label_margin=-.25,
        show_ts_label=label_for,
        show_tr_label=True,
        cmap='viridis_r',
        ts_color=None,
    )
def test_plotbug1():
    """Plot the linkage tree of two series with different lengths (13 and 12).

    The figure is written to ``clustering.png`` inside ``directory`` (looked up
    from the enclosing scope) when it is truthy, otherwise to a path derived
    from a temporary file name.
    """
    first = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 0])
    second = np.array([0., 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0])

    tree = clustering.LinkageTree(dtw.distance_matrix, {})
    tree.fit((first, second))

    if not directory:
        tmp = tempfile.NamedTemporaryFile()
        target = tmp.name + "_clustering.png"
    else:
        target = os.path.join(directory, "clustering.png")

    tree.plot(target)
    print("Figure save to", target)
def test_bug1():
    """Fit and plot a ``LinkageTree`` on six series of nine samples each.

    The output path is ``directory / "hierarchy.png"`` when ``directory``
    (looked up from the enclosing scope) is truthy, otherwise a ``Path``
    derived from a temporary file name.
    """
    rows = [
        [0., 0, 1, 2, 1, 0, 1, 0, 0],
        [0., 1, 2, 0, 0, 0, 0, 0, 0],
        [1., 2, 0, 0, 0, 0, 0, 1, 1],
        [0., 0, 1, 2, 1, 0, 1, 0, 0],
        [0., 1, 2, 0, 0, 0, 0, 0, 0],
        [1., 2, 0, 0, 0, 0, 0, 1, 1],
    ]
    observations = np.array(rows)

    tree = clustering.LinkageTree(dtw.distance_matrix_fast, {})
    tree.fit(observations)

    if not directory:
        tmp = tempfile.NamedTemporaryFile()
        target = Path(tmp.name + "_hierarchy.png")
    else:
        target = directory / "hierarchy.png"

    tree.plot(target)
    print("Figure saved to", target)
def test_plotbug1():
    """Fit a ``LinkageTree`` on two unequal-length series; plot it if allowed.

    Runs inside ``util_numpy.test_uses_numpy()``, which supplies ``np``.
    Plotting is skipped when ``dtwvis.test_without_visualization()`` is
    truthy. ``directory`` is looked up from the enclosing scope.
    """
    with util_numpy.test_uses_numpy() as np:
        first = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 0])
        second = np.array([0., 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0])

        tree = clustering.LinkageTree(dtw.distance_matrix, {})
        tree.fit((first, second))

        # Nothing left to do when visualisation is disabled.
        if dtwvis.test_without_visualization():
            return

        if not directory:
            tmp = tempfile.NamedTemporaryFile()
            target = tmp.name + "_clustering.png"
        else:
            target = os.path.join(directory, "clustering.png")

        tree.plot(target)
        print("Figure save to", target)
def test_bug1():
    """Fit a ``LinkageTree`` on six nine-sample series; plot it if allowed.

    Runs inside ``util_numpy.test_uses_numpy()``, which supplies ``np``. The
    output path is resolved first (from the enclosing-scope ``directory`` or a
    temporary file name); plotting and the confirmation message are skipped
    when ``dtwvis.test_without_visualization()`` is truthy.
    """
    with util_numpy.test_uses_numpy() as np:
        rows = [
            [0., 0, 1, 2, 1, 0, 1, 0, 0],
            [0., 1, 2, 0, 0, 0, 0, 0, 0],
            [1., 2, 0, 0, 0, 0, 0, 1, 1],
            [0., 0, 1, 2, 1, 0, 1, 0, 0],
            [0., 1, 2, 0, 0, 0, 0, 0, 0],
            [1., 2, 0, 0, 0, 0, 0, 1, 1],
        ]
        observations = np.array(rows)

        tree = clustering.LinkageTree(dtw.distance_matrix_fast, {})
        tree.fit(observations)

        if not directory:
            tmp = tempfile.NamedTemporaryFile()
            target = Path(tmp.name + "_hierarchy.png")
        else:
            target = directory / "hierarchy.png"

        if dtwvis.test_without_visualization():
            return
        tree.plot(target)
        print("Figure saved to", target)
def test_controlchart(directory=None):
    """Cluster every 20th synthetic-control series and save the tree plot.

    Loads ``rsrc/synthetic_control.data`` (located next to this file) into a
    600x60 array, keeps rows 0, 20, 40, ... and fits a ``LinkageTree`` using
    ``dtw.distance_matrix_fast``.

    :param directory: Output directory for ``hierarchy.png``. When falsy, the
        path is derived from a temporary file name instead.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    source_path = os.path.join(base_dir, 'rsrc', 'synthetic_control.data')

    values = np.zeros((600, 60))
    with open(source_path, 'r') as handle:
        for row, text in enumerate(handle):
            values[row, :] = text.split()
    picked = [values[row, :] for row in range(0, 600, 20)]

    tree = clustering.LinkageTree(dtw.distance_matrix_fast, {})
    tree.fit(picked)

    if not directory:
        tmp = tempfile.NamedTemporaryFile()
        target = tmp.name + "_hierarchy.png"
    else:
        target = os.path.join(directory, "hierarchy.png")

    tree.plot(target)
    print("Figure saved to", target)
def test_bug3():
    """Regression test: cluster and plot six series of unequal length.

    Prints the DTW distance matrix and the result of ``LinkageTree.fit``,
    then plots the tree with series labels. ``directory`` is looked up from
    the enclosing scope; when it is falsy the output path is derived from a
    temporary file name.
    """
    # The series have different lengths, so the outer container has to be an
    # explicit object array: NumPy >= 1.24 raises ValueError when asked to
    # build a ragged array implicitly (earlier versions only warned).
    series = np.array([
        np.array([1, 2, 1]),
        np.array([0., 1, 2, 0, 0, 0, 0, 0, 0]),
        np.array([1., 2, 0, 0, 0, 0, 0, 1, 1, 3, 4, 5]),
        np.array([0., 0, 1, 2, 1, 0, 1]),
        np.array([0., 1, 2, 0, 0, 0, 0, 0]),
        np.array([1., 2, 0, 0, 0, 0, 0, 1, 1]),
    ], dtype=object)
    ds = dtw.distance_matrix(series)
    print(ds)

    model = clustering.LinkageTree(dtw.distance_matrix, {})
    cluster_idx = model.fit(series)
    print(cluster_idx)

    if directory:
        fn = directory / "bug3.png"
    else:
        # The temporary file only supplies a unique name prefix.
        file = tempfile.NamedTemporaryFile()
        fn = Path(file.name + "_bug3.png")

    model.plot(fn, show_ts_label=True)
def d():
    """Fit a fresh DTW ``LinkageTree`` on ``s`` (from the enclosing scope).

    :return: The value returned by ``LinkageTree.fit``.
    """
    return clustering.LinkageTree(dtw.distance_matrix_fast, {}).fit(s)
def get_cluster():
    """
    Cluster the time series of one operation by their DTW distances.

    Reads the segment table and the 2018 data table from fixed paths, selects
    the segments of operation 28 / program 1108805036, and collects the
    ``SPINDLE_LOAD`` values of the data rows falling inside each segment.
    Iteration stops at the first segment index above ``YEAR_INDEX_LIMIT``.
    The collected series are clustered with the custom hierarchical model
    (with and without tree tracking) and with the linkage-based model; the two
    tree plots are saved to fixed paths. Failures in either tree step are
    printed rather than raised.
    """
    aux_file_path = r'C:\TFM\auxdata\hist_protected.csv'
    data_path = r'C:\TFM\data\2018\2018.csv'
    hierarchical_plot = r'C:\TFM\dtw\hierarchical_cluster.png'
    linkage_plot = r'C:\TFM\dtw\linkage_cluster.png'

    segments = pd.read_csv(aux_file_path, header=0, delimiter=',',
                           parse_dates=[SEGMENT_BEGIN, SEGMENT_END])
    measurements = pd.read_csv(data_path, header=0, delimiter=',',
                               parse_dates=[DATE])

    op_no = 28
    program_number = 1108805036

    # Begin and end timestamps of every segment of the chosen operation/program.
    wanted = ((segments[OPERATION_ID_NUMBER] == op_no)
              & (segments[PROGRAM_NAME] == program_number))
    chosen = segments[wanted]
    begin_date = chosen[SEGMENT_BEGIN]
    end_date = chosen[SEGMENT_END]

    series = []
    for item in begin_date.index:
        if item > YEAR_INDEX_LIMIT:
            break
        in_segment = ((measurements[DATE] >= begin_date[item])
                      & (measurements[DATE] <= end_date[item]))
        window = measurements.loc[in_segment]
        if window.empty:
            continue
        series.append(np.array(window[SPINDLE_LOAD]))

    # Custom hierarchical clustering.
    flat_model = clustering.Hierarchical(dtw.distance_matrix_fast, {})
    flat_model.fit(series)

    try:
        # Wrap the hierarchical model so the full tree is tracked.
        tree_model = clustering.HierarchicalTree(flat_model)
        tree_model.fit(series)
        tree_model.plot(hierarchical_plot, show_tr_label=True)
    except Exception as ex:
        print(ex)

    # Linkage-based clustering.
    try:
        linkage_model = clustering.LinkageTree(dtw.distance_matrix_fast, {})
        linkage_model.fit(series)
        linkage_model.plot(linkage_plot, show_tr_label=True)
    except Exception as ex:
        print(ex)
df = df.T # transpose the data df = df.values ds = dtw.distance_matrix_fast(df) # get dist matrix ds[ds == inf] = 0 # replace all infinity vals in the dist matrix with 0. pd.DataFrame(ds).to_excel("ds.xlsx") # save dist matrix to a xlsx. # clustering starts # Custom Hierarchical clustering model1 = clustering.Hierarchical(dtw.distance_matrix_fast, {}) # Augment Hierarchical object to keep track of the full tree model2 = clustering.HierarchicalTree(model1) # SciPy linkage clustering model3 = clustering.LinkageTree(dtw.distance_matrix_fast, {}) cluster_idx = model3.fit(df) # plot fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 15)) model3.plot("hierarchy.png", axes=ax, show_ts_label=head, show_tr_label=True, ts_label_margin=-10, ts_left_margin=10, ts_sample_length=1) # to find number of clusters NumberOfClusters=range(2,30) silhouette_score_values=list() for i in NumberOfClusters:
def _read_venue_series(infile, smoothing):
    """Parse tab-separated ``venue, v1, v2, ...`` lines into ``{venue: ndarray}``."""
    series = {}
    with open(infile) as handle:
        for line in handle:
            fields = line.strip().split('\t')
            values = np.asarray([float(field) for field in fields[1:]])
            if smoothing == 'smooth':
                values = savgol_filter(values, 5, 3)
            series[fields[0]] = values
    return series


def cluster_the_ts_curves(infile, outfolder, maturity, smoothing):
    """Cluster venue time series with a DTW linkage tree and export the top clusters.

    Every line of ``infile`` holds a venue name followed by tab-separated
    numeric values. After fitting the linkage tree, the merge steps are
    replayed one by one. After each merge, the (at most 10) largest clusters
    with more than 100 and fewer than ``len(series) / 2`` members are
    selected. Whenever those clusters together cover more than half of all
    series, one panel per cluster is drawn (member curves plus a binned
    average curve) and the venue names, average curves and figure are written
    below ``outfolder/maturity``.

    :param infile: Path of the tab-separated input file.
    :param outfolder: Root directory for all outputs.
    :param maturity: Sub-directory name used below ``outfolder``.
    :param smoothing: ``'smooth'`` applies ``savgol_filter(values, 5, 3)`` to
        every series, ``'notsmooth'`` uses the raw values.
    :raises ValueError: If ``smoothing`` is neither of the two supported
        values. Previously this only printed a message per input line and
        carried on without any data.
    """
    if smoothing not in ('smooth', 'notsmooth'):
        raise ValueError(
            "smoothing must be 'smooth' or 'notsmooth', got %r" % (smoothing,))

    series = _read_venue_series(infile, smoothing)
    # Materialise once; indexing a fresh list(series.values()) inside the
    # plotting loops was accidentally quadratic.
    venue_names = list(series.keys())
    curves = list(series.values())
    n_series = len(curves)

    # (row, col) slots of the 2x5 axes grid, in fill order.
    indicies = [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (1, 0), (1, 1),
                (1, 2), (1, 3), (1, 4), (1, 5)]

    model3 = clustering.LinkageTree(dtw.distance_matrix_fast, {})
    model3.fit(curves)
    linkage_matrix = model3.linkage

    # NOTE(review): this creates `maturity` relative to the current working
    # directory, not below `outfolder` - kept as in the original.
    os.makedirs(maturity, exist_ok=True)

    MINCSIZE = 100
    MAXSIZE = n_series / 2

    cluster_dict = {}
    for i in range(n_series - 1):
        # Replay merge step i: the new cluster absorbs both children. A child
        # that is itself a merged cluster contributes its members, a leaf
        # contributes its own id.
        merged = []
        for child in (linkage_matrix[i, 0], linkage_matrix[i, 1]):
            merged += cluster_dict.pop(child, [child])
        cluster_dict[n_series + i] = merged

        for NNN in [10]:
            figfolder = (outfolder + '/' + maturity + '/figs_clusters_'
                         + smoothing + '/' + str(NNN))
            curvefodler = (outfolder + '/' + maturity + '/avg_curves_'
                           + smoothing + '/' + str(NNN))
            vensfolder = (outfolder + '/' + maturity + '/clusters_venues_'
                          + smoothing + '/' + str(NNN))
            for folder in (figfolder, curvefodler, vensfolder):
                os.makedirs(folder, exist_ok=True)

            candidates = [(cid, len(members))
                          for cid, members in cluster_dict.items()
                          if MINCSIZE < len(members) < MAXSIZE]
            candidates.sort(key=lambda tup: tup[1], reverse=True)
            top = candidates[:NNN]
            biggest = sum(size for _, size in top)
            top_ids = {cid for cid, _ in top}

            # Only export once the selected clusters cover most of the data.
            if biggest <= n_series / 2:
                continue

            fig, ax = plt.subplots(2, 5, figsize=(20, 8))
            ind = 0
            for ccc, nodes in cluster_dict.items():
                if ccc not in top_ids:
                    continue
                panel = ax[indicies[ind]]
                ttt = []
                sss = []
                cluster_vens = []
                for n in nodes:
                    idx = int(n)
                    curve = curves[idx]
                    xs = transform_ts(list(range(len(curve))), 11)
                    sss += list(curve)
                    ttt += xs
                    cluster_vens.append(venue_names[idx])
                    panel.plot(xs, curve, linewidth=0.4, color='grey',
                               alpha=0.15)

                suffix = (str(ind) + '_' + str(biggest) + '_venuesnum='
                          + str(len(nodes)) + '.dat')
                with open(vensfolder + '/venues_in_' + suffix, 'w') as ffout:
                    ffout.write('\n'.join(cluster_vens))

                panel.set_title('Number of venues = ' + str(len(nodes)),
                                fontsize=15)

                bx, by = getBinnedDistribution(ttt, sss, 8)
                bx = (bx[1:] + bx[:-1]) / 2  # bin edges -> bin centres
                with open(curvefodler + '/avg_curve_' + suffix, 'w') as fout:
                    fout.write('\t'.join([str(b) for b in bx]) + '\n')
                    fout.write('\t'.join([str(b) for b in by]) + '\n')

                panel.plot(bx, by, linewidth=3, color='r')
                ind += 1

            plt.savefig(figfolder + '/top_' + str(NNN) + '_clusters_'
                        + str(biggest) + '.png')
            plt.close()
def cluster(time_series_set, name):
    """Add a named series to the stored set if new, then cluster and plot all of them.

    Every row of ``./static/cluster_data.csv`` is a name followed by numeric
    values; each row is z-score normalised. If ``name`` is not stored yet, the
    second element of every item in ``time_series_set`` is collected, appended
    to the CSV file as a new row and added to the set to cluster. The series
    are clustered with the tree-tracking hierarchical model and the tree is
    saved as ``hierarchy.png``, labelled with the stored names.

    :param time_series_set: Iterable of indexable items; ``item[1]`` is the
        value used for the new series.
    :param name: Label of the new series.
    """
    path = "./static/cluster_data.csv"
    name_list = []
    series_list = []

    # Read (and close) the stored rows before the same file is possibly
    # re-opened for appending. newline='' is what the csv module expects.
    with open(path, 'r', newline='') as source:
        for row in csv.reader(source):
            name_list.append(row[0])
            stored = np.array([float(value) for value in row[1:]])
            series_list.append(stats.zscore(stored))

    if name not in name_list:
        # time_series is the performance-metric sequence.
        time_series = [row[1] for row in time_series_set]
        with open(path, 'a', newline='') as f:
            csv.writer(f).writerow([name] + time_series)
        name_list.append(name)
        fresh = np.array([float(value) for value in time_series])
        series_list.append(stats.zscore(fresh))

    # Custom hierarchical clustering.
    model1 = clustering.Hierarchical(dtw.distance_matrix_fast, {})
    model1.fit(series_list)
    # Augment the Hierarchical object to keep track of the full tree.
    model2 = clustering.HierarchicalTree(model1)
    model2.fit(series_list)
    # The additional LinkageTree fit was dropped: its result was never used.

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 10))
    model2.plot("hierarchy.png", axes=ax,
                show_ts_label=lambda idx: name_list[idx],
                show_tr_label=True, ts_label_margin=-10,
                ts_left_margin=10, ts_sample_length=1)