# Script fragment: build one half-hourly series per meter, then cluster them.
#   * Starting at hour 0 / minute 0 and stepping 30 minutes at a time while
#     hour < 24, look up vals[str(hour) + ":" + str(minute)] (keys of the form
#     "0:0", "0:30", "1:0", ...) and append each measure to `l`.
#   * Store the collected values as a NumPy array in sazonal_data[meter],
#     increment total_data, and `break` once total_data >= max_data.
#   * Construct K_means_clustering over sazonal_data with k in range(2, 18),
#     the "k-means"/"k-means++" algorithms and five distance names, then call
#     fit / plot_all / plot_silhouette using str(max_data) + "_sg_" as key name.
# `sorted_dict` is assigned here but not read anywhere in this fragment.
# NOTE(review): the leading `continue` and the `break` belong to a loop that
# starts outside this view (presumably iterating over meters) - confirm.
# NOTE(review): these statements appear collapsed onto a single physical line;
# as written, everything after the first `#` is a comment. Restore the original
# line breaks and indentation before running.
continue l = [] sorted_dict = {} hour = 0 minute = 0 while hour < 24: time = str(hour) + ":" + str(minute) measure = vals[time] l.append(measure) hour = hour if minute == 0 else hour + 1 minute = 0 if minute == 30 else 30 sazonal_data[meter] = np.array(l) total_data += 1 if total_data >= max_data: break k = K_means_clustering( sazonal_data, list(range(2, 18)), ["k-means", "k-means++"], ["euclidean", "cityblock", "chebyshev", "DTW", "LB_Keogh"], normalize_by_min=False, ) # k = K_means_clustering(sazonal_data,list(range(2,3)),['k-means'],['euclidean'],normalize_by_min=False) k.fit(verbose=True) k.plot_all(as_time_series=True, key_name=(str(max_data) + "_sg_")) k.plot_silhouette(key_name=(str(max_data) + "_sg_"))
# Load "<folder_name><benchmark>/<benchmark>_TRAIN". Each non-blank line is a
# comma-separated record: the first field is the class label, the remaining
# fields are the series values.
# NOTE(review): `i`, `data`, `expected`, `clusters_labels`, `results`, `it`,
# `benchmark` and `folder_name` are defined outside this fragment (presumably
# by enclosing loops over benchmarks and iterations) - confirm.
with open(folder_name + benchmark + '/' + benchmark + '_TRAIN', 'r') as f:
    for line in f:
        # Skip blank/whitespace-only lines (e.g. a trailing newline at the end
        # of the file); float() would raise ValueError on them. The check runs
        # before `i` is advanced so indices of real records are unaffected.
        if not line.strip():
            continue
        i += 1
        floats = [float(x) for x in line.split(',')]
        label = floats[0]
        data[i] = np.array(floats[1:])
        expected[i] = label
        if label not in clusters_labels:
            clusters_labels.append(label)

# Number of distinct labels seen in the training file.
n_expected_cluster = len(clusters_labels)

# Window of candidate k values around the expected cluster count (never below
# 2). These names are not passed to the K_means_clustering call below, which
# uses only [n_expected_cluster]; they are kept because code outside this
# fragment may still read them.
diff = 1
min_k = max(2, n_expected_cluster - diff)
max_k = n_expected_cluster + diff
ks = list(range(min_k, max_k))
normal_by_min = False

algs = ['k-means']
# Full list of distance names previously used:
# dists = ['minkowski_1','minkowski_2','chebyshev','DTW','LB_Keogh','correlation_1_1.5','cort-euclidean']
dists = ['LB_Keogh', 'correlation_1_1.5', 'cort-euclidean', 'DTW']

k = K_means_clustering(
    data,
    expected,
    [n_expected_cluster],
    algs,
    dists,
    Normalization.by_Z_normalization,
)
k.fit(verbose=True)
# Optional plotting, currently disabled:
# k.plot_all(as_time_series=True,key_name=benchmark)
# k.plot_validations(benchmark,n_expected_cluster)
# k.plot_real_solution(len(clusters_labels),expected,'k-means','minkowski_1',benchmark)
# k.plot_real_solution(len(clusters_labels),expected,'k-means++','minkowski_2',benchmark)

# Record this run's outcome under results[benchmark][it].
if benchmark not in results:
    results[benchmark] = {}
results[benchmark][it] = (k.solutions, k.time_clustering, k.expected_indices)