Ejemplo n.º 1
0
            continue
        l = []
        sorted_dict = {}
        hour = 0
        minute = 0
        while hour < 24:
            time = str(hour) + ":" + str(minute)

            measure = vals[time]
            l.append(measure)

            hour = hour if minute == 0 else hour + 1
            minute = 0 if minute == 30 else 30

        sazonal_data[meter] = np.array(l)
        total_data += 1
        if total_data >= max_data:
            break

# Build the clustering run: k values 2..17 combined with every listed
# algorithm name and distance name, with min-normalisation switched off.
candidate_ks = list(range(2, 18))
algorithm_names = ["k-means", "k-means++"]
distance_names = ["euclidean", "cityblock", "chebyshev", "DTW", "LB_Keogh"]
k = K_means_clustering(
    sazonal_data,
    candidate_ks,
    algorithm_names,
    distance_names,
    normalize_by_min=False,
)
# Minimal single-configuration variant, kept for reference:
# k = K_means_clustering(sazonal_data,list(range(2,3)),['k-means'],['euclidean'],normalize_by_min=False)
k.fit(verbose=True)

# Both plot calls receive the same key: "<max_data>_sg_".
plot_key = str(max_data) + "_sg_"
k.plot_all(as_time_series=True, key_name=plot_key)
k.plot_silhouette(key_name=plot_key)
Ejemplo n.º 2
0
        # Each line of the <benchmark>_TRAIN file is a comma-separated row of
        # floats: the first value is the label, the rest is the series itself.
        train_path = folder_name + benchmark + '/' + benchmark + '_TRAIN'
        with open(train_path, 'r') as f:
            for line in f:
                i += 1
                values = [float(field) for field in line.split(',')]
                label, series = values[0], values[1:]
                data[i] = np.array(series)
                expected[i] = label
                # Record each distinct label once, in first-seen order.
                if label not in clusters_labels:
                    clusters_labels.append(label)

        # The number of distinct labels read above is the expected cluster count.
        n_expected_cluster = len(clusters_labels)

        # Candidate k window around the expected count, never below 2.
        # NOTE(review): ``ks`` and ``normal_by_min`` are built here but are not
        # passed to the clustering call below, which only receives
        # ``[n_expected_cluster]``.
        diff = 1
        min_k = max(2, n_expected_cluster - diff)
        max_k = n_expected_cluster + diff
        ks = list(range(min_k, max_k))
        normal_by_min = False

        algs = ['k-means']
        # Full list of distance names, kept for reference:
        #dists = ['minkowski_1','minkowski_2','chebyshev','DTW','LB_Keogh','correlation_1_1.5','cort-euclidean']
        dists = ['LB_Keogh', 'correlation_1_1.5', 'cort-euclidean', 'DTW']

        k = K_means_clustering(
            data,
            expected,
            [n_expected_cluster],
            algs,
            dists,
            Normalization.by_Z_normalization,
        )
        k.fit(verbose=True)
        # Optional plotting calls, currently disabled:
        #k.plot_all(as_time_series=True,key_name=benchmark)
        #k.plot_validations(benchmark,n_expected_cluster)
        #k.plot_real_solution(len(clusters_labels),expected,'k-means','minkowski_1',benchmark)
        #k.plot_real_solution(len(clusters_labels),expected,'k-means++','minkowski_2',benchmark)

        # Store this run under results[benchmark][it], creating the
        # per-benchmark dict on first use.
        benchmark_results = results.setdefault(benchmark, {})
        benchmark_results[it] = (k.solutions, k.time_clustering, k.expected_indices)