# Cluster the pre-processed data with scikit-learn's KMeans, time the fit and
# print SSE, silhouette and Calinski-Harabasz scores for the chosen k.
import time

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import calinski_harabasz_score
from sklearn.metrics import silhouette_score

from clustering.kmeans import k_means
from clustering.preprocess.pre_process import load_data

raw_data, data_all, data_pca, data_respectively_pca = load_data(
    '/Users/jackietien/Documents/ClassificationAndClustering/clustering/preprocess/data.csv')
data = data_respectively_pca
k = 6

# Only the clustering call itself sits between the two timestamps.
start_time = time.time()
estimator = KMeans(n_clusters=k).fit(data)
# Alternative: the project's own implementation (kept for comparison runs).
# _, label, sse = k_means.k_means(data, k)
end_time = time.time()
consuming_time = end_time - start_time
print(consuming_time)

# cluster_centers = estimator.cluster_centers_
# `label` has to be bound here: it feeds both scores below and the
# per-cluster mask in the loop, and no other live statement assigns it.
label = estimator.labels_
sse = estimator.inertia_
silhouette = silhouette_score(data, label)
calinski_harabasz = calinski_harabasz_score(data, label)
print("lib result: \t", " k: ", k, "\tsse: ", sse, "\tsilhouette: ", silhouette,
      "\tcalinski_harabaz_score: ", calinski_harabasz)

# Walk the clusters one by one, selecting the raw rows assigned to cluster i.
# NOTE(review): the loop body appears to continue past this excerpt.
for i in range(k):
    cluster_k = raw_data[label == i]
    a = {}
# res[len(res) - 1].append(i) continue else: res[full_list[i]].append(i) return res def get_sub_list(full_list, indexes): res = [] for i in indexes: res.append(full_list[i]) return res if __name__ == '__main__': raw_data, full_data, data_pca, data_respectively_pca = pre_process.load_data( "../preprocess/data.csv") db = DBSCAN(eps=0.75, min_samples=20).fit(data_respectively_pca) labels = db.labels_ grouped = group(labels) fig1 = plt.figure() ax1 = Axes3D(fig1) ax2 = plt.figure().add_subplot(111) ax3 = plt.figure().add_subplot(111) ax4 = plt.figure().add_subplot(111) for i in grouped: plot_data = np.array(get_sub_list(data_pca, i))
import time

import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.metrics import calinski_harabasz_score
from sklearn.metrics import silhouette_score
from sklearn.neighbors import NearestNeighbors

from clustering.preprocess.pre_process import load_data

# Load the four data variants returned by the project's pre-processing step.
raw_data, data_all, data_pca, data_respectively_pca = load_data(
    "../preprocess/data.csv")
# This script works on the `data_all` variant.
data = data_all

# NOTE(review): presumably collects SSE values for the library implementation
# - confirm against the code that fills it.
lib_SSE = []
# Silhouette coefficient
# lib_silhouette = []
# lib_calinski_harabasz = []
#
# self_SSE = []
# self_silhouette = []
# self_calinski_harabasz = []
#
#
# lib_SSE.clear()
# lib_silhouette.clear()
# lib_calinski_harabasz.clear()
# self_SSE.clear()
# self_silhouette.clear()
# self_calinski_harabasz.clear()