def make_elbow(X):
    """Plot the KMeans RSQ elbow curve for k = 1..9 and save it to disk.

    For every k in ``range(1, 10)`` a ``KMeans(n_clusters=k)`` model is fitted
    on ``X`` and scored with ``r_square``. The scores are plotted against k
    and the figure is written to
    ``"fig/" + type_exec + "/k-means_elbow_method"``.

    Args:
        X: Data to cluster. It is passed as-is to ``KMeans.fit`` and its
            ``.values`` attribute is handed to ``r_square`` (presumably a
            pandas DataFrame -- confirm against callers).

    Returns:
        None. The only output is the saved figure.

    Note:
        ``KMeans``, ``r_square``, ``plt`` and ``type_exec`` are not defined in
        this function; they must be provided by the enclosing module. The
        target directory is not created here, so it has to exist already.
    """
    lst_k = range(1, 10)
    # Loop-invariant: extract the raw values once instead of once per k.
    data = X.values

    lst_rsq = []
    for k in lst_k:
        model = KMeans(n_clusters=k)
        model.fit(X)
        lst_rsq.append(
            r_square(data, model.cluster_centers_, model.labels_, k))

    fig = plt.figure()
    try:
        plt.plot(lst_k, lst_rsq, "bx-")
        plt.xlabel("k")
        plt.ylabel("RSQ score")
        plt.title("The Elbow Method showing the optimal k")
        plt.savefig("fig/" + type_exec + "/k-means_elbow_method")
    finally:
        # Always release this figure, even when plotting or saving fails
        # (e.g. missing output directory), so figures do not pile up.
        plt.close(fig)
# Script fragment; the original line breaks were lost, so the statements below
# are collapsed onto a single line. In order, it:
#   1. saves the current figure to 'fig/k-means_ground_truth' and closes `fig`;
#   2. imports r_square and purity_score, then for each k in range(2, 11) fits
#      KMeans(n_clusters=k) on X_norm and records r_square(...) and
#      purity_score(y, labels);
#   3. plots both score lists against k ('bx-' for RSQ, 'rx-' for purity) and
#      saves the figure to 'fig/k-means_elbow_method';
#   4. starts a hierarchical-clustering section: the `lst_labels = map(...)`
#      statement is cut off at the end of this fragment.
# NOTE(review): X_norm, y, KMeans, plt and the first `fig` are defined outside
# this fragment.
plt.savefig('fig/k-means_ground_truth') plt.close(fig) # Compute R-square, i.e. V_inter/V from R_square_clustering import r_square from purity import purity_score # Plot elbow graphs for KMeans using R square and purity scores lst_k = range(2, 11) lst_rsq = [] lst_purity = [] for k in lst_k: est = KMeans(n_clusters=k) est.fit(X_norm) lst_rsq.append( r_square(X_norm.to_numpy(), est.cluster_centers_, est.labels_, k)) lst_purity.append(purity_score(y.to_numpy(), est.labels_)) fig = plt.figure() plt.plot(lst_k, lst_rsq, 'bx-') plt.plot(lst_k, lst_purity, 'rx-') plt.xlabel('k') plt.ylabel('RSQ/purity score') plt.title('The Elbow Method showing the optimal k') plt.savefig('fig/k-means_elbow_method') plt.close() # # hierarchical clustering from scipy.cluster.hierarchy import dendrogram, linkage lst_labels = map(lambda pair: pair[0] + str(pair[1]),
# Script fragment; the original line breaks were lost, so the statements below
# are collapsed onto a single line. In order, it:
#   1. finishes a correlation-circle plot: adds an unfilled blue circle of
#      radius 1 centred on (0, 0) to `axes`, saves the figure as
#      'acp_correlation_circle_axes_<x_axis>_<y_axis>' and closes `fig`
#      (presumably the tail of correlation_circle(), whose header is outside
#      this fragment -- confirm);
#   2. calls correlation_circle(data, 9, 2, 3);
#   3. "question 5": for each k in range(2, 8) fits KMeans(n_clusters=k) on
#      X_norm, records r_square(...), plots the scores against k and saves the
#      figure as 'r_square';
#   4. fits a final KMeans(n_clusters=5) and defines the list of country codes
#      whose centroids are to be printed.
# NOTE(review): the final model is fitted on `X` while the elbow loop uses
# `X_norm` -- confirm this is intended.
cercle = plt.Circle((0, 0), 1, color='blue', fill=False) axes.add_artist(cercle) plt.savefig('acp_correlation_circle_axes_' + str(x_axis) + '_' + str(y_axis)) plt.close(fig) correlation_circle(data, 9, 2, 3) #question 5 lst_k = range(2, 8) lst_rsq = [] for k in lst_k: est = KMeans(n_clusters=k) est.fit(X_norm) lst_rsq.append(r_square(X_norm, est.cluster_centers_, est.labels_, k)) fig = plt.figure() plt.plot(lst_k, lst_rsq, 'bx-') plt.xlabel('k') plt.ylabel('RSQ') plt.title('The Elbow Method showing the optimal k') plt.savefig('r_square') plt.close(fig) est = KMeans(n_clusters=5) est.fit(X) # print centroids associated with several countries lst_countries = ['EL', 'FR', 'DE', 'US'] # centroid of the entire dataset
def cah_cat():
    """Run KMeans on the one-hot encoded categorical columns of dem.csv.

    Steps:
        1. Read '../donnees/fusion/dem.csv'.
        2. Delete the columns 'is_adh', 'DTADH', 'DTDEM' and the numeric
           columns 'MTREV', 'AGEAD', 'agedem', 'adh'.
        3. Cast the remaining columns to ``str`` and one-hot encode them with
           ``pd.get_dummies``.
        4. For each k in ``range(2, 8)`` fit ``KMeans(n_clusters=k)``, record
           the ``r_square`` score and print the cluster label of every row.
        5. Plot the scores against k and save the figure to
           '../fig/k-means_elbow_method'.

    Progress banners and intermediate frames are printed to stdout.

    Returns:
        None.

    Note:
        ``r_square`` is not imported here; it must be provided by the
        enclosing module. Purity scores are not computed yet (see TODO), so
        only the RSQ curve is drawn.
    """
    import pandas as pd
    import matplotlib.pyplot as plt
    from sklearn.cluster import KMeans

    print("###############################################")
    print("#####RUN KMEANS DEMISSIONAIRES CATEGORIELS#####")
    print("###############################################")

    clients_dem = pd.read_csv('../donnees/fusion/dem.csv', sep=',')
    print(clients_dem)

    # Columns removed before encoding; the last four are the numeric ones.
    dropped_columns = (
        'is_adh', 'DTADH', 'DTDEM',
        'MTREV', 'AGEAD', 'agedem', 'adh',
    )
    for column in dropped_columns:
        del clients_dem[column]
    print(clients_dem)

    X_cat_one_hot = pd.get_dummies(clients_dem.astype(str))
    print(X_cat_one_hot)

    # Loop-invariant values, built once instead of once per k.
    one_hot_values = X_cat_one_hot.to_numpy()
    codes = pd.DataFrame(clients_dem.index.values.tolist())

    # Compute R-square, i.e. V_inter/V, for each candidate number of clusters.
    # TODO: also compute purity scores and add them to the plot.
    lst_k = range(2, 8)
    lst_rsq = []
    for k in lst_k:
        est = KMeans(n_clusters=k)
        est.fit(X_cat_one_hot)
        lst_rsq.append(
            r_square(one_hot_values, est.cluster_centers_, est.labels_, k))

        print("------------- Groupe de " + str(k) + " clusters ---------")
        clusters = {"code": codes, "cluster": est.labels_}
        print(pd.DataFrame(clusters))

    fig = plt.figure()
    try:
        plt.plot(lst_k, lst_rsq, 'bx-')
        plt.xlabel('k')
        plt.ylabel('RSQ/purity score')
        plt.title('The Elbow Method showing the optimal k')
        plt.savefig('../fig/k-means_elbow_method')
    finally:
        # Always release this figure, even when saving fails (e.g. missing
        # output directory), so figures do not pile up.
        plt.close(fig)

    print("###############################################")
    print("#####END KMEANS DEMISSIONAIRES CATEGORIELS#####")
    print("###############################################")
# Script fragment; the original line breaks were lost, so the statements below
# are collapsed onto a single line. In order, it:
#   1. prints the centroid est.cluster_centers_[num_cluster] (presumably inside
#      a loop over clusters whose header is outside this fragment -- confirm),
#      followed by two separator lines;
#   2. for each k in range(2, 9) fits KMeans(n_clusters=k) on `eurofit` and
#      records r_square(...);
#   3. plots the scores against k, saves the figure as 'R2', displays it with
#      plt.show(), closes it, and prints two more separator lines.
# NOTE(review): eurofit, est, num_cluster, KMeans, r_square and plt are defined
# outside this fragment.
print('\tcentroid: ' + str(est.cluster_centers_[num_cluster])) print( '--------------------------------------------------------------------------' ) print( '--------------------------------------------------------------------------' ) lst_k = range(2, 9) lst_rsq = [] for k in lst_k: est = KMeans(n_clusters=k) est.fit(eurofit) lst_rsq.append(r_square(eurofit, est.cluster_centers_, est.labels_, k)) fig = plt.figure() plt.plot(lst_k, lst_rsq, 'bx-') plt.xlabel('k') plt.ylabel('RSQ') plt.title('The Elbow Method showing the optimal k') plt.savefig('R2') plt.show() plt.close() print( '--------------------------------------------------------------------------' ) print( '--------------------------------------------------------------------------' )