def calc_and_output(sim):
    """Cluster ``sim`` agglomeratively and score the result against ``labels``.

    Fits an ``AgglomerativeClustering`` model with 8 clusters on ``sim``,
    prints the ARI between the module-level ``labels`` and the predicted
    labels, and returns both values.

    Args:
        sim: Data passed unchanged to ``AgglomerativeClustering.fit``.
            NOTE(review): the name suggests a similarity matrix -- confirm
            against the caller.

    Returns:
        A ``(labels_predict, ari)`` tuple: the fitted model's ``labels_``
        and the ARI score of those labels against ``labels``.
    """
    # Alternatives tried previously (kept for reference):
    #   labels_predict = k_means(sim, 6)
    #   labels_predict = knn_model.predict(np.max(sim) - sim)

    # The Euclidean metric is scikit-learn's default, so it is not passed
    # explicitly: the old ``affinity`` keyword was renamed to ``metric`` and
    # removed in newer releases, where passing it raises a TypeError.
    model = AgglomerativeClustering(n_clusters=8)
    labels_predict = model.fit(sim).labels_

    # Score once and reuse the value for both the report and the return.
    ari = ARI(labels, labels_predict)
    print('ARI:', ari)
    return labels_predict, ari
# One RGB triple per plotted curve; the loop below uses the first nine.
default_colors = [
    [0, 0.8, 1],
    [0, 0.5, 0.5],
    [0.2, 0.8, 0.8],
    [0.2, 0.4, 1],
    [0.6, 0.8, 1],
    [1, 0.6, 0.8],
    [0.8, 0.6, 1],
    [1, 0.8, 0.6],
    [1, 0, 0],
    [0, 1, 0],
]

# Candidate neighbour counts k = 1..50, shared by every split and used as
# the x axis of each accuracy curve.
x = range(1, 51)

for j in range(1, 10):
    # Fraction of the samples held out for testing: 0.1, 0.2, ..., 0.9.
    test_fraction = j / 10
    acc_arr = []

    # Split the dataset.
    train_data, test_data, train_label, test_label = train_test_split(
        sleep_data,
        sleep_labels,
        test_size=test_fraction,
        shuffle=True,
    )

    # Train and evaluate one KNN model per candidate k.
    for i in x:
        knn_model = KNeighborsClassifier(n_neighbors=i)
        knn_model.fit(train_data, train_label)
        predict_label = knn_model.predict(test_data)

        acc = accuracy(predict_label, test_label)
        ari = ARI(test_label, predict_label)
        print(ari)
        acc_arr.append(acc)

    # One accuracy-vs-k curve per test fraction.
    plt.plot(
        x,
        acc_arr,
        '*-',
        color=default_colors[j - 1],
        label='test percent:' + str(test_fraction),
    )

# Figure 14: tick at k = 1 and then every fifth k up to 50.
my_x_ticks = [1, *range(5, 51, 5)]
plt.xticks(my_x_ticks)
plt.legend(loc='best')
plt.grid(axis='x', linestyle='--')
plt.xlabel('k')
plt.ylabel('accuracy')
plt.show()
# Dimension reduction with t-SNE.
# PCA alternative (disabled):
#   dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)
dim_data = t_SNE(X, perp=5, with_normalize=True)

# Take the first two coordinates of every embedded point for plotting.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Read the reference labels; each stored entry is unwrapped with [0][0].
raw_labels = read_from_mat('data/corr/Labels_islet.mat')['Labels']
labels = [entry[0][0] for entry in raw_labels]

# Hierarchical clustering (HCA) on the reduced data, then label extraction.
# NOTE(review): the 6 is presumably the number of clusters -- confirm
# against hca_labels.
model = hca(dim_data)
labels_predict = hca_labels(model, 6)

# Map each predicted label to a plotting colour.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels_predict, default_colors)

# Report the score, then draw the dendrogram and the scatter plot.
print('ARI:', ARI(labels, labels_predict))
hca_dendrogram(model)
draw_scatter(x, y, labels_predict, colors)
# Load the saved labels and show them together with the data shape.
labels = joblib.load('ae_output/labels.pkl')
print(labels)
print(X.shape)

# Timestamps are printed before and after t-SNE so its duration can be read
# off the output.
print(datetime.datetime.now())
# PCA alternative (disabled):
#   dim_data, ratio, result = get_pca(X, c=11, with_normalize=False)
#   print(sum(ratio))
dim_data = t_SNE(X, perp=40, with_normalize=False)
print(datetime.datetime.now())

# Take the first two coordinates of every embedded point for plotting.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Colour the points by their loaded label and plot them.
default_colors = ['b', 'g', 'r', 'm', 'y', 'c']
colors = get_color(labels, default_colors)
draw_scatter(x, y, labels, colors)

# Cluster the original (non-reduced) X with k-means and score the result
# against the loaded labels.
predict_labels = k_means(X, k=6)
print(ARI(labels, predict_labels))
print(NMI(labels, predict_labels))
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.neighbors import KNeighborsClassifier
from Utils import get_color, draw_scatter
from Metrics import ARI, accuracy, NMI, F1


def knn(X, y, k):
    """Return a ``KNeighborsClassifier`` with ``k`` neighbours fitted on X, y.

    Args:
        X: Training samples, passed unchanged to ``fit``.
        y: Training targets, passed unchanged to ``fit``.
        k: Value used for ``n_neighbors``.

    Returns:
        The fitted classifier (``fit`` returns the estimator itself).
    """
    return KNeighborsClassifier(n_neighbors=k).fit(X, y)


print('Loading data...')
companies, rate, data_after_process = joblib.load(
    'data/data_after_process.pkl'
)

# Fit a 3-nearest-neighbour model and predict on the same data it was
# fitted on, so the scores below are training-set scores.
model = knn(data_after_process, rate, 3)
labels_predict = model.predict(data_after_process)
print('Accuracy:', accuracy(labels_predict, rate))

# Map each predicted label to a plotting colour.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels_predict, default_colors)

# Report the remaining metrics against the reference `rate` values.
print('ARI:', ARI(rate, labels_predict))
print('NMI:', NMI(rate, labels_predict))
print('F1:', F1(rate, labels_predict))