# One RGB triple per plotted curve.
default_colors = [
    [0, 0.8, 1], [0, 0.5, 0.5], [0.2, 0.8, 0.8], [0.2, 0.4, 1], [0.6, 0.8, 1],
    [1, 0.6, 0.8], [0.8, 0.6, 1], [1, 0.8, 0.6], [1, 0, 0], [0, 1, 0],
]

# Candidate neighbour counts k = 1..50; also used as the x axis of every curve.
x = range(1, 51)

# j / 10 is the test fraction (0.1 .. 0.9); each fraction gets its own colour.
for j, curve_color in zip(range(1, 10), default_colors):
    test_fraction = j / 10
    acc_arr = []

    # Split the dataset.
    train_data, test_data, train_label, test_label = train_test_split(
        sleep_data, sleep_labels, test_size=test_fraction, shuffle=True)

    # Train and evaluate one KNN model per value of k.
    for i in x:
        knn_model = KNeighborsClassifier(n_neighbors=i)
        knn_model.fit(train_data, train_label)
        predict_label = knn_model.predict(test_data)
        acc = accuracy(predict_label, test_label)
        ari = ARI(test_label, predict_label)
        print(ari)
        acc_arr.append(acc)

    plt.plot(x, acc_arr, '*-', color=curve_color,
             label='test percent:' + str(test_fraction))

# Figure 14
my_x_ticks = [1] + list(range(5, 51, 5))
plt.xticks(my_x_ticks)
plt.legend(loc='best')
plt.grid(axis='x', linestyle='--')
plt.xlabel('k')
plt.ylabel('accuracy')
plt.show()
i[0]: [i[1]] for i in edge_hits(export_pom(cmd.net, by='label'), export_pom(tn1, by='label')).items() }).assign(Trial=i, Learner='CASMOD', Net="ds1"), sort=False) g_res = g_res.append(pd.DataFrame({ i[0]: [i[1]] for i in edge_hits(export_pom(cjn.net, by='label'), export_pom(tn1, by='label')).items() }).assign(Trial=i, Learner='CASJNK', Net="ds1"), sort=False) ds1_acc = ds1_acc.append(pd.DataFrame({ i[0]: [i[1]] for i in accuracy(cjn.net, ds1.loc[500:519]).items() }).assign(Trial=i, Learner='GREEDY', Net="ds1"), sort=False) ds1_acc = ds1_acc.append(pd.DataFrame({ i[0]: [i[1]] for i in accuracy(cgm.net, ds1.loc[500:519]).items() }).assign(Trial=i, Learner='CASGMM', Net="ds1"), sort=False) ds1_acc = ds1_acc.append(pd.DataFrame({ i[0]: [i[1]] for i in accuracy(cmd.net, ds1.loc[500:519]).items() }).assign(Trial=i, Learner='CASMOD', Net="ds1"), sort=False)
if __name__ == "__main__":
    # Demo: fit a DecisionTree on a toy dataset and report how well it
    # reproduces the training labels.
    #
    # Labelling rule noted by the original author:
    #   1,2,3 => 1
    #   else  => 0
    simple_dataset = [
        ([1, 2, 3, 7, 8], 1),
        ([1, 2, 3, 4, 5], 1),
        ([7, 8, 1, 2, 3, 4, 5], 1),
        ([1, 2], 0),
        ([2, 3], 0),
        ([1, 3], 0),
        ([5, 6, 7], 0),
        ([4, 1, 2], 0),
        ([3, 7, 9], 0),
        ([1, 2, 4, 5, 6, 7, 8], 0),
    ]

    # Separate the samples from their labels.
    xs, ys = zip(*simple_dataset)

    # The attribute set is every distinct value that occurs in any sample.
    attributes = list(set(flatten(xs)))

    dt = DecisionTree(attributes)
    dt.fit(xs, ys)

    # Predictions are made on the same samples the tree was fitted on.
    predictions = dt.predict(xs)
    acc = accuracy(ys, predictions, class_value=1)

    # Single-argument print(...) produces the same output on Python 2 and 3,
    # unlike the print statement, which is a syntax error on Python 3.
    print("\nAccuracy: " + str(acc))
    print("")
    print(str(dt.tree))
([3, 1, 2], 0 ), ([1, 2, 4, 5, 6, 7, 8], 0 ), ] simple_ordered_dataset2 = [ (["big", "fat", "hairy", "cat"], 1 ), (["big", "fat", "hairy", "troublesome", "cat"], 1 ), (["big", "fat", "hirsute", "hairy", "troublesome", "cat"], 1 ), (["big", "hairy", "fat", "cat"], 0), (["big", "hairy", "cat"], 0), (["big", "fat", "cat"], 0), (["big", "fat"], 0), (["fat"], 0 ), (["hairy", "troublesome"], 0 ), (["a", "cat"], 0 ), (["a", "big"], 0 ), (["fat", "cat"], 0 ), (["big", "cat"], 0 ), ] xs, ys = zip(*complex_ordered_dataset) dt = OrderedDecisionTree(4) dt.fit(xs, ys) predictions = dt.predict(xs) acc = accuracy(ys, predictions, class_value=1) print "\nAccuracy: " + str(acc) print "" print str(dt.tree) pass
# t-SNE: project X with perp=5 and normalization enabled.
dim_data = t_SNE(X, perp=5, with_normalize=True)
# PCA alternative (disabled):
# dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)

# Pull the first two coordinates of every projected point.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Read the labels and unwrap each doubly nested entry.
raw_labels = read_from_mat('data/corr/Labels_islet.mat')['Labels']
labels = [entry[0][0] for entry in raw_labels]
# print(labels)

# KNN training and prediction; the prediction is made on the same data that
# was handed to knn(). NOTE(review): the third argument is presumably k — confirm.
model = knn(dim_data, labels, 3)
labels_predict = model.predict(dim_data)
print('Accuracy:', accuracy(labels_predict, labels))

# Build the colour list from the predicted labels.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels_predict, default_colors)

# Report the remaining metrics, then draw the scatter plot.
for caption, metric in (('ARI:', ARI), ('NMI:', NMI), ('F1:', F1)):
    print(caption, metric(labels, labels_predict))
draw_scatter(x, y, labels_predict, colors)
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.neighbors import KNeighborsClassifier
from Utils import get_color, draw_scatter
from Metrics import ARI, accuracy, NMI, F1

print('Loading data...')
# NOTE(review): joblib.load unpickles the file; only load trusted data.
companies, rate, data_after_process = joblib.load(
    'data/data_after_process.pkl')


def knn(X, y, k):
    """Return a KNeighborsClassifier with ``k`` neighbours fitted on ``X`` and ``y``."""
    # fit() returns the fitted estimator itself, so it can be returned directly.
    return KNeighborsClassifier(n_neighbors=k).fit(X, y)


# Fit on the full dataset and predict on that same dataset.
model = knn(data_after_process, rate, 3)
labels_predict = model.predict(data_after_process)
print('Accuracy:', accuracy(labels_predict, rate))

# Build the colour list from the predicted labels.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels_predict, default_colors)

# Report the remaining metrics against the reference `rate` labels.
for caption, metric in (('ARI:', ARI), ('NMI:', NMI), ('F1:', F1)):
    print(caption, metric(rate, labels_predict))