data = np.array(data) return data, [i[-2] for i in labels] sleep_data, sleep_labels = read_data_by_sheets('data/sleep.xlsx') # PCA dim_data, ratio, result = get_pca(sleep_data, c=2, with_normalize=False) # print(ratio) # 绘图13 x = [i[0] for i in dim_data] y = [i[1] for i in dim_data] default_colors = ['r', 'b', 'g', 'c', 'm'] colors = get_color(sleep_labels, default_colors) print('Drawing...') draw_scatter(x, y, sleep_labels, colors) default_colors = [[0, 0.8, 1], [0, 0.5, 0.5], [0.2, 0.8, 0.8], [0.2, 0.4, 1], [0.6, 0.8, 1], [1, 0.6, 0.8], [0.8, 0.6, 1], [1, 0.8, 0.6], [1, 0, 0], [0, 1, 0]] for j in range(1, 10): acc_arr = [] # 数据集切分 train_data, test_data, train_label, test_label = train_test_split(sleep_data, sleep_labels, test_size=(j / 10), shuffle=True) # knn 模型遍历训练 for i in range(1, 51): knn_model = KNeighborsClassifier(n_neighbors=i)
# Average the normalised waveform of the first trials separately for
# non-P300 (event == 0) and P300 (any other event value) trials, then plot
# both averages on a shared time axis.
# The accumulators non_p300_wave_avg / p300_wave_avg, as well as train_data,
# train_event, get_normalize, get_color and plt, are defined before this
# fragment (not visible here).
n_trials = 60  # number of leading trials included in the averages
n_non_p300 = 0
n_p300 = 0
for i in range(n_trials):
    # First column of the transposed, normalised trial.
    wave = np.array(get_normalize(train_data[i])).T[0]
    if train_event[i] == 0:
        non_p300_wave_avg = non_p300_wave_avg + wave
        n_non_p300 += 1
    else:
        p300_wave_avg = p300_wave_avg + wave
        n_p300 += 1

# Divide by the number of trials actually accumulated in each class instead
# of the previously hard-coded 50 / 10, so the averages stay correct when the
# class split differs.  max(..., 1) leaves an untouched accumulator as-is
# rather than dividing by zero when a class has no trials.
non_p300_wave_avg = non_p300_wave_avg / max(n_non_p300, 1)
p300_wave_avg = p300_wave_avg / max(n_p300, 1)

print(train_data.shape)
print(train_data.shape, train_event.shape)

# NOTE(review): default_colors, colors and my_x_ticks are not used by the
# plotting calls below; kept in case later code relies on them.
default_colors = ['r', 'b']
colors = get_color(range(8))

# 200 sample positions, 4 apart, labelled as milliseconds on the x axis.
x = range(0, 800, 4)
plt.plot(x, non_p300_wave_avg, c='r', label='non p300')
plt.plot(x, p300_wave_avg, c='k', label='p300')
plt.xlabel('time(ms)')
my_x_ticks = np.arange(0, 200, 50)
plt.grid()
plt.legend(loc='best')
plt.show()
from Utils import get_color, draw_scatter, read_from_txt

# Short code found in column 0 of the table -> full name shown in the plot.
label_dict = {'DD': 'Duroc', 'LL': 'Landrace', 'YY': 'Yorkshire'}

# One RGB triple per group, given as 0-255 channel values scaled by 1/256.
default_colors = [
    [channel / 256 for channel in rgb]
    for rgb in ((138, 158, 202), (246, 140, 99), (98, 194, 164))
]

data = read_from_txt('pca.txt', head=True)

# Column 0 holds the group code; columns 2 and 3 hold the two plotted
# coordinates, which are converted to float before plotting.
labels = [label_dict[code] for code in data[:, 0]]
x = [float(value) for value in data[:, 2]]
y = [float(value) for value in data[:, 3]]
print(x)
print(y)

colors = get_color(labels, default_colors)
print(len(colors))

draw_scatter(
    x,
    y,
    labels,
    colors,
    xlabel='PC1(56.84%)',
    ylabel='PC2(35.73%)',
)
# Dimensionality reduction of X (defined before this fragment, not visible
# here) with t-SNE.
dim_data = t_SNE(X, perp=5, with_normalize=True)
# PCA alternative:
# dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)

# Split the embedded points into their first and second coordinate.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Reference labels; each entry is unwrapped two levels to reach the value.
raw_labels = read_from_mat('data/corr/Labels_islet.mat')['Labels']
labels = [entry[0][0] for entry in raw_labels]

# Build the hca model on the embedded data and request 6 groups from it
# (presumably hierarchical cluster analysis cut into 6 clusters -- confirm
# against the hca / hca_labels helpers).
model = hca(dim_data)
labels_predict = hca_labels(model, 6)

# Colour per point, chosen from an explicit palette by predicted label.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels_predict, default_colors)

# Report agreement between reference and predicted labels, then draw.
print('ARI:', ARI(labels, labels_predict))
hca_dendrogram(model)
draw_scatter(x, y, labels_predict, colors)
import numpy as np
import joblib

# Labels: last field of every row of the label file, skipping the first row.
label_rows = read_from_txt('data/human_islets_labels.txt')
labels = [row[-1] for row in label_rows][1:]
print(len(labels))

# Data: transpose the table, drop the first row and first column of the
# transposed result, and convert what remains to float64.
raw_table = read_from_txt('data/human_islets.txt')
X = raw_table.T[1:, 1:].astype(np.float64)
print(X.shape)

# Optional caching of the parsed inputs:
# joblib.dump(X, 'datasets/human_islets.pkl')
# joblib.dump(labels, 'datasets/human_islets_labels.pkl')

# Dimensionality reduction to two components with PCA.
# t-SNE alternative:
# dim_data = t_SNE(X, perp=5, with_normalize=True)
dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)

# Split the reduced points into their first and second coordinate.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Colour list derived from the labels, then the scatter plot.
colors = get_color(labels)
draw_scatter(x, y, labels, colors)
C=params['C'], degree=params['degree'], kernel=params['kernel'], gamma=params['gamma'], decision_function_shape=params['decision_function_shape'], verbose=0) scores = cross_val_score(cross_model, x, y.ravel(), cv=s) return scores def svm_predict(x, model): results = model.predict(x) return results labels_int = get_color(rate, [1, 2, 3, 4]) # knn training and predict # model = knn(up_connection, labels_int, 3) # labels_predict = model.predict(up_connection) up_connection = np.array(up_connection) labels_int = np.array(labels_int) loo = LeaveOneOut() correct = 0 for train, test in loo.split(up_connection): model = knn(up_connection[train], labels_int[train], 3) labels_predict = model.predict(up_connection[test]) if labels_predict == labels_int[test]: correct += 1
# Load the matrix stored under key 'A' and report its shape.
X = read_from_mat('data/corr/A_islet.mat')['A']
print(X.shape)

# read labels; each entry is unwrapped two levels to reach the value
labels = read_from_mat('data/corr/Labels_islet.mat')['Labels']
labels = [i[0][0] for i in labels]

# dimensionality reduction (used only for the 2-D plot coordinates)
# t-SNE
dim_data = t_SNE(X, perp=5, with_normalize=True)
# PCA alternative
# dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)
# print(ratio)

# get two coordinates
x = [i[0] for i in dim_data]
y = [i[1] for i in dim_data]

# Spectral clustering into 6 groups, run on X itself rather than on the
# 2-D embedding.
labels_predict = SpectralClustering(
    n_clusters=6, affinity='nearest_neighbors').fit_predict(X)
print(labels_predict)

# get color list based on labels
# The six-entry palette (one entry per requested cluster) was previously
# defined but never handed to get_color; pass it so the plot uses it, as the
# other clustering scripts do.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y']
colors = get_color(labels_predict, default_colors)

# plot
draw_scatter(x, y, labels_predict, colors)
import joblib
from Utils import get_color, draw_scatter3d

print('Loading data...')
companies, rate, data_after_process = joblib.load('data/data_train_after.pkl')

# The first three entries of every processed row become the plot coordinates.
x = [row[0] for row in data_after_process]
y = [row[1] for row in data_after_process]
z = [row[2] for row in data_after_process]

# Colours derived from rate, with no explicit palette supplied.
colors = get_color(rate, colors=None)

# The third coordinate is divided by its maximum before plotting.
z_max = max(z)
z_scaled = [value / z_max for value in z]

draw_scatter3d(x, y, z_scaled, rate, colors=colors)
import joblib
from sklearn.model_selection import LeaveOneOut
from Utils import get_color
import numpy as np
from Clustering import knn

print('Loading data...')
companies, rate, data_after_process = joblib.load('data/data_train_after.pkl')
up_connection = np.array(joblib.load('data/up_connection.pkl'))
down_connection = np.array(joblib.load('data/down_connection.pkl'))

# Feature matrix used for classification: the "down" connections only.
connection = down_connection
# Alternative: both feature sets side by side.
# connection = np.hstack((up_connection, down_connection))

# get_color is called with [1, 2, 3, 4] in place of a colour palette; its
# result is used as the class labels (presumably one code per distinct value
# of rate -- confirm against Utils.get_color).
labels_int = np.array(get_color(rate, [1, 2, 3, 4]))

# Leave-one-out evaluation: fit on all samples but one, predict the held-out
# sample, and count how many held-out predictions match their label.
correct = 0
for train_idx, test_idx in LeaveOneOut().split(connection):
    fitted = knn(connection[train_idx], labels_int[train_idx], 3)
    predicted = fitted.predict(connection[test_idx])
    if predicted == labels_int[test_idx]:
        correct += 1

# Fraction of held-out samples predicted correctly.
print(correct / len(rate))