return data, [i[-2] for i in labels] sleep_data, sleep_labels = read_data_by_sheets('data/sleep.xlsx') # PCA dim_data, ratio, result = get_pca(sleep_data, c=2, with_normalize=False) # print(ratio) # 绘图13 x = [i[0] for i in dim_data] y = [i[1] for i in dim_data] default_colors = ['r', 'b', 'g', 'c', 'm'] colors = get_color(sleep_labels, default_colors) print('Drawing...') draw_scatter(x, y, sleep_labels, colors) default_colors = [[0, 0.8, 1], [0, 0.5, 0.5], [0.2, 0.8, 0.8], [0.2, 0.4, 1], [0.6, 0.8, 1], [1, 0.6, 0.8], [0.8, 0.6, 1], [1, 0.8, 0.6], [1, 0, 0], [0, 1, 0]] for j in range(1, 10): acc_arr = [] # 数据集切分 train_data, test_data, train_label, test_label = train_test_split(sleep_data, sleep_labels, test_size=(j / 10), shuffle=True) # knn 模型遍历训练 for i in range(1, 51): knn_model = KNeighborsClassifier(n_neighbors=i) knn_model.fit(train_data, train_label) predict_label = knn_model.predict(test_data)
# --- Dimensionality reduction ---------------------------------------------
# Embed X with t-SNE (perp=5, normalization switched on).
dim_data = t_SNE(X, perp=5, with_normalize=True)
# Alternative projection, kept for quick switching:
# dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)

# Split the embedding into its first two coordinates for plotting.
x, y = [], []
for point in dim_data:
    x.append(point[0])
    y.append(point[1])

# --- Reference labels -----------------------------------------------------
# Read the 'Labels' entry of the .mat file and unwrap the nested [0][0]
# element of every record.
raw_labels = read_from_mat('data/corr/Labels_islet.mat')['Labels']
labels = [entry[0][0] for entry in raw_labels]
# print(labels)

# --- Clustering -----------------------------------------------------------
# Build the hca model on the embedding and derive predicted labels from it.
# NOTE(review): 6 is presumably the requested number of clusters -- confirm
# against hca_labels.
model = hca(dim_data)
labels_predict = hca_labels(model, 6)
# print(labels_predict)

# --- Colors ---------------------------------------------------------------
# Color list derived from the predicted labels and the palette below.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels_predict, default_colors)

# --- Report and draw ------------------------------------------------------
# Print the ARI between reference and predicted labels, then show the
# dendrogram and the scatter plot colored by predicted label.
ari_score = ARI(labels, labels_predict)
print('ARI:', ari_score)
hca_dendrogram(model)
draw_scatter(x, y, labels_predict, colors)
import numpy as np
import joblib

# --- Labels ---------------------------------------------------------------
# Keep the last field of every row, then drop the first entry.
# NOTE(review): the dropped entry is presumably a header row -- confirm
# against the label file.
label_rows = read_from_txt('data/human_islets_labels.txt')
last_fields = [row[-1] for row in label_rows]
labels = last_fields[1:]
print(len(labels))

# --- Expression data ------------------------------------------------------
# Transpose the table, discard the first row and first column of the
# transposed view, and convert what remains to float64.
X = read_from_txt('data/human_islets.txt')
X = X.T[1:, 1:]
X = X.astype(np.float64)
print(X.shape)

# Optional caching of the parsed inputs (disabled):
# joblib.dump(X, 'datasets/human_islets.pkl')
# joblib.dump(labels, 'datasets/human_islets_labels.pkl')

# --- Dimensionality reduction ---------------------------------------------
# Alternative embedding (t-SNE), kept for quick switching:
# dim_data = t_SNE(X, perp=5, with_normalize=True)
# Active projection: PCA with c=2 and normalization switched on.
dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)

# Split the projection into its first two coordinates for plotting.
x, y = [], []
for point in dim_data:
    x.append(point[0])
    y.append(point[1])

# --- Plot -----------------------------------------------------------------
# Color list derived from the labels (no explicit palette is passed).
colors = get_color(labels)
draw_scatter(x, y, labels, colors)
from Utils import get_color, draw_scatter, read_from_txt

# Load the table from pca.txt (head=True is forwarded to the reader).
data = read_from_txt('pca.txt', head=True)
# print(data)

# Three RGB triples, each integer channel divided by 256.
# NOTE(review): 8-bit channels are usually normalized by 255 -- confirm that
# 256 is intended before changing, since it slightly alters the colors.
default_colors = [
    [channel / 256 for channel in rgb]
    for rgb in ((138, 158, 202), (246, 140, 99), (98, 194, 164))
]

# Translate the two-letter codes in column 0 into display names.
label_dict = {'DD': 'Duroc', 'LL': 'Landrace', 'YY': 'Yorkshire'}
labels = [label_dict[code] for code in data[:, 0]]

# Columns 2 and 3 hold the plotted coordinates; convert them to floats.
x = list(map(float, data[:, 2]))
y = list(map(float, data[:, 3]))
print(x)
print(y)

# Color list derived from the labels and the palette above.
colors = get_color(labels, default_colors)
print(len(colors))

draw_scatter(
    x,
    y,
    labels,
    colors,
    xlabel='PC1(56.84%)',
    ylabel='PC2(35.73%)',
)
genes_P.append(genes[idx]) idx += 1 GEM_P = np.array(GEM_P) print(GEM_P.shape) print(len(genes_P)) # In[ ]: dim_data, ratio, result = get_pca(GEM_P.T, c=20, with_normalize=True) dim_data = t_SNE(dim_data, perp=30, with_normalize=True) x = [i[0] for i in dim_data] y = [i[1] for i in dim_data] default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k'] colors = get_color(labels, default_colors) draw_scatter(x, y, labels, colors) # In[4]: print(genes_P.index('POU5F1')) print(genes_P.index('GATA6')) # In[ ]: # Scatter of gene GATA6 and POU5F1 G_x = [] G_y = [] for cell in GEM_P.T: if float(cell[6394]) > 0: