LastEditTime: 2020-08-10 16:39:49 LastEditors: CheeReus_11 ''' import matplotlib.pyplot as plt from ReadData import read_from_mat from DimensionReduction import t_SNE, get_pca from Utils import get_color, draw_scatter from Clustering import hca, hca_dendrogram, hca_labels from Metrics import ARI # read data X = read_from_mat('data/corr/A_islet.mat')['A'] # dimenison reduction # t-SNE dim_data = t_SNE(X, perp=5, with_normalize=True) # PCA # dim_data, ratio, result = get_pca(X, c=2, with_normalize=True) # get two coordinates x = [i[0] for i in dim_data] y = [i[1] for i in dim_data] # read labels labels = read_from_mat('data/corr/Labels_islet.mat')['Labels'] labels = [i[0][0] for i in labels] # print(labels) # knn training and predict model = hca(dim_data)
import joblib

from DimensionReduction import t_SNE, get_pca
from Utils import get_color, draw_scatter

# Load the pickled data matrix and its labels (presumably autoencoder output,
# going by the 'ae_output' directory name). Paths are relative to the current
# working directory.
data = joblib.load('ae_output/ae_dim_data_99.pkl')
print(data.shape)
labels = joblib.load('ae_output/labels.pkl')
print(len(labels))

# Reduce with t_SNE, then split the result into its first two coordinates
# for plotting.
dim_data = t_SNE(data, perp=40, with_normalize=True)
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Palette handed to get_color. The previous list contained 'c' twice, so two
# palette slots would have been drawn in the same cyan; the duplicate is
# replaced by 'orange' so all eight entries are distinct and the list length
# is unchanged.
# NOTE(review): assumes get_color passes these strings on to matplotlib
# unchanged -- confirm that a named colour such as 'orange' is accepted.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'orange', 'k']
colors = get_color(labels, default_colors)
draw_scatter(x, y, labels, colors)
# Keep only the rows of GEM that have at least 10 strictly positive values,
# and record the matching name from `genes` for every kept row.
# GEM, genes, GEM_P and genes_P are defined earlier in the file.
for idx, gene in enumerate(GEM):
    gene = np.array(gene)
    # count_nonzero on the boolean mask counts the positive entries without
    # materialising the filtered copy that len(gene[gene > 0]) would build.
    if np.count_nonzero(gene > 0) >= 10:
        GEM_P.append(gene)
        genes_P.append(genes[idx])
GEM_P = np.array(GEM_P)
print(GEM_P.shape)
print(len(genes_P))


# In[ ]:


# PCA down to 20 components on the transposed filtered matrix, then t_SNE on
# the PCA output. (Presumably the transpose puts samples on the rows --
# confirm against get_pca.)
dim_data, ratio, result = get_pca(GEM_P.T, c=20, with_normalize=True)
dim_data = t_SNE(dim_data, perp=30, with_normalize=True)

# First two coordinates of the embedding, as plain lists for plotting.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Colour the scatter by label; `labels` is defined earlier in the file.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels, default_colors)
draw_scatter(x, y, labels, colors)


# In[4]:


# Positions of the two marker genes inside the filtered gene list.
# index() raises ValueError if a gene did not survive the filter above.
print(genes_P.index('POU5F1'))
print(genes_P.index('GATA6'))


# In[ ]:


# Scatter of gene GATA6 and POU5F1
# File path of the spreadsheet that holds the labels
filePath = 'data/41592_2017_BFnmeth4179_MOESM235_ESM.xlsx'

# NOTE(review): xlrd 2.0 and later only read .xls files -- confirm that the
# installed xlrd version can still open this .xlsx workbook.
x1 = xlrd.open_workbook(filePath)
sheet = x1.sheets()

# Labels are column index 3 of the first sheet; the [1:] slice drops the
# first row (presumably a header row -- confirm against the workbook).
labels = sheet[0].col_values(3)[1:]

# Disabled: cache the matrix and labels with joblib.
# joblib.dump(X, 'datasets/human_islets.pkl')
# joblib.dump(labels, 'datasets/human_islets_labels.pkl')

# X is defined earlier in the file.
print(labels)
print(X.shape)
print(datetime.datetime.now())

# PCA down to 11 components; print the summed `ratio` values returned with it.
dim_data, ratio, result = get_pca(X, c=11, with_normalize=False)
print(sum(ratio))

# t-SNE on the PCA output.
dim_data = t_SNE(dim_data, perp=5, with_normalize=False)

# First two coordinates of every embedded point.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Colour list derived from the labels and the palette below.
default_colors = ['b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels, default_colors)

# Plot.
draw_scatter(x, y, labels, colors)
import datetime

from Clustering import k_means
from Metrics import ARI, NMI

# joblib, t_SNE, get_color and draw_scatter are not imported in this part of
# the file; they are expected to be in scope already.

# Load the pickled data matrix and its labels.
X = joblib.load('ae_output/ae_dim_data_99.pkl')
labels = joblib.load('ae_output/labels.pkl')
print(labels)
print(X.shape)

# A timestamp is printed before and after the t-SNE step.
print(datetime.datetime.now())

# PCA step, kept disabled:
# dim_data, ratio, result = get_pca(X, c=11, with_normalize=False)
# print(sum(ratio))

# t-SNE directly on X.
dim_data = t_SNE(X, perp=40, with_normalize=False)
print(datetime.datetime.now())

# First two coordinates of every embedded point.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Colour list derived from the labels and the palette below.
default_colors = ['b', 'g', 'r', 'm', 'y', 'c']
colors = get_color(labels, default_colors)

# Plot.
draw_scatter(x, y, labels, colors)

# Cluster the un-embedded matrix X with k = 6 and print the ARI score of the
# predicted labels against the loaded labels.
predict_labels = k_means(X, k=6)
print(ARI(labels, predict_labels))