Exemple #1
0
    return data, [i[-2] for i in labels]


# Load the sleep samples and their labels from the Excel workbook.
sleep_data, sleep_labels = read_data_by_sheets('data/sleep.xlsx')

# PCA with c=2 (presumably two components -- confirm against get_pca) and
# normalization switched off.
dim_data, ratio, result = get_pca(
    sleep_data,
    c=2,
    with_normalize=False,
)
# Uncomment to inspect the second value returned by get_pca:
# print(ratio)

# Plotting: palette handed to get_color together with the labels.
default_colors = ['r', 'b', 'g', 'c', 'm']

# First and second coordinate of every projected sample.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

colors = get_color(sleep_labels, default_colors)
print('Drawing...')
draw_scatter(x, y, sleep_labels, colors)

# Ten colour triples with every component in [0, 1] (presumably RGB).
# NOTE(review): not referenced again in the visible part of this snippet.
default_colors = [[0, 0.8, 1], [0, 0.5, 0.5], [0.2, 0.8, 0.8], [0.2, 0.4, 1], [0.6, 0.8, 1], [1, 0.6, 0.8],
                  [0.8, 0.6, 1], [1, 0.8, 0.6], [1, 0, 0], [0, 1, 0]]
# Sweep the test fraction: j / 10 is passed as test_size for j = 1..9
# (0.1 .. 0.9 under Python 3 true division).
for j in range(1, 10):

    # Reset for every split.
    # NOTE(review): nothing is appended to acc_arr in the visible code; the
    # snippet looks truncated before any accuracy bookkeeping -- confirm.
    acc_arr = []

    # Split the dataset into shuffled train/test parts.
    train_data, test_data, train_label, test_label = train_test_split(sleep_data, sleep_labels, test_size=(j / 10), shuffle=True)

    # Train one KNN model per neighbour count (n_neighbors = 1..50) on the
    # training part and predict labels for the test part.
    for i in range(1, 51):
        knn_model = KNeighborsClassifier(n_neighbors=i)
        knn_model.fit(train_data, train_label)
        predict_label = knn_model.predict(test_data)
# Dimensionality reduction.
# t-SNE is the active method; the PCA alternative is kept for reference:
# dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)
dim_data = t_SNE(X, perp=5, with_normalize=True)

# First and second coordinate of every embedded sample, used for plotting.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Reference labels: read the 'Labels' entry of the .mat file and unwrap each
# element with [0][0].
labels = [
    entry[0][0]
    for entry in read_from_mat('data/corr/Labels_islet.mat')['Labels']
]
# print(labels)

# Build the hca model on the embedding and derive predicted labels from it
# (the argument 6 is presumably the number of clusters -- confirm against
# hca_labels).
model = hca(dim_data)
labels_predict = hca_labels(model, 6)
# print(labels_predict)

# Colours are assigned from the predicted labels, not the reference ones.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels_predict, default_colors)

# Report the ARI between reference and predicted labels, then draw the
# dendrogram and the scatter plot.
print('ARI:', ARI(labels, labels_predict))
hca_dendrogram(model)
draw_scatter(x, y, labels_predict, colors)
Exemple #3
0
import numpy as np
import joblib

# Labels: take the last field of every row of the label file, then drop the
# first entry (presumably a header row -- confirm against the file).
labels = [
    row[-1] for row in read_from_txt('data/human_islets_labels.txt')
][1:]
print(len(labels))

# Data: transpose the table, strip the first row and first column of the
# transposed result, and convert what remains to float64.
X = read_from_txt('data/human_islets.txt').T[1:, 1:].astype(np.float64)
print(X.shape)
# Optional caching of the parsed inputs:
# joblib.dump(X, 'datasets/human_islets.pkl')
# joblib.dump(labels, 'datasets/human_islets_labels.pkl')

# Dimensionality reduction.
# PCA is the active method; the t-SNE alternative is kept for reference:
# dim_data = t_SNE(X, perp=5, with_normalize=True)
dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)

# First and second coordinate of every projected sample.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Colours derived from the labels; no palette is passed here.
colors = get_color(labels)

# Plot.
draw_scatter(x, y, labels, colors)
Exemple #4
0
from Utils import get_color, draw_scatter, read_from_txt

data = read_from_txt('pca.txt', head=True)

# print(data)

# Palette given as 8-bit RGB triples scaled to floats (presumably consumed
# as 0-1 colour components by draw_scatter -- confirm). An 8-bit channel
# tops out at 255, so 255 is the divisor; dividing by 256 would make a
# full-intensity channel fall short of 1.0.
default_colors = [[138 / 255, 158 / 255, 202 / 255],
                  [246 / 255, 140 / 255, 99 / 255],
                  [98 / 255, 194 / 255, 164 / 255]]

# Two-letter codes found in column 0 mapped to the names shown on the plot.
label_dict = {'DD': 'Duroc', 'LL': 'Landrace', 'YY': 'Yorkshire'}

# Column 0 holds the code of each row; a code missing from label_dict raises
# KeyError here.
labels = [label_dict[code] for code in data[:, 0]]

# Columns 2 and 3 hold the coordinates plotted on the x and y axes; they are
# converted to float explicitly.
x = [float(value) for value in data[:, 2]]
y = [float(value) for value in data[:, 3]]
print(x)
print(y)

colors = get_color(labels, default_colors)
print(len(colors))

draw_scatter(x, y, labels, colors, xlabel='PC1(56.84%)', ylabel='PC2(35.73%)')
Exemple #5
0
        genes_P.append(genes[idx])
    idx += 1

# Convert the collected rows to an array and report its shape together with
# the number of collected gene names.
GEM_P = np.array(GEM_P)
print(GEM_P.shape)
print(len(genes_P))

# In[ ]:

# Two-stage reduction on the transposed matrix: PCA with c=20 first, then
# t-SNE (perp=30) on the PCA output.
dim_data, ratio, result = get_pca(
    GEM_P.T,
    c=20,
    with_normalize=True,
)
dim_data = t_SNE(
    dim_data,
    perp=30,
    with_normalize=True,
)

# First and second coordinate of every embedded sample.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Palette handed to get_color together with the labels.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels, default_colors)
draw_scatter(x, y, labels, colors)

# In[4]:

# Print the positions of the genes POU5F1 and GATA6 within genes_P.
# Assuming genes_P is a plain list, .index raises ValueError when the name
# is absent.
print(genes_P.index('POU5F1'))
print(genes_P.index('GATA6'))

# In[ ]:

# Scatter of gene GATA6 and POU5F1

G_x = []
G_y = []

for cell in GEM_P.T:
    if float(cell[6394]) > 0: