Exemple #1
0
    data = np.array(data)
    return data, [i[-2] for i in labels]


# Load the sleep samples together with their labels.
sleep_data, sleep_labels = read_data_by_sheets('data/sleep.xlsx')

# Reduce with get_pca (c=2, normalization switched off).
dim_data, ratio, result = get_pca(sleep_data, c=2, with_normalize=False)

# Split the reduced points into the two plot coordinates.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Colors are derived from the labels by get_color using this palette.
default_colors = ['r', 'b', 'g', 'c', 'm']
colors = get_color(sleep_labels, default_colors)

print('Drawing...')
draw_scatter(x, y, sleep_labels, colors)

# Ten three-component color entries with values in 0..1 (presumably RGB).
default_colors = [[0, 0.8, 1], [0, 0.5, 0.5], [0.2, 0.8, 0.8], [0.2, 0.4, 1], [0.6, 0.8, 1], [1, 0.6, 0.8],
                  [0.8, 0.6, 1], [1, 0.8, 0.6], [1, 0, 0], [0, 1, 0]]
# j / 10 is used as the test fraction, so this sweeps test sizes 0.1 .. 0.9.
for j in range(1, 10):

    # NOTE(review): presumably collects accuracies for this split; the code
    # that fills it is not visible in this excerpt.
    acc_arr = []

    # Split the dataset (shuffled) into training and test parts.
    train_data, test_data, train_label, test_label = train_test_split(sleep_data, sleep_labels, test_size=(j / 10), shuffle=True)

    # Build a KNN classifier for every neighbor count from 1 to 50
    # (fitting/evaluation is cut off after this line).
    for i in range(1, 51):
        knn_model = KNeighborsClassifier(n_neighbors=i)
Exemple #2
0
# Accumulate per-class sums over the first 60 training epochs.
# NOTE(review): `non_p300_wave_avg` and `p300_wave_avg` are initialized before
# this excerpt; presumably they start at zero - confirm.
non_p300_count = 0
p300_count = 0
for i in range(60):
    # Normalize the epoch, then keep the first column of the result.
    wave = np.array(get_normalize(train_data[i])).T[0]
    if train_event[i] == 0:
        non_p300_wave_avg = non_p300_wave_avg + wave
        non_p300_count += 1
    else:
        p300_wave_avg = p300_wave_avg + wave
        p300_count += 1

# Divide by the number of epochs actually accumulated per class instead of the
# previously hard-coded 50 / 10, so the result is a true mean for any class
# balance. max(..., 1) avoids a division by zero when a class is absent.
non_p300_wave_avg = non_p300_wave_avg / max(non_p300_count, 1)
p300_wave_avg = p300_wave_avg / max(p300_count, 1)

print(train_data.shape)

print(train_data.shape, train_event.shape)

# NOTE(review): neither `default_colors` nor `colors` is used by the plot below.
default_colors = ['r', 'b']
colors = get_color(range(8))

# 200 x positions: 0, 4, ..., 796 (labelled as milliseconds on the axis).
x = range(0, 800, 4)

plt.plot(x, non_p300_wave_avg, c='r', label='non p300')
plt.plot(x, p300_wave_avg, c='k', label='p300')
plt.xlabel('time(ms)')
# NOTE(review): computed but never handed to plt.xticks.
my_x_ticks = np.arange(0, 200, 50)
plt.grid()
plt.legend(loc='best')
plt.show()




Exemple #3
0
from Utils import get_color, draw_scatter, read_from_txt

# Load the table from pca.txt (head=True is forwarded to read_from_txt).
data = read_from_txt('pca.txt', head=True)

# Three-color palette; every channel is an integer value divided by 256.
default_colors = [
    [138 / 256, 158 / 256, 202 / 256],
    [246 / 256, 140 / 256, 99 / 256],
    [98 / 256, 194 / 256, 164 / 256],
]

# Short codes found in column 0 mapped to the names used in the plot.
label_dict = {'DD': 'Duroc', 'LL': 'Landrace', 'YY': 'Yorkshire'}

# Column 0 holds the codes; columns 2 and 3 hold the two plotted coordinates.
labels = [label_dict[code] for code in data[:, 0]]
x = [float(value) for value in data[:, 2]]
y = [float(value) for value in data[:, 3]]
print(x)
print(y)

colors = get_color(labels, default_colors)
print(len(colors))

draw_scatter(x, y, labels, colors, xlabel='PC1(56.84%)', ylabel='PC2(35.73%)')
# Dimension reduction with t-SNE.
# Alternative (PCA): dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)
dim_data = t_SNE(X, perp=5, with_normalize=True)

# Split the reduced points into the two plot coordinates.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Reference labels: each entry is unwrapped through two levels of nesting.
labels = [entry[0][0] for entry in read_from_mat('data/corr/Labels_islet.mat')['Labels']]

# Build the hca model on the reduced data and derive predicted labels from it
# (presumably hierarchical clustering cut into 6 clusters - confirm).
model = hca(dim_data)
labels_predict = hca_labels(model, 6)

# Colors are derived from the predicted labels using this palette.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y', 'k']
colors = get_color(labels_predict, default_colors)

# Report agreement with the reference labels, then draw both figures.
print('ARI:', ARI(labels, labels_predict))
hca_dendrogram(model)
draw_scatter(x, y, labels_predict, colors)
Exemple #5
0
import numpy as np
import joblib

# Labels: last field of every row, with the first entry dropped
# (presumably a header row).
labels = [row[-1] for row in read_from_txt('data/human_islets_labels.txt')][1:]
print(len(labels))

# Data: transpose the table, drop its first row and first column, and
# convert the remainder to float64.
X = read_from_txt('data/human_islets.txt').T[1:, 1:].astype(np.float64)
print(X.shape)
# Optional caching of the parsed inputs:
# joblib.dump(X, 'datasets/human_islets.pkl')
# joblib.dump(labels, 'datasets/human_islets_labels.pkl')

# Dimension reduction with PCA.
# Alternative (t-SNE): dim_data = t_SNE(X, perp=5, with_normalize=True)
dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)

# Split the reduced points into the two plot coordinates.
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Colors derived from the labels with get_color's default palette.
colors = get_color(labels)

draw_scatter(x, y, labels, colors)
Exemple #6
0
        C=params['C'],
        degree=params['degree'],
        kernel=params['kernel'],
        gamma=params['gamma'],
        decision_function_shape=params['decision_function_shape'],
        verbose=0)
    scores = cross_val_score(cross_model, x, y.ravel(), cv=s)
    return scores


def svm_predict(x, model):
    """Return whatever ``model.predict`` yields for the samples ``x``."""
    return model.predict(x)


# Map each rating onto one of the values 1..4 via get_color and keep the
# result as an array so it can be indexed with the split indices below.
labels_int = np.array(get_color(rate, [1, 2, 3, 4]))
up_connection = np.array(up_connection)

# Non-cross-validated variant kept for reference:
# model = knn(up_connection, labels_int, 3)
# labels_predict = model.predict(up_connection)

# Leave-one-out evaluation: fit on all samples but one, predict the held-out
# sample, and count the matches.
correct = 0
loo = LeaveOneOut()
for train, test in loo.split(up_connection):
    model = knn(up_connection[train], labels_int[train], 3)
    labels_predict = model.predict(up_connection[test])
    if labels_predict == labels_int[test]:
        correct += 1
Exemple #7
0
# Load the matrix stored under key 'A' and report its shape.
X = read_from_mat('data/corr/A_islet.mat')['A']

print(X.shape)

# Reference labels, unwrapped through two levels of nesting.
# NOTE(review): not used by the visible remainder of this script.
labels = read_from_mat('data/corr/Labels_islet.mat')['Labels']
labels = [i[0][0] for i in labels]

# Dimension reduction (for plotting only): t-SNE.
dim_data = t_SNE(X, perp=5, with_normalize=True)

# Alternative: PCA
# dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)
# print(ratio)

# Split the reduced points into the two plot coordinates.
x = [i[0] for i in dim_data]
y = [i[1] for i in dim_data]

# Spectral clustering into 6 clusters, run on the original X rather than on
# the reduced coordinates.
labels_predict = SpectralClustering(
    n_clusters=6, affinity='nearest_neighbors').fit_predict(X)
print(labels_predict)

# Derive colors from the predicted labels using the six-color palette. The
# palette used to be defined here but was never handed to get_color; it is
# now passed explicitly, matching the sibling clustering script.
default_colors = ['c', 'b', 'g', 'r', 'm', 'y']
colors = get_color(labels_predict, default_colors)

# plot
draw_scatter(x, y, labels_predict, colors)
Exemple #8
0
import joblib
from Utils import get_color, draw_scatter3d

print('Loading data...')
companies, rate, data_after_process = joblib.load('data/data_train_after.pkl')

# The first three components of every sample become the 3-D coordinates.
x = [sample[0] for sample in data_after_process]
y = [sample[1] for sample in data_after_process]
z = [sample[2] for sample in data_after_process]

# Colors derived from the ratings; colors=None is passed to get_color explicitly.
colors = get_color(rate, colors=None)

# Divide the third axis by its largest value before plotting.
z_max = max(z)
draw_scatter3d(x, y, [value / z_max for value in z], rate, colors=colors)
Exemple #9
0
import joblib
from sklearn.model_selection import LeaveOneOut
from Utils import get_color
import numpy as np
from Clustering import knn

print('Loading data...')
companies, rate, data_after_process = joblib.load('data/data_train_after.pkl')
up_connection = np.array(joblib.load('data/up_connection.pkl'))
down_connection = np.array(joblib.load('data/down_connection.pkl'))

# Features used for classification: the "down" connections only.
# To use both sets instead: np.hstack((up_connection, down_connection))
connection = down_connection

# Map each rating onto one of the values 1..4 via get_color.
labels_int = np.array(get_color(rate, [1, 2, 3, 4]))

# Leave-one-out evaluation: fit on all samples but one, predict the held-out
# sample, and count the matches.
correct = 0
loo = LeaveOneOut()
for train, test in loo.split(connection):
    model = knn(connection[train], labels_int[train], 3)
    labels_predict = model.predict(connection[test])
    hit = labels_predict == labels_int[test]
    if hit:
        correct += 1

# Fraction of held-out samples that were predicted correctly.
print(correct / len(rate))