Esempio n. 1
0
def calc_and_output(sim):
    """Cluster ``sim`` into 8 groups and score the result against ``labels``.

    Fits an ``AgglomerativeClustering`` model on ``sim``, prints the score
    returned by ``ARI`` for the module-level reference ``labels`` versus the
    predicted labels, and returns both.

    Args:
        sim: Data handed directly to ``AgglomerativeClustering.fit``
            (presumably a similarity/feature matrix -- TODO confirm).

    Returns:
        A ``(labels_predict, ari)`` tuple: the fitted model's ``labels_`` and
        the value of ``ARI(labels, labels_predict)``.
    """
    # The ``affinity`` keyword was deprecated in scikit-learn 1.2 and removed
    # in 1.4 (renamed to ``metric``). Euclidean distance is the default under
    # either name, so omitting it keeps the behaviour on every version.
    model = AgglomerativeClustering(n_clusters=8)
    labels_predict = model.fit(sim).labels_

    # Score once and reuse the value for both the log line and the return.
    ari = ARI(labels, labels_predict)
    print('ARI:', ari)
    return labels_predict, ari
Esempio n. 2
0
# One RGB triple per curve drawn below (ten are defined, nine are used).
default_colors = [
    [0, 0.8, 1], [0, 0.5, 0.5], [0.2, 0.8, 0.8], [0.2, 0.4, 1], [0.6, 0.8, 1],
    [1, 0.6, 0.8], [0.8, 0.6, 1], [1, 0.8, 0.6], [1, 0, 0], [0, 1, 0],
]

# Sweep the held-out fraction from 0.1 to 0.9; each fraction gives one
# accuracy-versus-k curve.
for j in range(1, 10):
    test_fraction = j / 10

    # Split the data set
    train_data, test_data, train_label, test_label = train_test_split(
        sleep_data, sleep_labels, test_size=test_fraction, shuffle=True)

    # Train one KNN model per neighbour count and record its test accuracy
    acc_arr = []
    x = range(1, 51)
    for i in x:
        knn_model = KNeighborsClassifier(n_neighbors=i)
        knn_model.fit(train_data, train_label)
        predict_label = knn_model.predict(test_data)
        acc = accuracy(predict_label, test_label)
        print(ARI(test_label, predict_label))
        acc_arr.append(acc)

    plt.plot(x, acc_arr, '*-', color=default_colors[j - 1],
             label=f'test percent:{test_fraction}')

# Figure 14
my_x_ticks = [1, *range(5, 51, 5)]
plt.xticks(my_x_ticks)
plt.legend(loc='best')
plt.grid(axis='x', linestyle='--')
plt.xlabel('k')
plt.ylabel('accuracy')
plt.show()
# Dimension reduction
# t-SNE projection of X
dim_data = t_SNE(X, perp=5, with_normalize=True)

# PCA (alternative projection, currently disabled)
# dim_data, ratio, result = get_pca(X, c=2, with_normalize=True)

# Pull the first two coordinates of every embedded point for plotting
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Read the reference labels; every entry is unwrapped with [0][0]
labels = [
    entry[0][0]
    for entry in read_from_mat('data/corr/Labels_islet.mat')['Labels']
]
# print(labels)

# Hierarchical clustering on the embedding
# (the 6 passed to hca_labels is presumably the cluster count -- TODO confirm)
model = hca(dim_data)
labels_predict = hca_labels(model, 6)
# print(labels_predict)

# Build the color list from the predicted labels
default_colors = list('cbgrmyk')
colors = get_color(labels_predict, default_colors)

# Report the ARI score, then draw the dendrogram and the scatter plot
ari_score = ARI(labels, labels_predict)
print('ARI:', ari_score)
hca_dendrogram(model)
draw_scatter(x, y, labels_predict, colors)
# Load the stored labels
labels = joblib.load('ae_output/labels.pkl')

print(labels)
print(X.shape)

# PCA (alternative projection, currently disabled)
# dim_data, ratio, result = get_pca(X, c=11, with_normalize=False)
# print(sum(ratio))

# t-SNE projection, bracketed by timestamps so its run time can be read off
print(datetime.datetime.now())
dim_data = t_SNE(X, perp=40, with_normalize=False)
print(datetime.datetime.now())

# Pull the first two coordinates of every embedded point for plotting
x = [point[0] for point in dim_data]
y = [point[1] for point in dim_data]

# Build the color list from the loaded labels
default_colors = list('bgrmyc')
colors = get_color(labels, default_colors)

# Plot
draw_scatter(x, y, labels, colors)

# Cluster X with k-means (k=6) and print the ARI and NMI scores against labels
predict_labels = k_means(X, k=6)
for score in (ARI, NMI):
    print(score(labels, predict_labels))


Esempio n. 5
0
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.neighbors import KNeighborsClassifier
from Utils import get_color, draw_scatter
from Metrics import ARI, accuracy, NMI, F1

# Load the preprocessed data set; the file unpacks into three objects.
# NOTE(review): joblib.load unpickles the file, so it should only be pointed
# at trusted data.
print('Loading data...')
companies, rate, data_after_process = joblib.load('data/data_after_process.pkl')


def knn(X, y, k):
    """Return a ``KNeighborsClassifier`` with ``k`` neighbours fitted on ``X``, ``y``.

    Args:
        X: Training samples, passed straight to ``fit``.
        y: Target values for ``X``, passed straight to ``fit``.
        k: Value used for ``n_neighbors``.

    Returns:
        The fitted classifier.
    """
    # scikit-learn's ``fit`` returns the estimator itself, so the fitted
    # model can be returned directly.
    return KNeighborsClassifier(n_neighbors=k).fit(X, y)


# Fit a 3-neighbour classifier on the whole data set and predict that same
# data, so the scores below are measured on the training samples.
model = knn(data_after_process, rate, 3)
labels_predict = model.predict(data_after_process)

print('Accuracy:', accuracy(labels_predict, rate))

# Build the color list from the predicted labels
# (not used further in the visible code)
default_colors = list('cbgrmyk')
colors = get_color(labels_predict, default_colors)

# Report the remaining metrics against the reference ratings
for metric_tag, metric_fn in (('ARI:', ARI), ('NMI:', NMI), ('F1:', F1)):
    print(metric_tag, metric_fn(rate, labels_predict))