def main():
    """Train two class-conditional GMMs and classify the dev or test split.

    The run mode is read from ``sys.argv[1]``:

    * ``"display"`` -- call ``display`` on the combined train+dev data, then
      exit with status 0.
    * ``"train"``   -- fit on the training split and print the accuracy
      measured on the dev split.
    * anything else -- fit on train+dev, label the test split and emit it
      through ``testD.output()``.

    Class labels are expected to be 1 and 2 (one GMM is fitted per label).

    Raises:
        SystemExit: with a usage message when no mode argument is supplied,
            or with status 0 after the ``display`` mode finishes.
    """
    # Fail fast with a readable message instead of an IndexError raised
    # after the data has already been loaded.
    if len(sys.argv) < 2:
        sys.exit(
            "usage: MODE argument required "
            "(display | train | any other value for a submit run)"
        )
    mode = sys.argv[1]

    trainD, devD, testD = init()
    allD = Data(trainD, devD)

    if mode == "display":
        display(allD)
        # sys.exit is used rather than the site-provided ``exit`` helper,
        # which is not available in every interpreter configuration.
        sys.exit(0)

    if mode == "train":
        # local settings
        x = trainD.nx()
        y = trainD.ny()
        xx = devD.nx()
    else:
        # submit settings
        x = allD.nx()
        y = allD.ny()
        xx = testD.nx()

    # One mixture model per class label.
    gmm1 = GMM(x[y == 1], round=500, K=4)
    gmm2 = GMM(x[y == 2], round=500, K=4)
    print("GMM1.dist: ", gmm1.pi)
    print("GMM2.dist: ", gmm2.pi)

    # Each model's score is scaled by the sample count of the *other* class.
    # NOTE(review): if predict() returns a likelihood this weights by the
    # inverse class prior -- confirm that this is intended.
    r1 = gmm1.predict(xx) * np.sum(y == 2)
    r2 = gmm2.predict(xx) * np.sum(y == 1)
    # Label 2 where the class-2 score wins, label 1 otherwise.
    result = 1 + (r1 < r2) * 1

    if mode == "train":
        # local settings
        print("accuracy: ", sum(result == devD.ny()) / devD.ny().shape[0])
    else:
        # submit settings
        testD.y = list(result)
        testD.output()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 21 02:43:24 2019

@author: maachou

Demo script: generate six synthetic blobs, fit the project's GMM with six
components, and scatter-plot the points before and after cluster assignment.
"""
# make_blobs is imported from the public package path; the private
# ``sklearn.datasets.samples_generator`` module was deprecated in
# scikit-learn 0.22 and removed in 0.24.
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from GMM import GMM

mix = GMM(K=6)
# 100 two-column samples around 6 centres; fixed seed for repeatable data.
X, Y = make_blobs(cluster_std=0.5, random_state=20, n_samples=100, centers=6)
# Raw points, no colouring.
plt.scatter(X[:, 0], X[:, 1])
print(X.shape)
mix.fit(X)
# NOTE(review): presumably reports the fitted component means -- confirm in GMM.
mix.Means()
# The generated labels are discarded; Y now holds the model's predictions.
Y = mix.predict(X)
# Same points again, coloured by predicted cluster.
plt.scatter(X[:, 0], X[:, 1], c=Y)
# 构造聚类数据,X是特征数据,Y是相应的label,此时生成的是半环形图 X, Y = make_moons(n_samples=1000, noise=0.04, random_state=0) # 划分数据,一部分用于训练聚类,一部分用于分类 X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) model = GMM(X_train, K=10) # 获取各个类别的概率 result = model.fit() print('每条数据属于各个类别的概率如下: ', result) # 获取每条数据所在的类别 label_train = np.argmax(result, axis=1) print(label_train) # 获取测试数据所在的类别的概率 result_test = model.predict(X_test) # 获取测试数据的类别 label_test = np.argmax(result_test, axis=1) # 展示原始数据分布及其label ax1 = plt.subplot(211) ax1.scatter(X[:, 0], X[:, 1], s=50, c=Y, marker='x', cmap='viridis', label="Original") ax1.set_title('Original Data and label Distribution') # 将聚类后的训练数据和其相应的label拼接起来展示
""" import DatasetUtil as DS from HTMLTable import HTMLTable import re from GMM import GMM if __name__ == "__main__": print("\t============ Chap9 EM for GMM ============") ds = DS.DATAUtil() x_train, y_train = ds.load(True, r".\dataset.dat") model = GMM() model.train(x_train) y_pred = model.predict(x_train) y_train = ds.y_int2str(y_train) table = HTMLTable(caption='Iris Data Cluster') table.append_header_rows(( ('No.', 'A1', 'A2', 'A3', 'A4', 'Classification', ''), ('', '', '', '', '', 'Label-C', 'Predict-C'), )) table[0][0].attr.rowspan = 2 table[0][1].attr.rowspan = 2 table[0][2].attr.rowspan = 2 table[0][3].attr.rowspan = 2 table[0][4].attr.rowspan = 2 table[0][5].attr.colspan = 2 for i in range(x_train.shape[0]):