예제 #1
0
def main():
    """Fit one GMM per class and label held-out samples by comparing scores.

    The first command-line argument selects the mode:

    * ``"display"`` -- call ``display`` on the ``Data`` object built from the
      train and dev splits, then exit with status 0.
    * ``"train"`` -- fit on the training split and print the accuracy on the
      dev split.
    * any other value -- fit on the train+dev ``Data`` object, assign the
      predicted labels to ``testD.y`` and call ``testD.output()``.

    Exits with a usage message when no mode argument is supplied.
    """
    # Validate the mode before loading any data so a missing argument fails
    # fast with a readable message instead of an IndexError.
    if len(sys.argv) < 2:
        sys.exit("usage: pass 'display', 'train', or any other value "
                 "(submit mode) as the first argument")
    mode = sys.argv[1]

    trainD, devD, testD = init()
    allD = Data(trainD, devD)
    if mode == "display":
        display(allD)
        # sys.exit is always available; the bare ``exit`` helper is injected
        # by the ``site`` module and is intended for interactive sessions.
        sys.exit(0)

    is_local = mode == "train"
    if is_local:
        # local settings: fit on train, evaluate on dev
        x = trainD.nx()
        y = trainD.ny()
        xx = devD.nx()
    else:
        # submit settings: fit on train+dev, predict the test split
        x = allD.nx()
        y = allD.ny()
        xx = testD.nx()

    # One mixture model per class label (labels are 1 and 2).
    gmm1 = GMM(x[y == 1], round=500, K=4)
    gmm2 = GMM(x[y == 2], round=500, K=4)
    print("GMM1.dist: ", gmm1.pi)
    print("GMM2.dist: ", gmm2.pi)

    # NOTE(review): each model's score is scaled by the sample count of the
    # OTHER class (class-1 model by count of y == 2 and vice versa). If a
    # class prior was intended the counts look swapped; left unchanged because
    # it may be deliberate re-balancing -- confirm with the author.
    r1 = gmm1.predict(xx) * np.sum(y == 2)
    r2 = gmm2.predict(xx) * np.sum(y == 1)
    # Label 2 where the class-2 score is strictly larger, otherwise label 1.
    result = 1 + (r1 < r2) * 1

    if is_local:
        # local settings: report the fraction of dev labels predicted correctly
        expected = devD.ny()
        print("accuracy: ", np.sum(result == expected) / expected.shape[0])
    else:
        # submit settings: store predictions on the test set and write them out
        testD.y = list(result)
        testD.output()
예제 #2
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 21 02:43:24 2019

@author: maachou
"""

# make_blobs is part of the public sklearn.datasets namespace; the former
# sklearn.datasets.samples_generator module no longer exists in current
# scikit-learn releases, so importing from it fails.
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from GMM import GMM

# Mixture model with K=6, matching the 6 blob centers generated below.
mix = GMM(K=6)
X, Y = make_blobs(cluster_std=0.5, random_state=20, n_samples=100, centers=6)
# First scatter: the raw samples without any colouring.
plt.scatter(X[:, 0], X[:, 1])
print(X.shape)
mix.fit(X)
# Return value is discarded; presumably this reports the fitted means --
# confirm against the project-local GMM class.
mix.Means()
# The generated labels in Y are replaced by the model's predictions, which
# then colour the second scatter.
Y = mix.predict(X)
plt.scatter(X[:, 0], X[:, 1], c=Y)
예제 #3
0
# Build the clustering data: X holds the features and Y the corresponding
# labels; make_moons generates two interleaving half-ring shapes.
X, Y = make_moons(n_samples=1000, noise=0.04, random_state=0)
# Split the data: one part is used to fit the clustering model, the other to
# classify afterwards (test_size=0.2 holds out 20% of the samples).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

model = GMM(X_train, K=10)
# Get the per-class probabilities (per the original author's comment; GMM is
# project-local, so the exact return shape should be confirmed there).
result = model.fit()
print('每条数据属于各个类别的概率如下: ', result)

# Get the class each training sample falls into: index of the largest value
# along axis 1.
label_train = np.argmax(result, axis=1)
print(label_train)

# Get the class probabilities for the test data (presumably the same layout as
# the fit() result -- confirm against GMM.predict).
result_test = model.predict(X_test)
# Get the class of each test sample.
label_test = np.argmax(result_test, axis=1)

# Show the original data distribution together with its labels.
ax1 = plt.subplot(211)
ax1.scatter(X[:, 0],
            X[:, 1],
            s=50,
            c=Y,
            marker='x',
            cmap='viridis',
            label="Original")
ax1.set_title('Original Data and label Distribution')

# Concatenate the clustered training data with its labels and display them.
"""
import DatasetUtil as DS
from HTMLTable import HTMLTable
import re
from GMM import GMM

if __name__ == "__main__":
    print("\t============ Chap9 EM for GMM ============")

    ds = DS.DATAUtil()
    x_train, y_train = ds.load(True, r".\dataset.dat")
    model = GMM()
    model.train(x_train)

    y_pred = model.predict(x_train)
    y_train = ds.y_int2str(y_train)

    table = HTMLTable(caption='Iris Data Cluster')
    table.append_header_rows((
        ('No.', 'A1', 'A2', 'A3', 'A4', 'Classification', ''),
        ('', '', '', '', '', 'Label-C', 'Predict-C'),
    ))
    table[0][0].attr.rowspan = 2
    table[0][1].attr.rowspan = 2
    table[0][2].attr.rowspan = 2
    table[0][3].attr.rowspan = 2
    table[0][4].attr.rowspan = 2
    table[0][5].attr.colspan = 2

    for i in range(x_train.shape[0]):