コード例 #1
0
    def fit(self, X, K, eps=pow(10, -2)):
        """Fit the HMM parameters (pis, A, mus, Sigmas2) to X with EM.

        X   : array of shape (T, D) — sequence of T observations of dim D.
        K   : number of hidden states.
        eps : tolerance on the log-likelihood increase between two EM
              iterations, used as the convergence criterion.
        """

        self.K = K
        T, D = X.shape

        # Initialization of the means and covariances with a GMM fit;
        # pis and A are initialized at random below.
        print(
            "Initialization of Gaussians parameters (means and covariances) with GMM : "
        )
        gmm_model = GMM(isotropic=False)
        gmm_model.fit(X, K, eps=eps)
        self.mus = gmm_model.mus
        self.Sigmas2 = gmm_model.Sigmas2

        print("\nFit of HMM : ")
        # Random initialization of the initial distribution (pis) and the
        # transition matrix (A); both normalized so probabilities sum to 1
        # (A row-wise).
        self.pis = np.random.rand(self.K)
        self.pis /= np.sum(self.pis)
        self.A = np.random.rand(self.K, self.K)
        self.A /= np.sum(self.A, axis=1)[:, None]

        lik = self.compute_log_likelihood(X)
        print("Initial log-likelihood : ", lik)

        delta_lik = 1
        cpt_iter = 1

        # EM loop: stop once the log-likelihood gain falls below eps.
        while (delta_lik > eps):

            # Expectation step
            pi = self.compute_proba_Zt_cond_X(
                X)  # array (T,K) (t,i) -> p(z_t = i|X; θ)
            pij = self.compute_proba_Zt_and_Znext_cond_X(
                X)  # tensor (T-1,K,K) (t,i,j) -> p(z_(t+1) = j, z t = i|X; θ)

            # Maximization step

            # Initial distribution: posterior of the first hidden state.
            self.pis = pi[0, :]
            pi_repeated = pi[:, :, np.newaxis]  # (T,K,1), broadcasts against X[:, None, :] of shape (T,1,D)
            self.mus = np.sum(pi_repeated * X[:, np.newaxis, :],
                              axis=0) / np.sum(pi_repeated, axis=0)

            # Covariance of each state: responsibility-weighted outer products
            # of the centered observations, normalized by total responsibility.
            self.Sigmas2 = []
            for k in range(self.K):
                Xc = X - self.mus[k]
                Sigmas2k = 0
                for t in range(T):
                    xt = Xc[t, :][:, None]  # size (d,1)
                    Sigmas2k += np.dot(xt, xt.T) * pi[t, k]
                Sigmas2k /= np.sum(pi[:, k])
                self.Sigmas2.append(Sigmas2k)
            self.Sigmas2 = np.array(self.Sigmas2)

            # Transition matrix: expected transition counts divided by the
            # expected number of visits to each state (excluding the last step).
            self.A = np.sum(pij, axis=0) / np.sum(pi[:-1], axis=0)[:, None]

            # Computing new likelihood, and deciding if we should stop
            old_lik = lik  # storing old_likelihood to compute delta_lik
            lik = self.compute_log_likelihood(X)  # storing new likelihood
            delta_lik = lik - old_lik  # measure to decide if we should stop or iterate again
            print("Iter " + str(cpt_iter) + " ; log_likelihood : " + str(lik))
            cpt_iter += 1

        print("EM algorithm converged.")

        print("initial distribution found (rounded, 2 decimals) : ",
              np.round(self.pis, 2))
        print("transition matrix found (rounded, 2 decimals) : ",
              np.round(self.A, 2))
コード例 #2
0
from GMM import GMM
import numpy as np
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from util import *

# Build clustering data: X holds the features, Y the labels — here two
# interleaving half-moons.
X, Y = make_moons(n_samples=1000, noise=0.04, random_state=0)
# Split the data: one part trains the clustering model, the rest is held out.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

# NOTE(review): this GMM takes the data in its constructor and fit() returns
# per-sample component probabilities — verify against the GMM class.
model = GMM(X_train, K=10)
# Per-sample probability of belonging to each component.
result = model.fit()
print('每条数据属于各个类别的概率如下: ', result)

# Hard cluster assignment for each training sample.
label_train = np.argmax(result, axis=1)
print(label_train)

# Component probabilities for the held-out data...
result_test = model.predict(X_test)
# ...and the corresponding hard assignments.
label_test = np.argmax(result_test, axis=1)

# Show the original data distribution with its labels.
ax1 = plt.subplot(211)
ax1.scatter(X[:, 0],
            X[:, 1],
コード例 #3
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 21 02:43:24 2019

@author: maachou

Demo: fit a 6-component GMM on 6 well-separated synthetic blobs and plot
the data colored by the predicted component.
"""

# FIX: sklearn.datasets.samples_generator was deprecated in scikit-learn 0.22
# and removed in 0.24; make_blobs is importable from sklearn.datasets directly.
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

from GMM import GMM

mix = GMM(K=6)
X, Y = make_blobs(cluster_std=0.5, random_state=20, n_samples=100, centers=6)

# Raw data first, then the model's cluster assignment as point colors.
plt.scatter(X[:, 0], X[:, 1])
print(X.shape)

mix.fit(X)
mix.Means()
Y = mix.predict(X)
plt.scatter(X[:, 0], X[:, 1], c=Y)
コード例 #4
0
ファイル: testGMM.py プロジェクト: wsnpyo/ML
import numpy as np
import matplotlib.pyplot as plt

from GMM import GMM

if __name__ == '__main__':
    # Five 2-D Gaussian clouds of 1000 points each: (mean, std) per cluster.
    cluster_specs = [
        ((20.00, 14.00), (4.0, 4.0)),
        ((15.00, 8.00), (2.0, 2.0)),
        ((30.00, 40.00), (2.0, 2.0)),
        ((25.00, 32.00), (7.0, 7.0)),
        ((10.00, 32.00), (7.0, 7.0)),
    ]
    clouds = [np.random.normal(loc=loc, scale=scale, size=(1000, 2))
              for loc, scale in cluster_specs]

    # Fit a 5-component GMM on the pooled data and report its state.
    DATA = np.concatenate(clouds)
    S = GMM(5, DATA, 1e-3)
    S.fit()
    S.print_status()

    # Classify 10k uniform points over [0, 50)^2 and color by component.
    testdata = np.random.rand(10000, 2) * 50
    labels = S.Classify(testdata)

    point_colors = ['bgryk'[label] for label in labels]
    plt.scatter(testdata[:, 0], testdata[:, 1], c=point_colors)
    plt.show()

コード例 #5
0
# Generate the dataset.
# NOTE(review): relies on names imported elsewhere in the file
# (make_classification, preprocessing, GMM, mixture, plt) — this chunk does
# not show the imports.
X, Y = make_classification(n_samples=1000,
                           n_features=2,
                           n_redundant=0,
                           n_informative=2,
                           n_clusters_per_class=2)

# Standardize features to zero mean / unit variance.
X = preprocessing.scale(X)

num_clusters = 3
num_epochs = 50

# Fit the custom GMM implementation; phi is used below to color points,
# presumably per-sample cluster assignments/responsibilities — verify.
gmm_model = GMM()
phi, pi_dist, mean, covariance = gmm_model.fit(X,
                                               num_clusters=num_clusters,
                                               num_epochs=num_epochs)

# Reference fit with scikit-learn.
# NOTE(review): n_components=2 here while the custom model uses
# num_clusters=3 — confirm whether this mismatch is intended.
gmm_sklearn = mixture.GaussianMixture(n_components=2)
gmm_sklearn.fit(X)
plt.figure(figsize=(8, 8))
plt.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.9)

# Top panel: raw data; bottom panel: same data colored by the custom fit.
plt.subplot(211)
plt.title('Plot for the unclustered data', fontsize='small')
plt.scatter(X[:, 0], X[:, 1], s=25, c=None)

plt.subplot(212)
plt.title('Plot for the clustered data', fontsize='small')
plt.scatter(X[:, 0], X[:, 1], s=25, c=phi)
コード例 #6
0
ファイル: QuickBrush.py プロジェクト: rheiland/cl-util
class QuickBrush(Brush):
    """Interactive brush that scores image pixels with two OpenCL GMMs.

    A background GMM (gmmBg) is fit once at construction from 12000 randomly
    sampled image pixels; a foreground GMM (gmmFg) is refit on every draw()
    from samples gathered under the current stroke.  Both models write
    per-pixel scores into the 2-D buffers dScoreBg / dScoreFg.

    NOTE(review): Python 2 code (print statements); depends on pyopencl (cl)
    and project helpers (Brush, GMM, Buffer2D, createProgram, roundUp)
    defined elsewhere.
    """

    lWorksize = (16, 16)

    def __init__(self, context, devices, d_img, d_labels):
        Brush.__init__(self, context, devices, d_labels)

        self.context = context
        # Profiling-enabled queue so kernel timings can be inspected.
        self.queue = cl.CommandQueue(context,
            properties=cl.command_queue_properties.PROFILING_ENABLE)

        nComponentsFg = 4
        nComponentsBg = 4
        self.nDim = 3

        self.dim = d_img.dim

        # Build the OpenCL program holding the sampling kernel.
        filename = os.path.join(os.path.dirname(__file__), 'quick.cl')
        program = createProgram(context, context.devices, [], filename)
        #        self.kernSampleBg = cl.Kernel(program, 'sampleBg')
        self.kern_get_samples = cl.Kernel(program, 'get_samples')

        self.lWorksize = (16, 16)
        self.gWorksize = roundUp(self.dim, self.lWorksize)

        # 4 samples per workgroup.  NOTE(review): '/' is Python-2 integer
        # division here; under Python 3 this would yield floats.
        nSamples = 4 * (self.gWorksize[0] / self.lWorksize[0]) * (
            self.gWorksize[1] / self.lWorksize[1])

        #		self.gmmFg_cpu = mixture.GMM(4)

        self.gmmFg = GMM(context, 65, nComponentsFg, 10240)
        self.gmmBg = GMM(context, 65, nComponentsBg, nSamples)

        # Host-side staging buffers for scores, samples and GMM parameters.
        self.hScore = np.empty(self.dim, np.float32)
        self.hSampleFg = np.empty((10240, ), np.uint32)
        self.hSampleBg = np.empty((12000, ), np.uint32)
        self.hA = np.empty((max(nComponentsFg, nComponentsBg), 8), np.float32)

        self.d_img = d_img

        # Device-side buffers (4 bytes per uint32 sample).
        cm = cl.mem_flags
        self.dSampleFg = cl.Buffer(context, cm.READ_WRITE, size=4 * 10240)
        self.dSampleBg = cl.Buffer(context, cm.READ_WRITE, size=4 * 12000)
        self.dA = cl.Buffer(context, cm.READ_ONLY | cm.COPY_HOST_PTR, hostbuf=self.hA)
        self.dScoreFg = Buffer2D(context, cm.READ_WRITE, self.dim, np.float32)
        self.dScoreBg = Buffer2D(context, cm.READ_WRITE, self.dim, np.float32)

        #self.points = Set()

        self.capPoints = 200 * 200 * 300 #brush radius 200, stroke length 300
        self.points = np.empty((self.capPoints), np.uint32)

        #		self.colorize = Colorize.Colorize(clContext, clContext.devices)

        #        self.hTriFlat = self.hTri.reshape(-1)

        #        self.probBg(1200)

        # Pull the image to the host and draw 12000 random pixel indices as
        # background samples.
        self.h_img = np.empty(self.dim, np.uint32)
        self.h_img = self.h_img.ravel()
        cl.enqueue_copy(self.queue, self.h_img, self.d_img, origin=(0, 0), region=self.dim).wait()

        self.samples_bg_idx = np.random.randint(0, self.dim[0] * self.dim[1], 12000)
        self.hSampleBg = self.h_img[self.samples_bg_idx]

        cl.enqueue_copy(self.queue, self.dSampleBg, self.hSampleBg).wait()

        # Fit the background GMM once (up to 300 iterations) and score every
        # image pixel against it.
        w,m,c = self.gmmBg.fit(self.dSampleBg, 300, retParams=True)

        print w
        print m
        print c

        self.gmmBg.score(self.d_img, self.dScoreBg)

        pass

    def draw(self, p0, p1):
        """Extend the stroke from p0 to p1, gather image samples under the
        stroke via the get_samples kernel, refit the foreground GMM on them,
        and rescore the whole image against it."""
        Brush.draw(self, p0, p1)
        #self.probFg(x1-20, x1+20, y1-20, y1+20)
        #return
        """color = self.colorTri[self.type]

        #self.argsScore[5] = np.int32(self.nComponentsFg)

        #seed = []
        hasSeeds = False
        redoBg = False

        minX = sys.maxint
        maxX = -sys.maxint
        minY = sys.maxint
        maxY = -sys.maxint

        for point in self.points[0:nPoints]:
            #if self.hTriFlat[point] != color:
                self.hTriFlat[point] = color
                #seed += point
                hasSeeds = True

                minX = min(minX, point%self.width)

                maxX = max(maxX, point%self.width)
                minY = min(minY, point/self.width)
                maxY = max(maxY, point/self.width)

                #if (point[1]*self.width + point[0]) in self.randIdx:

                #	redoBg = True
        #if redoBg:
        #	self.probBg(0)

        #if len(seed) == 0:
        if not hasSeeds:
            return

        minX = max(0, minX-DILATE)
        maxX = min(self.width-1, maxX + DILATE)
        minY = max(0, minY-DILATE)
        maxY = min(self.height-1, maxY + DILATE)
        """

        # Kernel arguments: number of stroke points, their device buffer, a
        # nearest-neighbor sampler, the image, and the output sample buffer.
        args = [
            np.int32(self.n_points),
            self.d_points,
            cl.Sampler(self.context, False, cl.addressing_mode.NONE,
                cl.filter_mode.NEAREST),
            self.d_img,
            self.dSampleFg
        ]

        gWorksize = roundUp((self.n_points, ), (256, ))

        self.kern_get_samples(self.queue, gWorksize, (256,), *args).wait()

        cl.enqueue_copy(self.queue, self.hSampleFg, self.dSampleFg)
#        print self.hSampleFg.view(np.uint8).reshape(10240, 4)[0:self.n_points, :]

#        print self.n_points
        # Refit the foreground model on the freshly gathered samples.
        self.gmmFg.fit(self.dSampleFg, self.n_points)
#        print w
#        print m
#        print c

        self.gmmFg.score(self.d_img, self.dScoreFg)

        #        self.argsSampleBg = [
        #            self.d_labels,
        #            np.int32(self.label),
        #            cl.Sampler(self.context, False, cl.addressing_mode.NONE,
        #                cl.filter_mode.NEAREST),
        #            self.d_img,
        #            self.dSampleFg
        #        ]
        #
        #        gWorksize = roundUp(self.dim, (16, 16))
        #
        #        self.kernSampleBg(self.queue, gWorksize, (16, 16),
        #            *(self.argsSampleBg)).wait()
        #        cl.enqueue_copy(self.queue, self.hSampleBg, self.dSampleBg).wait()

        pass

    def probFg(self, d_samples, n_points):
        """Fit the foreground GMM on d_samples (n_points of them), dump the
        fitted weights/means/covariances, and rescore the image."""
    #		if True:
    #			tri = self.hTri[minY:maxY, minX:maxX]
    #			b = (tri == self.colorTri[self.type])
    #
    #			samplesFg = self.hSrc[minY:maxY, minX:maxX]
    #			samplesFg = samplesFg[b]
    #		else:
    #			DILATE = 5
    #			samplesFg = self.hSrc[minY:maxY, minX:maxX].ravel()

        #gpu = False
        #self.prob(self.gmmFG, samplesFg, self.dScoreFg, gpu)

        #self.gmmFg_cpu.fit(samplesFg)
        #print 'cpu', self.gmmFg_cpu.weights_
        #a = calcA_cpu(self.gmmFg_cpu.weights_.astype(np.float32), self.gmmFg_cpu.means_.astype(np.float32), self.gmmFg_cpu.covars_.astype(np.float32))
        #cl.enqueue_copy(self.queue, self.gmmFg.dA, a).wait()

        #weights, means, covars = self.gmmFg.fit(samplesFg, retParams=True)
        #a = calcA_cpu(weights, means[:, 0:3], covars[:, 0:3])
        #cl.enqueue_copy(self.queue, self.gmmFg.dA, a).wait()


        w,m,c = self.gmmFg.fit(d_samples, n_points, retParams=True)
        print w
        print m
        print c
        #print 'gpu', weights

        self.gmmFg.score(self.d_img, self.dScoreFg)

    #score returns float64, not float32 -> convert with astype
    #self.hScore = -self.gmmFG.score(self.rgb.reshape(-1, 3)).astype(np.float32)
    """
        def drawCircle(self, xc, yc, points=None):
            r = self.radius

            for y in xrange(-r, r):
                for x in xrange(-r, r):
                    if points != None:
                        points.add((xc+x, yc+y))
        """

    def probBg(self, nSamples):
        """Resample 2000 non-foreground pixels, refit the background GMM on
        them, and rescore the source image against the background model.

        NOTE(review): reads self.hTri/self.colorTri/self.hSrc/self.dSrc which
        are not set in __init__ — presumably provided by Brush; verify.
        """
        #self.kernSampleBg(self.queue, self.gWorksize, self.lWorksize, *(self.argsSampleBg)).wait()
        #cl.enqueue_copy(self.queue, self.hSampleBg, self.dSampleBg).wait()

        # Indices of pixels NOT labeled with the current brush color; a random
        # 2000 of them become background samples (and are excluded from bgIdx).
        self.bgIdx = np.where(self.hTri.ravel() != self.colorTri[self.type])[0]
        self.randIdx = self.bgIdx[np.random.randint(0, len(self.bgIdx), 2000)]
        self.bgIdx = np.setdiff1d(self.bgIdx, self.randIdx)

        self.hSampleBg[0:len(self.randIdx)] = self.hSrc.view(np.uint32).ravel()[
                                              self.randIdx]
        cl.enqueue_copy(self.queue, self.dSampleBg, self.hSampleBg).wait()

        #print self.gmmBg.fit(self.hSrc.view(np.uint32).ravel()[self.randIdx], retParams=True)
        self.gmmBg.fit(self.hSrc.view(np.uint32).ravel()[self.randIdx])
        #self.gmmBg.fit(self.dSampleBg, nSamples=len(self.randIdx))
        self.gmmBg.score(self.dSrc, self.dScoreBg)
コード例 #7
0
ファイル: test_GMM.py プロジェクト: rheiland/cl-util
# Compare a CPU fit (sklearn mixture.GMM) against the OpenCL GMM, both seeded
# with the same preset weights/means/covariances.
# NOTE(review): Python 2 code; depends on names defined earlier in the file
# (mixture, np, cl, GMM, calcA_cpu, nComp, nIter, nSamples, dSamples,
# weights, means, covars, samples, context, queue).
gmm_cpu = mixture.GMM(nComp)
gmm_cpu.dtype = np.float32
gmm_cpu.init_params = ''  # '' = keep the manually preset parameters below
gmm_cpu.means_ = means
gmm_cpu.weights_ = weights
gmm_cpu.covars_ = covars
gmm_cpu.fit(samples)

gmm = GMM(context, nIter, nComp, nSamples)

# Seed the GPU model with the same parameters via its packed 'A' buffer.
a = calcA_cpu(weights, means, covars)
cl.enqueue_copy(queue, gmm.dA, a).wait()

gmm.has_preset_wmc = True
w,m,c = gmm.fit(dSamples, nSamples, retParams=True)
print 'converged: {0}'.format(gmm.has_converged)

# Side-by-side dump: CPU vs GPU weights, means, covariances.
print gmm_cpu.weights_
print w
print
print gmm_cpu.means_
print m
print
print gmm_cpu.covars_
print c

gmm_cpu.init_params = 'wmc'  # let sklearn estimate all parameters itself
iter = 10

#to estimate wmc on cpu
コード例 #8
0
from GMM import GMM
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

# Four Gaussian blobs in 2-D; fit a 4-component GMM on them.
X, y = make_blobs(n_samples=1000, centers=4, n_features=2)

gmm_cls = GMM(initializer='uniform', cov_type='diag')
gmm_cls.fit(X, 4)

# Color each point by its k-means label (labels outside 0..3 are skipped,
# matching the original if-chain behavior).
label_to_color = {0: 'red', 1: 'green', 2: 'orange', 3: 'blue'}
colors = [label_to_color[label]
          for label in gmm_cls.kmeans_cls_.predict(X)
          if label in label_to_color]

# Data colored by cluster, with the GMM means overlaid in black.
plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.1)
plt.scatter(gmm_cls.means_[:, 0], gmm_cls.means_[:, 1], c='k')
plt.show()

# Same scatter, this time overlaying the k-means centroids.
plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.1)
plt.scatter(gmm_cls.kmeans_cls_.means_[:, 0],
            gmm_cls.kmeans_cls_.means_[:, 1],
            c='k')
plt.show()
コード例 #9
0
# Initialize cluster parameters with K-Means, then refine them with a GMM.
# NOTE(review): depends on names defined elsewhere in the file
# (KMeans, GMM, x, plt) — this chunk does not show the imports or x.
kmeans_obj = KMeans(3, x)
kmeans_obj.fit(3, 0.002)

# K-Means results seed the GMM: means, covariance matrices, mixture weights.
means = kmeans_obj.mean_vec
cov_mat_list = kmeans_obj.CovMatrix()
mixture_coeff = kmeans_obj.MixtureCoeff()

print(cov_mat_list)

"""from sklearn.cluster import KMeans
obj = KMeans(n_clusters = 3, init = 'k-means++', max_iter = 100, n_init = 10, random_state = 0)
y_Kmeans = obj.fit_predict(x)
print(obj.cluster_centers_[:])"""

# Fit a 3-component GMM starting from the K-Means estimates.
GMM_obj = GMM(3, x, means, cov_mat_list, mixture_coeff)
GMM_obj.fit(0.0002)

print(GMM_obj.mean_vec)
print(GMM_obj.cov_mat)
print(GMM_obj.mixture_coeff)

# Plot each predicted cluster in its own color, centroids in yellow.
y_pred = GMM_obj.ClusterPredict(x)
plt.scatter(GMM_obj.x_train[y_pred == 0, 0], GMM_obj.x_train[y_pred == 0, 1], s = 20, c = 'red', label = 'Cluster 1')
plt.scatter(GMM_obj.x_train[y_pred == 1, 0], GMM_obj.x_train[y_pred == 1, 1], s = 20, c = 'green', label = 'Cluster 2')
plt.scatter(GMM_obj.x_train[y_pred == 2, 0], GMM_obj.x_train[y_pred == 2, 1], s = 20, c = 'blue', label = 'Cluster 3')
plt.scatter(GMM_obj.mean_vec[:, 0], GMM_obj.mean_vec[:, 1], s = 50, c = 'yellow', label = 'Centroids')
plt.show()

# Raw training data, unlabeled.
plt.scatter(GMM_obj.x_train[:, 0], GMM_obj.x_train[:, 1])
plt.show()
コード例 #10
0
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
# FIX: sklearn.datasets.samples_generator was deprecated in scikit-learn 0.22
# and removed in 0.24; make_blobs is importable from sklearn.datasets directly.
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from util import *  # NOTE(review): GMM is presumably provided by util — verify


# Build clustering data: X holds the features, Y the ground-truth labels —
# here four Gaussian blobs.
X, Y = make_blobs(n_samples=700, centers=4, cluster_std=0.5, random_state=2019)
# Split the data: one part trains the clustering model, the rest is held out.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

model = GMM(X_train, K=4)
# Per-sample probability of belonging to each of the K components.
result_train = model.fit()
print('每条数据属于各个类别的概率如下: ', result_train)

# Hard cluster assignment for each training sample.
label_train = np.argmax(result_train, axis=1)
print(label_train)

# Component probabilities for the held-out data...
result_test = model.predict(X_test)
# ...and the corresponding hard assignments.
label_test = np.argmax(result_test, axis=1)

# Show the original data distribution with its true labels.
ax1 = plt.subplot(211)
ax1.scatter(X[:, 0], X[:, 1], s=50, c=Y, marker='x', cmap='viridis', label="Original")
ax1.set_title('Original Data and label Distribution')