def fit(self, X, K, eps=pow(10, -2)):
    """Fit the HMM parameters with the EM (Baum-Welch) algorithm.

    Parameters
    ----------
    X : observation sequence, array of shape (T, D)
    K : number of hidden states
    eps : tolerance on the log-likelihood increase between two
        iterations, used as the convergence criterion for EM

    Side effects: sets self.K, self.mus, self.Sigmas2, self.pis, self.A
    and prints progress at every iteration.
    """
    self.K = K
    T, D = X.shape
    # Initialize the emission Gaussians (means and covariances) with a GMM fit.
    print(
        "Initialization of Gaussians parameters (means and covariances) with GMM : "
    )
    gmm_model = GMM(isotropic=False)
    gmm_model.fit(X, K, eps=eps)
    self.mus = gmm_model.mus
    self.Sigmas2 = gmm_model.Sigmas2
    print("\nFit of HMM : ")
    # Initialize the initial distribution pis and the transition matrix A
    # at random, normalized so pis sums to 1 and each row of A sums to 1.
    self.pis = np.random.rand(self.K)
    self.pis /= np.sum(self.pis)
    self.A = np.random.rand(self.K, self.K)
    self.A /= np.sum(self.A, axis=1)[:, None]
    lik = self.compute_log_likelihood(X)
    print("Initial log-likelihood : ", lik)
    delta_lik = 1
    cpt_iter = 1
    while (delta_lik > eps):
        # Expectation step: smoothed posteriors of the hidden states.
        pi = self.compute_proba_Zt_cond_X(
            X)  # array (T,K) (t,i) -> p(z_t = i|X; θ)
        pij = self.compute_proba_Zt_and_Znext_cond_X(
            X)  # tensor (T-1,K,K) (t,i,j) -> p(z_(t+1) = j, z_t = i|X; θ)
        # Maximization step
        self.pis = pi[0, :]
        pi_repeated = pi[:, :, np.newaxis]  # (T,K,1); broadcasts against X below
        # Posterior-weighted mean of the observations, per state.
        self.mus = np.sum(pi_repeated * X[:, np.newaxis, :],
                          axis=0) / np.sum(pi_repeated, axis=0)
        # Posterior-weighted covariance per state:
        #   Sigma_k = sum_t pi[t,k] * x_t x_t^T / sum_t pi[t,k]
        # with x_t centered on state k's new mean.
        self.Sigmas2 = []
        for k in range(self.K):
            Xc = X - self.mus[k]
            Sigmas2k = 0
            for t in range(T):
                xt = Xc[t, :][:, None]  # size (d,1)
                Sigmas2k += np.dot(xt, xt.T) * pi[t, k]
            Sigmas2k /= np.sum(pi[:, k])
            self.Sigmas2.append(Sigmas2k)
        self.Sigmas2 = np.array(self.Sigmas2)
        # Transition matrix: expected transition counts normalized by the
        # expected occupancy of the source state over t = 0..T-2.
        self.A = np.sum(pij, axis=0) / np.sum(pi[:-1], axis=0)[:, None]
        # Computing new likelihood, and deciding if we should stop
        old_lik = lik  # storing old_likelihood to compute delta_lik
        lik = self.compute_log_likelihood(X)  # storing new likelihood
        delta_lik = lik - old_lik  # measure to decide if we should stop or iterate again
        print("Iter " + str(cpt_iter) + " ; log_likelihood : " + str(lik))
        cpt_iter += 1
    print("EM algorithm converged.")
    print("initial distribution found (rounded, 2 decimals) : ",
          np.round(self.pis, 2))
    print("transition matrix found (rounded, 2 decimals) : ",
          np.round(self.A, 2))
from GMM import GMM import numpy as np from sklearn.datasets import make_moons from sklearn.model_selection import train_test_split import matplotlib.pyplot as plt from util import * # 构造聚类数据,X是特征数据,Y是相应的label,此时生成的是半环形图 X, Y = make_moons(n_samples=1000, noise=0.04, random_state=0) # 划分数据,一部分用于训练聚类,一部分用于分类 X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) model = GMM(X_train, K=10) # 获取各个类别的概率 result = model.fit() print('每条数据属于各个类别的概率如下: ', result) # 获取每条数据所在的类别 label_train = np.argmax(result, axis=1) print(label_train) # 获取测试数据所在的类别的概率 result_test = model.predict(X_test) # 获取测试数据的类别 label_test = np.argmax(result_test, axis=1) # 展示原始数据分布及其label ax1 = plt.subplot(211) ax1.scatter(X[:, 0], X[:, 1],
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 21 02:43:24 2019

@author: maachou
"""
# Demo: fit a 6-component GMM on blob data and plot the predicted clusters.
# FIX: `sklearn.datasets.samples_generator` was removed in scikit-learn 0.24;
# make_blobs is importable from `sklearn.datasets` directly.
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

from GMM import GMM

mix = GMM(K=6)
# 100 tight, well-separated 2-D blobs around 6 centers.
X, Y = make_blobs(cluster_std=0.5, random_state=20, n_samples=100, centers=6)
plt.scatter(X[:, 0], X[:, 1])
print(X.shape)
mix.fit(X)
mix.Means()
# NOTE: overwrites the ground-truth labels Y with the model's assignments,
# which are then used to color the scatter plot.
Y = mix.predict(X)
plt.scatter(X[:, 0], X[:, 1], c=Y)
import numpy as np
import matplotlib.pyplot as plt
from GMM import GMM

if __name__ == '__main__':
    # Five 2-D Gaussian clusters: (center, per-axis spread), 1000 points each.
    cluster_specs = [
        ((20.00, 14.00), (4.0, 4.0)),
        ((15.00, 8.00), (2.0, 2.0)),
        ((30.00, 40.00), (2.0, 2.0)),
        ((25.00, 32.00), (7.0, 7.0)),
        ((10.00, 32.00), (7.0, 7.0)),
    ]
    DATA = np.concatenate([
        np.random.normal(loc=center, scale=spread, size=(1000, 2))
        for center, spread in cluster_specs
    ])

    # Fit a 5-component mixture with tolerance 1e-3 and report its state.
    S = GMM(5, DATA, 1e-3)
    S.fit()
    S.print_status()

    # Classify 10000 uniform points over [0, 50)^2 and color each one by
    # the component it was assigned to.
    testdata = np.random.rand(10000, 2) * 50
    labels = S.Classify(testdata)
    palette = ('b', 'g', 'r', 'y', 'k')
    plt.scatter(testdata[:, 0], testdata[:, 1],
                c=[palette[i] for i in labels])
    plt.show()
# generate the dataset
X, Y = make_classification(n_samples=1000,
                           n_features=2,
                           n_redundant=0,
                           n_informative=2,
                           n_clusters_per_class=2)
# Standardize features to zero mean / unit variance before clustering.
X = preprocessing.scale(X)

num_clusters = 3
num_epochs = 50

# Fit the custom GMM: phi holds per-sample responsibilities, pi_dist the
# mixing weights, mean/covariance the component parameters.
gmm_model = GMM()
phi, pi_dist, mean, covariance = gmm_model.fit(X,
                                               num_clusters=num_clusters,
                                               num_epochs=num_epochs)

# Reference fit with scikit-learn.
# FIX: n_components was hard-coded to 2 while the custom model used
# num_clusters=3, so the two fits were not comparable; use the shared
# num_clusters setting for both.
gmm_sklearn = mixture.GaussianMixture(n_components=num_clusters)
gmm_sklearn.fit(X)

# Top: raw data; bottom: data colored by the responsibilities phi.
plt.figure(figsize=(8, 8))
plt.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.9)
plt.subplot(211)
plt.title('Plot for the unclustered data', fontsize='small')
plt.scatter(X[:, 0], X[:, 1], s=25, c=None)
plt.subplot(212)
plt.title('Plot for the clustered data', fontsize='small')
plt.scatter(X[:, 0], X[:, 1], s=25, c=phi)
class QuickBrush(Brush):
    """Brush that scores image pixels against foreground/background GMMs.

    Foreground samples are gathered from the pixels under the brushed
    stroke by an OpenCL kernel; background samples are drawn at random
    from the whole image. Both mixtures are fitted and scored with the
    GPU GMM implementation. (Python 2 / PyOpenCL code.)
    """

    # Local OpenCL work-group size (also set per-instance in __init__).
    lWorksize = (16, 16)

    def __init__(self, context, devices, d_img, d_labels):
        Brush.__init__(self, context, devices, d_labels)

        self.context = context
        # Profiling-enabled queue so kernel timings can be inspected.
        self.queue = cl.CommandQueue(context,
            properties=cl.command_queue_properties.PROFILING_ENABLE)

        nComponentsFg = 4
        nComponentsBg = 4

        self.nDim = 3  # color dimensionality used by the mixtures
        self.dim = d_img.dim  # (width, height) of the image buffer

        filename = os.path.join(os.path.dirname(__file__), 'quick.cl')
        program = createProgram(context, context.devices, [], filename)

        # self.kernSampleBg = cl.Kernel(program, 'sampleBg')
        self.kern_get_samples = cl.Kernel(program, 'get_samples')

        self.lWorksize = (16, 16)
        self.gWorksize = roundUp(self.dim, self.lWorksize)

        # Four background samples per work-group.
        # NOTE(review): relies on Python 2 integer division — confirm if ported.
        nSamples = 4 * (self.gWorksize[0] / self.lWorksize[0]) * (
            self.gWorksize[1] / self.lWorksize[1])

        # self.gmmFg_cpu = mixture.GMM(4)
        self.gmmFg = GMM(context, 65, nComponentsFg, 10240)
        self.gmmBg = GMM(context, 65, nComponentsBg, nSamples)

        # Host-side staging arrays; samples are uint32 pixels (presumably
        # packed 8-bit channels — the commented .view(np.uint8) dump below
        # suggests 4 bytes per pixel).
        self.hScore = np.empty(self.dim, np.float32)
        self.hSampleFg = np.empty((10240, ), np.uint32)
        self.hSampleBg = np.empty((12000, ), np.uint32)
        self.hA = np.empty((max(nComponentsFg, nComponentsBg), 8), np.float32)

        self.d_img = d_img

        # Device-side buffers mirroring the host arrays (4 bytes/sample).
        cm = cl.mem_flags
        self.dSampleFg = cl.Buffer(context, cm.READ_WRITE, size=4 * 10240)
        self.dSampleBg = cl.Buffer(context, cm.READ_WRITE, size=4 * 12000)
        self.dA = cl.Buffer(context, cm.READ_ONLY | cm.COPY_HOST_PTR,
            hostbuf=self.hA)
        self.dScoreFg = Buffer2D(context, cm.READ_WRITE, self.dim, np.float32)
        self.dScoreBg = Buffer2D(context, cm.READ_WRITE, self.dim, np.float32)

        #self.points = Set()
        self.capPoints = 200 * 200 * 300  #brush radius 200, stroke length 300
        self.points = np.empty((self.capPoints), np.uint32)

        # self.colorize = Colorize.Colorize(clContext, clContext.devices)
        # self.hTriFlat = self.hTri.reshape(-1)
        # self.probBg(1200)

        # Pull the whole image to the host and draw 12000 random pixels
        # from it as initial background samples.
        self.h_img = np.empty(self.dim, np.uint32)
        self.h_img = self.h_img.ravel()
        cl.enqueue_copy(self.queue, self.h_img, self.d_img, origin=(0, 0),
            region=self.dim).wait()

        self.samples_bg_idx = np.random.randint(0,
            self.dim[0] * self.dim[1], 12000)
        self.hSampleBg = self.h_img[self.samples_bg_idx]
        cl.enqueue_copy(self.queue, self.dSampleBg, self.hSampleBg).wait()

        # Fit the background mixture and score the full image against it.
        w,m,c = self.gmmBg.fit(self.dSampleBg, 300, retParams=True)

        print w
        print m
        print c

        self.gmmBg.score(self.d_img, self.dScoreBg)

        pass

    def draw(self, p0, p1):
        """Extend the stroke from p0 to p1, refit the foreground GMM from
        the stroked pixels, and rescore the image against it."""
        Brush.draw(self, p0, p1)
        #self.probFg(x1-20, x1+20, y1-20, y1+20)
        #return
        """color = self.colorTri[self.type]
        #self.argsScore[5] = np.int32(self.nComponentsFg)

        #seed = []
        hasSeeds = False
        redoBg = False

        minX = sys.maxint
        maxX = -sys.maxint
        minY = sys.maxint
        maxY = -sys.maxint

        for point in self.points[0:nPoints]:
            #if self.hTriFlat[point] != color:
            self.hTriFlat[point] = color
            #seed += point
            hasSeeds = True

            minX = min(minX, point%self.width)
            maxX = max(maxX, point%self.width)
            minY = min(minY, point/self.width)
            maxY = max(maxY, point/self.width)

            #if (point[1]*self.width + point[0]) in self.randIdx:
            #    redoBg = True

        #if redoBg:
        #    self.probBg(0)

        #if len(seed) == 0:
        if not hasSeeds:
            return

        minX = max(0, minX-DILATE)
        maxX = min(self.width-1, maxX + DILATE)
        minY = max(0, minY-DILATE)
        maxY = min(self.height-1, maxY + DILATE)
        """
        # Gather the pixel values under the stroke points into dSampleFg.
        args = [
            np.int32(self.n_points),
            self.d_points,
            cl.Sampler(self.context, False, cl.addressing_mode.NONE,
                cl.filter_mode.NEAREST),
            self.d_img,
            self.dSampleFg
        ]

        gWorksize = roundUp((self.n_points, ), (256, ))

        self.kern_get_samples(self.queue, gWorksize, (256,), *args).wait()

        cl.enqueue_copy(self.queue, self.hSampleFg, self.dSampleFg)
        # print self.hSampleFg.view(np.uint8).reshape(10240, 4)[0:self.n_points, :]
        # print self.n_points

        # Refit the foreground mixture and rescore the image.
        self.gmmFg.fit(self.dSampleFg, self.n_points)
        # print w
        # print m
        # print c
        self.gmmFg.score(self.d_img, self.dScoreFg)

        # self.argsSampleBg = [
        #     self.d_labels,
        #     np.int32(self.label),
        #     cl.Sampler(self.context, False, cl.addressing_mode.NONE,
        #         cl.filter_mode.NEAREST),
        #     self.d_img,
        #     self.dSampleFg
        # ]
        #
        # gWorksize = roundUp(self.dim, (16, 16))
        #
        # self.kernSampleBg(self.queue, gWorksize, (16, 16),
        #     *(self.argsSampleBg)).wait()
        #
        cl.enqueue_copy(self.queue, self.hSampleBg, self.dSampleBg).wait()

        pass

    def probFg(self, d_samples, n_points):
        """Refit the foreground mixture from the given device-side samples
        and rescore the image against it, printing the fitted params."""
        # if True:
        #     tri = self.hTri[minY:maxY, minX:maxX]
        #     b = (tri == self.colorTri[self.type])
        #
        #     samplesFg = self.hSrc[minY:maxY, minX:maxX]
        #     samplesFg = samplesFg[b]
        # else:
        #     DILATE = 5
        #     samplesFg = self.hSrc[minY:maxY, minX:maxX].ravel()

        #gpu = False
        #self.prob(self.gmmFG, samplesFg, self.dScoreFg, gpu)

        #self.gmmFg_cpu.fit(samplesFg)
        #print 'cpu', self.gmmFg_cpu.weights_
        #a = calcA_cpu(self.gmmFg_cpu.weights_.astype(np.float32), self.gmmFg_cpu.means_.astype(np.float32), self.gmmFg_cpu.covars_.astype(np.float32))
        #cl.enqueue_copy(self.queue, self.gmmFg.dA, a).wait()

        #weights, means, covars = self.gmmFg.fit(samplesFg, retParams=True)
        #a = calcA_cpu(weights, means[:, 0:3], covars[:, 0:3])
        #cl.enqueue_copy(self.queue, self.gmmFg.dA, a).wait()

        w,m,c = self.gmmFg.fit(d_samples, n_points, retParams=True)
        print w
        print m
        print c
        #print 'gpu', weights

        self.gmmFg.score(self.d_img, self.dScoreFg)

        #score returns float64, not float32 -> convert with astype
        #self.hScore = -self.gmmFG.score(self.rgb.reshape(-1, 3)).astype(np.float32)

    """
    def drawCircle(self, xc, yc, points=None):
        r = self.radius

        for y in xrange(-r, r):
            for x in xrange(-r, r):
                if points != None:
                    points.add((xc+x, yc+y))
    """

    def probBg(self, nSamples):
        """Resample background pixels (those not painted with the current
        stroke color), refit the background mixture, and rescore."""
        #self.kernSampleBg(self.queue, self.gWorksize, self.lWorksize, *(self.argsSampleBg)).wait()
        #cl.enqueue_copy(self.queue, self.hSampleBg, self.dSampleBg).wait()

        # Candidate background pixels = everything outside the stroke color;
        # draw 2000 of them at random (with possible repeats).
        self.bgIdx = np.where(self.hTri.ravel() != self.colorTri[self.type])[0]
        self.randIdx = self.bgIdx[np.random.randint(0, len(self.bgIdx), 2000)]
        self.bgIdx = np.setdiff1d(self.bgIdx, self.randIdx)

        self.hSampleBg[0:len(self.randIdx)] = self.hSrc.view(np.uint32).ravel()[
            self.randIdx]
        cl.enqueue_copy(self.queue, self.dSampleBg, self.hSampleBg).wait()

        #print self.gmmBg.fit(self.hSrc.view(np.uint32).ravel()[self.randIdx], retParams=True)
        self.gmmBg.fit(self.hSrc.view(np.uint32).ravel()[self.randIdx])
        #self.gmmBg.fit(self.dSampleBg, nSamples=len(self.randIdx))
        self.gmmBg.score(self.dSrc, self.dScoreBg)
# Compare the GPU GMM against a CPU reference (presumably sklearn's
# legacy mixture.GMM): both are seeded with the same weights/means/covars,
# fitted on the same samples, and the recovered parameters are printed
# side by side. (Python 2 code; nComp, means, weights, covars, samples,
# context, nIter, nSamples, queue, dSamples are defined elsewhere.)
gmm_cpu = mixture.GMM(nComp)
gmm_cpu.dtype = np.float32
gmm_cpu.init_params = ''  # empty string: keep the preset parameters below
gmm_cpu.means_ = means
gmm_cpu.weights_ = weights
gmm_cpu.covars_ = covars
gmm_cpu.fit(samples)

# GPU fit, seeded via the precomputed A matrix and the preset-wmc flag.
gmm = GMM(context, nIter, nComp, nSamples)
a = calcA_cpu(weights, means, covars)
cl.enqueue_copy(queue, gmm.dA, a).wait()
gmm.has_preset_wmc = True
w,m,c = gmm.fit(dSamples, nSamples, retParams=True)

# Print CPU vs GPU weights, means, covariances for visual comparison.
print 'converged: {0}'.format(gmm.has_converged)
print gmm_cpu.weights_
print w
print
print gmm_cpu.means_
print m
print
print gmm_cpu.covars_
print c

gmm_cpu.init_params = 'wmc'  # restore default re-initialization behavior
# NOTE(review): `iter` shadows the builtin of the same name.
iter = 10  #to estimate wmc on cpu
from GMM import GMM
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

# Four well-separated 2-D blobs to cluster.
X, y = make_blobs(n_samples=1000, centers=4, n_features=2)

# Diagonal-covariance GMM, uniformly initialized, fitted with K = 4.
gmm_cls = GMM(initializer='uniform', cov_type='diag')
gmm_cls.fit(X, 4)

# Color each point by the label of the k-means model that initialized
# the GMM (labels outside the palette are skipped, as before).
label_palette = {0: 'red', 1: 'green', 2: 'orange', 3: 'blue'}
colors = [label_palette[lbl]
          for lbl in gmm_cls.kmeans_cls_.predict(X)
          if lbl in label_palette]

# Data colored by k-means label, fitted GMM means overlaid in black.
plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.1)
plt.scatter(gmm_cls.means_[:, 0], gmm_cls.means_[:, 1], c='k')
plt.show()

# Same data, this time overlaying the k-means centroids instead.
plt.scatter(X[:, 0], X[:, 1], c=colors, alpha=0.1)
plt.scatter(gmm_cls.kmeans_cls_.means_[:, 0],
            gmm_cls.kmeans_cls_.means_[:, 1], c='k')
plt.show()
# Seed the mixture model with k-means estimates: means, covariance
# matrices, and mixing coefficients all come from a hard-assignment pass.
kmeans_obj = KMeans(3, x)
kmeans_obj.fit(3, 0.002)
means = kmeans_obj.mean_vec
cov_mat_list = kmeans_obj.CovMatrix()
mixture_coeff = kmeans_obj.MixtureCoeff()
print(cov_mat_list)

"""from sklearn.cluster import KMeans
obj = KMeans(n_clusters = 3, init = 'k-means++', max_iter = 100, n_init = 10, random_state = 0)
y_Kmeans = obj.fit_predict(x)
print(obj.cluster_centers_[:])"""

# Refine the k-means initialization with EM (tolerance 0.0002).
GMM_obj = GMM(3, x, means, cov_mat_list, mixture_coeff)
GMM_obj.fit(0.0002)
print(GMM_obj.mean_vec)
print(GMM_obj.cov_mat)
print(GMM_obj.mixture_coeff)

# Hard cluster assignment for every training point.
y_pred = GMM_obj.ClusterPredict(x)

# One scatter per cluster, then the fitted means on top in yellow.
for cluster_id, (cluster_color, cluster_label) in enumerate(
        zip(('red', 'green', 'blue'),
            ('Cluster 1', 'Cluster 2', 'Cluster 3'))):
    members = GMM_obj.x_train[y_pred == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s = 20,
                c = cluster_color, label = cluster_label)
plt.scatter(GMM_obj.mean_vec[:, 0], GMM_obj.mean_vec[:, 1], s = 50,
            c = 'yellow', label = 'Centroids')
plt.show()

# Raw training data, uncolored, for reference.
plt.scatter(GMM_obj.x_train[:, 0], GMM_obj.x_train[:, 1])
plt.show()
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
# FIX: `sklearn.datasets.samples_generator` was removed in scikit-learn 0.24;
# make_blobs is importable from `sklearn.datasets` directly.
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from util import *

# Build clustering data: X holds the features, Y the ground-truth labels
# (four Gaussian blobs).
X, Y = make_blobs(n_samples=700, centers=4, cluster_std=0.5, random_state=2019)
# Hold out 20% of the data to test cluster assignment on unseen points.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

model = GMM(X_train, K=4)
# Per-sample membership probabilities for each of the K components.
result_train = model.fit()
print('每条数据属于各个类别的概率如下: ', result_train)
# Hard labels: most probable component per training sample.
label_train = np.argmax(result_train, axis=1)
print(label_train)
# Membership probabilities, then hard labels, for the held-out samples.
result_test = model.predict(X_test)
label_test = np.argmax(result_test, axis=1)

# Show the original data with its ground-truth labels.
ax1 = plt.subplot(211)
ax1.scatter(X[:, 0], X[:, 1], s=50, c=Y, marker='x', cmap='viridis',
            label="Original")
ax1.set_title('Original Data and label Distribution')