def _plot_component(X, weights, cmap, n_points):
    """Plot the first two columns of X, colouring point j by cmap(weights[j])."""
    for j in range(n_points):
        plt.plot(X[j, 0], X[j, 1], "o", color=cmap(weights[j]))
    plt.show()


def gmmClustering(X, k=2, maxiter=3):
    """Fit a k-component Gaussian mixture to X with EM and plot the result.

    The mixture is initialised from a k-means run, refined with ``maxiter``
    E/M iterations, and then one figure per component is shown in which every
    sample is coloured by that component's E-step output.

    Args:
        X: 2-D array of samples (rows); only columns 0 and 1 are plotted.
        k: number of mixture components.
        maxiter: number of EM iterations to run.

    Returns:
        None. The function logs the loss per iteration and shows the plots.
    """
    my_io.startLog(__name__)
    logger = logging.getLogger(__name__)

    # k-means supplies the initial hard assignment r and replaces X with its
    # first return value (presumably a normalised X -- confirm in kmeans).
    # The cluster count must match k, otherwise the initial mixing weights
    # below do not correspond to the labels in r.
    X, r = kmeans.kmeansClustering(X, k, 1)
    N, _ = np.shape(X)

    # Initial mixing weight of a component = fraction of samples assigned to it.
    pi_k_old = [
        np.divide(len(np.where(r == kth)[0]), float(N)) for kth in range(k)
    ]
    cova_old, mu_old = compute_cova(k, X, r)

    p = None
    for i in range(maxiter):
        logger.info('ite: %d loss: %f', i,
                    loss(X, mu_old, pi_k_old, cova_old))
        p = gmm_Esteps(X, pi_k_old, k, cova_old, mu_old)
        mu_old, cova_old, pi_k_old = gmm_Msteps(mu_old, X, p)

    if p is None:
        # maxiter <= 0: no EM iteration ran, so derive the plotted values
        # from the k-means initialisation instead of failing on an unbound p.
        p = gmm_Esteps(X, pi_k_old, k, cova_old, mu_old)

    # NOTE(review): the original indentation was lost; plotting is assumed to
    # happen once, after the last EM iteration -- confirm.
    cm = plt.get_cmap('jet')
    # Descending order keeps the original sequence (component 1, then 0)
    # for the default k=2 while covering every component for larger k.
    for component in reversed(range(k)):
        _plot_component(X, p[component], cm, N)
from __future__ import absolute_import

import argparse

import numpy as np

import kmeans as km

if __name__ == '__main__':
    # Script entry point: load the sample points, cluster them with k-means
    # (6 clusters, 20 iterations) and plot the raw data and the clustering.
    cli = argparse.ArgumentParser()
    cli.add_argument("--verbosity", help="increase output verbosity")
    options = cli.parse_args()

    # Alternative text-file input, kept for reference:
    # points = load_data("../data/data.txt")
    points = np.load("data/clusterable_data.npy")

    # The value given to --verbosity (None when omitted) is forwarded as-is.
    assignments, centers = km.kmeansClustering(
        points,
        k=6,
        iter=20,
        verbose=options.verbosity,
    )

    # NOTE(review): plot_data / plot_clusters are neither defined nor imported
    # in the visible part of this file -- confirm where they come from.
    plot_data(points)
    plot_clusters(assignments, centers)
def pre_kmeans(X, k):
    """Run k-means on X with k clusters and hand back its two results.

    X may be passed un-normalised; according to the original author's note
    the normalisation is carried out inside ``kmeans.kmeansClustering``.

    Returns:
        The pair produced by ``kmeans.kmeansClustering(X, k)``, in the same
        order (named ``normalizeX`` and ``r`` by the original code).
    """
    normalized, assignment = kmeans.kmeansClustering(X, k)
    return normalized, assignment