def initialize_gmm_from_kmeans(self, samples):
    """Initialise each state's GMM from a k-means clustering of the samples.

    Runs k-means with as many clusters as there are states (Katsavounidis
    initialisation), then hands state ``k`` the ``k``-th centroid so that
    its GMM can bootstrap from it.

    :param samples: sequence of 2-D arrays of observations; they are
                    stacked row-wise (numpy.vstack) before clustering.
    """
    from machine_learning import KMeans
    kmeans = KMeans(n_clusters=len(self.S), init='Katsavounidis', verbosity=1)
    # BUG FIX: the log messages previously said "initializegmm_from_kmeans"
    # (missing underscore), which does not match this method's name.
    print("HMM.initialize_gmm_from_kmeans() begins the fit", flush=True)
    kmeans.fit(numpy.vstack(samples))
    print("HMM.initialize_gmm_from_kmeans() ends the fit", flush=True)
    # One centroid per state: state k seeds its GMM with centroid k.
    for k in range(len(self.S)):
        self.S[k].gmm.initialize_from_centroids(kmeans.cluster_centers_[k])
def __init__(self, X):
    """Prepare the animated-clustering state for the 2-D point cloud *X*."""
    self.num_clusters = 17
    self.X_ = X
    n = X.shape[0]
    # A single coordinate buffer holds the samples first, then one slot
    # per centroid, so everything can be drawn with one scatter call.
    self.data_ = numpy.zeros((2, n + self.num_clusters))
    self.data_[:, :n] = X[:, :2].T
    # Alternative seeding kept for experiments:
    # self.kmeans = KMeans( self.num_clusters, init='Katsavounidis', max_iter=1 )
    self.kmeans = KMeans(self.num_clusters, init='random', max_iter=1)
    self.Y_ = numpy.ones(n, dtype='int')
    self.fig_, self.ax_ = pyplot.subplots()
    # generate_data produces the frames, update_figure redraws them,
    # setup_plot builds frame zero.
    self.ani_ = animation.FuncAnimation(
        self.fig_,
        self.update_figure,
        self.generate_data,
        init_func=self.setup_plot,
        interval=1000,
        blit=True,
        repeat=False,
    )
    self.changes_ = 0
class AnimatedClustering(object):
    """Animate k-means clustering of a 2-D point cloud with matplotlib.

    The scatter plot shows the samples coloured by their current cluster
    assignment, with the cluster centres drawn larger.  Frames are produced
    by generate_data(), one per k-means iteration, until an iteration causes
    no assignment changes.
    """

    def __init__(self, X, clust):
        # Number of clusters requested by the caller.
        self.num_clusters = clust
        self.X_ = X
        # data_ packs the sample coordinates first, then one trailing slot
        # per centroid, so a single scatter call can draw everything.
        self.data_ = numpy.zeros((2, X.shape[0] + self.num_clusters))
        self.data_[0, :len(X)] = X[:, 0]
        self.data_[1, :len(X)] = X[:, 1]
        # Alternative seeding kept for experiments:
        #self.kmeans = KMeans( self.num_clusters, init='random', max_iter=1 )
        self.kmeans = KMeans(self.num_clusters, init='Katsavounidis', max_iter=1)
        # Current cluster label of each sample.
        self.Y_ = numpy.ones(len(X), dtype='int')
        self.fig_, self.ax_ = pyplot.subplots()
        # generate_data feeds frames to update_figure; setup_plot draws frame 0.
        self.ani_ = animation.FuncAnimation(self.fig_, self.update_figure, self.generate_data, init_func=self.setup_plot, interval=1000, blit=True, repeat=False)
        self.changes_ = 0

    def setup_plot(self):
        """Create the initial scatter; return it as a 1-tuple for blitting."""
        # Samples get colour 3 / size 30; the trailing num_clusters entries
        # (the centroids) get a distinct colour and a larger marker.
        self.colour_ = numpy.ones(len(self.X_) + self.num_clusters) * 3
        self.sizes_ = numpy.ones(self.X_.shape[0] + self.num_clusters) * 30
        self.colour_[len(self.X_):] = self.num_clusters + 1
        self.sizes_[len(self.X_):] = 100
        self.scat_ = self.ax_.scatter(self.data_[0, :], self.data_[1, :], c=self.colour_, s=self.sizes_, marker='o', edgecolors='none', animated=False)
        return self.scat_,

    def generate_data(self):
        """Yield one (data, colour, sizes) frame per k-means iteration.

        First frame comes from a full fit(); subsequent frames run one
        fit_iteration() at a time until no sample changes cluster.
        """
        self.changes_ = len(self.X_)
        self.kmeans.fit(self.X_)
        # Alternative single-step driver kept for experiments:
        #self.kmeans.lloyd( self.X_, num_iter=1 )
        self.colour_[:len(self.X_)] = self.kmeans.predict(self.X_)
        # Copy the centroid coordinates into the trailing slots of data_.
        self.data_[0, len(self.X_):] = self.kmeans.cluster_centers_[:, 0]
        self.data_[1, len(self.X_):] = self.kmeans.cluster_centers_[:, 1]
        yield self.data_, self.colour_, self.sizes_
        # fit_iteration presumably returns the number of reassignments made
        # in one step -- TODO confirm against machine_learning.KMeans.
        while self.changes_ > 0:
            self.changes_ = self.kmeans.fit_iteration(self.X_)
            self.Y_[:] = self.kmeans.predict(self.X_)
            self.colour_[:len(self.Y_)] = self.Y_[:]
            self.data_[0, len(self.X_):] = self.kmeans.cluster_centers_[:, 0]
            self.data_[1, len(self.X_):] = self.kmeans.cluster_centers_[:, 1]
            yield self.data_, self.colour_, self.sizes_

    def update_figure(self, generated_data):
        """FuncAnimation callback: redraw the scatter for one frame."""
        data, colour, sizes = generated_data
        print("clusters = %d changes = %12d J = %20.8f %.8f" % (self.num_clusters, self.changes_, self.kmeans.J, self.kmeans.improvement()))
        # NOTE(review): clf() clears the whole figure, yet the scatter is
        # re-created on the pre-clear axes object self.ax_ -- verify this
        # renders as intended.
        pyplot.clf()
        #pyplot.set_axis_bgcolor( 'white' )
        self.scat_ = self.ax_.scatter(self.data_[0, :], self.data_[1, :], c=colour, s=sizes, marker='o', edgecolors='none', animated=False)
        #pyplot.draw()
        return self.scat_,

    def show(self):
        """Enter the matplotlib event loop, displaying the animation."""
        pyplot.show()
# Build two overlapping rectangular clouds of uniformly distributed 2-D
# points: X_1 is wide (10x3, offset to (2, 2)), X_2 is tall (3x10,
# offset to (8, 1)).  N_1 and K are defined earlier, outside this view.
N_2 = 5000
X_1 = numpy.random.rand(N_1, 2)
X_1[:, 0] *= 10
X_1[:, 1] *= 3
X_1[:, 0] += 2
X_1[:, 1] += 2
X_2 = numpy.random.rand(N_2, 2)
X_2[:, 0] *= 3
X_2[:, 1] *= 10
X_2[:, 0] += 8
X_2[:, 1] += 1
# k-means codebook generators: one Lloyd/k-means++ quantiser per cloud,
# with a tight convergence threshold.
lloyd_1 = KMeans(n_clusters=K, verbosity=1, modality='Lloyd', init='KMeans++')
lloyd_1.epsilon = 1.0e-9
lloyd_2 = KMeans(n_clusters=K, verbosity=1, modality='Lloyd', init='KMeans++')
lloyd_2.epsilon = 1.0e-9
# k-means codebook estimation
lloyd_1.fit(X_1)
lloyd_2.fit(X_2)
# NOTE(review): the definition below is truncated in this view of the file;
# it is kept verbatim rather than guessed at.
def assignment_1(p, h): h_2 = h**2 density_1 = numpy.exp(-0.5 * ((
import numpy
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from matplotlib import pyplot
from machine_learning import KMeans

if __name__ == '__main__':
    # Number of clusters used by all the quantisers below.
    K = 10
    K2 = 7  # set a smaller value to study the behaviour of the different algorithms when using make_blobs()
    # Lloyd's algorithm with k-means++ seeding.
    lloyd = KMeans(n_clusters=K, verbosity=1, modality='Lloyd', init='KMeans++')
    lloyd.epsilon = 1.0e-9
    # k-medoids variant, same seeding.
    kmediods = KMeans(n_clusters=K, verbosity=1, modality='k-Mediods', init='KMeans++')
    # BUG FIX: this line previously re-assigned lloyd.epsilon, silently
    # overwriting the 1.0e-9 set above; every other object here has its
    # epsilon configured right after construction, so the intent was
    # clearly to configure the k-medoids object.
    kmediods.epsilon = 1.0e-8
    # Original (sequential) k-means, library-default initialisation.
    original_kmeans = KMeans(n_clusters=K, verbosity=1, modality='original-k-Means')
    selective_splitting = KMeans(n_clusters=K, verbosity=1, modality='Lloyd', init='KMeans++')
    selective_splitting.epsilon = 1.0e-9