def build_codebook(self, k):
    print('Building a GMM of {} components as a codebook'.format(k))
    return gaussian_mixture.GaussianMixture(n_components=k,
                                            verbose=False,
                                            covariance_type='diag',
                                            tol=1e-3,
                                            reg_covar=1e-6,
                                            max_iter=100)
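# A minimal usage sketch for build_codebook; `encoder` and `descriptors` are
# hypothetical: an instance of the class defining this method, and an
# (n_samples, n_features) array of pooled local descriptors.
# codebook = encoder.build_codebook(k=64)
# codebook.fit(descriptors)
# codeword_ids = codebook.predict(descriptors)  # hard codeword per descriptor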
def em(data):
    # Initialize the clusterer to make 3 clusters;
    # seed of 10 for reproducibility.
    clusterer = gaussian_mixture.GaussianMixture(n_components=3,
                                                 covariance_type="spherical",
                                                 random_state=10)
    train, test = seperateData(data)
    clusterer.fit(train)  # train
    cluster_labels = clusterer.predict(test)  # test
    cluster_labels_list = cluster_labels.tolist()
    # print(cluster_labels_list)
    return cluster_labels_list
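# A minimal sketch of calling em(); assumes seperateData(data) returns a
# (train, test) split of a single array, as used above.
demo_data = np.random.rand(120, 2)  # hypothetical 2-D dataset
demo_labels = em(demo_data)
print(demo_labels[:10])  # GMM labels for the held-out portion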
def cluster_image(mic, n_comps):
    # Convert the CHW tensor to HWC and stack RGB with HSV to get a
    # 6-channel feature per pixel.
    hsv = matplotlib.colors.rgb_to_hsv(mic.data.cpu().permute(1, 2, 0).numpy())
    rgb = mic.data.cpu().permute(1, 2, 0).numpy()
    comb_data = np.concatenate([rgb, hsv], -1)
    newdata = comb_data.reshape(mic.data.shape[1] * mic.data.shape[2], 6)
    # Fit a full-covariance GMM on the per-pixel features, then reshape the
    # predicted labels back into an image.
    model = gmm.GaussianMixture(n_components=n_comps, covariance_type="full")
    model = model.fit(newdata)
    cluster = model.predict(newdata)
    cluster = cluster.reshape(mic.data.shape[1], mic.data.shape[2])
    return cluster
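# A minimal usage sketch for cluster_image; assumes torch and plt are
# available and that valid input is a (3, H, W) float tensor with values in
# [0, 1] so that rgb_to_hsv is well defined.
import torch

mic_demo = torch.rand(3, 64, 64)  # hypothetical random image
segmentation = cluster_image(mic_demo, n_comps=4)
plt.imshow(segmentation)  # one integer cluster id per pixel
plt.show()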
def train_model(user_name):
    warnings.filterwarnings(action="ignore", category=DeprecationWarning)
    # DATASET_PATH = "DATASET"
    # fhand = open("user_name.txt", mode='r+')
    features = np.asarray(())
    dest = "user_models/"
    a = user_name
    # Abort if a model for this user has already been trained.
    for dirpath, dirnames, filenames in os.walk(dest):
        # print(filenames)
        temp_name = a + ".gmm"
        if temp_name in filenames:
            # print("already exist")
            sys.exit()
    count = 1
    DATASET_PATH = "DATASET/" + a
    for dirpath, dirnames, filenames in os.walk(DATASET_PATH):
        for f in filenames:
            file_path = os.path.join(dirpath, f)
            # Read the audio file and extract its feature vectors.
            sr, audio = read(file_path)
            vector = extract_features(audio, sr)
            if features.size == 0:
                features = vector
            else:
                features = np.vstack((features, vector))
            # After every 5 files, train a 16-component diagonal-covariance
            # GMM on the accumulated features and pickle it.
            if count == 5:
                gmm = gaussian_mixture.GaussianMixture(n_components=16,
                                                       max_iter=200,
                                                       covariance_type='diag',
                                                       n_init=3)
                gmm.fit(features)
                # Dump the trained Gaussian model.
                picklefile = a + ".gmm"
                # print("model name ", picklefile)
                with open(dest + picklefile, mode="wb") as file:
                    pickle.dump(gmm, file)
                # print('+ modeling completed for speaker:', picklefile,
                #       " with data point = ", features.shape)
                features = np.asarray(())
                count = 0
            count = count + 1
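# A minimal usage sketch; assumes wav files for the speaker live under
# DATASET/<user_name>/, that user_models/ exists, and that read() and
# extract_features() are defined elsewhere in this project.
# train_model("alice")  # "alice" is a hypothetical user name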
# print("gathering ROOT SIFT descriptors...") # descriptors=fun.compute_save_reduce_vector(paths,id,pc_comp=pc_comp,reduced=True).T descriptors = np.atleast_2d( np.asarray( fun.file_counter(paths, ".npy", "reduced_data", remove=False, loader=True))) # print("descriptors gathered") print("training GMM %d..." % (gmm_comp)) #GMM MODEL GMM = gauss.GaussianMixture(n_components=gmm_comp, covariance_type=covariance_type, max_iter=100000, n_init=1, init_params="kmeans") GMM.fit(descriptors) # print(np.sum(GMM.predict_proba(descriptors[0:20]),axis=1)) print("trained GMM %d..." % (gmm_comp)) print("saving the GMM model") means = GMM.means_ covs = GMM.covariances_ weights = GMM.weights_ gmm_means_file = "./GMM/means" + str(gmm_comp) + ".gmm.npy" gmm_covariance_file = "./GMM/covs" + str(gmm_comp) + ".gmm.npy" gmm_weight_file = "./GMM/weights" + str(gmm_comp) + ".gmm.npy" np.save(gmm_means_file, means)
def GMM(x, i):
    # Fit an i-component mixture to x and plot the hard assignments.
    gmm = gaussian_mixture.GaussianMixture(n_components=i).fit(x)
    labels = gmm.predict(x)
    plt.scatter(x[:, 0], x[:, 1], c=labels, cmap='viridis')
    plt.show()
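# A minimal usage sketch with synthetic blobs; assumes make_blobs from
# sklearn.datasets is importable alongside the modules already used above.
from sklearn.datasets import make_blobs

x_demo, _ = make_blobs(n_samples=300, centers=4, cluster_std=0.8, random_state=0)
GMM(x_demo, 4)  # fits a 4-component mixture and shows the labelled scatter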
for j in K:  # sweep over candidate cluster counts
    kmeans = KMeans(n_clusters=j,
                    max_iter=1000,
                    n_init=10,
                    random_state=0,
                    verbose=False,
                    tol=1e-4)
    # K-Means algorithm
    kmeans.fit(X)
    y_kmeans = kmeans.predict(X)  # prediction using K-Means
    # print(kmeans.labels_)
    a = silhouette_score(X, kmeans.labels_)
    sil.append(a)
    # Append the sum of squared distances to the cluster centers.
    wcss.append(kmeans.inertia_)
    # Gaussian mixture modelling
    gmm = GM.GaussianMixture(n_components=j,
                             init_params='kmeans',
                             max_iter=1000,
                             covariance_type='full',
                             tol=1e-04,
                             random_state=0)
    gmm.fit(X)
    bics.append(gmm.bic(X))  # update the BIC list

index_gmm = np.argmin(bics)  # index of minimum BIC
index_sil = np.argmax(sil)

# Plot cost vs. number of clusters.
plt.figure("K-Means Clustering Analysis using Elbow Method")
plt.plot(K, wcss, 'go--')
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()
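# A companion sketch plotting the BIC curve gathered above, so the minimum
# found at index_gmm can be read off the same sweep.
plt.figure("GMM Model Selection using BIC")
plt.plot(K, bics, 'bo--')
plt.title('BIC vs. Number of Components')
plt.xlabel('Number of components')
plt.ylabel('BIC')
plt.show()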
import matplotlib.pyplot as plt
from sklearn.datasets import samples_generator
from sklearn import metrics, cluster
from sklearn.mixture import gaussian_mixture

# x, y = samples_generator.make_blobs(n_samples=200, n_features=3, cluster_std=0.6, random_state=0)
x, y = samples_generator.make_circles(n_samples=200,
                                      noise=.05,
                                      random_state=0,
                                      factor=0.4)
# x, y = samples_generator.make_moons(n_samples=200, noise=.05, random_state=0)
# print(x.shape, y.shape)

# clu = cluster.KMeans(2)
# clu = cluster.MeanShift()
# clu = cluster.DBSCAN(eps=0.98, min_samples=4)
# clu = cluster.SpectralClustering(2, affinity="nearest_neighbors")
# clu = cluster.AffinityPropagation()
clu = gaussian_mixture.GaussianMixture(n_components=2)
labels = clu.fit_predict(x)

print(metrics.silhouette_score(x, labels))
print(metrics.calinski_harabasz_score(x, labels))
print(metrics.davies_bouldin_score(x, labels))

plt.scatter(x[:, 0], x[:, 1], c=labels)
plt.show()
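# Since make_circles also returns ground-truth labels y, an external metric
# can complement the internal scores above; adjusted_rand_score is part of
# sklearn.metrics.
print(metrics.adjusted_rand_score(y, labels))  # 1.0 = perfect match up to relabelling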
data1 = np.random.multivariate_normal(miu1, cov1, size=n)
data2 = np.random.multivariate_normal(miu2, cov2, size=n)
data3 = np.random.multivariate_normal(miu3, cov3, size=n)
data = np.concatenate((data1, data2, data3))
np.random.shuffle(data)
print(data.shape)

plt.xlim(-40, 50)
plt.ylim(-40, 50)

start_time = time.time()
gmm = gaussian_mixture.GaussianMixture(n_components=3)
gmm.fit(data)
data_labels = gmm.predict(data)
test_labels = gmm.predict(test_data)
end_time = time.time()
print(end_time - start_time)

f = plt.figure(1)
plt.scatter(data[:, 0], data[:, 1], c=data_labels, s=20, cmap='viridis')
# plt.plot(data[:, 0], data[:, 1], marker="o", linestyle="")
# plt.show()
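# A companion sketch visualizing the held-out predictions computed above;
# assumes test_data comes from the same three-Gaussian generator.
g = plt.figure(2)
plt.scatter(test_data[:, 0], test_data[:, 1], c=test_labels, s=20, cmap='viridis')
plt.show()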
def showSilhouette(data, cluster_labels, attributes):
    X = []
    for instance in data:
        X.append([instance[attributes[0]], instance[attributes[1]]])
    X = np.array(X)
    # print(X)

    # Create a subplot with 1 row and 2 columns.
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_size_inches(13, 5.75)

    # The 1st subplot is the silhouette plot.
    # The silhouette coefficient can range from -1 to 1, but in this example
    # all values lie within [-0.1, 1].
    ax1.set_xlim([-0.1, 1])
    n_clusters = 3
    # The (n_clusters + 1) * 10 inserts blank space between the silhouette
    # plots of individual clusters, to demarcate them clearly.
    ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])

    # The silhouette_score gives the average value for all the samples.
    # This gives a perspective into the density and separation of the
    # formed clusters.
    clusterer_s = gaussian_mixture.GaussianMixture(n_components=3,
                                                   covariance_type="spherical")
    clusterer_s.fit(X)
    cluster_labels_s = clusterer_s.predict(X)

    colors_scatter = []
    colorT = ""
    for value in cluster_labels_s:
        if value == 0:
            colorT = "red"
        if value == 1:
            colorT = "blue"
        if value == 2:
            colorT = "green"
        colors_scatter.append(colorT)
    # print(cluster_labels_s)

    silhouette_avg = silhouette_score(X, cluster_labels_s)
    print("For 3 clusters the average silhouette_score is:",
          round(silhouette_avg, 5))

    # Compute the silhouette scores for each sample.
    sample_silhouette_values = silhouette_samples(X, cluster_labels_s)

    y_lower = 10
    for i in range(n_clusters):
        # Aggregate the silhouette scores for samples belonging to
        # cluster i, and sort them.
        ith_cluster_silhouette_values = \
            sample_silhouette_values[cluster_labels_s == i]
        ith_cluster_silhouette_values.sort()
        # print(ith_cluster_silhouette_values)

        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        if i == 0:
            color = "red"
        if i == 1:
            color = "blue"
        if i == 2:
            color = "green"
        # print(y_lower)
        # print(y_upper)
        # print(np.arange(y_lower, y_upper))
        ax1.fill_betweenx(np.arange(y_lower, y_upper),
                          0,
                          ith_cluster_silhouette_values,
                          facecolor=color,
                          edgecolor=color,
                          alpha=1)

        # Label the silhouette plots with their cluster numbers at the middle.
        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

        # Compute the new y_lower for the next plot (10 for the 0 samples).
        y_lower = y_upper + 10

    ax1.set_title("The silhouette plot for the various clusters.")
    ax1.set_xlabel("The silhouette coefficient values")
    ax1.set_ylabel("Cluster label")

    # The vertical line for the average silhouette score of all the values.
    ax1.axvline(x=silhouette_avg, color="red", linestyle="--")

    ax1.set_yticks([])  # Clear the y-axis labels / ticks.
    ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

    # 2nd plot showing the actual clusters formed.
    colors = ["red", "blue", "green"]
    # print(type(X))
    # print(colors)
    ax2.scatter(X[:, 0], X[:, 1],
                marker='.', s=50, lw=0, alpha=1,
                c=colors_scatter, edgecolor='k')

    # Labeling the clusters: draw white circles at the cluster centers.
    centers = clusterer_s.means_
    ax2.scatter(centers[:, 0], centers[:, 1],
                marker='o', c="white", alpha=1, s=200, edgecolor='k')
    for i, c in enumerate(centers):
        ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1, s=70, edgecolor='k')
    # print(clusterer_s.means_)

    ax2.set_title("The visualization of the clustered data.")
    ax2.set_xlabel("Feature space for the 1st feature")
    ax2.set_ylabel("Feature space for the 2nd feature")

    plt.suptitle(("Silhouette analysis for EM clustering on seed data "
                  "with n_clusters = %d" % n_clusters),
                 fontsize=14, fontweight='bold')
    plt.show()
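# A minimal usage sketch for showSilhouette; assumes each instance in `data`
# can be indexed by the two attribute keys, e.g. a list of dicts. The keys
# "area" and "perimeter" below are hypothetical, and the unused
# cluster_labels argument can be passed as None.
# seeds = [{"area": 15.26, "perimeter": 14.84}, ...]  # elided: full dataset
# showSilhouette(seeds, None, ["area", "perimeter"])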