Example #1
    def build_codebook(self, k):
        print('Building a GMM of {} components as a codebook'.format(k))
        return gaussian_mixture.GaussianMixture(n_components=k,
                                                verbose=False,
                                                covariance_type='diag',
                                                tol=1e-3,
                                                reg_covar=1e-6,
                                                max_iter=100)
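A minimal sketch of fitting the returned codebook (a standalone rewrite of the method above for illustration; the descriptor array is synthetic):

import numpy as np
from sklearn import mixture as gaussian_mixture

def build_codebook(k):
    # Standalone version of the method above, for illustration only.
    return gaussian_mixture.GaussianMixture(n_components=k,
                                            covariance_type='diag',
                                            tol=1e-3, reg_covar=1e-6,
                                            max_iter=100)

codebook = build_codebook(8)
descriptors = np.random.rand(500, 64)   # synthetic local descriptors
codebook.fit(descriptors)
print(codebook.means_.shape)            # (8, 64)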
Example #2
def em(data):
    # Initialize the clusterer to make 3 clusters, with a
    # seed of 10 for reproducibility.
    clusterer = gaussian_mixture.GaussianMixture(n_components=3,
                                                 covariance_type="spherical",
                                                 random_state=10)
    train, test = seperateData(data)
    clusterer.fit(train)  #train
    cluster_labels = clusterer.predict(test)  #test
    cluster_labels_list = cluster_labels.tolist()
    #print(cluster_labels_list)
    return cluster_labels_list
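`seperateData` is not shown; a minimal driver sketch, with a hypothetical 80/20 split standing in for it:

import numpy as np
from sklearn import mixture as gaussian_mixture

def seperateData(data):
    # Hypothetical stand-in: first 80% of rows train, rest test.
    cut = int(0.8 * len(data))
    return data[:cut], data[cut:]

data = np.random.rand(100, 2)
print(em(data))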
Example #3
import numpy as np
import matplotlib.colors
from sklearn import mixture as gmm


def cluster_image(mic, n_comps):
    hsv = matplotlib.colors.rgb_to_hsv(mic.data.cpu().permute(1, 2, 0).numpy())
    rgb = mic.data.cpu().permute(1, 2, 0).numpy()
    comb_data = np.concatenate([rgb, hsv], -1)
    newdata = comb_data.reshape(mic.data.shape[1] * mic.data.shape[2], 6)
    model = gmm.GaussianMixture(n_components=n_comps, covariance_type="full")
    model = model.fit(newdata)

    cluster = model.predict(newdata)
    cluster = cluster.reshape(mic.data.shape[1], mic.data.shape[2])

    return cluster
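`mic` is expected to be a 3-channel image tensor; a hypothetical call (assuming PyTorch, since the function uses `.data.cpu().permute(...)`):

import torch

mic = torch.rand(3, 64, 64)            # synthetic RGB image in [0, 1]
labels = cluster_image(mic, n_comps=4)
print(labels.shape)                    # (64, 64): one component id per pixel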
Example #4
import os
import sys
import pickle
import warnings

import numpy as np
from scipy.io.wavfile import read
from sklearn import mixture as gaussian_mixture


def train_model(user_name):
    warnings.filterwarnings(action="ignore", category=DeprecationWarning)
    features = np.asarray(())
    dest = "user_models/"
    a = user_name
    # Skip training if a model for this user already exists.
    for dirpath, dirnames, filenames in os.walk(dest):
        temp_name = a + ".gmm"
        if temp_name in filenames:
            sys.exit()

    count = 1
    DATASET_PATH = "DATASET/" + a
    for dirpath, dirnames, filenames in os.walk(DATASET_PATH):
        for f in filenames:
            file_path = os.path.join(dirpath, f)
            #read audio file
            sr, audio = read(file_path)
            vector = extract_features(audio, sr)

            if features.size == 0:
                features = vector
            else:
                features = np.vstack((features, vector))

            if count == 5:
                gmm = gaussian_mixture.GaussianMixture(n_components=16,
                                                       max_iter=200,
                                                       covariance_type='diag',
                                                       n_init=3)
                gmm.fit(features)

                # Dump the trained Gaussian model to disk.
                picklefile = a + ".gmm"
                with open(dest + picklefile, mode="wb") as f:
                    pickle.dump(gmm, f)

                # print ('+ modeling completed for speaker:',picklefile," with data point = ",features.shape)
                features = np.asarray(())
                count = 0

            count = count + 1
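`extract_features` is not defined in the snippet; a minimal stand-in (an assumption, computing per-utterance normalised MFCCs with python_speech_features) could look like:

import numpy as np
from python_speech_features import mfcc

def extract_features(audio, sr):
    # Hypothetical stand-in for the extract_features used above:
    # 20 MFCCs per frame, mean/variance normalised per utterance.
    feats = mfcc(audio, samplerate=sr, numcep=20)
    return (feats - feats.mean(axis=0)) / (feats.std(axis=0) + 1e-8)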
Example #5
        # print("gathering ROOT SIFT descriptors...")
        # descriptors=fun.compute_save_reduce_vector(paths,id,pc_comp=pc_comp,reduced=True).T
        descriptors = np.atleast_2d(
            np.asarray(
                fun.file_counter(paths,
                                 ".npy",
                                 "reduced_data",
                                 remove=False,
                                 loader=True)))
        # print("descriptors gathered")
        print("training GMM %d..." % (gmm_comp))

        #GMM MODEL
        GMM = gauss.GaussianMixture(n_components=gmm_comp,
                                    covariance_type=covariance_type,
                                    max_iter=100000,
                                    n_init=1,
                                    init_params="kmeans")
        GMM.fit(descriptors)
        # print(np.sum(GMM.predict_proba(descriptors[0:20]),axis=1))
        print("trained GMM %d..." % (gmm_comp))
        print("saving the GMM model")
        means = GMM.means_
        covs = GMM.covariances_
        weights = GMM.weights_

        gmm_means_file = "./GMM/means" + str(gmm_comp) + ".gmm.npy"
        gmm_covariance_file = "./GMM/covs" + str(gmm_comp) + ".gmm.npy"
        gmm_weight_file = "./GMM/weights" + str(gmm_comp) + ".gmm.npy"

        np.save(gmm_means_file, means)
        np.save(gmm_covariance_file, covs)
        np.save(gmm_weight_file, weights)
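To reuse the codebook later, the three arrays can be reloaded (a sketch; `gmm_comp` must match the value used at training time):

import numpy as np

gmm_comp = 64  # hypothetical component count; must match training
means = np.load("./GMM/means%d.gmm.npy" % gmm_comp)
covs = np.load("./GMM/covs%d.gmm.npy" % gmm_comp)
weights = np.load("./GMM/weights%d.gmm.npy" % gmm_comp)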
Example #6
import matplotlib.pyplot as plt
from sklearn import mixture as gaussian_mixture


def GMM(x, i):
    # Fit an i-component mixture to x and label each point with its component.
    gmm = gaussian_mixture.GaussianMixture(n_components=i).fit(x)
    labels = gmm.predict(x)
    plt.scatter(x[:, 0], x[:, 1], c=labels, cmap='viridis')
    plt.show()
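A quick driver sketch using scikit-learn's blob generator:

from sklearn.datasets import make_blobs

x, _ = make_blobs(n_samples=300, centers=4, cluster_std=0.8, random_state=0)
GMM(x, 4)   # scatter plot coloured by the 4 fitted components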
Example #7
import numpy as np
import matplotlib.pyplot as plt
from sklearn import cluster
from sklearn import mixture as GM
from sklearn.metrics import silhouette_score

# The top of this snippet was truncated: the loop header, the data X and the
# start of the K-Means constructor below are a reconstruction.
K = range(2, 11)             # candidate cluster counts (assumed)
sil, wcss, bics = [], [], []
X = np.random.rand(200, 2)   # placeholder data; the original X is not shown

for j in K:
    kmeans = cluster.KMeans(n_clusters=j,
                            max_iter=1000,
                            n_init=10,
                            random_state=0,
                            verbose=False,
                            tol=1e-4)  # K-Means Algorithm
    kmeans.fit(X)
    y_kmeans = kmeans.predict(X)  # Prediction using K-Means
    #print(kmeans.labels_)
    a = silhouette_score(X, kmeans.labels_)
    sil.append(a)
    wcss.append(
        kmeans.inertia_
    )  #appending the sum of squared distances to the cluster center
    gmm = GM.GaussianMixture(n_components=j,
                             init_params='kmeans',
                             max_iter=1000,
                             covariance_type='full',
                             tol=1e-04,
                             random_state=0)  # Gaussian Mixture Modelling
    gmm.fit(X)
    bics.append(gmm.bic(X))  # updating the BIC list

index_gmm = np.argmin(bics)  # index of minimum BIC penalty
index_sil = np.argmax(sil)
#Plotting cost vs number of clusters
plt.figure("K-Means Clustering Analysis using Elbow Method")
plt.plot(K, wcss, 'go--')
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('inertia')
plt.show()
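index_gmm and index_sil pick the best candidate counts but are never reported above; a small follow-up:

print("Best k by BIC:", K[index_gmm])
print("Best k by silhouette:", K[index_sil])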
Example #8
import matplotlib.pyplot as plt
from sklearn import metrics, cluster
from sklearn import datasets as samples_generator  # make_circles etc.
from sklearn import mixture as gaussian_mixture

# x,y = samples_generator.make_blobs(n_samples=200,n_features=3,cluster_std=0.6,random_state=0)
x, y = samples_generator.make_circles(n_samples=200,
                                      noise=.05,
                                      random_state=0,
                                      factor=0.4)
# x,y = samples_generator.make_moons(n_samples=200,noise=.05,random_state=0)
# print(x.shape,y.shape)

# clu = cluster.KMeans(2)
# clu = cluster.MeanShift()
# clu = cluster.DBSCAN(eps=0.98,min_samples=4)
# clu = cluster.SpectralClustering(2,affinity="nearest_neighbors")
# clu = cluster.AffinityPropagation()
clu = gaussian_mixture.GaussianMixture(n_components=2)

labels = clu.fit_predict(x)
print(metrics.silhouette_score(x, labels))
print(metrics.calinski_harabasz_score(x, labels))
print(metrics.davies_bouldin_score(x, labels))
plt.scatter(x[:, 0], x[:, 1], c=labels)
plt.show()
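A Gaussian mixture assumes roughly elliptical components, so it cannot separate the two concentric rings produced by make_circles; of the commented-out alternatives above, spectral clustering with a nearest-neighbours affinity is the one that recovers them:

clu2 = cluster.SpectralClustering(2, affinity="nearest_neighbors")
labels2 = clu2.fit_predict(x)
plt.scatter(x[:, 0], x[:, 1], c=labels2)
plt.show()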
Example #9
import time

import numpy as np
import matplotlib.pyplot as plt
from sklearn import mixture as gaussian_mixture

# Example parameters: the original snippet assumes miu1..3, cov1..3 and n
# are defined earlier, so the values below are stand-ins.
n = 500
miu1, cov1 = [0, 0], [[1, 0], [0, 1]]
miu2, cov2 = [15, 15], [[4, 0], [0, 4]]
miu3, cov3 = [-20, 20], [[2, 0], [0, 2]]

data1 = np.random.multivariate_normal(miu1, cov1, size=n)
data2 = np.random.multivariate_normal(miu2, cov2, size=n)
data3 = np.random.multivariate_normal(miu3, cov3, size=n)

data = np.concatenate((data1, data2, data3))
np.random.shuffle(data)

# Held-out draws from the same components (`test_data` is also assumed to
# be defined earlier in the original).
test_data = np.concatenate(
    (np.random.multivariate_normal(miu1, cov1, size=50),
     np.random.multivariate_normal(miu2, cov2, size=50),
     np.random.multivariate_normal(miu3, cov3, size=50)))

print(data.shape)

plt.xlim(-40, 50)
plt.ylim(-40, 50)

start_time = time.time()

gmm = gaussian_mixture.GaussianMixture(n_components=3)
gmm.fit(data)

data_labels = gmm.predict(data)
test_labels = gmm.predict(test_data)

end_time = time.time()

print((end_time - start_time))

f = plt.figure(1)
plt.scatter(data[:, 0], data[:, 1], c=data_labels, s=20, cmap='viridis')
plt.show()
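test_labels is computed above but never visualised; a small follow-up plot of the held-out points:

f2 = plt.figure(2)
plt.scatter(test_data[:, 0], test_data[:, 1], c=test_labels, s=20, cmap='viridis')
plt.show()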
Example #10
# Requires numpy as np, matplotlib.pyplot as plt, and silhouette_score /
# silhouette_samples from sklearn.metrics; gaussian_mixture is the
# sklearn.mixture module. Note the cluster_labels argument is unused:
# the function fits its own 3-component model below.
def showSilhouette(data, cluster_labels, attributes):
    X = []
    for instance in data:
        X.append([instance[attributes[0]], instance[attributes[1]]])
    X = np.array(X)
    #print(X)
    # Create a subplot with 1 row and 2 columns
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_size_inches(13, 5.75)
    # The 1st subplot is the silhouette plot
    # The silhouette coefficient can range from -1, 1 but in this example all
    # lie within [-0.1, 1]
    ax1.set_xlim([-0.1, 1])
    n_clusters = 3
    # The (n_clusters+1)*10 is for inserting blank space between silhouette
    # plots of individual clusters, to demarcate them clearly.
    ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])

    # The silhouette_score gives the average value for all the samples.
    # This gives a perspective into the density and separation of the formed
    # clusters

    clusterer_s = gaussian_mixture.GaussianMixture(n_components=3,
                                                   covariance_type="spherical")
    clusterer_s.fit(X)
    cluster_labels_s = clusterer_s.predict(X)

    # Map each predicted component id to a fixed colour.
    palette = ["red", "blue", "green"]
    colors_scatter = [palette[value] for value in cluster_labels_s]

    #print(cluster_labels_s)

    silhouette_avg = silhouette_score(X, cluster_labels_s)
    print("For 3 clusters the average silhouette_score is:",
          round(silhouette_avg, 5))

    # Compute the silhouette scores for each sample
    sample_silhouette_values = silhouette_samples(X, cluster_labels_s)

    y_lower = 10
    for i in range(n_clusters):
        # Aggregate the silhouette scores for samples belonging to
        # cluster i, and sort them
        ith_cluster_silhouette_values = \
            sample_silhouette_values[cluster_labels_s == i]

        ith_cluster_silhouette_values.sort()

        #print(ith_cluster_silhouette_values)

        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        color = palette[i]

        #print(y_lower)
        #print(y_upper)
        #print(np.arange(y_lower, y_upper))
        ax1.fill_betweenx(np.arange(y_lower, y_upper),
                          0,
                          ith_cluster_silhouette_values,
                          facecolor=color,
                          edgecolor=color,
                          alpha=1)

        # Label the silhouette plots with their cluster numbers at the middle
        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

        # Compute the new y_lower for next plot
        y_lower = y_upper + 10  # 10 for the 0 samples

    ax1.set_title("The silhouette plot for the various clusters.")
    ax1.set_xlabel("The silhouette coefficient values")
    ax1.set_ylabel("Cluster label")

    # The vertical line for average silhouette score of all the values
    ax1.axvline(x=silhouette_avg, color="red", linestyle="--")

    ax1.set_yticks([])  # Clear the yaxis labels / ticks
    ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

    # 2nd Plot showing the actual clusters formed
    colors = ["red", "blue", "green"]
    #print(type(X))
    #print(colors)
    ax2.scatter(X[:, 0],
                X[:, 1],
                marker='.',
                s=50,
                lw=0,
                alpha=1,
                c=colors_scatter,
                edgecolor='k')

    # Labeling the clusters
    centers = clusterer_s.means_
    # Draw white circles at cluster centers
    ax2.scatter(centers[:, 0],
                centers[:, 1],
                marker='o',
                c="white",
                alpha=1,
                s=200,
                edgecolor='k')

    for i, c in enumerate(centers):
        ax2.scatter(c[0],
                    c[1],
                    marker='$%d$' % i,
                    alpha=1,
                    s=70,
                    edgecolor='k')

    #print(clusterer_s.means_)

    ax2.set_title("The visualization of the clustered data.")
    ax2.set_xlabel("Feature space for the 1st feature")
    ax2.set_ylabel("Feature space for the 2nd feature")

    plt.suptitle(("Silhouette analysis for EM clustering on seed data "
                  "with n_clusters = %d" % n_clusters),
                 fontsize=14,
                 fontweight='bold')
    plt.show()
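A hypothetical driver: `data` as a list of dicts keyed by attribute name (the names and values below are made up for illustration):

import numpy as np

rng = np.random.default_rng(0)
points = rng.normal(loc=(15.0, 14.0), scale=1.5, size=(210, 2))
data = [{"area": a, "perimeter": p} for a, p in points]
showSilhouette(data, None, ["area", "perimeter"])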