Exemple #1
0
# casestudy_iris_pca.py

import data_iris
import hierarchical
import matplotlib.pyplot as plt
import metrics
import numpy as np
import pca
import plot_data

# Case study: hierarchical clustering of the Iris data after reducing it to
# 2 dimensions with PCA.

# Step 1: load the Iris samples and their class labels
iris_source = data_iris.iris()
X, class_label = iris_source.load()
# Fit PCA on the samples, then request the 2-dimensional reduced data
projector = pca.pca()
projector.fit(X)
R = projector.data_reduced_dimension(reduced_dim=2)
# Axis labels shared by the scatter plot and the cluster plot below
x_label = "u0"
y_label = "u1"
plot_data.plot_scatter_class(
    R,
    class_label,
    "Iris Data Projected to 2 Dimensions using PCA",
    x_label,
    y_label,
)

# Step 2: create the hierarchical clustering model
model = hierarchical.hierarchical()

# Step 3: fit the model on the reduced data (not on the raw samples)
model.fit(R)
print("Time fit: {}".format(model.time_fit))

# Step 4: report results for one saved clustering
# level indexes model.clustersave from the end (-3 = third-to-last entry);
# presumably the stage with 3 clusters remaining - TODO confirm in hierarchical.py
level = -3
assignments = model.clustersave[level]
# Each metric is computed and printed in turn, in the original order
purity_value = metrics.purity(assignments, class_label)
print("Purity: {}".format(purity_value))
davies_bouldin_value = metrics.davies_bouldin(R, assignments)
print("Davies-Bouldin: {}".format(davies_bouldin_value))
silhouette_value = metrics.silhouette(R, assignments)
print("Silhouette: {}".format(silhouette_value))
model.plot_cluster(
    nlevel=level,
    title="Hierarchical Clustering for Iris Dataset reduced to 2d",
    xlabel=x_label,
    ylabel=y_label,
)
metrics.plot_cluster_distribution(assignments, class_label)
# Display every figure created above
plt.show()
Exemple #2
0
    print("Time pca: {}".format(time.time() - time_pca_start))
# Step 2: build the clustering model
# Seed numpy's global random generator before the model is created and fitted.
np.random.seed(31)
ncluster = 10
initialization = "kmeans++"
model = gaussianmm_spherical.gaussianmm(ncluster, initialization)

# Step 3: fit the model
# R is defined earlier in the script (presumably the PCA-reduced data - confirm).
max_iter, tolerance = 100, 1e-4
model.fit(R, max_iter, tolerance)
print("Time fit: {}".format(model.time_fit))

# Step 4: results
# level = -1 selects the last entry saved in model.clustersave.
level = -1
assignments = model.clustersave[level]
purity_value = metrics.purity(assignments, class_label)
print("Purity: {}".format(purity_value))
# NOTE(review): Davies-Bouldin is evaluated on X while the model was fitted
# on R - confirm this is intended.
davies_bouldin_value = metrics.davies_bouldin(X, assignments)
print("Davies-Bouldin: {}".format(davies_bouldin_value))

# Plot images from clusters 1, 4, 5, 6: the columns of X picked out by
# model.get_index are handed to mnist.plot_image, one cluster at a time.
seed = 31
for cluster_id in (1, 4, 5, 6):
    member_index = model.get_index(level, cluster_id)
    mnist.plot_image(X[:, member_index], seed)

model.plot_objective(
    title="Gaussian MM Clustering",
    xlabel="Iteration",
    ylabel="Objective",
)
metrics.plot_cluster_distribution(
    assignments,
    class_label,
    figsize=(8, 4),
    figrow=2,
)
# Display every figure created above
plt.show()