# casestudy_iris_pca.py
#
# Case study: project the Iris data onto 2 dimensions with PCA, run
# hierarchical clustering on the projection, then report and plot the results.

import data_iris
import hierarchical
import matplotlib.pyplot as plt
import metrics
import numpy as np
import pca
import plot_data

# (1) load the data set and reduce it to 2 dimensions with PCA
iris = data_iris.iris()
X, class_label = iris.load()

model_pca = pca.pca()
model_pca.fit(X)
R = model_pca.data_reduced_dimension(reduced_dim=2)
plot_data.plot_scatter_class(
    R,
    class_label,
    "Iris Data Projected to 2 Dimensions using PCA",
    "u0",
    "u1",
)

# (2) create the clustering model
model = hierarchical.hierarchical()

# (3) fit the model on the reduced data
model.fit(R)
print("Time fit: {}".format(model.time_fit))

# (4) results
# `level` indexes model.clustersave; every metric and plot below uses the
# same saved clustering.
level = -3
cluster_assignment = model.clustersave[level]

purity = metrics.purity(cluster_assignment, class_label)
print("Purity: {}".format(purity))

davies_bouldin = metrics.davies_bouldin(R, cluster_assignment)
print("Davies-Bouldin: {}".format(davies_bouldin))

silhouette = metrics.silhouette(R, cluster_assignment)
print("Silhouette: {}".format(silhouette))

model.plot_cluster(
    nlevel=level,
    title="Hierarchical Clustering for Iris Dataset reduced to 2d",
    xlabel="u0",
    ylabel="u1",
)
metrics.plot_cluster_distribution(cluster_assignment, class_label)

# Display all figures created above.
plt.show()
# NOTE: this is the tail of a script; `time`, `time_pca_start`, `np`,
# `gaussianmm_spherical`, `mnist`, `metrics`, `plt`, `X`, `R` and
# `class_label` are expected to be defined earlier in that script.
print("Time pca: {}".format(time.time() - time_pca_start))

# (2) clustering model
np.random.seed(31)  # seed NumPy's RNG before the model is constructed
initialization = "kmeans++"
ncluster = 10
model = gaussianmm_spherical.gaussianmm(ncluster, initialization)

# (3) fit model on the reduced data R
max_iter = 100
tolerance = 1e-4
model.fit(R, max_iter, tolerance)
print("Time fit: {}".format(model.time_fit))

# (4) results
# `level` indexes model.clustersave; -1 picks the last saved entry.
level = -1
cluster_assignment = model.clustersave[level]

purity = metrics.purity(cluster_assignment, class_label)
print("Purity: {}".format(purity))

# NOTE(review): Davies-Bouldin is evaluated on X although the model was fit
# on R -- confirm this is intended.
davies_bouldin = metrics.davies_bouldin(X, cluster_assignment)
print("Davies-Bouldin: {}".format(davies_bouldin))

# plot images from clusters 1,4,5,6
seed = 31
for cluster_id in (1, 4, 5, 6):
    cluster_columns = model.get_index(level, cluster_id)
    mnist.plot_image(X[:, cluster_columns], seed)

model.plot_objective(
    title="Gaussian MM Clustering",
    xlabel="Iteration",
    ylabel="Objective",
)
metrics.plot_cluster_distribution(
    cluster_assignment,
    class_label,
    figsize=(8, 4),
    figrow=2,
)

# Display all figures created above.
plt.show()