def compare_cluster_runtime(data, n_clusters, n_components):
    """Time clustering and dimensionality reduction and report the results.

    The measurements are taken in this order:

    1. KMeans ``fit_transform`` on ``data[0]``.
    2. GMM ``fit`` on ``data[0]``.
    3. The four reducers (``pca_eval``, ``rand_projections``, ``ica_eval``,
       ``trunk_svd``) applied to ``data``; each call returns a pair whose
       second item is recorded as that reducer's timing.
    4. KMeans and GMM again, on the output of the last reducer.

    Args:
        data: Indexable container; ``data[0]`` is used as the feature
            matrix for the clustering runs on the original data, while the
            whole object is handed to the reducers.
        n_clusters: Number of KMeans clusters and of GMM components.
        n_components: Target dimensionality passed to every reducer.

    Returns:
        A ``pd.Series`` named ``'time'`` with eight labelled entries.
    """
    original_features = data[0]

    started = time()
    KMeans(n_clusters=n_clusters).fit_transform(original_features)
    kmeans_original = time() - started

    # NOTE(review): mixture.GMM is the legacy scikit-learn API (superseded by
    # GaussianMixture in later releases) -- confirm the pinned version.
    started = time()
    mixture.GMM(n_components=n_clusters).fit(original_features)
    gmm_original = time() - started

    # Only the timing of the first three reducers is kept; their transformed
    # output is discarded.
    _, pca_elapsed = pca_eval.transform(data, n_components=n_components)
    _, rca_elapsed = rand_projections.transform(
        data, n_components=n_components)
    _, ica_elapsed = ica_eval.transform(data, n_components=n_components)

    # NOTE(review): the "reduced" clustering runs below therefore operate on
    # the trunk_svd (LSA) output only -- confirm this is intended.
    reduced_features, lsa_elapsed = trunk_svd.transform(
        data, n_components=n_components)

    started = time()
    KMeans(n_clusters=n_clusters).fit_transform(reduced_features)
    kmeans_reduced = time() - started

    started = time()
    mixture.GMM(n_components=n_clusters).fit(reduced_features)
    gmm_reduced = time() - started

    labels = [
        'original Kmeans clustering',
        'original GMM clustering',
        'PCA',
        'RCA',
        'ICA',
        'LSA',
        'reduced Kmeans clustering',
        'reduced GMM clustering',
    ]
    timings = [
        kmeans_original,
        gmm_original,
        pca_elapsed,
        rca_elapsed,
        ica_elapsed,
        lsa_elapsed,
        kmeans_reduced,
        gmm_reduced,
    ]
    return pd.Series(timings, index=labels, name='time')
def run_higg_dimensionality_reduction(higgs_data, n_components):
    """Apply the PCA, RCA, ICA and LSA reducers to ``higgs_data``.

    Each reducer module's ``transform`` is called with the same data and
    ``n_components`` and is expected to return a 2-item pair: the transformed
    data and a timing value (presumably elapsed seconds -- verify against the
    reducer modules).

    Args:
        higgs_data: Dataset forwarded unchanged to every reducer.
        n_components: Target dimensionality passed to every reducer.

    Returns:
        A tuple ``(transformed, transformation_time)`` where ``transformed``
        is a dict keyed by ``'PCA'``, ``'RCA'``, ``'ICA'``, ``'LSA'`` and
        ``transformation_time`` is a ``pd.Series`` with the same labels,
        named ``'transformation_time'``.
    """
    # NOTE(review): this function is defined a second time, with the same
    # body, directly after this definition; the later one is what the name
    # ends up bound to.
    reducers = (
        ('PCA', pca_eval),
        ('RCA', rand_projections),
        ('ICA', ica_eval),
        ('LSA', trunk_svd),
    )

    transformed = {}
    labels = []
    elapsed = []
    for label, reducer in reducers:
        projection, seconds = reducer.transform(
            higgs_data, n_components=n_components)
        transformed[label] = projection
        labels.append(label)
        elapsed.append(seconds)

    transformation_time = pd.Series(
        elapsed, index=labels, name='transformation_time')
    return transformed, transformation_time
def run_higg_dimensionality_reduction(higgs_data, n_components):
    """Reduce ``higgs_data`` with four reducers and collect their timings.

    The reducers run in the order PCA (``pca_eval``), RCA
    (``rand_projections``), ICA (``ica_eval``) and LSA (``trunk_svd``). Every
    ``transform`` call receives the same ``higgs_data`` and ``n_components``
    and must return a 2-item pair of (transformed data, timing value).

    Args:
        higgs_data: Dataset forwarded unchanged to every reducer.
        n_components: Target dimensionality passed to every reducer.

    Returns:
        ``(results, transformation_time)``: a dict mapping each of ``'PCA'``,
        ``'RCA'``, ``'ICA'``, ``'LSA'`` to its transformed data, and a
        ``pd.Series`` named ``'transformation_time'`` holding the matching
        timing values under the same labels.
    """
    # NOTE(review): an identical definition of this function precedes this
    # one in the file; this later definition shadows it.

    def _reduce_with(reducer):
        # Shared call shape for all four reducer modules.
        return reducer.transform(higgs_data, n_components=n_components)

    pca_out, pca_secs = _reduce_with(pca_eval)
    rca_out, rca_secs = _reduce_with(rand_projections)
    ica_out, ica_secs = _reduce_with(ica_eval)
    lsa_out, lsa_secs = _reduce_with(trunk_svd)

    results = {}
    results['PCA'] = pca_out
    results['RCA'] = rca_out
    results['ICA'] = ica_out
    results['LSA'] = lsa_out

    durations = [pca_secs, rca_secs, ica_secs, lsa_secs]
    transformation_time = pd.Series(
        durations,
        index=['PCA', 'RCA', 'ICA', 'LSA'],
        name='transformation_time')
    return results, transformation_time