# Report the accuracy of the labelling `u` produced by the run just above.
score = cl.my_math.compare_categorical_vectors(u, y)[0]
print('Accuracy: ', score)

####################################################################
# iK-Means; the literal 3 is presumably the number of clusters.
print('iK-Means')
start = time.time()
u, centroids, ite, dist_tmp, init_centroids = cl.ik_means(data, 3)
elapsed = time.time() - start
print('Time elapsed: ', elapsed)
score = cl.my_math.compare_categorical_vectors(u, y)[0]
print('Accuracy: ', score)

####################################################################
# WK-Means with 1.1 as the third positional argument and 10 replicates.
print('WK-Means')
start = time.time()
u, centroids, weights, ite, dist_tmp = cl.wk_means(data, 3, 1.1, replicates=10)
elapsed = time.time() - start
print('Time elapsed: ', elapsed)
score = cl.my_math.compare_categorical_vectors(u, y)[0]
print('Accuracy: ', score)

####################################################################
# MWK-Means, called with the same arguments as WK-Means above.
print('MWK-Means')
start = time.time()
u, centroids, weights, ite, dist_tmp = cl.mwk_means(data, 3, 1.1, replicates=10)
elapsed = time.time() - start
print('Time elapsed: ', elapsed)
score = cl.my_math.compare_categorical_vectors(u, y)[0]
print('Accuracy: ', score)

####################################################################
# iMWK-Means; note the argument order differs here: 1.1 comes before 3.
print('iMWK-Means')
start = time.time()
u, centroids, weights, ite, dist_tmp = cl.imwk_means(data, 1.1, 3)
elapsed = time.time() - start
print('Time elapsed: ', elapsed)
score = cl.my_math.compare_categorical_vectors(u, y)[0]
print('Accuracy: ', score)

####################################################################
# Ward; returns only the labels and the centroids.
print('Ward')
start = time.time()
u, centroids = cl.ward(data, 3)
elapsed = time.time() - start
print('Time elapsed: ', elapsed)
score = cl.my_math.compare_categorical_vectors(u, y)[0]
print('Accuracy: ', score)
def _score_and_report(cl, u, y, time_elapsed):
    """Score the labels ``u`` against ``y`` and print the run summary.

    Prints the elapsed time and the accuracy, then returns the accuracy,
    which is element 0 of ``cl.my_math.compare_categorical_vectors(u, y)``.
    """
    acc = cl.my_math.compare_categorical_vectors(u, y)[0]
    # A single pre-formatted argument prints the same text on Python 2 and 3.
    # The two spaces after the colon reproduce what the former statement
    # ``print 'Time elapsed: ', value`` emitted.
    print('Time elapsed:  %s' % (time_elapsed,))
    print('Accuracy:  %s' % (acc,))
    return acc


def subspace_cluster(datapath, ypath, algorithm, k, sep=",",
                     preprocess_method="standard", replicates=10,
                     max_ite=100, beta=2, init_weights=None,
                     init_centroids=None, init_weights_method="random",
                     is_sparse=0, threshold=0.9, l=2, minDeviation=0.1,
                     A=30, B=10):
    """Load a data set, run one clustering algorithm on it and score it.

    The data is read with ``np.genfromtxt(datapath, delimiter=sep)``, the
    reference labels with ``np.genfromtxt(ypath)``, and the data is passed
    through ``data_preprocess(data, preprocess_method)`` before clustering.
    Only the call to the clustering routine itself is timed.

    Parameters
    ----------
    datapath, ypath : paths of the data file and of the label file.
    algorithm : one of ``'K-Means'``, ``'iK-Means'``, ``'WK-Means'``,
        ``'MWK-Means'``, ``'iMWK-Means'`` or ``'proclus'``.
    k : forwarded to the selected routine (presumably the number of
        clusters -- confirm against ``Clustering``).
    sep : column delimiter of the data file.
    preprocess_method : forwarded to ``data_preprocess``.
    replicates : used by K-Means, WK-Means and MWK-Means.
    max_ite : used by WK-Means and MWK-Means, and as ``niters`` by proclus.
    beta : used by WK-Means, MWK-Means and iMWK-Means.
    init_weights, init_centroids, init_weights_method, is_sparse, threshold :
        forwarded as keyword arguments to WK-Means and MWK-Means only.
    l, minDeviation, A, B : forwarded to proclus only.

    Returns
    -------
    tuple
        ``(results, time_elapsed, acc)`` where ``results`` is a list whose
        layout depends on the algorithm:

        * K-Means: ``[u, centroids, ite, dist_tmp]``
        * iK-Means: ``[u, centroids, ite, dist_tmp, init_centroids]``
        * WK-Means / MWK-Means / iMWK-Means:
          ``[u, centroids, weights, ite, dist_tmp]``
        * proclus: the first three values returned by ``cl.proclus``

    Raises
    ------
    ValueError
        If ``algorithm`` is not one of the supported names. This used to
        fall through and return ``None`` silently.
    """
    supported = ('K-Means', 'iK-Means', 'WK-Means', 'MWK-Means',
                 'iMWK-Means', 'proclus')
    # Fail before touching the file system so a typo is reported at once.
    if algorithm not in supported:
        raise ValueError('unknown algorithm %r; expected one of: %s'
                         % (algorithm, ', '.join(supported)))

    # NOTE: this switches NumPy floating-point error handling to "raise" for
    # the whole process and is not restored afterwards (kept as in the
    # original so existing callers see unchanged behaviour).
    np.seterr(all='raise')
    cl = Clustering()
    data_ori = np.genfromtxt(datapath, delimiter=sep)
    y = np.genfromtxt(ypath)
    data = data_preprocess(data_ori, preprocess_method)

    print('using %s' % algorithm)

    if algorithm == 'K-Means':
        start = time.time()
        u, centroids, ite, dist_tmp = cl.k_means(data, k, replicates)
        time_elapsed = time.time() - start
        acc = _score_and_report(cl, u, y, time_elapsed)
        return [u, centroids, ite, dist_tmp], time_elapsed, acc

    if algorithm == 'iK-Means':
        start = time.time()
        # The centroids reported here come from ik_means itself; the
        # ``init_centroids`` argument of this function is not passed to it.
        u, centroids, ite, dist_tmp, found_centroids = cl.ik_means(data, k)
        time_elapsed = time.time() - start
        acc = _score_and_report(cl, u, y, time_elapsed)
        return [u, centroids, ite, dist_tmp, found_centroids], time_elapsed, acc

    if algorithm in ('WK-Means', 'MWK-Means'):
        # Both variants take exactly the same arguments.
        run = cl.wk_means if algorithm == 'WK-Means' else cl.mwk_means
        start = time.time()
        u, centroids, weights, ite, dist_tmp = run(
            data, k, beta,
            init_centroids=init_centroids,
            init_weights=init_weights,
            replicates=replicates,
            max_ite=max_ite,
            init_weights_method=init_weights_method,
            is_sparse=is_sparse,
            threshold=threshold)
        time_elapsed = time.time() - start
        acc = _score_and_report(cl, u, y, time_elapsed)
        return [u, centroids, weights, ite, dist_tmp], time_elapsed, acc

    if algorithm == 'iMWK-Means':
        start = time.time()
        # imwk_means takes beta before k, unlike wk_means / mwk_means.
        u, centroids, weights, ite, dist_tmp = cl.imwk_means(data, beta, k)
        time_elapsed = time.time() - start
        acc = _score_and_report(cl, u, y, time_elapsed)
        return [u, centroids, weights, ite, dist_tmp], time_elapsed, acc

    # Only 'proclus' is left. It computes the accuracy itself from y, so the
    # shared scoring helper is not used here.
    start = time.time()
    # The third result gets its own name so the parameter ``A`` is not rebound.
    M, D, A_result, acc = cl.proclus(data, y, k, l, minDeviation=minDeviation,
                                     A=A, B=B, niters=max_ite)
    time_elapsed = time.time() - start
    print('Time elapsed:  %s' % (time_elapsed,))
    print("Accuracy: %.4f" % acc)
    return [M, D, A_result], time_elapsed, acc