def clusterize(self):
    """Run k-means on the prepared clustering data with 2, 3 and 4.

    Prints a banner, calls ``self.process_clustering_data()``, wraps
    ``self.clustering_df`` in a ``Clustering`` object and then calls its
    ``k_means`` method three times, with the arguments 2, 3 and 4 in that
    order (presumably the number of clusters -- confirm against
    ``Clustering.k_means``).  Nothing is returned.
    """
    print("\nclusterize")
    self.process_clustering_data()

    model = Clustering(self.clustering_df)
    # Same three runs as before, in the same ascending order.
    for k in (2, 3, 4):
        model.k_means(k)
def main(fn, clusters_no):
    """Cluster the locations stored in a CSV file and print the outcome.

    Args:
        fn: CSV source handed to ``pd.read_csv``; every row must provide
            ``'LAT'`` and ``'LON'`` values convertible with ``float``.
        clusters_no: Passed unchanged as the second argument of
            ``Clustering``.

    Prints ``"Error in arguments!"`` when ``k_means`` returns ``-1``;
    otherwise prints a header and delegates to ``print_clusters``.
    """
    # Read the location data from the csv file; each row becomes one
    # Point(latitude, longitude) object.
    frame = pd.read_csv(fn)
    geo_locs = [
        Point(float(record['LAT']), float(record['LON']))
        for _, record in frame.iterrows()
    ]

    # Run k-means clustering.
    cluster = Clustering(geo_locs, clusters_no)
    status = cluster.k_means(False)
    if status == -1:
        print("Error in arguments!")
        return

    # Clustering results is a list of lists where each list represents
    # one cluster.
    print("Clustering results:")
    cluster.print_clusters(cluster.clusters)
def _report_run(started_at, labels):
    """Print the time elapsed since ``started_at`` and the accuracy line.

    The accuracy value is element 0 of
    ``cl.my_math.compare_categorical_vectors(labels, y)``; ``cl`` and ``y``
    are the names already bound by the surrounding script.
    """
    print('Time elapsed: ', time.time() - started_at)
    print('Accuracy: ', cl.my_math.compare_categorical_vectors(labels, y)[0])


# --- Minkowski Weighted PAM ------------------------------------------
print('Minkowski Weighted PAM')
start = time.time()
u, medoids, weights, ite, dist_tmp = cl.mwpam(data, 3, 1.1, False, 10)
_report_run(start, u)

# --- Minkowski Weighted PAM, initialized with Minkowski Build --------
print('Minkowski Weighted PAM (Initialized with Minkowski Build)')
start = time.time()
u, medoids, weights, ite, dist_tmp = cl.mwpam(data, 3, 1.1)
_report_run(start, u)

# --- K-Means ----------------------------------------------------------
print('K-Means')
start = time.time()
u, centroids, ite, dist_tmp = cl.k_means(data, 3, replicates=10)
_report_run(start, u)

# --- iK-Means ---------------------------------------------------------
print('iK-Means')
start = time.time()
u, centroids, ite, dist_tmp, init_centroids = cl.ik_means(data, 3)
_report_run(start, u)

# --- WK-Means ---------------------------------------------------------
print('WK-Means')
start = time.time()
u, centroids, weights, ite, dist_tmp = cl.wk_means(data, 3, 1.1, replicates=10)
_report_run(start, u)
def _print_labelled(label, value):
    """Print ``label``, one space, then ``str(value)`` on a single line.

    A single pre-formatted string is handed to ``print`` so the output is
    the same whether ``print`` is the Python 2 statement or the Python 3
    function (a multi-argument ``print(a, b)`` would show a tuple on
    Python 2).
    """
    print('%s %s' % (label, value))


def subspace_cluster(datapath, ypath, algorithm, k, sep=",", preprocess_method="standard",
                     replicates=10, max_ite=100, beta=2, init_weights=None,
                     init_centroids=None, init_weights_method="random", is_sparse=0,
                     threshold=0.9, l=2, minDeviation=0.1, A=30, B=10):
    """Load a data set, run one clustering algorithm on it and report the result.

    The data is read with ``np.genfromtxt(datapath, delimiter=sep)``, the
    reference labels with ``np.genfromtxt(ypath)``, and the data is passed
    through ``data_preprocess(data, preprocess_method)`` before clustering.
    The algorithm name, the elapsed wall-clock time and the accuracy are
    printed.

    Args:
        datapath: Source of the data matrix for ``np.genfromtxt``.
        ypath: Source of the reference labels for ``np.genfromtxt``.
        algorithm: One of ``'K-Means'``, ``'iK-Means'``, ``'WK-Means'``,
            ``'MWK-Means'``, ``'iMWK-Means'`` or ``'proclus'``.
        k: Forwarded to the selected algorithm.
        sep: Column delimiter of the data file.
        preprocess_method: Forwarded to ``data_preprocess``.
        replicates: Used by K-Means, WK-Means and MWK-Means only.
        max_ite: Used by WK-Means and MWK-Means; passed to proclus as
            ``niters``.
        beta: Used by WK-Means, MWK-Means and iMWK-Means.
        init_weights, init_centroids, init_weights_method, is_sparse,
        threshold: Forwarded to WK-Means and MWK-Means only.
        l, minDeviation, A, B: Forwarded to proclus only.

    Returns:
        A tuple ``(results, time_elapsed, acc)``.  ``results`` is a list
        holding the values returned by the algorithm (for proclus, its
        first three return values); ``time_elapsed`` is the duration of
        the algorithm call in seconds; ``acc`` is element 0 of
        ``compare_categorical_vectors(u, y)``, or proclus's own fourth
        return value.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names.
            Raised before any file is read.

    Note:
        Calls ``np.seterr(all='raise')``, which changes NumPy's
        floating-point error handling and is not restored afterwards.
    """
    supported = ('K-Means', 'iK-Means', 'WK-Means', 'MWK-Means',
                 'iMWK-Means', 'proclus')
    # Fail fast: previously an unknown name loaded and preprocessed the
    # data and then silently returned None.
    if algorithm not in supported:
        raise ValueError('unknown algorithm %r; expected one of: %s'
                         % (algorithm, ', '.join(supported)))

    np.seterr(all='raise')
    cl = Clustering()
    data_ori = np.genfromtxt(datapath, delimiter=sep)
    y = np.genfromtxt(ypath)
    data = data_preprocess(data_ori, preprocess_method)

    print('using %s' % algorithm)
    start = time.time()

    if algorithm == 'proclus':
        # proclus receives y and returns its own accuracy, so it skips the
        # compare_categorical_vectors step.  The returned values get their
        # own names instead of overwriting the ``A`` parameter.
        proclus_m, proclus_d, proclus_a, acc = cl.proclus(
            data, y, k, l, minDeviation=minDeviation, A=A, B=B, niters=max_ite)
        time_elapsed = time.time() - start
        _print_labelled('Time elapsed: ', time_elapsed)
        print("Accuracy: %.4f" % acc)
        return [proclus_m, proclus_d, proclus_a], time_elapsed, acc

    if algorithm == 'K-Means':
        u, centroids, ite, dist_tmp = cl.k_means(data, k, replicates)
        results = [u, centroids, ite, dist_tmp]
    elif algorithm == 'iK-Means':
        # Kept separate from the (unused here) init_centroids parameter.
        u, centroids, ite, dist_tmp, found_init_centroids = cl.ik_means(data, k)
        results = [u, centroids, ite, dist_tmp, found_init_centroids]
    elif algorithm == 'iMWK-Means':
        u, centroids, weights, ite, dist_tmp = cl.imwk_means(data, beta, k)
        results = [u, centroids, weights, ite, dist_tmp]
    else:
        # WK-Means and MWK-Means are called with identical arguments.
        fit = cl.wk_means if algorithm == 'WK-Means' else cl.mwk_means
        u, centroids, weights, ite, dist_tmp = fit(
            data, k, beta,
            init_centroids=init_centroids,
            init_weights=init_weights,
            replicates=replicates,
            max_ite=max_ite,
            init_weights_method=init_weights_method,
            is_sparse=is_sparse,
            threshold=threshold)
        results = [u, centroids, weights, ite, dist_tmp]

    time_elapsed = time.time() - start
    acc = cl.my_math.compare_categorical_vectors(u, y)[0]
    _print_labelled('Time elapsed: ', time_elapsed)
    _print_labelled('Accuracy: ', acc)
    return results, time_elapsed, acc