예제 #1
0
    def clusterize(self):
        """Prepare the clustering data, then run k-means for k = 2, 3 and 4.

        Prints a "clusterize" banner, calls ``self.process_clustering_data()``,
        builds one ``Clustering`` object from ``self.clustering_df`` and calls
        its ``k_means`` method once per cluster count, in increasing order.
        Nothing is returned; whatever ``k_means`` returns is discarded.
        """
        print("\nclusterize")

        self.process_clustering_data()

        model = Clustering(self.clustering_df)
        # Same cluster counts, same order, same single Clustering instance.
        for n_clusters in (2, 3, 4):
            model.k_means(n_clusters)
예제 #2
0
def main(fn, clusters_no):
    """Cluster the locations stored in a CSV file and print the outcome.

    Args:
        fn: Path (or anything ``pd.read_csv`` accepts) of a CSV file that has
            ``LAT`` and ``LON`` columns.
        clusters_no: Forwarded unchanged as the second argument of
            ``Clustering``.

    Prints "Error in arguments!" when ``k_means`` returns ``-1``; otherwise
    prints a header and delegates to ``print_clusters``.
    """
    frame = pd.read_csv(fn)
    # One Point(latitude, longitude) per CSV row, in file order.
    geo_locs = [
        Point(float(record['LAT']), float(record['LON']))
        for _, record in frame.iterrows()
    ]

    clusterer = Clustering(geo_locs, clusters_no)
    status = clusterer.k_means(False)
    if status == -1:
        print("Error in arguments!")
        return

    # The result is a list of lists, each inner list being one cluster.
    print("Clustering results:")
    clusterer.print_clusters(clusterer.clusters)
예제 #3
0
####################################################################
# Benchmark script: runs several clustering routines of `cl` on `data`, one
# section per routine. `cl`, `data` and `y` are defined outside this excerpt.
# Every section follows the same pattern:
#   1. print a banner naming the routine,
#   2. time a single call with time.time(),
#   3. print the elapsed wall-clock seconds,
#   4. print element [0] of cl.my_math.compare_categorical_vectors(u, y),
#      labelled "Accuracy", where `u` is the first value the routine returns.
# Each section overwrites the module-level names it unpacks into (u, start,
# medoids/centroids, weights, ite, dist_tmp), so only the last run's values
# survive.
#
# NOTE(review): the meaning of the positional arguments (3, 1.1, False, 10) is
# not visible here -- presumably number of clusters, Minkowski exponent, an
# initialisation switch and a replicate count; confirm against the signatures.
print('Minkowski Weighted PAM')
start = time.time()
[u, medoids, weights, ite, dist_tmp] = cl.mwpam(data, 3, 1.1, False, 10)
print('Time elapsed: ', time.time()-start)
print('Accuracy: ', cl.my_math.compare_categorical_vectors(u, y)[0])
####################################################################
# Same routine, but called without the trailing `False, 10` arguments, so
# mwpam's defaults apply (per the banner, the Minkowski Build initialisation).
print('Minkowski Weighted PAM (Initialized with Minkowski Build)')
start = time.time()
[u, medoids, weights, ite, dist_tmp] = cl.mwpam(data, 3, 1.1)
print('Time elapsed: ', time.time()-start)
print('Accuracy: ', cl.my_math.compare_categorical_vectors(u, y)[0])
####################################################################
# k_means returns four values (no weights), unlike the weighted variants.
print('K-Means')
start = time.time()
[u, centroids, ite, dist_tmp] = cl.k_means(data, 3, replicates=10)
print('Time elapsed: ', time.time()-start)
print('Accuracy: ', cl.my_math.compare_categorical_vectors(u, y)[0])
####################################################################
# ik_means returns a fifth value, unpacked here as init_centroids.
print('iK-Means')
start = time.time()
[u, centroids, ite, dist_tmp, init_centroids] = cl.ik_means(data, 3)
print('Time elapsed: ', time.time()-start)
print('Accuracy: ', cl.my_math.compare_categorical_vectors(u, y)[0])
####################################################################
# wk_means returns five values, including the weights.
print('WK-Means')
start = time.time()
[u, centroids, weights, ite, dist_tmp] = cl.wk_means(data, 3, 1.1, replicates=10)
print('Time elapsed: ', time.time()-start)
print('Accuracy: ', cl.my_math.compare_categorical_vectors(u, y)[0])
####################################################################
예제 #4
0
def subspace_cluster(datapath,
                     ypath,
                     algorithm,
                     k,
                     sep=",",
                     preprocess_method="standard",
                     replicates=10,
                     max_ite=100,
                     beta=2,
                     init_weights=None,
                     init_centroids=None,
                     init_weights_method="random",
                     is_sparse=0,
                     threshold=0.9,
                     l=2,
                     minDeviation=0.1,
                     A=30,
                     B=10):
    """Load a dataset, run one clustering algorithm on it and report the result.

    The data file is read with ``np.genfromtxt(datapath, delimiter=sep)``, the
    reference labels with ``np.genfromtxt(ypath)``, and the data is passed
    through ``data_preprocess(data, preprocess_method)`` before clustering.
    The elapsed wall-clock time and the accuracy are printed to stdout.

    Args:
        datapath: Path of the delimited data file.
        ypath: Path of the reference-label file used for the accuracy figure.
        algorithm: One of ``'K-Means'``, ``'iK-Means'``, ``'WK-Means'``,
            ``'MWK-Means'``, ``'iMWK-Means'`` or ``'proclus'``.
        k: Forwarded to the selected algorithm as its ``k`` argument.
        sep: Field delimiter of the data file.
        preprocess_method: Forwarded to ``data_preprocess``.
        replicates: Forwarded to K-Means, WK-Means and MWK-Means.
        max_ite: Forwarded to WK-Means / MWK-Means as ``max_ite`` and to
            proclus as ``niters``.
        beta: Forwarded to WK-Means, MWK-Means and iMWK-Means.
        init_weights, init_centroids, init_weights_method, is_sparse,
        threshold: Forwarded to WK-Means and MWK-Means only.
        l, minDeviation, A, B: Forwarded to proclus only.

    Returns:
        A 3-tuple ``(result, time_elapsed, acc)``. ``result`` is a list with
        the values returned by the algorithm:
        ``[u, centroids, ite, dist_tmp]`` for K-Means,
        ``[u, centroids, ite, dist_tmp, init_centroids]`` for iK-Means,
        ``[u, centroids, weights, ite, dist_tmp]`` for the weighted variants
        and ``[M, D, A]`` for proclus.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names.
            (Previously the function loaded the data and then silently
            returned ``None``.)

    Note:
        Calls ``np.seterr(all='raise')``, which changes NumPy's floating-point
        error handling for the whole process, not just for this call.
    """
    supported = ('K-Means', 'iK-Means', 'WK-Means', 'MWK-Means',
                 'iMWK-Means', 'proclus')
    # Fail fast, before any file is read, instead of falling through the
    # dispatch below and returning None.
    if algorithm not in supported:
        raise ValueError('unknown algorithm %r; expected one of: %s'
                         % (algorithm, ', '.join(supported)))

    np.seterr(all='raise')
    cl = Clustering()
    data_ori = np.genfromtxt(datapath, delimiter=sep)
    y = np.genfromtxt(ypath)
    data = data_preprocess(data_ori, preprocess_method)

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('using %s' % algorithm)
    start = time.time()

    if algorithm == 'proclus':
        # proclus computes the accuracy itself, so it gets its own reporting.
        # The third result is bound to a fresh name so that the parameter `A`
        # is not rebound.
        M, D, A_found, acc = cl.proclus(data,
                                        y,
                                        k,
                                        l,
                                        minDeviation=minDeviation,
                                        A=A,
                                        B=B,
                                        niters=max_ite)
        time_elapsed = time.time() - start
        print('Time elapsed:  %s' % (time_elapsed,))
        print("Accuracy: %.4f" % acc)
        return [M, D, A_found], time_elapsed, acc

    if algorithm == 'K-Means':
        u, centroids, ite, dist_tmp = cl.k_means(data, k, replicates)
        result = [u, centroids, ite, dist_tmp]
    elif algorithm == 'iK-Means':
        u, centroids, ite, dist_tmp, init_centroids = cl.ik_means(data, k)
        result = [u, centroids, ite, dist_tmp, init_centroids]
    elif algorithm == 'iMWK-Means':
        # NOTE(review): beta is passed before k here, unlike the other calls;
        # kept exactly as in the original -- confirm against imwk_means.
        u, centroids, weights, ite, dist_tmp = cl.imwk_means(data, beta, k)
        result = [u, centroids, weights, ite, dist_tmp]
    else:
        # WK-Means and MWK-Means take exactly the same arguments.
        fit = cl.wk_means if algorithm == 'WK-Means' else cl.mwk_means
        u, centroids, weights, ite, dist_tmp = fit(
            data,
            k,
            beta,
            init_centroids=init_centroids,
            init_weights=init_weights,
            replicates=replicates,
            max_ite=max_ite,
            init_weights_method=init_weights_method,
            is_sparse=is_sparse,
            threshold=threshold)
        result = [u, centroids, weights, ite, dist_tmp]

    time_elapsed = time.time() - start
    acc = cl.my_math.compare_categorical_vectors(u, y)[0]
    # The two spaces after each colon reproduce the output of the original
    # `print 'label: ', value` statements.
    print('Time elapsed:  %s' % (time_elapsed,))
    print('Accuracy:  %s' % (acc,))
    return result, time_elapsed, acc