Beispiel #1
0
def each_cluster(locations, users):
    """Run a second clustering pass inside every first-layer cluster.

    The values of ``locations`` are grouped by their ``cluster1`` attribute.
    For each group, one document per location is built by joining all tags
    of all its posts with spaces. The corpus goes through
    ``clda.get_tfidf``, the transposed result is handed to ``cfuzzy.cmeans``
    together with ``CLUSTER_NUM_2`` (presumably the number of clusters --
    confirm against ``cfuzzy``), and the resulting membership is passed to
    ``output_on_map`` with a per-cluster ``.html`` path under
    ``./data/Summary/``.

    Args:
        locations: Mapping whose values expose ``cluster1``, ``posts``,
            ``lat``, ``lng`` and ``lname``.
        users: Not used by this function.
    """
    def first_layer(place):
        return place.cluster1

    ordered = sorted(locations.values(), key=first_layer)
    # groupby only merges adjacent items, hence the sort above.
    members_by_cluster = {
        label: list(members)
        for label, members in itertools.groupby(ordered, first_layer)
    }

    for label, members in members_by_cluster.items():
        print("In layer 2 - cluster:", label, ", #:", len(members))

        # One space-separated document of tags per location.
        corpus = [
            " ".join(" ".join(post.tags) for post in place.posts)
            for place in members
        ]
        tfidf, _tags_name = clda.get_tfidf(corpus)

        # Only the last of the eight returned values is used below.
        _cntr, _u, _u0, _d, _jm, _p, _fpc, membership = cfuzzy.cmeans(
            tfidf.T, CLUSTER_NUM_2)
        # Disabled: set_location_cluster(members, membership, "cluster2")

        output_on_map(
            [(float(place.lat), float(place.lng), place.lname)
             for place in members],
            membership,
            CLUSTER_NUM_2,
            "./data/Summary/map_cluster3_" + str(label) + ".html",
        )
Beispiel #2
0
print("--------------------------------------")
# An optional first command-line argument replaces CLUSTER_NUM.
if len(sys.argv) > 1:
    CLUSTER_NUM = int(sys.argv[1])

# Load locations and users, then hand both to fit_users_to_location.
locations = clocation.get_locations_list()
users = cuser.get_users_posts_afile(USER_POSTS_FILE)
fit_users_to_location(locations, users)

# One (lat, lng) float pair per location.
coordinate = numpy.array(
    [(float(loc.lat), float(loc.lng)) for loc in locations.values()]
)
# print("coordinate.shape:", coordinate.shape)

# Tag features: tf-idf over the corpus built from the locations.
corpus = get_corpus(locations.values())
tfidf, tags_name = clda.get_tfidf(corpus)
print("END getting data:", datetime.datetime.now())
# filter_tag receives the transposed matrix; its results replace both names.
tfidf, tags_name = filter_tag(tfidf.T, tags_name)
# print("tfidf:", tfidf.shape)

# Combined clustering on the coordinates and the tag matrix.
(cntr1, cntr2, u, u0, d1, d2, d, jm, p, fpc,
 cluster_membership) = cfuzzy.cmeans_comb(
    coordinate.T, tfidf, CLUSTER_NUM, WEIGHT, ERROR)
output_on_map(
    [(float(loc.lat), float(loc.lng), loc.lname)
     for loc in locations.values()],
    cluster_membership,
    CLUSTER_NUM,
    OUTPUT_MAP,
)

# Store each location's entry of cluster_membership as its "cluster1"
# attribute; positions follow the mapping's iteration order.
for i, key in enumerate(locations):
    locations[key].cluster1 = cluster_membership[i]
output_location_cluster(locations, OUTPUT_LOCATION_CLUSTER)

# Disabled: set_location_cluster(locations, membership, "cluster1")
Beispiel #3
0
print("STARTTIME:", (datetime.datetime.now()))
print("--------------------------------------")
# The cluster count can be overridden by the first command-line argument.
if len(sys.argv) > 1:
    CLUSTER_NUM = int(sys.argv[1])

# Fetch the locations and the users' posts; fit_users_to_location gets both.
locations = clocation.get_locations_list()
users = cuser.get_users_posts_afile(USER_POSTS_FILE)
fit_users_to_location(locations, users)

# Array of (lat, lng) floats, one row per location.
coordinate = numpy.array([
    (float(place.lat), float(place.lng))
    for place in locations.values()
])
# print("coordinate.shape:", coordinate.shape)

# tags distance: tfidf
corpus = get_corpus(locations.values())
tfidf, tags_name = clda.get_tfidf(corpus)
print("END getting data:", datetime.datetime.now())
# The transposed tf-idf matrix goes through filter_tag before clustering.
tfidf, tags_name = filter_tag(tfidf.T, tags_name)
# print("tfidf:", tfidf.shape)

# cmeans_comb takes the transposed coordinates and the filtered tag matrix;
# of its eleven results only cluster_membership is used below.
(
    cntr1, cntr2, u, u0, d1, d2, d, jm, p, fpc, cluster_membership,
) = cfuzzy.cmeans_comb(coordinate.T, tfidf, CLUSTER_NUM, WEIGHT, ERROR)
output_on_map(
    [
        (float(place.lat), float(place.lng), place.lname)
        for place in locations.values()
    ],
    cluster_membership,
    CLUSTER_NUM,
    OUTPUT_MAP,
)

# Write the i-th membership entry onto the i-th location (in the mapping's
# iteration order) as attribute "cluster1", then export the result.
for i, key in enumerate(locations):
    locations[key].cluster1 = cluster_membership[i]
output_location_cluster(locations, OUTPUT_LOCATION_CLUSTER)

# Disabled: set_location_cluster(locations, membership, "cluster1")

# Disabled second layer: each_cluster(locations, users)