Python get_tfidf Beispiele

Programmiersprache: Python

Namespace / Paketname: custommodule.lda

Methode / Funktion: get_tfidf

Beispiele auf hotexamples.com: 3

Python get_tfidf - 3 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Funktion custommodule.lda.get_tfidf, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Beispiel #1

Datei anzeigen

def each_cluster(locations, users):
    """Cluster the locations of every first-level cluster a second time.

    The values of ``locations`` are grouped by their ``cluster1`` attribute.
    For each group, one document per location is built from the tags of its
    posts, the documents are turned into a TF-IDF matrix via
    ``clda.get_tfidf``, the transposed matrix is passed to ``cfuzzy.cmeans``
    with ``CLUSTER_NUM_2``, and the resulting membership is drawn with
    ``output_on_map`` into one HTML file per first-level cluster.

    Args:
        locations: Mapping whose values expose ``cluster1``, ``posts``,
            ``lat``, ``lng`` and ``lname``; every post exposes ``tags``.
        users: Not used by this function.
    """
    def cluster_of(location):
        return location.cluster1

    # groupby only merges adjacent items, so sort by the same key first.
    ordered = sorted(locations.values(), key=cluster_of)
    groups = {}
    for label, members in itertools.groupby(ordered, cluster_of):
        groups[label] = list(members)

    for label, members in groups.items():
        print("In layer 2 - cluster:", label, ", #:", len(members))

        # One document per location: all tags of all its posts, space-joined.
        corpus = [
            " ".join(" ".join(post.tags) for post in location.posts)
            for location in members
        ]

        tfidf, tags_name = clda.get_tfidf(corpus)
        cntr, u, u0, d, jm, p, fpc, membership = cfuzzy.cmeans(
            tfidf.T, CLUSTER_NUM_2)
        # The membership is only plotted; the set_location_cluster call that
        # would store it as "cluster2" is disabled.

        points = [
            (float(location.lat), float(location.lng), location.lname)
            for location in members
        ]
        map_path = "./data/Summary/map_cluster3_" + str(label) + ".html"
        output_on_map(points, membership, CLUSTER_NUM_2, map_path)

Beispiel #2

Datei anzeigen

print("--------------------------------------")
# An optional first command-line argument overrides the cluster count.
if sys.argv[1:]:
    CLUSTER_NUM = int(sys.argv[1])

# Load the locations and the users' posts, then hand both to
# fit_users_to_location.
locations = clocation.get_locations_list()
users = cuser.get_users_posts_afile(USER_POSTS_FILE)
fit_users_to_location(locations, users)

# Coordinate features: one (lat, lng) float pair per location.
coordinate = numpy.array(
    [(float(loc.lat), float(loc.lng)) for loc in locations.values()]
)

# Tag features: TF-IDF computed from the corpus of the locations.
corpus = get_corpus(locations.values())
tfidf, tags_name = clda.get_tfidf(corpus)
print("END getting data:", datetime.datetime.now())
# filter_tag is given the transposed matrix; its result replaces both names.
tfidf, tags_name = filter_tag(tfidf.T, tags_name)

# Cluster on the transposed coordinates together with the filtered TF-IDF.
(cntr1, cntr2, u, u0, d1, d2, d, jm, p, fpc,
 cluster_membership) = cfuzzy.cmeans_comb(
    coordinate.T, tfidf, CLUSTER_NUM, WEIGHT, ERROR)
output_on_map(
    [(float(loc.lat), float(loc.lng), loc.lname) for loc in locations.values()],
    cluster_membership,
    CLUSTER_NUM,
    OUTPUT_MAP,
)

# Store each location's membership entry (matched by iteration order) as
# its "cluster1" attribute, then write the result out.
for index, location in enumerate(locations.values()):
    location.cluster1 = cluster_membership[index]
output_location_cluster(locations, OUTPUT_LOCATION_CLUSTER)

Beispiel #3

Datei anzeigen

Datei: gps_tag.py Projekt: amy22292003/Liu

print("STARTTIME:", (datetime.datetime.now()))
print("--------------------------------------")
# The cluster count can be overridden by the first command-line argument.
if len(sys.argv) >= 2:
    CLUSTER_NUM = int(sys.argv[1])

# Inputs: the locations plus the users' posts read from USER_POSTS_FILE.
locations = clocation.get_locations_list()
users = cuser.get_users_posts_afile(USER_POSTS_FILE)
fit_users_to_location(locations, users)

# Spatial features: a (lat, lng) float pair for every location.
coordinate = numpy.array(
    [(float(place.lat), float(place.lng)) for place in locations.values()]
)

# Tag features: TF-IDF over the corpus built from the locations.
corpus = get_corpus(locations.values())
tfidf, tags_name = clda.get_tfidf(corpus)
print("END getting data:", datetime.datetime.now())
# The transposed TF-IDF matrix goes through filter_tag before clustering.
tfidf, tags_name = filter_tag(tfidf.T, tags_name)

# Combined clustering on the transposed coordinates and the tag features.
(
    cntr1, cntr2, u, u0, d1, d2, d, jm, p, fpc, cluster_membership
) = cfuzzy.cmeans_comb(coordinate.T, tfidf, CLUSTER_NUM, WEIGHT, ERROR)

map_points = [
    (float(place.lat), float(place.lng), place.lname)
    for place in locations.values()
]
output_on_map(map_points, cluster_membership, CLUSTER_NUM, OUTPUT_MAP)

# Record the membership entry at each key's iteration position as the
# location's "cluster1" attribute, then write the assignment out.
for position, name in enumerate(locations.keys()):
    locations[name].cluster1 = cluster_membership[position]
output_location_cluster(locations, OUTPUT_LOCATION_CLUSTER)

# The second-level pass (each_cluster(locations, users)) is not run here.