Beispiel #1
0
def main():
    """Run the location-tag topic pipeline on time-filtered user posts.

    Loads locations and user posts, keeps only the posts whose ``time`` lies
    strictly between FILTER_TIME_S and FILTER_TIME_E, attaches the users to
    the locations, builds a corpus from the locations, and passes the output
    of ``clda.fit_lda`` to ``ccluster.output_topics``.
    """
    banner = "--------------------------------------"
    print(banner)
    print("STARTTIME:", (datetime.datetime.now()))
    print(banner)

    # Alternative data source (disabled): read the tags straight from file.
    #locations = ctag.open_location_tags(USER_TAGS_FILE)
    #corpus = ctag.get_corpus([x.tags for x in locations.values()])

    # Data source in use: locations plus per-user posts.
    locations = clocation.open_locations()
    users = cuser.open_users_posts_afile(USER_POSTS_FILE)

    print("Sampling users posts...")
    for a_user in users.values():
        # Both bounds are exclusive.
        a_user.posts = [
            post for post in a_user.posts
            if FILTER_TIME_S < post.time < FILTER_TIME_E
        ]

    locations = clocation.fit_users_to_location(locations, users, "tags")
    corpus = ctag.get_location_posts_corpus(locations)

    vector, tag_name = clda.get_tag_vector(corpus)
    topic_word, doc_topic = clda.fit_lda(vector, tag_name, TOPIC_NUM)

    location_ids = [loc.lid for loc in locations.values()]
    ccluster.output_topics(
        topic_word,
        doc_topic,
        tag_name,
        location_ids,
        OUTPUT_TAG_TOPIC,
        OUTPUT_LOCATION_TOPIC,
    )

    print(banner)
    print("ENDTIME:", (datetime.datetime.now()))
    print(banner)
Beispiel #2
0
def main():
    """Run the intersection clustering of locations and write the results.

    Loads locations and user posts, attaches the users to the locations,
    builds the coordinate array, the result of ``get_tag_intersection`` and
    the per-location user counts, runs ``cfuzzy.cmeans_intersect`` with the
    "kthCluster_LocationFrequency" algorithm, stores each membership entry on
    its location as ``cluster``, and passes the result to the map and cluster
    output helpers.
    """
    rule = "--------------------------------------"
    print(rule)
    print("STARTTIME:", (datetime.datetime.now()))
    print(rule)

    # getting data
    locations = clocation.get_locations_list()
    users = cuser.get_users_posts_afile(USER_POSTS_FILE)
    locations = clocation.fit_users_to_location(
        locations, users, "tags", "uid")
    del users  # not needed once the users are attached to the locations
    set_location_tags(locations)
    set_location_user_count(locations)

    coordinate = numpy.array(
        [(float(loc.lat), float(loc.lng)) for loc in locations.values()])
    intersection = get_tag_intersection(locations.values())
    location_frequency = numpy.array(
        [loc.usercount for loc in locations.values()])

    # Diagnostics.
    average = sum(location_frequency) / len(location_frequency)
    highest = max(location_frequency)
    lowest = min(location_frequency)
    print("avg location_frequency:", average, " max:", highest,
          " min:", lowest)

    first_location = list(locations.values())[0]
    print("location 1:", first_location.lname, first_location.lid)

    column_sums = intersection.sum(axis=0)[0:6]
    row_sums = intersection.sum(axis=1)[0:6]
    print("intersection.sum:", column_sums, row_sums)
    print("location_frequency:", location_frequency.shape)

    # Disabled variants, kept for reference:
    # original intersect clustering
    #cntr1, u, u0, d1, d2, d, jm, p, fpc, membership = cfuzzy.cmeans_intersect(coordinate.T, intersection, CLUSTER_NUM, w=WEIGHT, e=ERROR)
    # intersect clustering with the kth locations in each cluster
    #cntr1, u, u0, d1, d2, d, jm, p, fpc, membership = cfuzzy.cmeans_intersect(coordinate.T, intersection, CLUSTER_NUM,  MAX_KTH, w=WEIGHT, e=ERROR, algorithm="kthCluster")

    # Variant in use: kth locations in each cluster, with the location
    # frequency as weight. Only the last of the ten results is used here.
    (_cntr1, _u, _u0, _d1, _d2, _d, _jm, _p, _fpc,
     membership) = cfuzzy.cmeans_intersect(
        coordinate.T,
        intersection,
        CLUSTER_NUM,
        MAX_KTH,
        location_frequency,
        w=WEIGHT,
        e=ERROR,
        algorithm="kthCluster_LocationFrequency")

    # membership is indexed in the iteration order of ``locations``.
    for index, loc in enumerate(locations.values()):
        loc.cluster = membership[index]

    markers = [
        (float(loc.lat), float(loc.lng), str(loc.cluster) + " >> " + loc.lname)
        for loc in locations.values()
    ]
    cpygmaps.output_clusters(markers, membership, CLUSTER_NUM, OUTPUT_MAP)

    cfuzzy.output_location_cluster(locations.values(), "cluster",
                                   OUTPUT_CLUSTER)

    print(rule)
    print("ENDTIME:", (datetime.datetime.now()))
    print(rule)
Beispiel #3
0
def main(*argv):
    """Filter users by time window and cluster the locations they visited.

    Args:
        *argv: Optional override of the filter window. When supplied,
            ``argv[0]`` and ``argv[1]`` replace the module globals
            FILTER_TIME_S and FILTER_TIME_E. Any further positional
            arguments are ignored.

    Returns:
        A tuple ``(users, locations)``: the users left after filtering, and
        the locations as returned by ``ccluster.fit_locations_cluster``.

    Raises:
        IndexError: If exactly one positional argument is supplied. The
            globals are left untouched in that case.
    """
    banner = "--------------------------------------"
    print(banner)
    print("STARTTIME:", (datetime.datetime.now()))
    print(banner)

    # set UNIXTIME
    global FILTER_TIME_S
    global FILTER_TIME_E
    if argv:
        # Validate before assigning so a lone argument cannot overwrite the
        # start time and then fail, leaving the window half-updated.
        if len(argv) < 2:
            raise IndexError(
                "main() needs both a start and an end time to override "
                "the filter window")
        FILTER_TIME_S, FILTER_TIME_E = argv[0], argv[1]

    # getting data
    locations = clocation.open_locations()
    users = cuser.open_users_posts_afile(USER_POSTS_FILE)

    # preprocessing: remove unqualified users
    removes = filter_users_timeperiod(users, FILTER_TIME_S, FILTER_TIME_E)
    sequences = ctrajectory.split_trajectory_byday(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0])
    sequences = ctrajectory.remove_adjacent_location(sequences)
    sequences = ctrajectory.remove_short(sequences)

    # Also drop every user who does not own at least one surviving sequence.
    users_with_sequences = {seq[0].uid for seq in sequences}
    removes = set(removes) | (set(users) - users_with_sequences)

    for key in removes:
        del users[key]
    print("  remain users #:", len(users))

    locations = clocation.fit_users_to_location(locations, users, "uid")
    set_location_user_count(locations)

    coordinate = numpy.array(
        [(float(x.lat), float(x.lng)) for x in locations.values()])
    location_frequency = numpy.array(
        [x.usercount for x in locations.values()])

    # clustering locations; only ``u`` and ``membership`` are used below
    cntr, u, u0, d, jm, p, fpc, membership = cfuzzy.cmeans_location(
        coordinate.T,
        CLUSTER_NUM,
        MAX_KTH,
        location_frequency,
        Y,
        e=ERROR,
        algorithm="kthCluster_LocationFrequency")
    locations = ccluster.fit_locations_membership(
        locations, u, locations.keys())
    locations = ccluster.fit_locations_cluster(
        locations, membership, locations.keys())

    # output result (disabled)
    # cpygmaps.output_clusters(\
    #     [(float(x.lat), float(x.lng), str(x.cluster) + " >> " + x.lname + "(" + x.lid + ")>>u:" + str(u[x.cluster, i])) \
    #         for i, x in enumerate(locations.values())], membership, CLUSTER_NUM, OUTPUT_MAP)
    # output_representatives(coordinate, u, MAX_KTH)
    #ccluster.output_location_cluster(locations.values(), "cluster", OUTPUT_CLUSTER)

    print(banner)
    print("ENDTIME:", (datetime.datetime.now()))
    print(banner)

    return users, locations
Beispiel #4
0
def main():
    """Cluster locations with the intersection-based fuzzy c-means variant.

    Steps: load locations and user posts, attach the users to the locations,
    derive the coordinate array, the output of ``get_tag_intersection`` and
    the per-location user counts, call ``cfuzzy.cmeans_intersect`` with the
    "kthCluster_LocationFrequency" algorithm, write each membership entry to
    its location's ``cluster`` attribute, then emit the map and the cluster
    listing through the output helpers.
    """
    divider = "--------------------------------------"

    print(divider)
    print("STARTTIME:", (datetime.datetime.now()))
    print(divider)

    # getting data
    locations = clocation.get_locations_list()
    users = cuser.get_users_posts_afile(USER_POSTS_FILE)
    locations = clocation.fit_users_to_location(locations, users, "tags", "uid")
    del users  # release the raw posts before the clustering inputs are built
    set_location_tags(locations)
    set_location_user_count(locations)

    latlng_pairs = [(float(place.lat), float(place.lng)) for place in locations.values()]
    coordinate = numpy.array(latlng_pairs)
    intersection = get_tag_intersection(locations.values())
    user_counts = [place.usercount for place in locations.values()]
    location_frequency = numpy.array(user_counts)

    mean_frequency = sum(location_frequency) / len(location_frequency)
    print("avg location_frequency:", mean_frequency, " max:", max(location_frequency), " min:", min(location_frequency))

    head = list(locations.values())[0]
    print("location 1:", head.lname, head.lid)
    print("intersection.sum:", intersection.sum(axis=0)[0:6], intersection.sum(axis=1)[0:6])
    print("location_frequency:", location_frequency.shape)

    # Disabled alternatives:
    # original intersect clustering
    #cntr1, u, u0, d1, d2, d, jm, p, fpc, membership = cfuzzy.cmeans_intersect(coordinate.T, intersection, CLUSTER_NUM, w=WEIGHT, e=ERROR)
    # intersect clustering with the kth locations in each cluster
    #cntr1, u, u0, d1, d2, d, jm, p, fpc, membership = cfuzzy.cmeans_intersect(coordinate.T, intersection, CLUSTER_NUM,  MAX_KTH, w=WEIGHT, e=ERROR, algorithm="kthCluster")

    # Active variant: kth locations in each cluster & location frequency as
    # weight. The call returns ten values; only membership is consumed here.
    clustering = cfuzzy.cmeans_intersect(
        coordinate.T, intersection, CLUSTER_NUM, MAX_KTH, location_frequency,
        w=WEIGHT, e=ERROR, algorithm="kthCluster_LocationFrequency")
    _cntr1, _u, _u0, _d1, _d2, _d, _jm, _p, _fpc, membership = clustering

    # membership follows the iteration order of ``locations``.
    for position, place in enumerate(locations.values()):
        place.cluster = membership[position]

    pins = []
    for place in locations.values():
        label = str(place.cluster) + " >> " + place.lname
        pins.append((float(place.lat), float(place.lng), label))
    cpygmaps.output_clusters(pins, membership, CLUSTER_NUM, OUTPUT_MAP)

    cfuzzy.output_location_cluster(locations.values(), "cluster", OUTPUT_CLUSTER)

    print(divider)
    print("ENDTIME:", (datetime.datetime.now()))
    print(divider)