Example #1
0
def main(*argv):
    """Run the cluster-label trajectory clustering pipeline end to end.

    Users and locations are loaded through ``locationclustering.main``; the
    per-row arg-max of the document-topic matrix read from the location-topic
    file is passed to ``ccluster.fit_locations_cluster`` under the attribute
    name ``"semantic_cluster"``. Each user's posts are split into sequences
    with ``ctrajectory.split_trajectory_byday``, filtered with
    ``ctrajectory.remove_adjacent_location`` and ``ctrajectory.remove_short``,
    converted into spatial / semantic cluster arrays and handed to
    ``cfuzzy.sequences_clustering_i``. Results are written by
    ``output_each_pattern`` and ``ctrajectory.output_clusters``.

    Args:
        *argv: Either nothing (module-level settings are used and the NOVDEC
            topic file is read) or at least five positional values (the OCT
            topic file is read):
            ``argv[0]`` -> ``CLUSTER_NUM``, ``argv[1]`` -> ``MAX_KTH``,
            ``argv[3]`` -> ``FILTER_TIME_S``, ``argv[4]`` -> ``FILTER_TIME_E``.
            ``argv[2]`` is not read by this function; ``GPS_WEIGHT`` keeps
            its module-level value.

    Returns:
        The tuple ``(location_sequences, spatial_sequences,
        semantic_sequences, u)``, where ``u`` is the first value returned by
        ``cfuzzy.sequences_clustering_i``.

    Raises:
        IndexError: If between one and four arguments are supplied. The check
            happens before any module global is rebound, so a bad call leaves
            the configuration untouched.

    Side effects:
        Rebinds the module globals listed below. ``OUTPUT_MAP`` and
        ``OUTPUT_PATTERN`` get a ``<CLUSTER_NUM>k<MAX_KTH>`` suffix appended
        to their *current* value, so calling ``main`` repeatedly in one
        process keeps extending them.
    """
    global CLUSTER_NUM
    global MAX_KTH
    global FILTER_TIME_S
    global FILTER_TIME_E
    global OUTPUT_MAP
    global OUTPUT_PATTERN

    separator = "--------------------------------------"
    start_time = datetime.datetime.now()
    print(separator)
    # Print the same instant that SPEND is later measured from.
    print("STARTTIME:", start_time)
    print(separator)

    # set parameters
    if argv:
        # Validate first: indexing argv[4] on a short tuple used to fail only
        # after CLUSTER_NUM / MAX_KTH had already been overwritten.
        if len(argv) < 5:
            raise IndexError(
                "main() takes either no arguments or at least 5 "
                "(CLUSTER_NUM, MAX_KTH, <unused>, FILTER_TIME_S, "
                "FILTER_TIME_E); got {}".format(len(argv)))
        CLUSTER_NUM = argv[0]
        MAX_KTH = argv[1]
        # NOTE(review): argv[2] is skipped here; confirm GPS_WEIGHT is meant
        # to stay at its module-level value in this variant.
        FILTER_TIME_S = argv[3]
        FILTER_TIME_E = argv[4]
        location_topic_path = "./data/LocationTopic/LocationTopic_OCT_c35.txt"
    else:
        location_topic_path = "./data/LocationTopic/LocationTopic_NOVDEC_c35.txt"

    run_suffix = str(CLUSTER_NUM) + "k" + str(MAX_KTH)
    OUTPUT_MAP = OUTPUT_MAP + run_suffix
    OUTPUT_PATTERN = OUTPUT_PATTERN + run_suffix

    # Getting data
    users, locations = locationclustering.main(FILTER_TIME_S, FILTER_TIME_E)
    location_id, doc_topic = ccluster.open_doc_topic(location_topic_path)
    # Index of the largest entry in each row of the document-topic matrix.
    semantic_cluster = numpy.argmax(doc_topic, axis=1)
    locations = ccluster.fit_locations_cluster(
        locations, semantic_cluster, location_id, "semantic_cluster")
    print("  users # :", len(users))

    # Getting sequences cluster
    #sequences = ctrajectory.split_trajectory([a_user.posts for a_user in users.values() if len(a_user.posts) != 0], SPLIT_DAY)
    user_posts = [a_user.posts for a_user in users.values()
                  if len(a_user.posts) != 0]
    sequences = ctrajectory.split_trajectory_byday(user_posts)
    sequences = ctrajectory.remove_adjacent_location(sequences)

    #sequences = get_specific(sequences)

    sequences = ctrajectory.remove_short(sequences)
    # Distinct uid of the first post of every remaining sequence.
    print("  remain users #:", len({x[0].uid for x in sequences}))

    location_sequences, longest_len = ctrajectory.convertto_location_sequences(
        sequences, locations)
    spatial_sequences, semantic_sequences = ctrajectory.get_cluster_array(
        location_sequences, longest_len, "cluster", "semantic_cluster")

    # Only u and membership are used below; the other returned values are
    # unpacked to match the 8-tuple returned by the clustering routine.
    u, u0, d, jm, p, fpc, center, membership = cfuzzy.sequences_clustering_i(
        "Cluster",
        spatial_sequences,
        CLUSTER_NUM,
        MAX_KTH,
        semantic_sequences,
        GPS_WEIGHT,
        e=ERROR,
        algorithm="2WeightedDistance")

    #sequences, location_sequences, membership = filter_sequence(sequences, location_sequences, u, membership)
    #ouput_pattern(sequences, location_sequences, u, membership)
    # NOTE(review): the meaning of the literal 30 is defined by
    # output_each_pattern, which is outside this block.
    output_each_pattern(sequences, location_sequences, u, membership, 30)
    ctrajectory.output_clusters(sequences, membership, u, OUTPUT_PATTERN)

    #output_cluster_detail(sequences, location_sequences, u, membership, 26, file = OUTPUT_ANALYSIS)

    # One timestamp for both fields so ENDTIME - STARTTIME equals SPEND.
    end_time = datetime.datetime.now()
    print(separator)
    print("ENDTIME:", end_time, ", SPEND:", end_time - start_time)
    print(separator)
    return location_sequences, spatial_sequences, semantic_sequences, u
Example #2
0
def main(*argv):
    """Run the membership-vector trajectory clustering pipeline end to end.

    Locations are loaded through ``locationclustering.main`` and the
    transposed document-topic matrix read from the location-topic file is
    passed to ``ccluster.fit_locations_membership`` under the attribute name
    ``"semantic_mem"``. Users are then (re)loaded from ``USER_POSTS_FILE``,
    their posts are split into sequences with
    ``ctrajectory.split_trajectory_byday``, converted into two vector arrays
    (the default one and the ``"semantic_mem"`` one) and handed to
    ``cfuzzy.sequences_clustering_i``. Results are written by
    ``output_each_pattern`` and ``ctrajectory.output_clusters``.

    Args:
        *argv: Either nothing (module-level settings are used and the NOVDEC
            topic file is read) or at least five positional values (the OCT
            topic file is read), in this order: ``CLUSTER_NUM``, ``MAX_KTH``,
            ``GPS_WEIGHT``, ``FILTER_TIME_S``, ``FILTER_TIME_E``.

    Returns:
        The tuple ``(location_sequences, spatial_array, semantic_array, u)``,
        where ``u`` is the first value returned by
        ``cfuzzy.sequences_clustering_i``.

    Raises:
        IndexError: If between one and four arguments are supplied. The check
            happens before any module global is rebound, so a bad call leaves
            the configuration untouched.

    Side effects:
        Rebinds the module globals listed below. ``OUTPUT_MAP`` and
        ``OUTPUT_PATTERN`` get a run-specific suffix appended to their
        *current* value, so calling ``main`` repeatedly in one process keeps
        extending them.
    """
    global CLUSTER_NUM
    global MAX_KTH
    global GPS_WEIGHT
    global FILTER_TIME_S
    global FILTER_TIME_E
    global OUTPUT_MAP
    global OUTPUT_PATTERN

    separator = "--------------------------------------"
    start_time = datetime.datetime.now()
    print(separator)
    # Print the same instant that SPEND is later measured from.
    print("STARTTIME:", start_time)
    print(separator)

    # set parameters
    if argv:
        # Validate first: indexing argv[4] on a short tuple used to fail only
        # after the leading globals had already been overwritten.
        if len(argv) < 5:
            raise IndexError(
                "main() takes either no arguments or at least 5 "
                "(CLUSTER_NUM, MAX_KTH, GPS_WEIGHT, FILTER_TIME_S, "
                "FILTER_TIME_E); got {}".format(len(argv)))
        CLUSTER_NUM = argv[0]
        MAX_KTH = argv[1]
        GPS_WEIGHT = argv[2]
        FILTER_TIME_S = argv[3]
        FILTER_TIME_E = argv[4]
        location_topic_path = "./data/LocationTopic/LocationTopic_OCT_c35.txt"
    else:
        location_topic_path = "./data/LocationTopic/LocationTopic_NOVDEC_c35.txt"

    run_suffix = str(CLUSTER_NUM) + "k" + str(MAX_KTH)
    # Only the map output name carries the GPS weight.
    OUTPUT_MAP = OUTPUT_MAP + run_suffix + "w" + str(GPS_WEIGHT)
    OUTPUT_PATTERN = OUTPUT_PATTERN + run_suffix

    # Getting data
    users, locations = locationclustering.main(FILTER_TIME_S, FILTER_TIME_E)
    location_id, doc_topic = ccluster.open_doc_topic(location_topic_path)
    locations = ccluster.fit_locations_membership(
        locations, numpy.transpose(doc_topic), location_id, "semantic_mem")
    print("  users # :", len(users))

    # The users returned above are only counted; the sequences below are
    # built from the users stored in USER_POSTS_FILE.
    users = cuser.open_users_posts_afile(USER_POSTS_FILE)

    # Getting sequences of posts & locations
    #sequences = ctrajectory.split_trajectory([a_user.posts for a_user in users.values() if len(a_user.posts) != 0], SPLIT_DAY)
    user_posts = [a_user.posts for a_user in users.values()
                  if len(a_user.posts) != 0]
    sequences = ctrajectory.split_trajectory_byday(user_posts)
    #sequences = ctrajectory.remove_adjacent_location(sequences)

    #sequences = get_specific(sequences)

    #sequences = ctrajectory.remove_short(sequences)
    # Distinct uid of the first post of every sequence.
    print("  remain users #:", len({x[0].uid for x in sequences}))

    location_sequences, longest_len = ctrajectory.convertto_location_sequences(
        sequences, locations)
    spatial_array = ctrajectory.get_vector_array(location_sequences,
                                                 longest_len)
    semantic_array = ctrajectory.get_vector_array(location_sequences,
                                                  longest_len, "semantic_mem")

    # Only u and membership are used below; the other returned values are
    # unpacked to match the 8-tuple returned by the clustering routine.
    u, u0, d, jm, p, fpc, center, membership = cfuzzy.sequences_clustering_i(
        "Location",
        spatial_array,
        CLUSTER_NUM,
        MAX_KTH,
        semantic_array,
        GPS_WEIGHT,
        e=ERROR,
        algorithm="2WeightedDistance")

    #ouput_pattern(sequences, location_sequences, u, membership)
    # NOTE(review): the meaning of the literal 20 is defined by
    # output_each_pattern, which is outside this block.
    output_each_pattern(sequences, location_sequences, u, membership, 20)
    ctrajectory.output_clusters(sequences, membership, u, OUTPUT_PATTERN)

    # One timestamp for both fields so ENDTIME - STARTTIME equals SPEND.
    end_time = datetime.datetime.now()
    print(separator)
    print("ENDTIME:", end_time, ", SPEND:", end_time - start_time)
    print(separator)
    return location_sequences, spatial_array, semantic_array, u
Example #3
0
def main(*argv):
    """Cluster per-day post sequences using location membership vectors.

    Pipeline, in call order:

    1. ``locationclustering.main`` supplies users and locations for the
       ``FILTER_TIME_S`` / ``FILTER_TIME_E`` window.
    2. ``ccluster.open_doc_topic`` reads the location-topic file and its
       transposed matrix is given to ``ccluster.fit_locations_membership``
       under the attribute name ``"semantic_mem"``.
    3. Users are replaced by ``cuser.open_users_posts_afile(USER_POSTS_FILE)``
       and their non-empty post lists are split with
       ``ctrajectory.split_trajectory_byday``.
    4. ``ctrajectory.get_vector_array`` builds the spatial and the
       ``"semantic_mem"`` arrays, which ``cfuzzy.sequences_clustering_i``
       clusters.
    5. ``output_each_pattern`` and ``ctrajectory.output_clusters`` write the
       results.

    Args:
        *argv: Empty to keep the module-level settings (NOVDEC topic file),
            or at least five positional values (OCT topic file) assigned in
            order to ``CLUSTER_NUM``, ``MAX_KTH``, ``GPS_WEIGHT``,
            ``FILTER_TIME_S`` and ``FILTER_TIME_E``.

    Returns:
        ``(location_sequences, spatial_array, semantic_array, u)``; ``u`` is
        the first element of the tuple returned by
        ``cfuzzy.sequences_clustering_i``.

    Raises:
        IndexError: When one to four arguments are passed. Raised before any
            module global is modified.

    Side effects:
        Rebinds the module globals declared at the top of the body. Each call
        appends a suffix to the current ``OUTPUT_MAP`` / ``OUTPUT_PATTERN``
        values, so the names grow if ``main`` is called more than once per
        process.
    """
    global CLUSTER_NUM
    global MAX_KTH
    global GPS_WEIGHT
    global FILTER_TIME_S
    global FILTER_TIME_E
    global OUTPUT_MAP
    global OUTPUT_PATTERN

    rule = "--------------------------------------"
    start_time = datetime.datetime.now()
    print(rule)
    # Reuse start_time so the printed value matches the SPEND baseline.
    print("STARTTIME:", start_time)
    print(rule)

    # set parameters
    if len(argv) == 0:
        topic_file = "./data/LocationTopic/LocationTopic_NOVDEC_c35.txt"
    elif len(argv) < 5:
        # Fail before touching any global; previously a short argument list
        # raised IndexError midway through the assignments below.
        raise IndexError(
            "main() takes either no arguments or at least 5 "
            "(CLUSTER_NUM, MAX_KTH, GPS_WEIGHT, FILTER_TIME_S, "
            "FILTER_TIME_E); got {}".format(len(argv)))
    else:
        CLUSTER_NUM = argv[0]
        MAX_KTH = argv[1]
        GPS_WEIGHT = argv[2]
        FILTER_TIME_S = argv[3]
        FILTER_TIME_E = argv[4]
        topic_file = "./data/LocationTopic/LocationTopic_OCT_c35.txt"

    cluster_tag = str(CLUSTER_NUM) + "k" + str(MAX_KTH)
    # The GPS weight is part of the map output name only.
    OUTPUT_MAP = OUTPUT_MAP + cluster_tag + "w" + str(GPS_WEIGHT)
    OUTPUT_PATTERN = OUTPUT_PATTERN + cluster_tag

    # Getting data
    users, locations = locationclustering.main(FILTER_TIME_S, FILTER_TIME_E)
    location_id, doc_topic = ccluster.open_doc_topic(topic_file)
    locations = ccluster.fit_locations_membership(
        locations, numpy.transpose(doc_topic), location_id, "semantic_mem")
    print("  users # :", len(users))

    # From here on the users come from USER_POSTS_FILE; the count printed
    # above refers to the locationclustering result.
    users = cuser.open_users_posts_afile(USER_POSTS_FILE)

    # Getting sequences of posts & locations
    #sequences = ctrajectory.split_trajectory([a_user.posts for a_user in users.values() if len(a_user.posts) != 0], SPLIT_DAY)
    non_empty_posts = [a_user.posts for a_user in users.values()
                       if len(a_user.posts) != 0]
    sequences = ctrajectory.split_trajectory_byday(non_empty_posts)
    #sequences = ctrajectory.remove_adjacent_location(sequences)

    #sequences = get_specific(sequences)

    #sequences = ctrajectory.remove_short(sequences)
    # Number of distinct uids among the first posts of the sequences.
    print("  remain users #:", len({x[0].uid for x in sequences}))

    location_sequences, longest_len = ctrajectory.convertto_location_sequences(
        sequences, locations)
    spatial_array = ctrajectory.get_vector_array(
        location_sequences, longest_len)
    semantic_array = ctrajectory.get_vector_array(
        location_sequences, longest_len, "semantic_mem")

    # The routine returns eight values; only u and membership are consumed.
    u, u0, d, jm, p, fpc, center, membership = cfuzzy.sequences_clustering_i(
        "Location", spatial_array, CLUSTER_NUM, MAX_KTH, semantic_array,
        GPS_WEIGHT, e=ERROR, algorithm="2WeightedDistance")

    #ouput_pattern(sequences, location_sequences, u, membership)
    # NOTE(review): 20 is interpreted by output_each_pattern, defined outside
    # this block.
    output_each_pattern(sequences, location_sequences, u, membership, 20)
    ctrajectory.output_clusters(sequences, membership, u, OUTPUT_PATTERN)

    # Take the end timestamp once so ENDTIME and SPEND agree.
    end_time = datetime.datetime.now()
    print(rule)
    print("ENDTIME:", end_time, ", SPEND:", end_time - start_time)
    print(rule)
    return location_sequences, spatial_array, semantic_array, u