Ejemplo n.º 1
0
def main():
    """Fuzzy-cluster location-vector trajectories and write one map per cluster.

    Steps, all delegated to project modules:
      1. Load users and locations via ``locationclustering.main()``.
      2. Split every non-empty user's posts into trajectories
         (``ctrajectory.split_trajectory`` with ``SPLIT_DAY``).
      3. Cluster the vector sequences with ``cfuzzy.sequences_clustering``
         (mode ``"Location"``, algorithm ``"Original"``).
      4. For each cluster, keep the members whose membership value is at
         least the 10th-highest in that cluster and hand their location
         sequences to ``cpygmaps.output_patterns_l``, writing
         ``OUTPUT_MAP + "_" + <cluster> + ".html"``.

    Start and end timestamps are printed around the run.
    """
    print("--------------------------------------")
    print("STARTTIME:", (datetime.datetime.now()))
    print("--------------------------------------")

    users, locations = locationclustering.main()

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0],
        SPLIT_DAY)
    vector_sequences = ctrajectory.get_vector_sequence(sequences, locations)
    location_sequences = ctrajectory.convertto_location_sequences(sequences, locations)

    u, u0, d, jm, p, fpc, membership, distance = cfuzzy.sequences_clustering(
        "Location", vector_sequences, CLUSTER_NUM, MAX_KTH, e=ERROR, algorithm="Original")

    # Number of top-membership sequences drawn per cluster.
    top_k = 10

    print("Start Outputting...")
    # NOTE(review): the loop bound is SC_CLUSTER_NUM while the clustering call
    # above uses CLUSTER_NUM -- confirm the two constants are meant to agree.
    for c in range(0, SC_CLUSTER_NUM):
        this_cluster_indices = [i for i, x in enumerate(membership) if x == c]
        if not this_cluster_indices:
            continue

        # Membership values of cluster c restricted to its own members.
        cluster_u = u[c, this_cluster_indices]
        print(c, ">>", cluster_u.shape)

        # Threshold is the top_k-th highest membership; clusters smaller than
        # top_k fall back to their lowest value so every member is kept
        # instead of raising IndexError.
        ranked = sorted(cluster_u, reverse=True)
        top_10_u = ranked[min(top_k, len(ranked)) - 1]
        top_10_indices = [i for i, x in enumerate(cluster_u) if x >= top_10_u]
        print(top_10_indices)
        print(cluster_u[top_10_indices])

        points_sequences = numpy.array(location_sequences)[this_cluster_indices][top_10_indices]
        color = sorted(range(len(points_sequences)), key=lambda x: top_10_indices[x])
        print("  color:", color)
        cpygmaps.output_patterns_l(points_sequences, color, len(points_sequences), OUTPUT_MAP + "_" + str(c) + ".html")

    print("--------------------------------------")
    print("ENDTIME:", (datetime.datetime.now()))
    print("--------------------------------------")
Ejemplo n.º 2
0
def main():
    """Fuzzy-cluster trajectories on geographic and semantic cluster ids and map them.

    Steps, all delegated to project modules:
      1. Load users and locations, then attach to the locations the topic
         memberships (``"semantic_mem"``) and the arg-max topic
         (``"semantic_cluster"``) read from ``LOCATION_TOPIC``.
      2. Split every non-empty user's posts into trajectories and derive
         cluster-id, semantic-cluster-id and location sequences from them.
      3. Drop trajectories with two or fewer entries from all four
         sequence collections.
      4. Cluster with ``cfuzzy.sequences_clustering`` (mode ``"Cluster"``,
         algorithm ``"2Distance"``).
      5. For each cluster, keep the members whose membership value reaches
         the ``MAX_KTH``-th highest in that cluster (or the lowest one when
         the cluster is smaller) and write them to
         ``OUTPUT_MAP + "_" + <cluster> + ".html"``.

    Start and end timestamps are printed around the run.
    """
    print("--------------------------------------")
    print("STARTTIME:", (datetime.datetime.now()))
    print("--------------------------------------")

    # Getting data
    users, locations = locationclustering.main()
    location_id, doc_topic = ccluster.open_doc_topic(LOCATION_TOPIC)
    locations = ccluster.fit_locations_membership(locations, numpy.transpose(doc_topic), location_id, "semantic_mem")
    semantic_cluster = numpy.argmax(doc_topic, axis=1)
    locations = ccluster.fit_locations_cluster(locations, semantic_cluster, location_id, "semantic_cluster")

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0], SPLIT_DAY
    )
    cluster_sequences = ctrajectory.get_cluster_sequence(sequences, locations)
    semantic_sequences = ctrajectory.get_cluster_sequence(sequences, locations, "semantic_cluster")
    location_sequences = ctrajectory.convertto_location_sequences(sequences, locations)

    print("Filtering short trajectories...")
    fail_indices = [i for i, s in enumerate(sequences) if len(s) <= 2]
    print("  will delete #:", len(fail_indices))
    # The same indices are removed from every collection so they stay aligned.
    sequences = numpy.delete(numpy.array(sequences), fail_indices)
    cluster_sequences = numpy.delete(numpy.array(cluster_sequences), fail_indices)
    semantic_sequences = numpy.delete(numpy.array(semantic_sequences), fail_indices)
    location_sequences = numpy.delete(numpy.array(location_sequences), fail_indices)
    print("  remain sequences #:", len(sequences))

    u, u0, d, jm, p, fpc, membership, distance = cfuzzy.sequences_clustering(
        "Cluster", cluster_sequences, CLUSTER_NUM, MAX_KTH, semantic_sequences, e=ERROR, algorithm="2Distance"
    )

    print("Start Outputting...")
    for c in range(CLUSTER_NUM):
        this_cluster_indices = [i for i, x in enumerate(membership) if x == c]
        print(c, " >> this cluster #:", len(this_cluster_indices))
        if not this_cluster_indices:
            continue

        # Membership values of cluster c restricted to its own members.
        cluster_u = u[c, this_cluster_indices]
        ranked = sorted(cluster_u, reverse=True)
        # Threshold: the MAX_KTH-th highest value, or the lowest one when the
        # cluster has fewer than MAX_KTH members.
        top_10_u = ranked[MAX_KTH - 1] if len(ranked) >= MAX_KTH else ranked[-1]
        top_10_indices = [i for i, x in enumerate(cluster_u) if x >= top_10_u]
        print("  top_10:", top_10_u, ">", top_10_indices)
        print(cluster_u[top_10_indices])

        points_sequences = numpy.array(location_sequences)[this_cluster_indices][top_10_indices]
        color = sorted(range(len(points_sequences)), key=lambda x: top_10_indices[x])
        cpygmaps.output_patterns_l(
            points_sequences, color, len(points_sequences), OUTPUT_MAP + "_" + str(c) + ".html"
        )

    print("--------------------------------------")
    print("ENDTIME:", (datetime.datetime.now()))
    print("--------------------------------------")
Ejemplo n.º 3
0
def main():
    """Fuzzy-cluster trajectories on their cluster-id sequences and map each cluster.

    Steps, all delegated to project modules:
      1. Load users and locations via ``locationclustering.main()``.
      2. Split every non-empty user's posts into trajectories and derive
         cluster-id and location sequences from them.
      3. Drop trajectories whose cluster-id sequence has two or fewer
         entries from all three sequence collections.
      4. Cluster with ``cfuzzy.sequences_clustering`` (mode ``"Cluster"``,
         algorithm ``"Original"``).
      5. For each cluster, keep the members whose membership value reaches
         the ``MAX_KTH``-th highest in that cluster (or the lowest one when
         the cluster is smaller) and write them to
         ``OUTPUT_MAP + "_" + <cluster> + ".html"``.

    Start and end timestamps are printed around the run.
    """
    print("--------------------------------------")
    print("STARTTIME:", (datetime.datetime.now()))
    print("--------------------------------------")

    users, locations = locationclustering.main()

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0],
        SPLIT_DAY)
    cluster_sequences = ctrajectory.get_cluster_sequence(sequences, locations)
    location_sequences = ctrajectory.convertto_location_sequences(sequences, locations)

    print("Filtering short trajectories...")
    fail_indices = [i for i, s in enumerate(cluster_sequences) if len(s) <= 2]
    print("  will delete #:", len(fail_indices))
    # The same indices are removed from every collection so they stay aligned.
    sequences = numpy.delete(numpy.array(sequences), fail_indices)
    cluster_sequences = numpy.delete(numpy.array(cluster_sequences), fail_indices)
    location_sequences = numpy.delete(numpy.array(location_sequences), fail_indices)
    print("  remain sequences #:", len(sequences))

    # Cluster on the filtered cluster-id sequences built above. No
    # `vector_sequences` is ever created in this function, so passing that
    # name here would raise NameError.
    u, u0, d, jm, p, fpc, membership, distance = cfuzzy.sequences_clustering(
        "Cluster", cluster_sequences, CLUSTER_NUM, MAX_KTH, e=ERROR, algorithm="Original")

    print("Start Outputting...")
    for c in range(CLUSTER_NUM):
        this_cluster_indices = [i for i, x in enumerate(membership) if x == c]
        print(c, " >> this cluster #:", len(this_cluster_indices))
        if not this_cluster_indices:
            continue

        # Membership values of cluster c restricted to its own members.
        cluster_u = u[c, this_cluster_indices]
        ranked = sorted(cluster_u, reverse=True)
        # Threshold: the MAX_KTH-th highest value, or the lowest one when the
        # cluster has fewer than MAX_KTH members.
        top_10_u = ranked[MAX_KTH - 1] if len(ranked) >= MAX_KTH else ranked[-1]
        top_10_indices = [i for i, x in enumerate(cluster_u) if x >= top_10_u]
        print("  top_10:", top_10_u, ">", top_10_indices)
        print(cluster_u[top_10_indices])

        points_sequences = numpy.array(location_sequences)[this_cluster_indices][top_10_indices]
        color = sorted(range(len(points_sequences)), key=lambda x: top_10_indices[x])
        cpygmaps.output_patterns_l(points_sequences, color, len(points_sequences), OUTPUT_MAP + "_" + str(c) + ".html")

    print("--------------------------------------")
    print("ENDTIME:", (datetime.datetime.now()))
    print("--------------------------------------")
Ejemplo n.º 4
0
def main():
    """Run line-segment clustering on location-vector trajectories and map every cluster.

    Loads users and locations, splits each non-empty user's posts into
    trajectories, discards trajectories with two or fewer entries, clusters
    the remaining vector sequences with ``lee.line_segment_clustering`` and
    writes one ``OUTPUT_MAP + "_" + <cluster> + ".html"`` map per cluster
    through ``cpygmaps.output_patterns_l``. Start and end timestamps are
    printed around the run.
    """
    rule = "--------------------------------------"
    print(rule)
    print("STARTTIME:", (datetime.datetime.now()))
    print(rule)

    # Getting data (only the plain location vectors are used in this variant).
    users, locations = locationclustering.main()

    # Getting sequences cluster
    posts_per_user = [a_user.posts for a_user in users.values() if len(a_user.posts) != 0]
    sequences = ctrajectory.split_trajectory(posts_per_user, SPLIT_DAY)
    vector_sequences = ctrajectory.get_vector_sequence(sequences, locations)
    location_sequences = ctrajectory.convertto_location_sequences(sequences, locations)

    print("Filtering short trajectories...")
    too_short = [idx for idx, seq in enumerate(sequences) if len(seq) <= 2]
    print("  will delete #:", len(too_short))
    # Remove the same positions from each collection so they stay aligned.
    sequences = numpy.delete(numpy.array(sequences), too_short)
    vector_sequences = numpy.delete(numpy.array(vector_sequences), too_short)
    location_sequences = numpy.delete(numpy.array(location_sequences), too_short)
    print(
        "  remain sequences #:",
        len(sequences),
        " ,average length=",
        sum([len(seq) for seq in sequences]) / len(sequences),
    )

    cluster_num, cluster_membership, noise = lee.line_segment_clustering(
        vector_sequences, "Mine", "Location", ep=EPSILON, minlns=MINLNS
    )
    print(cluster_membership)

    print("Start Outputting...")
    for c in range(cluster_num):
        # Positions of the sequences assigned to cluster c.
        member_positions = numpy.where(cluster_membership == c)[0]
        print(c, " >> this cluster #:", len(member_positions))
        points_sequences = numpy.array(location_sequences)[member_positions]
        n_patterns = len(points_sequences)
        map_path = OUTPUT_MAP + "_" + str(c) + ".html"
        cpygmaps.output_patterns_l(points_sequences, range(n_patterns), n_patterns, map_path)

    print(rule)
    print("ENDTIME:", (datetime.datetime.now()))
    print(rule)
Ejemplo n.º 5
0
def main():
    """Fuzzy-cluster location-vector trajectories and write one map per cluster.

    Steps, all delegated to project modules:
      1. Load users and locations via ``locationclustering.main()``.
      2. Split every non-empty user's posts into trajectories
         (``ctrajectory.split_trajectory`` with ``SPLIT_DAY``).
      3. Cluster the vector sequences with ``cfuzzy.sequences_clustering``
         (mode ``"Location"``, algorithm ``"Original"``).
      4. For each cluster, keep the members whose membership value is at
         least the 10th-highest in that cluster and hand their location
         sequences to ``cpygmaps.output_patterns_l``, writing
         ``OUTPUT_MAP + "_" + <cluster> + ".html"``.

    Start and end timestamps are printed around the run.
    """
    print("--------------------------------------")
    print("STARTTIME:", (datetime.datetime.now()))
    print("--------------------------------------")

    users, locations = locationclustering.main()

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0],
        SPLIT_DAY)
    vector_sequences = ctrajectory.get_vector_sequence(sequences, locations)
    location_sequences = ctrajectory.convertto_location_sequences(
        sequences, locations)

    u, u0, d, jm, p, fpc, membership, distance = cfuzzy.sequences_clustering(
        "Location",
        vector_sequences,
        CLUSTER_NUM,
        MAX_KTH,
        e=ERROR,
        algorithm="Original")

    # Number of top-membership sequences drawn per cluster.
    top_k = 10

    print("Start Outputting...")
    # NOTE(review): the loop bound is SC_CLUSTER_NUM while the clustering call
    # above uses CLUSTER_NUM -- confirm the two constants are meant to agree.
    for c in range(0, SC_CLUSTER_NUM):
        this_cluster_indices = [i for i, x in enumerate(membership) if x == c]
        if not this_cluster_indices:
            continue

        # Membership values of cluster c restricted to its own members.
        cluster_u = u[c, this_cluster_indices]
        print(c, ">>", cluster_u.shape)

        # Threshold is the top_k-th highest membership; clusters smaller than
        # top_k fall back to their lowest value so every member is kept
        # instead of raising IndexError.
        ranked = sorted(cluster_u, reverse=True)
        top_10_u = ranked[min(top_k, len(ranked)) - 1]
        top_10_indices = [i for i, x in enumerate(cluster_u) if x >= top_10_u]
        print(top_10_indices)
        print(cluster_u[top_10_indices])

        points_sequences = numpy.array(
            location_sequences)[this_cluster_indices][top_10_indices]
        color = sorted(range(len(points_sequences)),
                       key=lambda x: top_10_indices[x])
        print("  color:", color)
        cpygmaps.output_patterns_l(points_sequences, color,
                                   len(points_sequences),
                                   OUTPUT_MAP + "_" + str(c) + ".html")

    print("--------------------------------------")
    print("ENDTIME:", (datetime.datetime.now()))
    print("--------------------------------------")
Ejemplo n.º 6
0
def main():
    """Cluster location-vector trajectories by line segments and emit one map per cluster.

    The pipeline loads users/locations, turns each non-empty user's posts
    into trajectories, removes trajectories with two or fewer entries,
    calls ``lee.line_segment_clustering`` on the surviving vector sequences
    and, for every resulting cluster, passes its location sequences to
    ``cpygmaps.output_patterns_l`` with the file name
    ``OUTPUT_MAP + "_" + <cluster> + ".html"``. A timestamp is printed at
    the start and at the end.
    """
    separator = "--------------------------------------"
    print(separator)
    print("STARTTIME:", (datetime.datetime.now()))
    print(separator)

    # Getting data (the semantic-membership inputs are not used here).
    users, locations = locationclustering.main()

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0],
        SPLIT_DAY,
    )
    vector_sequences = ctrajectory.get_vector_sequence(sequences, locations)
    location_sequences = ctrajectory.convertto_location_sequences(sequences, locations)

    print("Filtering short trajectories...")
    drop_positions = [pos for pos, trajectory in enumerate(sequences) if len(trajectory) <= 2]
    print("  will delete #:", len(drop_positions))
    # Each collection loses the same positions, keeping them index-aligned.
    sequences = numpy.delete(numpy.array(sequences), drop_positions)
    vector_sequences = numpy.delete(numpy.array(vector_sequences), drop_positions)
    location_sequences = numpy.delete(numpy.array(location_sequences), drop_positions)
    print(
        "  remain sequences #:",
        len(sequences),
        " ,average length=",
        sum([len(trajectory) for trajectory in sequences]) / len(sequences),
    )

    cluster_num, cluster_membership, noise = lee.line_segment_clustering(
        vector_sequences, "Mine", "Location", ep=EPSILON, minlns=MINLNS
    )
    print(cluster_membership)

    print("Start Outputting...")
    for c in range(cluster_num):
        # Positions of the sequences whose membership equals c.
        in_cluster = numpy.where(cluster_membership == c)[0]
        print(c, " >> this cluster #:", len(in_cluster))
        points_sequences = numpy.array(location_sequences)[in_cluster]
        pattern_count = len(points_sequences)
        cpygmaps.output_patterns_l(
            points_sequences,
            range(pattern_count),
            pattern_count,
            OUTPUT_MAP + "_" + str(c) + ".html",
        )

    print(separator)
    print("ENDTIME:", (datetime.datetime.now()))
    print(separator)
Ejemplo n.º 7
0
def main():
    """Fuzzy-cluster trajectories on location and semantic vectors and map each cluster.

    Steps, all delegated to project modules:
      1. Load users and locations, then attach to the locations the topic
         memberships (``"semantic_mem"``) read from ``LOCATION_TOPIC``.
      2. Split every non-empty user's posts into trajectories and derive
         vector, semantic-vector and location sequences from them.
      3. Drop trajectories with two or fewer entries from all four
         sequence collections.
      4. Cluster with ``cfuzzy.sequences_clustering`` (mode ``"Location"``,
         algorithm ``"2Distance"``).
      5. For each cluster, keep the members whose membership value reaches
         the ``MAX_KTH``-th highest in that cluster (or the lowest one when
         the cluster is smaller) and write them to
         ``OUTPUT_MAP + "_" + <cluster> + ".html"``.

    Start and end timestamps are printed around the run.
    """
    print("--------------------------------------")
    print("STARTTIME:", (datetime.datetime.now()))
    print("--------------------------------------")

    # Getting data
    users, locations = locationclustering.main()
    location_id, doc_topic = ccluster.open_doc_topic(LOCATION_TOPIC)
    locations = ccluster.fit_locations_membership(locations,
                                                  numpy.transpose(doc_topic),
                                                  location_id, "semantic_mem")

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0],
        SPLIT_DAY)
    vector_sequences = ctrajectory.get_vector_sequence(sequences, locations)
    semantic_sequences = ctrajectory.get_vector_sequence(
        sequences, locations, "semantic_mem")
    location_sequences = ctrajectory.convertto_location_sequences(
        sequences, locations)

    print("Filtering short trajectories...")
    fail_indices = [i for i, s in enumerate(sequences) if len(s) <= 2]
    print("  will delete #:", len(fail_indices))
    # The same indices are removed from every collection so they stay aligned.
    sequences = numpy.delete(numpy.array(sequences), fail_indices)
    vector_sequences = numpy.delete(numpy.array(vector_sequences),
                                    fail_indices)
    semantic_sequences = numpy.delete(numpy.array(semantic_sequences),
                                      fail_indices)
    location_sequences = numpy.delete(numpy.array(location_sequences),
                                      fail_indices)
    print("  remain sequences #:", len(sequences))

    u, u0, d, jm, p, fpc, membership, distance = cfuzzy.sequences_clustering(
        "Location",
        vector_sequences,
        CLUSTER_NUM,
        MAX_KTH,
        semantic_sequences,
        e=ERROR,
        algorithm="2Distance")

    print("Start Outputting...")
    for c in range(CLUSTER_NUM):
        this_cluster_indices = [i for i, x in enumerate(membership) if x == c]
        print(c, " >> this cluster #:", len(this_cluster_indices))
        if not this_cluster_indices:
            continue

        # Membership values of cluster c restricted to its own members.
        cluster_u = u[c, this_cluster_indices]
        ranked = sorted(cluster_u, reverse=True)
        # Threshold: the MAX_KTH-th highest value, or the lowest one when the
        # cluster has fewer than MAX_KTH members.
        top_10_u = ranked[MAX_KTH - 1] if len(ranked) >= MAX_KTH else ranked[-1]
        top_10_indices = [i for i, x in enumerate(cluster_u) if x >= top_10_u]
        print("  top_10:", top_10_u, ">", top_10_indices)
        print(cluster_u[top_10_indices])

        points_sequences = numpy.array(
            location_sequences)[this_cluster_indices][top_10_indices]
        color = sorted(range(len(points_sequences)),
                       key=lambda x: top_10_indices[x])
        cpygmaps.output_patterns_l(points_sequences, color,
                                   len(points_sequences),
                                   OUTPUT_MAP + "_" + str(c) + ".html")

    print("--------------------------------------")
    print("ENDTIME:", (datetime.datetime.now()))
    print("--------------------------------------")
Ejemplo n.º 8
0
def main():
    """Fuzzy-cluster trajectories on their cluster-id sequences and map each cluster.

    Steps, all delegated to project modules:
      1. Load users and locations via ``locationclustering.main()``.
      2. Split every non-empty user's posts into trajectories and derive
         cluster-id and location sequences from them.
      3. Drop trajectories whose cluster-id sequence has two or fewer
         entries from all three sequence collections.
      4. Cluster with ``cfuzzy.sequences_clustering`` (mode ``"Cluster"``,
         algorithm ``"Original"``).
      5. For each cluster, keep the members whose membership value reaches
         the ``MAX_KTH``-th highest in that cluster (or the lowest one when
         the cluster is smaller) and write them to
         ``OUTPUT_MAP + "_" + <cluster> + ".html"``.

    Start and end timestamps are printed around the run.
    """
    print("--------------------------------------")
    print("STARTTIME:", (datetime.datetime.now()))
    print("--------------------------------------")

    users, locations = locationclustering.main()

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0],
        SPLIT_DAY)
    cluster_sequences = ctrajectory.get_cluster_sequence(sequences, locations)
    location_sequences = ctrajectory.convertto_location_sequences(
        sequences, locations)

    print("Filtering short trajectories...")
    fail_indices = [i for i, s in enumerate(cluster_sequences) if len(s) <= 2]
    print("  will delete #:", len(fail_indices))
    # The same indices are removed from every collection so they stay aligned.
    sequences = numpy.delete(numpy.array(sequences), fail_indices)
    cluster_sequences = numpy.delete(numpy.array(cluster_sequences),
                                     fail_indices)
    location_sequences = numpy.delete(numpy.array(location_sequences),
                                      fail_indices)
    print("  remain sequences #:", len(sequences))

    # Cluster on the filtered cluster-id sequences built above. No
    # `vector_sequences` is ever created in this function, so passing that
    # name here would raise NameError.
    u, u0, d, jm, p, fpc, membership, distance = cfuzzy.sequences_clustering(
        "Cluster",
        cluster_sequences,
        CLUSTER_NUM,
        MAX_KTH,
        e=ERROR,
        algorithm="Original")

    print("Start Outputting...")
    for c in range(CLUSTER_NUM):
        this_cluster_indices = [i for i, x in enumerate(membership) if x == c]
        print(c, " >> this cluster #:", len(this_cluster_indices))
        if not this_cluster_indices:
            continue

        # Membership values of cluster c restricted to its own members.
        cluster_u = u[c, this_cluster_indices]
        ranked = sorted(cluster_u, reverse=True)
        # Threshold: the MAX_KTH-th highest value, or the lowest one when the
        # cluster has fewer than MAX_KTH members.
        top_10_u = ranked[MAX_KTH - 1] if len(ranked) >= MAX_KTH else ranked[-1]
        top_10_indices = [i for i, x in enumerate(cluster_u) if x >= top_10_u]
        print("  top_10:", top_10_u, ">", top_10_indices)
        print(cluster_u[top_10_indices])

        points_sequences = numpy.array(
            location_sequences)[this_cluster_indices][top_10_indices]
        color = sorted(range(len(points_sequences)),
                       key=lambda x: top_10_indices[x])
        cpygmaps.output_patterns_l(points_sequences, color,
                                   len(points_sequences),
                                   OUTPUT_MAP + "_" + str(c) + ".html")

    print("--------------------------------------")
    print("ENDTIME:", (datetime.datetime.now()))
    print("--------------------------------------")