def main():
    """Cluster location-vector trajectories and map each cluster's top members.

    Loads users and locations through ``locationclustering.main()``, splits
    the posts of every user that has posts into trajectories, clusters the
    vector sequences with ``cfuzzy.sequences_clustering`` (mode
    ``"Location"``, algorithm ``"Original"``) and, for every non-empty
    cluster, writes an HTML file named ``OUTPUT_MAP + "_" + <cluster> +
    ".html"`` through ``cpygmaps.output_patterns_l``.  Start and end
    timestamps are printed around the run.
    """
    separator = "--------------------------------------"
    print(separator)
    print("STARTTIME:", (datetime.datetime.now()))
    print(separator)

    users, locations = locationclustering.main()

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0],
        SPLIT_DAY,
    )
    vector_sequences = ctrajectory.get_vector_sequence(sequences, locations)
    location_sequences = ctrajectory.convertto_location_sequences(sequences, locations)

    # Only ``u`` (indexed as u[cluster, sequence]) and ``membership`` (one
    # cluster label per sequence) are used below; the other six returned
    # values are discarded.
    u, _, _, _, _, _, membership, _ = cfuzzy.sequences_clustering(
        "Location", vector_sequences, CLUSTER_NUM, MAX_KTH,
        e=ERROR, algorithm="Original",
    )

    print("Start Outputting...")
    # NOTE(review): the loop bound is SC_CLUSTER_NUM while the clustering was
    # run with CLUSTER_NUM — confirm the two agree, otherwise u[c, ...] may be
    # indexed out of range or clusters may be skipped.
    for c in range(0, SC_CLUSTER_NUM):
        this_cluster_indices = [i for i, x in enumerate(membership) if x == c]
        # Compare by value/truthiness; the original ``is not 0`` was an
        # identity test against an int literal.
        if not this_cluster_indices:
            continue

        cluster_u = u[c, this_cluster_indices]
        print(c, ">>", cluster_u.shape)

        # Threshold is the 10th-largest value of u in this cluster.  Clusters
        # with fewer than 10 members fall back to their smallest value (the
        # original unconditional ``[9]`` raised IndexError for them).
        ranked_u = sorted(cluster_u, reverse=True)
        top_10_u = ranked_u[min(len(ranked_u), 10) - 1]
        # Positions are relative to this_cluster_indices; ties at the
        # threshold are all kept, so more than 10 may be selected.
        top_10_indices = [i for i, x in enumerate(cluster_u) if x >= top_10_u]
        print(top_10_indices)
        print(cluster_u[top_10_indices])

        points_sequences = numpy.array(location_sequences)[this_cluster_indices][top_10_indices]
        color = sorted(range(len(points_sequences)), key=lambda x: top_10_indices[x])
        print(" color:", color)
        cpygmaps.output_patterns_l(
            points_sequences, color, len(points_sequences),
            OUTPUT_MAP + "_" + str(c) + ".html",
        )

    print(separator)
    print("ENDTIME:", (datetime.datetime.now()))
    print(separator)
def main():
    """Cluster trajectories on cluster and semantic-cluster sequences and map the results.

    Steps, as performed by the code below:

    1. Load users and locations via ``locationclustering.main()`` and attach
       the document-topic data read by ``ccluster.open_doc_topic`` to the
       locations under the keys ``"semantic_mem"`` and ``"semantic_cluster"``.
    2. Split the posts of every user that has posts into trajectories and
       derive cluster, semantic-cluster and location sequences from them.
    3. Drop every trajectory with two or fewer entries.
    4. Run ``cfuzzy.sequences_clustering`` (mode ``"Cluster"``, algorithm
       ``"2Distance"``) on the cluster and semantic sequences.
    5. For every non-empty cluster, write the members whose value in ``u``
       reaches the ``MAX_KTH``-th largest to an HTML file through
       ``cpygmaps.output_patterns_l``.

    Start and end timestamps are printed around the run.
    """
    separator = "--------------------------------------"
    print(separator)
    print("STARTTIME:", (datetime.datetime.now()))
    print(separator)

    # Getting data
    users, locations = locationclustering.main()
    location_id, doc_topic = ccluster.open_doc_topic(LOCATION_TOPIC)
    locations = ccluster.fit_locations_membership(
        locations, numpy.transpose(doc_topic), location_id, "semantic_mem"
    )
    # Index of the largest entry in each row of doc_topic is used as that
    # row's semantic cluster label.
    semantic_cluster = numpy.argmax(doc_topic, axis=1)
    locations = ccluster.fit_locations_cluster(
        locations, semantic_cluster, location_id, "semantic_cluster"
    )

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0],
        SPLIT_DAY,
    )
    cluster_sequences = ctrajectory.get_cluster_sequence(sequences, locations)
    semantic_sequences = ctrajectory.get_cluster_sequence(sequences, locations, "semantic_cluster")
    location_sequences = ctrajectory.convertto_location_sequences(sequences, locations)

    print("Filtering short trajectories...")
    fail_indices = [i for i, s in enumerate(sequences) if len(s) <= 2]
    print(" will delete #:", len(fail_indices))
    # NOTE(review): numpy.delete without ``axis`` works on the flattened
    # array, so this presumably relies on each conversion producing a 1-D
    # array with one element per trajectory — confirm.
    sequences = numpy.delete(numpy.array(sequences), fail_indices)
    cluster_sequences = numpy.delete(numpy.array(cluster_sequences), fail_indices)
    semantic_sequences = numpy.delete(numpy.array(semantic_sequences), fail_indices)
    location_sequences = numpy.delete(numpy.array(location_sequences), fail_indices)
    print(" remain sequences #:", len(sequences))

    # Only ``u`` (indexed as u[cluster, sequence]) and ``membership`` (one
    # cluster label per sequence) are used below.
    u, _, _, _, _, _, membership, _ = cfuzzy.sequences_clustering(
        "Cluster", cluster_sequences, CLUSTER_NUM, MAX_KTH, semantic_sequences,
        e=ERROR, algorithm="2Distance",
    )

    print("Start Outputting...")
    for c in range(CLUSTER_NUM):
        this_cluster_indices = [i for i, x in enumerate(membership) if x == c]
        print(c, " >> this cluster #:", len(this_cluster_indices))
        # Compare by truthiness; the original ``is not 0`` was an identity
        # test against an int literal.
        if not this_cluster_indices:
            continue

        cluster_u = u[c, this_cluster_indices]
        ranked_u = sorted(cluster_u, reverse=True)
        # MAX_KTH-th largest value, or the smallest one when the cluster has
        # fewer than MAX_KTH members.
        top_10_u = ranked_u[min(len(ranked_u), MAX_KTH) - 1]
        # Positions are relative to this_cluster_indices; ties at the
        # threshold are all kept.
        top_10_indices = [i for i, x in enumerate(cluster_u) if x >= top_10_u]
        print(" top_10:", top_10_u, ">", top_10_indices)
        print(cluster_u[top_10_indices])

        points_sequences = numpy.array(location_sequences)[this_cluster_indices][top_10_indices]
        color = sorted(range(len(points_sequences)), key=lambda x: top_10_indices[x])
        cpygmaps.output_patterns_l(
            points_sequences, color, len(points_sequences),
            OUTPUT_MAP + "_" + str(c) + ".html",
        )

    print(separator)
    print("ENDTIME:", (datetime.datetime.now()))
    print(separator)
def main():
    """Cluster trajectories on their cluster sequences and map each cluster's top members.

    Loads users and locations through ``locationclustering.main()``, splits
    the posts of every user that has posts into trajectories, drops every
    trajectory whose cluster sequence has two or fewer entries, clusters the
    remaining cluster sequences with ``cfuzzy.sequences_clustering`` (mode
    ``"Cluster"``, algorithm ``"Original"``) and, for every non-empty
    cluster, writes the members whose value in ``u`` reaches the
    ``MAX_KTH``-th largest to an HTML file through
    ``cpygmaps.output_patterns_l``.  Start and end timestamps are printed.
    """
    separator = "--------------------------------------"
    print(separator)
    print("STARTTIME:", (datetime.datetime.now()))
    print(separator)

    users, locations = locationclustering.main()

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0],
        SPLIT_DAY,
    )
    cluster_sequences = ctrajectory.get_cluster_sequence(sequences, locations)
    location_sequences = ctrajectory.convertto_location_sequences(sequences, locations)

    print("Filtering short trajectories...")
    # The length check is made on the cluster sequences, not the raw ones.
    fail_indices = [i for i, s in enumerate(cluster_sequences) if len(s) <= 2]
    print(" will delete #:", len(fail_indices))
    # NOTE(review): numpy.delete without ``axis`` works on the flattened
    # array, so this presumably relies on each conversion producing a 1-D
    # array with one element per trajectory — confirm.
    sequences = numpy.delete(numpy.array(sequences), fail_indices)
    cluster_sequences = numpy.delete(numpy.array(cluster_sequences), fail_indices)
    location_sequences = numpy.delete(numpy.array(location_sequences), fail_indices)
    print(" remain sequences #:", len(sequences))

    # The original passed ``vector_sequences`` here, a name this function
    # never defines (NameError).  The filtered ``cluster_sequences`` is the
    # data prepared above for mode "Cluster".
    u, _, _, _, _, _, membership, _ = cfuzzy.sequences_clustering(
        "Cluster", cluster_sequences, CLUSTER_NUM, MAX_KTH,
        e=ERROR, algorithm="Original",
    )

    print("Start Outputting...")
    for c in range(CLUSTER_NUM):
        this_cluster_indices = [i for i, x in enumerate(membership) if x == c]
        print(c, " >> this cluster #:", len(this_cluster_indices))
        # Compare by truthiness; the original ``is not 0`` was an identity
        # test against an int literal.
        if not this_cluster_indices:
            continue

        cluster_u = u[c, this_cluster_indices]
        ranked_u = sorted(cluster_u, reverse=True)
        # MAX_KTH-th largest value, or the smallest one when the cluster has
        # fewer than MAX_KTH members.
        top_10_u = ranked_u[min(len(ranked_u), MAX_KTH) - 1]
        # Positions are relative to this_cluster_indices; ties at the
        # threshold are all kept.
        top_10_indices = [i for i, x in enumerate(cluster_u) if x >= top_10_u]
        print(" top_10:", top_10_u, ">", top_10_indices)
        print(cluster_u[top_10_indices])

        points_sequences = numpy.array(location_sequences)[this_cluster_indices][top_10_indices]
        color = sorted(range(len(points_sequences)), key=lambda x: top_10_indices[x])
        cpygmaps.output_patterns_l(
            points_sequences, color, len(points_sequences),
            OUTPUT_MAP + "_" + str(c) + ".html",
        )

    print(separator)
    print("ENDTIME:", (datetime.datetime.now()))
    print(separator)
def main():
    """Run line-segment clustering on location-vector trajectories and map every cluster.

    Users and locations come from ``locationclustering.main()``.  The posts
    of each user that has posts are split into trajectories, trajectories
    with two or fewer entries are removed, and the remaining vector
    sequences are handed to ``lee.line_segment_clustering``.  One HTML file
    per cluster index is then written through ``cpygmaps.output_patterns_l``.
    The semantic-membership inputs (``"semantic_mem"``) are not used by this
    variant.  Start and end timestamps are printed around the run.
    """
    rule = "--------------------------------------"
    print(rule)
    print("STARTTIME:", (datetime.datetime.now()))
    print(rule)

    # Getting data
    users, locations = locationclustering.main()

    # Getting sequences cluster
    posts_per_user = [
        a_user.posts for a_user in users.values() if len(a_user.posts) != 0
    ]
    sequences = ctrajectory.split_trajectory(posts_per_user, SPLIT_DAY)
    vector_sequences = ctrajectory.get_vector_sequence(sequences, locations)
    location_sequences = ctrajectory.convertto_location_sequences(sequences, locations)

    print("Filtering short trajectories...")
    # Indices of trajectories that are too short to keep.
    fail_indices = [idx for idx, seq in enumerate(sequences) if len(seq) <= 2]
    print(" will delete #:", len(fail_indices))
    sequences = numpy.delete(numpy.array(sequences), fail_indices)
    vector_sequences = numpy.delete(numpy.array(vector_sequences), fail_indices)
    location_sequences = numpy.delete(numpy.array(location_sequences), fail_indices)

    total_length = sum(len(seq) for seq in sequences)
    print(" remain sequences #:", len(sequences),
          " ,average length=", total_length / len(sequences))

    # The third returned value (noise) is not used further.
    cluster_num, cluster_membership, noise = lee.line_segment_clustering(
        vector_sequences, "Mine", "Location", ep=EPSILON, minlns=MINLNS
    )
    print(cluster_membership)

    print("Start Outputting...")
    for c in range(cluster_num):
        # Positions whose membership label equals this cluster index.
        this_cluster_indices = numpy.where(cluster_membership == c)[0]
        print(c, " >> this cluster #:", len(this_cluster_indices))
        points_sequences = numpy.array(location_sequences)[this_cluster_indices]
        member_count = len(points_sequences)
        color = range(member_count)
        target_file = OUTPUT_MAP + "_" + str(c) + ".html"
        cpygmaps.output_patterns_l(points_sequences, color, member_count, target_file)

    print(rule)
    print("ENDTIME:", (datetime.datetime.now()))
    print(rule)
def main():
    """Cluster location-vector trajectories and map each cluster's top members.

    Loads users and locations through ``locationclustering.main()``, splits
    the posts of every user that has posts into trajectories, clusters the
    vector sequences with ``cfuzzy.sequences_clustering`` (mode
    ``"Location"``, algorithm ``"Original"``) and, for every non-empty
    cluster, writes an HTML file named ``OUTPUT_MAP + "_" + <cluster> +
    ".html"`` through ``cpygmaps.output_patterns_l``.  Start and end
    timestamps are printed around the run.
    """
    separator = "--------------------------------------"
    print(separator)
    print("STARTTIME:", (datetime.datetime.now()))
    print(separator)

    users, locations = locationclustering.main()

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0],
        SPLIT_DAY)
    vector_sequences = ctrajectory.get_vector_sequence(sequences, locations)
    location_sequences = ctrajectory.convertto_location_sequences(
        sequences, locations)

    # Only ``u`` (indexed as u[cluster, sequence]) and ``membership`` (one
    # cluster label per sequence) are used below; the other six returned
    # values are discarded.
    u, _, _, _, _, _, membership, _ = cfuzzy.sequences_clustering(
        "Location",
        vector_sequences,
        CLUSTER_NUM,
        MAX_KTH,
        e=ERROR,
        algorithm="Original")

    print("Start Outputting...")
    # NOTE(review): the loop bound is SC_CLUSTER_NUM while the clustering was
    # run with CLUSTER_NUM — confirm the two agree, otherwise u[c, ...] may be
    # indexed out of range or clusters may be skipped.
    for c in range(0, SC_CLUSTER_NUM):
        this_cluster_indices = [i for i, x in enumerate(membership) if x == c]
        # Compare by truthiness; the original ``is not 0`` was an identity
        # test against an int literal.
        if not this_cluster_indices:
            continue

        cluster_u = u[c, this_cluster_indices]
        print(c, ">>", cluster_u.shape)

        # Threshold is the 10th-largest value of u in this cluster.  Clusters
        # with fewer than 10 members fall back to their smallest value (the
        # original unconditional ``[9]`` raised IndexError for them).
        ranked_u = sorted(cluster_u, reverse=True)
        top_10_u = ranked_u[min(len(ranked_u), 10) - 1]
        # Positions are relative to this_cluster_indices; ties at the
        # threshold are all kept, so more than 10 may be selected.
        top_10_indices = [i for i, x in enumerate(cluster_u) if x >= top_10_u]
        print(top_10_indices)
        print(cluster_u[top_10_indices])

        points_sequences = numpy.array(
            location_sequences)[this_cluster_indices][top_10_indices]
        color = sorted(range(len(points_sequences)),
                       key=lambda x: top_10_indices[x])
        print(" color:", color)
        cpygmaps.output_patterns_l(points_sequences, color,
                                   len(points_sequences),
                                   OUTPUT_MAP + "_" + str(c) + ".html")

    print(separator)
    print("ENDTIME:", (datetime.datetime.now()))
    print(separator)
def main():
    """Cluster trajectories with ``lee.line_segment_clustering`` and export one map per cluster.

    Workflow: fetch users and locations from ``locationclustering.main()``;
    split the posts of users that have any into trajectories; discard
    trajectories with two or fewer entries; cluster the remaining vector
    sequences; write an HTML file for each cluster index through
    ``cpygmaps.output_patterns_l``.  Semantic-membership data
    (``"semantic_mem"``) is not part of this variant.  Timestamps are
    printed at the start and end.
    """
    divider = "--------------------------------------"

    print(divider)
    print("STARTTIME:", (datetime.datetime.now()))
    print(divider)

    # Getting data
    users, locations = locationclustering.main()

    # Getting sequences cluster
    non_empty_posts = [
        a_user.posts for a_user in users.values() if len(a_user.posts) != 0
    ]
    sequences = ctrajectory.split_trajectory(non_empty_posts, SPLIT_DAY)
    vector_sequences = ctrajectory.get_vector_sequence(sequences, locations)
    location_sequences = ctrajectory.convertto_location_sequences(sequences, locations)

    print("Filtering short trajectories...")
    # Collect the positions of trajectories with at most two entries.
    fail_indices = [pos for pos, trajectory in enumerate(sequences) if len(trajectory) <= 2]
    print(" will delete #:", len(fail_indices))
    sequences = numpy.delete(numpy.array(sequences), fail_indices)
    vector_sequences = numpy.delete(numpy.array(vector_sequences), fail_indices)
    location_sequences = numpy.delete(numpy.array(location_sequences), fail_indices)

    summed_lengths = sum(len(trajectory) for trajectory in sequences)
    print(
        " remain sequences #:", len(sequences),
        " ,average length=", summed_lengths / len(sequences),
    )

    # ``noise`` is returned by the clustering call but not used afterwards.
    cluster_num, cluster_membership, noise = lee.line_segment_clustering(
        vector_sequences, "Mine", "Location", ep=EPSILON, minlns=MINLNS
    )
    print(cluster_membership)

    print("Start Outputting...")
    for c in range(cluster_num):
        # Select the entries labelled with the current cluster index.
        this_cluster_indices = numpy.where(cluster_membership == c)[0]
        print(c, " >> this cluster #:", len(this_cluster_indices))
        points_sequences = numpy.array(location_sequences)[this_cluster_indices]
        n_points = len(points_sequences)
        color = range(n_points)
        html_path = OUTPUT_MAP + "_" + str(c) + ".html"
        cpygmaps.output_patterns_l(points_sequences, color, n_points, html_path)

    print(divider)
    print("ENDTIME:", (datetime.datetime.now()))
    print(divider)
def main():
    """Cluster trajectories on location vectors plus semantic membership and map the results.

    Steps, as performed by the code below:

    1. Load users and locations via ``locationclustering.main()`` and attach
       the transposed document-topic data read by
       ``ccluster.open_doc_topic`` to the locations under ``"semantic_mem"``.
    2. Split the posts of every user that has posts into trajectories and
       derive vector, semantic-vector and location sequences from them.
    3. Drop every trajectory with two or fewer entries.
    4. Run ``cfuzzy.sequences_clustering`` (mode ``"Location"``, algorithm
       ``"2Distance"``) on the vector and semantic sequences.
    5. For every non-empty cluster, write the members whose value in ``u``
       reaches the ``MAX_KTH``-th largest to an HTML file through
       ``cpygmaps.output_patterns_l``.

    Start and end timestamps are printed around the run.
    """
    separator = "--------------------------------------"
    print(separator)
    print("STARTTIME:", (datetime.datetime.now()))
    print(separator)

    # Getting data
    users, locations = locationclustering.main()
    location_id, doc_topic = ccluster.open_doc_topic(LOCATION_TOPIC)
    locations = ccluster.fit_locations_membership(locations,
                                                  numpy.transpose(doc_topic),
                                                  location_id, "semantic_mem")

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0],
        SPLIT_DAY)
    vector_sequences = ctrajectory.get_vector_sequence(sequences, locations)
    semantic_sequences = ctrajectory.get_vector_sequence(
        sequences, locations, "semantic_mem")
    location_sequences = ctrajectory.convertto_location_sequences(
        sequences, locations)

    print("Filtering short trajectories...")
    fail_indices = [i for i, s in enumerate(sequences) if len(s) <= 2]
    print(" will delete #:", len(fail_indices))
    # NOTE(review): numpy.delete without ``axis`` works on the flattened
    # array, so this presumably relies on each conversion producing a 1-D
    # array with one element per trajectory — confirm.
    sequences = numpy.delete(numpy.array(sequences), fail_indices)
    vector_sequences = numpy.delete(numpy.array(vector_sequences), fail_indices)
    semantic_sequences = numpy.delete(numpy.array(semantic_sequences),
                                      fail_indices)
    location_sequences = numpy.delete(numpy.array(location_sequences),
                                      fail_indices)
    print(" remain sequences #:", len(sequences))

    # Only ``u`` (indexed as u[cluster, sequence]) and ``membership`` (one
    # cluster label per sequence) are used below.
    u, _, _, _, _, _, membership, _ = cfuzzy.sequences_clustering(
        "Location",
        vector_sequences,
        CLUSTER_NUM,
        MAX_KTH,
        semantic_sequences,
        e=ERROR,
        algorithm="2Distance")

    print("Start Outputting...")
    for c in range(CLUSTER_NUM):
        this_cluster_indices = [i for i, x in enumerate(membership) if x == c]
        print(c, " >> this cluster #:", len(this_cluster_indices))
        # Compare by truthiness; the original ``is not 0`` was an identity
        # test against an int literal.
        if not this_cluster_indices:
            continue

        cluster_u = u[c, this_cluster_indices]
        ranked_u = sorted(cluster_u, reverse=True)
        # MAX_KTH-th largest value, or the smallest one when the cluster has
        # fewer than MAX_KTH members.
        top_10_u = ranked_u[min(len(ranked_u), MAX_KTH) - 1]
        # Positions are relative to this_cluster_indices; ties at the
        # threshold are all kept.
        top_10_indices = [i for i, x in enumerate(cluster_u) if x >= top_10_u]
        print(" top_10:", top_10_u, ">", top_10_indices)
        print(cluster_u[top_10_indices])

        points_sequences = numpy.array(
            location_sequences)[this_cluster_indices][top_10_indices]
        color = sorted(range(len(points_sequences)),
                       key=lambda x: top_10_indices[x])
        cpygmaps.output_patterns_l(points_sequences, color,
                                   len(points_sequences),
                                   OUTPUT_MAP + "_" + str(c) + ".html")

    print(separator)
    print("ENDTIME:", (datetime.datetime.now()))
    print(separator)
def main():
    """Cluster trajectories on their cluster sequences and map each cluster's top members.

    Loads users and locations through ``locationclustering.main()``, splits
    the posts of every user that has posts into trajectories, drops every
    trajectory whose cluster sequence has two or fewer entries, clusters the
    remaining cluster sequences with ``cfuzzy.sequences_clustering`` (mode
    ``"Cluster"``, algorithm ``"Original"``) and, for every non-empty
    cluster, writes the members whose value in ``u`` reaches the
    ``MAX_KTH``-th largest to an HTML file through
    ``cpygmaps.output_patterns_l``.  Start and end timestamps are printed.
    """
    separator = "--------------------------------------"
    print(separator)
    print("STARTTIME:", (datetime.datetime.now()))
    print(separator)

    users, locations = locationclustering.main()

    # Getting sequences cluster
    sequences = ctrajectory.split_trajectory(
        [a_user.posts for a_user in users.values() if len(a_user.posts) != 0],
        SPLIT_DAY)
    cluster_sequences = ctrajectory.get_cluster_sequence(sequences, locations)
    location_sequences = ctrajectory.convertto_location_sequences(
        sequences, locations)

    print("Filtering short trajectories...")
    # The length check is made on the cluster sequences, not the raw ones.
    fail_indices = [i for i, s in enumerate(cluster_sequences) if len(s) <= 2]
    print(" will delete #:", len(fail_indices))
    # NOTE(review): numpy.delete without ``axis`` works on the flattened
    # array, so this presumably relies on each conversion producing a 1-D
    # array with one element per trajectory — confirm.
    sequences = numpy.delete(numpy.array(sequences), fail_indices)
    cluster_sequences = numpy.delete(numpy.array(cluster_sequences),
                                     fail_indices)
    location_sequences = numpy.delete(numpy.array(location_sequences),
                                      fail_indices)
    print(" remain sequences #:", len(sequences))

    # The original passed ``vector_sequences`` here, a name this function
    # never defines (NameError).  The filtered ``cluster_sequences`` is the
    # data prepared above for mode "Cluster".
    u, _, _, _, _, _, membership, _ = cfuzzy.sequences_clustering(
        "Cluster",
        cluster_sequences,
        CLUSTER_NUM,
        MAX_KTH,
        e=ERROR,
        algorithm="Original")

    print("Start Outputting...")
    for c in range(CLUSTER_NUM):
        this_cluster_indices = [i for i, x in enumerate(membership) if x == c]
        print(c, " >> this cluster #:", len(this_cluster_indices))
        # Compare by truthiness; the original ``is not 0`` was an identity
        # test against an int literal.
        if not this_cluster_indices:
            continue

        cluster_u = u[c, this_cluster_indices]
        ranked_u = sorted(cluster_u, reverse=True)
        # MAX_KTH-th largest value, or the smallest one when the cluster has
        # fewer than MAX_KTH members.
        top_10_u = ranked_u[min(len(ranked_u), MAX_KTH) - 1]
        # Positions are relative to this_cluster_indices; ties at the
        # threshold are all kept.
        top_10_indices = [i for i, x in enumerate(cluster_u) if x >= top_10_u]
        print(" top_10:", top_10_u, ">", top_10_indices)
        print(cluster_u[top_10_indices])

        points_sequences = numpy.array(
            location_sequences)[this_cluster_indices][top_10_indices]
        color = sorted(range(len(points_sequences)),
                       key=lambda x: top_10_indices[x])
        cpygmaps.output_patterns_l(points_sequences, color,
                                   len(points_sequences),
                                   OUTPUT_MAP + "_" + str(c) + ".html")

    print(separator)
    print("ENDTIME:", (datetime.datetime.now()))
    print(separator)