def main(*argv):
    """Cluster daily location sequences using per-location membership vectors.

    With no positional arguments the module-level settings are used as they
    stand and the NOVDEC location-topic file is read. Otherwise the first
    five positional arguments overwrite, in order, the module globals
    CLUSTER_NUM, MAX_KTH, GPS_WEIGHT, FILTER_TIME_S and FILTER_TIME_E, and
    the OCT location-topic file is read. Arguments past the fifth are ignored.

    Side effects: prints progress banners, and appends a run suffix to the
    module globals OUTPUT_MAP and OUTPUT_PATTERN.

    Returns:
        A 4-tuple ``(location_sequences, spatial_array, semantic_array, u)``
        where ``u`` is the first item returned by
        ``cfuzzy.sequences_clustering_i``.

    Raises:
        IndexError: if between one and four positional arguments are given.
            The globals assigned before the missing index stay overwritten.

    NOTE(review): OUTPUT_MAP / OUTPUT_PATTERN are extended in place, so the
    suffix accumulates when this function runs more than once per process.
    NOTE(review): a later definition named ``main`` in this file rebinds the
    name if both live in the same module.
    """
    global CLUSTER_NUM, MAX_KTH, GPS_WEIGHT
    global FILTER_TIME_S, FILTER_TIME_E
    global OUTPUT_MAP, OUTPUT_PATTERN

    divider = "--------------------------------------"
    started_at = datetime.datetime.now()
    print(divider)
    print("STARTTIME:", datetime.datetime.now())
    print(divider)

    # Resolve run parameters and the location-topic input file.
    if not argv:
        topic_path = "./data/LocationTopic/LocationTopic_NOVDEC_c35.txt"
    else:
        # Assigned one at a time on purpose: see the IndexError note above.
        CLUSTER_NUM = argv[0]
        MAX_KTH = argv[1]
        GPS_WEIGHT = argv[2]
        FILTER_TIME_S = argv[3]
        FILTER_TIME_E = argv[4]
        topic_path = "./data/LocationTopic/LocationTopic_OCT_c35.txt"

    run_tag = str(CLUSTER_NUM) + "k" + str(MAX_KTH)
    OUTPUT_MAP = OUTPUT_MAP + run_tag + "w" + str(GPS_WEIGHT)
    OUTPUT_PATTERN = OUTPUT_PATTERN + run_tag

    # Load users/locations and attach the transposed doc-topic matrix to the
    # locations under the "semantic_mem" attribute name.
    users, locations = locationclustering.main(FILTER_TIME_S, FILTER_TIME_E)
    location_id, doc_topic = ccluster.open_doc_topic(topic_path)
    transposed_topics = numpy.transpose(doc_topic)
    locations = ccluster.fit_locations_membership(
        locations, transposed_topics, location_id, "semantic_mem"
    )
    print(" users # :", len(users))

    # Build post sequences; users without posts are left out.
    # Disabled alternative:
    #   sequences = ctrajectory.split_trajectory(<same post lists>, SPLIT_DAY)
    posts_per_user = [
        member.posts for member in users.values() if len(member.posts) != 0
    ]
    sequences = ctrajectory.split_trajectory_byday(posts_per_user)
    sequences = ctrajectory.remove_adjacent_location(sequences)
    # Disabled: sequences = get_specific(sequences)
    sequences = ctrajectory.remove_short(sequences)
    print(" remain users #:", len({seq[0].uid for seq in sequences}))

    # Turn the post sequences into location sequences and the two arrays
    # that feed the clustering call.
    location_sequences, longest_len = ctrajectory.convertto_location_sequences(
        sequences, locations
    )
    spatial_array = ctrajectory.get_vector_array(location_sequences, longest_len)
    semantic_array = ctrajectory.get_vector_array(
        location_sequences, longest_len, "semantic_mem"
    )

    # Only `u` is returned; the other seven results are unpacked so that a
    # result of unexpected length still fails here.
    u, _u0, _d, _jm, _p, _fpc, _center, _membership = cfuzzy.sequences_clustering_i(
        "Location",
        spatial_array,
        CLUSTER_NUM,
        MAX_KTH,
        semantic_array,
        GPS_WEIGHT,
        e=ERROR,
        algorithm="2WeightedDistance",
    )

    # Disabled post-processing / reporting steps, kept for reference:
    #   sequences, location_sequences, membership = filter_sequence(sequences, location_sequences, u, membership)
    #   ouput_pattern(sequences, location_sequences, u, membership)
    #   output_each_pattern(sequences, location_sequences, u, membership, 30)
    #   ctrajectory.output_clusters(sequences, membership, u, OUTPUT_PATTERN)
    #   output_cluster_detail(sequences, location_sequences, u, membership, 39, file = OUTPUT_ANALYSIS)

    print(divider)
    print(
        "ENDTIME:",
        datetime.datetime.now(),
        ", SPEND:",
        datetime.datetime.now() - started_at,
    )
    print(divider)

    return location_sequences, spatial_array, semantic_array, u
def main(*argv):
    """Cluster daily location sequences using hard cluster labels per location.

    With no positional arguments the module-level settings are used as they
    stand and the NOVDEC location-topic file is read. Otherwise positional
    arguments 0, 1, 3 and 4 overwrite the module globals CLUSTER_NUM,
    MAX_KTH, FILTER_TIME_S and FILTER_TIME_E, and the OCT location-topic file
    is read. Arguments past the fifth are ignored.

    Side effects: prints progress banners, appends a run suffix to the
    module globals OUTPUT_MAP and OUTPUT_PATTERN, and calls
    ``output_each_pattern`` and ``ctrajectory.output_clusters`` with the
    clustering result.

    Returns:
        A 4-tuple ``(location_sequences, spatial_sequences,
        semantic_sequences, u)`` where ``u`` is the first item returned by
        ``cfuzzy.sequences_clustering_i``.

    Raises:
        IndexError: if between one and four positional arguments are given.
            The globals assigned before the missing index stay overwritten.

    NOTE(review): ``argv[2]`` is never read here, so GPS_WEIGHT keeps its
    module-level value even when arguments are supplied - confirm intended.
    NOTE(review): OUTPUT_MAP / OUTPUT_PATTERN are extended in place, so the
    suffix accumulates when this function runs more than once per process.
    NOTE(review): another function named ``main`` is defined in this file;
    whichever definition executes last is the one bound to the name.
    """
    global CLUSTER_NUM, MAX_KTH
    global FILTER_TIME_S, FILTER_TIME_E
    global OUTPUT_MAP, OUTPUT_PATTERN

    divider = "--------------------------------------"
    started_at = datetime.datetime.now()
    print(divider)
    print("STARTTIME:", datetime.datetime.now())
    print(divider)

    # Resolve run parameters and the location-topic input file.
    if not argv:
        topic_path = "./data/LocationTopic/LocationTopic_NOVDEC_c35.txt"
    else:
        # Assigned one at a time on purpose: see the IndexError note above.
        CLUSTER_NUM = argv[0]
        MAX_KTH = argv[1]
        FILTER_TIME_S = argv[3]
        FILTER_TIME_E = argv[4]
        topic_path = "./data/LocationTopic/LocationTopic_OCT_c35.txt"

    run_tag = str(CLUSTER_NUM) + "k" + str(MAX_KTH)
    OUTPUT_MAP = OUTPUT_MAP + run_tag
    OUTPUT_PATTERN = OUTPUT_PATTERN + run_tag

    # Load users/locations; each doc-topic row is reduced to the index of its
    # largest entry and attached under the "semantic_cluster" attribute name.
    users, locations = locationclustering.main(FILTER_TIME_S, FILTER_TIME_E)
    location_id, doc_topic = ccluster.open_doc_topic(topic_path)
    semantic_cluster = numpy.argmax(doc_topic, axis=1)
    locations = ccluster.fit_locations_cluster(
        locations, semantic_cluster, location_id, "semantic_cluster"
    )
    print(" users # :", len(users))

    # Build post sequences; users without posts are left out.
    # Disabled alternative:
    #   sequences = ctrajectory.split_trajectory(<same post lists>, SPLIT_DAY)
    posts_per_user = [
        member.posts for member in users.values() if len(member.posts) != 0
    ]
    sequences = ctrajectory.split_trajectory_byday(posts_per_user)
    sequences = ctrajectory.remove_adjacent_location(sequences)
    # Disabled: sequences = get_specific(sequences)
    sequences = ctrajectory.remove_short(sequences)
    print(" remain users #:", len({seq[0].uid for seq in sequences}))

    # Turn the post sequences into location sequences and the two label
    # arrays that feed the clustering call.
    location_sequences, longest_len = ctrajectory.convertto_location_sequences(
        sequences, locations
    )
    spatial_sequences, semantic_sequences = ctrajectory.get_cluster_array(
        location_sequences, longest_len, "cluster", "semantic_cluster"
    )

    # Only `u` and `membership` are used below; the full 8-way unpack is kept
    # so that a result of unexpected length still fails here.
    u, _u0, _d, _jm, _p, _fpc, _center, membership = cfuzzy.sequences_clustering_i(
        "Cluster",
        spatial_sequences,
        CLUSTER_NUM,
        MAX_KTH,
        semantic_sequences,
        GPS_WEIGHT,
        e=ERROR,
        algorithm="2WeightedDistance",
    )

    # Disabled:
    #   sequences, location_sequences, membership = filter_sequence(sequences, location_sequences, u, membership)
    #   ouput_pattern(sequences, location_sequences, u, membership)
    output_each_pattern(sequences, location_sequences, u, membership, 30)
    ctrajectory.output_clusters(sequences, membership, u, OUTPUT_PATTERN)
    # Disabled:
    #   output_cluster_detail(sequences, location_sequences, u, membership, 26, file = OUTPUT_ANALYSIS)

    # Disabled ad-hoc transition analysis (was an inert string literal):
    #   ll = count(sequences, location_sequences, u, membership, 40)
    #   tag = from_to(location_sequences, membership, 40, [ll[0], ll[1], ll[2]])
    #   tag.extend(from_to(location_sequences, membership, 40, [ll[1], ll[0], ll[2]]))
    #   tag.extend(from_to(location_sequences, membership, 40, [ll[0], ll[0]]))
    #   tag.extend(from_to(location_sequences, membership, 40, [ll[1], ll[1]]))
    #   tag.extend(from_to(location_sequences, membership, 40, [ll[1], ll[2], ll[0]]))
    #   tag.extend(from_to(location_sequences, membership, 40, [ll[2], ll[1]]))
    #   f = open("./data/Result/Ana_40.txt", "a")
    #   f.write("\naboves:" + str(len(set(tag))))
    #   f.close()

    print(divider)
    print(
        "ENDTIME:",
        datetime.datetime.now(),
        ", SPEND:",
        datetime.datetime.now() - started_at,
    )
    print(divider)

    return location_sequences, spatial_sequences, semantic_sequences, u