def main():
    """Run the location-frequency-weighted intersection clustering end to end.

    Steps, in order:
      1. load locations and user posts and attach the users to the locations
         via ``clocation.fit_users_to_location``;
      2. call ``set_location_tags`` and ``set_location_user_count`` on the
         result;
      3. build the coordinate array, the tag intersection and the per-location
         ``usercount`` array, printing a few summary figures;
      4. cluster with ``cfuzzy.cmeans_intersect`` using the
         ``"kthCluster_LocationFrequency"`` algorithm;
      5. store each location's membership entry on its ``cluster`` attribute
         and pass the result to ``cpygmaps.output_clusters`` and
         ``cfuzzy.output_location_cluster``.

    Start and end timestamps are printed around the run.

    NOTE(review): main() is defined again further down this file with the
    same logic; that later definition replaces this one when the module loads.
    """
    separator = "--------------------------------------"
    print(separator)
    print("STARTTIME:", (datetime.datetime.now()))
    print(separator)

    # Load the raw data and merge the user posts into the locations.
    locations = clocation.get_locations_list()
    users = cuser.get_users_posts_afile(USER_POSTS_FILE)
    locations = clocation.fit_users_to_location(locations, users, "tags", "uid")
    del users  # no longer needed once merged into the locations
    set_location_tags(locations)
    set_location_user_count(locations)

    # Clustering inputs; all three follow the order of locations.values().
    coordinate = numpy.array(
        [(float(loc.lat), float(loc.lng)) for loc in locations.values()]
    )
    intersection = get_tag_intersection(locations.values())
    location_frequency = numpy.array(
        [loc.usercount for loc in locations.values()]
    )

    # Summary figures for a quick sanity check of the inputs.
    average_frequency = sum(location_frequency) / len(location_frequency)
    print(
        "avg location_frequency:", average_frequency,
        " max:", max(location_frequency),
        " min:", min(location_frequency),
    )
    first_location = list(locations.values())[0]
    print("location 1:", first_location.lname, first_location.lid)
    sums_axis0 = intersection.sum(axis=0)[0:6]
    sums_axis1 = intersection.sum(axis=1)[0:6]
    print("intersection.sum:", sums_axis0, sums_axis1)
    print("location_frequency:", location_frequency.shape)

    # Alternative invocations kept for reference:
    #   original intersect clustering:
    #     cfuzzy.cmeans_intersect(coordinate.T, intersection, CLUSTER_NUM,
    #                             w=WEIGHT, e=ERROR)
    #   intersect clustering with the kth locations in each cluster:
    #     cfuzzy.cmeans_intersect(coordinate.T, intersection, CLUSTER_NUM,
    #                             MAX_KTH, w=WEIGHT, e=ERROR,
    #                             algorithm="kthCluster")
    # Active variant: kth locations in each cluster, with the location
    # frequency as weight. Only the last returned value is used below.
    (_cntr1, _u, _u0, _d1, _d2, _d, _jm, _p, _fpc,
     membership) = cfuzzy.cmeans_intersect(
        coordinate.T,
        intersection,
        CLUSTER_NUM,
        MAX_KTH,
        location_frequency,
        w=WEIGHT,
        e=ERROR,
        algorithm="kthCluster_LocationFrequency",
    )

    # membership is indexed in the same order the input arrays were built.
    for position, location in enumerate(locations.values()):
        location.cluster = membership[position]

    # One (lat, lng, label) marker per location for the map output.
    markers = []
    for location in locations.values():
        latitude = float(location.lat)
        longitude = float(location.lng)
        label = str(location.cluster) + " >> " + location.lname
        markers.append((latitude, longitude, label))
    cpygmaps.output_clusters(markers, membership, CLUSTER_NUM, OUTPUT_MAP)
    cfuzzy.output_location_cluster(locations.values(), "cluster", OUTPUT_CLUSTER)

    print(separator)
    print("ENDTIME:", (datetime.datetime.now()))
    print(separator)
def output_representatives(data, u, k):
    """Plot the rows of ``data`` that rank in the top ``k`` of each row of ``u``.

    For every row ``i`` of ``u`` the k-th largest value of that row is taken
    as a threshold, and every column whose value is at or above it is
    selected (ties at the threshold can therefore yield more than ``k``
    picks). The matching rows of ``data`` are collected, each labelled with
    ``i``, and handed to ``cpygmaps.output_clusters`` together with
    ``OUTPUT_REPRESENTATIVES``.

    Arguments:
    data -- array indexed as ``data[indices, :]``; the first two entries of
            each selected row are converted to float for the marker
            (presumably latitude/longitude -- confirm with callers).
    u    -- 2-D array indexed as ``u[i, :]`` (presumably a clusters-by-points
            membership matrix -- TODO confirm).
    k    -- 1-based rank of the threshold value within each row.
    """
    row_count = u.shape[0]
    representatives = []
    membership = []
    for row_index in range(row_count):
        row = u[row_index, :]
        # k-th largest value in this row; everything >= it is selected.
        threshold = sorted(row, reverse=True)[k - 1]
        indices = numpy.where(row >= threshold)[0]
        for point in data[indices, :]:
            representatives.append(point)
        membership += [row_index] * len(indices)

    markers = [
        (float(point[0]), float(point[1]), str(label))
        for point, label in zip(representatives, membership)
    ]
    cpygmaps.output_clusters(markers, membership, row_count, OUTPUT_REPRESENTATIVES)
def main():
    """Run the location-frequency-weighted intersection clustering end to end.

    Steps, in order:
      1. load locations and user posts and attach the users to the locations
         via ``clocation.fit_users_to_location``;
      2. call ``set_location_tags`` and ``set_location_user_count`` on the
         result;
      3. build the coordinate array, the tag intersection and the per-location
         ``usercount`` array, printing a few summary figures;
      4. cluster with ``cfuzzy.cmeans_intersect`` using the
         ``"kthCluster_LocationFrequency"`` algorithm;
      5. store each location's membership entry on its ``cluster`` attribute
         and pass the result to ``cpygmaps.output_clusters`` and
         ``cfuzzy.output_location_cluster``.

    Start and end timestamps are printed around the run.

    NOTE(review): an earlier definition of main() with the same logic exists
    above in this file; this later definition overrides it.
    """
    separator = "--------------------------------------"
    print(separator)
    print("STARTTIME:", (datetime.datetime.now()))
    print(separator)

    # Load the raw data and merge the user posts into the locations.
    locations = clocation.get_locations_list()
    users = cuser.get_users_posts_afile(USER_POSTS_FILE)
    locations = clocation.fit_users_to_location(locations, users, "tags", "uid")
    del users  # no longer needed once merged into the locations
    set_location_tags(locations)
    set_location_user_count(locations)

    # Clustering inputs; all three follow the order of locations.values().
    coordinate = numpy.array(
        [(float(loc.lat), float(loc.lng)) for loc in locations.values()]
    )
    intersection = get_tag_intersection(locations.values())
    location_frequency = numpy.array(
        [loc.usercount for loc in locations.values()]
    )

    # Summary figures for a quick sanity check of the inputs.
    average_frequency = sum(location_frequency) / len(location_frequency)
    print(
        "avg location_frequency:", average_frequency,
        " max:", max(location_frequency),
        " min:", min(location_frequency),
    )
    first_location = list(locations.values())[0]
    print("location 1:", first_location.lname, first_location.lid)
    sums_axis0 = intersection.sum(axis=0)[0:6]
    sums_axis1 = intersection.sum(axis=1)[0:6]
    print("intersection.sum:", sums_axis0, sums_axis1)
    print("location_frequency:", location_frequency.shape)

    # Alternative invocations kept for reference:
    #   original intersect clustering:
    #     cfuzzy.cmeans_intersect(coordinate.T, intersection, CLUSTER_NUM,
    #                             w=WEIGHT, e=ERROR)
    #   intersect clustering with the kth locations in each cluster:
    #     cfuzzy.cmeans_intersect(coordinate.T, intersection, CLUSTER_NUM,
    #                             MAX_KTH, w=WEIGHT, e=ERROR,
    #                             algorithm="kthCluster")
    # Active variant: kth locations in each cluster, with the location
    # frequency as weight. Only the last returned value is used below.
    (_cntr1, _u, _u0, _d1, _d2, _d, _jm, _p, _fpc,
     membership) = cfuzzy.cmeans_intersect(
        coordinate.T,
        intersection,
        CLUSTER_NUM,
        MAX_KTH,
        location_frequency,
        w=WEIGHT,
        e=ERROR,
        algorithm="kthCluster_LocationFrequency",
    )

    # membership is indexed in the same order the input arrays were built.
    for position, location in enumerate(locations.values()):
        location.cluster = membership[position]

    # One (lat, lng, label) marker per location for the map output.
    markers = []
    for location in locations.values():
        latitude = float(location.lat)
        longitude = float(location.lng)
        label = str(location.cluster) + " >> " + location.lname
        markers.append((latitude, longitude, label))
    cpygmaps.output_clusters(markers, membership, CLUSTER_NUM, OUTPUT_MAP)
    cfuzzy.output_location_cluster(locations.values(), "cluster", OUTPUT_CLUSTER)

    print(separator)
    print("ENDTIME:", (datetime.datetime.now()))
    print(separator)