Example #1
0
def main():
    """Run the tag-intersection fuzzy clustering and export its results.

    The function loads the locations and the users' posts, attaches the
    users to the locations, and calls ``set_location_tags`` and
    ``set_location_user_count`` on the result.  Coordinates, the tag
    intersection and the per-location ``usercount`` values are then fed to
    ``cfuzzy.cmeans_intersect`` with the ``"kthCluster_LocationFrequency"``
    algorithm.  Each location receives its entry of the returned membership
    as a ``cluster`` attribute, and the outcome is written through
    ``cpygmaps.output_clusters`` (``OUTPUT_MAP``) and
    ``cfuzzy.output_location_cluster`` (``OUTPUT_CLUSTER``).

    Start/end timestamps and a few diagnostics are printed along the way.
    """
    rule = "--------------------------------------"

    def stamp(label):
        # Timestamp framed by two separator lines.
        print(rule)
        print(label, (datetime.datetime.now()))
        print(rule)

    stamp("STARTTIME:")

    # --- load the data -----------------------------------------------------
    locations = clocation.get_locations_list()
    users = cuser.get_users_posts_afile(USER_POSTS_FILE)
    locations = clocation.fit_users_to_location(
        locations, users, "tags", "uid")
    del users  # no longer needed once the users are attached to locations
    set_location_tags(locations)
    set_location_user_count(locations)

    # --- build the clustering inputs ----------------------------------------
    coordinate = numpy.array(
        [(float(place.lat), float(place.lng)) for place in locations.values()])
    intersection = get_tag_intersection(locations.values())
    location_frequency = numpy.array(
        [place.usercount for place in locations.values()])

    # --- diagnostics ----------------------------------------------------------
    freq_mean = sum(location_frequency) / len(location_frequency)
    freq_max = max(location_frequency)
    freq_min = min(location_frequency)
    print("avg location_frequency:", freq_mean, " max:", freq_max, " min:",
          freq_min)

    first_place = list(locations.values())[0]
    print("location 1:", first_place.lname, first_place.lid)

    column_totals = intersection.sum(axis=0)[0:6]
    row_totals = intersection.sum(axis=1)[0:6]
    print("intersection.sum:", column_totals, row_totals)
    print("location_frequency:", location_frequency.shape)

    # --- clustering -----------------------------------------------------------
    # Alternative invocations kept for reference:
    #   original intersect clustering
    #     cfuzzy.cmeans_intersect(coordinate.T, intersection, CLUSTER_NUM,
    #                             w=WEIGHT, e=ERROR)
    #   intersect clustering with the kth locations in each cluster
    #     cfuzzy.cmeans_intersect(coordinate.T, intersection, CLUSTER_NUM,
    #                             MAX_KTH, w=WEIGHT, e=ERROR,
    #                             algorithm="kthCluster")
    #
    # Used here: kth locations in each cluster, with the location frequency
    # as weight.  Only `membership` is consumed below; the other nine return
    # values are unpacked to keep the expected arity explicit.
    (_centers, _u, _u0, _d1, _d2, _d, _jm, _p, _fpc,
     membership) = cfuzzy.cmeans_intersect(
         coordinate.T,
         intersection,
         CLUSTER_NUM,
         MAX_KTH,
         location_frequency,
         w=WEIGHT,
         e=ERROR,
         algorithm="kthCluster_LocationFrequency")

    # membership is positional: entry i belongs to the i-th key of locations.
    for position, lid in enumerate(locations):
        locations[lid].cluster = membership[position]

    # --- output -----------------------------------------------------------------
    markers = []
    for place in locations.values():
        caption = str(place.cluster) + " >> " + place.lname
        markers.append((float(place.lat), float(place.lng), caption))
    cpygmaps.output_clusters(markers, membership, CLUSTER_NUM, OUTPUT_MAP)

    cfuzzy.output_location_cluster(locations.values(), "cluster",
                                   OUTPUT_CLUSTER)

    stamp("ENDTIME:")
Example #2
0
def output_representatives(data, u, k):
    """Output the strongest members of every cluster as its representatives.

    For each row of the membership matrix ``u`` the points whose value is at
    least the row's k-th largest value are selected; ties at that threshold
    are all kept, so a cluster may contribute more than ``k`` points.  The
    selected rows of ``data`` are labelled with their cluster index and passed
    to ``cpygmaps.output_clusters`` together with ``OUTPUT_REPRESENTATIVES``.

    Args:
        data: 2-D numpy array with one row per point.  The first two columns
            are converted to ``float`` and used as the marker position
            (presumably latitude/longitude -- confirm against the callers).
        u: 2-D numpy array indexed as ``u[cluster, point]``.
        k: 1-based rank of the membership value used as the per-cluster
            cut-off.

    Raises:
        ValueError: If ``k`` is smaller than 1.
        IndexError: If ``k`` exceeds the number of columns of ``u``.
    """
    if k < 1:
        # A non-positive k would turn into a negative index below and
        # silently wrap around (k == 0 selects the row minimum as threshold,
        # i.e. every point), so reject it explicitly.
        raise ValueError("k must be a positive integer, got %r" % (k,))

    representatives = []
    membership = []
    for cluster, row in enumerate(u):
        # k-th largest value of the row; raises IndexError when k is larger
        # than the row, as the original list indexing did.
        threshold = numpy.sort(row)[-k]
        indices = numpy.flatnonzero(row >= threshold)
        representatives.extend(data[indices, :])
        membership.extend([cluster] * len(indices))

    markers = [(float(point[0]), float(point[1]), str(label))
               for point, label in zip(representatives, membership)]
    cpygmaps.output_clusters(markers, membership, u.shape[0],
                             OUTPUT_REPRESENTATIVES)
Example #3
0
def main():
    """Cluster all locations by tag intersection and write out the result.

    Loads the locations and the users' posts, attaches the users to the
    locations, then calls ``set_location_tags`` and
    ``set_location_user_count``.  The coordinates, the tag intersection and
    the ``usercount`` of every location are passed to
    ``cfuzzy.cmeans_intersect`` using the ``"kthCluster_LocationFrequency"``
    algorithm.  The returned membership is stored per location in a
    ``cluster`` attribute and exported via ``cpygmaps.output_clusters``
    (``OUTPUT_MAP``) and ``cfuzzy.output_location_cluster``
    (``OUTPUT_CLUSTER``).  Timestamps and diagnostics go to stdout.
    """
    separator = "--------------------------------------"

    print(separator)
    print("STARTTIME:", (datetime.datetime.now()))
    print(separator)

    # Getting data.
    locations = clocation.get_locations_list()
    users = cuser.get_users_posts_afile(USER_POSTS_FILE)
    locations = clocation.fit_users_to_location(
        locations, users, "tags", "uid"
    )
    del users  # the user posts are not used after this point
    set_location_tags(locations)
    set_location_user_count(locations)

    # Inputs for the clustering.
    lat_lng_pairs = [
        (float(loc.lat), float(loc.lng)) for loc in locations.values()
    ]
    coordinate = numpy.array(lat_lng_pairs)
    intersection = get_tag_intersection(locations.values())
    user_counts = [loc.usercount for loc in locations.values()]
    location_frequency = numpy.array(user_counts)

    # Diagnostics.
    average = sum(location_frequency) / len(location_frequency)
    highest = max(location_frequency)
    lowest = min(location_frequency)
    print("avg location_frequency:", average, " max:", highest, " min:", lowest)

    head = list(locations.values())[0]
    print("location 1:", head.lname, head.lid)
    print(
        "intersection.sum:",
        intersection.sum(axis=0)[0:6],
        intersection.sum(axis=1)[0:6],
    )
    print("location_frequency:", location_frequency.shape)

    # Other ways to call the clustering, kept for reference:
    #   * original intersect clustering
    #       cfuzzy.cmeans_intersect(coordinate.T, intersection, CLUSTER_NUM,
    #                               w=WEIGHT, e=ERROR)
    #   * intersect clustering with the kth locations in each cluster
    #       cfuzzy.cmeans_intersect(coordinate.T, intersection, CLUSTER_NUM,
    #                               MAX_KTH, w=WEIGHT, e=ERROR,
    #                               algorithm="kthCluster")
    #
    # Active variant: kth locations in each cluster & location frequency as
    # weight.  Only the last of the ten returned values is used afterwards.
    outcome = cfuzzy.cmeans_intersect(
        coordinate.T,
        intersection,
        CLUSTER_NUM,
        MAX_KTH,
        location_frequency,
        w=WEIGHT,
        e=ERROR,
        algorithm="kthCluster_LocationFrequency",
    )
    _cntr1, _u, _u0, _d1, _d2, _d, _jm, _p, _fpc, membership = outcome

    # Entry i of membership belongs to the i-th key of locations.
    for index, key in enumerate(locations):
        locations[key].cluster = membership[index]

    points = [
        (float(loc.lat), float(loc.lng), str(loc.cluster) + " >> " + loc.lname)
        for loc in locations.values()
    ]
    cpygmaps.output_clusters(points, membership, CLUSTER_NUM, OUTPUT_MAP)

    cfuzzy.output_location_cluster(
        locations.values(), "cluster", OUTPUT_CLUSTER
    )

    print(separator)
    print("ENDTIME:", (datetime.datetime.now()))
    print(separator)