コード例 #1
0
    def mClustering(dps, t):
        """
        Perform m clustering: double the number of clusters until the relative
        change in cohesion between v and 2v clusters drops below theta.

        dps: data points (anything accepted by len() and km.kmeans)
        t:   theta, the rate-of-change threshold

        Returns the left boundary of the range that is to be refined further,
        i.e. half of the first v for which the step v -> 2v falls below theta
        (never less than 1).

        Prints a message and raises SystemExit when 2v would exceed the number
        of data points before the threshold is reached.
        """
        num_of_data_points = len(dps)
        k = 0       # exponent of the current upper candidate, 2v == 2**k
        while True:
            k += 1
            v = 2 ** (k - 1)
            two_v = 2 ** k
            if two_v > num_of_data_points:
                # Single-string print call: same output under Python 2 and 3.
                print("%s %s" % ("Special case (2v > N), k* = n:",
                                 num_of_data_points))
                # Same effect as the interactive-only quit() helper, but it
                # does not depend on the site module being loaded.
                raise SystemExit
            # km.kmeans is unpacked as (clusters, cohesion); only the cohesion
            # value is used here.
            _, cohesion_v = km.kmeans(dps, v)
            _, cohesion_2v = km.kmeans(dps, two_v)
            # float() keeps this a true division even if the cohesion values
            # are integers under Python 2.
            # NOTE(review): a cohesion_v of 0 would raise ZeroDivisionError --
            # confirm km.kmeans never reports 0 here.
            rate_of_chg = abs(cohesion_v - cohesion_2v) / float(cohesion_v * v)
            if rate_of_chg < t:
                break
        # The step v -> 2v is below theta, so the range of interest starts at
        # v/2 (== 2**(k-2)).  For v == 1 there is no smaller interval; return 1
        # instead of the fractional 2**-1 == 0.5.
        return max(1, v // 2)
コード例 #2
0
    def binarySearch(left, dps, t):
        """
        Perform binary search among the range [x, y] = [left, 2*left], with z
        the midpoint, until x and y are adjacent, then pick the better of the
        two.

        left: left boundary of desired range
        dps:  data points (passed through to km.kmeans)
        t:    theta, the rate-of-change threshold

        Returns k-star: whichever of the two final boundaries yields the
        smaller cohesion value (the right one on a tie).
        """
        right = 2 * left
        # Checking the width up front also covers a starting interval that is
        # already adjacent (left == 1), which must not be narrowed any further.
        while right - left > 1:
            midpoint = (right + left) // 2      # floor division on Python 2 and 3
            # km.kmeans is unpacked as (clusters, cohesion); only the cohesion
            # value is used here.
            _, cohesion_m = km.kmeans(dps, midpoint)
            _, cohesion_r = km.kmeans(dps, right)
            # right > midpoint inside the loop, so the step width is positive.
            # float() keeps this a true division even if the cohesion values
            # are integers under Python 2.
            # NOTE(review): a cohesion_m of 0 would raise ZeroDivisionError --
            # confirm km.kmeans never reports 0 here.
            rate_of_chg = (abs(cohesion_r - cohesion_m)
                           / float(cohesion_m * (right - midpoint)))
            if rate_of_chg < t:
                right = midpoint    # [z,y] is not qualified => switch to [x,z]
            else:
                # [z,y] is qualified => stay at [z,y].  A rate exactly equal
                # to theta lands here too, so every pass shrinks the interval.
                left = midpoint
        # x and y are now adjacent: output whichever gives better cohesion,
        # in other words, the smaller diameter.
        _, cohesion_l = km.kmeans(dps, left)
        _, cohesion_r = km.kmeans(dps, right)
        return left if cohesion_l < cohesion_r else right