Beispiel #1
0
def kmeans(points, k, cutoff):
    """Cluster geographic records into ``k`` groups with k-means.

    Each record in ``points`` is echoed to stdout, then its elements at
    index 1 and 2 are converted to floats (latitude and longitude) and
    wrapped in a ``Point``. Records that cannot be parsed are skipped.

    Args:
        points: Iterable of indexable records; ``record[1]`` and
            ``record[2]`` must be convertible with ``float()``.
        k: Number of clusters; the initial centroids are ``k`` parsed
            points drawn with ``random.sample``.
        cutoff: Iteration stops once the largest centroid shift reported
            by ``Cluster.update`` is below this value.

    Returns:
        A list with one ``[point_count, centroid_coords]`` entry per cluster.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``k`` exceeds
            the number of successfully parsed records.
    """
    parsed = []
    for record in points:
        print(record)
        try:
            lat = float(record[1])
            lon = float(record[2])
        except (IndexError, KeyError, TypeError, ValueError):
            # Malformed record (too short, missing or non-numeric field):
            # skip it. Narrower than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit and real bugs still surface.
            continue
        parsed.append(Point([lat, lon]))
    points = parsed

    # Randomly sample k Points and build one Cluster around each of them.
    clusters = [Cluster([seed]) for seed in random.sample(points, k)]
    print("  clusters: %s" % clusters)

    while True:
        # One (initially empty) member list per cluster.
        lists = [[] for _ in clusters]

        for p in points:
            # Find the cluster whose centroid is nearest to this point.
            # Strict '<' keeps the earliest cluster on ties.
            index = 0
            smallest_distance = dist_on_earth(p.coords,
                                              clusters[0].centroid.coords)
            for i, cluster in enumerate(clusters[1:], 1):
                distance = dist_on_earth(p.coords, cluster.centroid.coords)
                if distance < smallest_distance:
                    smallest_distance = distance
                    index = i
            lists[index].append(p)

        # Update every cluster with its new members and record the biggest
        # centroid shift.
        # NOTE(review): a cluster that attracted no points is updated with
        # an empty list; whether Cluster.update copes with that is not
        # visible from here -- confirm.
        biggest_shift = 0.0
        for cluster, members in zip(clusters, lists):
            shift = cluster.update(members)
            biggest_shift = max(biggest_shift, shift)

        # Converged once no centroid moved by at least the cutoff.
        if biggest_shift < cutoff:
            break

    # Return the list of cluster attributes.
    return [[len(c.points), c.centroid.coords] for c in clusters]
def kmeans(points, k, cutoff):
    """Run k-means over latitude/longitude records and summarise clusters.

    Every record in ``points`` is printed, then ``record[1]`` and
    ``record[2]`` are converted with ``float()`` (latitude, longitude) and
    turned into a ``Point``. Records that fail to parse are ignored.

    Args:
        points: Iterable of indexable records holding latitude at index 1
            and longitude at index 2.
        k: Number of clusters; ``k`` parsed points are picked with
            ``random.sample`` as the starting centroids.
        cutoff: Convergence threshold; the loop ends when the largest
            shift returned by ``Cluster.update`` is smaller than this.

    Returns:
        A list of ``[point_count, centroid_coords]`` pairs, one per cluster.

    Raises:
        ValueError: Propagated from ``random.sample`` when fewer than ``k``
            records could be parsed.
    """
    parsed = []
    for record in points:
        print(record)
        try:
            lat = float(record[1])
            lon = float(record[2])
        except (IndexError, KeyError, TypeError, ValueError):
            # Only parsing failures are skipped; a bare ``except`` would
            # also hide KeyboardInterrupt/SystemExit and programming errors.
            continue
        parsed.append(Point([lat, lon]))
    points = parsed

    # Randomly sample k Points from the list, build Clusters around them.
    clusters = [Cluster([seed]) for seed in random.sample(points, k)]
    print("  clusters: %s" % clusters)

    while True:
        # Fresh member list for each cluster on every pass.
        lists = [[] for _ in clusters]

        for p in points:
            # Locate the nearest centroid; ties go to the lowest index
            # because the comparison is strict.
            index = 0
            smallest_distance = dist_on_earth(p.coords,
                                              clusters[0].centroid.coords)
            for i, cluster in enumerate(clusters[1:], 1):
                distance = dist_on_earth(p.coords, cluster.centroid.coords)
                if distance < smallest_distance:
                    smallest_distance = distance
                    index = i
            lists[index].append(p)

        # Hand each cluster its members and track the largest centroid move.
        # NOTE(review): clusters that received no points get an empty list;
        # Cluster.update's handling of that case is not shown here -- verify.
        biggest_shift = 0.0
        for cluster, members in zip(clusters, lists):
            shift = cluster.update(members)
            biggest_shift = max(biggest_shift, shift)

        # Stop when the biggest centroid shift is below the cutoff.
        if biggest_shift < cutoff:
            break

    # Return the list of cluster attributes.
    return [[len(c.points), c.centroid.coords] for c in clusters]
Beispiel #3
0
 def update(self, points):
     """Replace this cluster's points and report the centroid shift.

     Stores ``points`` on the instance, recomputes the centroid with
     ``calculateCentroid()`` and returns ``dist_on_earth`` between the
     previous and the new centroid coordinates.
     """
     previous = self.centroid
     # The points must be stored before the centroid is recomputed.
     self.points = points
     recomputed = self.calculateCentroid()
     self.centroid = recomputed
     return dist_on_earth(previous.coords, self.centroid.coords)
 def update(self, points):
     """Swap in a new point list and return how far the centroid moved.

     The new centroid comes from ``calculateCentroid()``; the return value
     is ``dist_on_earth`` applied to the old and new centroid coordinates.
     """
     before = self.centroid
     # Assign the points first so the recomputation sees the new members.
     self.points = points
     after = self.calculateCentroid()
     self.centroid = after
     return dist_on_earth(before.coords, self.centroid.coords)