コード例 #1
0
    def __merge_clusters(self):
        """Merge the two closest clusters into a single one (naive approach).

        Every unordered pair of clusters in the cluster list is compared with
        ``cluster_a.distance(cluster_b)``. The pair with the smallest distance
        is removed from the list and replaced by one new cluster that holds
        the concatenated points and indexes of both. The center of the new
        cluster is the per-coordinate sum of its points divided by
        ``len(merged_cluster)``.

        Raises:
            ValueError: If no pair can be selected, i.e. there are fewer than
                two clusters or no pairwise distance is below infinity.
        """
        # Function-scope import: the file's top-level import block is not
        # visible from this method, so the dependency is kept local.
        import itertools

        min_distance = float('inf')
        closest_pair = None

        # combinations() visits pairs in the same order as an
        # "a, then every later b" nested scan, so ties still go to the first
        # pair found, without copying a list slice per outer iteration.
        for cluster_a, cluster_b in itertools.combinations(self.__clusters, 2):
            distance = cluster_a.distance(cluster_b)
            if distance < min_distance:
                min_distance = distance
                closest_pair = (cluster_a, cluster_b)

        if closest_pair is None:
            # Previously this surfaced as "'NoneType' object is not
            # subscriptable"; fail with an explicit message instead.
            raise ValueError(
                "cannot merge clusters: no pair of clusters with a distance "
                "below infinity was found (at least two clusters are required)"
            )

        first, second = closest_pair

        merged_cluster = Cluster(None, None)
        merged_cluster.points = first.points + second.points
        merged_cluster.indexes = first.indexes + second.indexes

        # Accumulate the coordinate-wise sum of all points, then average.
        coordinate_sums = [0] * self.__dimension
        for point in merged_cluster.points:
            for idx_coord, coord in enumerate(point):
                coordinate_sums[idx_coord] += coord

        # NOTE(review): relies on Cluster defining __len__; presumably it
        # returns the number of points -- confirm against the Cluster class.
        size = len(merged_cluster)
        merged_cluster.center = [total / size for total in coordinate_sums]

        self.__clusters.remove(first)
        self.__clusters.remove(second)
        self.__clusters.append(merged_cluster)
コード例 #2
0
    def clustering(self):
        """Merge clusters pairwise until the requested number remains.

        The priority queue is built from the result of
        ``self.__compute_distances()``. While there are more clusters than
        ``self.__number_of_clusters``, the smallest entry is popped from the
        heap. Entries rejected by ``self.__valid_heap_node`` are skipped
        (presumably because they refer to clusters that were already merged
        -- confirm against that helper). For an accepted entry, the two
        referenced clusters are replaced by a new cluster holding their
        combined points and indexes, and the heap is updated with it.
        """
        pairwise_distances = self.__compute_distances()
        self.__build_priority_queue(pairwise_distances)

        # Identifiers of clusters that have already been merged away.
        retired = []

        while len(self.__clusters) > self.__number_of_clusters:
            # The popped distance itself is not needed, only the node.
            _, heap_node = heapq.heappop(self.__heap)
            pair = heap_node[1]

            if not self.__valid_heap_node(heap_node, retired):
                continue

            # Clusters are stored under the string form of their identifier.
            keys = [str(member) for member in pair]
            left = self.__clusters[keys[0]]
            right = self.__clusters[keys[1]]

            merged = Cluster(None, None)
            merged.points = left.points + right.points
            merged.indexes = left.indexes + right.indexes
            merged.center = self.__compute_centroid(merged)

            del self.__clusters[keys[0]]
            del self.__clusters[keys[1]]
            retired.extend(pair)

            # The heap is updated before the merged cluster is registered,
            # so the update only sees the clusters that existed beforehand.
            self.__update_heap(merged)
            self.__clusters[str(merged.indexes)] = merged
コード例 #3
0
    def __create_clusters(self, centroids, mtx):
        """Create one cluster per centroid row and assign the rows of ``mtx``.

        For every row of ``centroids`` a new cluster is created and appended
        to the stored cluster list. Its center is the first ``self.__dims``
        values of the row divided by the row's second-to-last value
        (presumably coordinate sums and group size -- confirm against the
        caller).

        Every row ``i`` of ``mtx`` is then added to the cluster selected by
        the row's last value: the first ``self.__dims`` values are appended
        to that cluster's ``points`` and ``i`` to its ``indexes``.

        Args:
            centroids: 2-D array-like exposing ``shape`` and
                ``[row, column]`` indexing, one row per centroid.
            mtx: 2-D array-like exposing ``shape`` and ``[row, column]``
                indexing; the last column holds the assigned centroid index.
        """
        # Clusters created by this call, in centroid order. Rows of mtx are
        # resolved against this list rather than the stored cluster list, so
        # the centroid index stays correct even when clusters were already
        # stored before this call.
        created = []

        for i in range(centroids.shape[0]):
            cluster_W = Cluster(None, None)
            # NOTE(review): a zero in the second-to-last column makes this
            # division produce inf/nan -- confirm that cannot happen.
            cluster_W.center = list(
                centroids[i, :self.__dims] / centroids[i, -2])
            self.__clusters.append(cluster_W)
            created.append(cluster_W)

        for i in range(mtx.shape[0]):
            # The last column identifies the assigned centroid.
            cent = mtx[i, -1]
            point = list(mtx[i, :self.__dims])
            target = created[int(cent)]
            target.points.append(point)
            target.indexes.append(i)