def __merge_clusters(self):
    """Merge the two closest clusters into a single cluster (naive approach).

    Every unordered pair of clusters in ``self.__clusters`` is compared via
    the clusters' ``distance`` method, and the pair with the strictly
    smallest distance is selected (the first such pair wins ties).  The pair
    is removed from ``self.__clusters`` and a new cluster is appended whose
    points and indexes are the concatenation of the pair's, and whose center
    is the coordinate-wise mean of its points.

    Raises:
        ValueError: If no pair can be selected, i.e. there are fewer than
            two clusters or no pairwise distance compares below infinity.
    """
    clusters = self.__clusters

    # Exhaustive scan over all unordered pairs; indexing avoids copying a
    # slice of the cluster list on every outer iteration.
    min_distance = float('inf')
    closest_pair = None
    for idx_a, cluster_a in enumerate(clusters):
        for idx_b in range(idx_a + 1, len(clusters)):
            cluster_b = clusters[idx_b]
            distance = cluster_a.distance(cluster_b)
            if distance < min_distance:
                min_distance = distance
                closest_pair = (cluster_a, cluster_b)

    # Fail with a clear message instead of subscripting None below.
    if closest_pair is None:
        raise ValueError(
            'cannot merge clusters: no pair of clusters with a finite '
            'distance was found')
    first, second = closest_pair

    merged_cluster = Cluster(None, None)
    merged_cluster.points = first.points + second.points
    merged_cluster.indexes = first.indexes + second.indexes

    # Centroid = per-coordinate sum of all points divided by the cluster size.
    coord_sums = [0] * self.__dimension
    for point in merged_cluster.points:
        for idx_coord, coord in enumerate(point):
            coord_sums[idx_coord] += coord
    size = len(merged_cluster)
    merged_cluster.center = [total / size for total in coord_sums]

    # Replace the two source clusters with the merged one.
    clusters.remove(first)
    clusters.remove(second)
    clusters.append(merged_cluster)
def clustering(self):
    """Merge clusters pairwise until the requested number remains.

    A priority queue is first built from the result of
    ``self.__compute_distances()``.  While ``self.__clusters`` holds more
    entries than ``self.__number_of_clusters``, the smallest heap entry is
    popped; entries rejected by ``self.__valid_heap_node`` are skipped
    (presumably stale entries that mention already-merged clusters —
    confirm against that helper).  For an accepted entry, the two clusters
    it names are replaced in ``self.__clusters`` by one merged cluster,
    keyed by ``str(merged.indexes)``.
    """
    self.__build_priority_queue(self.__compute_distances())

    # Identifiers of clusters that have already been merged away.
    retired = []

    while len(self.__clusters) > self.__number_of_clusters:
        # Heap entries unpack as (distance, node); the distance is not needed.
        _, node = heapq.heappop(self.__heap)
        pair = node[1]
        if not self.__valid_heap_node(node, retired):
            continue

        # Clusters are stored under the string form of their identifier.
        keys = [str(member) for member in pair]
        first = self.__clusters[keys[0]]
        second = self.__clusters[keys[1]]

        merged = Cluster(None, None)
        merged.points = first.points + second.points
        merged.indexes = first.indexes + second.indexes
        merged.center = self.__compute_centroid(merged)

        del self.__clusters[keys[0]]
        del self.__clusters[keys[1]]
        retired.extend(pair)

        # The heap is updated after the pair is removed and before the
        # merged cluster is stored; keep this order.
        self.__update_heap(merged)
        self.__clusters[str(merged.indexes)] = merged
def __create_clusters(self, centroids, mtx):
    """Build Cluster objects from centroid rows and fill them with points.

    One cluster is appended to ``self.__clusters`` per row of ``centroids``;
    its center is the first ``self.__dims`` entries of that row divided by
    the row's second-to-last entry (per the original comments: the sum of
    the points divided by the size of the group).  Each row of ``mtx`` is
    then added to the cluster selected by the row's last entry: its first
    ``self.__dims`` entries as a point, and its row number as an index.

    NOTE(review): the row label indexes ``self.__clusters`` directly, which
    looks like it assumes the list is empty on entry — confirm with callers.
    """
    dims = self.__dims

    # One fresh cluster per centroid row, with its center filled in.
    n_centroids = centroids.shape[0]
    for row_idx in range(n_centroids):
        new_cluster = Cluster(None, None)
        scaled = centroids[row_idx, :dims] / centroids[row_idx, -2]
        new_cluster.center = list(scaled)
        self.__clusters.append(new_cluster)

    # Hand every row of the matrix to the cluster named in its last column.
    n_rows = mtx.shape[0]
    for row_idx in range(n_rows):
        label = mtx[row_idx, -1]
        coords = list(mtx[row_idx, :dims])
        target = self.__clusters[int(label)]
        target.points.append(coords)
        target.indexes.append(row_idx)