Example #1
0
    def __improve_structure(self, clusters, centers):
        """!
        @brief Check for best structure: divides each cluster into two and checks for best results using splitting criterion.
        
        @param[in] clusters (list): Clusters that have been allocated (each cluster contains indexes of points from data).
        @param[in] centers (list): Centers of clusters.
        
        @return (list) Allocated centers for clustering.
        
        """

        difference = 0.001

        allocated_centers = []

        for index_cluster in range(len(clusters)):
            # split cluster into two child clusters
            parent_child_centers = []
            parent_child_centers.append(
                list_math_addition_number(centers[index_cluster], -difference))
            parent_child_centers.append(
                list_math_addition_number(centers[index_cluster], difference))

            # solve k-means problem for children where data of parent are used.
            (parent_child_clusters,
             parent_child_centers) = self.__improve_parameters(
                 parent_child_centers, clusters[index_cluster])

            # If it's possible to split current data
            if (len(parent_child_clusters) > 1):
                # Calculate splitting criterion
                parent_scores = self.__splitting_criterion(
                    [clusters[index_cluster]], [centers[index_cluster]])
                child_scores = self.__splitting_criterion(
                    [parent_child_clusters[0], parent_child_clusters[1]],
                    parent_child_centers)

                split_require = False

                # Reallocate number of centers (clusters) in line with scores
                if (self.__criterion ==
                        splitting_type.BAYESIAN_INFORMATION_CRITERION):
                    if (parent_scores < child_scores): split_require = True

                elif (self.__criterion ==
                      splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH):
                    # If its score for the split structure with two children is smaller than that for the parent structure,
                    # then representing the data samples with two clusters is more accurate in comparison to a single parent cluster.
                    if (parent_scores > child_scores): split_require = True

                if (split_require is True):
                    allocated_centers.append(parent_child_centers[0])
                    allocated_centers.append(parent_child_centers[1])
                else:
                    allocated_centers.append(centers[index_cluster])

            else:
                allocated_centers.append(centers[index_cluster])

        return allocated_centers
Example #2
0
    def __improve_structure(self, clusters, centers):
        """!
        @brief Check for best structure: divides each cluster into two and checks for best results using splitting criterion.
        
        @param[in] clusters (list): Clusters that have been allocated (each cluster contains indexes of points from data).
        @param[in] centers (list): Centers of clusters.
        
        @return (list) Allocated centers for clustering.
        
        """
         
        difference = 0.001;
          
        allocated_centers = [];
          
        for index_cluster in range(len(clusters)):
            # split cluster into two child clusters
            parent_child_centers = [];
            parent_child_centers.append(list_math_addition_number(centers[index_cluster], -difference));
            parent_child_centers.append(list_math_addition_number(centers[index_cluster], difference));
          
            # solve k-means problem for children where data of parent are used.
            (parent_child_clusters, parent_child_centers) = self.__improve_parameters(parent_child_centers, clusters[index_cluster]);
              
            # If it's possible to split current data
            if (len(parent_child_clusters) > 1):
                # Calculate splitting criterion
                parent_scores = self.__splitting_criterion([ clusters[index_cluster] ], [ centers[index_cluster] ]);
                child_scores = self.__splitting_criterion([ parent_child_clusters[0], parent_child_clusters[1] ], parent_child_centers);
              
                split_require = False;
                
                # Reallocate number of centers (clusters) in line with scores        
                if (self.__criterion == splitting_type.BAYESIAN_INFORMATION_CRITERION):
                    if (parent_scores < child_scores): split_require = True;
                    
                elif (self.__criterion == splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH):
                    if (parent_scores > child_scores): split_require = True;
                    
                if (split_require is True):
                    allocated_centers.append(parent_child_centers[0]);
                    allocated_centers.append(parent_child_centers[1]);
                else:
                    allocated_centers.append(centers[index_cluster]);

                    
            else:
                allocated_centers.append(centers[index_cluster]);
          
        return allocated_centers;