Example #1
0
def __minimum_noiseless_description_length(data, clusters, centers):
    """
    @brief Calculates splitting criterion for input clusters using minimum noiseless description length criterion.
    @details The estimate is calculated once for the whole cluster structure and is assigned to every cluster,
             therefore the returned value is the sum of len(clusters) identical scores.

    @param[in] data (list): Points that are referenced by index from the clusters.
    @param[in] clusters (list): Clusters (lists of point indexes) for which splitting criterion should be calculated.
    @param[in] centers (list): Centers of the clusters.

    @return (double) Splitting criterion of the cluster structure. Low value of splitting criterion means that
            current structure is much better. Infinity is returned when the amount of points does not exceed
            the amount of clusters; 0 is returned when there are no clusters at all.

    """

    alpha = 0.9
    betta = 0.9

    K = len(clusters)
    N = 0.0

    # Accumulated over all points of all clusters. The list_math_* helpers are assumed to work
    # element-wise, which makes each term a squared distance to the cluster center - TODO confirm.
    total_deviation = 0.0

    for index_cluster, cluster in enumerate(clusters):
        for index_object in cluster:
            delta_vector = list_math_subtraction(data[index_object],
                                                 centers[index_cluster])
            total_deviation += sum(
                list_math_multiplication(delta_vector, delta_vector))

        N += len(cluster)

    # The estimate divides by N and by (N - K), so it requires more points than clusters.
    # Checking '<= 0' instead of '== 0' also rejects N < K (empty clusters): with N == 0 the
    # division raises ZeroDivisionError and with 0 < N < K the variance is negative, so its
    # square root becomes a complex number.
    if N - K <= 0:
        # Sum of an empty score list is 0, otherwise every cluster is scored as infinity.
        return float("inf") if K > 0 else 0

    W = total_deviation / N
    sigma_sqrt = total_deviation / (N - K)
    sigma = sigma_sqrt**0.5

    # None of the terms below depends on a particular cluster, so they are evaluated only once.
    Kw = (1.0 - K / N) * sigma_sqrt
    Ks = (2.0 * alpha * sigma / (N**0.5)) + (
        (alpha**2.0) * sigma_sqrt / N + W - Kw / 2.0)**0.5
    U = W - Kw + 2.0 * (alpha**2.0) * sigma_sqrt / N + Ks

    Z = K * sigma_sqrt / N + U + betta * (
        (2.0 * K)**0.5) * sigma_sqrt / N

    # Zero estimate is treated as the worst possible score.
    if Z == 0.0:
        Z = float("inf")

    # Every cluster gets the same score; summation is kept to return exactly the same value as before.
    return sum([Z] * K)
Example #2
0
    def __minimum_noiseless_description_length(self, clusters, centers):
        """!
        @brief Calculates splitting criterion for input clusters using minimum noiseless description length criterion.
        @details The estimate is calculated once for the whole cluster structure and is assigned to every cluster,
                 therefore the returned value is the sum of len(clusters) identical scores. Points are read from
                 the data stored in the instance.
        
        @param[in] clusters (list): Clusters (lists of point indexes) for which splitting criterion should be calculated.
        @param[in] centers (list): Centers of the clusters.
        
        @return (double) Splitting criterion of the cluster structure. Low value of splitting criterion means that
                current structure is much better. Infinity is returned when the amount of points does not exceed
                the amount of clusters; 0 is returned when there are no clusters at all.
        
        @see __bayesian_information_criterion(clusters, centers)
        
        """

        alpha = 0.9
        betta = 0.9

        K = len(clusters)
        N = 0.0

        # Accumulated over all points of all clusters. The list_math_* helpers are assumed to work
        # element-wise, which makes each term a squared distance to the cluster center - TODO confirm.
        total_deviation = 0.0

        for index_cluster, cluster in enumerate(clusters):
            for index_object in cluster:
                delta_vector = list_math_subtraction(self.__pointer_data[index_object], centers[index_cluster])
                total_deviation += sum(list_math_multiplication(delta_vector, delta_vector))

            N += len(cluster)

        # The estimate divides by N and by (N - K), so it requires more points than clusters.
        # Checking '<= 0' instead of '== 0' also rejects N < K (empty clusters): with N == 0 the
        # division raises ZeroDivisionError and with 0 < N < K the variance is negative, so its
        # square root becomes a complex number.
        if N - K <= 0:
            # Sum of an empty score list is 0, otherwise every cluster is scored as infinity.
            return float("inf") if K > 0 else 0

        W = total_deviation / N
        sigma_sqrt = total_deviation / (N - K)
        sigma = sigma_sqrt ** 0.5

        # None of the terms below depends on a particular cluster, so they are evaluated only once.
        Kw = (1.0 - K / N) * sigma_sqrt
        Ks = (2.0 * alpha * sigma / (N ** 0.5)) + ((alpha ** 2.0) * sigma_sqrt / N + W - Kw / 2.0) ** 0.5
        U = W - Kw + 2.0 * (alpha ** 2.0) * sigma_sqrt / N + Ks

        Z = K * sigma_sqrt / N + U + betta * ((2.0 * K) ** 0.5) * sigma_sqrt / N

        # Zero estimate is treated as the worst possible score.
        if Z == 0.0:
            Z = float("inf")

        # Every cluster gets the same score; summation is kept to return exactly the same value as before.
        return sum([Z] * K)
Example #3
0
    def __minimum_noiseless_description_length(self, clusters, centers):
        """!
        @brief Calculates splitting criterion for input clusters using minimum noiseless description length criterion.
        @details The estimate is calculated once for the whole cluster structure and is assigned to every cluster,
                 therefore the returned value is the sum of len(clusters) identical scores. Points are read from
                 the data stored in the instance.
        
        @param[in] clusters (list): Clusters (lists of point indexes) for which splitting criterion should be calculated.
        @param[in] centers (list): Centers of the clusters.
        
        @return (double) Splitting criterion of the cluster structure. Low value of splitting criterion means that
                current structure is much better. Infinity is returned when the amount of points does not exceed
                the amount of clusters; 0 is returned when there are no clusters at all.
        
        @see __bayesian_information_criterion(clusters, centers)
        
        """

        alpha = 0.9
        betta = 0.9

        K = len(clusters)
        N = 0.0

        # Accumulated over all points of all clusters. The list_math_* helpers are assumed to work
        # element-wise, which makes each term a squared distance to the cluster center - TODO confirm.
        total_deviation = 0.0

        for index_cluster, cluster in enumerate(clusters):
            for index_object in cluster:
                delta_vector = list_math_subtraction(self.__pointer_data[index_object], centers[index_cluster])
                total_deviation += sum(list_math_multiplication(delta_vector, delta_vector))

            N += len(cluster)

        # The estimate divides by N and by (N - K), so it requires more points than clusters.
        # Checking '<= 0' instead of '== 0' also rejects N < K (empty clusters): with N == 0 the
        # division raises ZeroDivisionError and with 0 < N < K the variance is negative, so its
        # square root becomes a complex number.
        if N - K <= 0:
            # Sum of an empty score list is 0, otherwise every cluster is scored as infinity.
            return float("inf") if K > 0 else 0

        W = total_deviation / N
        sigma_sqrt = total_deviation / (N - K)
        sigma = sigma_sqrt ** 0.5

        # None of the terms below depends on a particular cluster, so they are evaluated only once.
        Kw = (1.0 - K / N) * sigma_sqrt
        Ks = (2.0 * alpha * sigma / (N ** 0.5)) + ((alpha ** 2.0) * sigma_sqrt / N + W - Kw / 2.0) ** 0.5
        U = W - Kw + 2.0 * (alpha ** 2.0) * sigma_sqrt / N + Ks

        Z = K * sigma_sqrt / N + U + betta * ((2.0 * K) ** 0.5) * sigma_sqrt / N

        # Zero estimate is treated as the worst possible score.
        if Z == 0.0:
            Z = float("inf")

        # Every cluster gets the same score; summation is kept to return exactly the same value as before.
        return sum([Z] * K)
Example #4
0
 def __sub__(self, entry):
     """!
     @brief Subtraction operator for clustering features.
     @details Number of points and square sum of the specified entry are subtracted from the values of the
              current one, linear sums are subtracted using list_math_subtraction. The operands are not changed.
     
     @param[in] entry (cfentry): Clustering feature that is subtracted from the current one.
     
     @return (cfentry) New clustering feature that is built from the calculated differences.
     
     @exception NameError Raised when the resulting number of points or the resulting square sum is negative.
     
     """

     points_left = self.number_points - entry.number_points
     linear_sum_left = list_math_subtraction(self.linear_sum, entry.linear_sum)
     square_sum_left = self.square_sum - entry.square_sum

     # Feature with negative amount of points or negative square sum is rejected.
     if points_left < 0 or square_sum_left < 0:
         raise NameError("Substruction with negative result is not possible for clustering features.")

     return cfentry(points_left, linear_sum_left, square_sum_left)
Example #5
0
 def __sub__(self, entry):
     """!
     @brief Implements 'self - entry' for clustering features.
     @details The differences of the number of points, of the linear sums (via list_math_subtraction) and of
              the square sums are calculated and packed into a new clustering feature.
     
     @param[in] entry (cfentry): Clustering feature that plays role of the subtrahend.
     
     @return (cfentry) Clustering feature created from the three calculated differences.
     
     @exception NameError Raised when the difference of point amounts or of square sums is below zero.
     
     """

     diff_points = self.number_points - entry.number_points
     diff_linear_sum = list_math_subtraction(self.linear_sum, entry.linear_sum)
     diff_square_sum = self.square_sum - entry.square_sum

     # Both scalar components have to stay non-negative, they are checked in the same order as calculated.
     if any(component < 0 for component in (diff_points, diff_square_sum)):
         raise NameError("Substruction with negative result is not possible for clustering features.")

     return cfentry(diff_points, diff_linear_sum, diff_square_sum)