Ejemplo n.º 1
0
 def get_radius(self):
     """!
     @brief Calculates radius of cluster that is represented by the entry.
     @details The radius is evaluated from the statistics of the entry as
               sqrt((square_sum - 2 * <linear_sum, centroid> + number_points * <centroid, centroid>) / number_points),
               where the centroid is obtained from get_centroid(). The value is stored in the entry and
               returned as is on the next requests while it is not None.
               A negative value under the root can appear only because of floating-point round-off,
               it is treated as zero so that a real number is always returned.
     
     @return (double) Radius of cluster that is represented by the entry.
     
     """
     
     if self.__radius is not None:
         return self.__radius
     
     centroid = self.get_centroid()
     
     if isinstance(centroid, list):
         # Multi-dimensional case: list_math_multiplication is defined outside of this method,
         # presumably it is an element-wise product, so the sums below are dot products.
         linear_term = 2.0 * sum(list_math_multiplication(self.linear_sum, centroid))
         centroid_term = self.number_points * sum(list_math_multiplication(centroid, centroid))
     else:
         # One-dimensional case: statistics are plain numbers.
         linear_term = 2.0 * self.linear_sum * centroid
         centroid_term = self.number_points * centroid * centroid
     
     squared_radius = (1.0 / self.number_points) * (self.square_sum - linear_term + centroid_term)
     
     # The three terms nearly cancel for tight clusters, so the result may be a tiny negative number;
     # raising it to 0.5 would produce a complex value in Python 3.
     self.__radius = max(squared_radius, 0.0) ** 0.5
     return self.__radius
Ejemplo n.º 2
0
 def get_radius(self):
     """!
     @brief Calculates radius of cluster that is represented by the entry.
     @details It's calculated once when it's requested after the last changes: a stored value that
               is not None is returned without recalculation.
               The value under the square root is
               (square_sum - 2 * <linear_sum, centroid> + number_points * <centroid, centroid>) / number_points.
               If round-off makes it negative, zero is used instead, therefore the result is always a real number.
     
     @return (double) Radius of cluster that is represented by the entry.
     
     """
     
     cached_radius = self.__radius
     if cached_radius is not None:
         return cached_radius
     
     centroid = self.get_centroid()
     points_amount = self.number_points
     
     if isinstance(centroid, list):
         # list_math_multiplication comes from outside of this method (presumably element-wise product).
         cross_part = 2.0 * sum(list_math_multiplication(self.linear_sum, centroid))
         centroid_part = points_amount * sum(list_math_multiplication(centroid, centroid))
     else:
         # Scalar statistics (one-dimensional data).
         cross_part = 2.0 * self.linear_sum * centroid
         centroid_part = points_amount * centroid * centroid
     
     radicand = (1.0 / points_amount) * (self.square_sum - cross_part + centroid_part)
     if radicand < 0.0:
         # Only cancellation error can lead here; a negative float raised to 0.5 is complex in Python 3.
         radicand = 0.0
     
     self.__radius = radicand ** 0.5
     return self.__radius
Ejemplo n.º 3
0
    def __get_variance_increase_distance(self, entry):
        """!
        @brief Calculates variance increase distance between current and specified clusters.
        @details Three terms of the form 'square_sum - 2 * norm / N + norm / N' are evaluated: one for
                  the merged statistics of both entries, one for the current entry and one for the
                  specified entry. Here 'norm' is sum(list_math_multiplication(LS, LS)) of the
                  corresponding linear sum. The result is the merged term minus the two separate terms.
        
        @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
        
        @return (double) Variance increase distance.
        
        """

        count_own = self.number_points
        count_other = entry.number_points
        count_merged = self.number_points + entry.number_points

        # Statistics of the cluster that would be obtained by merging both entries.
        merged_linear_sum = list_math_addition(self.linear_sum, entry.linear_sum)
        merged_norm = sum(list_math_multiplication(merged_linear_sum, merged_linear_sum))
        merged_term = (
            (self.square_sum + entry.square_sum)
            - 2.0 * merged_norm / count_merged
            + count_merged * merged_norm / count_merged ** 2.0
        )

        # The same quantity for the current entry alone.
        own_norm = sum(list_math_multiplication(self.linear_sum, self.linear_sum))
        own_term = self.square_sum - (2.0 * own_norm / count_own) + (own_norm / count_own)

        # The same quantity for the specified entry alone.
        other_norm = sum(list_math_multiplication(entry.linear_sum, entry.linear_sum))
        other_term = (
            entry.square_sum
            - (2.0 / count_other) * other_norm
            + count_other * (1.0 / count_other ** 2.0) * other_norm
        )

        return merged_term - own_term - other_term
Ejemplo n.º 4
0
    def __get_average_intra_cluster_distance(self, entry):
        """!
        @brief Calculates average intra cluster distance between current and specified clusters.
        @details The statistics of both entries are added together (N, LS, SS) and the distance is
                  sqrt((2 * N * SS - 2 * norm) / (N * (N - 1))), where 'norm' is
                  sum(list_math_multiplication(LS, LS)) of the merged linear sum.
                  A negative value under the root can be caused only by floating-point round-off,
                  it is replaced by zero so that a real number is always returned.

        @param[in] entry (cfentry): Clustering feature to which distance should be obtained.

        @return (double) Average intra cluster distance.

        """

        total_points = self.number_points + entry.number_points
        total_square_sum = self.square_sum + entry.square_sum

        merged_linear_sum = list_math_addition(self.linear_sum, entry.linear_sum)
        merged_norm = sum(
            list_math_multiplication(merged_linear_sum, merged_linear_sum)
        )

        general_part_distance = (
            2.0 * total_points * total_square_sum - 2.0 * merged_norm
        )
        squared_distance = general_part_distance / (
            total_points * (total_points - 1.0)
        )

        # Both parts of the difference are nearly equal when all points coincide; without the clamp
        # a tiny negative value would turn into a complex number in Python 3.
        return max(squared_distance, 0.0) ** 0.5
Ejemplo n.º 5
0
    def get_diameter(self):
        """!
        @brief Calculates diameter of cluster that is represented by the entry.
        @details It's calculated once when it's requested after the last changes: a stored value that
                  is not None is returned without recalculation.
                  The diameter is sqrt((2 * N * SS - 2 * norm) / (N * (N - 1))), where N is number_points,
                  SS is square_sum and 'norm' is the linear sum multiplied by itself.
                  An entry with less than two points has no pairs of points, its diameter is 0.0
                  (the formula would divide by zero). A non-positive value under the root, which can
                  be produced by floating-point round-off, also gives 0.0.

        @return (double) Diameter of cluster that is represented by the entry.

        """

        if self.__diameter is not None:
            return self.__diameter

        if isinstance(self.linear_sum, list):
            # list_math_multiplication is defined outside of this method (presumably element-wise product).
            doubled_norm = 2.0 * sum(
                list_math_multiplication(self.linear_sum, self.linear_sum)
            )
        else:
            # One-dimensional case: linear sum is a plain number.
            doubled_norm = 2.0 * self.linear_sum * self.linear_sum

        diameter_part = (
            self.square_sum * self.number_points
            - doubled_norm
            + self.square_sum * self.number_points
        )

        if self.number_points < 2 or diameter_part <= 0.0:
            # Single point (denominator is zero) or cancellation error (complex root in Python 3).
            self.__diameter = 0.0
        else:
            self.__diameter = (
                diameter_part / (self.number_points * (self.number_points - 1))
            ) ** 0.5

        return self.__diameter
Ejemplo n.º 6
0
def __minimum_noiseless_description_length(data, clusters, centers):
    """
    @brief Calculates splitting criterion for input clusters using minimum noiseless description length criterion.
    @details One score is evaluated for the whole structure and it is counted once per cluster, i.e. the
              result is the sum of len(clusters) equal scores. The score is infinite when the total
              amount of points does not exceed the amount of clusters (the variance estimation would
              require division by a non-positive number) or when the evaluated score is exactly zero.

    @param[in] data (list): Points that are referred by indexes stored in the clusters.
    @param[in] clusters (list): Clusters for which splitting criterion should be calculated, each cluster
                is a collection of indexes of points from data.
    @param[in] centers (list): Centers of the clusters.

    @return (double) Returns splitting criterion in line with minimum noiseless description length criterion.
            Low value of splitting criterion means that current structure is much better.

    """

    alpha = 0.9
    betta = 0.9

    K = len(clusters)
    N = 0.0

    # Sum of sum(list_math_multiplication(delta, delta)) over all points, where delta is
    # list_math_subtraction(point, center of its cluster); helpers are defined outside of this function.
    total_deviation = 0.0

    for index_cluster, cluster in enumerate(clusters):
        for index_object in cluster:
            delta_vector = list_math_subtraction(data[index_object],
                                                 centers[index_cluster])
            total_deviation += sum(
                list_math_multiplication(delta_vector, delta_vector))

        N += len(cluster)

    score = float("inf")

    # N - K must be strictly positive: N == 0 leads to division by zero and N < K leads to
    # a negative variance estimation whose square root is not a real number.
    if N - K > 0:
        W = total_deviation / N
        sigma_sqrt = total_deviation / (N - K)
        sigma = sigma_sqrt**0.5

        Kw = (1.0 - K / N) * sigma_sqrt
        Ks = (2.0 * alpha * sigma / (N**0.5)) + (
            (alpha**2.0) * sigma_sqrt / N + W - Kw / 2.0)**0.5
        U = W - Kw + 2.0 * (alpha**2.0) * sigma_sqrt / N + Ks

        Z = K * sigma_sqrt / N + U + betta * (
            (2.0 * K)**0.5) * sigma_sqrt / N

        if Z != 0.0:
            score = Z

    # The score does not depend on a particular cluster, so it is simply repeated for each of them.
    return sum([score] * K)
Ejemplo n.º 7
0
    def __minimum_noiseless_description_length(self, clusters, centers):
        """!
        @brief Calculates splitting criterion for input clusters using minimum noiseless description length criterion.
        @details One score is evaluated for the whole structure and it is counted once per cluster, i.e. the
                  result is the sum of len(clusters) equal scores. The score is infinite when the total
                  amount of points does not exceed the amount of clusters (the variance estimation would
                  require division by a non-positive number) or when the evaluated score is exactly zero.
        
        @param[in] clusters (list): Clusters for which splitting criterion should be calculated, each cluster
                    is a collection of indexes of points from the input data of the instance.
        @param[in] centers (list): Centers of the clusters.
        
        @return (double) Returns splitting criterion in line with minimum noiseless description length criterion.
                Low value of splitting criterion means that current structure is much better.
        
        @see __bayesian_information_criterion(clusters, centers)
        
        """

        alpha = 0.9
        betta = 0.9

        K = len(clusters)
        N = 0.0

        # Accumulated sum(list_math_multiplication(delta, delta)) over all points, where delta is
        # list_math_subtraction(point, center of its cluster); helpers are defined outside of this method.
        total_deviation = 0.0

        for index_cluster, cluster in enumerate(clusters):
            for index_object in cluster:
                delta_vector = list_math_subtraction(self.__pointer_data[index_object], centers[index_cluster])
                total_deviation += sum(list_math_multiplication(delta_vector, delta_vector))

            N += len(cluster)

        score = float("inf")

        # N - K must be strictly positive: N == 0 leads to division by zero and N < K leads to
        # a negative variance estimation whose square root is not a real number.
        if N - K > 0:
            W = total_deviation / N
            sigma_sqrt = total_deviation / (N - K)
            sigma = sigma_sqrt ** 0.5

            Kw = (1.0 - K / N) * sigma_sqrt
            Ks = (2.0 * alpha * sigma / (N ** 0.5)) + ((alpha ** 2.0) * sigma_sqrt / N + W - Kw / 2.0) ** 0.5
            U = W - Kw + 2.0 * (alpha ** 2.0) * sigma_sqrt / N + Ks

            Z = K * sigma_sqrt / N + U + betta * ((2.0 * K) ** 0.5) * sigma_sqrt / N

            if Z != 0.0:
                score = Z

        # The score does not depend on a particular cluster, so it is simply repeated for each of them.
        return sum([score] * K)
Ejemplo n.º 8
0
    def __minimum_noiseless_description_length(self, clusters, centers):
        """!
        @brief Calculates splitting criterion for input clusters using minimum noiseless description length criterion.
        @details The criterion is evaluated once for the whole set of clusters and then summed up
                  len(clusters) times. Infinity is used as the per-cluster score in two cases: when the
                  total amount of points is not greater than the amount of clusters (including the case
                  when all clusters are empty) and when the evaluated score is exactly zero.
        
        @param[in] clusters (list): Clusters for which splitting criterion should be calculated, each cluster
                    is a collection of indexes of points from the input data of the instance.
        @param[in] centers (list): Centers of the clusters.
        
        @return (double) Returns splitting criterion in line with minimum noiseless description length criterion.
                Low value of splitting criterion means that current structure is much better.
        
        @see __bayesian_information_criterion(clusters, centers)
        
        """

        amount_clusters = len(clusters)
        amount_points = 0.0
        deviation_sum = 0.0

        for index_cluster in range(amount_clusters):
            cluster = clusters[index_cluster]
            for index_object in cluster:
                # Helpers are defined outside of this method; presumably they are element-wise
                # operations, so the sum below is a squared distance from the point to its center.
                delta_vector = list_math_subtraction(self.__pointer_data[index_object], centers[index_cluster])
                deviation_sum += sum(list_math_multiplication(delta_vector, delta_vector))

            amount_points += len(cluster)

        # Variance estimation divides by (N - K): zero must be excluded, and a negative value would give
        # a negative variance (complex root) or, for N == 0, a division by zero a line earlier.
        if amount_points - amount_clusters <= 0:
            return sum([float("inf")] * amount_clusters)

        alpha = 0.9
        betta = 0.9

        W = deviation_sum / amount_points
        sigma_sqrt = deviation_sum / (amount_points - amount_clusters)
        sigma = sigma_sqrt ** 0.5

        Kw = (1.0 - amount_clusters / amount_points) * sigma_sqrt
        Ks = (2.0 * alpha * sigma / (amount_points ** 0.5)) + ((alpha ** 2.0) * sigma_sqrt / amount_points + W - Kw / 2.0) ** 0.5
        U = W - Kw + 2.0 * (alpha ** 2.0) * sigma_sqrt / amount_points + Ks

        Z = amount_clusters * sigma_sqrt / amount_points + U + betta * ((2.0 * amount_clusters) ** 0.5) * sigma_sqrt / amount_points

        score = float("inf") if Z == 0.0 else Z

        # Every cluster gets the same score because it depends only on global quantities.
        return sum([score] * amount_clusters)
Ejemplo n.º 9
0
 def __get_average_inter_cluster_distance(self, entry):
     """!
     @brief Calculates average inter cluster distance between current and specified clusters.
     @details The distance is sqrt((N2 * SS1 - 2 * cross + N1 * SS2) / (N1 * N2)), where index 1 denotes
               the current entry, index 2 denotes the specified entry and 'cross' is
               sum(list_math_multiplication(LS1, LS2)).
               A negative value under the root can be caused only by floating-point round-off,
               it is replaced by zero so that a real number is always returned.
     
     @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
     
     @return (double) Average inter cluster distance.
     
     """
     
     linear_part_distance = sum(list_math_multiplication(self.linear_sum, entry.linear_sum))
     
     squared_distance = (entry.number_points * self.square_sum - 2.0 * linear_part_distance + self.number_points * entry.square_sum) / (self.number_points * entry.number_points)
     
     # The terms nearly cancel when points of both clusters coincide; a tiny negative value raised
     # to 0.5 would be a complex number in Python 3.
     return max(squared_distance, 0.0) ** 0.5
Ejemplo n.º 10
0
 def __get_average_inter_cluster_distance(self, entry):
     """!
     @brief Calculates average inter cluster distance between current and specified clusters.
     @details The value under the square root is
               (N2 * SS1 - 2 * cross + N1 * SS2) / (N1 * N2), where N, SS are number_points and square_sum
               of the current (1) and the specified (2) entries, and 'cross' is
               sum(list_math_multiplication(LS1, LS2)) of their linear sums.
               If round-off makes this value negative, zero is used instead, therefore the result
               is always a real number.
     
     @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
     
     @return (double) Average inter cluster distance.
     
     """
     
     cross_part = sum(list_math_multiplication(self.linear_sum, entry.linear_sum))
     
     numerator = entry.number_points * self.square_sum - 2.0 * cross_part + self.number_points * entry.square_sum
     radicand = numerator / (self.number_points * entry.number_points)
     
     if radicand < 0.0:
         # Cancellation error only; a negative float raised to 0.5 is complex in Python 3.
         radicand = 0.0
     
     return radicand ** 0.5
Ejemplo n.º 11
0
    def __get_variance_increase_distance(self, entry):
        """!
        @brief Calculates variance increase distance between current and specified clusters.
        @details The result is a difference of three terms: the term evaluated for the summed statistics
                  of both entries minus the terms evaluated for each entry separately. Each term has the
                  form 'square_sum - 2 * norm / N + norm / N', where 'norm' is
                  sum(list_math_multiplication(LS, LS)) of the corresponding linear sum.
        
        @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
        
        @return (double) Variance increase distance.
        
        """

        points_total = self.number_points + entry.number_points

        # Term for the statistics of both entries added together.
        linear_sum_total = list_math_addition(self.linear_sum, entry.linear_sum)
        norm_total = sum(list_math_multiplication(linear_sum_total, linear_sum_total))
        term_total = (self.square_sum + entry.square_sum) - 2.0 * norm_total / points_total
        term_total = term_total + points_total * norm_total / points_total ** 2.0

        # Term for the current entry.
        norm_current = sum(list_math_multiplication(self.linear_sum, self.linear_sum))
        term_current = self.square_sum - (2.0 * norm_current / self.number_points)
        term_current = term_current + (norm_current / self.number_points)

        # Term for the specified entry.
        norm_entry = sum(list_math_multiplication(entry.linear_sum, entry.linear_sum))
        term_entry = entry.square_sum - (2.0 / entry.number_points) * norm_entry
        term_entry = term_entry + entry.number_points * (1.0 / entry.number_points ** 2.0) * norm_entry

        return term_total - term_current - term_entry
Ejemplo n.º 12
0
 def __get_average_intra_cluster_distance(self, entry):
     """!
     @brief Calculates average intra cluster distance between current and specified clusters.
     @details Statistics of both entries are summed up (N, LS, SS) and the value under the square root is
               (2 * N * SS - 2 * norm) / (N * (N - 1)), where 'norm' is
               sum(list_math_multiplication(LS, LS)) of the summed linear sum.
               If round-off makes this value negative, zero is used instead, therefore the result
               is always a real number.
     
     @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
     
     @return (double) Average intra cluster distance.
     
     """
     
     points_amount = self.number_points + entry.number_points
     
     linear_sum = list_math_addition(self.linear_sum, entry.linear_sum)
     linear_part_distance = sum(list_math_multiplication(linear_sum, linear_sum))
     
     general_part_distance = 2.0 * points_amount * (self.square_sum + entry.square_sum) - 2.0 * linear_part_distance
     radicand = general_part_distance / (points_amount * (points_amount - 1.0))
     
     if radicand < 0.0:
         # Cancellation error only; a negative float raised to 0.5 is complex in Python 3.
         radicand = 0.0
     
     return radicand ** 0.5
Ejemplo n.º 13
0
 def get_diameter(self):
     """!
     @brief Calculates diameter of cluster that is represented by the entry.
     @details The diameter is sqrt((2 * N * SS - 2 * norm) / (N * (N - 1))), where N is number_points,
               SS is square_sum and 'norm' is the linear sum multiplied by itself. The value is stored
               in the entry and returned as is on the next requests while it is not None.
               For an entry with less than two points the denominator is zero, so 0.0 is returned
               instead of raising ZeroDivisionError. 0.0 is also returned when the numerator is not
               positive, which can be produced by floating-point round-off.
     
     @return (double) Diameter of cluster that is represented by the entry.
     
     """
     
     cached_diameter = self.__diameter
     if cached_diameter is not None:
         return cached_diameter
     
     points_amount = self.number_points
     
     if isinstance(self.linear_sum, list):
         # list_math_multiplication is defined outside of this method (presumably element-wise product).
         diameter_part = self.square_sum * points_amount - 2.0 * sum(list_math_multiplication(self.linear_sum, self.linear_sum)) + self.square_sum * points_amount
     else:
         # Scalar statistics (one-dimensional data).
         diameter_part = self.square_sum * points_amount - 2.0 * self.linear_sum * self.linear_sum + self.square_sum * points_amount
     
     diameter = 0.0
     if points_amount >= 2 and diameter_part > 0.0:
         diameter = (diameter_part / (points_amount * (points_amount - 1))) ** 0.5
     
     self.__diameter = diameter
     return self.__diameter