예제 #1
0
    def __get_variance_increase_distance(self, entry):
        """!
        @brief Computes the variance increase distance between this clustering feature and the specified one.
        @details The result is a term built from the combined feature (sums of both entries)
                 minus the same kind of term evaluated for the current entry and for the
                 specified entry separately.

        @param[in] entry (cfentry): Clustering feature to which the distance is measured.

        @return (double) Variance increase distance.

        """

        # Characteristics of the feature obtained by combining both entries.
        merged_points = self.number_points + entry.number_points
        merged_linear = list_math_addition(self.linear_sum, entry.linear_sum)
        merged_dot = sum(list_math_multiplication(merged_linear, merged_linear))

        merged_term = (self.square_sum + entry.square_sum) \
            - 2.0 * merged_dot / merged_points \
            + merged_points * merged_dot / merged_points ** 2.0

        # Same expression evaluated for the current entry alone.
        own_dot = sum(list_math_multiplication(self.linear_sum, self.linear_sum))
        own_term = self.square_sum \
            - (2.0 * own_dot / self.number_points) \
            + (own_dot / self.number_points)

        # Same expression evaluated for the specified entry alone.
        other_dot = sum(list_math_multiplication(entry.linear_sum, entry.linear_sum))
        other_term = entry.square_sum \
            - (2.0 / entry.number_points) * other_dot \
            + entry.number_points * (1.0 / entry.number_points ** 2.0) * other_dot

        return merged_term - own_term - other_term
예제 #2
0
 def get_radius(self):
     """!
     @brief Calculates radius of cluster that is represented by the entry.
     @details The value is computed on first request and stored in the entry; while the stored
              value is not None it is returned without recalculation.
              The radius is the square root of (SS - 2 * (LS . C) + N * (C . C)) / N, where SS is
              the square sum, LS is the linear sum, N is the number of points and C is the
              centroid returned by get_centroid(). A negative value under the root (possible
              only because of floating-point round-off) is treated as zero.

     @return (double) Radius of cluster that is represented by the entry.

     """

     if self.__radius is not None:
         return self.__radius

     centroid = self.get_centroid()

     # The centroid is either a list of coordinates or a single scalar value.
     if isinstance(centroid, list):
         cross_term = 2.0 * sum(list_math_multiplication(self.linear_sum, centroid))
         centroid_term = self.number_points * sum(list_math_multiplication(centroid, centroid))
     else:
         cross_term = 2.0 * self.linear_sum * centroid
         centroid_term = self.number_points * centroid * centroid

     radicand = (1.0 / self.number_points) * (self.square_sum - cross_term + centroid_term)

     # Raising a negative float to the power 0.5 yields a complex number in Python 3,
     # so a slightly negative round-off result is clamped to zero.
     if radicand < 0.0:
         radicand = 0.0

     self.__radius = radicand ** 0.5
     return self.__radius
예제 #3
0
    def get_radius(self):
        """!
        @brief Calculates radius of cluster that is represented by the entry.
        @details The value is computed on first request and stored in the entry; while the stored
                 value is not None it is returned without recalculation.
                 The radius is the square root of (SS - 2 * (LS . C) + N * (C . C)) / N, where SS is
                 the square sum, LS is the linear sum, N is the number of points and C is the
                 centroid returned by get_centroid(). A negative value under the root (possible
                 only because of floating-point round-off) is treated as zero.

        @return (double) Radius of cluster that is represented by the entry.

        """

        if self.__radius is not None:
            return self.__radius

        centroid = self.get_centroid()

        # The centroid is either a list of coordinates or a single scalar value.
        if isinstance(centroid, list):
            cross_term = 2.0 * sum(
                list_math_multiplication(self.linear_sum, centroid))
            centroid_term = self.number_points * sum(
                list_math_multiplication(centroid, centroid))
        else:
            cross_term = 2.0 * self.linear_sum * centroid
            centroid_term = self.number_points * centroid * centroid

        radicand = (1.0 / self.number_points) * (
            self.square_sum - cross_term + centroid_term)

        # Raising a negative float to the power 0.5 yields a complex number in Python 3,
        # so a slightly negative round-off result is clamped to zero.
        if radicand < 0.0:
            radicand = 0.0

        self.__radius = radicand ** 0.5
        return self.__radius
예제 #4
0
    def __minimum_noiseless_description_length(self, clusters, centers):
        """!
        @brief Calculates splitting criterion for input clusters using minimum noiseless description length criterion.
        @details The squared deviations of all objects from the centers of their clusters are
                 accumulated and converted into a single score that is the same for every
                 cluster; the returned value is the sum of that score over all clusters.
                 When the total number of objects does not exceed the number of clusters the
                 criterion cannot be evaluated and every cluster is scored as infinity.

        @param[in] clusters (list): Clusters for which splitting criterion should be calculated;
                    each cluster is a collection of indexes of objects in the processed data.
        @param[in] centers (list): Centers of the clusters.

        @return (double) Returns splitting criterion in line with minimum noiseless description length criterion.
                Low value of splitting criterion means that current structure is much better.

        @see __bayesian_information_criterion(clusters, centers)

        """

        alpha = 0.9
        betta = 0.9

        K = len(clusters)
        N = 0.0

        # Total squared deviation of the objects from the centers of their clusters.
        squared_error = 0.0
        for index_cluster, cluster in enumerate(clusters):
            for index_object in cluster:
                delta_vector = list_math_subtraction(self.__pointer_data[index_object], centers[index_cluster])
                squared_error += sum(list_math_multiplication(delta_vector, delta_vector))

            N += len(cluster)

        # With N <= K the divisor (N - K) is zero or negative, which would lead to division
        # by zero or to a complex-valued score, so such a structure is scored as infinity.
        if N - K <= 0:
            return sum([float("inf")] * K)

        W = squared_error / N
        sigma_sqrt = squared_error / (N - K)
        sigma = sigma_sqrt ** 0.5

        Kw = (1.0 - K / N) * sigma_sqrt
        Ks = (2.0 * alpha * sigma / (N ** 0.5)) + ((alpha ** 2.0) * sigma_sqrt / N + W - Kw / 2.0) ** 0.5
        U = W - Kw + 2.0 * (alpha ** 2.0) * sigma_sqrt / N + Ks

        Z = K * sigma_sqrt / N + U + betta * ((2.0 * K) ** 0.5) * sigma_sqrt / N

        score = float("inf") if Z == 0.0 else Z

        # The score does not depend on a particular cluster; K copies are summed so that the
        # result is the same as when the score is assigned to every cluster separately.
        return sum([score] * K)
예제 #5
0
 def __get_average_inter_cluster_distance(self, entry):
     """!
     @brief Computes the average inter cluster distance between this clustering feature and the specified one.
     @details The value is the square root of
              (N2 * SS1 - 2 * (LS1 . LS2) + N1 * SS2) / (N1 * N2), where index 1 denotes the
              current entry and index 2 denotes the specified entry.

     @param[in] entry (cfentry): Clustering feature to which the distance is measured.

     @return (double) Average inter cluster distance.

     """

     cross_dot = sum(list_math_multiplication(self.linear_sum, entry.linear_sum))

     numerator = entry.number_points * self.square_sum \
         - 2.0 * cross_dot \
         + self.number_points * entry.square_sum
     denominator = self.number_points * entry.number_points

     return (numerator / denominator) ** 0.5
예제 #6
0
    def __get_variance_increase_distance(self, entry):
        """!
        @brief Computes the variance increase distance between this clustering feature and the specified one.
        @details Three terms are summed: one calculated from the combined sums of both entries,
                 and the negated terms of the same form calculated for the current entry and
                 for the specified entry.

        @param[in] entry (cfentry): Clustering feature to which the distance is measured.

        @return (double) Variance increase distance.

        """

        total_points = self.number_points + entry.number_points
        joint_linear_sum = list_math_addition(self.linear_sum, entry.linear_sum)
        joint_dot = sum(list_math_multiplication(joint_linear_sum, joint_linear_sum))

        # Term of the combined feature.
        joint_part = (self.square_sum + entry.square_sum) \
            - 2.0 * joint_dot / total_points \
            + total_points * joint_dot / total_points ** 2.0

        # Term of the current entry.
        self_dot = sum(list_math_multiplication(self.linear_sum, self.linear_sum))
        self_part = self.square_sum - (2.0 * self_dot / self.number_points) + (self_dot / self.number_points)

        # Term of the specified entry.
        entry_dot = sum(list_math_multiplication(entry.linear_sum, entry.linear_sum))
        entry_part = entry.square_sum \
            - (2.0 / entry.number_points) * entry_dot \
            + entry.number_points * (1.0 / entry.number_points ** 2.0) * entry_dot

        return joint_part - self_part - entry_part
예제 #7
0
    def __get_average_inter_cluster_distance(self, entry):
        """!
        @brief Computes the average inter cluster distance between this clustering feature and the specified one.
        @details The value is the square root of
                 (N2 * SS1 - 2 * (LS1 . LS2) + N1 * SS2) / (N1 * N2), where index 1 denotes the
                 current entry and index 2 denotes the specified entry.

        @param[in] entry (cfentry): Clustering feature to which the distance is measured.

        @return (double) Average inter cluster distance.

        """

        mixed_dot = sum(list_math_multiplication(self.linear_sum, entry.linear_sum))

        pair_count = self.number_points * entry.number_points
        distance_sum = entry.number_points * self.square_sum \
            - 2.0 * mixed_dot \
            + self.number_points * entry.square_sum

        return (distance_sum / pair_count) ** 0.5
예제 #8
0
 def __get_average_intra_cluster_distance(self, entry):
     """!
     @brief Computes the average intra cluster distance between this clustering feature and the specified one.
     @details The value is the square root of
              (2 * N * SS - 2 * (LS . LS)) / (N * (N - 1)), where N, SS and LS are the number of
              points, the square sum and the linear sum of both entries taken together.

     @param[in] entry (cfentry): Clustering feature to which the distance is measured.

     @return (double) Average intra cluster distance.

     """

     merged_points = self.number_points + entry.number_points
     merged_linear = list_math_addition(self.linear_sum, entry.linear_sum)
     merged_dot = sum(list_math_multiplication(merged_linear, merged_linear))

     numerator = 2.0 * merged_points * (self.square_sum + entry.square_sum) - 2.0 * merged_dot
     denominator = merged_points * (merged_points - 1.0)

     return (numerator / denominator) ** 0.5
예제 #9
0
 def get_diameter(self):
     """!
     @brief Calculates diameter of cluster that is represented by the entry.
     @details The value is computed on first request and stored in the entry; while the stored
              value is not None it is returned without recalculation.
              The diameter is the square root of (2 * N * SS - 2 * (LS . LS)) / (N * (N - 1)),
              where SS is the square sum, LS is the linear sum and N is the number of points.
              When N * (N - 1) is not positive (for example, an entry with a single point) or
              the numerator is not positive (floating-point round-off), the diameter is 0.0.

     @return (double) Diameter of cluster that is represented by the entry.

     """

     if self.__diameter is not None:
         return self.__diameter

     # The linear sum is either a list of coordinates or a single scalar value.
     if isinstance(self.linear_sum, list):
         linear_dot = sum(list_math_multiplication(self.linear_sum, self.linear_sum))
     else:
         linear_dot = self.linear_sum * self.linear_sum

     weighted_square = self.square_sum * self.number_points
     diameter_part = weighted_square - 2.0 * linear_dot + weighted_square
     pair_count = self.number_points * (self.number_points - 1)

     # Guard against division by zero for a single point and against a complex result
     # that a negative value raised to the power 0.5 would produce.
     if pair_count <= 0 or diameter_part <= 0.0:
         self.__diameter = 0.0
     else:
         self.__diameter = (diameter_part / pair_count) ** 0.5

     return self.__diameter
예제 #10
0
    def get_diameter(self):
        """!
        @brief Calculates diameter of cluster that is represented by the entry.
        @details The value is computed on first request and stored in the entry; while the stored
                 value is not None it is returned without recalculation.
                 The diameter is the square root of (2 * N * SS - 2 * (LS . LS)) / (N * (N - 1)),
                 where SS is the square sum, LS is the linear sum and N is the number of points.
                 When N * (N - 1) is not positive (for example, an entry with a single point) or
                 the numerator is not positive (floating-point round-off), the diameter is 0.0.

        @return (double) Diameter of cluster that is represented by the entry.

        """

        if self.__diameter is not None:
            return self.__diameter

        # The linear sum is either a list of coordinates or a single scalar value.
        if isinstance(self.linear_sum, list):
            linear_dot = sum(
                list_math_multiplication(self.linear_sum, self.linear_sum))
        else:
            linear_dot = self.linear_sum * self.linear_sum

        weighted_square = self.square_sum * self.number_points
        diameter_part = weighted_square - 2.0 * linear_dot + weighted_square
        pair_count = self.number_points * (self.number_points - 1)

        # Guard against division by zero for a single point and against a complex result
        # that a negative value raised to the power 0.5 would produce.
        if pair_count <= 0 or diameter_part <= 0.0:
            self.__diameter = 0.0
        else:
            self.__diameter = (diameter_part / pair_count) ** 0.5

        return self.__diameter
예제 #11
0
    def __get_average_intra_cluster_distance(self, entry):
        """!
        @brief Computes the average intra cluster distance between this clustering feature and the specified one.
        @details The value is the square root of
                 (2 * N * SS - 2 * (LS . LS)) / (N * (N - 1)), where N, SS and LS are the number of
                 points, the square sum and the linear sum of both entries taken together.

        @param[in] entry (cfentry): Clustering feature to which the distance is measured.

        @return (double) Average intra cluster distance.

        """

        total_points = self.number_points + entry.number_points
        joint_linear_sum = list_math_addition(self.linear_sum, entry.linear_sum)
        joint_dot = sum(
            list_math_multiplication(joint_linear_sum, joint_linear_sum))

        distance_sum = 2.0 * total_points * (
            self.square_sum + entry.square_sum) - 2.0 * joint_dot
        pair_count = total_points * (total_points - 1.0)

        return (distance_sum / pair_count) ** 0.5