def __get_variance_increase_distance(self, entry):
    """!
    @brief Computes the variance increase distance between the current entry and the specified one.
    @details The value is the variance term of the merged clustering feature minus the variance terms
              of the current entry and of the specified entry taken separately. Every term is built only
              from the number of points, the linear sum and the square sum of the entries.

    @param[in] entry (cfentry): Clustering feature to which distance should be obtained.

    @return (double) Variance increase distance.

    """
    merged_count = self.number_points + entry.number_points
    merged_linear_sum = list_math_addition(self.linear_sum, entry.linear_sum)

    # Squared norms of the merged linear sum and of each separate linear sum.
    merged_norm = sum(list_math_multiplication(merged_linear_sum, merged_linear_sum))
    own_norm = sum(list_math_multiplication(self.linear_sum, self.linear_sum))
    other_norm = sum(list_math_multiplication(entry.linear_sum, entry.linear_sum))

    merged_term = (
        (self.square_sum + entry.square_sum)
        - 2.0 * merged_norm / merged_count
        + merged_count * merged_norm / merged_count ** 2.0
    )

    own_term = (
        self.square_sum
        - (2.0 * own_norm / self.number_points)
        + (own_norm / self.number_points)
    )

    other_term = (
        entry.square_sum
        - (2.0 / entry.number_points) * other_norm
        + entry.number_points * (1.0 / entry.number_points ** 2.0) * other_norm
    )

    return merged_term - own_term - other_term
def get_radius(self):
    """!
    @brief Calculates radius of cluster that is represented by the entry.
    @details The radius is evaluated as sqrt((SS - 2 * (LS, c) + N * (c, c)) / N), where SS is the square sum,
              LS is the linear sum, N is the number of points and c is the centroid returned by get_centroid().
              The value is stored after the first calculation and returned from the stored value afterwards.
              Rounding errors can push the expression under the root slightly below zero; in that case it is
              clamped to zero so that a real number is always returned.

    @return (double) Radius of cluster that is represented by the entry.

    """
    if self.__radius is not None:
        return self.__radius

    centroid = self.get_centroid()

    if isinstance(centroid, list):
        radius_part_2 = 2.0 * sum(list_math_multiplication(self.linear_sum, centroid))
        radius_part_3 = self.number_points * sum(list_math_multiplication(centroid, centroid))
    else:
        # One-dimensional data is stored as plain numbers.
        radius_part_2 = 2.0 * self.linear_sum * centroid
        radius_part_3 = self.number_points * centroid * centroid

    mean_square_deviation = (1.0 / self.number_points) * (self.square_sum - radius_part_2 + radius_part_3)

    # A negative base raised to 0.5 produces a complex number in Python 3.
    if mean_square_deviation < 0.0:
        mean_square_deviation = 0.0

    self.__radius = mean_square_deviation ** 0.5
    return self.__radius
def get_radius(self):
    """!
    @brief Calculates radius of cluster that is represented by the entry.
    @details The radius is evaluated as sqrt((SS - 2 * (LS, c) + N * (c, c)) / N), where SS is the square sum,
              LS is the linear sum, N is the number of points and c is the centroid returned by get_centroid().
              The value is stored after the first calculation and returned from the stored value afterwards.
              Rounding errors can push the expression under the root slightly below zero; in that case it is
              clamped to zero so that a real number is always returned.

    @return (double) Radius of cluster that is represented by the entry.

    """
    if self.__radius is not None:
        return self.__radius

    centroid = self.get_centroid()

    if isinstance(centroid, list):
        radius_part_2 = 2.0 * sum(list_math_multiplication(self.linear_sum, centroid))
        radius_part_3 = self.number_points * sum(list_math_multiplication(centroid, centroid))
    else:
        # One-dimensional data is stored as plain numbers.
        radius_part_2 = 2.0 * self.linear_sum * centroid
        radius_part_3 = self.number_points * centroid * centroid

    mean_square_deviation = (1.0 / self.number_points) * (self.square_sum - radius_part_2 + radius_part_3)

    # A negative base raised to 0.5 produces a complex number in Python 3.
    if mean_square_deviation < 0.0:
        mean_square_deviation = 0.0

    self.__radius = mean_square_deviation ** 0.5
    return self.__radius
def __minimum_noiseless_description_length(self, clusters, centers):
    """!
    @brief Calculates splitting criterion for input clusters using minimum noiseless description length criterion.
    @details The score is computed from totals over all clusters, therefore every cluster receives the same
              score and the returned value is the sum of these equal per-cluster scores. When the number of
              points does not exceed the number of clusters the variance estimate is undefined and every
              cluster is scored as infinity.

    @param[in] clusters (list): Clusters for which splitting criterion should be calculated.
    @param[in] centers (list): Centers of the clusters.

    @return (double) Returns splitting criterion in line with minimum noiseless description length criterion.
             Low value of splitting criterion means that current structure is much better.

    @see __bayesian_information_criterion(clusters, centers)

    """
    alpha = 0.9
    betta = 0.9

    K = len(clusters)
    N = 0.0
    total_deviation = 0.0   # sum of squared distances from every point to the center of its cluster

    for index_cluster in range(K):
        for index_object in clusters[index_cluster]:
            delta_vector = list_math_subtraction(self.__pointer_data[index_object], centers[index_cluster])
            total_deviation += sum(list_math_multiplication(delta_vector, delta_vector))

        N += len(clusters[index_cluster])

    # With N <= K the estimate below would divide by zero or become negative (complex root in Python 3).
    if N - K <= 0:
        return sum([float("inf")] * K)

    W = total_deviation / N
    sigma_sqrt = total_deviation / (N - K)
    sigma = sigma_sqrt ** 0.5

    Kw = (1.0 - K / N) * sigma_sqrt
    Ks = (2.0 * alpha * sigma / (N ** 0.5)) + ((alpha ** 2.0) * sigma_sqrt / N + W - Kw / 2.0) ** 0.5
    U = W - Kw + 2.0 * (alpha ** 2.0) * sigma_sqrt / N + Ks

    Z = K * sigma_sqrt / N + U + betta * ((2.0 * K) ** 0.5) * sigma_sqrt / N

    # The score does not depend on the cluster index, so it is computed once and repeated per cluster.
    score = float("inf") if Z == 0.0 else Z
    return sum([score] * K)
def __get_average_inter_cluster_distance(self, entry):
    """!
    @brief Computes the average inter cluster distance between the current entry and the specified one.
    @details The value is derived only from the number of points, the linear sums and the square sums
              of both clustering features.

    @param[in] entry (cfentry): Clustering feature to which distance should be obtained.

    @return (double) Average inter cluster distance.

    """
    # Scalar product of the linear sums of both entries.
    cross_product = sum(list_math_multiplication(self.linear_sum, entry.linear_sum))

    numerator = (
        entry.number_points * self.square_sum
        - 2.0 * cross_product
        + self.number_points * entry.square_sum
    )
    pair_count = self.number_points * entry.number_points

    return (numerator / pair_count) ** 0.5
def __get_variance_increase_distance(self, entry):
    """!
    @brief Computes the variance increase distance between the current entry and the specified one.
    @details The value is the variance term of the merged clustering feature minus the variance terms
              of the current entry and of the specified entry taken separately. Every term is built only
              from the number of points, the linear sum and the square sum of the entries.

    @param[in] entry (cfentry): Clustering feature to which distance should be obtained.

    @return (double) Variance increase distance.

    """
    merged_count = self.number_points + entry.number_points
    merged_linear_sum = list_math_addition(self.linear_sum, entry.linear_sum)

    # Squared norms of the merged linear sum and of each separate linear sum.
    merged_norm = sum(list_math_multiplication(merged_linear_sum, merged_linear_sum))
    own_norm = sum(list_math_multiplication(self.linear_sum, self.linear_sum))
    other_norm = sum(list_math_multiplication(entry.linear_sum, entry.linear_sum))

    merged_term = (
        (self.square_sum + entry.square_sum)
        - 2.0 * merged_norm / merged_count
        + merged_count * merged_norm / merged_count ** 2.0
    )

    own_term = (
        self.square_sum
        - (2.0 * own_norm / self.number_points)
        + (own_norm / self.number_points)
    )

    other_term = (
        entry.square_sum
        - (2.0 / entry.number_points) * other_norm
        + entry.number_points * (1.0 / entry.number_points ** 2.0) * other_norm
    )

    return merged_term - own_term - other_term
def __get_average_inter_cluster_distance(self, entry):
    """!
    @brief Computes the average inter cluster distance between the current entry and the specified one.
    @details The value is derived only from the number of points, the linear sums and the square sums
              of both clustering features.

    @param[in] entry (cfentry): Clustering feature to which distance should be obtained.

    @return (double) Average inter cluster distance.

    """
    # Scalar product of the linear sums of both entries.
    cross_product = sum(list_math_multiplication(self.linear_sum, entry.linear_sum))

    numerator = (
        entry.number_points * self.square_sum
        - 2.0 * cross_product
        + self.number_points * entry.square_sum
    )
    pair_count = self.number_points * entry.number_points

    return (numerator / pair_count) ** 0.5
def __get_average_intra_cluster_distance(self, entry):
    """!
    @brief Computes the average intra cluster distance for the current entry merged with the specified one.
    @details The value is derived only from the summed number of points, the summed linear sums and
              the summed square sums of both clustering features.

    @param[in] entry (cfentry): Clustering feature to which distance should be obtained.

    @return (double) Average intra cluster distance.

    """
    total_points = self.number_points + entry.number_points
    total_square_sum = self.square_sum + entry.square_sum

    # Squared norm of the linear sum of the merged entries.
    merged_linear_sum = list_math_addition(self.linear_sum, entry.linear_sum)
    merged_norm = sum(list_math_multiplication(merged_linear_sum, merged_linear_sum))

    numerator = 2.0 * total_points * total_square_sum - 2.0 * merged_norm
    denominator = total_points * (total_points - 1.0)

    return (numerator / denominator) ** 0.5
def get_diameter(self):
    """!
    @brief Calculates diameter of cluster that is represented by the entry.
    @details The diameter is evaluated as sqrt((2 * N * SS - 2 * (LS, LS)) / (N * (N - 1))), where SS is the
              square sum, LS is the linear sum and N is the number of points. The value is stored after the
              first calculation and returned from the stored value afterwards. An entry with fewer than two
              points has no pair of points and its diameter is zero. Rounding errors can push the expression
              under the root slightly below zero; in that case the diameter is zero as well.

    @return (double) Diameter of cluster that is represented by the entry.

    """
    if self.__diameter is not None:
        return self.__diameter

    # N * (N - 1) is zero for a single point, so the general formula would divide by zero.
    if self.number_points <= 1:
        self.__diameter = 0.0
        return self.__diameter

    if isinstance(self.linear_sum, list):
        linear_norm = sum(list_math_multiplication(self.linear_sum, self.linear_sum))
        diameter_part = self.square_sum * self.number_points - 2.0 * linear_norm + self.square_sum * self.number_points
    else:
        # One-dimensional data is stored as plain numbers.
        diameter_part = self.square_sum * self.number_points - 2.0 * self.linear_sum * self.linear_sum + self.square_sum * self.number_points

    # A negative base raised to 0.5 produces a complex number in Python 3.
    if diameter_part < 0.0:
        diameter_part = 0.0

    self.__diameter = (diameter_part / (self.number_points * (self.number_points - 1))) ** 0.5
    return self.__diameter
def get_diameter(self):
    """!
    @brief Calculates diameter of cluster that is represented by the entry.
    @details The diameter is evaluated as sqrt((2 * N * SS - 2 * (LS, LS)) / (N * (N - 1))), where SS is the
              square sum, LS is the linear sum and N is the number of points. The value is stored after the
              first calculation and returned from the stored value afterwards. An entry with fewer than two
              points has no pair of points and its diameter is zero. Rounding errors can push the expression
              under the root slightly below zero; in that case the diameter is zero as well.

    @return (double) Diameter of cluster that is represented by the entry.

    """
    if self.__diameter is not None:
        return self.__diameter

    # N * (N - 1) is zero for a single point, so the general formula would divide by zero.
    if self.number_points <= 1:
        self.__diameter = 0.0
        return self.__diameter

    if isinstance(self.linear_sum, list):
        linear_norm = sum(list_math_multiplication(self.linear_sum, self.linear_sum))
        diameter_part = self.square_sum * self.number_points - 2.0 * linear_norm + self.square_sum * self.number_points
    else:
        # One-dimensional data is stored as plain numbers.
        diameter_part = self.square_sum * self.number_points - 2.0 * self.linear_sum * self.linear_sum + self.square_sum * self.number_points

    # A negative base raised to 0.5 produces a complex number in Python 3.
    if diameter_part < 0.0:
        diameter_part = 0.0

    self.__diameter = (diameter_part / (self.number_points * (self.number_points - 1))) ** 0.5
    return self.__diameter
def __get_average_intra_cluster_distance(self, entry):
    """!
    @brief Computes the average intra cluster distance for the current entry merged with the specified one.
    @details The value is derived only from the summed number of points, the summed linear sums and
              the summed square sums of both clustering features.

    @param[in] entry (cfentry): Clustering feature to which distance should be obtained.

    @return (double) Average intra cluster distance.

    """
    total_points = self.number_points + entry.number_points
    total_square_sum = self.square_sum + entry.square_sum

    # Squared norm of the linear sum of the merged entries.
    merged_linear_sum = list_math_addition(self.linear_sum, entry.linear_sum)
    merged_norm = sum(list_math_multiplication(merged_linear_sum, merged_linear_sum))

    numerator = 2.0 * total_points * total_square_sum - 2.0 * merged_norm
    denominator = total_points * (total_points - 1.0)

    return (numerator / denominator) ** 0.5