def __minimum_noiseless_description_length(data, clusters, centers):
    """
    @brief Calculates splitting criterion for input clusters using minimum noiseless description length criterion.

    @details The criterion is built from the squared deviations of every clustered point from the center
             of its cluster. It can be evaluated only when there are more points than clusters; otherwise
             (for example when clusters are empty) each cluster is scored as infinity.

    @param[in] data (list): Input data; clusters refer to its points by index.
    @param[in] clusters (list): Clusters (lists of point indexes) for which splitting criterion should be calculated.
    @param[in] centers (list): Centers of the clusters, in the same order as the clusters.

    @return (double) Sum of the scores assigned to the clusters in line with minimum noiseless description
            length criterion. Low value of splitting criterion means that current structure is much better.
            The sum is 0 when the list of clusters is empty.

    """
    alpha = 0.9
    betta = 0.9

    amount_clusters = len(clusters)
    amount_points = 0.0

    # Sum of squared deviations of all points from the centers of their clusters.
    total_deviation = 0.0
    for index_cluster, cluster in enumerate(clusters):
        for index_object in cluster:
            delta_vector = list_math_subtraction(data[index_object], centers[index_cluster])
            total_deviation += sum(list_math_multiplication(delta_vector, delta_vector))

        amount_points += len(cluster)

    score = float("inf")

    # The variance estimate below divides by (points - clusters). A strict comparison is required:
    # with fewer points than clusters the estimate would be negative (its square root is not a real
    # number), and with no points at all the division by the amount of points would fail.
    if amount_points - amount_clusters > 0:
        W = total_deviation / amount_points
        variance = total_deviation / (amount_points - amount_clusters)
        sigma = variance ** 0.5

        Kw = (1.0 - amount_clusters / amount_points) * variance
        Ks = (2.0 * alpha * sigma / (amount_points ** 0.5)) + (
            (alpha ** 2.0) * variance / amount_points + W - Kw / 2.0) ** 0.5
        U = W - Kw + 2.0 * (alpha ** 2.0) * variance / amount_points + Ks

        Z = (amount_clusters * variance / amount_points + U
             + betta * ((2.0 * amount_clusters) ** 0.5) * variance / amount_points)

        # Zero criterion is treated as the worst possible score.
        if Z != 0.0:
            score = Z

    # The score does not depend on a particular cluster, so it is evaluated once and then
    # accumulated once per cluster.
    return sum([score] * amount_clusters)
def __minimum_noiseless_description_length(self, clusters, centers):
    """!
    @brief Calculates splitting criterion for input clusters using minimum noiseless description length criterion.

    @details The criterion is built from the squared deviations of every clustered point from the center
             of its cluster; points are read from the data stored in the object by their indexes.
             It can be evaluated only when there are more points than clusters; otherwise (for example
             when clusters are empty) each cluster is scored as infinity.

    @param[in] clusters (list): Clusters (lists of point indexes) for which splitting criterion should be calculated.
    @param[in] centers (list): Centers of the clusters, in the same order as the clusters.

    @return (double) Sum of the scores assigned to the clusters in line with minimum noiseless description
            length criterion. Low value of splitting criterion means that current structure is much better.
            The sum is 0 when the list of clusters is empty.

    @see __bayesian_information_criterion(clusters, centers)

    """
    alpha = 0.9
    betta = 0.9

    K = len(clusters)
    N = 0.0

    # Sum of squared deviations of all points from the centers of their clusters.
    squared_error = 0.0
    for index_cluster, cluster in enumerate(clusters):
        for index_object in cluster:
            delta_vector = list_math_subtraction(self.__pointer_data[index_object], centers[index_cluster])
            squared_error += sum(list_math_multiplication(delta_vector, delta_vector))

        N += len(cluster)

    # The variance estimate below divides by (N - K). A strict comparison is required: with fewer
    # points than clusters the estimate would be negative (its square root is not a real number),
    # and with no points at all the division by N would fail.
    if N - K <= 0:
        return sum([float("inf")] * K)

    W = squared_error / N
    sigma_square = squared_error / (N - K)
    sigma = sigma_square ** 0.5

    Kw = (1.0 - K / N) * sigma_square
    Ks = (2.0 * alpha * sigma / (N ** 0.5)) + ((alpha ** 2.0) * sigma_square / N + W - Kw / 2.0) ** 0.5
    U = W - Kw + 2.0 * (alpha ** 2.0) * sigma_square / N + Ks

    Z = K * sigma_square / N + U + betta * ((2.0 * K) ** 0.5) * sigma_square / N

    # Zero criterion is treated as the worst possible score.
    cluster_score = float("inf") if Z == 0.0 else Z

    # The score does not depend on a particular cluster, so it is evaluated once and then
    # accumulated once per cluster.
    return sum([cluster_score] * K)
def __sub__(self, entry):
    """!
    @brief Overloaded subtraction operator for clustering features.
    @details The number of points, the linear sum and the square sum of the specified entry are taken
             away from the corresponding values of the current entry. NameError is raised when the
             resulting number of points or the resulting square sum is negative.

    @param[in] entry (cfentry): Entry that is subtracted from the current one.

    @return (cfentry) Clustering feature constructed from the calculated differences.

    """
    points_left = self.number_points - entry.number_points
    linear_left = list_math_subtraction(self.linear_sum, entry.linear_sum)
    square_left = self.square_sum - entry.square_sum

    # Neither the amount of points nor the square sum may become negative.
    for remainder in (points_left, square_left):
        if remainder < 0:
            raise NameError("Substruction with negative result is not possible for clustering features.")

    return cfentry(points_left, linear_left, square_left)
def __sub__(self, entry):
    """!
    @brief Overloaded operator sub: subtracts one clustering feature from another.
    @details The result is described by the differences of point counts, linear sums and square sums
             of the two entries. The operation is rejected with NameError when the difference of
             point counts or the difference of square sums is negative.

    @param[in] entry (cfentry): Entry that is subtracted from the current one.

    @return (cfentry) Clustering feature constructed from the three differences.

    """
    # Components are evaluated in the order expected by the cfentry constructor.
    difference = (
        self.number_points - entry.number_points,
        list_math_subtraction(self.linear_sum, entry.linear_sum),
        self.square_sum - entry.square_sum,
    )

    amount_points, _, squares = difference
    if amount_points < 0 or squares < 0:
        raise NameError("Substruction with negative result is not possible for clustering features.")

    return cfentry(*difference)