def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
    """!
    @brief Checks that cfentry distance is symmetric and agrees with the reference utils implementation.

    @param[in] cluster1 (list): First cluster as a list of points.
    @param[in] cluster2 (list): Second cluster as a list of points.
    @param[in] type_measurement (measurement_type): Distance measurement algorithm to verify.
    """
    entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
    entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

    # check that the same distance from 1 to 2 and from 2 to 1.
    distance12 = entry1.get_distance(entry2, type_measurement)
    distance21 = entry2.get_distance(entry1, type_measurement)
    assert distance12 == distance21

    # check with utils calculation.
    # Floating-point results are compared with a tolerance: the previous exact
    # '==' checks could fail spuriously, and 'float_delta' was declared but never used.
    float_delta = 0.0000001
    if (type_measurement == measurement_type.CENTROID_EUCLIDIAN_DISTANCE):
        assert abs(distance12 - euclidean_distance_sqrt(entry1.get_centroid(), entry2.get_centroid())) < float_delta

    elif (type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE):
        assert abs(distance12 - manhattan_distance(entry1.get_centroid(), entry2.get_centroid())) < float_delta

    elif (type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE):
        assert numpy.isclose(distance12, average_inter_cluster_distance(cluster1, cluster2))

    elif (type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE):
        assert numpy.isclose(distance12, average_intra_cluster_distance(cluster1, cluster2))

    elif (type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE):
        assert numpy.isclose(distance12, variance_increase_distance(cluster1, cluster2))
def __merge_by_signle_link(self):
    """!
    @brief Merges the most similar clusters in line with single link type.
    """
    best_distance = float('Inf')
    best_pair = None

    for i in range(len(self.__clusters)):
        for j in range(i + 1, len(self.__clusters)):
            # Single link: similarity of two clusters is the distance
            # between their closest pair of objects.
            closest = float('Inf')
            for object1 in self.__clusters[i]:
                for object2 in self.__clusters[j]:
                    current = euclidean_distance_sqrt(self.__pointer_data[object1], self.__pointer_data[object2])
                    if current < closest:
                        closest = current

            if closest < best_distance:
                best_distance = closest
                best_pair = [i, j]

    self.__clusters[best_pair[0]] += self.__clusters[best_pair[1]]
    self.__clusters.pop(best_pair[1])    # remove merged cluster.
def __update_clusters(self):
    """!
    @brief Calculate Euclidean distance to each point from the each cluster.
           Nearest points are captured by according clusters and as a result clusters are updated.

    @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
    """
    clusters = [[] for _ in range(len(self.__centers))]
    for index_point in range(len(self.__pointer_data)):
        index_optim = -1
        dist_optim = 0.0

        for index in range(len(self.__centers)):
            # dist = euclidean_distance(data[index_point], centers[index]);  # Slow solution
            # Squared distance preserves ordering, so the sqrt is skipped.
            dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__centers[index])    # Fast solution

            # '==' instead of 'is': identity comparison with an int literal is
            # a CPython small-int caching artifact and must not be relied upon.
            if ((dist < dist_optim) or (index == 0)):
                index_optim = index
                dist_optim = dist

        clusters[index_optim].append(index_point)

    # If cluster is not able to capture object it should be removed
    clusters = [cluster for cluster in clusters if len(cluster) > 0]

    return clusters
def __update_clusters(self, centers, available_indexes = None):
    """!
    @brief Calculates Euclidean distance to each point from the each cluster.
           Nearest points are captured by according clusters and as a result clusters are updated.

    @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
    @param[in] available_indexes (list): Indexes that defines which points can be used from input data, if None - then all points are used.

    @return (list) Updated clusters.
    """
    bypass = None
    if (available_indexes is None):
        bypass = range(len(self.__pointer_data))
    else:
        bypass = available_indexes

    clusters = [[] for _ in range(len(centers))]
    for index_point in bypass:
        index_optim = -1
        dist_optim = 0.0

        for index in range(len(centers)):
            # dist = euclidean_distance(data[index_point], centers[index]);  # Slow solution
            dist = euclidean_distance_sqrt(self.__pointer_data[index_point], centers[index])    # Fast solution

            # '==' instead of 'is': int identity comparison is an
            # implementation detail of CPython and must not be relied upon.
            if ((dist < dist_optim) or (index == 0)):
                index_optim = index
                dist_optim = dist

        clusters[index_optim].append(index_point)

    return clusters
def __improve_parameters(self, centers, available_indexes = None):
    """!
    @brief Performs k-means clustering in the specified region.

    @param[in] centers (list): Centers of clusters.
    @param[in] available_indexes (list): Indexes that defines which points can be used for k-means clustering, if None - then all points are used.

    @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data.
    """
    # Squared tolerance matches the squared distances used below (fast solution).
    threshold = self.__tolerance * self.__tolerance
    shift = numpy.Inf
    clusters = []

    while shift > threshold:
        clusters = self.__update_clusters(centers, available_indexes)
        clusters = [candidate for candidate in clusters if len(candidate) > 0]

        refreshed_centers = self.__update_centers(clusters)

        # Convergence is judged by the largest center displacement.
        shift = max([euclidean_distance_sqrt(centers[position], refreshed_centers[position]) for position in range(len(refreshed_centers))])    # Fast solution
        centers = refreshed_centers

    return (clusters, centers)
def __update_clusters(self, medoids):
    """!
    @brief Forms cluster in line with specified medoids by calculation distance from each point to medoids.

    @param[in] medoids (list): Indexes of points in the input data that are used as medoids.
    """
    self.__belong = [0] * len(self.__pointer_data)
    self.__clusters = [[] for _ in range(len(medoids))]
    for index_point in range(len(self.__pointer_data)):
        index_optim = -1
        dist_optim = 0.0
        for index in range(len(medoids)):
            dist = euclidean_distance_sqrt(
                self.__pointer_data[index_point],
                self.__pointer_data[medoids[index]])
            # '==' instead of 'is': comparing ints by identity relies on
            # CPython's small-int caching and is not guaranteed behavior.
            if ((dist < dist_optim) or (index == 0)):
                index_optim = index
                dist_optim = dist
        self.__clusters[index_optim].append(index_point)
        self.__belong[index_point] = index_optim

    # If cluster is not able to capture object it should be removed
    self.__clusters = [
        cluster for cluster in self.__clusters if len(cluster) > 0
    ]
def __update_clusters(self):
    """!
    @brief Calculate Manhattan distance to each point from the each cluster.
    @details Nearest points are captured by according clusters and as a result clusters are updated.

    @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
    """
    # NOTE(review): the docstring says Manhattan distance, but the code uses
    # squared Euclidean distance - kept as-is to preserve behavior; confirm intent.
    clusters = [[] for _ in range(len(self.__medians))]
    for index_point in range(len(self.__pointer_data)):
        index_optim = -1
        dist_optim = 0.0
        for index in range(len(self.__medians)):
            dist = euclidean_distance_sqrt(
                self.__pointer_data[index_point], self.__medians[index])
            # '==' instead of 'is': int identity comparison is a CPython
            # implementation detail and must not be relied upon.
            if ((dist < dist_optim) or (index == 0)):
                index_optim = index
                dist_optim = dist
        clusters[index_optim].append(index_point)

    # If cluster is not able to capture object it should be removed
    clusters = [cluster for cluster in clusters if len(cluster) > 0]

    return clusters
def __update_clusters(self):
    """!
    @brief Calculate distance to each point from the each cluster.
    @details Nearest points are captured by according clusters and as a result clusters are updated.
             Each medoid is pre-assigned to its own cluster and skipped during assignment.

    @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
    """
    # Each cluster starts with its own medoid index already inside.
    clusters = [[self.__medoid_indexes[i]] for i in range(len(self.__medoids))]

    for index_point in range(len(self.__pointer_data)):
        if (index_point in self.__medoid_indexes):
            continue

        index_optim = -1
        dist_optim = float('Inf')

        for index in range(len(self.__medoids)):
            dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medoids[index])

            # '==' instead of 'is' (int identity is a CPython artifact); the
            # clause is also redundant here since dist_optim starts at Inf,
            # but it is kept for symmetry with the sibling implementations.
            if ((dist < dist_optim) or (index == 0)):
                index_optim = index
                dist_optim = dist

        clusters[index_optim].append(index_point)

    return clusters
def __update_clusters(self, centers, available_indexes = None):
    """!
    @brief Calculates Euclidean distance to each point from the each cluster.
           Nearest points are captured by according clusters and as a result clusters are updated.

    @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
    @param[in] available_indexes (list): Indexes that defines which points can be used from input data, if None - then all points are used.

    @return (list) Updated clusters.
    """
    bypass = None
    if (available_indexes is None):
        bypass = range(len(self.__pointer_data))
    else:
        bypass = available_indexes

    clusters = [[] for _ in range(len(centers))]
    for index_point in bypass:
        index_optim = -1
        dist_optim = 0.0

        for index in range(len(centers)):
            # dist = euclidean_distance(data[index_point], centers[index]);  # Slow solution
            dist = euclidean_distance_sqrt(self.__pointer_data[index_point], centers[index])    # Fast solution

            # '==' instead of 'is': identity comparison with an int literal
            # relies on CPython's small-int caching and is not guaranteed.
            if ((dist < dist_optim) or (index == 0)):
                index_optim = index
                dist_optim = dist

        clusters[index_optim].append(index_point)

    return clusters
def get_distance_matrix(self):
    """!
    @brief Calculates distance matrix (U-matrix).
    @details The U-Matrix visualizes based on the distance in input space between a weight vector and its neighbors on map.

    @return (list) Distance matrix (U-matrix).

    @see show_distance_matrix()
    @see get_density_matrix()
    """
    if (self.__ccore_som_pointer is not None):
        self._weights = wrapper.som_get_weights(self.__ccore_som_pointer)

        if (self._conn_type != type_conn.func_neighbor):
            self._neighbors = wrapper.som_get_neighbors(
                self.__ccore_som_pointer)

    # Connections are materialized once for functional neighbors; previously
    # this was re-done for every neuron inside the nested loops, which
    # rebuilt the same neighbor lists repeatedly and wasted time.
    if (self._conn_type == type_conn.func_neighbor):
        self._create_connections(type_conn.grid_eight)

    distance_matrix = [[0.0] * self._cols for i in range(self._rows)]

    for i in range(self._rows):
        for j in range(self._cols):
            neuron_index = i * self._cols + j

            # Average distance from a neuron's weight vector to the weight
            # vectors of all its neighbors on the map.
            for neighbor_index in self._neighbors[neuron_index]:
                distance_matrix[i][j] += euclidean_distance_sqrt(
                    self._weights[neuron_index],
                    self._weights[neighbor_index])

            distance_matrix[i][j] /= len(self._neighbors[neuron_index])

    return distance_matrix
def __merge_by_average_link(self):
    """!
    @brief Merges the most similar clusters in line with average link type.
    """
    minimum_average_distance = float('Inf')
    # Initialized explicitly: without this a NameError is raised if no pair is
    # ever examined (the sibling single-link/centroid-link versions do the same).
    indexes = None

    for index_cluster1 in range(0, len(self.__clusters)):
        for index_cluster2 in range(index_cluster1 + 1, len(self.__clusters)):
            # Average link: accumulated pairwise distance between the clusters.
            # NOTE(review): the sum is divided by len1 + len2 rather than by
            # the number of pairs (len1 * len2) - kept as-is to preserve
            # behavior; confirm this is the intended normalization.
            candidate_average_distance = 0.0
            for index_object1 in self.__clusters[index_cluster1]:
                for index_object2 in self.__clusters[index_cluster2]:
                    candidate_average_distance += euclidean_distance_sqrt(self.__pointer_data[index_object1], self.__pointer_data[index_object2])

            candidate_average_distance /= (len(self.__clusters[index_cluster1]) + len(self.__clusters[index_cluster2]))

            if (candidate_average_distance < minimum_average_distance):
                minimum_average_distance = candidate_average_distance
                indexes = [index_cluster1, index_cluster2]

    self.__clusters[indexes[0]] += self.__clusters[indexes[1]]
    self.__clusters.pop(indexes[1])    # remove merged cluster.
def get_distance_matrix(self):
    """!
    @brief Calculates distance matrix (U-matrix).
    @details The U-Matrix visualizes based on the distance in input space between a weight vector and its neighbors on map.

    @return (list) Distance matrix (U-matrix).

    @see show_distance_matrix()
    @see get_density_matrix()
    """
    if (self.__ccore_som_pointer is not None):
        self._weights = wrapper.som_get_weights(self.__ccore_som_pointer)

        if (self._conn_type != type_conn.func_neighbor):
            self._neighbors = wrapper.som_get_neighbors(self.__ccore_som_pointer)

    # Connections are built once for functional neighbors; previously this
    # call sat inside the per-neuron loop and rebuilt the same neighbor
    # lists for every cell of the matrix.
    if (self._conn_type == type_conn.func_neighbor):
        self._create_connections(type_conn.grid_eight)

    distance_matrix = [[0.0] * self._cols for i in range(self._rows)]

    for i in range(self._rows):
        for j in range(self._cols):
            neuron_index = i * self._cols + j

            # Average distance in input space between this neuron's weights
            # and the weights of each of its map neighbors.
            for neighbor_index in self._neighbors[neuron_index]:
                distance_matrix[i][j] += euclidean_distance_sqrt(self._weights[neuron_index], self._weights[neighbor_index])

            distance_matrix[i][j] /= len(self._neighbors[neuron_index])

    return distance_matrix
def __recursive_nearest_nodes(self, point, distance, sqrt_distance, node, best_nodes):
    """!
    @brief Returns list of neighbors such as tuple (distance, node) that is located in area that is covered by distance.

    @param[in] point (list): Coordinates that is considered as centroind for searching.
    @param[in] distance (double): Distance from the center where seaching is performed.
    @param[in] sqrt_distance (double): Square distance from the center where searching is performed.
    @param[in] node (node): Node from that searching is performed.
    @param[in|out] best_nodes (list): List of founded nodes.
    """
    lower_bound = node.data[node.disc] - distance
    upper_bound = node.data[node.disc] + distance

    # Descend into the right subtree only when the search region can overlap it.
    if (node.right is not None) and (point[node.disc] >= lower_bound):
        self.__recursive_nearest_nodes(point, distance, sqrt_distance, node.right, best_nodes)

    # Descend into the left subtree only when the search region can overlap it.
    if (node.left is not None) and (point[node.disc] < upper_bound):
        self.__recursive_nearest_nodes(point, distance, sqrt_distance, node.left, best_nodes)

    # The node itself is a neighbor when it lies within the squared radius.
    candidate_distance = euclidean_distance_sqrt(point, node.data)
    if candidate_distance <= sqrt_distance:
        best_nodes.append((candidate_distance, node))
def process(self):
    """!
    @brief Performs cluster analysis in line with rules of K-Means algorithm.

    @remark Results of clustering can be obtained using corresponding get methods.

    @see get_clusters()
    @see get_centers()
    """
    if self.__ccore is True:
        # Clustering is delegated to the C++ core implementation.
        self.__clusters = wrapper.kmeans(self.__pointer_data, self.__centers, self.__tolerance)
        self.__centers = self.__update_centers()
    else:
        if len(self.__pointer_data[0]) != len(self.__centers[0]):
            raise NameError('Dimension of the input data and dimension of the initial cluster centers must be equal.')

        # Squared tolerance is compared against squared displacements (fast solution).
        threshold = self.__tolerance * self.__tolerance
        maximum_change = float('inf')

        while maximum_change > threshold:
            self.__clusters = self.__update_clusters()
            refreshed_centers = self.__update_centers()

            # The displacement is measured before the centers are reassigned.
            maximum_change = max([euclidean_distance_sqrt(self.__centers[position], refreshed_centers[position]) for position in range(len(refreshed_centers))])
            self.__centers = refreshed_centers
def get_distance(self, entry, type_measurement):
    """!
    @brief Calculates distance between two clusters in line with measurement type.
    @details In case of usage CENTROID_EUCLIDIAN_DISTANCE square euclidian distance will be returned.
             Square root should be taken from the result for obtaining real euclidian distance between entries.

    @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
    @param[in] type_measurement (measurement_type): Distance measurement algorithm between two clusters.

    @return (double) Distance between two clusters.
    """
    if type_measurement is measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
        # Squared distance between centroids - caller takes the root if needed.
        return euclidean_distance_sqrt(entry.get_centroid(), self.get_centroid())

    if type_measurement is measurement_type.CENTROID_MANHATTAN_DISTANCE:
        return manhattan_distance(entry.get_centroid(), self.get_centroid())

    if type_measurement is measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
        return self.__get_average_inter_cluster_distance(entry)

    if type_measurement is measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
        return self.__get_average_intra_cluster_distance(entry)

    if type_measurement is measurement_type.VARIANCE_INCREASE_DISTANCE:
        return self.__get_variance_increase_distance(entry)

    assert 0    # unknown measurement type.
def __merge_by_average_link(self):
    """!
    @brief Merges the most similar clusters in line with average link type.
    """
    minimum_average_distance = float('Inf')
    # Explicit initialization - without it a NameError is raised below when
    # no candidate pair was ever examined.
    indexes = None

    for index_cluster1 in range(0, len(self.__clusters)):
        for index_cluster2 in range(index_cluster1 + 1, len(self.__clusters)):
            # Accumulated pairwise distance between the two clusters.
            # NOTE(review): normalized by len1 + len2, not by the pair count
            # len1 * len2 - preserved as-is; confirm the intended formula.
            candidate_average_distance = 0.0
            for index_object1 in self.__clusters[index_cluster1]:
                for index_object2 in self.__clusters[index_cluster2]:
                    candidate_average_distance += euclidean_distance_sqrt(
                        self.__pointer_data[index_object1],
                        self.__pointer_data[index_object2])

            candidate_average_distance /= (
                len(self.__clusters[index_cluster1]) +
                len(self.__clusters[index_cluster2]))

            if (candidate_average_distance < minimum_average_distance):
                minimum_average_distance = candidate_average_distance
                indexes = [index_cluster1, index_cluster2]

    self.__clusters[indexes[0]] += self.__clusters[indexes[1]]
    self.__clusters.pop(indexes[1])    # remove merged cluster.
def __recursive_nearest_nodes(self, point, distance, sqrt_distance, node_head, best_nodes):
    """!
    @brief Returns list of neighbors such as tuple (distance, node) that is located in area that is covered by distance.

    @param[in] point (list): Coordinates that is considered as centroind for searching.
    @param[in] distance (double): Distance from the center where seaching is performed.
    @param[in] sqrt_distance (double): Square distance from the center where searching is performed.
    @param[in] node_head (node): Node from that searching is performed.
    @param[in|out] best_nodes (list): List of founded nodes.
    """
    discriminator = node_head.disc
    split_value = node_head.data[discriminator]

    # Right subtree may hold neighbors only if the query coordinate reaches
    # past the lower bound of the splitting plane.
    if node_head.right is not None:
        if point[discriminator] >= split_value - distance:
            self.__recursive_nearest_nodes(point, distance, sqrt_distance, node_head.right, best_nodes)

    # Left subtree may hold neighbors only if the query coordinate stays
    # below the upper bound of the splitting plane.
    if node_head.left is not None:
        if point[discriminator] < split_value + distance:
            self.__recursive_nearest_nodes(point, distance, sqrt_distance, node_head.left, best_nodes)

    # Collect the current node itself when it falls inside the squared radius.
    current_distance = euclidean_distance_sqrt(point, node_head.data)
    if current_distance <= sqrt_distance:
        best_nodes.append((current_distance, node_head))
def __update_clusters(self):
    """!
    @brief Calculate Manhattan distance to each point from the each cluster.
    @details Nearest points are captured by according clusters and as a result clusters are updated.

    @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
    """
    # NOTE(review): the docstring says Manhattan distance, but the code uses
    # squared Euclidean distance - kept as-is to preserve behavior; confirm intent.
    clusters = [[] for _ in range(len(self.__medians))]
    for index_point in range(len(self.__pointer_data)):
        index_optim = -1
        dist_optim = 0.0

        for index in range(len(self.__medians)):
            dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medians[index])

            # '==' instead of 'is': int identity comparison relies on
            # CPython's small-int caching and is not guaranteed.
            if ((dist < dist_optim) or (index == 0)):
                index_optim = index
                dist_optim = dist

        clusters[index_optim].append(index_point)

    # If cluster is not able to capture object it should be removed
    clusters = [cluster for cluster in clusters if len(cluster) > 0]

    return clusters
def get_distance(self, entry, type_measurement):
    """!
    @brief Calculates distance between two clusters in line with measurement type.

    @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
    @param[in] type_measurement (measurement_type): Distance measurement algorithm between two clusters.

    @return (double) Distance between two clusters.
    """
    # Guard-clause dispatch on the measurement type.
    if type_measurement is measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
        return euclidean_distance_sqrt(entry.get_centroid(), self.get_centroid())

    if type_measurement is measurement_type.CENTROID_MANHATTAN_DISTANCE:
        return manhattan_distance(entry.get_centroid(), self.get_centroid())

    if type_measurement is measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
        return self.__get_average_inter_cluster_distance(entry)

    if type_measurement is measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
        return self.__get_average_intra_cluster_distance(entry)

    if type_measurement is measurement_type.VARIANCE_INCREASE_DISTANCE:
        return self.__get_variance_increase_distance(entry)

    assert 0    # unknown measurement type.
def process(self):
    """!
    @brief Performs cluster analysis in line with rules of K-Medoids algorithm.

    @remark Results of clustering can be obtained using corresponding get methods.

    @see get_clusters()
    @see get_medoids()
    """
    if self.__ccore is True:
        # Clustering is delegated to the C++ core implementation.
        self.__clusters = wrapper.kmedoids(self.__pointer_data, self.__medoid_indexes, self.__tolerance)
        self.__medoids, self.__medoid_indexes = self.__update_medoids()
    else:
        # Squared tolerance is compared against squared displacements (fast solution).
        threshold = self.__tolerance * self.__tolerance
        maximum_change = float('inf')

        while maximum_change > threshold:
            self.__clusters = self.__update_clusters()
            refreshed_medoids, refreshed_medoid_indexes = self.__update_medoids()

            # The displacement is measured before the medoids are reassigned.
            maximum_change = max([euclidean_distance_sqrt(self.__medoids[position], refreshed_medoids[position]) for position in range(len(refreshed_medoids))])

            self.__medoids = refreshed_medoids
            self.__medoid_indexes = refreshed_medoid_indexes
def process(self):
    """!
    @brief Performs cluster analysis in line with rules of K-Medians algorithm.

    @remark Results of clustering can be obtained using corresponding get methods.

    @see get_clusters()
    @see get_medians()
    """
    if len(self.__pointer_data[0]) != len(self.__medians[0]):
        raise NameError('Dimension of the input data and dimension of the initial cluster medians must be equal.')

    # Squared tolerance is compared against squared displacements (fast solution).
    threshold = self.__tolerance * self.__tolerance
    maximum_change = float('inf')

    while maximum_change > threshold:
        self.__clusters = self.__update_clusters()
        refreshed_medians = self.__update_medians()

        # The displacement is measured before the medians are reassigned.
        maximum_change = max([euclidean_distance_sqrt(self.__medians[position], refreshed_medians[position]) for position in range(len(refreshed_medians))])
        self.__medians = refreshed_medians
def __init__(self, rows, cols, conn_type = type_conn.grid_eight, parameters = None, ccore = False): """! @brief Constructor of self-organized map. @param[in] rows (uint): Number of neurons in the column (number of rows). @param[in] cols (uint): Number of neurons in the row (number of columns). @param[in] conn_type (type_conn): Type of connection between oscillators in the network (grid four, grid eight, honeycomb, function neighbour). @param[in] parameters (som_parameters): Other specific parameters. @param[in] ccore (bool): If True simulation is performed by CCORE library (C++ implementation of pyclustering). """ # some of these parameters are required despite core implementation, for example, for network demonstration. self._cols = cols; self._rows = rows; self._size = cols * rows; self._conn_type = conn_type; if (parameters is not None): self._params = parameters; else: self._params = som_parameters(); if (self._params.init_radius is None): if ((cols + rows) / 4.0 > 1.0): self._params.init_radius = 2.0; elif ( (cols > 1) and (rows > 1) ): self._params.init_radius = 1.5; else: self._params.init_radius = 1.0; if (ccore is True): self.__ccore_som_pointer = wrapper.som_create(rows, cols, conn_type, self._params); else: # location self._location = list(); for i in range(self._rows): for j in range(self._cols): self._location.append([float(i), float(j)]); # awards self._award = [0] * self._size; self._capture_objects = [ [] for i in range(self._size) ]; # distances self._sqrt_distances = [ [ [] for i in range(self._size) ] for j in range(self._size) ]; for i in range(self._size): for j in range(i, self._size, 1): dist = euclidean_distance_sqrt(self._location[i], self._location[j]); self._sqrt_distances[i][j] = dist; self._sqrt_distances[j][i] = dist; # connections if (conn_type != type_conn.func_neighbor): self._create_connections(conn_type);
def _competition(self, x):
    """!
    @brief Calculates neuron winner (distance, neuron index).

    @param[in] x (list): Input pattern from the input data set, for example it can be coordinates of point.

    @return (uint) Returns index of neuron that is winner.
    """
    # The winner is the neuron whose weight vector is closest to the input;
    # ties are resolved in favor of the lowest neuron index.
    winner_index = 0
    shortest_distance = euclidean_distance_sqrt(self._weights[0], x)

    for neuron_index in range(1, self._size):
        candidate_distance = euclidean_distance_sqrt(self._weights[neuron_index], x)
        if candidate_distance < shortest_distance:
            winner_index = neuron_index
            shortest_distance = candidate_distance

    return winner_index
def _competition(self, x):
    """!
    @brief Calculates neuron winner (distance, neuron index).

    @param[in] x (list): Input pattern from the input data set, for example it can be coordinates of point.

    @return (uint) Returns index of neuron that is winner.
    """
    # The winner is the neuron whose weight vector is closest to the input
    # pattern; min() keeps the first minimum, so ties resolve to the lowest
    # index exactly as the explicit scan did.
    return min(range(self._size),
               key=lambda neuron_index: euclidean_distance_sqrt(self._weights[neuron_index], x))
def __neighbor_indexes(self, point):
    """!
    @brief Return list of indexes of neighbors of specified point for the data.

    @param[in] point (uint): An index of a point for which potential neighbors should be returned in line with connectivity radius.

    @return (list) Return list of indexes of neighbors in line the connectivity radius.
    """
    # return [i for i in range(0, len(data)) if euclidean_distance(data[point], data[i]) <= eps and data[i] != data[point]];  # Slow mode
    # Fast mode: squared distances are compared against the squared radius.
    # The candidate is excluded by index (i != point) rather than by value:
    # the previous value comparison also dropped other points whose
    # coordinates coincide with 'point', losing legitimate neighbors.
    return [i for i in range(0, len(self.__pointer_data))
            if (i != point) and
               (euclidean_distance_sqrt(self.__pointer_data[point], self.__pointer_data[i]) <= self.__sqrt_eps)]
def __calculate_weight(self, stimulus1, stimulus2):
    """!
    @brief Calculate weight between neurons that have external stimulus1 and stimulus2.

    @param[in] stimulus1 (list): External stimulus of the first neuron.
    @param[in] stimulus2 (list): External stimulus of the second neuron.

    @return (double) Weight between neurons that are under specified stimulus.
    """
    # Gaussian-style kernel over the squared distance between stimuli,
    # scaled by the average distance across the data set.
    squared_distance = euclidean_distance_sqrt(stimulus1, stimulus2)
    return math.exp(-squared_distance / (2.0 * self.__average_distance))
def __bayesian_information_criterion(self, clusters, centers):
    """!
    @brief Calculates splitting criterion for input clusters using bayesian information criterion.

    @param[in] clusters (list): Clusters for which splitting criterion should be calculated.
    @param[in] centers (list): Centers of the clusters.

    @return (double) Splitting criterion in line with bayesian information criterion.
            High value of splitting criterion means that current structure is much better.

    @see __minimum_noiseless_description_length(clusters, centers)
    """
    scores = [float('inf')] * len(clusters)  # splitting criterion
    dimension = len(self.__pointer_data[0])

    # estimation of the noise variance in the data set: sum of squared
    # distances from each object to its cluster center, divided by (N - K).
    sigma_sqrt = 0.0
    K = len(clusters)
    N = 0.0

    for index_cluster in range(0, len(clusters), 1):
        for index_object in clusters[index_cluster]:
            sigma_sqrt += euclidean_distance_sqrt(
                self.__pointer_data[index_object], centers[index_cluster])

        N += len(clusters[index_cluster])

    if (N - K > 0):
        sigma_sqrt /= (N - K)
        # p: number of free parameters of the model (K-1 cluster priors,
        # K * dimension center coordinates, one shared variance).
        p = (K - 1) + dimension * K + 1

        # in case of the same points, sigma_sqrt can be zero (issue: #407)
        sigma_multiplier = 0.0
        if (sigma_sqrt <= 0.0):
            sigma_multiplier = float('-inf')
        else:
            sigma_multiplier = dimension * 0.5 * log(sigma_sqrt)

        # splitting criterion: per-cluster log-likelihood penalized by the
        # number of model parameters.
        for index_cluster in range(0, len(clusters), 1):
            n = len(clusters[index_cluster])

            L = n * log(n) - n * log(N) - n * 0.5 * log(
                2.0 * numpy.pi) - n * sigma_multiplier - (n - K) * 0.5

            # BIC calculation
            scores[index_cluster] = L - p * 0.5 * log(N)

    return sum(scores)
def __minimum_noiseless_description_length(self, clusters, centers):
    """!
    @brief Calculates splitting criterion for input clusters using minimum noiseless description length criterion.

    @param[in] clusters (list): Clusters for which splitting criterion should be calculated.
    @param[in] centers (list): Centers of the clusters.

    @return (double) Returns splitting criterion in line with bayesian information criterion.
            Low value of splitting cretion means that current structure is much better.

    @see __bayesian_information_criterion(clusters, centers)
    """
    scores = 0.0

    # W: accumulated average within-cluster scatter; K: number of clusters;
    # N: total number of clustered objects.
    W = 0.0
    K = len(clusters)
    N = 0.0

    # sigma_sqrt: total within-cluster scatter used as variance estimate.
    sigma_sqrt = 0.0

    # alpha / betta: criterion weighting constants.
    # NOTE(review): values 0.9/0.9 appear to be tuning constants of the MNDL
    # formula - confirm against the original publication before changing.
    alpha = 0.9
    betta = 0.9

    for index_cluster in range(0, len(clusters), 1):
        Ni = len(clusters[index_cluster])
        Wi = 0.0
        for index_object in clusters[index_cluster]:
            # Squared distance from each object to its cluster center.
            Wi += euclidean_distance_sqrt(
                self.__pointer_data[index_object], centers[index_cluster])

        sigma_sqrt += Wi
        W += Wi / Ni
        N += Ni

    # Criterion is defined only when the variance estimate is computable
    # (more objects than clusters); otherwise 0.0 is returned.
    if (N - K != 0):
        sigma_sqrt /= (N - K)
        sigma = sigma_sqrt**0.5

        Kw = (1.0 - K / N) * sigma_sqrt
        Ks = (2.0 * alpha * sigma / (N**0.5)) * (
            (alpha**2.0) * sigma_sqrt / N + W - Kw / 2.0)**0.5

        scores = sigma_sqrt * (2 * K)**0.5 * (
            (2 * K)**0.5 + betta
        ) / N + W - sigma_sqrt + Ks + 2 * alpha**0.5 * sigma_sqrt / N

    return scores
def __calculate_estimation(self):
    """!
    @brief Calculates estimation (cost) of the current clusters. The lower the estimation, the more optimally configuration of clusters.

    @return (double) estimation of current clusters.
    """
    # Cost is the total (squared) distance from every point to the medoid
    # of the cluster it is assigned to.
    total_cost = 0.0
    for position, cluster in enumerate(self.__clusters):
        medoid_index = self.__current[position]
        for point_index in cluster:
            total_cost += euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[medoid_index])

    return total_cost
def __neighbor_indexes(self, point):
    """!
    @brief Return list of indexes of neighbors of specified point for the data.

    @param[in] point (uint): An index of a point for which potential neighbors should be returned in line with connectivity radius.

    @return (list) Return list of indexes of neighbors in line the connectivity radius.
    """
    # return [i for i in range(0, len(data)) if euclidean_distance(data[point], data[i]) <= eps and data[i] != data[point]];  # Slow mode
    # Fast mode: squared distance compared against the squared radius; the
    # point itself is excluded by index.
    neighbors = []
    source = self.__pointer_data[point]

    for candidate in range(len(self.__pointer_data)):
        if candidate == point:
            continue

        if euclidean_distance_sqrt(source, self.__pointer_data[candidate]) <= self.__sqrt_eps:
            neighbors.append(candidate)

    return neighbors
def __initialize_distances(self, size, location):
    """!
    @brief Initialize distance matrix in SOM grid.

    @param[in] size (uint): Amount of neurons in the network.
    @param[in] location (list): List of coordinates of each neuron in the network.

    @return (list) Distance matrix between neurons in the network.
    """
    # The matrix is symmetric, so every pair is computed once and mirrored.
    sqrt_distances = [[[] for _ in range(size)] for _ in range(size)]

    for row in range(size):
        for col in range(row, size):
            mutual_distance = euclidean_distance_sqrt(location[row], location[col])
            sqrt_distances[row][col] = mutual_distance
            sqrt_distances[col][row] = mutual_distance

    return sqrt_distances
def __calculate_nearest_distance(self, index_cluster1, index_cluster2):
    """!
    @brief Finds two nearest objects in two specified clusters and returns distance between them.

    @param[in] (uint) Index of the first cluster.
    @param[in] (uint) Index of the second cluster.

    @return The nearest euclidean distance between two clusters.
    """
    # Exhaustive scan over all cross-cluster pairs for the closest one.
    nearest = float('Inf')
    for object1 in self.__clusters[index_cluster1]:
        for object2 in self.__clusters[index_cluster2]:
            pair_distance = euclidean_distance_sqrt(self.__pointer_data[object1], self.__pointer_data[object2])
            nearest = min(nearest, pair_distance)

    return nearest
def __find_another_nearest_medoid(self, point_index, current_medoid_index):
    """!
    @brief Finds the another nearest medoid for the specified point that is differ from the specified medoid.

    @param[in] point_index: index of point in dataspace for that searching of medoid in current list of medoids is perfomed.
    @param[in] current_medoid_index: index of medoid that shouldn't be considered as a nearest.

    @return (uint) index of the another nearest medoid for the point.
    """
    other_medoid_index = -1
    other_distance_nearest = float('inf')
    for index_medoid in self.__current:
        if (index_medoid != current_medoid_index):
            # BUG FIX: distance must be measured to the candidate medoid
            # (index_medoid), not to the excluded medoid; the previous code
            # compared the same constant distance for every candidate, so the
            # first "other" medoid always won regardless of its position.
            other_distance_candidate = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[index_medoid])

            if (other_distance_candidate < other_distance_nearest):
                other_distance_nearest = other_distance_candidate
                other_medoid_index = index_medoid

    return other_medoid_index
def __has_object_connection(self, oscillator_index1, oscillator_index2):
    """!
    @brief Searches for pair of objects that are encoded by specified neurons and that are connected in line with connectivity radius.

    @param[in] oscillator_index1 (uint): Index of the first oscillator in the second layer.
    @param[in] oscillator_index2 (uint): Index of the second oscillator in the second layer.

    @return (bool) True - if there is pair of connected objects encoded by specified oscillators.
    """
    neuron1 = self._som_osc_table[oscillator_index1]
    neuron2 = self._som_osc_table[oscillator_index2]

    # Oscillators are linked when any pair of objects captured by their SOM
    # neurons lies within the connectivity radius.
    for object1 in self._som.capture_objects[neuron1]:
        for object2 in self._som.capture_objects[neuron2]:
            if euclidean_distance_sqrt(self._data[object1], self._data[object2]) <= self._radius:
                return True

    return False
def __calculate_farthest_distance(self, index_cluster1, index_cluster2):
    """!
    @brief Finds two farthest objects in two specified clusters in terms and returns distance between them.

    @param[in] index_cluster1 (uint): Index of the first cluster.
    @param[in] index_cluster2 (uint): Index of the second cluster.

    @return The farthest euclidean distance between two clusters.

    """
    # The largest squared euclidean distance over all cross-cluster pairs;
    # an empty pairing yields 0.0, matching the previous accumulator seed.
    return max(
        (euclidean_distance_sqrt(self.__pointer_data[object1], self.__pointer_data[object2])
         for object1 in self.__clusters[index_cluster1]
         for object2 in self.__clusters[index_cluster2]),
        default=0.0)
def __merge_by_centroid_link(self):
    """!
    @brief Merges the most similar clusters in line with centroid link type.

    """
    best_distance = float('Inf')
    best_pair = None

    amount_centers = len(self.__centers)
    for first in range(0, amount_centers):
        for second in range(first + 1, amount_centers):
            candidate_distance = euclidean_distance_sqrt(self.__centers[first], self.__centers[second])
            if (candidate_distance < best_distance):
                best_distance = candidate_distance
                best_pair = (first, second)

    survivor, absorbed = best_pair

    # Absorb the second cluster into the first one and refresh its center.
    self.__clusters[survivor] += self.__clusters[absorbed]
    self.__centers[survivor] = self.__calculate_center(self.__clusters[survivor])

    self.__clusters.pop(absorbed)   # remove merged cluster.
    self.__centers.pop(absorbed)    # remove merged center.
def __calculate_initial_clusters(self, centers):
    """!
    @brief Calculate Euclidean distance to each point from the each cluster.
           Nearest points are captured by according clusters and as a result clusters are updated.

    @param[in] centers (list): Coordinates of centers to that points are assigned.

    @return (list) Updated clusters as list of clusters. Each cluster contains indexes of objects from data.

    """
    clusters = [[] for _ in range(len(centers))]
    for index_point in range(len(self.__sample)):
        index_optim, dist_optim = -1, 0.0
        for index in range(len(centers)):
            dist = euclidean_distance_sqrt(self.__sample[index_point], centers[index])
            # BUG FIX: 'index is 0' compared identity instead of value and only
            # worked through CPython's small-int caching; use '==' for the
            # guaranteed "first center always initializes the optimum" behavior.
            if ((dist < dist_optim) or (index == 0)):
                index_optim, dist_optim = index, dist
        clusters[index_optim].append(index_point)
    return clusters
def __update_clusters(self, medoids):
    """!
    @brief Forms cluster in line with specified medoids by calculation distance from each point to medoids.

    @param[in] medoids (list): Indexes of points from the data that are used as medoids.

    """
    self.__belong = [0] * len(self.__pointer_data)
    self.__clusters = [[] for _ in range(len(medoids))]
    for index_point in range(len(self.__pointer_data)):
        index_optim = -1
        dist_optim = 0.0
        for index in range(len(medoids)):
            dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__pointer_data[medoids[index]])
            # BUG FIX: 'index is 0' compared identity instead of value and only
            # worked through CPython's small-int caching; '==' keeps the intended
            # "first medoid always initializes the optimum" behavior portably.
            if ((dist < dist_optim) or (index == 0)):
                index_optim = index
                dist_optim = dist
        self.__clusters[index_optim].append(index_point)
        self.__belong[index_point] = index_optim

    # If cluster is not able to capture object it should be removed
    self.__clusters = [cluster for cluster in self.__clusters if len(cluster) > 0]
def process(self):
    """!
    @brief Performs cluster analysis in line with rules of K-Medoids algorithm.

    @remark Results of clustering can be obtained using corresponding get methods.

    @see get_clusters()
    @see get_medoids()

    """
    # Check for dimension
    if (len(self.__pointer_data[0]) != len(self.__medoids[0])):
        raise NameError(
            'Dimension of the input data and dimension of the initial cluster medians must be equal.'
        )

    # Squared tolerance matches the squared euclidean metric below (fast solution);
    # comparing plain tolerance against real distances would be the slow solution.
    threshold = self.__tolerance * self.__tolerance

    shift = float('inf')
    while (shift > threshold):
        self.__clusters = self.__update_clusters()
        refreshed_medoids = self.__update_medoids()

        # The largest medoid displacement must be measured before reassignment.
        shift = max(euclidean_distance_sqrt(self.__medoids[index], refreshed_medoids[index])
                    for index in range(len(refreshed_medoids)))

        self.__medoids = refreshed_medoids
def __optimize_configuration(self):
    """!
    @brief Finds quasi-optimal medoids and updates in line with them clusters in line with algorithm's rules.

    """
    index_neighbor = 0
    while (index_neighbor < self.__maxneighbor):
        # get random current medoid that is to be replaced
        current_medoid_index = self.__current[random.randint(0, self.__number_clusters - 1)]
        current_medoid_cluster_index = self.__belong[current_medoid_index]

        # get new candidate to be medoid; re-draw until the candidate is not a medoid already.
        # FIX: the stale local 'candidate_medoid_cluster_index' was removed - it was
        # computed from the first random draw only, never refreshed by the re-draw
        # loop below, and never read afterwards.
        candidate_medoid_index = random.randint(0, len(self.__pointer_data) - 1)
        while (candidate_medoid_index in self.__current):
            candidate_medoid_index = random.randint(0, len(self.__pointer_data) - 1)

        candidate_cost = 0.0
        for point_index in range(0, len(self.__pointer_data)):
            if (point_index not in self.__current):
                # get non-medoid point and its medoid
                point_cluster_index = self.__belong[point_index]
                point_medoid_index = self.__current[point_cluster_index]

                # get other medoid that is nearest to the point (except current and candidate)
                other_medoid_index = self.__find_another_nearest_medoid(point_index, current_medoid_index)
                other_medoid_cluster_index = self.__belong[other_medoid_index]

                # for optimization calculate all required distances
                # from the point to current medoid
                distance_current = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[current_medoid_index])

                # from the point to candidate median
                distance_candidate = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[candidate_medoid_index])

                # from the point to nearest (own) medoid
                distance_nearest = float('inf')
                if ((point_medoid_index != candidate_medoid_index) and (point_medoid_index != current_medoid_cluster_index)):
                    distance_nearest = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[point_medoid_index])

                # apply rules for cost calculation
                if (point_cluster_index == current_medoid_cluster_index):
                    # case 1:
                    if (distance_candidate >= distance_nearest):
                        candidate_cost += distance_nearest - distance_current
                    # case 2:
                    else:
                        candidate_cost += distance_candidate - distance_current
                elif (point_cluster_index == other_medoid_cluster_index):
                    # case 3 ('nearest medoid' is the representative object of that cluster and object is more similar to 'nearest' than to 'candidate'):
                    if (distance_candidate > distance_nearest):
                        pass
                    # case 4:
                    else:
                        candidate_cost += distance_candidate - distance_nearest

        if (candidate_cost < 0):
            # set candidate that has won
            self.__current[current_medoid_cluster_index] = candidate_medoid_index

            # recalculate clusters
            self.__update_clusters(self.__current)

            # reset iterations and starts investigation from the begining
            index_neighbor = 0
        else:
            index_neighbor += 1
def __init__(self, rows, cols, conn_type=type_conn.grid_eight, parameters=None, ccore=False):
    """!
    @brief Constructor of self-organized map.

    @param[in] rows (uint): Number of neurons in the column (number of rows).
    @param[in] cols (uint): Number of neurons in the row (number of columns).
    @param[in] conn_type (type_conn): Type of connection between oscillators in the network (grid four, grid eight, honeycomb, function neighbour).
    @param[in] parameters (som_parameters): Other specific parameters.
    @param[in] ccore (bool): If True simulation is performed by CCORE library (C++ implementation of pyclustering).

    """
    # some of these parameters are required despite core implementation, for example, for network demonstration.
    self._cols = cols
    self._rows = rows
    self._size = cols * rows
    self._conn_type = conn_type

    self._params = parameters if (parameters is not None) else som_parameters()

    if (self._params.init_radius is None):
        # pick an initial learning radius that suits the map dimensions.
        if ((cols + rows) / 4.0 > 1.0):
            self._params.init_radius = 2.0
        elif ((cols > 1) and (rows > 1)):
            self._params.init_radius = 1.5
        else:
            self._params.init_radius = 1.0

    if (ccore is True):
        # delegate everything to the C++ core implementation.
        self.__ccore_som_pointer = wrapper.som_create(rows, cols, conn_type, self._params)
        return

    # location of each neuron on the plane (row-major order).
    self._location = [[float(row), float(col)]
                      for row in range(self._rows)
                      for col in range(self._cols)]

    # awards
    self._award = [0] * self._size
    self._capture_objects = [[] for _ in range(self._size)]

    # pre-computed squared euclidean distances between every pair of neuron locations.
    self._sqrt_distances = [[0.0] * self._size for _ in range(self._size)]
    for first in range(self._size):
        for second in range(first, self._size):
            pair_distance = euclidean_distance_sqrt(self._location[first], self._location[second])
            self._sqrt_distances[first][second] = pair_distance
            self._sqrt_distances[second][first] = pair_distance

    # connections
    if (conn_type != type_conn.func_neighbor):
        self._create_connections(conn_type)
def __optimize_configuration(self):
    """!
    @brief Finds quasi-optimal medoids and updates in line with them clusters in line with algorithm's rules.

    """
    # Iterate until maxneighbor consecutive random neighbors fail to improve the cost.
    index_neighbor = 0;
    while (index_neighbor < self.__maxneighbor):
        # get random current medoid that is to be replaced
        current_medoid_index = self.__current[random.randint(0, self.__number_clusters - 1)];
        current_medoid_cluster_index = self.__belong[current_medoid_index];

        # get new candidate to be medoid (re-draw until the candidate is not a medoid already)
        candidate_medoid_index = random.randint(0, len(self.__pointer_data) - 1);
        while (candidate_medoid_index in self.__current):
            candidate_medoid_index = random.randint(0, len(self.__pointer_data) - 1);

        # total change of clustering cost if the candidate replaced the current medoid.
        candidate_cost = 0.0;
        for point_index in range(0, len(self.__pointer_data)):
            if (point_index not in self.__current):
                # get non-medoid point and its medoid
                point_cluster_index = self.__belong[point_index];
                point_medoid_index = self.__current[point_cluster_index];

                # get other medoid that is nearest to the point (except current and candidate)
                other_medoid_index = self.__find_another_nearest_medoid(point_index, current_medoid_index);
                other_medoid_cluster_index = self.__belong[other_medoid_index];

                # for optimization calculate all required distances
                # from the point to current medoid
                distance_current = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[current_medoid_index]);

                # from the point to candidate median
                distance_candidate = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[candidate_medoid_index]);

                # from the point to nearest (own) medoid
                distance_nearest = float('inf');
                # NOTE(review): the second operand compares a medoid (point) index with a
                # cluster index — looks like a type mismatch; confirm against CLARANS cost rules.
                if ( (point_medoid_index != candidate_medoid_index) and (point_medoid_index != current_medoid_cluster_index) ):
                    distance_nearest = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[point_medoid_index]);

                # apply rules for cost calculation
                if (point_cluster_index == current_medoid_cluster_index):
                    # case 1:
                    if (distance_candidate >= distance_nearest):
                        candidate_cost += distance_nearest - distance_current;
                    # case 2:
                    else:
                        candidate_cost += distance_candidate - distance_current;

                elif (point_cluster_index == other_medoid_cluster_index):
                    # case 3 ('nearest medoid' is the representative object of that cluster and object is more similar to 'nearest' than to 'candidate'):
                    if (distance_candidate > distance_nearest):
                        pass;
                    # case 4:
                    else:
                        candidate_cost += distance_candidate - distance_nearest;

        # A negative cost delta means the candidate yields a better configuration.
        if (candidate_cost < 0):
            # set candidate that has won
            self.__current[current_medoid_cluster_index] = candidate_medoid_index;

            # recalculate clusters
            self.__update_clusters(self.__current);

            # reset iterations and starts investigation from the begining
            index_neighbor = 0;
        else:
            index_neighbor += 1;