def find_nearest_node(self, point):
    """!
    @brief Searches the tree for the node that is nearest to the given point.
    @details Nodes whose distance to the point is exactly zero are ignored, therefore a node that is
              located at the requested coordinates is never returned. A subtree is skipped only when
              it cannot contain a node that is closer than the best node found so far.

    @param[in] point (list): Coordinates of the point whose nearest node is searched.

    @return (node) The nearest node with non-zero distance to the point, or None if there is no such node.

    """
    root = self.__root
    if root is None:
        return None

    best_node = None
    best_distance = float('inf')

    # Each entry is (node, bound). 'bound' is a lower limit for the distance between the point and
    # any node of the subtree rooted at 'node'; None means that no limit is known.
    pending = [(root, None)]

    while pending:
        cur_node, bound = pending.pop()

        # The bound is checked when the subtree is about to be visited, so it benefits from every
        # improvement of the best distance that happened after the subtree was scheduled.
        if (bound is not None) and (bound >= best_distance):
            continue

        # Check if it's the best candidate; a node that coincides with the point is not a candidate.
        candidate_distance = euclidean_distance_sqrt(cur_node.data, point)
        if (candidate_distance < best_distance) and (candidate_distance != 0):
            best_node = cur_node
            best_distance = candidate_distance

        axis = cur_node.disc
        axis_distance = euclidean_distance_sqrt(cur_node.data[axis], point[axis])

        # NOTE(review): assumes the usual KD-tree layout - along the discriminator axis one subtree
        # holds smaller coordinates than the node and the other one equal or greater - TODO confirm.
        point_is_below = point[axis] < cur_node.data[axis]

        near_children = []
        far_children = []
        for child in self.children(cur_node):
            child_is_below = child.data[axis] < cur_node.data[axis]
            if child_is_below == point_is_below:
                near_children.append(child)
            else:
                far_children.append(child)

        # The container is LIFO: the far side is pushed first so that the near side is explored first.
        for child in far_children:
            pending.append((child, axis_distance))

        for child in near_children:
            pending.append((child, None))

    return best_node
def __merge_by_average_link(self):
    """!
    @brief Merges the most similar clusters in line with average link type.
    @details Similarity of two clusters is the mean of the distances over all pairs of objects where
              the first object belongs to one cluster and the second object to the other one. The pair
              of clusters with the smallest mean is merged: objects of the second cluster are appended
              to the first cluster and the second cluster is removed.

    @exception ValueError if there is no pair of non-empty clusters that can be merged.

    """
    minimum_average_distance = float('Inf')
    indexes = None

    for index_cluster1 in range(0, len(self.__clusters)):
        for index_cluster2 in range(index_cluster1 + 1, len(self.__clusters)):
            cluster1 = self.__clusters[index_cluster1]
            cluster2 = self.__clusters[index_cluster2]

            # The sum below has one term per pair, so the mean is taken over the amount of pairs
            # (not over the total amount of objects in both clusters).
            amount_pairs = len(cluster1) * len(cluster2)
            if amount_pairs == 0:
                continue    # an empty cluster has no objects to measure the distance to.

            total_distance = 0.0
            for index_object1 in cluster1:
                for index_object2 in cluster2:
                    total_distance += euclidean_distance_sqrt(self.__pointer_data[index_object1], self.__pointer_data[index_object2])

            candidate_average_distance = total_distance / amount_pairs

            if candidate_average_distance < minimum_average_distance:
                minimum_average_distance = candidate_average_distance
                indexes = [index_cluster1, index_cluster2]

    if indexes is None:
        raise ValueError('Unable to find a pair of clusters for merging: at least two non-empty clusters are required.')

    self.__clusters[indexes[0]] += self.__clusters[indexes[1]]
    self.__clusters.pop(indexes[1])    # remove merged cluster.
def get_distance(self, entry, type_measurement):
    """!
    @brief Calculates distance between two clusters in line with measurement type.

    @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
    @param[in] type_measurement (measurement_type): Distance measurement algorithm between two clusters.

    @return (double) Distance between two clusters.

    @exception AssertionError if the measurement type is not one of the supported types.

    """
    if type_measurement is measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
        return euclidean_distance_sqrt(entry.get_centroid(), self.get_centroid())

    if type_measurement is measurement_type.CENTROID_MANHATTAN_DISTANCE:
        return manhattan_distance(entry.get_centroid(), self.get_centroid())

    if type_measurement is measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
        return self.__get_average_inter_cluster_distance(entry)

    if type_measurement is measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
        return self.__get_average_intra_cluster_distance(entry)

    if type_measurement is measurement_type.VARIANCE_INCREASE_DISTANCE:
        return self.__get_variance_increase_distance(entry)

    # An explicit raise is used instead of 'assert 0': assert statements are removed when Python
    # runs with -O, and the method would then silently return None for an unknown type.
    # The exception type is the one that the assert statement used to raise.
    raise AssertionError("Unsupported type of measurement '%s'." % str(type_measurement))
def process(self):
    """!
    @brief Performs cluster analysis in line with rules of K-Medians algorithm.
    @details Clusters and medians are recalculated in turn until the largest change of a median,
              measured by euclidean_distance_sqrt, is not greater than the squared tolerance.

    @remark Results of clustering can be obtained using corresponding get methods.

    @exception NameError if dimension of the first point differs from dimension of the first median.

    @see get_clusters()
    @see get_medians()

    """
    # The tolerance is squared because it is compared with values of euclidean_distance_sqrt
    # (presumably a squared distance - the original code calls this the fast solution).
    threshold = self.__tolerance * self.__tolerance

    # Check for dimension
    if len(self.__pointer_data[0]) != len(self.__medians[0]):
        raise NameError('Dimension of the input data and dimension of the initial cluster medians must be equal.')

    shift = float('inf')
    while shift > threshold:
        self.__clusters = self.__update_clusters()
        fresh_medians = self.__update_medians()

        # The change has to be measured before the old medians are replaced.
        shift = max(
            euclidean_distance_sqrt(self.__medians[position], fresh_medians[position])
            for position in range(len(self.__medians))
        )

        self.__medians = fresh_medians
def __merge_by_signle_link(self):
    """!
    @brief Merges the most similar clusters in line with single link type.
    @details Similarity of two clusters is the smallest distance between an object of the first cluster
              and an object of the second cluster. The pair of clusters with the smallest such distance
              is merged: objects of the second cluster are appended to the first cluster and the second
              cluster is removed.

    @exception ValueError if there is no pair of clusters that can be merged.

    """
    minimum_single_distance = float('Inf')
    indexes = None

    for index_cluster1 in range(0, len(self.__clusters)):
        for index_cluster2 in range(index_cluster1 + 1, len(self.__clusters)):
            # Find nearest objects of the two clusters
            candidate_minimum_distance = float('Inf')

            for index_object1 in self.__clusters[index_cluster1]:
                for index_object2 in self.__clusters[index_cluster2]:
                    distance = euclidean_distance_sqrt(self.__pointer_data[index_object1], self.__pointer_data[index_object2])
                    if distance < candidate_minimum_distance:
                        candidate_minimum_distance = distance

            if candidate_minimum_distance < minimum_single_distance:
                minimum_single_distance = candidate_minimum_distance
                indexes = [index_cluster1, index_cluster2]

    # Without this check a missing pair ends up as an unclear TypeError on 'None[0]'.
    if indexes is None:
        raise ValueError('Unable to find a pair of clusters for merging: at least two non-empty clusters are required.')

    self.__clusters[indexes[0]] += self.__clusters[indexes[1]]
    self.__clusters.pop(indexes[1])    # remove merged cluster.
def __update_clusters(self, centers, available_indexes = None):
    """!
    @brief Assigns each considered point to the center with the smallest euclidean_distance_sqrt value.
    @details When several centers are equally close, the center with the lowest index captures the point.

    @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
    @param[in] available_indexes (list): Indexes that define which points can be used from input data, if None - then all points are used.

    @return (list) Updated clusters: one list of point indexes per center, in the order of the centers.

    """
    if available_indexes is None:
        bypass = range(len(self.__pointer_data))
    else:
        bypass = available_indexes

    clusters = [[] for _ in range(len(centers))]

    for index_point in bypass:
        index_optim = -1
        dist_optim = 0.0

        for index, center in enumerate(centers):
            dist = euclidean_distance_sqrt(self.__pointer_data[index_point], center)

            # The first center is always the initial candidate. The index is compared by value:
            # identity comparison with an integer literal relies on an implementation detail.
            if (index == 0) or (dist < dist_optim):
                index_optim = index
                dist_optim = dist

        clusters[index_optim].append(index_point)

    return clusters
def get_distance_matrix(self):
    """!
    @brief Calculates distance matrix (U-matrix).
    @details The U-Matrix visualizes based on the distance in input space between a weight vector and its neighbors on map.
              Each cell holds the mean of euclidean_distance_sqrt between weights of the neuron and weights
              of each of its neighbors; a neuron without neighbors keeps the value 0.0.

    @return (list) Distance matrix (U-matrix) with one row per map row and one value per map column.

    @see show_distance_matrix()
    @see get_density_matrix()

    """
    if self.__ccore_som_pointer is not None:
        self._weights = wrapper.som_get_weights(self.__ccore_som_pointer)

        if self._conn_type != type_conn.func_neighbor:
            self._neighbors = wrapper.som_get_neighbors(self.__ccore_som_pointer)

    distance_matrix = [[0.0] * self._cols for _ in range(self._rows)]

    # Connections do not depend on the neuron that is being processed, so they are created once
    # instead of once per neuron. As before, nothing is created for a map without neurons.
    if (self._conn_type == type_conn.func_neighbor) and (self._rows > 0) and (self._cols > 0):
        self._create_connections(type_conn.grid_eight)

    for i in range(self._rows):
        for j in range(self._cols):
            neuron_index = i * self._cols + j
            neighbors = self._neighbors[neuron_index]

            # A neuron without neighbors (for example, the map 1x1) would cause division by zero.
            if len(neighbors) == 0:
                continue

            for neighbor_index in neighbors:
                distance_matrix[i][j] += euclidean_distance_sqrt(self._weights[neuron_index], self._weights[neighbor_index])

            distance_matrix[i][j] /= len(neighbors)

    return distance_matrix
def __improve_parameters(self, centers, available_indexes = None):
    """!
    @brief Performs k-means clustering in the specified region.
    @details Clusters and centers are recalculated in turn until the largest change of a center,
              measured by euclidean_distance_sqrt, is not greater than the squared tolerance.
              Clusters that captured no points are dropped before centers are recalculated.

    @param[in] centers (list): Centers of clusters.
    @param[in] available_indexes (list): Indexes that define which points can be used for k-means clustering, if None - then all points are used.

    @return (tuple) Allocated clusters and their centers: (clusters, centers), where each cluster contains
             indexes of objects in list of data.

    """
    # float('inf') is used instead of numpy.Inf: that alias was removed in NumPy 2.0.
    changes = float('inf')

    # The tolerance is squared because it is compared with values of euclidean_distance_sqrt.
    stop_condition = self.__tolerance * self.__tolerance

    clusters = []

    while changes > stop_condition:
        clusters = self.__update_clusters(centers, available_indexes)
        clusters = [cluster for cluster in clusters if len(cluster) > 0]

        updated_centers = self.__update_centers(clusters)

        changes = max([euclidean_distance_sqrt(centers[index], updated_centers[index]) for index in range(len(updated_centers))])

        centers = updated_centers

    return (clusters, centers)
def process(self):
    """!
    @brief Performs cluster analysis in line with rules of K-Means algorithm.
    @details When CCORE is enabled the clusters are obtained from the wrapper and the centers are
              recalculated once. Otherwise clusters and centers are recalculated in turn until the
              largest change of a center, measured by euclidean_distance_sqrt, is not greater than
              the squared tolerance.

    @remark Results of clustering can be obtained using corresponding get methods.

    @exception NameError if dimension of the first point differs from dimension of the first center
                (checked by the Python implementation only).

    @see get_clusters()
    @see get_centers()

    """
    if self.__ccore is True:
        self.__clusters = wrapper.kmeans(self.__pointer_data, self.__centers, self.__tolerance)
        self.__centers = self.__update_centers()
        return

    # The tolerance is squared because it is compared with values of euclidean_distance_sqrt
    # (presumably a squared distance - the original code calls this the fast solution).
    threshold = self.__tolerance * self.__tolerance

    # Check for dimension
    if len(self.__pointer_data[0]) != len(self.__centers[0]):
        raise NameError('Dimension of the input data and dimension of the initial cluster centers must be equal.')

    shift = float('inf')
    while shift > threshold:
        self.__clusters = self.__update_clusters()
        fresh_centers = self.__update_centers()

        # The change has to be measured before the old centers are replaced.
        shift = max(
            euclidean_distance_sqrt(self.__centers[position], fresh_centers[position])
            for position in range(len(self.__centers))
        )

        self.__centers = fresh_centers
def get_distance(self, entry, type_measurement):
    """!
    @brief Calculates distance between two clusters in line with measurement type.

    @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
    @param[in] type_measurement (measurement_type): Distance measurement algorithm between two clusters.

    @return (double) Distance between two clusters.

    @exception AssertionError if the measurement type is not one of the supported types.

    """
    if type_measurement is measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
        return euclidean_distance_sqrt(entry.get_centroid(), self.get_centroid())

    elif type_measurement is measurement_type.CENTROID_MANHATTAN_DISTANCE:
        return manhattan_distance(entry.get_centroid(), self.get_centroid())

    elif type_measurement is measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
        return self.__get_average_inter_cluster_distance(entry)

    elif type_measurement is measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
        return self.__get_average_intra_cluster_distance(entry)

    elif type_measurement is measurement_type.VARIANCE_INCREASE_DISTANCE:
        return self.__get_variance_increase_distance(entry)

    else:
        # 'assert 0' disappears when Python runs with -O and the method would silently return
        # None; an explicit raise of the same exception type is always executed.
        raise AssertionError("Unsupported type of measurement '%s'." % str(type_measurement))
def get_distance_matrix(self):
    """!
    @brief Calculates distance matrix (U-matrix).
    @details The U-Matrix visualizes based on the distance in input space between a weight vector and its neighbors on map.
              Each cell holds the mean of euclidean_distance_sqrt between weights of the neuron and weights
              of each of its neighbors; a neuron without neighbors keeps the value 0.0.

    @return (list) Distance matrix (U-matrix) with one row per map row and one value per map column.

    @see show_distance_matrix()
    @see get_density_matrix()

    """
    if self.__ccore_som_pointer is not None:
        self._weights = wrapper.som_get_weights(self.__ccore_som_pointer)

        if self._conn_type != type_conn.func_neighbor:
            self._neighbors = wrapper.som_get_neighbors(self.__ccore_som_pointer)

    distance_matrix = [[0.0] * self._cols for _ in range(self._rows)]

    # Creation of connections is the same for every neuron, therefore it is performed once before
    # the traversal. As before, nothing is created for a map without neurons.
    if (self._conn_type == type_conn.func_neighbor) and (self._rows > 0) and (self._cols > 0):
        self._create_connections(type_conn.grid_eight)

    for i in range(self._rows):
        for j in range(self._cols):
            neuron_index = i * self._cols + j
            neighbors = self._neighbors[neuron_index]
            amount_neighbors = len(neighbors)

            # Division by zero is avoided for a neuron that has no neighbors (for example, the map 1x1).
            if amount_neighbors == 0:
                continue

            for neighbor_index in neighbors:
                distance_matrix[i][j] += euclidean_distance_sqrt(self._weights[neuron_index], self._weights[neighbor_index])

            distance_matrix[i][j] /= amount_neighbors

    return distance_matrix
def __recursive_nearest_nodes(self, point, distance, sqrt_distance, node, best_nodes):
    """!
    @brief Collects nodes of the subtree that are located in the area covered by the distance.
    @details The right subtree is visited first, then the left one, and the node itself is checked last.
              A subtree is visited only if the coordinate of the point along the discriminator of the
              node allows it to contain suitable nodes. Found nodes are appended to 'best_nodes' as
              tuples (distance, node), where the distance is the value of euclidean_distance_sqrt.

    @param[in] point (list): Coordinates that are considered as the center of the search.
    @param[in] distance (double): Distance from the center where searching is performed.
    @param[in] sqrt_distance (double): Square distance from the center where searching is performed.
    @param[in] node (node): Node from which searching is performed.
    @param[in|out] best_nodes (list): List of found nodes.

    """
    axis = node.disc
    pivot = node.data[axis]

    lower_border = pivot - distance
    upper_border = pivot + distance

    right_child = node.right
    if (right_child is not None) and (point[axis] >= lower_border):
        self.__recursive_nearest_nodes(point, distance, sqrt_distance, right_child, best_nodes)

    left_child = node.left
    if (left_child is not None) and (point[axis] < upper_border):
        self.__recursive_nearest_nodes(point, distance, sqrt_distance, left_child, best_nodes)

    # The node itself is examined after its subtrees.
    gap = euclidean_distance_sqrt(point, node.data)
    if gap <= sqrt_distance:
        best_nodes.append((gap, node))
def _competition(self, x):
    """!
    @brief Finds the neuron that wins the competition for the input pattern.
    @details The winner is the neuron whose weights give the smallest value of euclidean_distance_sqrt
              to the pattern; if several neurons are equally close, the one with the lowest index wins.

    @param[in] x (list): Input pattern from the input data set, for example it can be coordinates of point.

    @return (uint) Returns index of neuron that is winner.

    """
    winner = 0
    shortest = euclidean_distance_sqrt(self._weights[0], x)

    for neuron in range(1, self._size):
        gap = euclidean_distance_sqrt(self._weights[neuron], x)
        if gap < shortest:
            winner, shortest = neuron, gap

    return winner
def _competition(self, x):
    """!
    @brief Finds the neuron that wins the competition for the input pattern.
    @details Neurons are examined in the order of their indexes and a neuron replaces the current winner
              only when its value of euclidean_distance_sqrt to the pattern is strictly smaller.

    @param[in] x (list): Input pattern from the input data set, for example it can be coordinates of point.

    @return (uint) Returns index of neuron that is winner.

    """
    best_index, best_distance = 0, euclidean_distance_sqrt(self._weights[0], x)

    for neuron_index in range(1, self._size):
        distance = euclidean_distance_sqrt(self._weights[neuron_index], x)
        if not (distance < best_distance):
            continue    # the current winner stays.

        best_index, best_distance = neuron_index, distance

    return best_index
def __neighbor_indexes(self, point):
    """!
    @brief Return list of indexes of neighbors of specified point for the data.
    @details A neighbor is any other point whose value of euclidean_distance_sqrt to the specified point
              does not exceed the stored connectivity threshold. Only the point itself is excluded, so
              other points with the same coordinates are reported as neighbors.

    @param[in] point (uint): An index of a point for which potential neighbors should be returned in line with connectivity radius.

    @return (list) Return list of indexes of neighbors in line the connectivity radius.

    """
    origin = self.__pointer_data[point]

    # The point is excluded by its index. Comparison of coordinates would also throw away every
    # duplicate of the point and does not work for rows that are numpy arrays.
    return [
        index for index, candidate in enumerate(self.__pointer_data)
        if (index != point) and (euclidean_distance_sqrt(origin, candidate) <= self.__sqrt_eps)
    ]
def __neighbor_indexes(self, point):
    """!
    @brief Return list of indexes of neighbors of specified point for the data.
    @details A neighbor is any other point whose value of euclidean_distance_sqrt to the specified point
              does not exceed the stored connectivity threshold. Only the point itself is excluded, so
              other points with the same coordinates are reported as neighbors.

    @param[in] point (uint): An index of a point for which potential neighbors should be returned in line with connectivity radius.

    @return (list) Return list of indexes of neighbors in line the connectivity radius.

    """
    origin = self.__pointer_data[point]
    neighbors = []

    for index, candidate in enumerate(self.__pointer_data):
        # Exclusion is performed by index: comparison of coordinates would also drop duplicates
        # of the point and is ambiguous for rows that are numpy arrays.
        if index == point:
            continue

        if euclidean_distance_sqrt(origin, candidate) <= self.__sqrt_eps:
            neighbors.append(index)

    return neighbors
def __find_nearest_clusters(self):
    """!
    @brief Find two indexes of two clusters whose distance is the smallest.
    @details Distance between clusters is the value of euclidean_distance_sqrt between their centers.
              The first examined pair is always taken as the initial candidate and is replaced only by
              a pair with strictly smaller distance.

    @return (list) List with two indexes of two clusters whose distance is the smallest, or None if
             there are less than two centers.

    """
    nearest_pair = None
    nearest_distance = 0

    amount_centers = len(self.__centers)
    for first in range(amount_centers):
        for second in range(first + 1, amount_centers):
            gap = euclidean_distance_sqrt(self.__centers[first], self.__centers[second])

            if (nearest_pair is None) or (gap < nearest_distance):
                nearest_distance = gap
                nearest_pair = [first, second]

    return nearest_pair
def __merge_by_centroid_link(self):
    """!
    @brief Merges the most similar clusters in line with centroid link type.
    @details Similarity of two clusters is the value of euclidean_distance_sqrt between their centers.
              Objects of the second cluster of the nearest pair are appended to the first cluster, the
              center of the first cluster is recalculated, and the second cluster is removed together
              with its center.

    @exception ValueError if there is no pair of clusters that can be merged.

    """
    minimum_centroid_distance = float('Inf')
    indexes = None

    for index1 in range(0, len(self.__centers)):
        for index2 in range(index1 + 1, len(self.__centers)):
            distance = euclidean_distance_sqrt(self.__centers[index1], self.__centers[index2])
            if distance < minimum_centroid_distance:
                minimum_centroid_distance = distance
                indexes = [index1, index2]

    # Without this check a missing pair ends up as an unclear TypeError on 'None[0]'.
    if indexes is None:
        raise ValueError('Unable to find a pair of clusters for merging: at least two clusters are required.')

    target, source = indexes

    self.__clusters[target] += self.__clusters[source]
    self.__centers[target] = self.__calculate_center(self.__clusters[target])

    self.__clusters.pop(source)    # remove merged cluster.
    self.__centers.pop(source)     # remove merged center.
def __update_clusters(self):
    """!
    @brief Assigns each point to the center with the smallest euclidean_distance_sqrt value.
    @details Nearest points are captured by according clusters and as a result clusters are updated.
              When several centers are equally close, the center with the lowest index captures the point.

    @return (list) Updated clusters as list of clusters. Each cluster contains indexes of objects from data.

    """
    clusters = [[] for _ in range(len(self.__centers))]

    for index_point in range(len(self.__pointer_data)):
        index_optim = -1
        dist_optim = 0.0

        for index in range(len(self.__centers)):
            dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__centers[index])

            # The first center is always the initial candidate. The index is compared by value:
            # identity comparison with an integer literal relies on an implementation detail.
            if (index == 0) or (dist < dist_optim):
                index_optim = index
                dist_optim = dist

        clusters[index_optim].append(index_point)

    return clusters
def __update_clusters(self):
    """!
    @brief Assigns each point to the median with the smallest euclidean_distance_sqrt value.
    @details Nearest points are captured by according clusters and as a result clusters are updated.
              When several medians are equally close, the median with the lowest index captures the point.

    @return (list) Updated clusters as list of clusters where each cluster contains indexes of objects from data.

    """
    clusters = [[] for _ in range(len(self.__medians))]

    for index_point in range(len(self.__pointer_data)):
        index_optim = -1
        dist_optim = 0.0

        for index in range(len(self.__medians)):
            dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medians[index])

            # The first median is always the initial candidate. The index is compared by value:
            # identity comparison with an integer literal relies on an implementation detail.
            if (index == 0) or (dist < dist_optim):
                index_optim = index
                dist_optim = dist

        clusters[index_optim].append(index_point)

    return clusters
def __init__(self, rows, cols, data, epochs, conn_type = type_conn.grid_eight, parameters = None, ccore = False):
    """!
    @brief Constructor of self-organized map.
    @details If the initial radius is not specified in the parameters, it is chosen by the size of the map:
              2.0 when (cols + rows) / 4.0 is greater than 1.0, 1.5 when the map has more than one row and
              more than one column, and 1.0 otherwise. The chosen value is stored in the parameters object.

    @param[in] rows (uint): Number of neurons in the column (number of rows).
    @param[in] cols (uint): Number of neurons in the row (number of columns).
    @param[in] data (list): Input data - list of points where each point is represented by list of features, for example coordinates.
    @param[in] epochs (uint): Number of epochs for training.
    @param[in] conn_type (type_conn): Type of connection between oscillators in the network (grid four, grid eight, honeycomb, function neighbour).
    @param[in] parameters (som_parameters): Other specific parameters.
    @param[in] ccore (bool): If True simulation is performed by CCORE library (C++ implementation of pyclustering).

    """
    # These attributes are stored for both implementations, for example, for network demonstration.
    self._cols = cols
    self._rows = rows
    self._data = data
    self._size = cols * rows
    self._epochs = epochs
    self._conn_type = conn_type

    self._params = parameters if parameters is not None else som_parameters()

    if self._params.init_radius is None:
        if (cols + rows) / 4.0 > 1.0:
            radius = 2.0
        elif (cols > 1) and (rows > 1):
            radius = 1.5
        else:
            radius = 1.0

        self._params.init_radius = radius

    if ccore is True:
        self.__ccore_som_pointer = wrapper.som_create(data, rows, cols, epochs, conn_type, self._params)
        return

    # NOTE(review): everything below is treated as part of the pure Python branch; the flattened
    # source does not show whether connections and weights are created for CCORE as well - confirm.

    # location: neuron with index (row * cols + col) is placed at [row, col].
    self._location = [[float(row), float(col)] for row in range(self._rows) for col in range(self._cols)]

    # awards
    self._award = [0] * self._size
    self._capture_objects = [[] for _ in range(self._size)]

    # distances: symmetric table, only one half is calculated and then mirrored.
    self._sqrt_distances = [[[] for _ in range(self._size)] for _ in range(self._size)]
    for first in range(self._size):
        for second in range(first, self._size):
            gap = euclidean_distance_sqrt(self._location[first], self._location[second])
            self._sqrt_distances[first][second] = gap
            self._sqrt_distances[second][first] = gap

    # connections
    if conn_type != type_conn.func_neighbor:
        self._create_connections(conn_type)

    # weights
    self._create_initial_weights(self._params.init_type)
def __init__(self, rows, cols, data, epochs, conn_type = type_conn.grid_eight, parameters = None, ccore = False):
    """!
    @brief Constructor of self-organized map.
    @details If the initial radius is not specified in the parameters, it is chosen by the size of the map:
              2.0 when (cols + rows) / 4.0 is greater than 1.0, 1.5 when the map has more than one row and
              more than one column, and 1.0 otherwise. The chosen value is stored in the parameters object.

    @param[in] rows (uint): Number of neurons in the column (number of rows).
    @param[in] cols (uint): Number of neurons in the row (number of columns).
    @param[in] data (list): Input data - list of points where each point is represented by list of features, for example coordinates.
    @param[in] epochs (uint): Number of epochs for training.
    @param[in] conn_type (type_conn): Type of connection between oscillators in the network (grid four, grid eight, honeycomb, function neighbour).
    @param[in] parameters (som_parameters): Other specific parameters.
    @param[in] ccore (bool): If True simulation is performed by CCORE library (C++ implementation of pyclustering).

    """
    # Description of the map is kept for both implementations, for example, for network demonstration.
    self._cols = cols
    self._rows = rows
    self._data = data
    self._size = cols * rows
    self._epochs = epochs
    self._conn_type = conn_type

    if parameters is None:
        self._params = som_parameters()
    else:
        self._params = parameters

    if self._params.init_radius is None:
        if (cols + rows) / 4.0 > 1.0:
            self._params.init_radius = 2.0
        elif (cols > 1) and (rows > 1):
            self._params.init_radius = 1.5
        else:
            self._params.init_radius = 1.0

    if ccore is True:
        self.__ccore_som_pointer = wrapper.som_create(data, rows, cols, epochs, conn_type, self._params)

    else:
        # NOTE(review): connections and weights are treated as part of this branch; the flattened
        # source does not show whether they are created for CCORE as well - confirm.
        amount_neurons = self._size

        # location: neurons are enumerated row by row.
        self._location = []
        for row in range(self._rows):
            for col in range(self._cols):
                self._location.append([float(row), float(col)])

        # awards
        self._award = [0] * amount_neurons
        self._capture_objects = [[] for _ in range(amount_neurons)]

        # distances: the value for a pair of neurons is calculated once and stored for both orders.
        self._sqrt_distances = [[[] for _ in range(amount_neurons)] for _ in range(amount_neurons)]
        for source in range(amount_neurons):
            for target in range(source, amount_neurons):
                value = euclidean_distance_sqrt(self._location[source], self._location[target])
                self._sqrt_distances[source][target] = value
                self._sqrt_distances[target][source] = value

        # connections
        if conn_type != type_conn.func_neighbor:
            self._create_connections(conn_type)

        # weights
        self._create_initial_weights(self._params.init_type)