Example #1
0
    def find_nearest_node(self, point):
        """!
        @brief Walks down the tree from the root and returns the closest node met on the way.
        @details Known limitation (the original note reads "Does not work properly"): as soon as the
                 first of the sorted children is usable the walk moves into it and never comes back
                 to the parent, so the other subtree is not examined and the result is not
                 guaranteed to be the true nearest neighbour.
                 A node whose distance to the point is exactly zero is never selected.

        @param[in] point (list): Coordinates of the point whose neighbour is searched.

        @return (node) Best candidate found during the walk, or None if the tree has no root or
                 no candidate with non-zero distance was met.

        """
        cur_node = self.__root

        best_node = None
        # float('inf') is used instead of numpy.Inf because that alias was removed in NumPy 2.0.
        best_distance = float('inf')

        if cur_node is None:
            # Nothing to inspect - avoid dereferencing a missing root.
            return best_node

        while True:
            # Keep the current node if it is closer than anything seen so far; a node at
            # zero distance (the owner of the coordinates) is deliberately skipped.
            candidate_distance = euclidean_distance_sqrt(cur_node.data, point)
            if (candidate_distance < best_distance) and (candidate_distance != 0):
                best_node = cur_node
                best_distance = candidate_distance

            # Sort the children by their distance to the point along the coordinate selected
            # by the discriminator of the current node, nearer one first.
            axis = cur_node.disc
            children = iter(sorted(self.children(cur_node),
                                   key=lambda node: euclidean_distance_sqrt(node.data[axis], point[axis])))

            c1 = next(children, None)
            if c1:
                cur_node = c1
                continue

            # Reached only when the first child is missing (or falsy).
            c2 = next(children, None)
            if c2 and (euclidean_distance_sqrt(cur_node.data[axis], point[axis]) < best_distance):
                cur_node = c2
                continue

            return best_node
Example #2
0
 def __merge_by_average_link(self):
     """!
     @brief Merges the most similar clusters in line with average link type.
     @details For every pair of clusters the distances (euclidean_distance_sqrt) between all
              cross-cluster pairs of objects are summed up and divided by the total amount of
              objects in both clusters. The pair with the smallest value is merged in place:
              the second cluster is appended to the first one and removed from the list.

     @raise ValueError: If no pair of clusters can be selected, for example when fewer than
             two clusters are left.

     """

     minimum_average_distance = float('Inf')
     indexes = None   # stays None until some pair beats the running minimum

     for index_cluster1 in range(0, len(self.__clusters)):
         cluster1 = self.__clusters[index_cluster1]

         for index_cluster2 in range(index_cluster1 + 1, len(self.__clusters)):
             cluster2 = self.__clusters[index_cluster2]

             # Accumulate distances between every object of one cluster and every object of the other.
             candidate_average_distance = 0.0
             for index_object1 in cluster1:
                 for index_object2 in cluster2:
                     candidate_average_distance += euclidean_distance_sqrt(self.__pointer_data[index_object1], self.__pointer_data[index_object2])

             # NOTE(review): the sum of len(cluster1) * len(cluster2) distances is divided by
             # len(cluster1) + len(cluster2); kept as is to preserve results - confirm it is intended.
             candidate_average_distance /= (len(cluster1) + len(cluster2))

             if candidate_average_distance < minimum_average_distance:
                 minimum_average_distance = candidate_average_distance
                 indexes = [index_cluster1, index_cluster2]

     if indexes is None:
         # Previously this path failed with an UnboundLocalError on 'indexes'.
         raise ValueError('There is no pair of clusters that can be merged.')

     self.__clusters[indexes[0]] += self.__clusters[indexes[1]]
     self.__clusters.pop(indexes[1])   # remove merged cluster.
Example #3
0
 def get_distance(self, entry, type_measurement):
     """!
     @brief Calculates distance between two clusters in line with measurement type.

     @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
     @param[in] type_measurement (measurement_type): Distance measurement algorithm between two clusters.

     @return (double) Distance between two clusters.

     @raise AssertionError: If the measurement type is not one of the supported values.

     """

     if (type_measurement is measurement_type.CENTROID_EUCLIDIAN_DISTANCE):
         return euclidean_distance_sqrt(entry.get_centroid(), self.get_centroid())

     elif (type_measurement is measurement_type.CENTROID_MANHATTAN_DISTANCE):
         return manhattan_distance(entry.get_centroid(), self.get_centroid())

     elif (type_measurement is measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE):
         return self.__get_average_inter_cluster_distance(entry)

     elif (type_measurement is measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE):
         return self.__get_average_intra_cluster_distance(entry)

     elif (type_measurement is measurement_type.VARIANCE_INCREASE_DISTANCE):
         return self.__get_variance_increase_distance(entry)

     # Raised explicitly: a bare 'assert 0' is stripped under 'python -O', in which case the
     # method silently returned None. The exception type is kept for existing callers.
     raise AssertionError('Unsupported type of measurement: %r.' % (type_measurement,))
Example #4
0
 def process(self):
     """!
     @brief Performs cluster analysis in line with rules of K-Medians algorithm.
     @details Clusters and medians are recalculated in turn. The iteration stops when the largest
              displacement of a median, measured by euclidean_distance_sqrt, is not greater than
              the tolerance multiplied by itself.

     @remark Results of clustering can be obtained using corresponding get methods.

     @raise NameError: If dimension of the input data differs from dimension of the initial medians.

     @see get_clusters()
     @see get_medians()

     """

     largest_shift = float('inf')

     # The tolerance is squared once so it can be compared with euclidean_distance_sqrt values directly.
     threshold = self.__tolerance * self.__tolerance

     # Only the first point and the first median are inspected by the dimension check.
     data_dimension = len(self.__pointer_data[0])
     median_dimension = len(self.__medians[0])
     if data_dimension != median_dimension:
         raise NameError('Dimension of the input data and dimension of the initial cluster medians must be equal.')

     while largest_shift > threshold:
         self.__clusters = self.__update_clusters()
         new_medians = self.__update_medians()

         # The displacement has to be measured before the stored medians are overwritten.
         largest_shift = max(
             euclidean_distance_sqrt(self.__medians[position], new_medians[position])
             for position in range(len(self.__medians))
         )

         self.__medians = new_medians
Example #5
0
    def __merge_by_signle_link(self):
        """!
        @brief Merges the most similar clusters in line with single link type.
        @details The distance between two clusters is the smallest euclidean_distance_sqrt value
                 over all cross-cluster pairs of objects. The pair of clusters with the smallest
                 such distance is merged in place: the second cluster is appended to the first
                 one and removed from the list.

        @raise ValueError: If no pair of clusters can be selected, for example when fewer than
                two clusters are left.

        """

        minimum_single_distance = float('Inf')
        indexes = None

        for index_cluster1 in range(0, len(self.__clusters)):
            cluster1 = self.__clusters[index_cluster1]

            for index_cluster2 in range(index_cluster1 + 1, len(self.__clusters)):
                cluster2 = self.__clusters[index_cluster2]

                # Find nearest objects of the two clusters.
                candidate_minimum_distance = float('Inf')
                for index_object1 in cluster1:
                    for index_object2 in cluster2:
                        distance = euclidean_distance_sqrt(self.__pointer_data[index_object1], self.__pointer_data[index_object2])
                        if distance < candidate_minimum_distance:
                            candidate_minimum_distance = distance

                if candidate_minimum_distance < minimum_single_distance:
                    minimum_single_distance = candidate_minimum_distance
                    indexes = [index_cluster1, index_cluster2]

        if indexes is None:
            # Previously this path failed with "TypeError: 'NoneType' object is not subscriptable".
            raise ValueError('There is no pair of clusters that can be merged.')

        self.__clusters[indexes[0]] += self.__clusters[indexes[1]]
        self.__clusters.pop(indexes[1])   # remove merged cluster.
Example #6
0
 def __update_clusters(self, centers, available_indexes = None):
     """!
     @brief Assigns each point to the center with the smallest euclidean_distance_sqrt value.
            Nearest points are captured by according clusters and as a result clusters are updated.

     @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
     @param[in] available_indexes (list): Indexes that define which points can be used from input data, if None - then all points are used.

     @return (list) Updated clusters: one list of point indexes per center, in the order of centers.

     """

     if available_indexes is None:
         bypass = range(len(self.__pointer_data))
     else:
         bypass = available_indexes

     clusters = [[] for _ in range(len(centers))]
     for index_point in bypass:
         index_optim = -1
         dist_optim = 0.0

         for index in range(len(centers)):
             dist = euclidean_distance_sqrt(self.__pointer_data[index_point], centers[index])

             # The first center always becomes the initial candidate. Compared with '==':
             # 'index is 0' is an identity test against a literal (SyntaxWarning since Python 3.8).
             if (index == 0) or (dist < dist_optim):
                 index_optim = index
                 dist_optim = dist

         clusters[index_optim].append(index_point)

     return clusters
Example #7
0
 def get_distance_matrix(self):
     """!
     @brief Calculates distance matrix (U-matrix).
     @details The U-Matrix visualizes based on the distance in input space between a weight vector and its neighbors on map.
              Each cell holds the average euclidean_distance_sqrt value between the weights of a
              neuron and the weights of its neighbors; a neuron without neighbors keeps 0.0.

     @return (list) Distance matrix (U-matrix) as list of rows.

     @see show_distance_matrix()
     @see get_density_matrix()

     """
     if (self.__ccore_som_pointer is not None):
         # Refresh the local copies from the CCORE network before they are used.
         self._weights = wrapper.som_get_weights(self.__ccore_som_pointer)

         if (self._conn_type != type_conn.func_neighbor):
             self._neighbors = wrapper.som_get_neighbors(self.__ccore_som_pointer)

     distance_matrix = [[0.0] * self._cols for _ in range(self._rows)]

     # For the function-neighbor type the connections are built as grid-eight. This is done
     # once, on the first visited neuron, instead of being repeated for every neuron.
     connections_required = (self._conn_type == type_conn.func_neighbor)

     for i in range(self._rows):
         for j in range(self._cols):
             neuron_index = i * self._cols + j

             if connections_required:
                 self._create_connections(type_conn.grid_eight)
                 connections_required = False

             neighbors = self._neighbors[neuron_index]
             if len(neighbors) == 0:
                 # Isolated neuron (for example 1x1 map): avoid division by zero.
                 continue

             for neighbor_index in neighbors:
                 distance_matrix[i][j] += euclidean_distance_sqrt(self._weights[neuron_index], self._weights[neighbor_index])

             distance_matrix[i][j] /= len(neighbors)

     return distance_matrix
Example #8
0
 def __improve_parameters(self, centers, available_indexes = None):
     """!
     @brief Performs k-means clustering in the specified region.
     @details Clusters and centers are recalculated until the largest displacement of a center,
              measured by euclidean_distance_sqrt, is not greater than the tolerance multiplied
              by itself. Clusters that capture no points are dropped on every iteration.

     @param[in] centers (list): Centers of clusters.
     @param[in] available_indexes (list): Indexes that defines which points can be used for k-means clustering, if None - then all points are used.

     @return (tuple) Pair (clusters, centers): list of allocated clusters, where each cluster contains
              indexes of objects in list of data, and the list of their centers.

     """

     # float('inf') is used instead of numpy.Inf because that alias was removed in NumPy 2.0.
     changes = float('inf')

     stop_condition = self.__tolerance * self.__tolerance   # compared with euclidean_distance_sqrt values

     clusters = []

     while changes > stop_condition:
         clusters = self.__update_clusters(centers, available_indexes)

         # Empty clusters are removed together with their centers. Filtering the clusters alone
         # would shift the indexes and compare an updated center with the old center of a
         # different cluster.
         survivors = [(center, cluster) for center, cluster in zip(centers, clusters) if len(cluster) > 0]
         previous_centers = [center for center, _ in survivors]
         clusters = [cluster for _, cluster in survivors]

         updated_centers = self.__update_centers(clusters)

         changes = max([euclidean_distance_sqrt(previous_centers[index], updated_centers[index]) for index in range(len(updated_centers))])

         centers = updated_centers

     return (clusters, centers)
Example #9
0
 def process(self):
     """!
     @brief Performs cluster analysis in line with rules of K-Means algorithm.
     @details When CCORE is enabled the clusters are obtained from the wrapper and the centers are
              recalculated once. Otherwise clusters and centers are recalculated in turn until the
              largest displacement of a center, measured by euclidean_distance_sqrt, is not greater
              than the tolerance multiplied by itself.

     @remark Results of clustering can be obtained using corresponding get methods.

     @raise NameError: If dimension of the input data differs from dimension of the initial centers
             (checked only when CCORE is not used).

     @see get_clusters()
     @see get_centers()

     """

     if self.__ccore is True:
         self.__clusters = wrapper.kmeans(self.__pointer_data, self.__centers, self.__tolerance)
         self.__centers = self.__update_centers()
         return

     largest_shift = float('inf')

     # The tolerance is squared once so it can be compared with euclidean_distance_sqrt values directly.
     threshold = self.__tolerance * self.__tolerance

     # Only the first point and the first center are inspected by the dimension check.
     data_dimension = len(self.__pointer_data[0])
     center_dimension = len(self.__centers[0])
     if data_dimension != center_dimension:
         raise NameError('Dimension of the input data and dimension of the initial cluster centers must be equal.')

     while largest_shift > threshold:
         self.__clusters = self.__update_clusters()
         new_centers = self.__update_centers()

         # The displacement has to be measured before the stored centers are overwritten.
         largest_shift = max(
             euclidean_distance_sqrt(self.__centers[position], new_centers[position])
             for position in range(len(self.__centers))
         )

         self.__centers = new_centers
Example #10
0
    def get_distance(self, entry, type_measurement):
        """!
        @brief Calculates distance between two clusters in line with measurement type.
        
        @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
        @param[in] type_measurement (measurement_type): Distance measurement algorithm between two clusters.
        
        @return (double) Distance between two clusters.
        
        @raise AssertionError: If the measurement type is not one of the supported values.
        
        """

        if type_measurement is measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
            return euclidean_distance_sqrt(entry.get_centroid(),
                                           self.get_centroid())

        if type_measurement is measurement_type.CENTROID_MANHATTAN_DISTANCE:
            return manhattan_distance(entry.get_centroid(),
                                      self.get_centroid())

        if type_measurement is measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
            return self.__get_average_inter_cluster_distance(entry)

        if type_measurement is measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
            return self.__get_average_intra_cluster_distance(entry)

        if type_measurement is measurement_type.VARIANCE_INCREASE_DISTANCE:
            return self.__get_variance_increase_distance(entry)

        # Raised explicitly: a bare 'assert 0' is stripped under 'python -O', in which
        # case the method silently returned None. The exception type is kept for
        # existing callers.
        raise AssertionError(
            'Unsupported type of measurement: %r.' % (type_measurement,))
Example #11
0
 def get_distance_matrix(self):
     """!
     @brief Calculates distance matrix (U-matrix).
     @details The U-Matrix visualizes based on the distance in input space between a weight vector and its neighbors on map.
              Every cell is the mean of euclidean_distance_sqrt values between the weights of the
              neuron and the weights of each of its neighbors. A neuron that has no neighbors
              is reported as 0.0.

     @return (list) Distance matrix (U-matrix) as list of rows.

     @see show_distance_matrix()
     @see get_density_matrix()

     """
     if self.__ccore_som_pointer is not None:
         # Weights (and neighbors, unless they are defined by function) are taken from CCORE.
         self._weights = wrapper.som_get_weights(self.__ccore_som_pointer)

         if self._conn_type != type_conn.func_neighbor:
             self._neighbors = wrapper.som_get_neighbors(self.__ccore_som_pointer)

     distance_matrix = [[0.0] * self._cols for _ in range(self._rows)]

     # Grid-eight connections are required for the function-neighbor type. They are created
     # once, when the first neuron is visited, rather than for every neuron again.
     pending_connections = (self._conn_type == type_conn.func_neighbor)

     for row in range(self._rows):
         for col in range(self._cols):
             neuron_index = row * self._cols + col

             if pending_connections:
                 self._create_connections(type_conn.grid_eight)
                 pending_connections = False

             neuron_neighbors = self._neighbors[neuron_index]
             amount_neighbors = len(neuron_neighbors)
             if amount_neighbors == 0:
                 # No neighbors (for example 1x1 map): keep 0.0 instead of dividing by zero.
                 continue

             for neighbor_index in neuron_neighbors:
                 distance_matrix[row][col] += euclidean_distance_sqrt(self._weights[neuron_index], self._weights[neighbor_index])

             distance_matrix[row][col] /= amount_neighbors

     return distance_matrix
Example #12
0
 def __recursive_nearest_nodes(self, point, distance, sqrt_distance, node, best_nodes):
     """!
     @brief Collects neighbors as tuples (distance, node) that are located in the area covered by distance.
     @details Nothing is returned: found nodes are appended to best_nodes. The right subtree is
              visited first, then the left one, and the node itself is checked last.

     @param[in] point (list): Coordinates that are considered as centroid for searching.
     @param[in] distance (double): Distance from the center where searching is performed.
     @param[in] sqrt_distance (double): Square distance from the center where searching is performed.
     @param[in] node (node): Node from which searching is performed.
     @param[in|out] best_nodes (list): List of found nodes, extended with tuples (distance, node).

     """

     axis = node.disc

     # Interval around the node coordinate used to decide which subtrees are worth visiting.
     lower_bound = node.data[axis] - distance
     upper_bound = node.data[axis] + distance

     if (node.right is not None) and (point[axis] >= lower_bound):
         self.__recursive_nearest_nodes(point, distance, sqrt_distance, node.right, best_nodes)

     if (node.left is not None) and (point[axis] < upper_bound):
         self.__recursive_nearest_nodes(point, distance, sqrt_distance, node.left, best_nodes)

     own_distance = euclidean_distance_sqrt(point, node.data)
     if own_distance <= sqrt_distance:
         best_nodes.append((own_distance, node))
Example #13
0
 def _competition(self, x):
     """!
     @brief Finds the neuron winner for the input pattern.
     @details The winner is the neuron whose weights give the smallest euclidean_distance_sqrt
              value to the pattern; in case of equal values the neuron with the lowest index wins.

     @param[in] x (list): Input pattern from the input data set, for example it can be coordinates of point.

     @return (uint) Returns index of neuron that is winner.

     """

     # The first neuron is the initial winner, the others have to beat it strictly.
     winner, winner_distance = 0, euclidean_distance_sqrt(self._weights[0], x)

     for neuron in range(1, self._size):
         neuron_distance = euclidean_distance_sqrt(self._weights[neuron], x)
         if neuron_distance < winner_distance:
             winner, winner_distance = neuron, neuron_distance

     return winner
Example #14
0
 def _competition(self, x):
     """!
     @brief Calculates neuron winner for the input pattern.
     @details Distances (euclidean_distance_sqrt) from the pattern to the weights of every neuron
              are collected first, then the position of the smallest one is returned. The lowest
              index is returned when several neurons share the smallest distance.

     @param[in] x (list): Input pattern from the input data set, for example it can be coordinates of point.

     @return (uint) Returns index of neuron that is winner.

     """

     # Neuron 0 is always evaluated, the rest only if the network has more neurons.
     distances = [euclidean_distance_sqrt(self._weights[0], x)]
     distances.extend(euclidean_distance_sqrt(self._weights[i], x) for i in range(1, self._size))

     # min() keeps the first of equal keys, i.e. a later neuron wins only with a strictly smaller distance.
     return min(range(len(distances)), key=distances.__getitem__)
Example #15
0
 def __neighbor_indexes(self, point):
     """!
     @brief Return list of indexes of neighbors of specified point for the data.
     @details A neighbor is any other point whose euclidean_distance_sqrt value to the specified
              point is not greater than the stored squared connectivity radius. The point itself
              is excluded by its index, so points with identical coordinates are still reported
              as neighbors.

     @param[in] point (uint): An index of a point for which potential neighbors should be returned in line with connectivity radius.

     @return (list) Return list of indexes of neighbors in line the connectivity radius.

     """

     origin = self.__pointer_data[point]

     # The point used to be excluded by comparing coordinates, which also dropped duplicate
     # points from the neighborhood and is ambiguous for numpy rows; the index is compared instead.
     return [i for i in range(0, len(self.__pointer_data))
             if (i != point) and (euclidean_distance_sqrt(origin, self.__pointer_data[i]) <= self.__sqrt_eps)]
Example #16
0
    def __neighbor_indexes(self, point):
        """!
        @brief Return list of indexes of neighbors of specified point for the data.
        @details Every other point whose euclidean_distance_sqrt value to the specified point
                 does not exceed the stored squared connectivity radius is a neighbor. Only the
                 specified index itself is skipped, therefore points that share the same
                 coordinates are counted as neighbors too.
        
        @param[in] point (uint): An index of a point for which potential neighbors should be returned in line with connectivity radius.
        
        @return (list) Return list of indexes of neighbors in line the connectivity radius.
        
        """

        origin = self.__pointer_data[point]
        neighbors = []

        for index, candidate in enumerate(self.__pointer_data):
            # Skip the point itself by index. Comparing coordinates (as before) also
            # removed duplicate points and fails for numpy rows.
            if index == point:
                continue

            if euclidean_distance_sqrt(origin, candidate) <= self.__sqrt_eps:
                neighbors.append(index)

        return neighbors
Example #17
0
    def __find_nearest_clusters(self):
        """!
        @brief Find two indexes of two clusters whose distance is the smallest.
        @details Centers of clusters are compared pairwise using euclidean_distance_sqrt; the
                 first pair with the smallest value is reported.
        
        @return (list) List with two indexes of two clusters whose distance is the smallest,
                 or None if there are fewer than two centers.
        
        """

        min_dist = 0
        indexes = None

        for index1 in range(0, len(self.__centers)):
            for index2 in range(index1 + 1, len(self.__centers)):
                distance = euclidean_distance_sqrt(self.__centers[index1],
                                                   self.__centers[index2])

                # The very first pair is always accepted as the initial candidate;
                # None is tested by identity rather than with '=='.
                if (indexes is None) or (distance < min_dist):
                    min_dist = distance
                    indexes = [index1, index2]

        return indexes
Example #18
0
 def __merge_by_centroid_link(self):
     """!
     @brief Merges the most similar clusters in line with centroid link type.
     @details The pair of clusters whose centers have the smallest euclidean_distance_sqrt value
              is merged in place: the second cluster is appended to the first one, the center of
              the first one is recalculated, and the second cluster and its center are removed.

     @raise ValueError: If no pair of clusters can be selected, for example when fewer than
             two centers are left.

     """

     minimum_centroid_distance = float('Inf')
     indexes = None

     for index1 in range(0, len(self.__centers)):
         for index2 in range(index1 + 1, len(self.__centers)):
             distance = euclidean_distance_sqrt(self.__centers[index1], self.__centers[index2])
             if distance < minimum_centroid_distance:
                 minimum_centroid_distance = distance
                 indexes = [index1, index2]

     if indexes is None:
         # Previously this path failed with "TypeError: 'NoneType' object is not subscriptable".
         raise ValueError('There is no pair of clusters that can be merged.')

     target, merged = indexes

     self.__clusters[target] += self.__clusters[merged]
     self.__centers[target] = self.__calculate_center(self.__clusters[target])

     self.__clusters.pop(merged)   # remove merged cluster.
     self.__centers.pop(merged)    # remove merged center.
Example #19
0
 def __update_clusters(self):
     """!
     @brief Assigns each point to the center with the smallest euclidean_distance_sqrt value. Nearest points are captured by according clusters and as a result clusters are updated.

     @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.

     """

     clusters = [[] for _ in range(len(self.__centers))]
     for index_point in range(len(self.__pointer_data)):
         index_optim = -1
         dist_optim = 0.0

         for index in range(len(self.__centers)):
             dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__centers[index])

             # The first center always becomes the initial candidate. Compared with '==':
             # 'index is 0' is an identity test against a literal (SyntaxWarning since Python 3.8).
             if (index == 0) or (dist < dist_optim):
                 index_optim = index
                 dist_optim = dist

         clusters[index_optim].append(index_point)

     return clusters
Example #20
0
 def __update_clusters(self):
     """!
     @brief Assigns each point to the median with the smallest euclidean_distance_sqrt value.
     @details Nearest points are captured by according clusters and as a result clusters are updated.
              NOTE(review): the previous description mentioned Manhattan distance, but the code
              calls euclidean_distance_sqrt - confirm which metric is intended.

     @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.

     """

     clusters = [[] for _ in range(len(self.__medians))]
     for index_point in range(len(self.__pointer_data)):
         index_optim = -1
         dist_optim = 0.0

         for index in range(len(self.__medians)):
             dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medians[index])

             # The first median always becomes the initial candidate. Compared with '==':
             # 'index is 0' is an identity test against a literal (SyntaxWarning since Python 3.8).
             if (index == 0) or (dist < dist_optim):
                 index_optim = index
                 dist_optim = dist

         clusters[index_optim].append(index_point)

     return clusters
Example #21
0
 def __init__(self, rows, cols, data, epochs, conn_type = type_conn.grid_eight, parameters = None, ccore = False):
     """!
     @brief Constructor of self-organized map.
     @details Common settings are always stored. With CCORE the network itself is created by the
              wrapper; otherwise locations, awards, captured objects, pairwise distances between
              locations, connections and initial weights are prepared here.

     @param[in] rows (uint): Number of neurons in the column (number of rows).
     @param[in] cols (uint): Number of neurons in the row (number of columns).
     @param[in] data (list): Input data - list of points where each point is represented by list of features, for example coordinates.
     @param[in] epochs (uint): Number of epochs for training.
     @param[in] conn_type (type_conn): Type of connection between oscillators in the network (grid four, grid eight, honeycomb, function neighbour).
     @param[in] parameters (som_parameters): Other specific parameters; if init_radius is None it is filled in on the passed object.
     @param[in] ccore (bool): If True simulation is performed by CCORE library (C++ implementation of pyclustering).

     """

     # These settings are kept regardless of the implementation, for example, for network demonstration.
     self._cols = cols
     self._rows = rows
     self._data = data
     self._size = cols * rows
     self._epochs = epochs
     self._conn_type = conn_type

     self._params = parameters if (parameters is not None) else som_parameters()

     # Initial radius is chosen from the size of the map when it is not specified.
     if self._params.init_radius is None:
         if (cols + rows) / 4.0 > 1.0:
             radius = 2.0
         elif (cols > 1) and (rows > 1):
             radius = 1.5
         else:
             radius = 1.0

         self._params.init_radius = radius

     if ccore is True:
         self.__ccore_som_pointer = wrapper.som_create(data, rows, cols, epochs, conn_type, self._params)
         return

     # location: neuron with index (i * cols + j) is placed at [i, j]
     self._location = [[float(i), float(j)] for i in range(self._rows) for j in range(self._cols)]

     # awards
     self._award = [0] * self._size
     self._capture_objects = [[] for _ in range(self._size)]

     # distances: symmetric matrix, every cell is overwritten by the loop below
     self._sqrt_distances = [[[] for _ in range(self._size)] for _ in range(self._size)]
     for i in range(self._size):
         for j in range(i, self._size):
             dist = euclidean_distance_sqrt(self._location[i], self._location[j])
             self._sqrt_distances[i][j] = dist
             self._sqrt_distances[j][i] = dist

     # connections
     if conn_type != type_conn.func_neighbor:
         self._create_connections(conn_type)

     # weights
     self._create_initial_weights(self._params.init_type)
Example #22
0
 def __init__(self, rows, cols, data, epochs, conn_type = type_conn.grid_eight, parameters = None, ccore = False):
     """!
     @brief Constructor of self-organized map.
     @details Stores the common settings and then either delegates creation of the network to
              CCORE or builds the Python structures: neuron locations, awards, captured objects,
              matrix of distances between locations, connections and initial weights.

     @param[in] rows (uint): Number of neurons in the column (number of rows).
     @param[in] cols (uint): Number of neurons in the row (number of columns).
     @param[in] data (list): Input data - list of points where each point is represented by list of features, for example coordinates.
     @param[in] epochs (uint): Number of epochs for training.
     @param[in] conn_type (type_conn): Type of connection between oscillators in the network (grid four, grid eight, honeycomb, function neighbour).
     @param[in] parameters (som_parameters): Other specific parameters; a missing init_radius is written to the passed object.
     @param[in] ccore (bool): If True simulation is performed by CCORE library (C++ implementation of pyclustering).

     """

     # Required for both implementations, for example, for network demonstration.
     self._cols, self._rows = cols, rows
     self._data = data
     self._size = cols * rows
     self._epochs = epochs
     self._conn_type = conn_type

     if parameters is None:
         self._params = som_parameters()
     else:
         self._params = parameters

     if self._params.init_radius is None:
         # Radius depends on the size of the map: 2.0, 1.5 or 1.0.
         if (cols + rows) / 4.0 > 1.0:
             self._params.init_radius = 2.0
         else:
             self._params.init_radius = 1.5 if ((cols > 1) and (rows > 1)) else 1.0

     if ccore is True:
         self.__ccore_som_pointer = wrapper.som_create(data, rows, cols, epochs, conn_type, self._params)

     else:
         # location: one [row, column] pair per neuron, row by row
         self._location = []
         for row in range(self._rows):
             self._location.extend([float(row), float(col)] for col in range(self._cols))

         # awards
         self._award = [0] * self._size
         self._capture_objects = [[] for _ in range(self._size)]

         # distances: the upper triangle is calculated and mirrored to the lower one
         amount = self._size
         self._sqrt_distances = [[[] for _ in range(amount)] for _ in range(amount)]
         for first in range(amount):
             for second in range(first, amount):
                 separation = euclidean_distance_sqrt(self._location[first], self._location[second])
                 self._sqrt_distances[first][second] = separation
                 self._sqrt_distances[second][first] = separation

         # connections
         if conn_type != type_conn.func_neighbor:
             self._create_connections(conn_type)

         # weights
         self._create_initial_weights(self._params.init_type)