Beispiel #1
0
    def get_distance(self, entry, type_measurement):
        """!
        @brief Computes the distance from this clustering feature to another one using the requested metric.

        @details For CENTROID_EUCLIDEAN_DISTANCE the value is produced by euclidean_distance_square() on both
                 centroids, i.e. the square of the euclidean distance is returned. Take the square root of
                 the result when the plain euclidean distance is required.

        @param[in] entry (cfentry): Clustering feature to which the distance is measured.
        @param[in] type_measurement (measurement_type): Metric that should be used for the calculation.

        @return (double) Distance between the two clustering features.

        @exception ValueError Raised when the measurement type is not one of the handled values.

        """

        # Centroid based metrics: both operate on the centroids of the two entries.
        if type_measurement is measurement_type.CENTROID_EUCLIDEAN_DISTANCE:
            return euclidean_distance_square(entry.get_centroid(), self.get_centroid())

        if type_measurement is measurement_type.CENTROID_MANHATTAN_DISTANCE:
            return manhattan_distance(entry.get_centroid(), self.get_centroid())

        # Remaining metrics are delegated to the dedicated private helpers of this class.
        if type_measurement is measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
            return self.__get_average_inter_cluster_distance(entry)

        if type_measurement is measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
            return self.__get_average_intra_cluster_distance(entry)

        if type_measurement is measurement_type.VARIANCE_INCREASE_DISTANCE:
            return self.__get_variance_increase_distance(entry)

        # Nothing matched: report the unexpected value to the caller.
        raise ValueError("Unsupported type of measurement '%s' is specified." % type_measurement)
Beispiel #2
0
 def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
     """!
     @brief Test template: checks that cfentry.get_distance() is symmetric and agrees with the reference
            utility function for the requested metric.

     @param[in] cluster1 (list): Points of the first cluster.
     @param[in] cluster2 (list): Points of the second cluster.
     @param[in] type_measurement (measurement_type): Metric under test.

     """
     entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
     entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

     # The distance must not depend on the direction in which it is measured.
     distance12 = entry1.get_distance(entry2, type_measurement)
     distance21 = entry2.get_distance(entry1, type_measurement)

     assert distance12 == distance21

     # Compare with the stand-alone utility implementation of the same metric.
     # Centroid metrics are expected to match exactly; the remaining ones are
     # compared with numpy.isclose() default tolerances.
     if type_measurement == measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
         assert distance12 == euclidean_distance_sqrt(entry1.get_centroid(), entry2.get_centroid())

     elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
         assert distance12 == manhattan_distance(entry1.get_centroid(), entry2.get_centroid())

     elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_inter_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_intra_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
         assert numpy.isclose(distance12, variance_increase_distance(cluster1, cluster2))
Beispiel #3
0
 def get_distance(self, entry, type_measurement):
     """!
     @brief Calculates distance between two clusters in line with measurement type.

     @details For CENTROID_EUCLIDIAN_DISTANCE the value is whatever euclidean_distance_sqrt() returns for
              the two centroids.

     @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
     @param[in] type_measurement (measurement_type): Distance measurement algorithm between two clusters.

     @return (double) Distance between two clusters.

     @exception ValueError Raised when the measurement type is not one of the handled values.

     """

     if type_measurement is measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
         return euclidean_distance_sqrt(entry.get_centroid(), self.get_centroid())

     elif type_measurement is measurement_type.CENTROID_MANHATTAN_DISTANCE:
         return manhattan_distance(entry.get_centroid(), self.get_centroid())

     elif type_measurement is measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
         return self.__get_average_inter_cluster_distance(entry)

     elif type_measurement is measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
         return self.__get_average_intra_cluster_distance(entry)

     elif type_measurement is measurement_type.VARIANCE_INCREASE_DISTANCE:
         return self.__get_variance_increase_distance(entry)

     else:
         # An explicit exception is used instead of 'assert 0': assertions are removed when
         # Python runs with -O, which would let an unsupported type silently return None.
         raise ValueError("Unsupported type of measurement '%s' is specified." % (type_measurement,))
0
    def __update_clusters(self, medoids):
        """!
        @brief Rebuilds clusters by attaching every point to its closest medoid (manhattan distance).

        @details A point goes to the medoid with the smallest distance; on a tie the medoid that appears
                 first in the list wins. Clusters that end up without any point are dropped from the
                 cluster list afterwards (the membership table is not re-indexed by this method).

        @param[in] medoids (list): Indexes of data points that play the role of medoids.

        """

        data = self.__pointer_data

        self.__belong = [0] * len(data)
        self.__clusters = [[] for _ in medoids]

        for point_id, point in enumerate(data):
            nearest = -1
            nearest_distance = 0.0

            for position, medoid_id in enumerate(medoids):
                candidate_distance = manhattan_distance(point, data[medoid_id])

                # The first medoid is always accepted, later ones only when strictly closer.
                if position == 0 or candidate_distance < nearest_distance:
                    nearest, nearest_distance = position, candidate_distance

            self.__clusters[nearest].append(point_id)
            self.__belong[point_id] = nearest

        # A cluster that has not captured any object is removed.
        self.__clusters = [members for members in self.__clusters if members]
 def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
     """!
     @brief Test template: checks that cfentry.get_distance() is symmetric and agrees with the reference
            utility function for the requested metric.

     @param[in] cluster1 (list): Points of the first cluster.
     @param[in] cluster2 (list): Points of the second cluster.
     @param[in] type_measurement (measurement_type): Metric under test.

     """
     entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
     entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

     # The distance must not depend on the direction in which it is measured.
     distance12 = entry1.get_distance(entry2, type_measurement)
     distance21 = entry2.get_distance(entry1, type_measurement)

     assert distance12 == distance21

     # Compare with the stand-alone utility implementation of the same metric.
     # Centroid metrics are expected to match exactly; the remaining ones are
     # compared with numpy.isclose() default tolerances.
     if type_measurement == measurement_type.CENTROID_EUCLIDEAN_DISTANCE:
         assert distance12 == euclidean_distance_square(entry1.get_centroid(), entry2.get_centroid())

     elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
         assert distance12 == manhattan_distance(entry1.get_centroid(), entry2.get_centroid())

     elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_inter_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_intra_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
         assert numpy.isclose(distance12, variance_increase_distance(cluster1, cluster2))
Beispiel #6
0
def cluster_distances(path_sample, amount_clusters):
    """!
    @brief Clusters a sample with the agglomerative algorithm and prints every supported distance
           between each pair of obtained clusters.

    @param[in] path_sample (string): Path to the sample, passed to utils.read_sample().
    @param[in] amount_clusters (uint): Amount of clusters requested from the agglomerative algorithm.

    """
    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()

    print("Measurements for:", path_sample)

    for index_cluster, cluster1 in enumerate(obtained_clusters):
        # Only pairs (i, j) with j > i are visited, so each unordered pair is reported once.
        for index_neighbor in range(index_cluster + 1, len(obtained_clusters)):
            cluster2 = obtained_clusters[index_neighbor]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            # Label -> calculation; evaluated lazily so every value is printed right after
            # it has been computed.
            calculators = (
                ('euclidian', lambda: utils.euclidean_distance(center_cluster1, center_cluster2)),
                ('manhattan', lambda: utils.manhattan_distance(center_cluster1, center_cluster2)),
                ('avr-inter', lambda: utils.average_inter_cluster_distance(cluster1, cluster2, sample)),
                ('avr-intra', lambda: utils.average_intra_cluster_distance(cluster1, cluster2, sample)),
                ('variance', lambda: utils.variance_increase_distance(cluster1, cluster2, sample)),
            )

            for distance_type, calculate in calculators:
                distance = calculate()

                # Report inside the loop: one line per distance type (previously only the
                # last computed type reached the output).
                print("\tDistance", distance_type, "from", index_cluster, "to",
                      index_neighbor, "is:", distance)
Beispiel #7
0
    def __calculate_estimation(self):
        """!
        @brief Calculates the cost of the current clustering: the sum of manhattan distances from every
               clustered point to the medoid of its cluster. A lower value means a better configuration.

        @return (double) Estimation (cost) of the current clusters.

        """
        total_cost = 0.0

        for cluster_position, members in enumerate(self.__clusters):
            # The medoid of a cluster is stored at the same position in the list of current medoids.
            medoid_id = self.__current[cluster_position]

            for member_id in members:
                total_cost += manhattan_distance(self.__pointer_data[member_id],
                                                 self.__pointer_data[medoid_id])

        return total_cost
Beispiel #8
0
def cluster_distances(path_sample, amount_clusters):
    """!
    @brief Clusters a sample with the agglomerative algorithm and prints every supported distance
           between each pair of obtained clusters.

    @param[in] path_sample (string): Path to the sample, passed to utils.read_sample().
    @param[in] amount_clusters (uint): Amount of clusters requested from the agglomerative algorithm.

    """
    distances = ['euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance']

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()

    print("Measurements for:", path_sample)

    for index_cluster in range(len(obtained_clusters)):
        # Only pairs (i, j) with j > i are visited, so each unordered pair is reported once.
        for index_neighbor in range(index_cluster + 1, len(obtained_clusters)):
            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            for distance_type in distances:
                distance = None

                if distance_type == 'euclidian':
                    distance = utils.euclidean_distance(center_cluster1, center_cluster2)

                elif distance_type == 'manhattan':
                    distance = utils.manhattan_distance(center_cluster1, center_cluster2)

                elif distance_type == 'avr-inter':
                    distance = utils.average_inter_cluster_distance(cluster1, cluster2, sample)

                elif distance_type == 'avr-intra':
                    distance = utils.average_intra_cluster_distance(cluster1, cluster2, sample)

                elif distance_type == 'variance':
                    distance = utils.variance_increase_distance(cluster1, cluster2, sample)

                # Report inside the loop: one line per distance type (previously only the
                # last computed type reached the output).
                print("\tDistance", distance_type, "from", index_cluster, "to", index_neighbor, "is:", distance)
Beispiel #9
0
    def __find_another_nearest_medoid(self, point_index, current_medoid_index):
        """!
        @brief Finds the medoid nearest to the specified point (manhattan distance) among all current
               medoids except the specified one.

        @param[in] point_index (uint): Index of the point in the data for which the medoid is searched.
        @param[in] current_medoid_index (uint): Index of the medoid that must not be considered.

        @return (uint) Index of the nearest other medoid, or -1 when there is no medoid besides the
                excluded one.

        """
        other_medoid_index = -1
        other_distance_nearest = float('inf')

        for index_medoid in self.__current:
            if index_medoid == current_medoid_index:
                continue

            # Measure the distance to the candidate medoid itself. Measuring to the excluded
            # medoid would give the same value for every candidate and always select the
            # first one.
            other_distance_candidate = manhattan_distance(
                self.__pointer_data[point_index],
                self.__pointer_data[index_medoid])

            if other_distance_candidate < other_distance_nearest:
                other_distance_nearest = other_distance_candidate
                other_medoid_index = index_medoid

        return other_medoid_index
Beispiel #10
0
def display_two_dimensional_cluster_distances(path_sample, amount_clusters):
    """!
    @brief Clusters a two-dimensional sample with the agglomerative algorithm, draws the clusters and,
           for every pair of clusters, an arrow between their centers labelled with each supported
           inter-cluster distance. The distances are printed as well.

    @param[in] path_sample (string): Path to the sample, passed to utils.read_sample().
    @param[in] amount_clusters (uint): Amount of clusters requested from the agglomerative algorithm.

    """
    distances = [
        'euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance'
    ]

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()

    # NOTE(review): 'stage' is presumably a matplotlib Axes (annotate/text are called on it) - confirm.
    stage = utils.draw_clusters(sample,
                                obtained_clusters,
                                display_result=False)

    # Bounds come from the clusters actually returned. Each unordered pair (i, j), j > i, is
    # visited exactly once, so no extra bookkeeping of processed pairs is required.
    amount_obtained = len(obtained_clusters)
    for index_cluster in range(amount_obtained):
        for index_neighbor_cluster in range(index_cluster + 1, amount_obtained):
            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor_cluster]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            # Bounding interval of the two centers on each axis; the *_index_maximum sign
            # records whether the second center holds the larger coordinate (1) or not (-1).
            if center_cluster2[0] > center_cluster1[0]:
                x_maximum = center_cluster2[0]
                x_minimum = center_cluster1[0]
                x_index_maximum = 1
            else:
                x_maximum = center_cluster1[0]
                x_minimum = center_cluster2[0]
                x_index_maximum = -1

            if center_cluster2[1] > center_cluster1[1]:
                y_maximum = center_cluster2[1]
                y_minimum = center_cluster1[1]
                y_index_maximum = 1
            else:
                y_maximum = center_cluster1[1]
                y_minimum = center_cluster2[1]
                y_index_maximum = -1

            print("Cluster 1:", cluster1, ", center:", center_cluster1)
            print("Cluster 2:", cluster2, ", center:", center_cluster2)

            # The annotation text is passed positionally: its keyword name differs between
            # matplotlib releases ('s' in older ones, 'text' in newer ones).
            stage.annotate('',
                           xy=(center_cluster1[0], center_cluster1[1]),
                           xytext=(center_cluster2[0], center_cluster2[1]),
                           arrowprops=dict(arrowstyle='<->'))

            for index_distance_type, distance_type in enumerate(distances):
                distance = None

                if distance_type == 'euclidian':
                    distance = utils.euclidean_distance(
                        center_cluster1, center_cluster2)

                elif distance_type == 'manhattan':
                    distance = utils.manhattan_distance(
                        center_cluster1, center_cluster2)

                elif distance_type == 'avr-inter':
                    distance = utils.average_inter_cluster_distance(
                        cluster1, cluster2, sample)

                elif distance_type == 'avr-intra':
                    distance = utils.average_intra_cluster_distance(
                        cluster1, cluster2, sample)

                elif distance_type == 'variance':
                    distance = utils.variance_increase_distance(
                        cluster1, cluster2, sample)

                print("\tCluster distance -", distance_type, ":", distance)

                # Spread the labels along the segment between the centers; the order is
                # reversed on an axis where the first center has the larger coordinate.
                x_multiplier = index_distance_type + 3
                if x_index_maximum < 0:
                    x_multiplier = len(distances) - index_distance_type + 3

                y_multiplier = index_distance_type + 3
                if y_index_maximum < 0:
                    y_multiplier = len(distances) - index_distance_type + 3

                x_text = x_multiplier * (x_maximum - x_minimum) / (
                    len(distances) + 6) + x_minimum
                y_text = y_multiplier * (y_maximum - y_minimum) / (
                    len(distances) + 6) + y_minimum

                stage.text(x_text,
                           y_text,
                           distance_type + " {:.3f}".format(distance),
                           fontsize=9,
                           color='blue')

    plt.show()
Beispiel #11
0
    def __optimize_configuration(self):
        """!
         @brief Finds quasi-optimal medoids and updates clusters in line with them using randomized search.

         @details On every iteration a random current medoid and a random non-medoid candidate are chosen
                  and the cost of replacing the former by the latter is accumulated over all non-medoid
                  points. A negative cost means the swap is applied, clusters are rebuilt and the counter
                  of examined neighbors is reset; otherwise the counter is increased. The search stops
                  when the counter reaches the configured maximum number of neighbors.
         
         """
        index_neighbor = 0
        while (index_neighbor < self.__maxneighbor):
            # get random current medoid that is to be replaced
            current_medoid_index = self.__current[random.randint(
                0, self.__number_clusters - 1)]
            # cluster position of that medoid, taken from the membership table
            current_medoid_cluster_index = self.__belong[current_medoid_index]

            # get new candidate to be medoid
            candidate_medoid_index = random.randint(
                0,
                len(self.__pointer_data) - 1)

            # redraw until the candidate is not one of the current medoids
            # NOTE(review): this never terminates if every point is already a medoid - confirm
            # that callers guarantee fewer medoids than points.
            while candidate_medoid_index in self.__current:
                candidate_medoid_index = random.randint(
                    0,
                    len(self.__pointer_data) - 1)

            # accumulated change of the total cost if the swap is performed
            candidate_cost = 0.0
            for point_index in range(0, len(self.__pointer_data)):
                if point_index not in self.__current:
                    # get non-medoid point and its medoid
                    point_cluster_index = self.__belong[point_index]
                    point_medoid_index = self.__current[point_cluster_index]

                    # get other medoid that is nearest to the point (except current and candidate)
                    other_medoid_index = self.__find_another_nearest_medoid(
                        point_index, current_medoid_index)
                    # NOTE(review): the helper returns -1 when no other medoid exists; in that
                    # case the lookup below reads the last element of the table - confirm intended.
                    other_medoid_cluster_index = self.__belong[
                        other_medoid_index]

                    # for optimization calculate all required distances
                    # from the point to current medoid
                    distance_current = manhattan_distance(
                        self.__pointer_data[point_index],
                        self.__pointer_data[current_medoid_index])

                    # from the point to candidate median
                    distance_candidate = manhattan_distance(
                        self.__pointer_data[point_index],
                        self.__pointer_data[candidate_medoid_index])

                    # from the point to nearest (own) medoid
                    # NOTE(review): the second comparison below checks a medoid (point) index
                    # against a cluster index; 'current_medoid_index' may have been intended - confirm.
                    distance_nearest = float('inf')
                    if ((point_medoid_index != candidate_medoid_index) and
                        (point_medoid_index != current_medoid_cluster_index)):
                        distance_nearest = manhattan_distance(
                            self.__pointer_data[point_index],
                            self.__pointer_data[point_medoid_index])

                    # apply rules for cost calculation
                    if (point_cluster_index == current_medoid_cluster_index):
                        # case 1: the point belongs to the cluster of the replaced medoid and its
                        # 'nearest' distance is not larger than the distance to the candidate
                        if (distance_candidate >= distance_nearest):
                            candidate_cost += distance_nearest - distance_current

                        # case 2: the point belongs to the cluster of the replaced medoid and the
                        # candidate is closer than 'nearest'
                        else:
                            candidate_cost += distance_candidate - distance_current

                    elif (point_cluster_index == other_medoid_cluster_index):
                        # case 3 ('nearest medoid' is the representative object of that cluster and object is more similar to 'nearest' than to 'candidate'):
                        if (distance_candidate > distance_nearest):
                            pass

                        # case 4: the candidate is at least as close as 'nearest', the difference
                        # is added to the cost
                        else:
                            candidate_cost += distance_candidate - distance_nearest

            # a negative total means the swap lowers the cost
            if (candidate_cost < 0):
                # set candidate that has won
                self.__current[
                    current_medoid_cluster_index] = candidate_medoid_index

                # recalculate clusters
                self.__update_clusters(self.__current)

                # reset iterations and start the investigation from the beginning
                index_neighbor = 0

            else:
                index_neighbor += 1
Beispiel #12
0
def display_two_dimensional_cluster_distances(path_sample, amount_clusters):
    """!
    @brief Clusters a two-dimensional sample with the agglomerative algorithm, draws the clusters and,
           for every pair of clusters, an arrow between their centers labelled with each supported
           inter-cluster distance. The distances are printed as well.

    @param[in] path_sample (string): Path to the sample, passed to utils.read_sample().
    @param[in] amount_clusters (uint): Amount of clusters requested from the agglomerative algorithm.

    """
    distances = ['euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance']

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()

    # NOTE(review): 'stage' is presumably a matplotlib Axes (annotate/text are called on it) - confirm.
    stage = utils.draw_clusters(sample, obtained_clusters, display_result=False)

    # Bounds come from the clusters actually returned. Each unordered pair (i, j), j > i, is
    # visited exactly once, so no extra bookkeeping of processed pairs is required.
    amount_obtained = len(obtained_clusters)
    for index_cluster in range(amount_obtained):
        for index_neighbor_cluster in range(index_cluster + 1, amount_obtained):
            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor_cluster]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            # Bounding interval of the two centers on each axis; the *_index_maximum sign
            # records whether the second center holds the larger coordinate (1) or not (-1).
            if center_cluster2[0] > center_cluster1[0]:
                x_maximum = center_cluster2[0]
                x_minimum = center_cluster1[0]
                x_index_maximum = 1
            else:
                x_maximum = center_cluster1[0]
                x_minimum = center_cluster2[0]
                x_index_maximum = -1

            if center_cluster2[1] > center_cluster1[1]:
                y_maximum = center_cluster2[1]
                y_minimum = center_cluster1[1]
                y_index_maximum = 1
            else:
                y_maximum = center_cluster1[1]
                y_minimum = center_cluster2[1]
                y_index_maximum = -1

            print("Cluster 1:", cluster1, ", center:", center_cluster1)
            print("Cluster 2:", cluster2, ", center:", center_cluster2)

            # The annotation text is passed positionally: its keyword name differs between
            # matplotlib releases ('s' in older ones, 'text' in newer ones).
            stage.annotate('', xy=(center_cluster1[0], center_cluster1[1]), xytext=(center_cluster2[0], center_cluster2[1]), arrowprops=dict(arrowstyle='<->'))

            for index_distance_type, distance_type in enumerate(distances):
                distance = None

                if distance_type == 'euclidian':
                    distance = utils.euclidean_distance(center_cluster1, center_cluster2)

                elif distance_type == 'manhattan':
                    distance = utils.manhattan_distance(center_cluster1, center_cluster2)

                elif distance_type == 'avr-inter':
                    distance = utils.average_inter_cluster_distance(cluster1, cluster2, sample)

                elif distance_type == 'avr-intra':
                    distance = utils.average_intra_cluster_distance(cluster1, cluster2, sample)

                elif distance_type == 'variance':
                    distance = utils.variance_increase_distance(cluster1, cluster2, sample)

                print("\tCluster distance -", distance_type, ":", distance)

                # Spread the labels along the segment between the centers; the order is
                # reversed on an axis where the first center has the larger coordinate.
                x_multiplier = index_distance_type + 3
                if x_index_maximum < 0:
                    x_multiplier = len(distances) - index_distance_type + 3

                y_multiplier = index_distance_type + 3
                if y_index_maximum < 0:
                    y_multiplier = len(distances) - index_distance_type + 3

                x_text = x_multiplier * (x_maximum - x_minimum) / (len(distances) + 6) + x_minimum
                y_text = y_multiplier * (y_maximum - y_minimum) / (len(distances) + 6) + y_minimum

                stage.text(x_text, y_text, distance_type + " {:.3f}".format(distance), fontsize=9, color='blue')

    plt.show()