예제 #1
0
 def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
     """Check the distance between the CF-entries of two clusters.

     A clustering-feature entry is built for each cluster from its size,
     linear sum and square sum. The distance reported by the entries must
     be identical in both directions and must agree with the stand-alone
     utility function that corresponds to `type_measurement`.

     Args:
         cluster1: Points of the first cluster.
         cluster2: Points of the second cluster.
         type_measurement: Member of `measurement_type` selecting the metric.

     Raises:
         AssertionError: If the distance is not symmetric or disagrees
             with the reference utility calculation.

     Note:
         No reference comparison is made for a measurement type that is not
         one of the five handled below; only the symmetry check runs then.
     """
     entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
     entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

     # The distance must be the same from 1 to 2 and from 2 to 1.
     distance12 = entry1.get_distance(entry2, type_measurement)
     distance21 = entry2.get_distance(entry1, type_measurement)

     assert distance12 == distance21

     # Cross-check against the stand-alone utility calculation. Centroid
     # based metrics are compared exactly, the remaining ones with the
     # default numpy.isclose tolerance.
     if type_measurement == measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
         assert distance12 == euclidean_distance_sqrt(entry1.get_centroid(), entry2.get_centroid())

     elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
         assert distance12 == manhattan_distance(entry1.get_centroid(), entry2.get_centroid())

     elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_inter_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_intra_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
         assert numpy.isclose(distance12, variance_increase_distance(cluster1, cluster2))
예제 #2
0
 def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
     """Check the distance between the CF-entries of two clusters.

     A clustering-feature entry is built for each cluster from its size,
     linear sum and square sum. The distance reported by the entries must
     be identical in both directions and must agree with the stand-alone
     utility function that corresponds to `type_measurement`.

     Args:
         cluster1: Points of the first cluster.
         cluster2: Points of the second cluster.
         type_measurement: Member of `measurement_type` selecting the metric.

     Raises:
         AssertionError: If the distance is not symmetric or disagrees
             with the reference utility calculation.

     Note:
         No reference comparison is made for a measurement type that is not
         one of the five handled below; only the symmetry check runs then.
     """
     entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
     entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

     # The distance must be the same from 1 to 2 and from 2 to 1.
     distance12 = entry1.get_distance(entry2, type_measurement)
     distance21 = entry2.get_distance(entry1, type_measurement)

     assert distance12 == distance21

     # Cross-check against the stand-alone utility calculation. Centroid
     # based metrics are compared exactly, the remaining ones with the
     # default numpy.isclose tolerance.
     if type_measurement == measurement_type.CENTROID_EUCLIDEAN_DISTANCE:
         assert distance12 == euclidean_distance_square(entry1.get_centroid(), entry2.get_centroid())

     elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
         assert distance12 == manhattan_distance(entry1.get_centroid(), entry2.get_centroid())

     elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_inter_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_intra_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
         assert numpy.isclose(distance12, variance_increase_distance(cluster1, cluster2))
예제 #3
0
def cluster_distances(path_sample, amount_clusters):
    """Print every supported distance between each pair of clusters.

    The sample stored at `path_sample` is read and split into
    `amount_clusters` clusters by the agglomerative algorithm. For each
    unordered pair of clusters, five measurements are printed: the
    Euclidean and Manhattan distances between the cluster centroids, and
    the average inter-cluster, average intra-cluster and variance-increase
    distances between the clusters themselves.

    Args:
        path_sample: Path of the sample file passed to `utils.read_sample`.
        amount_clusters: Number of clusters requested from the algorithm.
    """
    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()

    print("Measurements for:", path_sample)

    for index_cluster in range(len(obtained_clusters)):
        for index_neighbor in range(index_cluster + 1, len(obtained_clusters)):
            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            # Label -> deferred calculation, kept in the reporting order.
            measurements = (
                ('euclidian', lambda: utils.euclidean_distance(
                    center_cluster1, center_cluster2)),
                ('manhattan', lambda: utils.manhattan_distance(
                    center_cluster1, center_cluster2)),
                ('avr-inter', lambda: utils.average_inter_cluster_distance(
                    cluster1, cluster2, sample)),
                ('avr-intra', lambda: utils.average_intra_cluster_distance(
                    cluster1, cluster2, sample)),
                ('variance', lambda: utils.variance_increase_distance(
                    cluster1, cluster2, sample)),
            )

            # Each lambda is called within the same iteration that created
            # it, so it always sees the current pair of clusters.
            for distance_type, measure in measurements:
                # Report inside the loop so that every measurement is
                # printed, not only the last one that was calculated.
                print("\tDistance", distance_type, "from", index_cluster, "to",
                      index_neighbor, "is:", measure())
예제 #4
0
def cluster_distances(path_sample, amount_clusters):
    """Print every supported distance between each pair of clusters.

    The sample stored at `path_sample` is read and split into
    `amount_clusters` clusters by the agglomerative algorithm. For each
    unordered pair of clusters, five measurements are printed: the
    Euclidean and Manhattan distances between the cluster centroids, and
    the average inter-cluster, average intra-cluster and variance-increase
    distances between the clusters themselves.

    Args:
        path_sample: Path of the sample file passed to `utils.read_sample`.
        amount_clusters: Number of clusters requested from the algorithm.
    """
    distances = ['euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance']

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()

    print("Measurements for:", path_sample)

    for index_cluster in range(len(obtained_clusters)):
        for index_neighbor in range(index_cluster + 1, len(obtained_clusters)):
            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            for distance_type in distances:
                distance = None

                if distance_type == 'euclidian':
                    distance = utils.euclidean_distance(center_cluster1, center_cluster2)

                elif distance_type == 'manhattan':
                    distance = utils.manhattan_distance(center_cluster1, center_cluster2)

                elif distance_type == 'avr-inter':
                    distance = utils.average_inter_cluster_distance(cluster1, cluster2, sample)

                elif distance_type == 'avr-intra':
                    distance = utils.average_intra_cluster_distance(cluster1, cluster2, sample)

                elif distance_type == 'variance':
                    distance = utils.variance_increase_distance(cluster1, cluster2, sample)

                # Report inside the loop so that every measurement is
                # printed, not only the last one that was calculated.
                print("\tDistance", distance_type, "from", index_cluster, "to", index_neighbor, "is:", distance)
예제 #5
0
def display_two_dimensional_cluster_distances(path_sample, amount_clusters):
    """Draw the clusters of a sample and label each cluster pair with its distances.

    The sample stored at `path_sample` is read, split into `amount_clusters`
    clusters by the agglomerative algorithm and drawn. For every unordered
    pair of clusters a double-headed arrow is drawn between the two
    centroids, and five distance measurements are printed and written as
    text labels spread between the centroids. The figure is shown at the end.

    Only the first two coordinates of each centroid are used for drawing.

    Args:
        path_sample: Path of the sample file passed to `utils.read_sample`.
        amount_clusters: Number of clusters requested from the algorithm.
    """
    distances = [
        'euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance'
    ]

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()
    stage = utils.draw_clusters(sample,
                                obtained_clusters,
                                display_result=False)

    # Each unordered pair (i, j) with i < j is visited exactly once by these
    # two loops, so no bookkeeping of already processed pairs is required.
    for index_cluster in range(amount_clusters):
        for index_neighbor_cluster in range(index_cluster + 1,
                                            amount_clusters):
            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor_cluster]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            # Bounding interval of the two centroids on each axis. The
            # *_index_maximum sign records which centroid holds the maximum:
            # 1 when it is the second one, -1 otherwise.
            if center_cluster2[0] > center_cluster1[0]:
                x_minimum, x_maximum = center_cluster1[0], center_cluster2[0]
                x_index_maximum = 1
            else:
                x_minimum, x_maximum = center_cluster2[0], center_cluster1[0]
                x_index_maximum = -1

            if center_cluster2[1] > center_cluster1[1]:
                y_minimum, y_maximum = center_cluster1[1], center_cluster2[1]
                y_index_maximum = 1
            else:
                y_minimum, y_maximum = center_cluster2[1], center_cluster1[1]
                y_index_maximum = -1

            print("Cluster 1:", cluster1, ", center:", center_cluster1)
            print("Cluster 2:", cluster2, ", center:", center_cluster2)

            # The (empty) label is passed positionally: matplotlib 3.3 renamed
            # the `s` keyword of annotate() to `text`, so a positional
            # argument works with both old and new releases.
            # NOTE(review): assumes `stage` is a matplotlib Axes - confirm.
            stage.annotate('',
                           xy=(center_cluster1[0], center_cluster1[1]),
                           xytext=(center_cluster2[0], center_cluster2[1]),
                           arrowprops=dict(arrowstyle='<->'))

            for index_distance_type, distance_type in enumerate(distances):
                distance = None

                if distance_type == 'euclidian':
                    distance = utils.euclidean_distance(
                        center_cluster1, center_cluster2)

                elif distance_type == 'manhattan':
                    distance = utils.manhattan_distance(
                        center_cluster1, center_cluster2)

                elif distance_type == 'avr-inter':
                    distance = utils.average_inter_cluster_distance(
                        cluster1, cluster2, sample)

                elif distance_type == 'avr-intra':
                    distance = utils.average_intra_cluster_distance(
                        cluster1, cluster2, sample)

                elif distance_type == 'variance':
                    distance = utils.variance_increase_distance(
                        cluster1, cluster2, sample)

                print("\tCluster distance -", distance_type, ":", distance)

                # Spread the labels over the interval between the centroids;
                # the order is reversed on an axis whose maximum belongs to
                # the first centroid.
                x_multiplier = index_distance_type + 3
                if x_index_maximum < 0:
                    x_multiplier = len(distances) - index_distance_type + 3

                y_multiplier = index_distance_type + 3
                if y_index_maximum < 0:
                    y_multiplier = len(distances) - index_distance_type + 3

                x_text = x_multiplier * (x_maximum - x_minimum) / (
                    len(distances) + 6) + x_minimum
                y_text = y_multiplier * (y_maximum - y_minimum) / (
                    len(distances) + 6) + y_minimum

                stage.text(x_text,
                           y_text,
                           distance_type + " {:.3f}".format(distance),
                           fontsize=9,
                           color='blue')

    plt.show()
예제 #6
0
def display_two_dimensional_cluster_distances(path_sample, amount_clusters):
    """Draw the clusters of a sample and label each cluster pair with its distances.

    The sample stored at `path_sample` is read, split into `amount_clusters`
    clusters by the agglomerative algorithm and drawn. For every unordered
    pair of clusters a double-headed arrow is drawn between the two
    centroids, and five distance measurements are printed and written as
    text labels spread between the centroids. The figure is shown at the end.

    Only the first two coordinates of each centroid are used for drawing.

    Args:
        path_sample: Path of the sample file passed to `utils.read_sample`.
        amount_clusters: Number of clusters requested from the algorithm.
    """
    distances = ['euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance']

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()
    stage = utils.draw_clusters(sample, obtained_clusters, display_result=False)

    # Each unordered pair (i, j) with i < j is visited exactly once by these
    # two loops, so no bookkeeping of already processed pairs is required.
    for index_cluster in range(amount_clusters):
        for index_neighbor_cluster in range(index_cluster + 1, amount_clusters):
            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor_cluster]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            # Bounding interval of the two centroids on each axis. The
            # *_index_maximum sign records which centroid holds the maximum:
            # 1 when it is the second one, -1 otherwise.
            if center_cluster2[0] > center_cluster1[0]:
                x_maximum = center_cluster2[0]
                x_minimum = center_cluster1[0]
                x_index_maximum = 1
            else:
                x_maximum = center_cluster1[0]
                x_minimum = center_cluster2[0]
                x_index_maximum = -1

            if center_cluster2[1] > center_cluster1[1]:
                y_maximum = center_cluster2[1]
                y_minimum = center_cluster1[1]
                y_index_maximum = 1
            else:
                y_maximum = center_cluster1[1]
                y_minimum = center_cluster2[1]
                y_index_maximum = -1

            print("Cluster 1:", cluster1, ", center:", center_cluster1)
            print("Cluster 2:", cluster2, ", center:", center_cluster2)

            # The (empty) label is passed positionally: matplotlib 3.3 renamed
            # the `s` keyword of annotate() to `text`, so a positional
            # argument works with both old and new releases.
            # NOTE(review): assumes `stage` is a matplotlib Axes - confirm.
            stage.annotate('', xy=(center_cluster1[0], center_cluster1[1]), xytext=(center_cluster2[0], center_cluster2[1]), arrowprops=dict(arrowstyle='<->'))

            for index_distance_type, distance_type in enumerate(distances):
                distance = None

                if distance_type == 'euclidian':
                    distance = utils.euclidean_distance(center_cluster1, center_cluster2)

                elif distance_type == 'manhattan':
                    distance = utils.manhattan_distance(center_cluster1, center_cluster2)

                elif distance_type == 'avr-inter':
                    distance = utils.average_inter_cluster_distance(cluster1, cluster2, sample)

                elif distance_type == 'avr-intra':
                    distance = utils.average_intra_cluster_distance(cluster1, cluster2, sample)

                elif distance_type == 'variance':
                    distance = utils.variance_increase_distance(cluster1, cluster2, sample)

                print("\tCluster distance -", distance_type, ":", distance)

                # Spread the labels over the interval between the centroids;
                # the order is reversed on an axis whose maximum belongs to
                # the first centroid.
                x_multiplier = index_distance_type + 3
                if x_index_maximum < 0:
                    x_multiplier = len(distances) - index_distance_type + 3

                y_multiplier = index_distance_type + 3
                if y_index_maximum < 0:
                    y_multiplier = len(distances) - index_distance_type + 3

                x_text = x_multiplier * (x_maximum - x_minimum) / (len(distances) + 6) + x_minimum
                y_text = y_multiplier * (y_maximum - y_minimum) / (len(distances) + 6) + y_minimum

                stage.text(x_text, y_text, distance_type + " {:.3f}".format(distance), fontsize=9, color='blue')

    plt.show()