Example #1
def template_clustering(number_clusters, path, links):
    sample = read_sample(path);
    
    clusters_centroid_link = None;
    clusters_single_link = None;
    clusters_complete_link = None;
    clusters_average_link = None;
    
    visualizer = cluster_visualizer(len(links));
    index_canvas = 0;
    
    if (type_link.CENTROID_LINK in links):
        agglomerative_centroid_link = agglomerative(sample, number_clusters, type_link.CENTROID_LINK);
        
        (ticks, result) = timedcall(agglomerative_centroid_link.process);
        clusters_centroid_link = agglomerative_centroid_link.get_clusters();
        
        visualizer.append_clusters(clusters_centroid_link, sample, index_canvas);
        visualizer.set_canvas_title('Link: Centroid', index_canvas);
        index_canvas += 1;
        
        print("Sample: ", path, "Link: Centroid", "\tExecution time: ", ticks, "\n");
    
    if (type_link.SINGLE_LINK in links):
        agglomerative_simple_link = agglomerative(sample, number_clusters, type_link.SINGLE_LINK);
        
        (ticks, result) = timedcall(agglomerative_simple_link.process);
        clusters_single_link = agglomerative_simple_link.get_clusters();
        
        visualizer.append_clusters(clusters_single_link, sample, index_canvas);
        visualizer.set_canvas_title('Link: Single', index_canvas);
        index_canvas += 1;
        
        print("Sample: ", path, "Link: Single", "\tExecution time: ", ticks, "\n");
    
    if (type_link.COMPLETE_LINK in links):
        agglomerative_complete_link = agglomerative(sample, number_clusters, type_link.COMPLETE_LINK);
        
        (ticks, result) = timedcall(agglomerative_complete_link.process);
        clusters_complete_link = agglomerative_complete_link.get_clusters();
        
        visualizer.append_clusters(clusters_complete_link, sample, index_canvas);
        visualizer.set_canvas_title('Link: Complete', index_canvas);
        index_canvas += 1;
        
        print("Sample: ", path, "Link: Complete", "\tExecution time: ", ticks, "\n");        
    
    if (type_link.AVERAGE_LINK in links):
        agglomerative_average_link = agglomerative(sample, number_clusters, type_link.AVERAGE_LINK);
        
        (ticks, result) = timedcall(agglomerative_average_link.process);
        clusters_average_link = agglomerative_average_link.get_clusters();
        
        visualizer.append_clusters(clusters_average_link, sample, index_canvas);
        visualizer.set_canvas_title('Link: Average', index_canvas);
        index_canvas += 1;
        
        print("Sample: ", path, "Link: Average", "\tExecution time: ", ticks, "\n");  
    
    visualizer.show();
def template_clustering(number_clusters, path, links):
    sample = read_sample(path);
    
    clusters_centroid_link = None;
    clusters_single_link = None;
    clusters_complete_link = None;
    clusters_average_link = None;
    
    visualizer = cluster_visualizer(len(links), len(links));
    index_canvas = 0;
    
    if (type_link.CENTROID_LINK in links):
        agglomerative_centroid_link = agglomerative(sample, number_clusters, type_link.CENTROID_LINK, True);
        
        (ticks, result) = timedcall(agglomerative_centroid_link.process);
        clusters_centroid_link = agglomerative_centroid_link.get_clusters();
        
        visualizer.append_clusters(clusters_centroid_link, sample, index_canvas);
        visualizer.set_canvas_title('Link: Centroid', index_canvas);
        index_canvas += 1;
        
        print("Sample: ", path, "Link: Centroid", "\tExecution time: ", ticks, "\n");
    
    if (type_link.SINGLE_LINK in links):
        agglomerative_simple_link = agglomerative(sample, number_clusters, type_link.SINGLE_LINK);
        
        (ticks, result) = timedcall(agglomerative_simple_link.process);
        clusters_single_link = agglomerative_simple_link.get_clusters();
        
        visualizer.append_clusters(clusters_single_link, sample, index_canvas);
        visualizer.set_canvas_title('Link: Single', index_canvas);
        index_canvas += 1;
        
        print("Sample: ", path, "Link: Single", "\tExecution time: ", ticks, "\n");
    
    if (type_link.COMPLETE_LINK in links):
        agglomerative_complete_link = agglomerative(sample, number_clusters, type_link.COMPLETE_LINK);
        
        (ticks, result) = timedcall(agglomerative_complete_link.process);
        clusters_complete_link = agglomerative_complete_link.get_clusters();
        
        visualizer.append_clusters(clusters_complete_link, sample, index_canvas);
        visualizer.set_canvas_title('Link: Complete', index_canvas);
        index_canvas += 1;
        
        print("Sample: ", path, "Link: Complete", "\tExecution time: ", ticks, "\n");
    
    if (type_link.AVERAGE_LINK in links):
        agglomerative_average_link = agglomerative(sample, number_clusters, type_link.AVERAGE_LINK);
        
        (ticks, result) = timedcall(agglomerative_average_link.process);
        clusters_average_link = agglomerative_average_link.get_clusters();
        
        visualizer.append_clusters(clusters_average_link, sample, index_canvas);
        visualizer.set_canvas_title('Link: Average', index_canvas);
        index_canvas += 1;
        
        print("Sample: ", path, "Link: Average", "\tExecution time: ", ticks, "\n");
    
    visualizer.show();
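
Both templates above assume the pyclustering API. Below is a minimal sketch of the imports they rely on together with an illustrative call; the chosen sample constant and cluster count are assumptions for demonstration, not part of the original example.

# Minimal sketch of the imports the templates above appear to assume (pyclustering),
# plus an illustrative invocation; the sample and cluster count below are assumptions.
from pyclustering.cluster import cluster_visualizer
from pyclustering.cluster.agglomerative import agglomerative, type_link
from pyclustering.samples.definitions import SIMPLE_SAMPLES
from pyclustering.utils import read_sample, timedcall

# Cluster one of the bundled simple samples with two different link criteria.
template_clustering(2, SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
                    [type_link.SINGLE_LINK, type_link.AVERAGE_LINK])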
Example #3
    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of BIRCH algorithm.
        
        @return (birch) Returns itself (BIRCH instance).
        
        @see get_clusters()
        
        """
        
        self.__insert_data()
        self.__extract_features()

        cf_data = [feature.get_centroid() for feature in self.__features]

        algorithm = agglomerative(cf_data, self.__number_clusters, type_link.SINGLE_LINK).process()
        self.__cf_clusters = algorithm.get_clusters()

        cf_labels = cluster_encoder(type_encoding.CLUSTER_INDEX_LIST_SEPARATION, self.__cf_clusters, cf_data).\
            set_encoding(type_encoding.CLUSTER_INDEX_LABELING).get_clusters()

        self.__clusters = [[] for _ in range(len(self.__cf_clusters))]
        for index_point in range(len(self.__pointer_data)):
            index_cf_entry = numpy.argmin(numpy.sum(numpy.square(
                numpy.subtract(cf_data, self.__pointer_data[index_point])), axis=1))
            index_cluster = cf_labels[index_cf_entry]
            self.__clusters[index_cluster].append(index_point)

        return self
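
The process method above appears to come from pyclustering's birch implementation. A minimal usage sketch under that assumption; the sample constant and cluster count are illustrative only.

# Hedged usage sketch for the BIRCH process() method shown above, assuming the
# pyclustering birch class; the sample and cluster count are illustrative only.
from pyclustering.cluster.birch import birch
from pyclustering.samples.definitions import FCPS_SAMPLES
from pyclustering.utils import read_sample

sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)

birch_instance = birch(sample, 3)          # Lsun is commonly clustered into 3 groups
birch_instance.process()                   # runs the method shown above
clusters = birch_instance.get_clusters()
print([len(cluster) for cluster in clusters])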
    def templateClusterAllocationTheSameObjects(number_objects,
                                                number_clusters,
                                                link,
                                                ccore_flag=False):
        input_data = [[random()]] * number_objects

        agglomerative_instance = agglomerative(input_data, number_clusters,
                                               link, ccore_flag)
        agglomerative_instance.process()
        clusters = agglomerative_instance.get_clusters()

        assert len(clusters) == number_clusters

        object_mark = [False] * number_objects
        allocated_number_objects = 0

        for cluster in clusters:
            for index_object in cluster:
                assert (object_mark[index_object] == False)
                # one object can be in only one cluster.

                object_mark[index_object] = True
                allocated_number_objects += 1

        assert (number_objects == allocated_number_objects)
Example #5
    def get_agglomerative_clusters(data, count_clusters, line_type):
        rows = data.getRows()
        input_data = list()
        result_clusters = list()
        for row in rows:
            input_data.append(row.getDataArray())
        SST = calculate_sst(input_data)

        # create object that uses python code only
        agglomerative_instance = agglomerative(input_data,
                                               count_clusters,
                                               link=line_type)
        # cluster analysis
        agglomerative_instance.process()
        # obtain results of clustering
        clusters = agglomerative_instance.get_clusters()

        colorRange = Constants.DEFAULT_COLOR_SET
        SSB = 0
        SSW = 0
        for i, cluster in enumerate(clusters):
            result_cluster = Cluster(
                AgglomerativeWindow.get_rows_agglomerative(data, cluster))
            ro = AgglomerativeWindow.get_rows_agglomerative(data, cluster)
            f = [x._dataArray for x in ro]
            SSW = SSW + calculate_ssw(f)
            colour = random.choice(colorRange)
            result_cluster.setName(colour)
            result_cluster.setColor(colour)
            result_clusters.append(result_cluster)
        SSB = calculate_ssb(SST, SSW)
        RS_RESULT.append(SSB / SST)

        print(RS_RESULT)
        return result_clusters
def aggl_cluster(df, n_clusters, link, hover_text):
    datadf = df.loc[:, df.columns != hover_text]
    data_list = datadf.to_numpy(dtype="int64").tolist()
    if (link == "centroid"):
        typelink = type_link.CENTROID_LINK
    elif (link == "single"):
        typelink = type_link.SINGLE_LINK
    elif (link == "complete"):
        typelink = agglomerative.type_link.COMPLETE_LINK
    else:
        typelink = agglomerative.type_link.AVERAGE_LINK
    aggl_instance = agglomerative(data_list, n_clusters, typelink)
    aggl_instance.process()
    clusters = aggl_instance.get_clusters()
    reps = aggl_instance.get_cluster_encoding()
    encoder = cluster_encoder(reps, clusters, data_list)
    encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)
    label = np.array(encoder.get_clusters(), dtype='int32')
    data_array = np.array(data_list)
    col_len = len(datadf.columns)
    if (col_len == 2):
        clus = scat2d(data_array, label, hover_text, df)
        return clus
    else:
        clus = scat3d(data_array, label, hover_text, df)
        return clus
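
aggl_cluster converts pyclustering's index-list clusters into one label per point via cluster_encoder before plotting. A standalone sketch of just that conversion follows; the toy data and cluster count are assumptions for illustration.

# Standalone sketch of the label conversion used in aggl_cluster; the toy data
# and cluster count here are assumptions for illustration.
import numpy as np
from pyclustering.cluster.agglomerative import agglomerative, type_link
from pyclustering.cluster.encoder import cluster_encoder, type_encoding

data = [[0.0, 0.1], [0.2, 0.0], [5.0, 5.1], [5.2, 4.9]]

instance = agglomerative(data, 2, type_link.AVERAGE_LINK)
instance.process()
clusters = instance.get_clusters()                 # index lists, e.g. [[0, 1], [2, 3]]

# Re-encode the index-list representation as one cluster label per point.
encoder = cluster_encoder(instance.get_cluster_encoding(), clusters, data)
encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)
labels = np.array(encoder.get_clusters(), dtype='int32')   # e.g. [0, 0, 1, 1]
print(labels)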
Example #7
def consensus_clustering(input_path,CLUSTERING_PATH):
    df = pd.read_csv(CLUSTERING_PATH+"distance_matrix.csv", delimiter=',', header=None)

    # read all the values
    sample = df.values
    # number of elements
    N = len(df.columns)
    print(isinstance(np.asmatrix(sample), np.matrix))
    # rule-of-thumb candidates for k
    df1 = pd.DataFrame(columns=['value'],
                       index=['k_sqrtNBy4', 'k_sqrtNDiv4', 'k_sqrtNDiv2', 'k_sqrtNBy2', 'k_sqrtN'])
    #df1.at['k_1','value']= 1
    df1.at['k_sqrtN','value']= round(sqrt(N),0)
    df1.at['k_sqrtNDiv2', 'value'] = round(sqrt(N / 2),0)
    df1.at['k_sqrtNBy2', 'value'] = round(sqrt(N * 2),0)
    df1.at['k_sqrtNDiv4', 'value'] = round(sqrt(N / 4),0)
    df1.at['k_sqrtNBy4', 'value'] = round(sqrt(N*4),0)

    # Declare the weight of each vote; the consensus matrix is NxN.
    # initialization
    iterations = 20
    weight1 = 1 / iterations
    weight2 = 1 / len(df1.index)  # the number of k values used
    consensus_matrix = np.zeros((N, N))

    for k in df1.index:
        # run the same algorithm for several k values; each configuration is run `iterations` times
        for iteration in range(iterations):
            k_value = int(df1.loc[k].values[0])
            initial_medoids = kmeans_plusplus_initializer(sample, k_value).initialize(return_index=True)
            kmedoids_instance = kmedoids(np.asmatrix(sample), initial_medoids, data_type="distance_matrix")
            kmedoids_instance.process()
            clusters = kmedoids_instance.get_clusters()
            coassociations_matrix = np.zeros((N, N))
            for cluster in clusters:
                for crypto in cluster:
                    # set the diagonal element to 1
                    coassociations_matrix[crypto][crypto] = 1
                    for crypto1 in cluster:
                        coassociations_matrix[crypto][crypto1] = 1
                        coassociations_matrix[crypto1][crypto] = 1
            # accumulate the co-association matrix into the consensus matrix
            consensus_matrix = consensus_matrix + coassociations_matrix
    consensus_matrix = consensus_matrix * weight1 * weight2
    # (1 - consensus_matrix) gives the dissimilarity/distance matrix
    distance_matrix = 1 - consensus_matrix
    df = pd.DataFrame(data=distance_matrix)
    df.to_csv(CLUSTERING_PATH+"consensus_matrix(distance).csv",sep=",")

    # Hierarchical clustering on the consensus distance matrix
    for k in df1.index:
        k_value = int(df1.loc[k].values[0])

        agglomerative_instance = agglomerative(distance_matrix, k_value, type_link.AVERAGE_LINK)
        agglomerative_instance.process()
        # Obtain results of clustering
        clusters = agglomerative_instance.get_clusters()
        save_clusters(input_path,clusters,k,CLUSTERING_PATH)
Example #8
 def templateClusterAllocationOneDimensionData(link, ccore_flag):
     input_data = [ [random()] for i in range(10) ] + [ [random() + 3] for i in range(10) ] + [ [random() + 5] for i in range(10) ] + [ [random() + 8] for i in range(10) ];
     
     agglomerative_instance = agglomerative(input_data, 4, link, ccore_flag);
     agglomerative_instance.process();
     clusters = agglomerative_instance.get_clusters();
     
     assert len(clusters) == 4;
     for cluster in clusters:
         assert len(cluster) == 10;
 def templateClusterAllocationOneDimensionData(self, link):
     input_data = [ [random()] for i in range(10) ] + [ [random() + 3] for i in range(10) ] + [ [random() + 5] for i in range(10) ] + [ [random() + 8] for i in range(10) ];
     
     agglomerative_instance = agglomerative(input_data, 4, link);
     agglomerative_instance.process();
     clusters = agglomerative_instance.get_clusters();
     
     assert len(clusters) == 4;
     for cluster in clusters:
         assert len(cluster) == 10;
    def get_modelo(self, algoritmo, eps, neig):
        print(algoritmo + ' ' + str(eps) + ' - ' + str(neig))
        instance = None

        if algoritmo == 'AGNES':
            instance = agglomerative(self.amostras,
                                     self.numero_clusters,
                                     link=None)
        elif algoritmo == 'BIRCH':
            instance = birch(self.amostras,
                             self.numero_clusters,
                             entry_size_limit=10000)
        elif algoritmo == 'CLARANS':
            instance = clarans(self.amostras,
                               self.numero_clusters,
                               numlocal=100,
                               maxneighbor=1)
        elif algoritmo == 'CURE':
            instance = cure(self.amostras,
                            self.numero_clusters,
                            number_represent_points=5,
                            compression=0.5)
        elif algoritmo == 'DBSCAN':
            instance = dbscan(self.amostras, eps=eps, neighbors=neig)
        elif algoritmo == 'FCM':
            initial_centers = kmeans_plusplus_initializer(
                self.amostras, self.numero_clusters).initialize()
            instance = fcm(self.amostras, initial_centers)
        elif algoritmo == 'KMEANS':
            initial_centers = kmeans_plusplus_initializer(
                self.amostras, self.numero_clusters).initialize()
            instance = kmeans(self.amostras, initial_centers, tolerance=0.001)
        elif algoritmo == 'KMEDOIDS':
            instance = kmedoids(self.amostras,
                                initial_index_medoids=[0, 0, 0, 0, 0, 0, 0],
                                tolerance=0.0001)  # adjust to the number of clusters
        elif algoritmo == 'OPTICS':
            instance = optics(self.amostras, eps=eps, minpts=neig)
        elif algoritmo == 'ROCK':
            instance = rock(self.amostras,
                            eps=eps,
                            number_clusters=self.numero_clusters,
                            threshold=0.5)
        else:
            pass

        instance.process()
        lista_agrupada = self.get_lista_agrupada(instance.get_clusters())
        lista_agrupada = np.array(lista_agrupada)

        if (neig != 0):
            n_grupos = len(np.unique(lista_agrupada))
            if n_grupos > self.numero_clusters:
                lista_agrupada = self.get_modelo(algoritmo, eps, neig + 1)
        return lista_agrupada
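
get_modelo dispatches over several pyclustering algorithms; below is a sketch of the imports it appears to rely on, following pyclustering's per-algorithm module layout.

# Imports that get_modelo appears to assume (pyclustering's per-algorithm modules
# plus numpy aliased as np, as used in the body above).
import numpy as np
from pyclustering.cluster.agglomerative import agglomerative
from pyclustering.cluster.birch import birch
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
from pyclustering.cluster.clarans import clarans
from pyclustering.cluster.cure import cure
from pyclustering.cluster.dbscan import dbscan
from pyclustering.cluster.fcm import fcm
from pyclustering.cluster.kmeans import kmeans
from pyclustering.cluster.kmedoids import kmedoids
from pyclustering.cluster.optics import optics
from pyclustering.cluster.rock import rock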
Example #11
 def templateClusteringResults(path, number_clusters, link, expected_length_clusters, ccore_flag):
     sample = read_sample(path);
     
     agglomerative_instance = agglomerative(sample, number_clusters, link, ccore_flag);
     agglomerative_instance.process();
     
     clusters = agglomerative_instance.get_clusters();
     
     assert sum([len(cluster) for cluster in clusters]) == len(sample);
     assert sum([len(cluster) for cluster in clusters]) == sum(expected_length_clusters);
     assert sorted([len(cluster) for cluster in clusters]) == expected_length_clusters;
 def templateClusteringResults(self, path, number_clusters, link, expected_length_clusters):
     sample = read_sample(path);
     
     agglomerative_instance = agglomerative(sample, number_clusters, link);
     agglomerative_instance.process();
     
     clusters = agglomerative_instance.get_clusters();
     
     assert sum([len(cluster) for cluster in clusters]) == len(sample);
     assert sum([len(cluster) for cluster in clusters]) == sum(expected_length_clusters);
     assert sorted([len(cluster) for cluster in clusters]) == expected_length_clusters;
Example #13
def cluster_distances(path_sample, amount_clusters):
    distances = [
        'euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance'
    ]

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()

    print("Measurements for:", path_sample)

    for index_cluster in range(len(obtained_clusters)):
        for index_neighbor in range(index_cluster + 1, len(obtained_clusters),
                                    1):
            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            for index_distance_type in range(len(distances)):
                distance = None
                distance_type = distances[index_distance_type]

                if (distance_type == 'euclidian'):
                    distance = utils.euclidean_distance(
                        center_cluster1, center_cluster2)

                elif (distance_type == 'manhattan'):
                    distance = utils.manhattan_distance(
                        center_cluster1, center_cluster2)

                elif (distance_type == 'avr-inter'):
                    distance = utils.average_inter_cluster_distance(
                        cluster1, cluster2, sample)

                elif (distance_type == 'avr-intra'):
                    distance = utils.average_intra_cluster_distance(
                        cluster1, cluster2, sample)

                elif (distance_type == 'variance'):
                    distance = utils.variance_increase_distance(
                        cluster1, cluster2, sample)

            print("\tDistance", distance_type, "from", index_cluster, "to",
                  index_neighbor, "is:", distance)
Example #14
    def __build_circles_from_contour(self, color_mask, amount, amount_maximum):
        contours, _ = cv2.findContours(color_mask, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_NONE)
        if len(contours) < amount:
            return None

        circles = self.__get_circles_from_contours(contours)
        if len(circles) < amount:
            return None

        coordinates = [[c[0], c[1]] for c in circles]

        clustering_algorithm = agglomerative(coordinates, amount_maximum)
        clustering_algorithm.process()
        clusters = clustering_algorithm.get_clusters()

        return self.__get_farthest_circles(circles, clusters)
Example #15
    def runAGGLOMERATIVE(self, k, X, type_link_param):
        cluster_points = {}
        for q in range(k):
            cluster_points[q] = list()

        agglo_instance = agglomerative(data=X,
                                       number_clusters=k,
                                       link=type_link_param)
        agglo_instance.process()
        clusters = agglo_instance.get_clusters()
        for id_point in range(len(X)):
            for cluster_id in range(len(clusters)):
                point_ids_in_cluster = [
                    int(point_id_in_cluster)
                    for point_id_in_cluster in clusters[cluster_id]
                ]
                if (id_point in point_ids_in_cluster):
                    cluster_points[cluster_id].append(X[id_point])

        return cluster_points
Example #16
 def templateClusterAllocationTheSameObjects(number_objects, number_clusters, link, ccore_flag):
     input_data = [ [random()] ] * number_objects;
     
     agglomerative_instance = agglomerative(input_data, number_clusters, link, ccore_flag);
     agglomerative_instance.process();
     clusters = agglomerative_instance.get_clusters();
     
     assert len(clusters) == number_clusters;
     
     object_mark = [False] * number_objects;
     allocated_number_objects = 0;
     
     for cluster in clusters:
         for index_object in cluster: 
             assert (object_mark[index_object] == False);    # one object can be in only one cluster.
             
             object_mark[index_object] = True;
             allocated_number_objects += 1;
         
     assert (number_objects == allocated_number_objects);    # number of allocated objects should be the same.
Example #17
 def templateClusterAllocationTheSameObjects(self, number_objects, number_clusters, link, ccore_flag = False):
     input_data = [ [random()] ] * number_objects;
     
     agglomerative_instance = agglomerative(input_data, number_clusters, link, ccore_flag);
     agglomerative_instance.process();
     clusters = agglomerative_instance.get_clusters();
     
     assert len(clusters) == number_clusters;
     
     object_mark = [False] * number_objects;
     allocated_number_objects = 0;
     
     for cluster in clusters:
         for index_object in cluster: 
             assert (object_mark[index_object] == False);    # one object can be in only one cluster.
             
             object_mark[index_object] = True;
             allocated_number_objects += 1;
         
     assert (number_objects == allocated_number_objects);    # number of allocated objects should be the same.
Example #18
def cluster_distances(path_sample, amount_clusters):
    distances = ['euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance'];
    
    sample = utils.read_sample(path_sample);
    
    agglomerative_instance = agglomerative(sample, amount_clusters);
    agglomerative_instance.process();
    
    obtained_clusters = agglomerative_instance.get_clusters();
    
    print("Measurements for:", path_sample);
    
    for index_cluster in range(len(obtained_clusters)):
        for index_neighbor in range(index_cluster + 1, len(obtained_clusters), 1):
            cluster1 = obtained_clusters[index_cluster];
            cluster2 = obtained_clusters[index_neighbor];
            
            center_cluster1 = utils.centroid(sample, cluster1);
            center_cluster2 = utils.centroid(sample, cluster2);
            
            for index_distance_type in range(len(distances)):
                distance = None;
                distance_type = distances[index_distance_type];
        
                if (distance_type == 'euclidian'):
                    distance = utils.euclidean_distance(center_cluster1, center_cluster2);
                    
                elif (distance_type == 'manhattan'):
                    distance = utils.manhattan_distance(center_cluster1, center_cluster2);
                    
                elif (distance_type == 'avr-inter'):
                    distance = utils.average_inter_cluster_distance(cluster1, cluster2, sample);
                
                elif (distance_type == 'avr-intra'):
                    distance = utils.average_intra_cluster_distance(cluster1, cluster2, sample);
                
                elif (distance_type == 'variance'):
                    distance = utils.variance_increase_distance(cluster1, cluster2, sample);
            
            print("\tDistance", distance_type, "from", index_cluster, "to", index_neighbor, "is:", distance);
Example #19
 def testCoreInterfaceIntInputData(self):
     agglomerative_instance = agglomerative(
         [[1], [2], [3], [20], [21], [22]], 2, type_link.SINGLE_LINK, True)
     agglomerative_instance.process()
     assert len(agglomerative_instance.get_clusters()) == 2
Example #20
    # Spectral Clustering
    y_pred = SpectralClustering(n_clusters=k).fit_predict(X)
    plt.scatter(X[:, 0], X[:, 1], c=y_pred)
    plt.title("Spectral Clustering")
    plt.show()

    # CURE
    cure_instance = cure(data=X, number_cluster=k);
    cure_instance.process();
    clusters = cure_instance.get_clusters();
    visualizer = cluster_visualizer(titles=["Cure"]);
    visualizer.append_clusters(clusters, X);
    visualizer.show();

    # CLARANS
    clarans_instance = clarans(data=X, number_clusters=k, numlocal=5, maxneighbor=5);
    clarans_instance.process();
    clusters = clarans_instance.get_clusters();
    visualizer = cluster_visualizer(titles=["Clarans"]);
    visualizer.append_clusters(clusters, X);
    visualizer.show();

    # Agglomerative
    # type_link  = [SINGLE_LINK, COMPLETE_LINK, AVERAGE_LINK, CENTROID_LINK]
 
    agglo_instance = agglomerative(data=X, number_clusters=k, link=type_link.COMPLETE_LINK);
    agglo_instance.process();
    clusters = agglo_instance.get_clusters();
    visualizer = cluster_visualizer(titles=["Agglomerative"]);
    visualizer.append_clusters(clusters, X);
    visualizer.show();
Example #21
def display_two_dimensional_cluster_distances(path_sample, amount_clusters):
    distances = ['euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance'];
    
    ajacency = [ [0] * amount_clusters for i in range(amount_clusters) ];
    
    sample = utils.read_sample(path_sample);
    
    agglomerative_instance = agglomerative(sample, amount_clusters);
    agglomerative_instance.process();
    
    obtained_clusters = agglomerative_instance.get_clusters();
    stage = utils.draw_clusters(sample, obtained_clusters, display_result = False);
    
    for index_cluster in range(len(ajacency)):
        for index_neighbor_cluster in range(index_cluster + 1, len(ajacency)):
            if ( (index_cluster == index_neighbor_cluster) or (ajacency[index_cluster][index_neighbor_cluster] is True) ):
                continue;
            
            ajacency[index_cluster][index_neighbor_cluster] = True;
            ajacency[index_neighbor_cluster][index_cluster] = True;
            
            cluster1 = obtained_clusters[index_cluster];
            cluster2 = obtained_clusters[index_neighbor_cluster];
            
            center_cluster1 = utils.centroid(sample, cluster1);
            center_cluster2 = utils.centroid(sample, cluster2);
            
            x_maximum, x_minimum, y_maximum, y_minimum = None, None, None, None;
            x_index_maximum, y_index_maximum = 1, 1;
            
            if (center_cluster2[0] > center_cluster1[0]):
                x_maximum = center_cluster2[0];
                x_minimum = center_cluster1[0];
                x_index_maximum = 1;
            else:
                x_maximum = center_cluster1[0];
                x_minimum = center_cluster2[0];
                x_index_maximum = -1;
            
            if (center_cluster2[1] > center_cluster1[1]):
                y_maximum = center_cluster2[1];
                y_minimum = center_cluster1[1];
                y_index_maximum = 1;
            else:
                y_maximum = center_cluster1[1];
                y_minimum = center_cluster2[1];
                y_index_maximum = -1;
            
            print("Cluster 1:", cluster1, ", center:", center_cluster1);
            print("Cluster 2:", cluster2, ", center:", center_cluster2);
            
            stage.annotate(s = '', xy = (center_cluster1[0], center_cluster1[1]), xytext = (center_cluster2[0], center_cluster2[1]), arrowprops = dict(arrowstyle = '<->'));
            
            for index_distance_type in range(len(distances)):
                distance = None;
                distance_type = distances[index_distance_type];
                
                if (distance_type == 'euclidian'):
                    distance = utils.euclidean_distance(center_cluster1, center_cluster2);
                    
                elif (distance_type == 'manhattan'):
                    distance = utils.manhattan_distance(center_cluster1, center_cluster2);
                    
                elif (distance_type == 'avr-inter'):
                    distance = utils.average_inter_cluster_distance(cluster1, cluster2, sample);
                
                elif (distance_type == 'avr-intra'):
                    distance = utils.average_intra_cluster_distance(cluster1, cluster2, sample);
                
                elif (distance_type == 'variance'):
                    distance = utils.variance_increase_distance(cluster1, cluster2, sample);
                
                print("\tCluster distance -", distance_type, ":", distance);
                
                x_multiplier = index_distance_type + 3;
                if (x_index_maximum < 0):
                    x_multiplier = len(distances) - index_distance_type + 3;
                
                y_multiplier = index_distance_type + 3;
                if (y_index_maximum < 0):
                    y_multiplier = len(distances) - index_distance_type + 3;
                
                x_text = x_multiplier * (x_maximum - x_minimum) / (len(distances) + 6) + x_minimum;
                y_text = y_multiplier * (y_maximum - y_minimum) / (len(distances) + 6) + y_minimum;
                
                #print(x_text, y_text, "\n");
                stage.text(x_text, y_text, distance_type + " {:.3f}".format(distance), fontsize = 9, color='blue');
    
    plt.show();
Example #22
def process_agglomerative(sample):
    instance = agglomerative(sample, NUMBER_CLUSTERS)
    (ticks, _) = timedcall(instance.process)
    return ticks
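
timedcall executes the callable and returns an (execution time, result) pair, so process_agglomerative reports only the run time. A sketch of how it might be driven follows; the sample constant and the NUMBER_CLUSTERS value are assumptions.

# Illustrative driver for the benchmark helper above; the sample constant and the
# NUMBER_CLUSTERS value are assumptions, not part of the original example.
from pyclustering.samples.definitions import SIMPLE_SAMPLES
from pyclustering.utils import read_sample

NUMBER_CLUSTERS = 3   # assumed module-level constant, defined alongside process_agglomerative

sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2)
print("agglomerative execution time:", process_agglomerative(sample))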
Example #23
def display_two_dimensional_cluster_distances(path_sample, amount_clusters):
    distances = [
        'euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance'
    ]

    ajacency = [[0] * amount_clusters for i in range(amount_clusters)]

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()
    stage = utils.draw_clusters(sample,
                                obtained_clusters,
                                display_result=False)

    for index_cluster in range(len(ajacency)):
        for index_neighbor_cluster in range(index_cluster + 1, len(ajacency)):
            if ((index_cluster == index_neighbor_cluster) or
                (ajacency[index_cluster][index_neighbor_cluster] is True)):
                continue

            ajacency[index_cluster][index_neighbor_cluster] = True
            ajacency[index_neighbor_cluster][index_cluster] = True

            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor_cluster]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            x_maximum, x_minimum, y_maximum, y_minimum = None, None, None, None
            x_index_maximum, y_index_maximum = 1, 1

            if (center_cluster2[0] > center_cluster1[0]):
                x_maximum = center_cluster2[0]
                x_minimum = center_cluster1[0]
                x_index_maximum = 1
            else:
                x_maximum = center_cluster1[0]
                x_minimum = center_cluster2[0]
                x_index_maximum = -1

            if (center_cluster2[1] > center_cluster1[1]):
                y_maximum = center_cluster2[1]
                y_minimum = center_cluster1[1]
                y_index_maximum = 1
            else:
                y_maximum = center_cluster1[1]
                y_minimum = center_cluster2[1]
                y_index_maximum = -1

            print("Cluster 1:", cluster1, ", center:", center_cluster1)
            print("Cluster 2:", cluster2, ", center:", center_cluster2)

            stage.annotate(s='',
                           xy=(center_cluster1[0], center_cluster1[1]),
                           xytext=(center_cluster2[0], center_cluster2[1]),
                           arrowprops=dict(arrowstyle='<->'))

            for index_distance_type in range(len(distances)):
                distance = None
                distance_type = distances[index_distance_type]

                if (distance_type == 'euclidian'):
                    distance = utils.euclidean_distance(
                        center_cluster1, center_cluster2)

                elif (distance_type == 'manhattan'):
                    distance = utils.manhattan_distance(
                        center_cluster1, center_cluster2)

                elif (distance_type == 'avr-inter'):
                    distance = utils.average_inter_cluster_distance(
                        cluster1, cluster2, sample)

                elif (distance_type == 'avr-intra'):
                    distance = utils.average_intra_cluster_distance(
                        cluster1, cluster2, sample)

                elif (distance_type == 'variance'):
                    distance = utils.variance_increase_distance(
                        cluster1, cluster2, sample)

                print("\tCluster distance -", distance_type, ":", distance)

                x_multiplier = index_distance_type + 3
                if (x_index_maximum < 0):
                    x_multiplier = len(distances) - index_distance_type + 3

                y_multiplier = index_distance_type + 3
                if (y_index_maximum < 0):
                    y_multiplier = len(distances) - index_distance_type + 3

                x_text = x_multiplier * (x_maximum - x_minimum) / (
                    len(distances) + 6) + x_minimum
                y_text = y_multiplier * (y_maximum - y_minimum) / (
                    len(distances) + 6) + y_minimum

                #print(x_text, y_text, "\n");
                stage.text(x_text,
                           y_text,
                           distance_type + " {:.3f}".format(distance),
                           fontsize=9,
                           color='blue')

    plt.show()
def process_agglomerative(sample):
    instance = agglomerative(sample, NUMBER_CLUSTERS)
    (ticks, _) = timedcall(instance.process)
    return ticks
Example #25
 def testCoreInterfaceIntInputData(self):
     agglomerative_instance = agglomerative([ [1], [2], [3], [20], [21], [22] ], 2, type_link.SINGLE_LINK, True);
     agglomerative_instance.process();
     assert len(agglomerative_instance.get_clusters()) == 2;