def template_clustering(number_clusters, path, links):
    """Run agglomerative clustering of the sample at *path* once per requested
    link type and display all results side by side.

    number_clusters -- amount of clusters to allocate.
    path -- path to a sample file readable by read_sample().
    links -- collection of type_link values selecting which linkages to run.

    The original implementation repeated the same ten-line branch for each
    link type; the branches are folded into one loop over (link, title) pairs.
    """
    sample = read_sample(path)

    # Fixed display order, identical to the original branch order.
    link_titles = [
        (type_link.CENTROID_LINK, 'Link: Centroid'),
        (type_link.SINGLE_LINK, 'Link: Single'),
        (type_link.COMPLETE_LINK, 'Link: Complete'),
        (type_link.AVERAGE_LINK, 'Link: Average'),
    ]

    visualizer = cluster_visualizer(len(links))
    index_canvas = 0

    for link, title in link_titles:
        if link not in links:
            continue

        instance = agglomerative(sample, number_clusters, link)
        (ticks, _) = timedcall(instance.process)

        clusters = instance.get_clusters()
        visualizer.append_clusters(clusters, sample, index_canvas)
        visualizer.set_canvas_title(title, index_canvas)
        index_canvas += 1

        print("Sample: ", path, title, "\tExecution time: ", ticks, "\n")

    visualizer.show()
def template_clustering(number_clusters, path, links):
    """Run agglomerative clustering of the sample at *path* once per requested
    link type and display all results side by side.

    number_clusters -- amount of clusters to allocate.
    path -- path to a sample file readable by read_sample().
    links -- collection of type_link values selecting which linkages to run.

    The repeated per-link branches of the original are folded into one loop.
    NOTE(review): the original passed ccore=True only for CENTROID_LINK and
    left the default for the other linkages; that inconsistency is preserved
    here to keep behavior identical -- confirm whether it was intentional.
    """
    sample = read_sample(path)

    # Fixed display order, identical to the original branch order.
    link_titles = [
        (type_link.CENTROID_LINK, 'Link: Centroid'),
        (type_link.SINGLE_LINK, 'Link: Single'),
        (type_link.COMPLETE_LINK, 'Link: Complete'),
        (type_link.AVERAGE_LINK, 'Link: Average'),
    ]

    visualizer = cluster_visualizer(len(links), len(links))
    index_canvas = 0

    for link, title in link_titles:
        if link not in links:
            continue

        if link is type_link.CENTROID_LINK:
            # Only the centroid-link run explicitly requested the C core.
            instance = agglomerative(sample, number_clusters, link, True)
        else:
            instance = agglomerative(sample, number_clusters, link)

        (ticks, _) = timedcall(instance.process)

        clusters = instance.get_clusters()
        visualizer.append_clusters(clusters, sample, index_canvas)
        visualizer.set_canvas_title(title, index_canvas)
        index_canvas += 1

        print("Sample: ", path, title, "\tExecution time: ", ticks, "\n")

    visualizer.show()
def process(self):
    """!
    @brief Performs cluster analysis in line with rules of BIRCH algorithm.

    @return (birch) Returns itself (BIRCH instance).

    @see get_clusters()

    """
    # Build the CF-tree from the input data, then collect its leaf features.
    self.__insert_data()
    self.__extract_features()

    # Cluster the CF-entry centroids instead of the raw points.
    cf_data = [feature.get_centroid() for feature in self.__features]

    # agglomerative.process() returns the algorithm instance (fluent API),
    # so chaining get_clusters() on the assigned result is valid.
    algorithm = agglomerative(cf_data, self.__number_clusters, type_link.SINGLE_LINK).process()
    self.__cf_clusters = algorithm.get_clusters()

    # Re-encode the CF clusters as a flat per-entry label list so that the
    # nearest-entry lookup below maps straight to a final cluster index.
    cf_labels = cluster_encoder(type_encoding.CLUSTER_INDEX_LIST_SEPARATION, self.__cf_clusters, cf_data).\
        set_encoding(type_encoding.CLUSTER_INDEX_LABELING).get_clusters()

    self.__clusters = [[] for _ in range(len(self.__cf_clusters))]
    for index_point in range(len(self.__pointer_data)):
        # Assign each original point to the cluster of its nearest CF entry
        # (argmin of squared Euclidean distance over all entry centroids).
        index_cf_entry = numpy.argmin(numpy.sum(numpy.square(
            numpy.subtract(cf_data, self.__pointer_data[index_point])), axis=1))
        index_cluster = cf_labels[index_cf_entry]
        self.__clusters[index_cluster].append(index_point)

    return self
def templateClusterAllocationTheSameObjects(number_objects, number_clusters, link, ccore_flag=False):
    """Identical one-dimensional objects must still be split into the
    requested number of clusters, each object allocated exactly once."""
    points = [[random()]] * number_objects

    instance = agglomerative(points, number_clusters, link, ccore_flag)
    instance.process()
    allocation = instance.get_clusters()

    assert len(allocation) == number_clusters

    seen = [False] * number_objects
    total_allocated = 0
    for group in allocation:
        for point_index in group:
            # One object can be in only one cluster.
            assert seen[point_index] == False
            seen[point_index] = True
            total_allocated += 1

    assert number_objects == total_allocated
def get_agglomerative_clusters(data, count_clusters, line_type):
    """Cluster the rows of *data* with agglomerative clustering, build
    colored Cluster objects for the UI and record the RS (SSB/SST) score.

    data -- table-like object exposing getRows(); rows expose getDataArray().
    count_clusters -- number of clusters to allocate.
    line_type -- linkage type passed to agglomerative().
    Returns a list of Cluster objects, one per allocated cluster.

    NOTE(review): appends to the module-level RS_RESULT list as a side
    effect -- repeated calls accumulate scores; confirm that is intended.
    """
    rows = data.getRows()
    input_data = list()
    result_clusters = list()
    for row in rows:
        input_data.append(row.getDataArray())
    # create object that uses python code only
    SST = calculate_sst(input_data)
    agglomerative_instance = agglomerative(input_data, count_clusters, link=line_type)
    # cluster analysis
    agglomerative_instance.process()
    # obtain results of clustering
    clusters = agglomerative_instance.get_clusters()
    colorRange = Constants.DEFAULT_COLOR_SET
    SSB = 0
    SSW = 0
    for i, cluster in enumerate(clusters):
        # NOTE(review): get_rows_agglomerative() is called twice with the same
        # arguments (once for the Cluster, once for the SSW rows) -- likely
        # the same result could be reused.
        result_cluster = Cluster(
            AgglomerativeWindow.get_rows_agglomerative(data, cluster))
        ro = AgglomerativeWindow.get_rows_agglomerative(data, cluster)
        f = [x._dataArray for x in ro]
        # Accumulate within-cluster sum of squares across all clusters.
        SSW = SSW + calculate_ssw(f)
        # A random color doubles as the cluster's display name.
        colour = random.choice(colorRange)
        result_cluster.setName(colour)
        result_cluster.setColor(colour)
        result_clusters.append(result_cluster)
    # Between-cluster sum of squares, derived from the total and within sums.
    SSB = calculate_ssb(SST, SSW)
    RS_RESULT.append(SSB / SST)
    print(RS_RESULT)
    return result_clusters
def aggl_cluster(df, n_clusters, link, hover_text):
    """Cluster the numeric columns of *df* with agglomerative clustering and
    return a 2-D or 3-D scatter plot of the labeled points.

    df -- pandas DataFrame; the *hover_text* column is excluded from the data.
    n_clusters -- number of clusters to allocate.
    link -- one of "centroid", "single", "complete"; anything else falls back
            to average linkage (preserving the original behavior).
    hover_text -- name of the column used for hover labels in the plot.

    Bug fixed: the original reached COMPLETE/AVERAGE through
    `agglomerative.type_link.*`, but `type_link` is a separate enum, not an
    attribute of the `agglomerative` class, so those branches raised
    AttributeError.  All linkages now use the `type_link` enum directly.
    """
    datadf = df.loc[:, df.columns != hover_text]
    data_list = datadf.to_numpy(dtype="int64").tolist()

    # Map the textual link name onto the enum; unknown names -> AVERAGE_LINK.
    link_map = {
        "centroid": type_link.CENTROID_LINK,
        "single": type_link.SINGLE_LINK,
        "complete": type_link.COMPLETE_LINK,
    }
    typelink = link_map.get(link, type_link.AVERAGE_LINK)

    aggl_instance = agglomerative(data_list, n_clusters, typelink)
    aggl_instance.process()
    clusters = aggl_instance.get_clusters()

    # Convert index-list clusters into a flat per-point label array.
    reps = aggl_instance.get_cluster_encoding()
    encoder = cluster_encoder(reps, clusters, data_list)
    encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)
    label = np.array(encoder.get_clusters(), dtype='int32')

    data_array = np.array(data_list)
    if len(datadf.columns) == 2:
        return scat2d(data_array, label, hover_text, df)
    return scat3d(data_array, label, hover_text, df)
def consensus_clustering(input_path,CLUSTERING_PATH):
    """Build a consensus (co-association) matrix from repeated k-medoids runs
    over a precomputed distance matrix, convert it to a distance matrix, save
    it, then hierarchically cluster it for each rule-of-thumb k and save the
    resulting clusters.

    input_path -- path forwarded to save_clusters() for the output naming.
    CLUSTERING_PATH -- directory containing distance_matrix.csv and where the
                       consensus matrix CSV is written.
    """
    df = pd.read_csv(CLUSTERING_PATH+"distance_matrix.csv", delimiter=',', header=None)
    #read all the values
    sample = df.values
    #number of elements
    N= len(df.columns)
    # NOTE(review): np.asmatrix / np.matrix are deprecated in NumPy -- confirm
    # the pinned NumPy version still provides them.
    print(isinstance(np.asmatrix(sample),np.matrix))
    #rule of thumbs for k
    df1= pd.DataFrame(columns=['value'],index=['k_sqrtNBy4','k_sqrtNDiv4','k_sqrtNDiv2','k_sqrtNBy2','k_sqrtN',])
    #df1.at['k_1','value']= 1
    df1.at['k_sqrtN','value']= round(sqrt(N),0)
    df1.at['k_sqrtNDiv2', 'value'] = round(sqrt(N / 2),0)
    df1.at['k_sqrtNBy2', 'value'] = round(sqrt(N * 2),0)
    df1.at['k_sqrtNDiv4', 'value'] = round(sqrt(N / 4),0)
    df1.at['k_sqrtNBy4', 'value'] = round(sqrt(N*4),0)
    # Declare the weight of each vote
    # consensus matrix is NxN
    #initialization
    iterations=20
    weight1 = 1 / iterations
    weight2 = 1 / len(df1.index)#the amount of k values used
    consensus_matrix = np.zeros((N, N))
    for k in df1.index:
        #run the same algorithm using several k values. Each configuration is run
        #iterations times.
        for iteration in range(iterations):
            k_value=int(df1.loc[k].values[0])
            # Random restarts come from the k-means++ initializer, which is
            # why repeated iterations produce different partitions.
            initial_medoids = kmeans_plusplus_initializer(sample,k_value).initialize(return_index=True)
            kmedoids_instance = kmedoids(np.asmatrix(sample), initial_medoids,data_type="distance_matrix")
            kmedoids_instance.process()
            clusters = kmedoids_instance.get_clusters()
            # Co-association matrix of this run: 1 where two items share a cluster.
            coassociations_matrix= np.zeros((N, N))
            for cluster in clusters:
                for crypto in cluster:
                    #set the diagonal elements with value 1
                    coassociations_matrix[crypto][crypto] = 1
                    for crypto1 in cluster:
                        coassociations_matrix[crypto][crypto1]= 1
                        coassociations_matrix[crypto1][crypto] = 1
            #sum the two matrices
            consensus_matrix=consensus_matrix+coassociations_matrix
    # Normalize the vote counts into [0, 1] co-occurrence frequencies.
    consensus_matrix = consensus_matrix*weight1*weight2
    #now, by doing (1 - consensus_matrix) we get the dissimilarity/distance matrix
    distance_matrix= 1-consensus_matrix
    df = pd.DataFrame(data=distance_matrix)
    df.to_csv(CLUSTERING_PATH+"consensus_matrix(distance).csv",sep=",")
    #Hierarchical clustering
    for k in df1.index:
        k_value = int(df1.loc[k].values[0])
        # NOTE(review): the distance matrix rows are fed to agglomerative() as
        # if they were feature vectors -- confirm this is the intended
        # "cluster the distance profile" behavior.
        agglomerative_instance = agglomerative(distance_matrix,k_value, type_link.AVERAGE_LINK)
        agglomerative_instance.process()
        # Obtain results of clustering
        clusters = agglomerative_instance.get_clusters()
        save_clusters(input_path,clusters,k,CLUSTERING_PATH)
def templateClusterAllocationOneDimensionData(link, ccore_flag):
    """Four well-separated 1-D groups of ten random points each must be
    recovered as four clusters of exactly ten points."""
    data = [[random() + shift] for shift in (0, 3, 5, 8) for _ in range(10)]

    instance = agglomerative(data, 4, link, ccore_flag)
    instance.process()
    clusters = instance.get_clusters()

    assert len(clusters) == 4
    for cluster in clusters:
        assert len(cluster) == 10
def templateClusterAllocationOneDimensionData(self, link):
    """Four well-separated 1-D groups of ten random points each must be
    recovered as four clusters of exactly ten points."""
    data = [[random() + offset] for offset in (0, 3, 5, 8) for _ in range(10)]

    algorithm = agglomerative(data, 4, link)
    algorithm.process()
    result = algorithm.get_clusters()

    assert len(result) == 4
    for allocated in result:
        assert len(allocated) == 10
def get_modelo(self, algoritmo, eps, neig):
    """Build and run the clustering algorithm named by *algoritmo* on
    self.amostras and return the flat label array for the samples.

    algoritmo -- algorithm name ('AGNES', 'BIRCH', 'CLARANS', 'CURE',
                 'DBSCAN', 'FCM', 'KMEANS', 'KMEDOIDS', 'OPTICS', 'ROCK').
    eps -- radius parameter for DBSCAN/OPTICS/ROCK.
    neig -- neighbor/minpts parameter; also drives the retry recursion below.

    NOTE(review): when the result has more groups than self.numero_clusters
    and neig != 0, the method recursively retries with neig + 1 -- there is
    no upper bound, so confirm termination for pathological inputs.
    """
    print(algoritmo + ' ' + str(eps) + ' - ' + str(neig))
    instance = None
    if algoritmo == 'AGNES':
        # NOTE(review): link=None is passed to agglomerative() -- confirm the
        # library accepts None as a linkage (it may expect a type_link value).
        instance = agglomerative(self.amostras,
                                 self.numero_clusters,
                                 link=None)
    elif algoritmo == 'BIRCH':
        instance = birch(self.amostras,
                         self.numero_clusters,
                         entry_size_limit=10000)
    elif algoritmo == 'CLARANS':
        instance = clarans(self.amostras,
                           self.numero_clusters,
                           numlocal=100,
                           maxneighbor=1)
    elif algoritmo == 'CURE':
        instance = cure(self.amostras,
                        self.numero_clusters,
                        number_represent_points=5,
                        compression=0.5)
    elif algoritmo == 'DBSCAN':
        instance = dbscan(self.amostras, eps=eps, neighbors=neig)
    elif algoritmo == 'FCM':
        initial_centers = kmeans_plusplus_initializer(
            self.amostras, self.numero_clusters).initialize()
        instance = fcm(self.amostras, initial_centers)
    elif algoritmo == 'KMEANS':
        initial_centers = kmeans_plusplus_initializer(
            self.amostras, self.numero_clusters).initialize()
        instance = kmeans(self.amostras, initial_centers, tolerance=0.001)
    elif algoritmo == 'KMEDOIDS':
        # NOTE(review): the initial medoid indices are hard-coded to seven
        # zeros regardless of self.numero_clusters.
        instance = kmedoids(self.amostras,
                            initial_index_medoids=[0, 0, 0, 0, 0, 0, 0],
                            tolerance=0.0001)  # adjust the number of clusters
    elif algoritmo == 'OPTICS':
        instance = optics(self.amostras, eps=eps, minpts=neig)
    elif algoritmo == 'ROCK':
        instance = rock(self.amostras,
                        eps=eps,
                        number_clusters=self.numero_clusters,
                        threshold=0.5)
    else:
        pass
    instance.process()
    lista_agrupada = self.get_lista_agrupada(instance.get_clusters())
    lista_agrupada = np.array(lista_agrupada)

    if (neig != 0):
        # Retry with a larger neighborhood until the group count fits.
        n_grupos = len(np.unique(lista_agrupada))
        if n_grupos > self.numero_clusters:
            lista_agrupada = self.get_modelo(algoritmo, eps, neig + 1)
    return lista_agrupada
def templateClusteringResults(path, number_clusters, link, expected_length_clusters, ccore_flag):
    """Cluster the sample at *path* and verify the allocation covers every
    point and matches the expected (sorted) cluster sizes."""
    sample = read_sample(path)

    instance = agglomerative(sample, number_clusters, link, ccore_flag)
    instance.process()
    clusters = instance.get_clusters()

    sizes = sorted(len(cluster) for cluster in clusters)

    assert sum(sizes) == len(sample)
    assert sum(sizes) == sum(expected_length_clusters)
    assert sizes == expected_length_clusters
def templateClusteringResults(self, path, number_clusters, link, expected_length_clusters):
    """Cluster the sample at *path* and verify every point is allocated and
    the sorted cluster sizes equal the expected list."""
    sample = read_sample(path)

    algorithm = agglomerative(sample, number_clusters, link)
    algorithm.process()
    result = algorithm.get_clusters()

    lengths = sorted(len(group) for group in result)

    assert sum(lengths) == len(sample)
    assert sum(lengths) == sum(expected_length_clusters)
    assert lengths == expected_length_clusters
def cluster_distances(path_sample, amount_clusters):
    """Print five inter-cluster distance measures for every pair of clusters
    obtained by agglomerative clustering of the given sample."""
    sample = utils.read_sample(path_sample)

    instance = agglomerative(sample, amount_clusters)
    instance.process()
    clusters = instance.get_clusters()

    print("Measurements for:", path_sample)

    for first_index in range(len(clusters)):
        for second_index in range(first_index + 1, len(clusters), 1):
            first = clusters[first_index]
            second = clusters[second_index]

            first_center = utils.centroid(sample, first)
            second_center = utils.centroid(sample, second)

            # Same measures, same evaluation and print order as before.
            measurements = [
                ('euclidian', utils.euclidean_distance(first_center, second_center)),
                ('manhattan', utils.manhattan_distance(first_center, second_center)),
                ('avr-inter', utils.average_inter_cluster_distance(first, second, sample)),
                ('avr-intra', utils.average_intra_cluster_distance(first, second, sample)),
                ('variance', utils.variance_increase_distance(first, second, sample)),
            ]

            for measure_name, value in measurements:
                print("\tDistance", measure_name, "from", first_index,
                      "to", second_index, "is:", value)
def __build_circles_from_contour(self, color_mask, amount, amount_maximum):
    """Extract circle candidates from the mask's external contours, group
    their centers into at most *amount_maximum* clusters and return the
    farthest representatives; None when fewer than *amount* contours or
    circles are available."""
    found_contours, _ = cv2.findContours(color_mask, cv2.RETR_EXTERNAL,
                                         cv2.CHAIN_APPROX_NONE)
    if len(found_contours) < amount:
        return None

    candidates = self.__get_circles_from_contours(found_contours)
    if len(candidates) < amount:
        return None

    # Cluster only the (x, y) centers of the candidate circles.
    centers = [[circle[0], circle[1]] for circle in candidates]
    grouping = agglomerative(centers, amount_maximum)
    grouping.process()

    return self.__get_farthest_circles(candidates, grouping.get_clusters())
def runAGGLOMERATIVE(self, k, X, type_link_param):
    """Cluster X into k groups with agglomerative clustering.

    k -- number of clusters to allocate.
    X -- indexable sequence of data points.
    type_link_param -- linkage type forwarded to agglomerative().
    Returns a dict mapping cluster id (0..k-1) to the list of points
    allocated to that cluster, in ascending point-index order.

    The original scanned every cluster's full index list for every point
    (O(n * k * m)); here each cluster's index list is walked once, sorted so
    the per-cluster point order matches the original point-id iteration.
    """
    agglo_instance = agglomerative(data=X, number_clusters=k, link=type_link_param)
    agglo_instance.process()
    clusters = agglo_instance.get_clusters()

    # Pre-create all k buckets, even if some end up empty.
    cluster_points = {q: list() for q in range(k)}
    for cluster_id, point_ids in enumerate(clusters):
        for point_id in sorted(int(pid) for pid in point_ids):
            cluster_points[cluster_id].append(X[point_id])
    return cluster_points
def templateClusterAllocationTheSameObjects(number_objects, number_clusters, link, ccore_flag):
    """Identical objects must still be distributed over the requested number
    of clusters, with every object allocated exactly once."""
    data = [[random()]] * number_objects

    algorithm = agglomerative(data, number_clusters, link, ccore_flag)
    algorithm.process()
    result = algorithm.get_clusters()

    assert len(result) == number_clusters

    marks = [False] * number_objects
    allocated = 0
    for group in result:
        for object_index in group:
            # One object can be in only one cluster.
            assert marks[object_index] == False
            marks[object_index] = True
            allocated += 1

    # Number of allocated objects should be the same.
    assert number_objects == allocated
def templateClusterAllocationTheSameObjects(self, number_objects, number_clusters, link, ccore_flag = False):
    """Identical objects must still be distributed over the requested number
    of clusters, with every object allocated exactly once."""
    points = [[random()]] * number_objects

    instance = agglomerative(points, number_clusters, link, ccore_flag)
    instance.process()
    clusters = instance.get_clusters()

    assert len(clusters) == number_clusters

    already_seen = [False] * number_objects
    counted = 0
    for cluster in clusters:
        for idx in cluster:
            # One object can be in only one cluster.
            assert already_seen[idx] == False
            already_seen[idx] = True
            counted += 1

    # Number of allocated objects should be the same.
    assert number_objects == counted
def cluster_distances(path_sample, amount_clusters):
    """Print five inter-cluster distance measures for each pair of clusters
    produced by agglomerative clustering of the sample at *path_sample*."""
    sample = utils.read_sample(path_sample)

    algorithm = agglomerative(sample, amount_clusters)
    algorithm.process()
    found_clusters = algorithm.get_clusters()

    print("Measurements for:", path_sample)

    total = len(found_clusters)
    for left in range(total):
        for right in range(left + 1, total, 1):
            group_a = found_clusters[left]
            group_b = found_clusters[right]

            centroid_a = utils.centroid(sample, group_a)
            centroid_b = utils.centroid(sample, group_b)

            # Evaluate and report each measure in the original order.
            for name, value in (
                ('euclidian', utils.euclidean_distance(centroid_a, centroid_b)),
                ('manhattan', utils.manhattan_distance(centroid_a, centroid_b)),
                ('avr-inter', utils.average_inter_cluster_distance(group_a, group_b, sample)),
                ('avr-intra', utils.average_intra_cluster_distance(group_a, group_b, sample)),
                ('variance', utils.variance_increase_distance(group_a, group_b, sample)),
            ):
                print("\tDistance", name, "from", left, "to", right, "is:", value)
def testCoreInterfaceIntInputData(self):
    """Integer-valued input must be accepted by the C-core backend and split
    into the two obvious groups."""
    points = [[1], [2], [3], [20], [21], [22]]
    instance = agglomerative(points, 2, type_link.SINGLE_LINK, True)
    instance.process()
    assert len(instance.get_clusters()) == 2
# Spectral Clustering y_pred = SpectralClustering(n_clusters=k).fit_predict(X) plt.scatter(X[:, 0], X[:, 1], c=y_pred) plt.title("Spectral Clustering") plt.show() # CURE cure_instance = cure(data=X, number_cluster=k); cure_instance.process(); clusters = cure_instance.get_clusters(); visualizer = cluster_visualizer(titles=["Cure"]); visualizer.append_clusters(clusters, X); visualizer.show(); # CLARANS clarans_instance = clarans(data=X, number_clusters=k, numlocal=5, maxneighbor=5); clarans_instance.process(); clusters = clarans_instance.get_clusters(); visualizer = cluster_visualizer(titles=["Clarans"]); visualizer.append_clusters(clusters, X); visualizer.show(); # Agglomerative # type_link = [SINGLE_LINK, COMPLETE_LINK, AVERAGE_LINK, CENTROID_LINK] agglo_instance = agglomerative(data=X, number_clusters=k, link=type_link.COMPLETE_LINK); agglo_instance.process(); clusters = agglo_instance.get_clusters(); visualizer = cluster_visualizer(titles=["Agglomerative"]); visualizer.append_clusters(clusters, X); visualizer.show();
def display_two_dimensional_cluster_distances(path_sample, amount_clusters):
    """Cluster a 2-D sample, draw it and annotate every pair of clusters with
    five inter-cluster distance measures placed along the connecting arrow.

    path_sample -- path to a sample file readable by utils.read_sample().
    amount_clusters -- number of clusters for agglomerative clustering.
    """
    distances = ['euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance'];
    # Symmetric matrix marking which cluster pairs were already annotated.
    ajacency = [ [0] * amount_clusters for i in range(amount_clusters) ];

    sample = utils.read_sample(path_sample);

    agglomerative_instance = agglomerative(sample, amount_clusters);
    agglomerative_instance.process();
    obtained_clusters = agglomerative_instance.get_clusters();

    stage = utils.draw_clusters(sample, obtained_clusters, display_result = False);

    for index_cluster in range(len(ajacency)):
        for index_neighbor_cluster in range(index_cluster + 1, len(ajacency)):
            # Skip self-pairs and pairs that were already processed.
            if ( (index_cluster == index_neighbor_cluster) or (ajacency[index_cluster][index_neighbor_cluster] is True) ): continue;

            ajacency[index_cluster][index_neighbor_cluster] = True;
            ajacency[index_neighbor_cluster][index_cluster] = True;

            cluster1 = obtained_clusters[index_cluster];
            cluster2 = obtained_clusters[index_neighbor_cluster];

            center_cluster1 = utils.centroid(sample, cluster1);
            center_cluster2 = utils.centroid(sample, cluster2);

            # Per-axis bounds of the two centers; the *_index_maximum sign
            # records the arrow's direction and drives the label mirroring.
            x_maximum, x_minimum, y_maximum, y_minimum = None, None, None, None;
            x_index_maximum, y_index_maximum = 1, 1;

            if (center_cluster2[0] > center_cluster1[0]):
                x_maximum = center_cluster2[0];
                x_minimum = center_cluster1[0];
                x_index_maximum = 1;
            else:
                x_maximum = center_cluster1[0];
                x_minimum = center_cluster2[0];
                x_index_maximum = -1;

            if (center_cluster2[1] > center_cluster1[1]):
                y_maximum = center_cluster2[1];
                y_minimum = center_cluster1[1];
                y_index_maximum = 1;
            else:
                y_maximum = center_cluster1[1];
                y_minimum = center_cluster2[1];
                y_index_maximum = -1;

            print("Cluster 1:", cluster1, ", center:", center_cluster1);
            print("Cluster 2:", cluster2, ", center:", center_cluster2);

            # Double-headed arrow between the two centers.
            # NOTE(review): the 's' keyword of Axes.annotate() was renamed to
            # 'text' in matplotlib 3.3 and later removed -- confirm the pinned
            # matplotlib version still accepts it.
            stage.annotate(s = '', xy = (center_cluster1[0], center_cluster1[1]), xytext = (center_cluster2[0], center_cluster2[1]), arrowprops = dict(arrowstyle = '<->'));

            for index_distance_type in range(len(distances)):
                distance = None;
                distance_type = distances[index_distance_type];

                if (distance_type == 'euclidian'):
                    distance = utils.euclidean_distance(center_cluster1, center_cluster2);
                elif (distance_type == 'manhattan'):
                    distance = utils.manhattan_distance(center_cluster1, center_cluster2);
                elif (distance_type == 'avr-inter'):
                    distance = utils.average_inter_cluster_distance(cluster1, cluster2, sample);
                elif (distance_type == 'avr-intra'):
                    distance = utils.average_intra_cluster_distance(cluster1, cluster2, sample);
                elif (distance_type == 'variance'):
                    distance = utils.variance_increase_distance(cluster1, cluster2, sample);

                print("\tCluster distance -", distance_type, ":", distance);

                # Spread the labels along the arrow; mirror the order when the
                # arrow runs in the negative direction so labels line up.
                x_multiplier = index_distance_type + 3;
                if (x_index_maximum < 0): x_multiplier = len(distances) - index_distance_type + 3;

                y_multiplier = index_distance_type + 3;
                if (y_index_maximum < 0): y_multiplier = len(distances) - index_distance_type + 3;

                x_text = x_multiplier * (x_maximum - x_minimum) / (len(distances) + 6) + x_minimum;
                y_text = y_multiplier * (y_maximum - y_minimum) / (len(distances) + 6) + y_minimum;

                #print(x_text, y_text, "\n");
                stage.text(x_text, y_text, distance_type + " {:.3f}".format(distance), fontsize = 9, color='blue');

    plt.show();
def process_agglomerative(sample):
    """Run agglomerative clustering on *sample* and return the elapsed time."""
    algorithm = agglomerative(sample, NUMBER_CLUSTERS)
    elapsed, _ = timedcall(algorithm.process)
    return elapsed
def display_two_dimensional_cluster_distances(path_sample, amount_clusters):
    """Cluster a 2-D sample, draw it and annotate every pair of clusters with
    five inter-cluster distance measures placed along the connecting arrow.

    path_sample -- path to a sample file readable by utils.read_sample().
    amount_clusters -- number of clusters for agglomerative clustering.
    """
    distances = [
        'euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance'
    ]
    # Symmetric matrix marking which cluster pairs were already annotated.
    ajacency = [[0] * amount_clusters for i in range(amount_clusters)]

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()
    obtained_clusters = agglomerative_instance.get_clusters()

    stage = utils.draw_clusters(sample, obtained_clusters, display_result=False)

    for index_cluster in range(len(ajacency)):
        for index_neighbor_cluster in range(index_cluster + 1, len(ajacency)):
            # Skip self-pairs and pairs that were already processed.
            if ((index_cluster == index_neighbor_cluster)
                    or (ajacency[index_cluster][index_neighbor_cluster] is True)):
                continue

            ajacency[index_cluster][index_neighbor_cluster] = True
            ajacency[index_neighbor_cluster][index_cluster] = True

            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor_cluster]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            # Per-axis bounds of the two centers; the *_index_maximum sign
            # records the arrow's direction and drives the label mirroring.
            x_maximum, x_minimum, y_maximum, y_minimum = None, None, None, None
            x_index_maximum, y_index_maximum = 1, 1

            if (center_cluster2[0] > center_cluster1[0]):
                x_maximum = center_cluster2[0]
                x_minimum = center_cluster1[0]
                x_index_maximum = 1
            else:
                x_maximum = center_cluster1[0]
                x_minimum = center_cluster2[0]
                x_index_maximum = -1

            if (center_cluster2[1] > center_cluster1[1]):
                y_maximum = center_cluster2[1]
                y_minimum = center_cluster1[1]
                y_index_maximum = 1
            else:
                y_maximum = center_cluster1[1]
                y_minimum = center_cluster2[1]
                y_index_maximum = -1

            print("Cluster 1:", cluster1, ", center:", center_cluster1)
            print("Cluster 2:", cluster2, ", center:", center_cluster2)

            # Double-headed arrow between the two centers.
            # NOTE(review): the 's' keyword of Axes.annotate() was renamed to
            # 'text' in matplotlib 3.3 and later removed -- confirm the pinned
            # matplotlib version still accepts it.
            stage.annotate(s='',
                           xy=(center_cluster1[0], center_cluster1[1]),
                           xytext=(center_cluster2[0], center_cluster2[1]),
                           arrowprops=dict(arrowstyle='<->'))

            for index_distance_type in range(len(distances)):
                distance = None
                distance_type = distances[index_distance_type]
                if (distance_type == 'euclidian'):
                    distance = utils.euclidean_distance(
                        center_cluster1, center_cluster2)
                elif (distance_type == 'manhattan'):
                    distance = utils.manhattan_distance(
                        center_cluster1, center_cluster2)
                elif (distance_type == 'avr-inter'):
                    distance = utils.average_inter_cluster_distance(
                        cluster1, cluster2, sample)
                elif (distance_type == 'avr-intra'):
                    distance = utils.average_intra_cluster_distance(
                        cluster1, cluster2, sample)
                elif (distance_type == 'variance'):
                    distance = utils.variance_increase_distance(
                        cluster1, cluster2, sample)

                print("\tCluster distance -", distance_type, ":", distance)

                # Spread the labels along the arrow; mirror the order when the
                # arrow runs in the negative direction so labels line up.
                x_multiplier = index_distance_type + 3
                if (x_index_maximum < 0):
                    x_multiplier = len(distances) - index_distance_type + 3

                y_multiplier = index_distance_type + 3
                if (y_index_maximum < 0):
                    y_multiplier = len(distances) - index_distance_type + 3

                x_text = x_multiplier * (x_maximum - x_minimum) / (
                    len(distances) + 6) + x_minimum
                y_text = y_multiplier * (y_maximum - y_minimum) / (
                    len(distances) + 6) + y_minimum

                #print(x_text, y_text, "\n");
                stage.text(x_text,
                           y_text,
                           distance_type + " {:.3f}".format(distance),
                           fontsize=9,
                           color='blue')

    plt.show()
def process_agglomerative(sample):
    """Measure how long agglomerative clustering of *sample* takes."""
    (elapsed_time, _) = timedcall(agglomerative(sample, NUMBER_CLUSTERS).process)
    return elapsed_time
def testCoreInterfaceIntInputData(self):
    """Integer-valued input must be accepted by the C-core backend and split
    into the two obvious groups."""
    data = [[1], [2], [3], [20], [21], [22]]
    algorithm = agglomerative(data, 2, type_link.SINGLE_LINK, True)
    algorithm.process()
    assert len(algorithm.get_clusters()) == 2