def elbow_analysis(sample_file_path, kmin, kmax, **kwargs):
    """Estimate the amount of clusters with the Elbow method and display it.

    Reads the sample, runs the Elbow analysis between ``kmin`` and ``kmax``,
    clusters the sample with K-Means using the detected amount of clusters,
    prints that amount, plots the WCE curve with the elbow highlighted and
    finally shows the resulting clusters.

    Args:
        sample_file_path: Path of the sample file handed to ``read_sample``.
        kmin: Lower K bound passed to ``elbow``; also the first K on the plot.
        kmax: Upper K bound passed to ``elbow``.
        **kwargs: ``initializer`` - center initializer class forwarded to
            ``elbow`` (defaults to ``kmeans_plusplus_initializer``). The final
            K-Means run always uses ``kmeans_plusplus_initializer``.
    """
    initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

    sample = read_sample(sample_file_path)
    elbow_instance = elbow(sample, kmin, kmax, initializer=initializer)
    elbow_instance.process()

    amount_clusters = elbow_instance.get_amount()
    wce = elbow_instance.get_wce()

    centers = kmeans_plusplus_initializer(sample, amount_clusters).initialize()
    kmeans_instance = kmeans(sample, centers)
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()

    print("Sample '%s': Obtained amount of clusters: '%d'." % (sample_file_path, amount_clusters))

    # Derive the K axis from the number of WCE values actually returned, so
    # the x and y series always have the same length regardless of whether
    # the Elbow implementation treats ``kmax`` as inclusive or exclusive.
    k_values = range(kmin, kmin + len(wce))
    elbow_wce = wce[amount_clusters - kmin]

    figure = plt.figure(1)
    ax = figure.add_subplot(111)
    ax.plot(k_values, wce, color='b', marker='.')
    ax.plot(amount_clusters, elbow_wce, color='r', marker='.', markersize=10)
    ax.annotate("Elbow", (amount_clusters + 0.1, elbow_wce + 5))
    ax.grid(True)
    plt.ylabel("WCE")
    plt.xlabel("K")
    plt.show()

    kmeans_visualizer.show_clusters(sample, clusters, centers)
def template_clustering(start_centers, path, tolerance=0.25, ccore=False):
    """Cluster a sample file with K-Means (Manhattan metric) and visualize it.

    Args:
        start_centers: Initial cluster centers handed to ``kmeans``.
        path: Path of the sample file handed to ``read_sample``.
        tolerance: Third positional argument of ``kmeans``.
        ccore: Fourth positional argument of ``kmeans``.
    """
    sample = read_sample(path)
    dimension = len(sample[0])

    metric = distance_metric(type_metric.MANHATTAN)

    observer = kmeans_observer()
    kmeans_instance = kmeans(sample, start_centers, tolerance, ccore, observer=observer, metric=metric)
    (ticks, _) = timedcall(kmeans_instance.process)

    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    visualizer = cluster_visualizer_multidim()
    visualizer.append_clusters(clusters, sample)
    visualizer.show()

    # kmeans_visualizer can only draw 1D, 2D and 3D data (see the pyclustering
    # cluster_visualizer documentation), so it is used only for such samples.
    # The previous guard (`dimension > 3`) was inverted: it ran the visualizer
    # only for data it cannot draw and never for data it can.
    if dimension <= 3:
        kmeans_visualizer.show_clusters(sample, clusters, centers, start_centers)
        kmeans_visualizer.animate_cluster_allocation(sample, observer)
def templateShowClusteringResultNoFailure(filename, initial_centers, ccore_flag):
    """Run K-Means on a sample file and display the clustering result.

    Args:
        filename: Path of the sample file handed to ``read_sample``.
        initial_centers: Initial cluster centers handed to ``kmeans``.
        ccore_flag: Fourth positional argument of ``kmeans``.
    """
    points = read_sample(filename)

    # 0.025 is passed as the third positional argument of kmeans.
    algorithm = kmeans(points, initial_centers, 0.025, ccore_flag)
    algorithm.process()

    kmeans_visualizer.show_clusters(
        points,
        algorithm.get_clusters(),
        algorithm.get_centers(),
        initial_centers,
    )
def templateShowClusteringResultNoFailure(filename, initial_centers, ccore_flag):
    """Cluster the sample stored in ``filename`` with K-Means and show it.

    Args:
        filename: Path of the sample file handed to ``read_sample``.
        initial_centers: Initial cluster centers handed to ``kmeans``.
        ccore_flag: Fourth positional argument of ``kmeans``.
    """
    data = read_sample(filename)

    # Third positional argument of kmeans is fixed to 0.025 here.
    clusterer = kmeans(data, initial_centers, 0.025, ccore_flag)
    clusterer.process()

    found_clusters = clusterer.get_clusters()
    found_centers = clusterer.get_centers()

    kmeans_visualizer.show_clusters(data, found_clusters, found_centers, initial_centers)
def template_clustering(start_centers, path, tolerance=0.25, ccore=True):
    """Cluster a sample file with K-Means, report timing and visualize.

    Args:
        start_centers: Initial cluster centers handed to ``kmeans``.
        path: Path of the sample file handed to ``read_sample``.
        tolerance: Third positional argument of ``kmeans``.
        ccore: Fourth positional argument of ``kmeans``.
    """
    points = read_sample(path)

    # The observer is attached to the run and later handed to the animation.
    tracker = kmeans_observer()
    algorithm = kmeans(points, start_centers, tolerance, ccore, observer=tracker)

    # timedcall returns a pair; only its first item is printed below.
    elapsed, _unused = timedcall(algorithm.process)

    found_clusters = algorithm.get_clusters()
    found_centers = algorithm.get_centers()

    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    kmeans_visualizer.show_clusters(points, found_clusters, found_centers, start_centers)
    kmeans_visualizer.animate_cluster_allocation(points, tracker)
def run_elbow(data, kmin=1, kmax=10):
    """Pick the amount of clusters with the Elbow method, then run K-Means.

    Args:
        data: Points to analyse; passed unchanged to ``elbow``,
            ``kmeans_plusplus_initializer``, ``kmeans`` and the visualizer.
        kmin: Lower K bound passed to ``elbow``. Defaults to 1, the value
            that used to be hard-coded.
        kmax: Upper K bound passed to ``elbow``. Defaults to 10, the value
            that used to be hard-coded.
    """
    # Create the Elbow instance for the requested K range and analyse the data.
    elbow_instance = elbow(data, kmin, kmax)
    elbow_instance.process()
    amount_clusters = elbow_instance.get_amount()

    # Perform cluster analysis with K-Means, seeding the centers with
    # K-Means++ configured with FARTHEST_CENTER_CANDIDATE as amount_candidates.
    centers = kmeans_plusplus_initializer(
        data,
        amount_clusters,
        amount_candidates=kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE,
    ).initialize()
    kmeans_instance = kmeans(data, centers)
    kmeans_instance.process()

    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()

    kmeans_visualizer.show_clusters(data, clusters, centers)
def get_cluster_custom(sample, visualize=False, class_num=3):
    """Cluster ``sample`` with K-Means using the user-defined distance metric.

    Args:
        sample: Data to cluster. It is indexed with each cluster's index list,
            so it presumably has to be a NumPy array (or another container
            that accepts a list of indices) - confirm against callers.
        visualize: When true, show the clusters with ``kmeans_visualizer``.
        class_num: Amount of initial centers requested from K-Means++.

    Returns:
        Tuple ``(cls_encoded, final_centers, loss)``: the per-cluster slices
        of ``sample``, the rounded cluster centers, and the value returned by
        ``get_total_wce()``.
    """
    metric = distance_metric(type_metric.USER_DEFINED, func=user_function)

    # Build the K-Means instance with the custom metric and K-Means++ seeds.
    seeds = kmeans_plusplus_initializer(sample, class_num).initialize()
    algorithm = kmeans(sample, seeds, metric=metric)

    # Run the cluster analysis and collect the members of each cluster.
    algorithm.process()
    clusters = algorithm.get_clusters()
    cls_encoded = [sample[member_indices] for member_indices in clusters]

    # Round every center in place, element-wise.
    final_centers = algorithm.get_centers()
    for position, center in enumerate(final_centers):
        final_centers[position] = np.round(np.array(center))

    loss = algorithm.get_total_wce()

    # Visualize obtained results
    if visualize:
        kmeans_visualizer.show_clusters(sample, clusters, final_centers)

    return cls_encoded, final_centers, loss
def template_clustering(start_centers, path, tolerance=0.25, ccore=False):
    """Run K-Means with the Manhattan metric on a sample file and plot it.

    Args:
        start_centers: Initial cluster centers handed to ``kmeans``.
        path: Path of the sample file handed to ``read_sample``.
        tolerance: Third positional argument of ``kmeans``.
        ccore: Fourth positional argument of ``kmeans``.
    """
    sample = read_sample(path)
    dimension = len(sample[0])

    metric = distance_metric(type_metric.MANHATTAN)

    observer = kmeans_observer()
    kmeans_instance = kmeans(sample, start_centers, tolerance, ccore, observer=observer, metric=metric)
    (ticks, _) = timedcall(kmeans_instance.process)

    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    # The multi-dimensional visualizer is shown for every sample.
    visualizer = cluster_visualizer_multidim()
    visualizer.append_clusters(clusters, sample)
    visualizer.show()

    # The K-Means specific views can only draw 1D, 2D and 3D data (see the
    # pyclustering cluster_visualizer documentation). The former condition
    # `dimension > 3` was inverted and selected exactly the unsupported case.
    if dimension <= 3:
        kmeans_visualizer.show_clusters(sample, clusters, centers, start_centers)
        kmeans_visualizer.animate_cluster_allocation(sample, observer)
def elbow_kmeans_optimizer(X, k=None, kmin=1, kmax=5, visualize=True):
    """k-means clustering with or without automatically determined cluster numbers.

    Reference:
        https://pyclustering.github.io/docs/0.8.2/html/d3/d70/classpyclustering_1_1cluster_1_1elbow_1_1elbow.html

    # Arguments:
        X (numpy array-like): Input data matrix.
        k: Number of clusters to use. When ``None`` (default) the number is
            determined with the Elbow method between ``kmin`` and ``kmax``.
        kmin: Minimum number of clusters to consider. Defaults to 1.
        kmax: Maximum number of clusters to consider. Defaults to 5.
        visualize: Whether to perform k-means visualization or not.

    # Returns:
        Clusters as returned by ``kmeans.get_clusters()``.
        Cluster centers as returned by ``kmeans.get_centers()``.
    """
    # Function-local imports keep pyclustering an optional dependency of the
    # surrounding module; only the names actually used are imported.
    from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
    from pyclustering.cluster.elbow import elbow
    from pyclustering.cluster.kmeans import kmeans, kmeans_visualizer

    if k is not None:
        amount_clusters = k
    else:
        elbow_instance = elbow(X, kmin, kmax)
        elbow_instance.process()
        amount_clusters = elbow_instance.get_amount()

    centers = kmeans_plusplus_initializer(X, amount_clusters).initialize()
    kmeans_instance = kmeans(X, centers)
    kmeans_instance.process()

    clusters = kmeans_instance.get_clusters()
    centers = kmeans_instance.get_centers()

    # Honor the ``visualize`` flag; previously the plot was shown
    # unconditionally even when the caller passed ``visualize=False``.
    if visualize:
        kmeans_visualizer.show_clusters(X, clusters, centers)

    return clusters, centers
# Load list of points for cluster analysis. sample = read_sample(FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS) # Prepare initial centers using K-Means++ method. initial_centers = kmeans_plusplus_initializer(sample, 2).initialize() # Create instance of K-Means algorithm with prepared centers. kmeans_instance = kmeans(sample, initial_centers) # Run cluster analysis and obtain results. kmeans_instance.process() clusters = kmeans_instance.get_clusters() final_centers = kmeans_instance.get_centers() # Visualize obtained results kmeans_visualizer.show_clusters(sample, clusters, final_centers) from pyclustering.cluster.kmedoids import kmedoids from pyclustering.cluster import cluster_visualizer from pyclustering.utils import read_sample from pyclustering.samples.definitions import FCPS_SAMPLES # Load list of points for cluster analysis. sample = read_sample(FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS) # Set random initial medoids. initial_medoids = [1, 500] # Create instance of K-Medoids algorithm. kmedoids_instance = kmedoids(sample, initial_medoids) # Run cluster analysis and obtain results.
# In[53]: data_1 = list(data_1) initial_centers = kmeans_plusplus_initializer(data_1, 10).initialize() # Create instance of K-Means algorithm with prepared centers. kmeans_instance = kmeans(data_1, initial_centers) # Run cluster analysis and obtain results. kmeans_instance.process() clusters = kmeans_instance.get_clusters() final_centers = kmeans_instance.get_centers() # Visualize obtained results # visualizer = cluster_visualizer_multidim() kmeans_visualizer.show_clusters(data_1, clusters, final_centers) # visualizer.append_clusters(clusters, data_1) # visualizer.show() # In[58]: # 5 initialisations for m in range(5): initial_centers = kmeans_plusplus_initializer(data_1, 10).initialize() # Create instance of K-Means algorithm with prepared centers. kmeans_instance = kmeans(data_1, initial_centers) # Run cluster analysis and obtain results. kmeans_instance.process() clusters = kmeans_instance.get_clusters()
# Pair every width with its height: one (w, h) row per box.
samples = np.array(list(zip(ws, hs)))

for _ in range(1):
    # Cluster a random subset without replacement. The size is capped at the
    # number of available rows, because np.random.choice(..., replace=False)
    # raises ValueError when asked for more items than exist.
    sample_size = min(20000, samples.shape[0])
    sample = samples[
        np.random.choice(samples.shape[0], sample_size, replace=False), :]

    metric = distance_metric(type_metric.USER_DEFINED, func=iou_distance_wh)
    initial_centers = kmeans_plusplus_initializer(sample, 9).initialize()

    # Create instance of K-Means algorithm with prepared centers.
    kmeans_instance = kmeans(sample, initial_centers, metric=metric)

    # Run cluster analysis and obtain results.
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()
    final_centers = np.array(kmeans_instance.get_centers())

    # Visualize obtained results without opening a window (display=False);
    # the figure is written to disk below.
    kmeans_visualizer.show_clusters(sample, clusters, final_centers, display=False)

    # Sort the centers by the product of their coordinates (w * h) and round
    # to integers. The builtin `int` replaces `np.int`, which was deprecated
    # in NumPy 1.20 and removed in 1.24 (AttributeError).
    sccs = np.round(final_centers[np.argsort(np.prod(final_centers, axis=1))]).astype(int)
    print(sccs)
    # Example output recorded by the original author:
    # [[  9  13]
    #  [ 25  17]
    #  [ 16  31]
    #  [ 47  29]
    #  [ 32  51]
    #  [ 83  48]
    #  [ 61  91]
    #  [131  99]
    #  [210 189]]

plt.savefig('results/visdrone_anchors.png')
# NOTE(review): this statement uses `i`, which is defined before this excerpt;
# presumably it is the body of a loop over the data points that starts above.
# Draws one point, colored through `col` by its integer label.
plt.scatter(reduced_data[i, 0], reduced_data[i, 1], c=col[int(model_train_label[i])], marker='.')

# Build one two-element center per class (10 classes): arr_x / arr_cnt and
# arr_y / arr_cnt. Presumably arr_x/arr_y hold per-class coordinate sums and
# arr_cnt per-class counts, making these the class means - confirm.
gt_centers = []
for i in range(10):
    tmp = []
    tmp.append(arr_x[i] / arr_cnt[i])
    tmp.append(arr_y[i] / arr_cnt[i])
    gt_centers.append(tmp)

plt.title('sample data')
plt.show()

# Show the ground-truth clusters together with the centers computed above.
kmeans_visualizer.show_clusters(inp, gt_clusters, gt_centers)

# Visualize clustering results
visualizer = cluster_visualizer_multidim()
visualizer.append_clusters(gt_clusters, inp, marker='o')
# visualizer.append_cluster(noise, inp, marker='x')
# visualizer.set_canvas_title('original clustering : the ground truth')
visualizer.show()

# Prepare 10 initial centers using K-Means++ method.
initial_centers = kmeans_plusplus_initializer(inp, 10).initialize()

# Create instance of K-Means algorithm with prepared centers.
kmeans_instance = kmeans(inp, initial_centers)

# Run cluster analysis and obtain results.
# NOTE(review): the excerpt ends here; the processing calls are not visible.