def _kmeans(descriptors):
    """Map BRISK descriptors to a histogram of cluster ids.

    Each descriptor (64 floats, per the original documentation) is classified
    against the pre-trained cluster space, yielding one integer cluster id per
    descriptor, so an (n, 64) descriptor matrix becomes an n-vector of ids.
    The multiplicity of every cluster id in that vector is then returned as a
    fixed-size histogram.

    Args:
        descriptors: Iterable of descriptors to classify.

    Returns:
        A list of ``brisk_constants.N_CLUSTERS`` integers where entry ``i`` is
        the number of descriptors assigned to cluster ``i``.
    """
    # `centroids` is resolved from outside this function.
    kmeans_model = KMeans(centroids=centroids)

    # Count in a single pass instead of calling list.count() once per
    # distinct id, which rescans the whole id list every time.
    histogram = [0] * brisk_constants.N_CLUSTERS
    for descriptor in descriptors:
        histogram[kmeans_model.classify(descriptor)] += 1
    return histogram
kmeans.convergence = 0.001 # 收敛误差 kmeans.setup() # 在开始训练前跑个设定 # 每迭代的回呼函式 def iteration_callback(iteration_times, groups): print("iteration %r" % iteration_times) for group in groups: print("新旧群心 (%r) : % r -> %r" % (group.tag, group.old_center, group.center)) # 完成训练时的回呼函式 def completion_callback(iteration_times, groups, sse): print("completion %r and sse %r" % (iteration_times, sse)) for group in groups: print("最终的群心 (%r): %r, %r" % (group.tag, group.center, group.samples)) # 开始训练 kmeans.run(iteration_callback, completion_callback) # 对新样本进行分类后的结果回呼函式 def classified_callback(point, group): print("%r classified to %r" % (point, group.tag)) # 用已训练好的模型来对新样本进行分类 kmeans.classify([[3, 4], [5, 6]], classified_callback)
def _exit_with_usage(message):
    """Print *message* followed by the usage text, then exit with status 1."""
    print(message)
    print_usage()
    # sys.exit is used rather than the `exit` helper injected by `site`,
    # which is not guaranteed to exist outside interactive sessions.
    sys.exit(1)


def _parse_int_option(arg):
    """Return the integer after ``=`` in *arg*; exit with usage if it is not one."""
    flag, _, raw_value = arg.partition('=')
    try:
        return int(raw_value)
    except ValueError:
        _exit_with_usage(f"Invalid integer value for {flag}: {raw_value!r}")


def main():
    """Command-line entry point for the clustering demo.

    Recognised arguments (taken from ``sys.argv``):
        -k=<int>   number of clusters (required, must be non-zero)
        -n=<int>   number of points to generate when no CSV path is given
        -l=<int>   distance limit passed to the classifier as ``dist_limit``
        -v         verbose output
        -p         plot the result
        <path>     CSV file to read locations from (at most one)

    Either a CSV path or ``-n`` must be supplied together with ``-k``.
    On invalid input a message and the usage text are printed and the process
    exits with status 1.
    """
    print("Clustering Fun\n")

    verbose = False
    plot = False
    k = None
    num_points = None
    csv_path = None
    lim_distance = None

    for arg in sys.argv[1:]:  # argv[0] is the script name
        if arg.startswith("-"):
            if arg.startswith('-k='):
                k = _parse_int_option(arg)
            elif arg.startswith('-n='):
                num_points = _parse_int_option(arg)
            elif arg.startswith('-l='):
                lim_distance = _parse_int_option(arg)
            elif arg == "-v":
                verbose = True
            elif arg == "-p":
                plot = True
            else:
                _exit_with_usage(f"Unknown argument: {arg}")
        elif not csv_path:
            csv_path = arg
        else:
            _exit_with_usage("Too many parameters!")

    # A data source (CSV path or point count) and a cluster count are both
    # required; parentheses make the original and/or precedence explicit.
    if (not csv_path and not num_points) or not k:
        _exit_with_usage("Too few parameters!")

    if verbose:
        print_options(verbose, csv_path, k, plot, lim_distance)

    if csv_path:
        locations = read_csv(csv_path)
        if verbose:
            print(f"\nRead {len(locations)} locations from {csv_path}")
    else:
        locations = generate_points_gauss(num_points, k)

    print("\nSetting up K-Means Classifier:")
    km = KMeans(locations, k, verbose, dist_limit=lim_distance)

    print("Finding best initial center points with K-Means++")
    # Try several initialisations and keep the one with the lowest variance.
    # The first attempt always replaces the None placeholder.
    centers = None
    min_var = None
    for attempt in range(8):
        tmp_centers, variance = km.init_centers()
        if verbose:
            print(f" Iteration {attempt}, variance = {variance} km")
        if min_var is None or variance < min_var:
            min_var = variance
            centers = np.copy(tmp_centers)

    # Image tags are prefixed with the CSV path when one was given.
    tag_prefix = f"{csv_path}_" if csv_path else ""

    if plot and verbose:
        plot_locations(
            locations, k, centers, img_tag=f"{tag_prefix}initial_state")

    centers, locations = km.classify(centers)

    if plot:
        plot_locations(locations, k, centers, img_tag=tag_prefix)

    print("\nClass centers:")
    for idx, c in enumerate(centers):
        # Filter the members once per class instead of once per statistic.
        members = [loc for loc in locations if loc.classification == idx]
        print(f' Class = {idx}:')
        print(f' Center = {c}')
        if members:
            farthest = max(loc.class_distance for loc in members)
            print(f' Farthest point from center = {farthest} km')
        else:
            # max() of an empty sequence would raise ValueError.
            print(' Farthest point from center = n/a (no locations in this class)')
        print(f' Attribute1 = {any(loc.attr1 for loc in members)} (Logical OR)')
        print(f' Attribute2 = {any(loc.attr2 for loc in members)} (Logical OR)')
        print()

    if lim_distance:
        unclassified = sum(1 for loc in locations if loc.classification is None)
        print(f"\nWith distance limit of {lim_distance} km:")
        print(f" {unclassified} locations are left unclassified")