def euclidean_cluster(points, base_radius=None, n_clusters=None,
                      rgen=default_chooser, iters=10):
    """Partition the columns of ``points`` into Euclidean clusters.

    Seeds are chosen greedily: ``rgen`` picks a still-unassigned column index
    as a new center, and every other unassigned column closer than
    ``base_radius`` to it is absorbed into that center's ball.  Once every
    column is covered, the centers are refined for ``iters`` rounds by
    alternating nearest-center assignment (``qmain.nn_search``) with
    ``partition_centers``.

    Parameters
    ----------
    points : 2-D array
        One point per column (columns are indexed throughout).
    base_radius : float
        Ball radius used while seeding.  Currently required.
    n_clusters : int, optional
        Accepted for interface compatibility; the body never reads it.
    rgen : callable
        Called with the list of unassigned column indices; must return one
        element of that list.
    iters : int
        Number of refinement rounds (previously a hard-coded 10).

    Returns
    -------
    (partition, pts_centers)
        ``partition`` is the flattened second element of ``nn_search``'s
        result -- presumably, for each point, the index of its nearest center
        (TODO confirm against qmain).  ``pts_centers`` is whatever
        ``partition_centers`` returned on the last refinement round.

    Raises
    ------
    AssertionError
        If both ``base_radius`` and ``n_clusters`` are None.
    NotImplementedError
        If only ``n_clusters`` is given: the seeding loop needs a radius and
        would otherwise compare distances against ``None``.
    """
    msg = "Must specify either a radius or a number of clusters."
    assert not (base_radius is None and n_clusters is None), msg
    if base_radius is None:
        raise NotImplementedError(
            "Clustering by n_clusters alone is not implemented; "
            "pass base_radius."
        )

    # A real list is required: it is mutated below and handed to rgen.
    # (On Python 3 a bare range() object has no .remove().)
    unassigned = list(range(np.shape(points)[1]))
    centers = []
    while unassigned:
        new_center = rgen(unassigned)
        centers.append(new_center)
        unassigned.remove(new_center)
        if not unassigned:
            # Nothing left to absorb; skip the empty distance computation.
            break

        center_2d = np.reshape(points[:, new_center], [1, -1])
        ua_dists = spatial.distance.cdist(points[:, unassigned].T, center_2d)
        in_ball = ua_dists.flatten() < base_radius
        # Drop every point inside the ball in one order-preserving pass
        # instead of one O(n) list.remove() per ball member.
        unassigned = [idx for idx, hit in zip(unassigned, in_ball) if not hit]

    pts_centers = points[:, centers]
    for _ in range(iters):
        idxs = qmain.nn_search(pts_centers, points, 1)[1]
        partition = idxs.flatten()
        pts_centers = partition_centers(points, partition)

    # Re-assign once more so the returned partition matches the final centers.
    idxs = qmain.nn_search(pts_centers, points, 1)[1]
    partition = idxs.flatten()
    return partition, pts_centers
def cluster(eigvecs, eigvals, base_radius=None, knn=10):
    """Build a cluster tree bottom-up from an eigen-embedding.

    Each row of ``eigvecs`` becomes a leaf ``ClusterTreeNode``.  The points
    being clustered are the columns of ``diag(eigvals) . eigvecs.T``.  At each
    level the current points are partitioned with ``euclidean_cluster``, one
    parent node is created per distinct partition label, every current node
    is attached to its parent, and the next level's points are the returned
    cluster centers rescaled by the (repeatedly squared) diagonal matrix.
    The process stops once a level yields at most one center.

    Parameters
    ----------
    eigvecs : 2-D array
        One row per data point, one column per eigenvector.
    eigvals : 1-D array
        Values placed on the diagonal that scales the embedding.
    base_radius : float, optional
        Ball radius passed to ``euclidean_cluster``.  If None it is estimated
        as the median of the first element of
        ``qmain.nn_search(points, points, knn)`` -- presumably the
        nearest-neighbour distances (TODO confirm against qmain).
    knn : int
        Neighbour count for the radius estimate; clamped to the number of
        points.

    Returns
    -------
    ClusterTreeNode
        The single root node, after ``make_index()`` has been called on it.

    Raises
    ------
    AssertionError
        If the final level does not consist of exactly one node.
    """
    n_points = np.shape(eigvecs)[0]
    orphans = [ClusterTreeNode([i]) for i in range(n_points)]

    d = np.diag(eigvals)
    points = d.dot(eigvecs.T)

    if base_radius is None:
        knn = min(np.shape(points)[1], knn)
        dists = qmain.nn_search(points, points, knn)[0]
        base_radius = np.median(dists)
        # Single-argument call form: prints the same text on Python 2 and 3.
        print("The estimated radius for building folders is {}.".format(base_radius))

    while True:
        partition, centers = euclidean_cluster(points, base_radius)

        # One fresh parent per distinct label.
        # NOTE(review): labels are used directly as indices into new_parents,
        # which assumes they are exactly 0..k-1 with none missing -- confirm
        # that euclidean_cluster guarantees this.
        new_parents = [ClusterTreeNode([]) for _ in np.unique(partition)]
        for idx, orphan in enumerate(orphans):
            orphan.assign_to_parent(new_parents[partition[idx]])
        orphans = new_parents

        if np.shape(centers)[1] <= 1:
            break

        # Square the scaling for the next, coarser level.
        d = d.dot(d)
        points = d.dot(centers)

    assert len(orphans) == 1
    root = orphans[0]
    root.make_index()
    return root