def join_CCs(X, G, embed_dim, num_ccs=1, max_angle=0.3, verbose=False):
    """Add edges between connected components of G until at most num_ccs remain.

    The connected components of the incoming graph are computed once and a
    subspace is fitted to each of them with ``cluster_subspaces``; those
    original labels and planes are reused for every later round, even after
    components have been merged.

    Each round asks ``_connect_meta_edges`` to add edges between
    nearest-neighbor clusters under a distance threshold and an angle
    threshold.  When a call adds nothing, exactly one threshold is relaxed,
    using the ``minD`` / ``minF`` values reported by that call, and the call
    is retried.

    Parameters
    ----------
    X : data passed through to the helper functions
        (presumably an (n_points, n_features) array -- confirm with callers).
    G : graph object providing ``connected_components(return_labels=True)``.
    embed_dim : dimension handed to ``cluster_subspaces``.
    num_ccs : stop once the graph has this many components or fewer.
    max_angle : upper bound for the angle threshold; it is raised in steps
        of 0.1 when the helper reports an infinite ``minD``.
    verbose : print progress information when true.

    Returns
    -------
    (CC_labels, angle_thresh) : the component labels computed on entry and
        the final angle threshold.  The graph itself is not returned;
        NOTE(review): callers presumably rely on ``_connect_meta_edges``
        modifying G in place -- confirm.

    Raises
    ------
    AssertionError
        If no edge was added and neither threshold can be relaxed.
    """
    num_components, current_labels = G.connected_components(return_labels=True)
    # Subspaces are fitted once, on the components of the incoming graph.
    CC_planes, _ = cluster_subspaces(X, embed_dim, num_components,
                                     current_labels)
    # Keep the original labels: they are the ones that match CC_planes.
    CC_labels = current_labels
    angle_thresh = 0.1

    while num_components > num_ccs:
        # Distances between the current clusters, plus the index data used
        # below to map each cluster pair back to the original labels.
        cluster_dists, closest_idxs = inter_cluster_distance(
            X, num_components, current_labels)
        # "Meta-edges": each cluster paired with its nearest other cluster.
        meta_pairs = nearest_neighbors(cluster_dists, precomputed=True,
                                       k=2)  # self + 1 == 2
        src, dst = meta_pairs[:, 0], meta_pairs[:, 1]
        dist_thresh = np.median(cluster_dists[src, dst])
        if verbose:  # pragma: no cover
            print(num_components, 'CCs')
        # Translate the meta-edges back into the original CC_labels space.
        CC_ninds = CC_labels[closest_idxs[src, dst]]

        # Retry, relaxing one threshold per attempt, until an edge is added.
        while True:
            if verbose:  # pragma: no cover
                print('DT:', dist_thresh, 'AT:', angle_thresh)
            G, num_added, minD, minF = _connect_meta_edges(
                X, G, CC_planes, CC_labels, CC_ninds,
                dist_thresh=dist_thresh, angle_thresh=angle_thresh)
            if num_added > 0:
                break

            # First choice: loosen the angle threshold within max_angle.
            if angle_thresh < minF <= max_angle:
                angle_thresh = minF
                continue

            if not (dist_thresh < minD):
                raise AssertionError(
                    "Impossible state: can't increase dist_thresh "
                    "enough to make a connection")

            # Second choice: loosen the distance threshold.
            if not np.isinf(minD):
                dist_thresh = minD
                continue

            # Last resort: minD is infinite, so raise the angle ceiling.
            max_angle += 0.1  # XXX: hack
            angle_thresh = min(minF, max_angle)
            if verbose:  # pragma: no cover
                print('Increasing max_angle to', max_angle)

        # Recompute the components (CC_planes / CC_labels stay as they were)
        # and repeat until few enough remain.
        num_components, current_labels = G.connected_components(
            return_labels=True)

    return CC_labels, angle_thresh
def grow_trees(X, G, embed_dim, verbose=False):
    """Connect the smallest components of G until all exceed embed_dim in size.

    Each round recomputes the connected components of G and counts the
    members of each with ``np.bincount``.  Once the smallest component has
    more than ``embed_dim`` members the graph is returned.  Otherwise every
    component of that minimal size is paired with its nearest other
    component, and ``_connect_meta_edges`` is asked to add edges for those
    pairs under a distance threshold.  The threshold never decreases
    between rounds.

    Parameters
    ----------
    X : data passed through to the helper functions
        (presumably an (n_points, n_features) array -- confirm with callers).
    G : graph object providing ``connected_components(return_labels=True)``.
    embed_dim : every component must end up with more members than this.
    verbose : print progress information when true.

    Returns
    -------
    The graph returned by the last ``_connect_meta_edges`` call, or ``G``
    itself when no round was needed.

    Raises
    ------
    AssertionError
        If a round adds no edges.  This is raised explicitly rather than via
        an ``assert`` statement: under ``python -O`` asserts are removed, and
        a round without progress would otherwise repeat forever.
    """
    dist_thresh = 0
    while True:
        n, labels = G.connected_components(return_labels=True)
        tree_sizes = np.bincount(labels)
        min_tree_size = tree_sizes.min()
        if min_tree_size > embed_dim:
            break

        # Only the distance matrix is needed here; the second return value
        # of inter_cluster_distance is unused by this function.
        Dcenter, _ = inter_cluster_distance(X, n, labels)
        pairs = nearest_neighbors(Dcenter, precomputed=True,
                                  k=2)  # self + 1 == 2
        # Keep only the meta-edges that start at a smallest component.
        ninds = pairs[tree_sizes == min_tree_size]
        meta_edge_lengths = Dcenter[ninds[:, 0], ninds[:, 1]]
        # Large enough to admit every selected meta-edge, and never smaller
        # than in a previous round.
        dist_thresh = max(dist_thresh, np.max(meta_edge_lengths))
        if verbose:  # pragma: no cover
            print(n, 'CCs. dist thresh:', dist_thresh)

        # Modify G to connect edges between nearby CCs.
        G, num_added = _connect_meta_edges(
            X, G, None, labels, ninds, dist_thresh=dist_thresh)[:2]
        if not (num_added > 0):
            # Without new edges the next round would be identical to this
            # one, so fail loudly instead of looping forever.
            raise AssertionError(
                "grow_trees made no progress: no edges were added at "
                "dist_thresh=%r" % (dist_thresh,))
    return G