def test_knn_graph(self):
    """
    Check the k-nearest neighbor graph returned by every supported method.

    For each method the returned radii must equal the expected radii, and
    each row of the returned graph must hold the same neighbor indices as
    the corresponding expected row, in any order.
    """
    k = 3

    ## Expected knn radii and neighbor indices
    expected_radii = np.array([2.0, 1.0, 1.0, 1.0, 2.0])
    expected_graph = np.array([
        [0, 1, 2],
        [1, 0, 2],
        [2, 1, 3],
        [3, 2, 4],
        [4, 3, 2],
    ])

    def check(graph, radii):
        ## Radii are compared element-wise; neighbor rows are compared
        ## without regard to ordering.
        assert_array_equal(radii, expected_radii)
        for row, expected_row in zip(graph, expected_graph):
            self.assertItemsEqual(row, expected_row)

    ## Methods that are handed the raw data
    for method in ('brute_force', 'kd_tree', 'ball_tree'):
        graph, radii = utl.knn_graph(self.X, k=k, method=method)
        check(graph, radii)

    ## Method that is handed a square pairwise distance matrix instead
    condensed = scipy.spatial.distance.pdist(self.X)
    distance_matrix = scipy.spatial.distance.squareform(condensed)
    graph, radii = utl.knn_graph(distance_matrix, k, method='precomputed')
    check(graph, radii)
def construct_tree(X, k, prune_threshold=None, num_levels=None,
                   verbose=False):
    """
    Build a level set tree directly from tabular data.

    A k-nearest neighbor similarity graph is computed from `X` with the
    'brute_force' method, a knn density estimate is derived from the
    resulting neighbor radii, and both are passed on to
    `construct_tree_from_graph`.

    Parameters
    ----------
    X : 2-dimensional numpy array
        Numeric dataset, with one observation per row.

    k : int
        Number of observations treated as neighbors of a given point.

    prune_threshold : int, optional
        Leaf nodes with fewer members than this are recursively merged into
        larger nodes. With the default of None, no pruning is performed.

    num_levels : int, optional
        Number of density levels in the constructed tree. With the default
        of None, `num_levels` is internally set to the number of rows in
        `X`.

    verbose : bool, optional
        If True, a progress indicator is printed at every 100th level of
        tree construction.

    Returns
    -------
    T : LevelSetTree
        The level set tree returned by `construct_tree_from_graph`.

    See Also
    --------
    construct_tree_from_graph, LevelSetTree

    Examples
    --------
    >>> X = numpy.random.rand(100, 2)
    >>> tree = debacl.construct_tree(X, k=8, prune_threshold=5)
    >>> print(tree)
    +----+-------------+-----------+------------+----------+------+--------+----------+
    | id | start_level | end_level | start_mass | end_mass | size | parent | children |
    +----+-------------+-----------+------------+----------+------+--------+----------+
    | 0  |    0.000    |   0.870   |   0.000    |  0.450   | 100  |  None  |  [3, 4]  |
    | 3  |    0.870    |   3.364   |   0.450    |  0.990   |  17  |   0    |    []    |
    | 4  |    0.870    |   1.027   |   0.450    |  0.520   |  35  |   0    |  [7, 8]  |
    | 7  |    1.027    |   1.755   |   0.520    |  0.870   |  8   |   4    |    []    |
    | 8  |    1.027    |   3.392   |   0.520    |  1.000   |  23  |   4    |    []    |
    +----+-------------+-----------+------------+----------+------+--------+----------+
    """
    ## Similarity graph and per-point neighbor radii
    adjacency, knn_radii = _utl.knn_graph(X, k, method='brute_force')

    ## The density estimate needs the row and column counts of the data
    num_rows, num_cols = X.shape
    density_estimate = _utl.knn_density(knn_radii, num_rows, num_cols, k)

    return construct_tree_from_graph(
        adjacency_list=adjacency,
        density=density_estimate,
        prune_threshold=prune_threshold,
        num_levels=num_levels,
        verbose=verbose,
    )
def test_knn_graph(self):
    """
    Check the k-nearest neighbor graph returned by each data-based method.

    The returned radii must equal the expected radii, and each row of the
    returned graph must hold the same neighbor indices as the corresponding
    expected row, in any order.
    """
    k = 3

    ## Expected knn radii and neighbor indices
    expected_radii = np.array([2.0, 1.0, 1.0, 1.0, 2.0])
    expected_graph = np.array([
        [0, 1, 2],
        [1, 0, 2],
        [2, 1, 3],
        [3, 2, 4],
        [4, 3, 2],
    ])

    ## Every method is held to the same expected answer
    methods = ('brute_force', 'kd_tree', 'ball_tree')
    for method in methods:
        graph, radii = utl.knn_graph(self.X, k=k, method=method)

        assert_array_equal(radii, expected_radii)
        for row, expected_row in zip(graph, expected_graph):
            self.assertItemsEqual(row, expected_row)