def test_nn_search():
    train = nn_data[100:]
    test = nn_data[:100]
    (knn_indices, knn_dists, rp_forest) = nearest_neighbors(
        train, 10, "euclidean", {}, False, np.random
    )

    graph = fuzzy_simplicial_set(
        nn_data,
        10,
        np.random,
        "euclidean",
        {},
        knn_indices,
        knn_dists,
        False,
        1.0,
        1.0,
        False,
    )

    # Build a symmetric adjacency graph over the training set for NN search.
    search_graph = sparse.lil_matrix((train.shape[0], train.shape[0]), dtype=np.int8)
    search_graph.rows = knn_indices
    search_graph.data = (knn_dists != 0).astype(np.int8)
    search_graph = search_graph.maximum(search_graph.transpose()).tocsr()

    random_init, tree_init = make_initialisations(dist.euclidean, ())
    search = make_initialized_nnd_search(dist.euclidean, ())

    rng_state = np.random.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)
    init = initialise_search(
        rp_forest, train, test, int(10 * 3), random_init, tree_init, rng_state
    )
    result = search(train, search_graph.indptr, search_graph.indices, init, test)

    indices, dists = deheap_sort(result)
    indices = indices[:, :10]

    tree = KDTree(train)
    true_indices = tree.query(test, 10, return_distance=False)

    num_correct = 0.0
    for i in range(test.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], indices[i]))

    percent_correct = num_correct / (test.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.99,
        "NN search did not get 99% accuracy on nearest neighbors",
    )

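# Illustrative sketch (not part of the test suite): the elementwise maximum with
# the transpose, as used in test_nn_search above, turns the directed kNN
# adjacency into an undirected search graph, so NN-descent can traverse edges in
# either direction. Toy example with hypothetical data:
def _symmetrize_example():
    a = sparse.lil_matrix((3, 3), dtype=np.int8)
    a[0, 1] = 1  # directed edge 0 -> 1 only
    sym = a.maximum(a.transpose()).tocsr()
    assert sym[0, 1] == 1 and sym[1, 0] == 1  # edge now traversable both ways
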
def test_smooth_knn_dist_l1norms_w_connectivity():
    knn_indices, knn_dists, _ = nearest_neighbors(
        nn_data, 10, "euclidean", {}, False, np.random
    )
    sigmas, rhos = smooth_knn_dist(knn_dists, 10, local_connectivity=1.75)

    shifted_dists = knn_dists - rhos[:, np.newaxis]
    shifted_dists[shifted_dists < 0.0] = 0.0
    vals = np.exp(-(shifted_dists / sigmas[:, np.newaxis]))
    norms = np.sum(vals, axis=1)

    assert_array_almost_equal(
        norms,
        1.0 + np.log2(10) * np.ones(norms.shape[0]),
        decimal=3,
        err_msg="Smooth knn-dists does not give expected "
        "norms for local_connectivity=1.75",
    )

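# Illustrative sketch (not part of the test suite): smooth_knn_dist solves, per
# point, for the sigma that makes the membership strengths of neighbors beyond
# the nearest sum to log2(k); the nearest neighbor's term is pinned at
# exp(0) = 1, which is why the assertion above expects 1 + log2(10). The
# hypothetical helper below re-derives sigma for a single row of distances by
# bisection, assuming rho has already been computed.
def _solve_sigma(dists, k, rho, n_iter=64, tol=1e-5):
    target = np.log2(k)
    lo, hi, sigma = 0.0, np.inf, 1.0
    for _ in range(n_iter):
        # Membership strengths of all neighbors past the nearest one.
        psum = np.sum(np.exp(-np.maximum(dists[1:] - rho, 0.0) / sigma))
        if np.fabs(psum - target) < tol:
            break
        if psum > target:
            hi = sigma
            sigma = (lo + hi) / 2.0
        else:
            lo = sigma
            sigma = sigma * 2.0 if hi == np.inf else (lo + hi) / 2.0
    return sigma
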
def test_nn_descent_neighbor_accuracy_callable_metric():
    knn_indices, knn_dists, _ = nearest_neighbors(
        nn_data, 10, dist.euclidean, {}, False, np.random
    )

    tree = KDTree(nn_data)
    true_indices = tree.query(nn_data, 10, return_distance=False)

    num_correct = 0.0
    for i in range(nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.99,
        "NN-descent did not get 99% accuracy "
        "on nearest neighbors with callable metric",
    )

def test_sparse_angular_nn_descent_neighbor_accuracy():
    knn_indices, knn_dists, _ = nearest_neighbors(
        sparse_nn_data, 10, "cosine", {}, True, np.random
    )

    # Cosine neighbors of the raw data coincide with euclidean neighbors of the
    # L2-normalized data, so a KDTree on the normalized data gives ground truth.
    angular_data = normalize(sparse_nn_data, norm="l2").toarray()
    tree = KDTree(angular_data)
    true_indices = tree.query(angular_data, 10, return_distance=False)

    num_correct = 0.0
    for i in range(sparse_nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (sparse_nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.99,
        "Sparse angular NN-descent did not get 99% accuracy on nearest neighbors",
    )

def test_sparse_nn_descent_neighbor_accuracy():
    knn_indices, knn_dists, _ = nearest_neighbors(
        sparse_nn_data, 10, "euclidean", {}, False, np.random
    )

    tree = KDTree(sparse_nn_data.todense())
    true_indices = tree.query(sparse_nn_data.todense(), 10, return_distance=False)

    num_correct = 0.0
    for i in range(sparse_nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (sparse_nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.99,
        "Sparse NN-descent did not get 99% accuracy on nearest neighbors",
    )

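# The accuracy tests above repeat the same recall computation; a hypothetical
# helper like the one below (not currently in this module) would capture it in
# one place. It returns the fraction of true k-nearest neighbors recovered.
def _neighbor_accuracy(true_indices, found_indices):
    n_points, k = true_indices.shape
    num_correct = 0.0
    for i in range(n_points):
        num_correct += np.sum(np.in1d(true_indices[i], found_indices[i]))
    return num_correct / (n_points * k)
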