Esempio n. 1
0
def test_nn_search(nn_data):
    train = nn_data[100:]
    test = nn_data[:100]

    (knn_indices, knn_dists, rp_forest) = nearest_neighbors(
        train,
        10,
        "euclidean",
        {},
        False,
        np.random,
        use_pynndescent=False,
    )
    # Commented - NOT REALLY USED IN THE TEST
    # graph = fuzzy_simplicial_set(
    #     nn_data,
    #     10,
    #     np.random,
    #     "euclidean",
    #     {},
    #     knn_indices,
    #     knn_dists,
    #     False,
    #     1.0,
    #     1.0,
    #     False,
    # )

    search_graph = setup_search_graph(knn_dists, knn_indices, train)
    rng_state = np.random.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)
    init = initialise_search(rp_forest, train, test, int(10 * 3), rng_state,
                             dist.euclidean)
    result = initialized_nnd_search(train, search_graph.indptr,
                                    search_graph.indices, init, test,
                                    dist.euclidean)

    indices, dists = deheap_sort(result)
    indices = indices[:, :10]

    tree = KDTree(train)
    true_indices = tree.query(test, 10, return_distance=False)

    num_correct = 0.0
    for i in range(test.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], indices[i]))

    percent_correct = num_correct / (test.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.99,
        "Sparse NN-descent did not get "
        "99% accuracy on nearest "
        "neighbors",
    )
Esempio n. 2
0
def test_nn_search():
    train = nn_data[100:]
    test = nn_data[:100]

    (knn_indices, knn_dists, rp_forest) = nearest_neighbors(
        train, 10, "euclidean", {}, False, np.random, use_pynndescent=False,
    )

    graph = fuzzy_simplicial_set(
        nn_data,
        10,
        np.random,
        "euclidean",
        {},
        knn_indices,
        knn_dists,
        False,
        1.0,
        1.0,
        False,
    )

    search_graph = sparse.lil_matrix((train.shape[0], train.shape[0]), dtype=np.int8)
    search_graph.rows = knn_indices
    search_graph.data = (knn_dists != 0).astype(np.int8)
    search_graph = search_graph.maximum(search_graph.transpose()).tocsr()

    rng_state = np.random.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)
    init = initialise_search(
        rp_forest, train, test, int(10 * 3), rng_state, dist.euclidean
    )
    result = initialized_nnd_search(
        train, search_graph.indptr, search_graph.indices, init, test, dist.euclidean
    )

    indices, dists = deheap_sort(result)
    indices = indices[:, :10]

    tree = KDTree(train)
    true_indices = tree.query(test, 10, return_distance=False)

    num_correct = 0.0
    for i in range(test.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], indices[i]))

    percent_correct = num_correct / (test.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.99,
        "Sparse NN-descent did not get " "99% accuracy on nearest " "neighbors",
    )
Esempio n. 3
0
def _nhood_search(umap_object, nhood_size):
    if umap_object._small_data:
        dmat = sklearn.metrics.pairwise_distances(umap_object._raw_data)
        indices = np.argpartition(dmat, nhood_size)[:, :nhood_size]
        dmat_shortened = submatrix(dmat, indices, nhood_size)
        indices_sorted = np.argsort(dmat_shortened)
        indices = submatrix(indices, indices_sorted, nhood_size)
        dists = submatrix(dmat_shortened, indices_sorted, nhood_size)
    else:
        rng_state = np.empty(3, dtype=np.int64)

        if len(umap_object._metric_kwds) >= 1:
            _dist = umap_object._input_distance_func
            _args = tuple(umap_object._metric_kwds.values())

            @numba.njit()
            def _metric(x, y):
                _dist(x, y, *_args)

        else:
            _metric = umap_object._input_distance_func

        init = initialise_search(
            umap_object._rp_forest,
            umap_object._raw_data,
            umap_object._raw_data,
            int(nhood_size * umap_object.transform_queue_size),
            rng_state,
            _metric,
        )

        result = initialized_nnd_search(
            umap_object._raw_data,
            umap_object._search_graph.indptr,
            umap_object._search_graph.indices,
            init,
            umap_object._raw_data,
            _metric,
        )

        indices, dists = deheap_sort(result)
        indices = indices[:, :nhood_size]
        dists = dists[:, :nhood_size]

    return indices, dists
Esempio n. 4
0
def _nhood_search(umap_object, nhood_size):
    if umap_object._small_data:
        dmat = sklearn.metrics.pairwise_distances(umap_object._raw_data)
        indices = np.argpartition(dmat, nhood_size)[:, :nhood_size]
        dmat_shortened = submatrix(dmat, indices, nhood_size)
        indices_sorted = np.argsort(dmat_shortened)
        indices = submatrix(indices, indices_sorted, nhood_size)
        dists = submatrix(dmat_shortened, indices_sorted, nhood_size)
    else:
        rng_state = np.empty(3, dtype=np.int64)

        init = initialise_search(
            umap_object._rp_forest,
            umap_object._raw_data,
            umap_object._raw_data,
            int(nhood_size * umap_object.transform_queue_size),
            rng_state,
            umap_object._distance_func,
            umap_object._dist_args,
        )

        result = initialized_nnd_search(
            umap_object._raw_data,
            umap_object._search_graph.indptr,
            umap_object._search_graph.indices,
            init,
            umap_object._raw_data,
            umap_object._distance_func,
            umap_object._dist_args,
        )

        indices, dists = deheap_sort(result)
        indices = indices[:, :nhood_size]
        dists = dists[:, :nhood_size]

    return indices, dists