Ejemplo n.º 1
0
def test_nn_search():
    """Query 100 held-out points against an NN-descent index built on the rest.

    Builds a search graph from the training set's k-NN structure, initialises
    the search with the RP-forest, runs initialised NN-descent search for the
    test points, and requires >= 99% agreement with exact KDTree neighbors.
    """
    train = nn_data[100:]
    test = nn_data[:100]
    (knn_indices, knn_dists,
     rp_forest) = nearest_neighbors(train, 10, "euclidean", {}, False,
                                    np.random)

    # NOTE(review): `graph` is never used below; it looks like copy-paste
    # leftover. It is kept because it consumes draws from np.random, and
    # removing it would shift the RNG stream used by rng_state further down.
    graph = fuzzy_simplicial_set(
        nn_data,
        10,
        np.random,
        "euclidean",
        {},
        knn_indices,
        knn_dists,
        False,
        1.0,
        1.0,
        False,
    )

    # Symmetrized adjacency over the training set: an edge wherever either
    # point lists the other among its (non-zero-distance) neighbors.
    search_graph = sparse.lil_matrix((train.shape[0], train.shape[0]),
                                     dtype=np.int8)
    search_graph.rows = knn_indices
    search_graph.data = (knn_dists != 0).astype(np.int8)
    search_graph = search_graph.maximum(search_graph.transpose()).tocsr()

    random_init, tree_init = make_initialisations(dist.euclidean, ())
    search = make_initialized_nnd_search(dist.euclidean, ())

    rng_state = np.random.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)
    # Seed each query with 3x the target neighbor count before refinement.
    init = initialise_search(rp_forest, train, test, int(10 * 3), random_init,
                             tree_init, rng_state)
    result = search(train, search_graph.indptr, search_graph.indices, init,
                    test)

    indices, dists = deheap_sort(result)
    indices = indices[:, :10]

    # Ground truth: exact 10-NN of each test point within the training set.
    tree = KDTree(train)
    true_indices = tree.query(test, 10, return_distance=False)

    num_correct = 0.0
    for i in range(test.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], indices[i]))

    percent_correct = num_correct / (test.shape[0] * 10)
    # Fixed message: this test exercises dense NN-descent search, not the
    # sparse code path the previous message claimed.
    assert_greater_equal(
        percent_correct,
        0.99,
        "NN-descent search did not get "
        "99% accuracy on nearest "
        "neighbors",
    )
Ejemplo n.º 2
0
def test_smooth_knn_dist_l1norms_w_connectivity():
    """smooth_knn_dist sigmas/rhos should normalise rows to 1 + log2(k).

    With local_connectivity=1.75, each row of exp(-(d - rho) / sigma) summed
    over the 10 neighbors should equal 1 + log2(10) (to 3 decimals).
    """
    knn_indices, knn_dists, _ = nearest_neighbors(nn_data, 10, "euclidean", {},
                                                  False, np.random)
    sigmas, rhos = smooth_knn_dist(knn_dists, 10, local_connectivity=1.75)
    # Shift by rho (distance to the local_connectivity-th neighbor) and clamp
    # negatives to zero before applying the exponential kernel.
    shifted_dists = knn_dists - rhos[:, np.newaxis]
    shifted_dists[shifted_dists < 0.0] = 0.0
    vals = np.exp(-(shifted_dists / sigmas[:, np.newaxis]))
    norms = np.sum(vals, axis=1)

    assert_array_almost_equal(
        norms,
        1.0 + np.log2(10) * np.ones(norms.shape[0]),
        decimal=3,
        # Fixed: the implicit string concatenation was missing a space,
        # yielding "expectednorms" in the failure message.
        err_msg="Smooth knn-dists does not give expected "
        "norms for local_connectivity=1.75",
    )
Ejemplo n.º 3
0
def test_nn_descent_neighbor_accuracy_callable_metric():
    """NN-descent with a callable metric should match exact KDTree 10-NN >= 99%."""
    knn_indices, knn_dists, _ = nearest_neighbors(nn_data, 10, dist.euclidean,
                                                  {}, False, np.random)

    tree = KDTree(nn_data)
    true_indices = tree.query(nn_data, 10, return_distance=False)

    num_correct = 0.0
    for i in range(nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    # Fixed: denominator previously used spatial_data.shape[0], which is a
    # different dataset from the nn_data rows actually iterated above; the
    # accuracy must be normalised by the number of rows scored.
    percent_correct = num_correct / (nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.99,
        "NN-descent did not get 99% "
        "accuracy on nearest neighbors with callable metric",
    )
Ejemplo n.º 4
0
def test_sparse_angular_nn_descent_neighbor_accuracy():
    """Sparse angular NN-descent should match exact cosine 10-NN >= 99%.

    Ground truth: cosine neighbors equal Euclidean neighbors on L2-normalised
    data, so a KDTree on the normalised dense copy gives the reference.
    """
    knn_indices, knn_dists, _ = nearest_neighbors(sparse_nn_data, 10, "cosine",
                                                  {}, True, np.random)

    angular_data = normalize(sparse_nn_data, norm="l2").toarray()
    tree = KDTree(angular_data)
    true_indices = tree.query(angular_data, 10, return_distance=False)

    # Fixed: the loop previously ranged over nn_data.shape[0] and the
    # denominator used spatial_data.shape[0] — both unrelated datasets.
    # Score every row of the dataset actually under test.
    num_correct = 0.0
    for i in range(sparse_nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (sparse_nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.99,
        "NN-descent did not get 99% "
        "accuracy on nearest neighbors",
    )
Ejemplo n.º 5
0
def test_sparse_nn_descent_neighbor_accuracy():
    """Sparse Euclidean NN-descent should match exact KDTree 10-NN >= 99%."""
    knn_indices, knn_dists, _ = nearest_neighbors(sparse_nn_data, 10,
                                                  "euclidean", {}, False,
                                                  np.random)

    # Densify once for the exact reference; KDTree cannot index sparse input.
    dense = sparse_nn_data.todense()
    tree = KDTree(dense)
    true_indices = tree.query(dense,
                              10,
                              return_distance=False)

    # Fixed: the loop previously ranged over nn_data.shape[0] and the
    # denominator used spatial_data.shape[0] — both unrelated datasets.
    # Score every row of the dataset actually under test.
    num_correct = 0.0
    for i in range(sparse_nn_data.shape[0]):
        num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))

    percent_correct = num_correct / (sparse_nn_data.shape[0] * 10)
    assert_greater_equal(
        percent_correct,
        0.99,
        "Sparse NN-descent did not get "
        "99% accuracy on nearest "
        "neighbors",
    )