Ejemplo n.º 1
0
def test_index_save_load():
    """Save a built index to disk, reload it into a fresh tree, and compare.

    Checks that fitting from a saved index is more than 10x faster than
    building it from scratch, and that the tree fitted from the saved index
    answers the same queries with identical distances and indices.
    """
    import tempfile

    data = np.random.uniform(0, 100, size=(500000, 3)).astype(np.float32)
    queries = np.random.uniform(0, 100, size=(100, 3)).astype(np.float32)

    # Build the reference kd-tree index from scratch
    kdtree = pynanoflann.KDTree()
    with Timer() as index_build_time:
        kdtree.fit(data)
    dist1, idx1 = kdtree.kneighbors(queries)

    # A private temporary directory replaces the hard-coded '/tmp/index.bin':
    # it is portable, cannot collide with a parallel run, and is cleaned up.
    with tempfile.TemporaryDirectory() as tmp_dir:
        index_path = os.path.join(tmp_dir, 'index.bin')

        # Save the built index
        # NOTE: Only the index will be saved, data points are NOT stored in the index
        kdtree.save_index(index_path)
        assert os.path.exists(index_path)

        # Now, load a prebuilt index
        # BEWARE, data points must be the same
        new_kdtree = pynanoflann.KDTree()
        with Timer() as index_load_time:
            new_kdtree.fit(data, index_path)

        # Query the tree that was fitted from the saved index (not the
        # original one), otherwise the comparison below proves nothing.
        dist2, idx2 = new_kdtree.kneighbors(queries)

    # Fitting with a prebuilt index is much faster, since it only requires loading a binary file
    assert index_build_time.elapsed > 10 * index_load_time.elapsed

    # At the same time, the results are identical
    assert (dist2 == dist1).all()
    assert (idx1 == idx2).all()
Ejemplo n.º 2
0
def test_incorrect_param():
    """Invalid constructor arguments and malformed inputs must raise ValueError."""
    # Metric name 'l3' is expected to be rejected by the constructor
    with pytest.raises(ValueError):
        pynanoflann.KDTree(metric='l3')

    nn = pynanoflann.KDTree(n_neighbors=10)

    # Array of strings
    with pytest.raises(ValueError):
        nn.fit(np.array(['str', 'qwe']))

    # Three-dimensional array
    with pytest.raises(ValueError):
        nn.fit(np.random.uniform(size=(1, 2, 3)))

    # Fit OUTSIDE the raises block: with both calls inside, an error raised by
    # fit would satisfy the check and kneighbors would never be executed.
    # NOTE(review): the expected error presumably comes from n_neighbors=10
    # exceeding the 5 indexed points - confirm against the library.
    nn.fit(np.random.uniform(size=(5, 10)))
    with pytest.raises(ValueError):
        nn.kneighbors(np.random.uniform(size=(2, 10)))
def test_batched():
    """Check ``batched_kneighbors`` against a loop of independent KD-trees.

    One tree is fitted and queried per batch to produce the expected
    result; the batched call with ``n_jobs=2`` must give matching
    distances (within ``np.allclose``) and exactly equal indices.
    """
    import pynanoflann
    import numpy as np

    batch_count = 100
    search_params = dict(n_neighbors=4, metric='L2', leaf_size=20)

    target = np.random.rand(batch_count, 10000, 3).astype(np.float32)
    query = np.random.rand(batch_count, 2000, 3).astype(np.float32)

    # Reference: one tree per batch, queried on its own
    expected_dist, expected_idx = [], []
    for batch_target, batch_query in zip(target, query):
        tree = pynanoflann.KDTree(**search_params)
        tree.fit(batch_target)
        batch_dist, batch_idx = tree.kneighbors(batch_query)
        expected_dist.append(batch_dist)
        expected_idx.append(batch_idx)
    expected_dist = np.array(expected_dist)
    expected_idx = np.array(expected_idx)

    # Same search through the batched entry point
    distances, indices = pynanoflann.batched_kneighbors(
        target, query, n_jobs=2, **search_params)
    distances = np.array(distances)
    indices = np.array(indices)

    assert np.allclose(expected_dist, distances)
    assert (indices == expected_idx).all()
Ejemplo n.º 4
0
def test_radius():
    """Radius search with the constructor radius and with a per-call override.

    Four 1-D points (1, 2, 3, 4) are indexed with the 'l1' metric; the
    query point is 1.5.
    """
    points = np.array([[1.], [2.], [3.], [4.]]).reshape((-1, 1))

    nn = pynanoflann.KDTree(metric='l1', radius=1)
    nn.fit(points)

    # Constructor radius (1): points at indices 0 and 1 are expected
    probe = np.array([[1.5]]).reshape((-1, 1))
    distances, indices = nn.radius_neighbors(probe)
    assert set(indices[0]) == {0, 1}

    # Per-call radius (0.1): nothing is expected
    probe = np.array([[1.5]]).reshape((-1, 1))
    distances, indices = nn.radius_neighbors(probe, radius=0.1)
    assert set(indices[0]) == set()
Ejemplo n.º 5
0
def test_radius_arg_passing():
    """Omitting ``radius`` must behave like passing the constructor value.

    The tree is created with ``radius=2``; a query without a radius and a
    query with an explicit ``radius=2`` must return the same indices.
    """
    points = np.array([[1.], [2.], [3.], [4.]]).reshape(-1, 1)
    probe = np.array([[0.1]]).reshape(-1, 1)

    tree = pynanoflann.KDTree(metric='l2', radius=2)
    tree.fit(points)

    _, implicit_idx = tree.radius_neighbors(probe)
    _, explicit_idx = tree.radius_neighbors(probe, radius=2)

    assert (implicit_idx[0] == explicit_idx[0]).all()
    assert set(implicit_idx[0]) == {0, 1}
Ejemplo n.º 6
0
    def search_batch(i):
        """Compare default and ``n_jobs=4`` nearest-neighbour search on batch ``i``.

        Reads ``target`` and ``query`` from outside this function, fits a
        tree on ``target[i]`` and queries it twice with ``query[i]``.
        """
        batch_target = target[i]
        batch_query = query[i]

        tree = pynanoflann.KDTree(n_neighbors=1, metric='L2', leaf_size=20)
        tree.fit(batch_target)

        serial_dist, serial_idx = tree.kneighbors(batch_query)
        parallel_dist, parallel_idx = tree.kneighbors(batch_query, n_jobs=4)

        # Both calls must agree
        assert np.allclose(serial_dist, parallel_dist)
        assert (serial_idx == parallel_idx).all()
Ejemplo n.º 7
0
def test_pickle():
    """Round-trip a fitted and an unfitted KD-tree through ``pickle``.

    The fitted tree must answer queries identically after unpickling and
    keep its ``leaf_size`` and ``radius``; the unfitted tree must pickle to
    fewer than 200 bytes and keep its ``metric``.
    """
    data = np.random.uniform(0, 100, size=(500000, 3)).astype(np.float32)
    queries = np.random.uniform(0, 100, size=(100, 3)).astype(np.float32)

    leaf_size, radius = 20, 0.5

    # Fit a tree and record the reference answers
    kdtree = pynanoflann.KDTree(metric='l1', leaf_size=leaf_size, radius=radius)
    kdtree.fit(data)
    dist1, idx1 = kdtree.kneighbors(queries)

    # Serialize to an in-memory bytes object
    pickled = pickle.dumps(kdtree)

    # The payload carries the data points: 500000 points * 3 dims * 4 bytes ~ 6 MB
    pickled_size = len(pickled)
    assert 6_000_000 < pickled_size
    assert pickled_size < 6_001_000

    # Drop the originals so only the pickled bytes remain
    del kdtree, data

    # Restore and repeat the query
    unpickled_kdtree = pickle.loads(pickled)
    dist2, idx2 = unpickled_kdtree.kneighbors(queries)

    # Results must match exactly
    assert (dist1 == dist2).all()
    assert (idx1 == idx2).all()

    # Constructor parameters must survive the round trip
    assert unpickled_kdtree.leaf_size == leaf_size
    assert unpickled_kdtree.radius == radius

    # An unfitted tree pickles to a very small payload (parameters only)
    unfitted_kdtree = pynanoflann.KDTree(metric='l1')
    unfitted_blob = pickle.dumps(unfitted_kdtree)
    assert len(unfitted_blob) < 200
    un_un_tree = pickle.loads(unfitted_blob)
    assert un_un_tree.metric == 'l1'
Ejemplo n.º 8
0
def test(search_type='knn', data_dim=3, n_index_points=2000, n_query_points=100, n_neighbors=10, metric='l2', output=False, radius=1):
    """Compare ``pynanoflann.KDTree`` with sklearn ``NearestNeighbors`` on random data.

    Args:
        search_type: 'knn' runs ``kneighbors``; any other value runs
            ``radius_neighbors``.
        data_dim: number of columns of the random index and query arrays.
        n_index_points: number of random points to index.
        n_query_points: number of random query points.
        n_neighbors: passed to both searchers.
        metric: passed to both searchers.
        output: when true (and ``search_type == 'knn'``), print a timing
            table and the distance / index differences.
        radius: passed to both searchers.

    Raises:
        AssertionError: if the two implementations disagree; the call
            arguments are attached as the assertion message.
    """
    # Snapshot the call arguments before any other local exists; this is the
    # message attached to every assertion below.
    params = dict(locals())

    data = np.random.uniform(0, 100, size=(n_index_points, data_dim)).astype(np.float32)
    queries = np.random.uniform(0, 100, size=(n_query_points, data_dim)).astype(np.float32)

    with Timer() as sk_init:
        nn = neighbors.NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto', metric=metric, radius=radius)
        nn.fit(data)
    with Timer() as sk_query:
        if search_type == 'knn':
            sk_res_dist, sk_res_idx = nn.kneighbors(queries)
        else:
            sk_res_dist, sk_res_idx = nn.radius_neighbors(queries)

    with Timer() as kd_init:
        nn = pynanoflann.KDTree(n_neighbors=n_neighbors, metric=metric, radius=radius)
        nn.fit(data)

    with Timer() as kd_query:
        if search_type == 'knn':
            kd_res_dist, kd_res_idx = nn.kneighbors(queries)
        else:
            kd_res_dist, kd_res_idx = nn.radius_neighbors(queries)

    if search_type == 'knn':
        # allow small diff due to floating point computation
        assert (kd_res_idx == sk_res_idx).mean() > 0.99, params
        assert np.allclose(kd_res_dist, sk_res_dist), params
    else:
        # sklearn radius search does not allow to return sorted indices
        # So let's check as an unordered sets
        for k, s in zip(kd_res_idx, sk_res_idx):
            if len(k):
                rat = len(set(k).intersection(set(s))) / len(k)
                assert rat > 0.99, params
            else:
                # Explicit emptiness check: `(k == s).all()` is vacuously
                # true when k is empty and s has a single element, because
                # shapes (0,) and (1,) broadcast to an empty array.
                assert len(s) == 0, params

    if output and search_type == 'knn':
        diff = kd_res_dist - sk_res_dist
        rows = [['sk', sk_init, sk_query], ['kd', kd_init, kd_query]]
        t = tabulate.tabulate(rows, headers=['', 'Init', 'Query'], tablefmt='psql')
        print(t)
        print('Dist diff: {}'.format(diff.sum()))
        print('IDX diff: {} / {}'.format((kd_res_idx != sk_res_idx).sum(), kd_res_idx.size))
Ejemplo n.º 9
0
    def generate_random_flann_forest(self, n_trees=1, subdims=1):
        """Rebuild ``self.forest`` with KD-trees over random coordinate subsets.

        For each of ``n_trees`` trees, ``subdims`` distinct dimension indices
        are sampled from ``range(self.n_dims)``, the ``center`` of each of the
        first ``self.n_centers`` entries of ``self.high_dim_centers`` is
        restricted to those dimensions, and a 1-nearest-neighbour
        ``pynanoflann.KDTree`` is fitted on the result.

        Each element appended to ``self.forest`` is a tuple
        ``(tree, sampled_dims, centers)``.
        """
        self.forest = []

        for _ in range(n_trees):
            sampled_dims = random.sample(range(self.n_dims), subdims)

            # One row per center, one column per sampled dimension
            rows = []
            for center_no in range(self.n_centers):
                coords = self.high_dim_centers[center_no].center
                rows.append([coords[d] for d in sampled_dims])
            projected = np.array(rows)

            # Fresh list per tree, in the same order as the rows above
            centers = [self.high_dim_centers[center_no]
                       for center_no in range(self.n_centers)]

            tree = pynanoflann.KDTree(n_neighbors=1)
            tree.fit(projected)
            self.forest.append((tree, sampled_dims, centers))
Ejemplo n.º 10
0
def test_get_data():
    """Exercise ``get_data`` before fitting and after a pickle round trip.

    Before ``fit`` the call must raise ``NotFittedError``. On the unpickled
    tree, modifying the default return value must leave the stored data
    unchanged, while modifying the ``copy=False`` return value must change
    what ``get_data`` reports afterwards.
    """
    data = np.random.uniform(0, 100, size=(5000, 3)).astype(np.float32)

    kdtree = pynanoflann.KDTree()
    with pytest.raises(NotFittedError):
        kdtree.get_data()

    kdtree.fit(data)
    unpickled_kdtree = pickle.loads(pickle.dumps(kdtree))

    # Default call: in-place edits of the result must not reach the tree
    copied = unpickled_kdtree.get_data()
    assert (copied == data).all()
    copied *= 2
    assert (unpickled_kdtree.get_data() == data).all()

    # copy=False: in-place edits of the result must be visible afterwards
    shared = unpickled_kdtree.get_data(copy=False)
    assert (shared == data).all()
    shared *= 2
    assert (unpickled_kdtree.get_data() != data).all()
Ejemplo n.º 11
0
def test_multithreaded_radius():
    """Radius search with ``n_jobs=4`` must match the default call.

    Both calls are timed with ``time.time()``; the elapsed values are
    measured but not asserted on.
    """
    index = np.random.rand(40_000, 3)
    query = np.random.rand(20_000, 3)

    kd_tree = pynanoflann.KDTree(metric="L2", radius=0.1)
    kd_tree.fit(index)

    # Default call
    started = time.time()
    base_dist, base_idx = kd_tree.radius_neighbors(query)
    base_elapsed = time.time() - started

    # Same query with n_jobs=4
    started = time.time()
    jobs_dist, jobs_idx = kd_tree.radius_neighbors(query, n_jobs=4)
    jobs_elapsed = time.time() - started

    # Per-query distance arrays: same count, values close
    assert len(base_dist) == len(jobs_dist)
    for expected, actual in zip(base_dist, jobs_dist):
        assert np.allclose(expected, actual)

    # Per-query index arrays: same count, values equal
    assert len(base_idx) == len(jobs_idx)
    for expected, actual in zip(base_idx, jobs_idx):
        assert (expected == actual).all()
Ejemplo n.º 12
0
# Benchmark: time index construction and k-NN queries for several searchers
# on the same random data. Every module-level name is kept as-is because
# code outside this section may read it.
index_type = np.float32

data = np.random.uniform(
    low=0, high=100, size=(n_index_points, data_dim)).astype(index_type)
queries = np.random.uniform(
    low=0, high=100, size=(n_query_points, data_dim)).astype(index_type)

# Searchers keyed by display name: three sklearn algorithms, then pynanoflann
algs = {
    "sklearn_" + algorithm: neighbors.NearestNeighbors(
        n_neighbors=n_neighbors, algorithm=algorithm)
    for algorithm in ("brute", "ball_tree", "kd_tree")
}
algs["pynanoflann"] = pynanoflann.KDTree(n_neighbors=n_neighbors)

# One (name, build seconds, query seconds) tuple per searcher per repetition
results = []
for rep in range(n_repititions):
    for alg_name, nn in algs.items():
        with Timer() as index_build_time:
            nn.fit(data)
        with Timer() as query_time:
            dist, idx = nn.kneighbors(queries)

        timing_row = (alg_name, index_build_time.elapsed, query_time.elapsed)
        results.append(timing_row)
Ejemplo n.º 13
0
def test_warning():
    """Creating a default tree and fitting 100-dimensional data must warn.

    The whole sequence runs inside ``pytest.warns(Warning)``, so the test
    passes when any of the enclosed calls emits a warning.
    """
    with pytest.warns(Warning):
        tree = pynanoflann.KDTree()
        high_dim_points = np.random.uniform(size=(100, 100))
        tree.fit(high_dim_points)