def test_index_save_load():
    """Check that a saved index can be reloaded and answers queries identically.

    An index is built from scratch, saved to disk, and then loaded into a
    second ``KDTree`` via ``fit(data, index_path)``. The test asserts that
    loading is at least 10x faster than building and that the *loaded* tree
    returns exactly the same neighbors as the tree that was built.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    data = np.random.uniform(0, 100, size=(500000, 3)).astype(np.float32)
    queries = np.random.uniform(0, 100, size=(100, 3)).astype(np.float32)

    # Build an index from scratch and time the build.
    kdtree = pynanoflann.KDTree()
    with Timer() as index_build_time:
        kdtree.fit(data)

    dist1, idx1 = kdtree.kneighbors(queries)

    # A private temporary directory avoids clashing with stale files or with
    # other test runs that would otherwise share a fixed path, and it is
    # cleaned up automatically when the block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        index_path = os.path.join(tmp_dir, 'index.bin')

        # Save the built index.
        # NOTE: Only the index will be saved, data points are NOT stored in the index
        kdtree.save_index(index_path)
        assert os.path.exists(index_path)

        # Now, load a prebuilt index.
        # BEWARE, data points must be the same
        new_kdtree = pynanoflann.KDTree()
        with Timer() as index_load_time:
            new_kdtree.fit(data, index_path)

        # Fitting with a prebuilt index is much faster, since it only requires loading a binary file
        assert index_build_time.elapsed > 10 * index_load_time.elapsed

        # Query the *loaded* tree: querying the original tree again would
        # compare it with itself and never exercise the loaded index.
        dist2, idx2 = new_kdtree.kneighbors(queries)

    # At the same time, the results are identical
    assert (dist2 == dist1).all()
    assert (idx1 == idx2).all()
def test_incorrect_param():
    """Check that invalid constructor arguments and inputs raise ``ValueError``.

    Each ``pytest.raises`` block wraps exactly one call so that the test can
    only pass when that specific call is the one that raises.
    """
    # Metric name 'l3' must be rejected at construction time.
    with pytest.raises(ValueError):
        pynanoflann.KDTree(metric='l3')

    nn = pynanoflann.KDTree(n_neighbors=10)

    # An array of strings must be rejected.
    with pytest.raises(ValueError):
        nn.fit(np.array(['str', 'qwe']))

    # A 3-dimensional array must be rejected.
    with pytest.raises(ValueError):
        nn.fit(np.random.uniform(size=(1, 2, 3)))

    # The fit is kept outside the raises-block: the error under test must
    # come from the query, not from fitting.
    # NOTE(review): presumably the query fails because n_neighbors=10 exceeds
    # the 5 indexed points - confirm against the KDTree implementation.
    nn.fit(np.random.uniform(size=(5, 10)))
    with pytest.raises(ValueError):
        nn.kneighbors(np.random.uniform(size=(2, 10)))
def test_batched():
    """Compare ``batched_kneighbors`` with a per-batch loop over ``KDTree``.

    One tree is built and queried per batch to produce reference results; the
    batched call with the same parameters must give close distances and
    identical indices.
    """
    import pynanoflann
    import numpy as np

    n_batches = 100
    target = np.random.rand(n_batches, 10000, 3).astype(np.float32)
    query = np.random.rand(n_batches, 2000, 3).astype(np.float32)

    # Reference: one independent tree per batch.
    reference_d, reference_i = [], []
    for batch_target, batch_query in zip(target, query):
        tree = pynanoflann.KDTree(n_neighbors=4, metric='L2', leaf_size=20)
        tree.fit(batch_target)
        batch_d, batch_i = tree.kneighbors(batch_query)
        reference_d.append(batch_d)
        reference_i.append(batch_i)
    reference_d = np.array(reference_d)
    reference_i = np.array(reference_i)

    # Same search done in a single batched call.
    distances, indices = pynanoflann.batched_kneighbors(
        target, query, n_neighbors=4, metric='L2', leaf_size=20, n_jobs=2
    )
    distances = np.array(distances)
    indices = np.array(indices)

    assert np.allclose(reference_d, distances)
    assert (indices == reference_i).all()
def test_radius():
    """Check radius search with the default radius and with a per-call override."""
    points = np.array([1., 2., 3., 4.]).reshape((-1, 1))
    probe = np.array([1.5]).reshape((-1, 1))

    tree = pynanoflann.KDTree(metric='l1', radius=1)
    tree.fit(points)

    # With the constructor radius of 1, the points 1.0 and 2.0 are in range of 1.5.
    _, found = tree.radius_neighbors(probe)
    assert set(found[0]) == {0, 1}

    # With radius 0.1 nothing is in range.
    _, found = tree.radius_neighbors(probe, radius=0.1)
    assert set(found[0]) == set()
def test_radius_arg_passing():
    """Check that an explicit ``radius`` equal to the constructor value gives the same result."""
    points = np.array([1., 2., 3., 4.]).reshape(-1, 1)
    probe = np.array([0.1]).reshape(-1, 1)

    tree = pynanoflann.KDTree(metric='l2', radius=2)
    tree.fit(points)

    # Radius taken from the constructor vs. passed explicitly at query time.
    implicit = tree.radius_neighbors(probe)[1]
    explicit = tree.radius_neighbors(probe, radius=2)[1]

    assert (implicit[0] == explicit[0]).all()
    assert set(implicit[0]) == {0, 1}
def search_batch(i):
    """Check that batch ``i`` gives the same kNN result with and without ``n_jobs=4``.

    ``target`` and ``query`` are not defined in this block; they are read from
    the surrounding scope and indexed by ``i``.
    """
    batch_target, batch_query = target[i], query[i]

    tree = pynanoflann.KDTree(n_neighbors=1, metric='L2', leaf_size=20)
    tree.fit(batch_target)

    default_d, default_idx = tree.kneighbors(batch_query)
    jobs_d, jobs_idx = tree.kneighbors(batch_query, n_jobs=4)

    assert np.allclose(default_d, jobs_d)
    assert (default_idx == jobs_idx).all()
def test_pickle():
    """Round-trip fitted and unfitted trees through ``pickle`` and check the results.

    The fitted tree must answer queries identically after unpickling and keep
    its parameters; the pickle sizes are checked for both the fitted and the
    unfitted case.
    """
    leaf_size = 20
    radius = 0.5
    data = np.random.uniform(0, 100, size=(500000, 3)).astype(np.float32)
    queries = np.random.uniform(0, 100, size=(100, 3)).astype(np.float32)

    # Build a kd-tree and record the reference answers.
    kdtree = pynanoflann.KDTree(metric='l1', leaf_size=leaf_size, radius=radius)
    kdtree.fit(data)
    expected_dist, expected_idx = kdtree.kneighbors(queries)

    # Serialize to an in-memory byte string.
    fitted_blob = pickle.dumps(kdtree)

    # The pickle includes the data points: (500000 points * 3 dim * 4 bytes) ~ 6Mb
    assert 6_000_000 < len(fitted_blob) < 6_001_000

    # Drop the originals so only the pickled copy remains.
    del kdtree, data

    # Restore and query again.
    restored = pickle.loads(fitted_blob)
    actual_dist, actual_idx = restored.kneighbors(queries)

    # Query results survive the round trip unchanged.
    assert (expected_dist == actual_dist).all()
    assert (expected_idx == actual_idx).all()

    # So do the constructor parameters.
    assert restored.leaf_size == leaf_size
    assert restored.radius == radius

    # An unfitted tree pickles to a very small payload: only parameters.
    unfitted_blob = pickle.dumps(pynanoflann.KDTree(metric='l1'))
    assert len(unfitted_blob) < 200

    restored_unfitted = pickle.loads(unfitted_blob)
    assert restored_unfitted.metric == 'l1'
def test(search_type='knn', data_dim=3, n_index_points=2000, n_query_points=100, n_neighbors=10, metric='l2', output=False, radius=1):
    """Compare ``pynanoflann.KDTree`` results against sklearn ``NearestNeighbors``.

    Random float32 index and query points are generated in [0, 100), both
    implementations are fitted and queried under timers, and their results
    are compared.

    Args:
        search_type: 'knn' runs ``kneighbors``; any other value runs
            ``radius_neighbors``.
        data_dim: Number of columns of the generated points.
        n_index_points: Number of points to index.
        n_query_points: Number of query points.
        n_neighbors: Passed to both implementations.
        metric: Passed to both implementations.
        output: If true (and ``search_type == 'knn'``), print a timing table
            and the distance / index differences.
        radius: Passed to both implementations.
    """
    data = np.random.uniform(0, 100, size=(n_index_points, data_dim)).astype(np.float32)
    queries = np.random.uniform(0, 100, size=(n_query_points, data_dim)).astype(np.float32)

    # Reference implementation: sklearn.
    with Timer() as sk_init:
        nn = neighbors.NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto', metric=metric, radius=radius)
        nn.fit(data)

    with Timer() as sk_query:
        if search_type == 'knn':
            sk_res_dist, sk_res_idx = nn.kneighbors(queries)
        else:
            sk_res_dist, sk_res_idx = nn.radius_neighbors(queries)

    # Implementation under test; `nn` is rebound to the pynanoflann tree.
    with Timer() as kd_init:
        nn = pynanoflann.KDTree(n_neighbors=n_neighbors, metric=metric, radius=radius)
        nn.fit(data)

    with Timer() as kd_query:
        if search_type == 'knn':
            kd_res_dist, kd_res_idx = nn.kneighbors(queries)
        else:
            kd_res_dist, kd_res_idx = nn.radius_neighbors(queries)

    # Snapshot the argument values of this call so that a failing assertion
    # reports the parameter combination that triggered it.
    params = {}
    for k in inspect.signature(test).parameters:
        params[k] = locals().get(k)

    if search_type == 'knn':
        # Allow a small difference due to floating point computation:
        # up to 1% of the indices may differ, distances must be close.
        assert (kd_res_idx == sk_res_idx).mean() > 0.99, params
        assert np.allclose(kd_res_dist, sk_res_dist), params
    else:
        # sklearn radius search does not allow returning sorted indices,
        # so each query's result is compared as an unordered set.
        for k, s in zip(kd_res_idx, sk_res_idx):
            if len(k):
                # Fraction of the kd-tree neighbors that sklearn also found.
                rat = len(set(k).intersection(set(s))) / len(k)
                assert rat > 0.99
            else:
                assert (k == s).all()

    if output and search_type == 'knn':
        diff = kd_res_dist - sk_res_dist
        # NOTE: `data` is rebound here from the index points to the table rows.
        data = [['sk', sk_init, sk_query], ['kd', kd_init, kd_query]]
        t = tabulate.tabulate(data, headers=['', 'Init', 'Query'], tablefmt='psql')
        print(t)
        print('Dist diff: {}'.format(diff.sum()))
        print('IDX diff: {} / {}'.format((kd_res_idx != sk_res_idx).sum(), kd_res_idx.size))
def generate_random_flann_forest(self, n_trees=1, subdims=1):
    """Rebuild ``self.forest`` as ``n_trees`` kd-trees over random coordinate subsets.

    For every tree, ``subdims`` distinct dimension indices are sampled from
    ``range(self.n_dims)``; the first ``self.n_centers`` entries of
    ``self.high_dim_centers`` are projected onto those dimensions (via each
    entry's ``center`` attribute) and indexed in a 1-nearest-neighbor
    ``pynanoflann.KDTree``.

    Each forest entry is a tuple ``(tree, sampled_dims, centers)``.
    """
    self.forest = []
    for _ in range(n_trees):
        inx = random.sample(range(self.n_dims), subdims)

        # Fresh list of the centers for this tree, in index order.
        centers = [self.high_dim_centers[j] for j in range(self.n_centers)]

        # One row per center, restricted to the sampled dimensions.
        projected = np.array([[c.center[d] for d in inx] for c in centers])

        tree = pynanoflann.KDTree(n_neighbors=1)
        tree.fit(projected)
        self.forest.append((tree, inx, centers))
def test_get_data():
    """Check ``get_data`` on unfitted, fitted and unpickled trees.

    ``get_data()`` must raise ``NotFittedError`` before fitting. After a
    pickle round trip, the default call returns a copy (mutating it leaves
    the tree's data untouched), while ``copy=False`` returns an array whose
    in-place modification is visible through later ``get_data()`` calls.
    """
    data = np.random.uniform(0, 100, size=(5000, 3)).astype(np.float32)
    kdtree = pynanoflann.KDTree()

    # No data is available before fitting.
    with pytest.raises(NotFittedError):
        kdtree.get_data()

    kdtree.fit(data)
    restored = pickle.loads(pickle.dumps(kdtree))

    # Default: a copy, so scaling it in place does not affect the tree.
    copied = restored.get_data()
    assert (copied == data).all()
    copied *= 2
    assert (restored.get_data() == data).all()

    # copy=False: scaling in place changes what the tree reports.
    shared = restored.get_data(copy=False)
    assert (shared == data).all()
    shared *= 2
    assert (restored.get_data() != data).all()
def test_multithreaded_radius():
    """Check that radius search gives the same result with and without ``n_jobs=4``."""
    index = np.random.rand(40_000, 3)
    query = np.random.rand(20_000, 3)

    kd_tree = pynanoflann.KDTree(metric="L2", radius=0.1)
    kd_tree.fit(index)

    # Default call (no n_jobs), timed.
    started = time.time()
    default_dist, default_idx = kd_tree.radius_neighbors(query)
    t1 = time.time() - started

    # Same query with n_jobs=4, timed.
    started = time.time()
    jobs_dist, jobs_idx = kd_tree.radius_neighbors(query, n_jobs=4)
    t2 = time.time() - started

    # Results are per-query sequences, so they are compared query by query.
    assert len(default_dist) == len(jobs_dist)
    for expected, actual in zip(default_dist, jobs_dist):
        assert np.allclose(expected, actual)

    assert len(default_idx) == len(jobs_idx)
    for expected, actual in zip(default_idx, jobs_idx):
        assert (expected == actual).all()
# Benchmark fragment: times index construction and kNN queries for several
# nearest-neighbor implementations on the same random data.
# NOTE(review): n_index_points, n_query_points, data_dim, n_neighbors and
# n_repititions are not defined in this fragment - presumably parameters of
# an enclosing function that is not visible here; confirm.
index_type = np.float32
data = np.random.uniform(0, 100, size=(n_index_points, data_dim)).astype(index_type)
queries = np.random.uniform(0, 100, size=(n_query_points, data_dim)).astype(index_type)

# Implementations under comparison, keyed by the name used in the results.
algs = {
    "sklearn_brute": neighbors.NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute"),
    "sklearn_ball_tree": neighbors.NearestNeighbors(n_neighbors=n_neighbors, algorithm="ball_tree"),
    "sklearn_kd_tree": neighbors.NearestNeighbors(n_neighbors=n_neighbors, algorithm="kd_tree"),
    "pynanoflann": pynanoflann.KDTree(n_neighbors=n_neighbors),
}

# One (algorithm name, build seconds, query seconds) tuple per algorithm per repetition.
results = []
for rep in range(n_repititions):
    for alg_name, nn in algs.items():
        # The same estimator object is re-fitted on every repetition.
        with Timer() as index_build_time:
            nn.fit(data)
        # Query results are discarded; only the elapsed time is recorded.
        with Timer() as query_time:
            dist, idx = nn.kneighbors(queries)
        results.append(
            (alg_name, index_build_time.elapsed, query_time.elapsed))
def test_warning():
    """Check that constructing and fitting a tree on 100-column data emits a warning."""
    with pytest.warns(Warning):
        tree = pynanoflann.KDTree()
        wide_data = np.random.uniform(size=(100, 100))
        tree.fit(wide_data)