def test_search_by_id(self):
    """Check ``search_by_id`` ordering on a three-point index (default metric).

    Three 3-dimensional points are indexed and every point is queried for
    its three nearest neighbours; the query point itself is expected first.
    """
    # NOTE(review): another ``test_search_by_id`` is visible further down in
    # this chunk. If both live in the same TestCase, the later definition
    # shadows this one and this test never runs -- confirm and rename.
    dim = 3
    index = HnswIndex(dim)
    for vector in ([2, 1, 0], [1, 2, 0], [0, 0, 1]):
        index.add_data(vector)
    index.build(max_m0=10)

    self.assertEqual(index.search_by_id(0, 3), [0, 1, 2])
    self.assertEqual(index.search_by_id(1, 3), [1, 0, 2])
    # Points 0 and 1 are equally far from point 2, so either order is valid.
    # assertIn (rather than assertTrue(x in ...)) reports the actual result
    # when the check fails.
    self.assertIn(index.search_by_id(2, 3), [[2, 0, 1], [2, 1, 0]])
def test_large_index(self):
    """Check that near-duplicate pairs are each other's nearest neighbour.

    Builds an 'L2' index of 10,000 ten-dimensional points made of 5,000
    pairs. Both members of a pair are the same random base point shifted by
    1 plus small independent Gaussian noise (sigma 1e-2), so ids ``j`` and
    ``j + 1`` (``j`` even) are expected to be mutual nearest neighbours.
    """
    # Generate pairs of random points where the pair is super close
    dim = 10
    n_points = 10000
    index = HnswIndex(dim, 'L2')
    # ``range`` instead of the Python-2-only ``xrange``: same iteration on
    # both major versions, no compatibility shim needed.
    for _ in range(0, n_points, 2):
        base = [random.gauss(0, 1) for _ in range(dim)]
        # TODO: the constant offset 1 should be a per-dimension offset q[...]
        # with q = [random.gauss(0, 10) for ... in range(dim)].
        first = [1 + coord + random.gauss(0, 1e-2) for coord in base]
        second = [1 + coord + random.gauss(0, 1e-2) for coord in base]
        index.add_data(first)
        index.add_data(second)
    index.build()

    for j in range(0, n_points, 2):
        self.assertEqual(index.search_by_id(j, 2), [j, j + 1])
        self.assertEqual(index.search_by_id(j + 1, 2), [j + 1, j])
def test_search_by_id(self):
    """Check ``search_by_id`` ordering on three 2-D points with the 'L2' metric.

    The points (2, 2), (3, 2) and (3, 3) get ids 0, 1 and 2. Querying from
    either end of the chain must list the query point first, then the middle
    point, then the far end.
    """
    dim = 2
    index = HnswIndex(dim, 'L2')
    for point in ([2, 2], [3, 2], [3, 3]):
        index.add_data(point)
    index.build()

    # (query id, expected neighbour ids, nearest first)
    expectations = (
        (0, [0, 1, 2]),
        (2, [2, 1, 0]),
    )
    for query_id, expected_ids in expectations:
        self.assertEqual(index.search_by_id(query_id, 3), expected_ids)
def test04_batch_search_by_ids(self):
    """Check that ``batch_search_by_ids`` agrees with repeated ``search_by_id``.

    Loads the saved model from ``self.model_fname``, draws 100 random ids in
    ``[0, self.data_num)`` and compares the batched result (12 threads,
    distances included) with the per-id results for the same ids.
    """
    index = HnswIndex(self.dim)
    index.load(self.model_fname)

    # ``range`` instead of the Python-2-only ``xrange``; the loop variable
    # is unused, so it is named ``_``.
    query_ids = [random.randrange(0, self.data_num) for _ in range(100)]

    batch_res = index.batch_search_by_ids(
        query_ids, 10, num_threads=12, include_distances=True)
    normal_res = [
        index.search_by_id(query_id, 10, include_distances=True)
        for query_id in query_ids
    ]
    self.assertEqual(batch_res, normal_res)
def kNN(matrix: np.ndarray, k: int) -> np.ndarray:
    """Return the sorted mean k-nearest-neighbour distance of every row.

    Each row of ``matrix`` is added to an 'L2' ``HnswIndex``. After the
    index is built, every row is queried by id for its ``k`` nearest
    neighbours, the square root of each returned distance is taken and the
    roots are averaged. The per-row averages are returned in ascending order.

    Args:
        matrix: 2-D array; one sample per row, ``matrix.shape[1]`` features.
        k: Number of neighbours requested per sample. Also sets
            ``ef_construction`` to ``int(k * 1.1)``.

    Returns:
        1-D array with one mean distance per row, sorted ascending. The
        annotation previously said ``List[float]``, but ``np.sort`` has
        always returned an ``np.ndarray`` here.
    """
    index = HnswIndex(matrix.shape[1], 'L2')
    for sample in matrix:
        index.add_data(sample)
    index.build(m=32, max_m0=48, ef_construction=int(k * 1.1),
                n_threads=cpu_count())

    mean_distances = []
    for sample_id in range(matrix.shape[0]):
        neighbours = index.search_by_id(sample_id, k, include_distances=True)
        distances = np.array([dist for _, dist in neighbours])
        # NOTE(review): the sqrt presumably converts squared L2 distances to
        # Euclidean ones -- confirm against the n2 distance definition.
        mean_distances.append(np.mean(np.sqrt(distances)))
    return np.sort(mean_distances)
import random

from n2 import HnswIndex

# Example: build an index of random vectors, save it, reload it and query it
# both by item id and by an arbitrary vector.

f = 3  # dimensionality of the indexed vectors
# The metric can be passed explicitly: HnswIndex(f, "L2") or
# HnswIndex(f, "angular").
t = HnswIndex(f)
# ``range`` instead of the Python-2-only ``xrange`` so the example runs on
# Python 2 and Python 3 alike.
for i in range(1000):
    v = [random.gauss(0, 1) for z in range(f)]
    t.add_data(v)

t.build(m=5, max_m0=10, n_threads=4)
t.save('test.n2')

# Load the saved model into a fresh index object.
u = HnswIndex(f, "angular")
u.load('test.n2')

search_id = 1
k = 3

# Query by the id of an item that is already in the index.
neighbor_ids = u.search_by_id(search_id, k)
print(
    "[search_by_id]: Nearest neighborhoods of id {}: {}".format(
        search_id, neighbor_ids))

# Query by an arbitrary vector; include_distances=True is passed so the
# result carries distances as well.
example_vector_query = [random.gauss(0, 1) for z in range(f)]
nns = u.search_by_vector(example_vector_query, k, include_distances=True)
print(
    "[search_by_vector]: Nearest neighborhoods of vector {}: {}".format(
        example_vector_query, nns))