def _known_ordered_euclidean(self, hash_ftor, hash_idx,
                             ftor_train_hook=lambda d: None):
    """
    Build an index from vectors at known, strictly increasing euclidean
    distances from the origin and check that ``nn`` returns them in that
    order.

    :param hash_ftor: Hash functor for generating descriptor hash codes.
    :param hash_idx: Hash index instance to use in the local LSH algorithm
        instance, or None.
    :param ftor_train_hook: Optional callable invoked with the descriptor
        list to train the functor before index construction.
    """
    # Vector [j, 2j] has euclidean norm j*sqrt(5), so UUID order equals
    # rank order of distance from the origin.
    i = 1000
    test_descriptors = []
    for j in range(i):
        d = DescriptorMemoryElement('ordered', j)
        d.set_vector(np.array([j, j * 2], float))
        test_descriptors.append(d)
    # Shuffle so ordering must come from the index, not insertion order.
    random.shuffle(test_descriptors)

    ftor_train_hook(test_descriptors)

    di = MemoryDescriptorIndex()
    kvstore = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(hash_ftor, di, kvstore,
                                    hash_index=hash_idx,
                                    distance_method='euclidean')
    index.build_index(test_descriptors)

    # Since descriptors were built in increasing distance from (0, 0),
    # returned descriptors for a query of [0, 0] should be in UUID order.
    q = DescriptorMemoryElement('query', i)
    q.set_vector(np.array([0, 0], float))

    # Top-5 results should be UUIDs 0..4, nearest first.
    r, dists = index.nn(q, 5)
    for rank in range(5):
        self.assertEqual(r[rank].uuid(), rank)

    # A global search should return everything in complete order.
    # (Original bound an unused `dist` via a 3-way zip; enumerate is the
    # idiomatic equivalent.)
    r, dists = index.nn(q, i)
    for j, d in enumerate(r):
        self.assertEqual(d.uuid(), j)
def _known_ordered_euclidean(self, hash_ftor, hash_idx,
                             ftor_train_hook=lambda d: None):
    """
    Construct vectors whose euclidean distance from the origin strictly
    increases with UUID and verify ``nn`` results come back in UUID order.
    """
    count = 1000
    elems = []
    for uid in range(count):
        elem = DescriptorMemoryElement('ordered', uid)
        elem.set_vector(np.array([uid, uid * 2], float))
        elems.append(elem)
    # Randomize insertion order; result ordering must come from distances.
    random.shuffle(elems)

    ftor_train_hook(elems)

    descr_index = MemoryDescriptorIndex()
    kv = MemoryKeyValueStore()
    nn_index = LSHNearestNeighborIndex(hash_ftor, descr_index, kv,
                                       hash_index=hash_idx,
                                       distance_method='euclidean')
    nn_index.build_index(elems)

    # Query at the origin: nearest neighbors should be UUIDs 0, 1, 2, ...
    query = DescriptorMemoryElement('query', count)
    query.set_vector(np.array([0, 0], float))

    # Top result should have UUID == 0 (nearest to query), and so on.
    neighbors, dists = nn_index.nn(query, 5)
    self.assertEqual(neighbors[0].uuid(), 0)
    self.assertEqual(neighbors[1].uuid(), 1)
    self.assertEqual(neighbors[2].uuid(), 2)
    self.assertEqual(neighbors[3].uuid(), 3)
    self.assertEqual(neighbors[4].uuid(), 4)

    # An exhaustive query should be fully ordered.
    neighbors, dists = nn_index.nn(query, count)
    for expected_uid, d, dist in zip(range(count), neighbors, dists):
        self.assertEqual(d.uuid(), expected_uid)
def _random_euclidean(self, hash_ftor, hash_idx,
                      ftor_train_hook=lambda d: None):
    """
    Build an LSH index over seeded-random descriptors and sanity-check
    euclidean nearest-neighbor queries.

    :param hash_ftor: Hash function class for generating hash codes for
        descriptors.
    :param hash_idx: Hash index instance to use in local LSH algo
        instance.
    :param ftor_train_hook: Function for training functor if necessary.
    """
    n = 1000
    dim = 256
    td = []
    # Fixed seed keeps the generated vectors deterministic per run.
    numpy.random.seed(self.RANDOM_SEED)
    for uid in range(n):
        elem = DescriptorMemoryElement('random', uid)
        elem.set_vector(numpy.random.rand(dim))
        td.append(elem)

    ftor_train_hook(td)

    di = MemoryDescriptorIndex()
    kvstore = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(hash_ftor, di, kvstore,
                                    hash_index=hash_idx,
                                    distance_method='euclidean')
    index.build_index(td)

    # A member of the build set must be its own single nearest neighbor.
    q = td[255]
    r, dists = index.nn(q, 1)
    ntools.assert_equal(r[0], q)

    # A query slightly perturbed from a build vector should still resolve
    # to that build vector.
    td_q = td[0]
    q = DescriptorMemoryElement('query', n)
    v = td_q.vector().copy()
    v_min = max(v.min(), 0.1)
    v[0] += v_min
    v[dim - 1] -= v_min
    q.set_vector(v)
    r, dists = index.nn(q, 1)
    ntools.assert_false(numpy.array_equal(q.vector(), td_q.vector()))
    ntools.assert_equal(r[0], td_q)

    # For a fully random query, results must come back sorted by distance
    # regardless of the requested k.
    q = DescriptorMemoryElement('query', n + 1)
    q.set_vector(numpy.random.rand(dim))
    for k in (10, n):
        r, dists = index.nn(q, k)
        for idx in range(1, len(dists)):
            ntools.assert_greater(dists[idx], dists[idx - 1])
def _random_euclidean(self, hash_ftor, hash_idx,
                      ftor_train_hook=lambda d: None):
    """
    Exercise euclidean ``nn`` behavior over a seeded-random descriptor set.

    :param hash_ftor: Hash function class for generating hash codes for
        descriptors.
    :param hash_idx: Hash index instance to use in local LSH algo
        instance.
    :param ftor_train_hook: Function for training functor if necessary.
    """
    num_elems = 1000
    dim = 256
    # Seed so the random descriptor set is reproducible across runs.
    np.random.seed(self.RANDOM_SEED)
    elems = []
    for uid in range(num_elems):
        e = DescriptorMemoryElement('random', uid)
        e.set_vector(np.random.rand(dim))
        elems.append(e)

    ftor_train_hook(elems)

    descr_index = MemoryDescriptorIndex()
    kv = MemoryKeyValueStore()
    nn_index = LSHNearestNeighborIndex(hash_ftor, descr_index, kv,
                                       hash_index=hash_idx,
                                       distance_method='euclidean')
    nn_index.build_index(elems)

    # A member of the build set must be its own nearest neighbor at k=1.
    member = elems[255]
    neighbors, dists = nn_index.nn(member, 1)
    self.assertEqual(neighbors[0], member)

    # A slightly perturbed copy of a build vector should still resolve to
    # that build vector as its nearest neighbor.
    target = elems[0]
    perturbed = DescriptorMemoryElement('query', num_elems)
    vec = target.vector().copy()
    delta = max(vec.min(), 0.1)
    vec[0] += delta
    vec[dim - 1] -= delta
    perturbed.set_vector(vec)
    neighbors, dists = nn_index.nn(perturbed, 1)
    self.assertFalse(np.array_equal(perturbed.vector(), target.vector()))
    self.assertEqual(neighbors[0], target)

    # Results of a fresh random query must come back sorted by distance
    # for any requested k.
    rand_q = DescriptorMemoryElement('query', num_elems + 1)
    rand_q.set_vector(np.random.rand(dim))
    for k in (10, num_elems):
        neighbors, dists = nn_index.nn(rand_q, k)
        for idx in range(1, len(dists)):
            self.assertGreater(dists[idx], dists[idx - 1])
def _known_unit(self, hash_ftor, hash_idx, dist_method,
                ftor_train_hook=lambda d: None):
    """
    Index the standard basis (unit) vectors, which are all equidistant,
    and verify distance values for a zero-vector query and an in-index
    query under the given distance method.
    """
    dim = 5
    units = []
    for axis in range(dim):
        vec = numpy.zeros(dim, float)
        vec[axis] = 1.
        elem = DescriptorMemoryElement('unit', axis)
        elem.set_vector(vec)
        units.append(elem)

    ftor_train_hook(units)

    di = MemoryDescriptorIndex()
    kvstore = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(hash_ftor, di, kvstore,
                                    hash_index=hash_idx,
                                    distance_method=dist_method)
    index.build_index(units)

    # The zero vector intersects no unit vector, so every returned
    # distance should be the maximum, 1.0; result order is irrelevant.
    zero_q = DescriptorMemoryElement('query', 0)
    zero_q.set_vector(numpy.zeros(dim, float))
    r, dists = index.nn(zero_q, dim)
    for dist in dists:
        ntools.assert_equal(dist, 1.)

    # Querying with an indexed element must return that element first at
    # zero distance, for both k=1 and k=dim.
    member_q = units[3]
    for k in (1, dim):
        r, dists = index.nn(member_q, k)
        ntools.assert_equal(r[0], member_q)
        ntools.assert_equal(dists[0], 0.)
def _random_euclidean(self, hash_ftor, hash_idx,
                      ftor_train_hook=lambda d: None):
    """
    Build an LSH index over seeded-random descriptors and sanity-check
    euclidean nearest-neighbor queries.

    :param hash_ftor: Hash functor used to generate descriptor hash codes.
    :param hash_idx: Hash index instance to use in the local LSH algorithm
        instance.
    :param ftor_train_hook: Optional callable invoked with the descriptor
        list to train the functor before index construction.
    """
    # make random descriptors
    i = 1000
    dim = 256
    td = []
    # Fixed seed keeps the generated vectors deterministic per test run.
    numpy.random.seed(self.RANDOM_SEED)
    for j in xrange(i):
        d = DescriptorMemoryElement("random", j)
        d.set_vector(numpy.random.rand(dim))
        td.append(d)

    ftor_train_hook(td)

    di = MemoryDescriptorIndex()
    index = LSHNearestNeighborIndex(hash_ftor, di, hash_idx,
                                    distance_method="euclidean")
    index.build_index(td)

    # test query from build set -- should return same descriptor when k=1
    q = td[255]
    r, dists = index.nn(q, 1)
    ntools.assert_equal(r[0], q)

    # test query very near a build vector
    td_q = td[0]
    q = DescriptorMemoryElement("query", i)
    v = td_q.vector().copy()
    # Perturb the first/last components so the query differs from td_q
    # but should remain closest to it.
    v_min = max(v.min(), 0.1)
    v[0] += v_min
    v[dim - 1] -= v_min
    q.set_vector(v)
    r, dists = index.nn(q, 1)
    ntools.assert_false(numpy.array_equal(q.vector(), td_q.vector()))
    ntools.assert_equal(r[0], td_q)

    # random query
    q = DescriptorMemoryElement("query", i + 1)
    q.set_vector(numpy.random.rand(dim))

    # for any query of size k, results should at least be in distance order
    r, dists = index.nn(q, 10)
    for j in xrange(1, len(dists)):
        ntools.assert_greater(dists[j], dists[j - 1])
    r, dists = index.nn(q, i)
    for j in xrange(1, len(dists)):
        ntools.assert_greater(dists[j], dists[j - 1])

    # Reset the class-level element cache so descriptor state does not
    # leak across tests.
    DescriptorMemoryElement.MEMORY_CACHE = {}
def _known_unit(self, hash_ftor, hash_idx, dist_method,
                ftor_train_hook=lambda d: None):
    """
    Build an index from the 5 standard basis (unit) vectors — all mutually
    equidistant — and check expected distances for a zero-vector query and
    an in-index query.
    """
    dim = 5
    basis_elems = []
    for axis_i in range(dim):
        one_hot = np.zeros(dim, float)
        one_hot[axis_i] = 1.
        de = DescriptorMemoryElement('unit', axis_i)
        de.set_vector(one_hot)
        basis_elems.append(de)

    ftor_train_hook(basis_elems)

    descr_index = MemoryDescriptorIndex()
    kv = MemoryKeyValueStore()
    nn_index = LSHNearestNeighborIndex(hash_ftor, descr_index, kv,
                                       hash_index=hash_idx,
                                       distance_method=dist_method)
    nn_index.build_index(basis_elems)

    # The zero vector shares no components with any basis vector, so all
    # distances come back as the maximum value 1.0 (order unimportant).
    q = DescriptorMemoryElement('query', 0)
    q.set_vector(np.zeros(dim, float))
    r, dists = nn_index.nn(q, dim)
    for dist in dists:
        self.assertEqual(dist, 1.)

    # An indexed element queried against the index is its own nearest
    # neighbor at distance 0, regardless of requested k.
    q = basis_elems[3]
    r, dists = nn_index.nn(q, 1)
    self.assertEqual(r[0], q)
    self.assertEqual(dists[0], 0.)
    r, dists = nn_index.nn(q, dim)
    self.assertEqual(r[0], q)
    self.assertEqual(dists[0], 0.)
def _known_unit(self, hash_ftor, hash_idx, dist_method,
                ftor_train_hook=lambda d: None):
    """
    Build an index from the unit (standard basis) vectors and verify
    expected distance values for a zero-vector query and an in-index
    query under the given distance method.

    :param hash_ftor: Hash functor used to generate descriptor hash codes.
    :param hash_idx: Hash index instance to use in the local LSH algorithm
        instance.
    :param dist_method: Distance method label passed through to the index.
    :param ftor_train_hook: Optional callable invoked with the descriptor
        list to train the functor before index construction.
    """
    ###
    # Unit vectors - Equal distance
    #
    dim = 5
    test_descriptors = []
    for i in xrange(dim):
        v = numpy.zeros(dim, float)
        v[i] = 1.0
        d = DescriptorMemoryElement("unit", i)
        d.set_vector(v)
        test_descriptors.append(d)

    ftor_train_hook(test_descriptors)

    di = MemoryDescriptorIndex()
    index = LSHNearestNeighborIndex(hash_ftor, di, hash_idx,
                                    distance_method=dist_method)
    index.build_index(test_descriptors)

    # query with zero vector
    # -> all modeled descriptors have no intersection, dists should be 1.0,
    #    or maximum distance by histogram intersection
    q = DescriptorMemoryElement("query", 0)
    q.set_vector(numpy.zeros(dim, float))
    r, dists = index.nn(q, dim)
    # All dists should be 1.0, r order doesn't matter
    for d in dists:
        ntools.assert_equal(d, 1.0)

    # query with index element
    q = test_descriptors[3]
    r, dists = index.nn(q, 1)
    ntools.assert_equal(r[0], q)
    ntools.assert_equal(dists[0], 0.0)

    r, dists = index.nn(q, dim)
    ntools.assert_equal(r[0], q)
    ntools.assert_equal(dists[0], 0.0)

    # Reset the class-level element cache so descriptor state does not
    # leak across tests.
    DescriptorMemoryElement.MEMORY_CACHE = {}