def test_update_index_with_hash_index(self) -> None: # Similar test to `test_update_index_add_new_descriptors` but with a # linear hash index. descr_set = MemoryDescriptorSet() hash_kvs = MemoryKeyValueStore() linear_hi = LinearHashIndex() # simplest hash index, heap-sorts. index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set, hash_kvs, linear_hi) descriptors1 = [ DescriptorMemoryElement('t', 0), DescriptorMemoryElement('t', 1), DescriptorMemoryElement('t', 2), DescriptorMemoryElement('t', 3), DescriptorMemoryElement('t', 4), ] descriptors2 = [ DescriptorMemoryElement('t', 5), DescriptorMemoryElement('t', 6), ] # Vectors of length 1 for easy dummy hashing prediction. for d in descriptors1 + descriptors2: d.set_vector(np.ones(1, float) * d.uuid()) # Build initial index. index.build_index(descriptors1) # Initial hash index should only encode hashes for first batch of # descriptors. self.assertSetEqual(linear_hi.index, {0, 1, 2, 3, 4}) # Update index and check that components have new data. index.update_index(descriptors2) # Now the hash index should include all descriptor hashes. self.assertSetEqual(linear_hi.index, {0, 1, 2, 3, 4, 5, 6})
def test_update_index_no_existing_index(self) -> None: # Test that calling update_index with no existing index acts like # building the index fresh. This test is basically the same as # test_build_index_fresh_build but using update_index instead. descr_set = MemoryDescriptorSet() hash_kvs = MemoryKeyValueStore() index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set, hash_kvs) descriptors = [ DescriptorMemoryElement('t', 0), DescriptorMemoryElement('t', 1), DescriptorMemoryElement('t', 2), DescriptorMemoryElement('t', 3), DescriptorMemoryElement('t', 4), ] # Vectors of length 1 for easy dummy hashing prediction. for d in descriptors: d.set_vector(np.ones(1, float) * d.uuid()) index.update_index(descriptors) # Make sure descriptors are now in attached index and in key-value-store self.assertEqual(descr_set.count(), 5) for d in descriptors: self.assertIn(d, descr_set) # Dummy hash function bins sum of descriptor vectors. self.assertEqual(hash_kvs.count(), 5) for i in range(5): self.assertSetEqual(hash_kvs.get(i), {i})
def test_update_index_existing_descriptors_frozenset(self) -> None: """ Same as ``test_update_index_similar_descriptors`` but testing that we can update the index when seeded with structures with existing values. """ # Similar Descriptors to build and update on (different instances) descriptors1 = [ DescriptorMemoryElement('t', 0).set_vector([0]), DescriptorMemoryElement('t', 1).set_vector([1]), DescriptorMemoryElement('t', 2).set_vector([2]), DescriptorMemoryElement('t', 3).set_vector([3]), DescriptorMemoryElement('t', 4).set_vector([4]), ] descriptors2 = [ DescriptorMemoryElement('t', 5).set_vector([0]), DescriptorMemoryElement('t', 6).set_vector([1]), DescriptorMemoryElement('t', 7).set_vector([2]), DescriptorMemoryElement('t', 8).set_vector([3]), DescriptorMemoryElement('t', 9).set_vector([4]), ] descr_set = MemoryDescriptorSet() descr_set.add_many_descriptors(descriptors1) hash_kvs = MemoryKeyValueStore() hash_kvs.add(0, frozenset({0})) hash_kvs.add(1, frozenset({1})) hash_kvs.add(2, frozenset({2})) hash_kvs.add(3, frozenset({3})) hash_kvs.add(4, frozenset({4})) index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set, hash_kvs) index.update_index(descriptors2) assert descr_set.count() == 10 # Above descriptors should be considered "in" the descriptor set now. for d in descriptors1: assert d in descr_set for d in descriptors2: assert d in descr_set # Known hashes of the above descriptors should be in the KVS assert set(hash_kvs.keys()) == {0, 1, 2, 3, 4} assert hash_kvs.get(0) == {0, 5} assert hash_kvs.get(1) == {1, 6} assert hash_kvs.get(2) == {2, 7} assert hash_kvs.get(3) == {3, 8} assert hash_kvs.get(4) == {4, 9}
def test_update_index_similar_descriptors(self) -> None: """ Test that updating a built index with similar descriptors (same vectors, different UUIDs) results in contained structures having an expected state. """ descr_set = MemoryDescriptorSet() hash_kvs = MemoryKeyValueStore() index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set, hash_kvs) # Similar Descriptors to build and update on (different instances) descriptors1 = [ DescriptorMemoryElement('t', 0).set_vector([0]), DescriptorMemoryElement('t', 1).set_vector([1]), DescriptorMemoryElement('t', 2).set_vector([2]), DescriptorMemoryElement('t', 3).set_vector([3]), DescriptorMemoryElement('t', 4).set_vector([4]), ] descriptors2 = [ DescriptorMemoryElement('t', 5).set_vector([0]), DescriptorMemoryElement('t', 6).set_vector([1]), DescriptorMemoryElement('t', 7).set_vector([2]), DescriptorMemoryElement('t', 8).set_vector([3]), DescriptorMemoryElement('t', 9).set_vector([4]), ] index.build_index(descriptors1) index.update_index(descriptors2) assert descr_set.count() == 10 # Above descriptors should be considered "in" the descriptor set now. for d in descriptors1: assert d in descr_set for d in descriptors2: assert d in descr_set # Known hashes of the above descriptors should be in the KVS assert set(hash_kvs.keys()) == {0, 1, 2, 3, 4} assert hash_kvs.get(0) == {0, 5} assert hash_kvs.get(1) == {1, 6} assert hash_kvs.get(2) == {2, 7} assert hash_kvs.get(3) == {3, 8} assert hash_kvs.get(4) == {4, 9}
def test_update_index_add_new_descriptors(self) -> None: # Test that calling update index after a build index causes index # components to be properly updated. descr_set = MemoryDescriptorSet() hash_kvs = MemoryKeyValueStore() index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set, hash_kvs) descriptors1 = [ DescriptorMemoryElement('t', 0), DescriptorMemoryElement('t', 1), DescriptorMemoryElement('t', 2), DescriptorMemoryElement('t', 3), DescriptorMemoryElement('t', 4), ] descriptors2 = [ DescriptorMemoryElement('t', 5), DescriptorMemoryElement('t', 6), ] # Vectors of length 1 for easy dummy hashing prediction. for d in descriptors1 + descriptors2: d.set_vector(np.ones(1, float) * d.uuid()) # Build initial index. index.build_index(descriptors1) self.assertEqual(descr_set.count(), 5) for d in descriptors1: self.assertIn(d, descr_set) for d in descriptors2: self.assertNotIn(d, descr_set) # Dummy hash function bins sum of descriptor vectors. self.assertEqual(hash_kvs.count(), 5) for i in range(5): self.assertSetEqual(hash_kvs.get(i), {i}) # Update index and check that components have new data. index.update_index(descriptors2) self.assertEqual(descr_set.count(), 7) for d in descriptors1 + descriptors2: self.assertIn(d, descr_set) # Dummy hash function bins sum of descriptor vectors. self.assertEqual(hash_kvs.count(), 7) for i in range(7): self.assertSetEqual(hash_kvs.get(i), {i})
def test_update_index_duplicate_descriptors(self) -> None: """ Test that updating a built index with the same descriptors results in idempotent behavior. """ descr_set = MemoryDescriptorSet() hash_kvs = MemoryKeyValueStore() index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set, hash_kvs) # Identical Descriptors to build and update on (different instances) descriptors1 = [ DescriptorMemoryElement('t', 0).set_vector([0]), DescriptorMemoryElement('t', 1).set_vector([1]), DescriptorMemoryElement('t', 2).set_vector([2]), DescriptorMemoryElement('t', 3).set_vector([3]), DescriptorMemoryElement('t', 4).set_vector([4]), ] descriptors2 = [ DescriptorMemoryElement('t', 0).set_vector([0]), DescriptorMemoryElement('t', 1).set_vector([1]), DescriptorMemoryElement('t', 2).set_vector([2]), DescriptorMemoryElement('t', 3).set_vector([3]), DescriptorMemoryElement('t', 4).set_vector([4]), ] index.build_index(descriptors1) index.update_index(descriptors2) assert descr_set.count() == 5 # Above descriptors should be considered "in" the descriptor set now. for d in descriptors1: assert d in descr_set for d in descriptors2: assert d in descr_set # Known hashes of the above descriptors should be in the KVS assert set(hash_kvs.keys()) == {0, 1, 2, 3, 4} assert hash_kvs.get(0) == {0} assert hash_kvs.get(1) == {1} assert hash_kvs.get(2) == {2} assert hash_kvs.get(3) == {3} assert hash_kvs.get(4) == {4}