def test_get_invalid_key_with_default(self):
    """
    A lookup of an absent key hands back the caller-supplied default.
    """
    store = MemoryKeyValueStore()
    fallback = store.get(0, 1)
    nose.tools.assert_equal(fallback, 1)
    # An empty tuple is a legitimate default value as well.
    empty_default = ()
    assert store.get(0, empty_default) == ()
def test_get_config_no_cache_elem(self):
    """
    With the cache element unset, the config still carries a
    'cache_element' entry whose 'type' is None.
    """
    store = MemoryKeyValueStore()
    store._cache_element = None
    config = store.get_config()
    # Expect a default DataElement config (no impl type defined).
    self.assertIn('cache_element', config)
    cache_config = config['cache_element']
    self.assertIsNone(cache_config['type'])
def test_update_index_no_existing_index(self):
    """
    Calling update_index on an index that was never built should leave the
    same state as a fresh build (mirrors test_build_index_fresh_build).
    """
    descr_index = MemoryDescriptorIndex()
    hash_kvs = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_index, hash_kvs)

    elements = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    # One-dimensional vectors keep the dummy hash values easy to predict.
    for elem in elements:
        elem.set_vector(np.ones(1, float) * elem.uuid())
    index.update_index(elements)

    # Every descriptor should now be in the attached descriptor index.
    self.assertEqual(descr_index.count(), 5)
    for elem in elements:
        self.assertIn(elem, descr_index)

    # Dummy hash function bins sum of descriptor vectors.
    self.assertEqual(hash_kvs.count(), 5)
    for key in range(5):
        self.assertSetEqual(hash_kvs.get(key), {key})
def test_clear(self):
    """
    Clearing a populated store leaves an empty table.
    """
    store = MemoryKeyValueStore()
    store._table = dict(a=1, b=2, c=3)
    store.clear()
    nose.tools.assert_equal(store._table, {})
def test_remove_many_missing_key(self):
    """
    Removing keys that are absent raises KeyError and leaves the table
    untouched.
    """
    expected_table = {0: 0, 1: 1, 2: 2}
    store = MemoryKeyValueStore()
    # Install a separate dict so the comparison below is against an
    # independent object.
    store._table = dict(expected_table)

    self.assertRaisesRegex(KeyError, 'a', store.remove_many, ['a'])
    self.assertDictEqual(store._table, expected_table)

    # A single invalid key among valid ones must still leave the table
    # unmodified.
    self.assertRaisesRegex(KeyError, '6', store.remove_many, [1, 6])
    self.assertDictEqual(store._table, expected_table)

    # The error should show a "set" that contains 7 and 8, regardless of
    # order; one pattern per set rendering (Python 2 style / Python 3 style).
    py2_pattern = r"set\(\[(?:7|8), (?:7|8)\]\)"
    py3_pattern = "{(?:7|8), (?:7|8)}"
    either_pattern = '(?:{}|{})'.format(py2_pattern, py3_pattern)
    self.assertRaisesRegex(KeyError, either_pattern,
                           store.remove_many, [7, 8])
def test_count_empty_hash2uid(self):
    """
    The LSH index count follows the hash-to-uid store only; the state of
    the descriptor set has no effect on it.
    """
    descriptor_set = MemoryDescriptorSet()
    kvstore = MemoryKeyValueStore()
    self.assertEqual(descriptor_set.count(), 0)
    self.assertEqual(kvstore.count(), 0)

    lsh = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set, kvstore)
    self.assertEqual(lsh.count(), 0)

    # Additions to the descriptor-set should not impact LSH index "size".
    for n_added, uid in enumerate((0, 1), start=1):
        lsh.descriptor_set.add_descriptor(DescriptorMemoryElement('t', uid))
        self.assertEqual(lsh.descriptor_set.count(), n_added)
        self.assertEqual(lsh.hash2uuids_kvstore.count(), 0)
        self.assertEqual(lsh.count(), 0)

    # The count should track the number of UIDs stored under the hash key.
    for uid_group in ({0}, {0, 1}, {0, 1, 2}):
        lsh.hash2uuids_kvstore.add(0, uid_group)
        self.assertEqual(lsh.descriptor_set.count(), 2)
        self.assertEqual(lsh.count(), len(uid_group))
def test_build_index_fresh_build(self):
    """
    Building on empty components fills both the descriptor set and the
    hash key-value store.
    """
    descr_set = MemoryDescriptorSet()
    hash_kvs = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set, hash_kvs)

    elements = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    # Vectors of length 1 for easy dummy hashing prediction.
    for position, elem in enumerate(elements):
        elem.set_vector(np.ones(1, float) * position)
    index.build_index(elements)

    # Make sure descriptors are now in the attached descriptor set.
    self.assertEqual(descr_set.count(), 5)
    for elem in elements:
        self.assertIn(elem, descr_set)

    # Dummy hash function bins sum of descriptor vectors.
    self.assertEqual(hash_kvs.count(), 5)
    for key in range(5):
        self.assertSetEqual(hash_kvs.get(key), {key})
def test_update_index_no_existing_index(self):
    """
    Calling update_index with no existing index should act like building
    the index fresh (mirrors test_build_index_fresh_build).
    """
    descr_set = MemoryDescriptorSet()
    hash_kvs = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set, hash_kvs)

    elements = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    # One-dimensional vectors keep the dummy hash values easy to predict.
    for elem in elements:
        elem.set_vector(np.ones(1, float) * elem.uuid())
    index.update_index(elements)

    # Every descriptor should now be in the attached descriptor set.
    self.assertEqual(descr_set.count(), 5)
    for elem in elements:
        self.assertIn(elem, descr_set)

    # Dummy hash function bins sum of descriptor vectors.
    self.assertEqual(hash_kvs.count(), 5)
    for key in range(5):
        self.assertSetEqual(hash_kvs.get(key), {key})
def test_configuration(self):
    """
    Check each instance yielded by configuration_test_helper against the
    parameters the source index was constructed with.
    """
    source_index = FaissNearestNeighborsIndex(
        descriptor_set=MemoryDescriptorSet(),
        idx2uid_kvs=MemoryKeyValueStore(),
        uid2idx_kvs=MemoryKeyValueStore(),
        index_element=DataMemoryElement(),
        index_param_element=DataMemoryElement(),
        read_only=True,
        factory_string=u'some fact str',
        ivf_nprobe=88,
        use_gpu=False,
        gpu_id=99,
        random_seed=8,
    )
    # Component attributes paired with the implementation type expected.
    component_types = (
        ('_descriptor_set', MemoryDescriptorSet),
        ('_idx2uid_kvs', MemoryKeyValueStore),
        ('_uid2idx_kvs', MemoryKeyValueStore),
        ('_index_element', DataMemoryElement),
        ('_index_param_element', DataMemoryElement),
    )
    for inst in configuration_test_helper(source_index):
        for attr_name, expected_type in component_types:
            assert isinstance(getattr(inst, attr_name), expected_type)
        assert inst.read_only is True
        assert isinstance(inst.factory_string, six.string_types)
        assert inst.factory_string == 'some fact str'
        assert inst._ivf_nprobe == 88
        assert inst._use_gpu is False
        assert inst._gpu_id == 99
        assert inst.random_seed == 8
def test_build_index_fresh_build(self):
    """
    Building on empty components fills both the descriptor index and the
    hash key-value store.
    """
    descr_index = MemoryDescriptorIndex()
    hash_kvs = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_index, hash_kvs)

    elements = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    # Vectors of length 1 for easy dummy hashing prediction.
    for position, elem in enumerate(elements):
        elem.set_vector(np.ones(1, float) * position)
    index.build_index(elements)

    # Make sure descriptors are now in the attached descriptor index.
    self.assertEqual(descr_index.count(), 5)
    for elem in elements:
        self.assertIn(elem, descr_index)

    # Dummy hash function bins sum of descriptor vectors.
    self.assertEqual(hash_kvs.count(), 5)
    for key in range(5):
        self.assertSetEqual(hash_kvs.get(key), {key})
def test_count_empty_hash2uid(self):
    """
    The LSH index count follows the hash-to-uid store only; the state of
    the descriptor index has no effect on it.
    """
    descriptor_index = MemoryDescriptorIndex()
    kvstore = MemoryKeyValueStore()
    self.assertEqual(descriptor_index.count(), 0)
    self.assertEqual(kvstore.count(), 0)

    lsh = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_index,
                                  kvstore)
    self.assertEqual(lsh.count(), 0)

    # Additions to the descriptor index should not impact LSH index "size".
    for n_added, uid in enumerate((0, 1), start=1):
        lsh.descriptor_index.add_descriptor(DescriptorMemoryElement('t', uid))
        self.assertEqual(lsh.descriptor_index.count(), n_added)
        self.assertEqual(lsh.hash2uuids_kvstore.count(), 0)
        self.assertEqual(lsh.count(), 0)

    # The count should track the number of UIDs stored under the hash key.
    for uid_group in ({0}, {0, 1}, {0, 1, 2}):
        lsh.hash2uuids_kvstore.add(0, uid_group)
        self.assertEqual(lsh.descriptor_index.count(), 2)
        self.assertEqual(lsh.count(), len(uid_group))
def test_get_invalid_key_with_default(self):
    """
    A lookup of a missing key returns the default value supplied.
    """
    store = MemoryKeyValueStore()
    fallback = store.get(0, 1)
    self.assertEqual(fallback, 1)
    # An empty tuple is a legitimate default value as well.
    empty_default = ()
    assert store.get(0, empty_default) == ()
def test_clear(self):
    """
    Clearing a populated store leaves an empty table.
    """
    store = MemoryKeyValueStore()
    store._table = dict(a=1, b=2, c=3)
    store.clear()
    self.assertEqual(store._table, {})
def test_count(self):
    """
    count() reflects the number of entries currently in the table.
    """
    store = MemoryKeyValueStore()
    assert store.count() == 0

    # Keys of mixed types, including None, each count as one entry.
    populated_table = {0: 0, 1: 1, 'a': True, None: False}
    store._table = populated_table
    assert store.count() == 4
def test_get(self):
    """
    Values placed in the table are returned by get() for their keys.
    """
    store = MemoryKeyValueStore()
    store._table.update({'a': 'b', 0: 1})
    for key, value in (('a', 'b'), (0, 1)):
        assert store.get(key) == value
def test_clear_readonly(self):
    """
    Test trying to clear on a read-only store.

    A ReadOnlyError must be raised and the table contents must be left as
    they were.
    """
    s = MemoryKeyValueStore()
    s._table = dict(a=1, b=2, c=3)
    # Make the store report itself as read-only.
    s.is_read_only = mock.MagicMock(return_value=True)
    nose.tools.assert_raises(ReadOnlyError, s.clear)
    # Compare against an independently built dict: comparing against the
    # same object that was installed as the table would pass even if
    # clear() had emptied it in place.
    nose.tools.assert_equal(s._table, dict(a=1, b=2, c=3))
def test_remove_missing_key(self):
    """
    Removing a key that is not in the store raises KeyError and leaves the
    table as it was.
    """
    store = MemoryKeyValueStore()
    store._table = {0: 1, 'a': 'b'}
    missing_key = 'some-key'
    self.assertRaises(KeyError, store.remove, missing_key)
    # table should remain unchanged.
    unchanged_table = {0: 1, 'a': 'b'}
    self.assertDictEqual(store._table, unchanged_table)
def test_clear_readonly(self):
    """
    Test trying to clear on a read-only store.

    A ReadOnlyError must be raised and the table contents must be left as
    they were.
    """
    s = MemoryKeyValueStore()
    s._table = dict(a=1, b=2, c=3)
    # Make the store report itself as read-only.
    s.is_read_only = mock.MagicMock(return_value=True)
    self.assertRaises(ReadOnlyError, s.clear)
    # Compare against an independently built dict: comparing against the
    # same object that was installed as the table would pass even if
    # clear() had emptied it in place.
    self.assertEqual(s._table, dict(a=1, b=2, c=3))
def test_get_config_no_cache_elem(self):
    """
    The returned configuration reflects that no cache element is set: the
    'cache_element' entry is present with a None 'type'.
    """
    store = MemoryKeyValueStore()
    store._cache_element = None
    config = store.get_config()
    # Expect a default DataElement config (no impl type defined).
    self.assertIn('cache_element', config)
    cache_config = config['cache_element']
    self.assertIsNone(cache_config['type'])
def test_keys_with_table(self):
    """
    keys() yields exactly the keys present in the table.
    """
    table = {
        'a': 'b',
        'c': 1,
        'asdfghsdfg': None,
        'r3adf3a#+': [4, 5, 6, '7'],
    }
    store = MemoryKeyValueStore()
    store._table = table
    expected_keys = {'a', 'c', 'asdfghsdfg', 'r3adf3a#+'}
    nose.tools.assert_set_equal(set(store.keys()), expected_keys)
def test_remove(self):
    """
    Removing an existing key drops only that entry from the table.
    """
    store = MemoryKeyValueStore()
    store._table = {0: 1, 'a': 'b'}
    store.remove(0)
    remaining = {'a': 'b'}
    self.assertDictEqual(store._table, remaining)
def test_remove_from_index_shared_hashes_partial(self):
    """
    When several descriptors share a hash, removing some of them should
    only drop a hash from the hash index once no descriptor refers to it.
    """
    # Vectors of length 1 for easy dummy hashing prediction.
    descriptors = [
        DescriptorMemoryElement('t', uid).set_vector([uid])
        for uid in range(5)
    ]

    # Simulated hashing: vectors of even sum hash to 0, odd to 1.
    hash_func = DummyHashFunctor()
    hash_func.get_hash = mock.Mock(
        side_effect=lambda vec: [vec.sum() % 2]
    )

    # Pre-populate the descriptor index keyed by UID.
    d_set = MemoryDescriptorIndex()
    d_set._table = dict(enumerate(descriptors))

    # Pre-populate the hash-to-UIDs mapping matching the hashing above.
    hash2uid_kvs = MemoryKeyValueStore()
    hash2uid_kvs._table = {
        0: {0, 2, 4},
        1: {1, 3},
    }

    idx = LSHNearestNeighborIndex(hash_func, d_set, hash2uid_kvs)
    idx.hash_index = mock.Mock(spec=HashIndex)

    idx.remove_from_index([1, 2, 3])

    # Only one hash vector should have been passed to the hash index's
    # removal method (deque of hash-code vectors).
    removed_hashes = collections.deque([
        [1],
    ])
    idx.hash_index.remove_from_index.assert_called_once_with(removed_hashes)

    surviving_descriptors = {
        0: descriptors[0],
        4: descriptors[4],
    }
    self.assertDictEqual(d_set._table, surviving_descriptors)
    self.assertDictEqual(hash2uid_kvs._table, {0: {0, 4}})
def test_has_key(self):
    """
    has() is true for keys in the table and false for a key that is not.
    """
    store = MemoryKeyValueStore()
    store._table = {'a': 0, 'b': 1, 0: 2}
    for present_key in ('a', 'b', 0):
        self.assertTrue(store.has(present_key))
    self.assertFalse(store.has('c'))
def test_remove_many(self):
    """
    remove_many drops every listed key and keeps the rest.
    """
    store = MemoryKeyValueStore()
    store._table = {0: 0, 1: 1, 2: 2}
    keys_to_drop = [0, 1]
    store.remove_many(keys_to_drop)
    self.assertDictEqual(store._table, {2: 2})
def test_clear_readonly(self):
    """
    Test trying to clear on a read-only store.

    A ReadOnlyError must be raised and the table contents must be left as
    they were.
    """
    s = MemoryKeyValueStore()
    s._table = dict(a=1, b=2, c=3)
    # Make the store report itself as read-only.
    s.is_read_only = mock.MagicMock(return_value=True)
    self.assertRaises(
        ReadOnlyError,
        s.clear
    )
    # Compare against an independently built dict: comparing against the
    # same object that was installed as the table would pass even if
    # clear() had emptied it in place.
    self.assertEqual(s._table, dict(a=1, b=2, c=3))
def test_keys_with_table(self):
    """
    keys() yields exactly the keys present in the table.
    """
    table = {
        'a': 'b',
        'c': 1,
        'asdfghsdfg': None,
        'r3adf3a#+': [4, 5, 6, '7'],
    }
    store = MemoryKeyValueStore()
    store._table = table
    expected_keys = {'a', 'c', 'asdfghsdfg', 'r3adf3a#+'}
    self.assertSetEqual(set(store.keys()), expected_keys)
def test_remove_many_with_cache(self):
    """
    After remove_many, the bytes held by the cache element unpickle to the
    reduced table.
    """
    starting_table = {0: 0, 1: 1, 2: 2}
    cache = DataMemoryElement(pickle.dumps(starting_table))
    store = MemoryKeyValueStore(cache)
    # The store should start out with the table pickled into the cache.
    self.assertDictEqual(store._table, starting_table)

    store.remove_many([0, 2])
    cached_table = pickle.loads(cache.get_bytes())
    self.assertDictEqual(cached_table, {1: 1})
def test_count(self):
    """
    count() reflects the number of entries currently in the table.
    """
    store = MemoryKeyValueStore()
    assert store.count() == 0

    # Keys of mixed types, including None, each count as one entry.
    populated_table = {0: 0, 1: 1, 'a': True, None: False}
    store._table = populated_table
    assert store.count() == 4
def test_add_many(self):
    """
    add_many copies every pair of the given dictionary into the table,
    and the store continues to have no cache element.
    """
    pairs = {'a': 'b', 'foo': None, 0: 89}
    store = MemoryKeyValueStore()
    # Starting point: no cache element, empty table.
    self.assertIsNone(store._cache_element)
    self.assertEqual(store._table, {})

    store.add_many(pairs)
    self.assertIsNone(store._cache_element)
    self.assertEqual(store._table, pairs)
def test_add_data(self):
    """
    Elements added to the data set are stored in the backing key-value
    store under their UUIDs.
    """
    mem_kv = MemoryKeyValueStore()
    kvds = KVSDataSet(mem_kv)
    added = (
        DataMemoryElement(six.b('bytes1')),
        DataMemoryElement(six.b('bytes2')),
    )
    kvds.add_data(*added)

    # Check that appropriate keys and values are retrievable and located
    # in the used KV-store.
    for elem in added:
        self.assertIn(elem.uuid(), mem_kv)
    for elem in added:
        self.assertEqual(mem_kv.get(elem.uuid()), elem)
def test_get_config_mem_cache_elem(self):
    """
    The configuration returned describes the cache element that is set.
    """
    store = MemoryKeyValueStore()
    # NOTE(review): the element is built from a text string rather than
    # bytes -- confirm this is intended for the Python versions supported.
    store._cache_element = DataMemoryElement('someBytes', 'text/plain', False)

    element_config = {
        'bytes': 'someBytes',
        'content_type': 'text/plain',
        'readonly': False,
    }
    expected_config = {
        'cache_element': {
            "DataMemoryElement": element_config,
            'type': 'DataMemoryElement',
        }
    }
    nose.tools.assert_equal(store.get_config(), expected_config)
def test_add_with_caching(self):
    """
    Pairs added one at a time end up in the pickled content of the cache
    element.
    """
    store = MemoryKeyValueStore()
    store._cache_element = DataMemoryElement()

    expected_cache_dict = {'a': 'b', 'foo': None, 0: 89}
    for key, value in (('a', 'b'), ('foo', None), (0, 89)):
        store.add(key, value)

    cached = pickle.loads(store._cache_element.get_bytes())
    nose.tools.assert_equal(cached, expected_cache_dict)
def test_remove_missing_key(self):
    """
    Removing a key that is not in the store raises KeyError and leaves the
    table as it was.
    """
    store = MemoryKeyValueStore()
    store._table = {0: 1, 'a': 'b'}
    missing_key = 'some-key'
    self.assertRaises(KeyError, store.remove, missing_key)
    # table should remain unchanged.
    unchanged_table = {0: 1, 'a': 'b'}
    self.assertDictEqual(store._table, unchanged_table)
def test_keys_with_table(self):
    """
    keys() yields exactly the keys present in the table.
    """
    table = {
        'a': 'b',
        'c': 1,
        'asdfghsdfg': None,
        'r3adf3a#+': [4, 5, 6, '7'],
    }
    store = MemoryKeyValueStore()
    store._table = table
    expected_keys = {'a', 'c', 'asdfghsdfg', 'r3adf3a#+'}
    self.assertSetEqual(set(store.keys()), expected_keys)
def test_add_many_with_caching(self):
    """
    add_many fills the table and the cache element's bytes unpickle to the
    same mapping.
    """
    pairs = {'a': 'b', 'foo': None, 0: 89}
    cache = DataMemoryElement()
    store = MemoryKeyValueStore(cache)
    # Starting point: empty table and no bytes in the cache element.
    self.assertEqual(store._table, {})
    self.assertEqual(cache.get_bytes(), six.b(""))

    store.add_many(pairs)
    self.assertEqual(store._table, pairs)
    cached = pickle.loads(cache.get_bytes())
    self.assertEqual(cached, pairs)
def _nearestNeighborIndex(sid, descriptor_set):
    """
    Build the read-only nearest neighbor index for a given session.

    The ITQ functor and the hash-to-UUIDs store are backed by the files
    'mean_vec.npy', 'rotation.npy' and 'hash2uuids.pickle' looked up by
    name in the session's SMQTK folder.

    :param sid: ID of the session
    :param descriptor_set: The descriptor set corresponding to the session
        id, see _descriptorSetFromSessionId.
    :returns: Nearest neighbor index, or None when no session item is
        found for ``sid``.
    :rtype: LSHNearestNeighborIndex|None
    """
    session = ModelImporter.model('item').findOne({'_id': ObjectId(sid)})
    if not session:
        return None

    smqtkFolder = {'_id': ObjectId(session['meta']['smqtk_folder_id'])}

    def _folderElement(fileName):
        # Data element for the named file within the session's SMQTK folder.
        return smqtkDataElementFromGirderFileId(
            localSmqtkFileIdFromName(smqtkFolder, fileName))

    functor = ItqFunctor(_folderElement('mean_vec.npy'),
                         _folderElement('rotation.npy'))
    hash2uuidsKV = MemoryKeyValueStore(_folderElement('hash2uuids.pickle'))

    return LSHNearestNeighborIndex(functor, descriptor_set,
                                   hash2uuidsKV, read_only=True)
def test_get_config_mem_cache_elem(self):
    """
    The configuration returned describes the cache element that is set.
    """
    store = MemoryKeyValueStore()
    store._cache_element = DataMemoryElement(six.b('someBytes'),
                                             'text/plain', False)

    element_config = {
        'bytes': six.b('someBytes'),
        'content_type': 'text/plain',
        'readonly': False,
    }
    expected_config = {
        'cache_element': {
            "DataMemoryElement": element_config,
            'type': 'DataMemoryElement',
        }
    }
    self.assertEqual(store.get_config(), expected_config)
def test_update_index_with_hash_index(self):
    """
    Similar to `test_update_index_add_new_descriptors`, but with a linear
    hash index attached so its content can be checked after build and
    update.
    """
    descr_set = MemoryDescriptorSet()
    hash_kvs = MemoryKeyValueStore()
    linear_hi = LinearHashIndex()  # simplest hash index, heap-sorts.
    index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set, hash_kvs,
                                    linear_hi)

    first_batch = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    second_batch = [DescriptorMemoryElement('t', uid) for uid in (5, 6)]

    # Vectors of length 1 for easy dummy hashing prediction.
    for elem in first_batch + second_batch:
        elem.set_vector(np.ones(1, float) * elem.uuid())

    # After the initial build the hash index should only encode hashes for
    # the first batch of descriptors.
    index.build_index(first_batch)
    self.assertSetEqual(linear_hi.index, set(range(5)))

    # After the update the hash index should include all descriptor hashes.
    index.update_index(second_batch)
    self.assertSetEqual(linear_hi.index, set(range(7)))
def test_remove_from_index(self):
    """
    Removing by UID drops the descriptor from the descriptor set and its
    entry from the hash-to-UIDs store.
    """
    # Descriptors are 1 dim, value == index.
    descriptors = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    # Vectors of length 1 for easy dummy hashing prediction.
    for elem in descriptors:
        elem.set_vector(np.ones(1, float) * elem.uuid())

    d_set = MemoryDescriptorSet()
    hash_kvs = MemoryKeyValueStore()
    idx = LSHNearestNeighborIndex(DummyHashFunctor(), d_set, hash_kvs)
    idx.build_index(descriptors)

    # Attempt removing 1 uid.
    idx.remove_from_index([3])

    kept_uids = (0, 1, 2, 4)
    expected_descriptors = {uid: descriptors[uid] for uid in kept_uids}
    expected_hash_table = {uid: {uid} for uid in kept_uids}
    self.assertEqual(idx.descriptor_set._table, expected_descriptors)
    self.assertEqual(idx.hash2uuids_kvstore._table, expected_hash_table)
def test_repr_simple_cache(self):
    """
    repr() of a store built with an empty memory cache element matches the
    expected text.
    """
    store = MemoryKeyValueStore(DataMemoryElement())
    expected_repr = (
        "<MemoryKeyValueStore cache_element: "
        "DataMemoryElement{len(bytes): 0, content_type: "
        "None, readonly: False}>"
    )
    nose.tools.assert_equal(repr(store), expected_repr)
def test_add_many(self):
    """
    add_many copies every pair of the given dictionary into the table,
    and the store continues to have no cache element.
    """
    pairs = {'a': 'b', 'foo': None, 0: 89}
    store = MemoryKeyValueStore()
    # Starting point: no cache element, empty table.
    self.assertIsNone(store._cache_element)
    self.assertEqual(store._table, {})

    store.add_many(pairs)
    self.assertIsNone(store._cache_element)
    self.assertEqual(store._table, pairs)
def test_remove_with_cache(self):
    """
    A removal is reflected both in the table and in the pickled content of
    the cache element.
    """
    existing_data = {0: 1, 'a': 'b'}
    cache = DataMemoryElement(pickle.dumps(existing_data))
    store = MemoryKeyValueStore(cache)
    # The store should start out with the table pickled into the cache.
    self.assertDictEqual(store._table, existing_data)

    store.remove('a')
    after_removal = {0: 1}
    self.assertDictEqual(store._table, after_removal)
    self.assertDictEqual(pickle.loads(cache.get_bytes()), after_removal)
def test_get_default(self):
    """
    The default configuration is a dict holding a 'cache_element' entry
    with a None 'type'.
    """
    default_config = MemoryKeyValueStore.get_default_config()
    self.assertIsInstance(default_config, dict)

    # Should just contain the cache element property, which is a nested
    # plugin config with no default type.
    self.assertIn('cache_element', default_config)
    cache_config = default_config['cache_element']
    self.assertIn('type', cache_config)
    self.assertIsNone(cache_config['type'])
def test_from_config_none_type(self):
    """
    A config whose cache element plugin type is None yields a store with
    no cache element and an empty table.
    """
    # Config map given, but plugin type set to null/None.
    cache_config = {
        'some_type': {'param': None},
        'type': None,
    }
    store = MemoryKeyValueStore.from_config({'cache_element': cache_config})
    self.assertIsNone(store._cache_element)
    self.assertEqual(store._table, {})
def test_update_index_similar_descriptors(self):
    """
    Updating a built index with descriptors that share vectors with the
    existing ones (but have different UUIDs) should group both UUIDs
    under each hash.
    """
    descr_index = MemoryDescriptorIndex()
    hash_kvs = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_index, hash_kvs)

    # Two batches with the same vectors; the second batch's UUIDs are
    # offset by 5.
    first_batch = [
        DescriptorMemoryElement('t', value).set_vector([value])
        for value in range(5)
    ]
    second_batch = [
        DescriptorMemoryElement('t', value + 5).set_vector([value])
        for value in range(5)
    ]
    index.build_index(first_batch)
    index.update_index(second_batch)

    assert descr_index.count() == 10
    # All descriptors should be considered "in" the descriptor index now.
    for elem in first_batch:
        assert elem in descr_index
    for elem in second_batch:
        assert elem in descr_index

    # Known hashes of the above descriptors should be in the KVS.
    assert set(hash_kvs.keys()) == {0, 1, 2, 3, 4}
    for value in range(5):
        assert hash_kvs.get(value) == {value, value + 5}
def test_update_index_add_new_descriptors(self):
    """
    Calling update_index after build_index should add the new descriptors
    to both the descriptor index and the hash key-value store.
    """
    descr_index = MemoryDescriptorIndex()
    hash_kvs = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_index, hash_kvs)

    first_batch = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    second_batch = [DescriptorMemoryElement('t', uid) for uid in (5, 6)]
    every_elem = first_batch + second_batch

    # Vectors of length 1 for easy dummy hashing prediction.
    for elem in every_elem:
        elem.set_vector(np.ones(1, float) * elem.uuid())

    # Build initial index: only the first batch should be present.
    index.build_index(first_batch)
    self.assertEqual(descr_index.count(), 5)
    for elem in first_batch:
        self.assertIn(elem, descr_index)
    for elem in second_batch:
        self.assertNotIn(elem, descr_index)
    # Dummy hash function bins sum of descriptor vectors.
    self.assertEqual(hash_kvs.count(), 5)
    for key in range(5):
        self.assertSetEqual(hash_kvs.get(key), {key})

    # Update index and check that components have new data.
    index.update_index(second_batch)
    self.assertEqual(descr_index.count(), 7)
    for elem in every_elem:
        self.assertIn(elem, descr_index)
    # Dummy hash function bins sum of descriptor vectors.
    self.assertEqual(hash_kvs.count(), 7)
    for key in range(7):
        self.assertSetEqual(hash_kvs.get(key), {key})
def test_update_index_duplicate_descriptors(self):
    """
    Updating a built index with descriptors equal to those already indexed
    should not change the resulting state.
    """
    descr_index = MemoryDescriptorIndex()
    hash_kvs = MemoryKeyValueStore()
    index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_index, hash_kvs)

    # Two batches of identical descriptors (different instances).
    first_batch = [
        DescriptorMemoryElement('t', value).set_vector([value])
        for value in range(5)
    ]
    second_batch = [
        DescriptorMemoryElement('t', value).set_vector([value])
        for value in range(5)
    ]
    index.build_index(first_batch)
    index.update_index(second_batch)

    assert descr_index.count() == 5
    # All descriptors should be considered "in" the descriptor index now.
    for elem in first_batch:
        assert elem in descr_index
    for elem in second_batch:
        assert elem in descr_index

    # Known hashes of the above descriptors should be in the KVS.
    assert set(hash_kvs.keys()) == {0, 1, 2, 3, 4}
    for value in range(5):
        assert hash_kvs.get(value) == {value}
def test_add_many_with_caching(self):
    """
    add_many fills the table and the cache element's bytes unpickle to the
    same mapping.
    """
    pairs = {'a': 'b', 'foo': None, 0: 89}
    cache = DataMemoryElement()
    store = MemoryKeyValueStore(cache)
    # Starting point: empty table and no bytes in the cache element.
    self.assertEqual(store._table, {})
    self.assertEqual(cache.get_bytes(), six.b(""))

    store.add_many(pairs)
    self.assertEqual(store._table, pairs)
    cached = pickle.loads(cache.get_bytes())
    self.assertEqual(cached, pairs)
def test_remove_many_missing_key(self):
    """
    Test that we cannot remove keys not present in table and that table is
    not modified on error.
    """
    expected_table = {
        0: 0,
        1: 1,
        2: 2,
    }

    s = MemoryKeyValueStore()
    s._table = {
        0: 0,
        1: 1,
        2: 2,
    }

    # ``assertRaisesRegex`` is used rather than the deprecated
    # ``assertRaisesRegexp`` alias, which newer Python versions no longer
    # provide.
    self.assertRaisesRegex(
        KeyError, 'a',
        s.remove_many, ['a']
    )
    self.assertDictEqual(s._table, expected_table)

    # Even if one of the keys is valid, the table should not be modified
    # if one of the keys is invalid.
    self.assertRaisesRegex(
        KeyError, '6',
        s.remove_many, [1, 6]
    )
    self.assertDictEqual(s._table, expected_table)

    # Raw string so the regex backslashes are not treated as (invalid)
    # string escape sequences.
    PY2_SET_KEY_ERROR_RE = r"set\(\[(?:7|8), (?:7|8)\]\)"
    PY3_SET_KEY_ERROR_RE = "{(?:7|8), (?:7|8)}"
    self.assertRaisesRegex(
        KeyError,
        # Should show a "set" that contains 7 and 8, regardless of order.
        '(?:{}|{})'.format(PY2_SET_KEY_ERROR_RE, PY3_SET_KEY_ERROR_RE),
        s.remove_many, [7, 8]
    )
def test_from_config_with_cache_element(self):
    """
    Constructing from a config with a memory cache element loads the
    pickled table from that element's bytes.
    """
    # Pickled dictionary with a known entry.
    expected_table = {'some_key': 'some_value'}
    known_entry_pickle = six.b("(dp1\nS'some_key'\np2\nS'some_value'\np3\ns.")

    # Test construction with memory data element.
    cache_config = {
        'DataMemoryElement': {
            'bytes': known_entry_pickle,
        },
        'type': 'DataMemoryElement',
    }
    store = MemoryKeyValueStore.from_config({'cache_element': cache_config})
    self.assertIsInstance(store._cache_element, DataMemoryElement)
    self.assertEqual(store._table, expected_table)
def test_add_with_caching(self):
    """
    Pairs added one at a time end up in the pickled content of the cache
    element.
    """
    cache = DataMemoryElement()
    store = MemoryKeyValueStore(cache)

    expected_cache_dict = {'a': 'b', 'foo': None, 0: 89}
    for key, value in (('a', 'b'), ('foo', None), (0, 89)):
        store.add(key, value)

    cached = pickle.loads(cache.get_bytes())
    self.assertEqual(cached, expected_cache_dict)
def test_add(self):
    """
    Each add() call makes the new pair appear in the table alongside the
    pairs added before it.
    """
    store = MemoryKeyValueStore()
    expected = {}
    for key, value in (('a', 'b'), ('foo', None), (0, 89)):
        store.add(key, value)
        # Track what the table should hold after this addition.
        expected[key] = value
        self.assertEqual(store._table, expected)