예제 #1
0
 def test_get_invalid_key_with_default(self):
     """ Looking up an absent key hands back the supplied default. """
     store = MemoryKeyValueStore()
     # Nothing was added to the store, so the second argument comes back.
     fallback_value = store.get(0, 1)
     nose.tools.assert_equal(fallback_value, 1)
     assert store.get(0, ()) == ()
예제 #2
0
 def test_get_config_no_cache_elem(self):
     """ Config of a store without a cache element has a null type. """
     store = MemoryKeyValueStore()
     store._cache_element = None
     config = store.get_config()
     # A default DataElement config is expected (no impl type defined).
     self.assertIn('cache_element', config)
     cache_config = config['cache_element']
     self.assertIsNone(cache_config['type'])
예제 #3
0
    def test_update_index_no_existing_index(self):
        """
        Calling update_index when nothing has been built yet should act like
        a fresh build (mirrors test_build_index_fresh_build).
        """
        descr_index = MemoryDescriptorIndex()
        hash_kvs = MemoryKeyValueStore()
        index = LSHNearestNeighborIndex(DummyHashFunctor(),
                                        descr_index, hash_kvs)

        descriptors = [DescriptorMemoryElement('t', uid) for uid in range(5)]
        # Single-element vectors keep the dummy hashing easy to predict.
        for descr in descriptors:
            descr.set_vector(np.ones(1, float) * descr.uuid())
        index.update_index(descriptors)

        # The attached descriptor index should now hold every descriptor.
        self.assertEqual(descr_index.count(), 5)
        for descr in descriptors:
            self.assertIn(descr, descr_index)
        # The key-value store should map each hash to the matching UID.
        self.assertEqual(hash_kvs.count(), 5)
        for key in range(5):
            self.assertSetEqual(hash_kvs.get(key), {key})
예제 #4
0
    def test_clear(self):
        """ Clearing a populated store leaves an empty table. """
        store = MemoryKeyValueStore()
        store._table = dict(a=1, b=2, c=3)

        store.clear()

        nose.tools.assert_equal(store._table, {})
    def test_remove_many_missing_key(self):
        """
        Test that we cannot remove keys not present in table and that table
        is not modified on error.
        """
        expected_table = {
            0: 0,
            1: 1,
            2: 2,
        }

        s = MemoryKeyValueStore()
        # Install a separate copy so the expectation cannot be altered
        # through the store's own table.
        s._table = dict(expected_table)

        self.assertRaisesRegex(KeyError, 'a', s.remove_many, ['a'])
        self.assertDictEqual(s._table, expected_table)

        # Even if one of the keys is valid, the table should not be modified
        # if one of the keys is invalid.
        self.assertRaisesRegex(KeyError, '6', s.remove_many, [1, 6])
        self.assertDictEqual(s._table, expected_table)

        PY2_SET_KEY_ERROR_RE = r"set\(\[(?:7|8), (?:7|8)\]\)"
        PY3_SET_KEY_ERROR_RE = "{(?:7|8), (?:7|8)}"
        self.assertRaisesRegex(
            KeyError,
            # Should show a "set" that contains 7 and 8, regardless of order.
            '(?:{}|{})'.format(PY2_SET_KEY_ERROR_RE, PY3_SET_KEY_ERROR_RE),
            s.remove_many,
            [7, 8])
        # As with the earlier error cases, the table must be left untouched.
        self.assertDictEqual(s._table, expected_table)
예제 #6
0
    def test_count_empty_hash2uid(self):
        """
        The LSH index count follows the hash-to-uid mapping: it stays at 0
        while that mapping is empty, whatever the descriptor-set contains.
        """
        descr_set = MemoryDescriptorSet()
        hash_kvs = MemoryKeyValueStore()
        self.assertEqual(descr_set.count(), 0)
        self.assertEqual(hash_kvs.count(), 0)

        lsh = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set, hash_kvs)
        self.assertEqual(lsh.count(), 0)

        # Growing the descriptor-set must not change the LSH index "size".
        for expected_descr_count, uid in enumerate((0, 1), start=1):
            lsh.descriptor_set.add_descriptor(DescriptorMemoryElement('t', uid))
            self.assertEqual(lsh.descriptor_set.count(), expected_descr_count)
            self.assertEqual(lsh.hash2uuids_kvstore.count(), 0)
            self.assertEqual(lsh.count(), 0)

        # Each overwrite of hash 0 holds one more UID; the index count tracks
        # the number of UIDs while the descriptor-set stays at two.
        for uid_set in ({0}, {0, 1}, {0, 1, 2}):
            lsh.hash2uuids_kvstore.add(0, uid_set)
            self.assertEqual(lsh.descriptor_set.count(), 2)
            self.assertEqual(lsh.count(), len(uid_set))
예제 #7
0
    def test_build_index_fresh_build(self):
        """ Building from scratch fills the descriptor set and hash store. """
        descr_set = MemoryDescriptorSet()
        hash_kvs = MemoryKeyValueStore()
        index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set,
                                        hash_kvs)

        descriptors = [DescriptorMemoryElement('t', uid) for uid in range(5)]
        # Single-element vectors keep the dummy hashing easy to predict.
        for position, descr in enumerate(descriptors):
            descr.set_vector(np.ones(1, float) * position)
        index.build_index(descriptors)

        # The attached descriptor set should now hold every descriptor.
        self.assertEqual(descr_set.count(), 5)
        for descr in descriptors:
            self.assertIn(descr, descr_set)
        # The key-value store should map each hash to the matching UID.
        self.assertEqual(hash_kvs.count(), 5)
        for key in range(5):
            self.assertSetEqual(hash_kvs.get(key), {key})
예제 #8
0
    def test_update_index_no_existing_index(self):
        """
        Calling update_index when nothing has been built yet should act like
        a fresh build (mirrors test_build_index_fresh_build).
        """
        descr_set = MemoryDescriptorSet()
        hash_kvs = MemoryKeyValueStore()
        index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set,
                                        hash_kvs)

        descriptors = [DescriptorMemoryElement('t', uid) for uid in range(5)]
        # Single-element vectors keep the dummy hashing easy to predict.
        for descr in descriptors:
            descr.set_vector(np.ones(1, float) * descr.uuid())
        index.update_index(descriptors)

        # The attached descriptor set should now hold every descriptor.
        self.assertEqual(descr_set.count(), 5)
        for descr in descriptors:
            self.assertIn(descr, descr_set)
        # The key-value store should map each hash to the matching UID.
        self.assertEqual(hash_kvs.count(), 5)
        for key in range(5):
            self.assertSetEqual(hash_kvs.get(key), {key})
예제 #9
0
    def test_configuration(self):
        """
        Instances yielded by the configuration helper carry the same
        component types and parameter values as the constructed index.
        """
        ex_descr_set = MemoryDescriptorSet()
        ex_i2u_kvs = MemoryKeyValueStore()
        ex_u2i_kvs = MemoryKeyValueStore()
        ex_index_elem = DataMemoryElement()
        ex_index_param_elem = DataMemoryElement()

        faiss_index = FaissNearestNeighborsIndex(
            descriptor_set=ex_descr_set,
            idx2uid_kvs=ex_i2u_kvs,
            uid2idx_kvs=ex_u2i_kvs,
            index_element=ex_index_elem,
            index_param_element=ex_index_param_elem,
            read_only=True,
            factory_string=u'some fact str',
            ivf_nprobe=88,
            use_gpu=False,
            gpu_id=99,
            random_seed=8,
        )

        # Attribute name -> type expected for the nested component.
        component_types = (
            ('_descriptor_set', MemoryDescriptorSet),
            ('_idx2uid_kvs', MemoryKeyValueStore),
            ('_uid2idx_kvs', MemoryKeyValueStore),
            ('_index_element', DataMemoryElement),
            ('_index_param_element', DataMemoryElement),
        )
        for inst in configuration_test_helper(faiss_index):
            for attr_name, expected_type in component_types:
                assert isinstance(getattr(inst, attr_name), expected_type)
            assert inst.read_only is True
            assert isinstance(inst.factory_string, six.string_types)
            assert inst.factory_string == 'some fact str'
            assert inst._ivf_nprobe == 88
            assert inst._use_gpu is False
            assert inst._gpu_id == 99
            assert inst.random_seed == 8
예제 #10
0
    def test_build_index_fresh_build(self):
        """ Building from scratch fills the descriptor index and hash store. """
        descr_index = MemoryDescriptorIndex()
        hash_kvs = MemoryKeyValueStore()
        index = LSHNearestNeighborIndex(DummyHashFunctor(),
                                        descr_index, hash_kvs)

        descriptors = [DescriptorMemoryElement('t', uid) for uid in range(5)]
        # Single-element vectors keep the dummy hashing easy to predict.
        for position, descr in enumerate(descriptors):
            descr.set_vector(np.ones(1, float) * position)
        index.build_index(descriptors)

        # The attached descriptor index should now hold every descriptor.
        self.assertEqual(descr_index.count(), 5)
        for descr in descriptors:
            self.assertIn(descr, descr_index)
        # The key-value store should map each hash to the matching UID.
        self.assertEqual(hash_kvs.count(), 5)
        for key in range(5):
            self.assertSetEqual(hash_kvs.get(key), {key})
예제 #11
0
    def test_count_empty_hash2uid(self):
        """
        The LSH index count follows the hash-to-uid mapping: it stays at 0
        while that mapping is empty, whatever the descriptor index contains.
        """
        descr_set = MemoryDescriptorIndex()
        hash_kvs = MemoryKeyValueStore()
        self.assertEqual(descr_set.count(), 0)
        self.assertEqual(hash_kvs.count(), 0)

        lsh = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set, hash_kvs)
        self.assertEqual(lsh.count(), 0)

        # Growing the descriptor index must not change the LSH index "size".
        for expected_descr_count, uid in enumerate((0, 1), start=1):
            lsh.descriptor_index.add_descriptor(
                DescriptorMemoryElement('t', uid))
            self.assertEqual(lsh.descriptor_index.count(),
                             expected_descr_count)
            self.assertEqual(lsh.hash2uuids_kvstore.count(), 0)
            self.assertEqual(lsh.count(), 0)

        # Each overwrite of hash 0 holds one more UID; the index count tracks
        # the number of UIDs while the descriptor index stays at two.
        for uid_set in ({0}, {0, 1}, {0, 1, 2}):
            lsh.hash2uuids_kvstore.add(0, uid_set)
            self.assertEqual(lsh.descriptor_index.count(), 2)
            self.assertEqual(lsh.count(), len(uid_set))
 def test_get_invalid_key_with_default(self):
     """ Looking up an absent key hands back the supplied default. """
     store = MemoryKeyValueStore()
     # Nothing was added to the store, so the second argument comes back.
     fallback_value = store.get(0, 1)
     self.assertEqual(fallback_value, 1)
     assert store.get(0, ()) == ()
예제 #13
0
    def test_clear(self):
        """ Clearing a populated store leaves an empty table. """
        store = MemoryKeyValueStore()
        store._table = dict(a=1, b=2, c=3)

        store.clear()

        self.assertEqual(store._table, {})
 def test_count(self):
     """ The reported count follows the number of table entries. """
     store = MemoryKeyValueStore()
     # A freshly constructed store reports no entries.
     assert store.count() == 0
     store._table = {
         0: 0,
         1: 1,
         'a': True,
         None: False,
     }
     assert store.count() == 4
예제 #15
0
 def test_get_invalid_key_with_default(self):
     """ Looking up an absent key hands back the supplied default. """
     store = MemoryKeyValueStore()
     # Nothing was added to the store, so the second argument comes back.
     fallback_value = store.get(0, 1)
     self.assertEqual(fallback_value, 1)
     assert store.get(0, ()) == ()
예제 #16
0
    def test_get(self):
        """ Values placed in the table are returned for their keys. """
        store = MemoryKeyValueStore()
        store._table['a'] = 'b'
        store._table[0] = 1

        for key, expected_value in (('a', 'b'), (0, 1)):
            assert store.get(key) == expected_value
    def test_clear(self):
        """ Clearing a populated store leaves an empty table. """
        store = MemoryKeyValueStore()
        store._table = dict(a=1, b=2, c=3)

        store.clear()

        self.assertEqual(store._table, {})
예제 #18
0
    def test_clear_readonly(self):
        """
        Test that clearing a read-only store raises ``ReadOnlyError`` and
        leaves the table contents untouched.
        """
        expected_table = dict(a=1, b=2, c=3)

        s = MemoryKeyValueStore()
        # Give the store its own copy: comparing the table against the very
        # object it holds would still pass if ``clear`` emptied it in place.
        s._table = dict(expected_table)
        s.is_read_only = mock.MagicMock(return_value=True)

        nose.tools.assert_raises(ReadOnlyError, s.clear)
        nose.tools.assert_equal(s._table, expected_table)
 def test_remove_missing_key(self):
     """ Removing a key absent from the store raises ``KeyError``. """
     store = MemoryKeyValueStore()
     store._table = {
         0: 1,
         'a': 'b',
     }
     with self.assertRaises(KeyError):
         store.remove('some-key')
     # The failed removal must leave the table as it was.
     self.assertDictEqual(store._table, {0: 1, 'a': 'b'})
    def test_clear_readonly(self):
        """
        Test that clearing a read-only store raises ``ReadOnlyError`` and
        leaves the table contents untouched.
        """
        expected_table = dict(a=1, b=2, c=3)

        s = MemoryKeyValueStore()
        # Give the store its own copy: comparing the table against the very
        # object it holds would still pass if ``clear`` emptied it in place.
        s._table = dict(expected_table)
        s.is_read_only = mock.MagicMock(return_value=True)

        self.assertRaises(ReadOnlyError, s.clear)
        self.assertEqual(s._table, expected_table)
예제 #21
0
 def test_get_config_no_cache_elem(self):
     """ Config of a store without a cache element has a null type. """
     store = MemoryKeyValueStore()
     store._cache_element = None
     config = store.get_config()
     # A default DataElement config is expected (no impl type defined).
     self.assertIn('cache_element', config)
     cache_config = config['cache_element']
     self.assertIsNone(cache_config['type'])
예제 #22
0
 def test_keys_with_table(self):
     """ The keys reported by the store match the table's keys. """
     expected_keys = {'a', 'c', 'asdfghsdfg', 'r3adf3a#+'}

     store = MemoryKeyValueStore()
     store._table = {
         'a': 'b',
         'c': 1,
         'asdfghsdfg': None,
         'r3adf3a#+': [4, 5, 6, '7'],
     }
     nose.tools.assert_set_equal(set(store.keys()), expected_keys)
예제 #23
0
    def test_remove(self):
        """ Removing an existing key drops only that entry. """
        store = MemoryKeyValueStore()
        store._table = {0: 1, 'a': 'b'}

        store.remove(0)

        remaining = {'a': 'b'}
        self.assertDictEqual(store._table, remaining)
 def test_get_config_no_cache_elem(self):
     """ Config of a store without a cache element has a null type. """
     store = MemoryKeyValueStore()
     store._cache_element = None
     config = store.get_config()
     # A default DataElement config is expected (no impl type defined).
     self.assertIn('cache_element', config)
     cache_config = config['cache_element']
     self.assertIsNone(cache_config['type'])
    def test_remove(self):
        """ Removing an existing key drops only that entry. """
        store = MemoryKeyValueStore()
        store._table = {0: 1, 'a': 'b'}

        store.remove(0)

        remaining = {'a': 'b'}
        self.assertDictEqual(store._table, remaining)
예제 #26
0
    def test_remove_from_index_shared_hashes_partial(self):
        """
        A hash should only be dropped from the hash index once no remaining
        descriptor maps to it; hashes still in use must be kept.
        """
        # Descriptors with single-element vectors whose value equals the UID.
        descriptors = [
            DescriptorMemoryElement('t', uid).set_vector([uid])
            for uid in range(5)
        ]

        # Hash on parity: vectors with an even sum hash to 0, odd to 1.
        hash_func = DummyHashFunctor()
        hash_func.get_hash = mock.Mock(
            side_effect=lambda vec: [vec.sum() % 2]
        )

        # Simulate an already-built state: all descriptors stored, grouped
        # under the two parity hashes.
        d_set = MemoryDescriptorIndex()
        d_set._table = dict(enumerate(descriptors))

        hash2uid_kvs = MemoryKeyValueStore()
        hash2uid_kvs._table = {0: {0, 2, 4}, 1: {1, 3}}

        idx = LSHNearestNeighborIndex(hash_func, d_set, hash2uid_kvs)
        idx.hash_index = mock.Mock(spec=HashIndex)

        idx.remove_from_index([1, 2, 3])

        # Only the odd hash lost all of its UIDs, so exactly one hash vector
        # should have been handed to the hash index's removal method.
        expected_removed_hashes = collections.deque([[1]])
        idx.hash_index.remove_from_index.assert_called_once_with(
            expected_removed_hashes
        )
        expected_descriptors = {0: descriptors[0], 4: descriptors[4]}
        self.assertDictEqual(d_set._table, expected_descriptors)
        self.assertDictEqual(hash2uid_kvs._table, {0: {0, 4}})
예제 #27
0
    def test_remove_from_index_shared_hashes_partial(self):
        """
        A hash should only be dropped from the hash index once no remaining
        descriptor maps to it; hashes still in use must be kept.
        """
        # Descriptors with single-element vectors whose value equals the UID.
        descriptors = [
            DescriptorMemoryElement('t', uid).set_vector([uid])
            for uid in range(5)
        ]

        # Hash on parity: vectors with an even sum hash to 0, odd to 1.
        hash_func = DummyHashFunctor()
        hash_func.get_hash = mock.Mock(
            side_effect=lambda vec: [vec.sum() % 2]
        )

        # Simulate an already-built state: all descriptors stored, grouped
        # under the two parity hashes.
        d_set = MemoryDescriptorIndex()
        d_set._table = dict(enumerate(descriptors))

        hash2uid_kvs = MemoryKeyValueStore()
        hash2uid_kvs._table = {0: {0, 2, 4}, 1: {1, 3}}

        idx = LSHNearestNeighborIndex(hash_func, d_set, hash2uid_kvs)
        idx.hash_index = mock.Mock(spec=HashIndex)

        idx.remove_from_index([1, 2, 3])

        # Only the odd hash lost all of its UIDs, so exactly one hash vector
        # should have been handed to the hash index's removal method.
        expected_removed_hashes = collections.deque([[1]])
        idx.hash_index.remove_from_index.assert_called_once_with(
            expected_removed_hashes
        )
        expected_descriptors = {0: descriptors[0], 4: descriptors[4]}
        self.assertDictEqual(d_set._table, expected_descriptors)
        self.assertDictEqual(hash2uid_kvs._table, {0: {0, 4}})
예제 #28
0
 def test_has_key(self):
     """ ``has`` is true for keys in the table and false otherwise. """
     store = MemoryKeyValueStore()
     store._table = {'a': 0, 'b': 1, 0: 2}

     for present_key in ('a', 'b', 0):
         self.assertTrue(store.has(present_key))
     # 'c' was never put in the table.
     self.assertFalse(store.has('c'))
예제 #29
0
    def test_remove_many(self):
        """ Removing several existing keys at once drops just those. """
        store = MemoryKeyValueStore()
        store._table = {0: 0, 1: 1, 2: 2}

        keys_to_drop = [0, 1]
        store.remove_many(keys_to_drop)

        self.assertDictEqual(store._table, {2: 2})
예제 #30
0
    def test_clear_readonly(self):
        """
        Test that clearing a read-only store raises ``ReadOnlyError`` and
        leaves the table contents untouched.
        """
        expected_table = dict(a=1, b=2, c=3)

        s = MemoryKeyValueStore()
        # Give the store its own copy: comparing the table against the very
        # object it holds would still pass if ``clear`` emptied it in place.
        s._table = dict(expected_table)
        s.is_read_only = mock.MagicMock(return_value=True)

        self.assertRaises(
            ReadOnlyError,
            s.clear
        )
        self.assertEqual(s._table, expected_table)
 def test_keys_with_table(self):
     """ The keys reported by the store match the table's keys. """
     expected_keys = {'a', 'c', 'asdfghsdfg', 'r3adf3a#+'}

     store = MemoryKeyValueStore()
     store._table = {
         'a': 'b',
         'c': 1,
         'asdfghsdfg': None,
         'r3adf3a#+': [4, 5, 6, '7'],
     }
     self.assertSetEqual(set(store.keys()), expected_keys)
예제 #32
0
    def test_remove_many_with_cache(self):
        """ Bulk removal is reflected in the pickled cache element. """
        starting_table = {0: 0, 1: 1, 2: 2}
        cache_elem = DataMemoryElement(pickle.dumps(starting_table))
        store = MemoryKeyValueStore(cache_elem)
        # The store should have loaded its table from the cache bytes.
        self.assertDictEqual(store._table, starting_table)

        store.remove_many([0, 2])

        cached_table = pickle.loads(cache_elem.get_bytes())
        self.assertDictEqual(cached_table, {1: 1})
예제 #33
0
 def test_count(self):
     """ The reported count follows the number of table entries. """
     store = MemoryKeyValueStore()
     # A freshly constructed store reports no entries.
     assert store.count() == 0
     store._table = {0: 0, 1: 1, 'a': True, None: False}
     assert store.count() == 4
    def test_remove_many_with_cache(self):
        """ Bulk removal is reflected in the pickled cache element. """
        starting_table = {0: 0, 1: 1, 2: 2}
        cache_elem = DataMemoryElement(pickle.dumps(starting_table))
        store = MemoryKeyValueStore(cache_elem)
        # The store should have loaded its table from the cache bytes.
        self.assertDictEqual(store._table, starting_table)

        store.remove_many([0, 2])

        cached_table = pickle.loads(cache_elem.get_bytes())
        self.assertDictEqual(cached_table, {1: 1})
    def test_remove_many(self):
        """ Removing several existing keys at once drops just those. """
        store = MemoryKeyValueStore()
        store._table = {0: 0, 1: 1, 2: 2}

        keys_to_drop = [0, 1]
        store.remove_many(keys_to_drop)

        self.assertDictEqual(store._table, {2: 2})
예제 #36
0
    def test_add_many(self):
        """ A dictionary of key-values can be added in one call. """
        new_entries = {'a': 'b', 'foo': None, 0: 89}

        store = MemoryKeyValueStore()
        # Starts out with no cache element and an empty table.
        self.assertIsNone(store._cache_element)
        self.assertEqual(store._table, {})

        store.add_many(new_entries)

        self.assertIsNone(store._cache_element)
        self.assertEqual(store._table, new_entries)
예제 #37
0
    def test_add_data(self):
        """ Added data elements land in the backing store under their UUIDs. """
        mem_kv = MemoryKeyValueStore()
        kvds = KVSDataSet(mem_kv)

        elements = [
            DataMemoryElement(six.b('bytes1')),
            DataMemoryElement(six.b('bytes2')),
        ]
        kvds.add_data(*elements)

        # Every element's UUID must be a key of the KV-store in use ...
        for elem in elements:
            self.assertIn(elem.uuid(), mem_kv)
        # ... and that key must retrieve the element itself.
        for elem in elements:
            self.assertEqual(mem_kv.get(elem.uuid()), elem)
예제 #38
0
    def test_add_data(self):
        """ Added data elements land in the backing store under their UUIDs. """
        mem_kv = MemoryKeyValueStore()
        kvds = KVSDataSet(mem_kv)

        elements = [
            DataMemoryElement(six.b('bytes1')),
            DataMemoryElement(six.b('bytes2')),
        ]
        kvds.add_data(*elements)

        # Every element's UUID must be a key of the KV-store in use ...
        for elem in elements:
            self.assertIn(elem.uuid(), mem_kv)
        # ... and that key must retrieve the element itself.
        for elem in elements:
            self.assertEqual(mem_kv.get(elem.uuid()), elem)
예제 #39
0
 def test_get_config_mem_cache_elem(self):
     """ The returned config describes the cache element that is set. """
     store = MemoryKeyValueStore()
     store._cache_element = DataMemoryElement('someBytes', 'text/plain',
                                              False)

     element_params = {
         'bytes': 'someBytes',
         'content_type': 'text/plain',
         'readonly': False,
     }
     expected_config = {
         'cache_element': {
             'type': 'DataMemoryElement',
             "DataMemoryElement": element_params,
         },
     }
     nose.tools.assert_equal(store.get_config(), expected_config)
예제 #40
0
    def test_add_with_caching(self):
        """ Individual adds end up in the pickled cache element. """
        store = MemoryKeyValueStore()
        store._cache_element = DataMemoryElement()

        pairs = (('a', 'b'), ('foo', None), (0, 89))
        for key, value in pairs:
            store.add(key, value)

        cached_table = pickle.loads(store._cache_element.get_bytes())
        nose.tools.assert_equal(cached_table,
                                {'a': 'b', 'foo': None, 0: 89})
예제 #41
0
 def test_remove_missing_key(self):
     """ Removing a key absent from the store raises ``KeyError``. """
     store = MemoryKeyValueStore()
     store._table = {0: 1, 'a': 'b'}
     with self.assertRaises(KeyError):
         store.remove('some-key')
     # The failed removal must leave the table as it was.
     self.assertDictEqual(store._table, {0: 1, 'a': 'b'})
예제 #42
0
 def test_keys_with_table(self):
     """ The keys reported by the store match the table's keys. """
     expected_keys = {'a', 'c', 'asdfghsdfg', 'r3adf3a#+'}

     store = MemoryKeyValueStore()
     store._table = {
         'a': 'b',
         'c': 1,
         'asdfghsdfg': None,
         'r3adf3a#+': [4, 5, 6, '7'],
     }
     reported_keys = set(store.keys())
     self.assertSetEqual(reported_keys, expected_keys)
예제 #43
0
    def test_add_many_with_caching(self):
        """ A bulk add updates both the table and the cache element. """
        new_entries = {'a': 'b', 'foo': None, 0: 89}
        cache_elem = DataMemoryElement()

        store = MemoryKeyValueStore(cache_elem)
        # Nothing stored and nothing cached to begin with.
        self.assertEqual(store._table, {})
        self.assertEqual(cache_elem.get_bytes(), six.b(""))

        store.add_many(new_entries)

        self.assertEqual(store._table, new_entries)
        cached_table = pickle.loads(cache_elem.get_bytes())
        self.assertEqual(cached_table, new_entries)
예제 #44
0
    def _nearestNeighborIndex(sid, descriptor_set):
        """
        Build the read-only nearest neighbor index for a session.

        The session item is looked up by ``sid``; the hash functor inputs
        ('mean_vec.npy', 'rotation.npy') and the hash-to-UUIDs table
        ('hash2uuids.pickle') are read from the folder referenced by the
        session's ``smqtk_folder_id`` metadata.

        :param sid: ID of the session
        :param descriptor_set: The descriptor set corresponding to the
            session id, see _descriptorSetFromSessionId.
        :returns: Nearest neighbor index or None if no session exists
        :rtype: LSHNearestNeighborIndex|None
        """
        session = ModelImporter.model('item').findOne({'_id': ObjectId(sid)})
        if not session:
            return None

        smqtkFolder = {'_id': ObjectId(session['meta']['smqtk_folder_id'])}

        def _folderElement(fileName):
            # Data element for the named file inside the session's folder.
            return smqtkDataElementFromGirderFileId(
                localSmqtkFileIdFromName(smqtkFolder, fileName))

        functor = ItqFunctor(_folderElement('mean_vec.npy'),
                             _folderElement('rotation.npy'))
        hash2uuidsKV = MemoryKeyValueStore(_folderElement('hash2uuids.pickle'))

        return LSHNearestNeighborIndex(functor,
                                       descriptor_set,
                                       hash2uuidsKV,
                                       read_only=True)
예제 #45
0
 def test_get_config_mem_cache_elem(self):
     """ The returned config describes the cache element that is set. """
     store = MemoryKeyValueStore()
     store._cache_element = DataMemoryElement(six.b('someBytes'),
                                              'text/plain', False)

     element_params = {
         'bytes': six.b('someBytes'),
         'content_type': 'text/plain',
         'readonly': False,
     }
     expected_config = {
         'cache_element': {
             'type': 'DataMemoryElement',
             "DataMemoryElement": element_params,
         },
     }
     self.assertEqual(store.get_config(), expected_config)
예제 #46
0
    def test_update_index_with_hash_index(self):
        """
        With a linear hash index attached, the hash index should pick up the
        hashes of descriptors added by a later update (compare
        `test_update_index_add_new_descriptors`).
        """
        descr_set = MemoryDescriptorSet()
        hash_kvs = MemoryKeyValueStore()
        linear_hi = LinearHashIndex()  # simplest hash index, heap-sorts.
        index = LSHNearestNeighborIndex(DummyHashFunctor(), descr_set,
                                        hash_kvs, linear_hi)

        first_batch = [DescriptorMemoryElement('t', uid) for uid in range(5)]
        second_batch = [DescriptorMemoryElement('t', uid) for uid in (5, 6)]
        # Single-element vectors keep the dummy hashing easy to predict.
        for descr in first_batch + second_batch:
            descr.set_vector(np.ones(1, float) * descr.uuid())

        # After the initial build only the first batch's hashes are encoded.
        index.build_index(first_batch)
        self.assertSetEqual(linear_hi.index, {0, 1, 2, 3, 4})

        # After the update the hash index covers every descriptor hash.
        index.update_index(second_batch)
        self.assertSetEqual(linear_hi.index, {0, 1, 2, 3, 4, 5, 6})
예제 #47
0
    def test_remove_from_index(self):
        """ Removing by UID drops the descriptor and its hash entry. """
        # One-dimensional descriptors whose vector value equals their UID,
        # which keeps the dummy hashing easy to predict.
        descriptors = [DescriptorMemoryElement('t', uid) for uid in range(5)]
        for descr in descriptors:
            descr.set_vector(np.ones(1, float) * descr.uuid())

        d_set = MemoryDescriptorSet()
        hash_kvs = MemoryKeyValueStore()
        idx = LSHNearestNeighborIndex(DummyHashFunctor(), d_set, hash_kvs)
        idx.build_index(descriptors)

        # Remove a single UID and expect everything else to remain.
        idx.remove_from_index([3])

        surviving_uids = (0, 1, 2, 4)
        expected_descriptors = {uid: descriptors[uid] for uid in surviving_uids}
        expected_hash_table = {uid: {uid} for uid in surviving_uids}
        self.assertEqual(idx.descriptor_set._table, expected_descriptors)
        self.assertEqual(idx.hash2uuids_kvstore._table, expected_hash_table)
예제 #48
0
 def test_repr_simple_cache(self):
     """ The repr of a store embeds a description of its cache element. """
     cache_elem = DataMemoryElement()
     store = MemoryKeyValueStore(cache_elem)
     expected_repr = (
         "<MemoryKeyValueStore cache_element: "
         "DataMemoryElement{len(bytes): 0, content_type: "
         "None, readonly: False}>"
     )
     nose.tools.assert_equal(repr(store), expected_repr)
예제 #49
0
    def test_add_many(self):
        """ A dictionary of key-values can be added in one call. """
        new_entries = {'a': 'b', 'foo': None, 0: 89}

        store = MemoryKeyValueStore()
        # Starts out with no cache element and an empty table.
        self.assertIsNone(store._cache_element)
        self.assertEqual(store._table, {})

        store.add_many(new_entries)

        self.assertIsNone(store._cache_element)
        self.assertEqual(store._table, new_entries)
예제 #50
0
    def test_remove_with_cache(self):
        """ A removal is written through to the pickled cache element. """
        existing_data = {0: 1, 'a': 'b'}

        cache_elem = DataMemoryElement(pickle.dumps(existing_data))
        store = MemoryKeyValueStore(cache_elem)
        # The store should have loaded its table from the cache bytes.
        self.assertDictEqual(store._table, existing_data)

        store.remove('a')

        self.assertDictEqual(store._table, {0: 1})
        cached_table = pickle.loads(cache_elem.get_bytes())
        self.assertDictEqual(cached_table, {0: 1})
예제 #51
0
 def test_get_default(self):
     """ The default config holds a cache element entry with no type. """
     default_config = MemoryKeyValueStore.get_default_config()
     self.assertIsInstance(default_config, dict)
     # The cache element property is a nested plugin config whose type is
     # unset by default.
     self.assertIn('cache_element', default_config)
     cache_config = default_config['cache_element']
     self.assertIn('type', cache_config)
     self.assertIsNone(cache_config['type'])
예제 #52
0
 def test_from_config_none_type(self):
     """ A null plugin type in the config yields no cache element. """
     # A config map is given, but its plugin type is set to null/None.
     cache_spec = {
         'some_type': {'param': None},
         'type': None,
     }
     store = MemoryKeyValueStore.from_config({'cache_element': cache_spec})
     self.assertIsNone(store._cache_element)
     self.assertEqual(store._table, {})
예제 #53
0
    def test_update_index_similar_descriptors(self):
        """
        Test that updating a built index with similar descriptors (same
        vectors, different UUIDs) leaves the descriptor index and hash
        key-value store in the expected state.
        """
        d_index = MemoryDescriptorIndex()
        kvs = MemoryKeyValueStore()
        lsh = LSHNearestNeighborIndex(DummyHashFunctor(), d_index, kvs)

        # Two batches of distinct instances with pairwise-equal vectors:
        # UUIDs 0-4 in the first batch, UUIDs 5-9 in the second.
        first_batch = [
            DescriptorMemoryElement('t', i).set_vector([i])
            for i in range(5)
        ]
        second_batch = [
            DescriptorMemoryElement('t', i + 5).set_vector([i])
            for i in range(5)
        ]

        lsh.build_index(first_batch)
        lsh.update_index(second_batch)

        assert d_index.count() == 10
        # Every descriptor from both batches should be "in" the descriptor
        # set now.
        for d in first_batch + second_batch:
            assert d in d_index
        # Known hashes of the above descriptors should be in the KVS, each
        # mapping to the UUIDs of the two descriptors sharing that vector.
        assert set(kvs.keys()) == {0, 1, 2, 3, 4}
        for i in range(5):
            assert kvs.get(i) == {i, i + 5}
예제 #54
0
    def test_update_index_add_new_descriptors(self):
        """
        Test that calling update_index after build_index causes the index
        components to be properly updated.
        """
        d_index = MemoryDescriptorIndex()
        kvs = MemoryKeyValueStore()
        lsh = LSHNearestNeighborIndex(DummyHashFunctor(), d_index, kvs)

        initial = [DescriptorMemoryElement('t', i) for i in range(5)]
        added = [DescriptorMemoryElement('t', i) for i in (5, 6)]
        everything = initial + added
        # Vectors of length 1 for easy dummy hashing prediction.
        for d in everything:
            d.set_vector(np.ones(1, float) * d.uuid())

        # Build the initial index: only the first batch should be present.
        lsh.build_index(initial)
        self.assertEqual(d_index.count(), 5)
        for d in initial:
            self.assertIn(d, d_index)
        for d in added:
            self.assertNotIn(d, d_index)
        # Dummy hash function bins sum of descriptor vectors.
        self.assertEqual(kvs.count(), 5)
        for i in range(5):
            self.assertSetEqual(kvs.get(i), {i})

        # Update the index and check that components hold the new data too.
        lsh.update_index(added)
        self.assertEqual(d_index.count(), 7)
        for d in everything:
            self.assertIn(d, d_index)
        # Dummy hash function bins sum of descriptor vectors.
        self.assertEqual(kvs.count(), 7)
        for i in range(7):
            self.assertSetEqual(kvs.get(i), {i})
예제 #55
0
    def test_update_index_duplicate_descriptors(self):
        """
        Test that updating a built index with the same descriptors results in
        idempotent behavior.
        """
        d_index = MemoryDescriptorIndex()
        kvs = MemoryKeyValueStore()
        lsh = LSHNearestNeighborIndex(DummyHashFunctor(), d_index, kvs)

        # Two batches of distinct instances that share both UUIDs (0-4) and
        # vectors.
        first_batch = [
            DescriptorMemoryElement('t', i).set_vector([i])
            for i in range(5)
        ]
        second_batch = [
            DescriptorMemoryElement('t', i).set_vector([i])
            for i in range(5)
        ]

        lsh.build_index(first_batch)
        lsh.update_index(second_batch)

        # The update must not have increased the descriptor count.
        assert d_index.count() == 5
        # Descriptors from both batches should be considered "in" the
        # descriptor set now.
        for d in first_batch + second_batch:
            assert d in d_index
        # Known hashes of the above descriptors should be in the KVS, each
        # still mapping to a single UUID.
        assert set(kvs.keys()) == {0, 1, 2, 3, 4}
        for i in range(5):
            assert kvs.get(i) == {i}
예제 #56
0
    def test_add_many_with_caching(self):
        """
        Test that adding many key-value pairs is reflected in the cache
        element.
        """
        pairs = {'a': 'b', 'foo': None, 0: 89}
        cache_elem = DataMemoryElement()

        store = MemoryKeyValueStore(cache_elem)
        # Before adding: empty table and no bytes in the cache element.
        self.assertEqual(store._table, {})
        self.assertEqual(cache_elem.get_bytes(), six.b(""))

        store.add_many(pairs)
        self.assertEqual(store._table, pairs)
        # The cache element's content should un-pickle to the added pairs.
        cached_table = pickle.loads(cache_elem.get_bytes())
        self.assertEqual(cached_table, pairs)
예제 #57
0
    def test_remove_many_missing_key(self):
        """
        Test that we cannot remove keys not present in table and that table
        is not modified on error.
        """
        expected_table = {
            0: 0,
            1: 1,
            2: 2,
        }

        s = MemoryKeyValueStore()
        s._table = {
            0: 0,
            1: 1,
            2: 2,
        }

        # A single missing key should raise a KeyError naming that key.
        self.assertRaisesRegexp(
            KeyError, 'a',
            s.remove_many, ['a']
        )
        self.assertDictEqual(s._table, expected_table)

        # Even if one of the keys is valid, the table should not be modified
        # if one of the keys is invalid.
        self.assertRaisesRegexp(
            KeyError, '6',
            s.remove_many, [1, 6]
        )
        self.assertDictEqual(s._table, expected_table)

        # Raw strings: "\(", "\[", "\]" and "\)" are regex escapes, not
        # string escapes, and are invalid escape sequences in a plain string
        # literal.
        PY2_SET_KEY_ERROR_RE = r"set\(\[(?:7|8), (?:7|8)\]\)"
        PY3_SET_KEY_ERROR_RE = r"{(?:7|8), (?:7|8)}"
        self.assertRaisesRegexp(
            KeyError,
            # Should show a "set" that contains 7 and 8, regardless of order.
            '(?:{}|{})'.format(PY2_SET_KEY_ERROR_RE, PY3_SET_KEY_ERROR_RE),
            s.remove_many, [7, 8]
        )
        # As in the cases above, a failed removal must leave the table as-is.
        self.assertDictEqual(s._table, expected_table)
예제 #58
0
    def test_from_config_with_cache_element(self):
        """
        Test construction from a config that specifies an in-memory cache
        element whose bytes are a pickled dictionary with a known entry.
        """
        # Pickle text of the dictionary ``{'some_key': 'some_value'}``.
        table_pickle = six.b("(dp1\nS'some_key'\np2\nS'some_value'\np3\ns.")
        expected_table = {'some_key': 'some_value'}

        # Plugin-style config selecting the memory data element.
        cache_elem_cfg = {
            'DataMemoryElement': {'bytes': table_pickle},
            'type': 'DataMemoryElement',
        }
        config = {'cache_element': cache_elem_cfg}

        store = MemoryKeyValueStore.from_config(config)
        self.assertIsInstance(store._cache_element, DataMemoryElement)
        self.assertEqual(store._table, expected_table)
예제 #59
0
    def test_add_with_caching(self):
        """
        Test that key-value pairs added one at a time are reflected in the
        cache element.
        """
        cache_elem = DataMemoryElement()
        store = MemoryKeyValueStore(cache_elem)

        for key, value in (('a', 'b'), ('foo', None), (0, 89)):
            store.add(key, value)

        # The cache element's content should un-pickle to all added pairs.
        cached_table = pickle.loads(cache_elem.get_bytes())
        self.assertEqual(cached_table, {'a': 'b', 'foo': None, 0: 89})
예제 #60
0
    def test_add(self):
        """ Test that key-value pairs can be added one at a time. """
        store = MemoryKeyValueStore()
        expected = {}

        # After each add, the table should hold every pair added so far.
        for key, value in (('a', 'b'), ('foo', None), (0, 89)):
            store.add(key, value)
            expected[key] = value
            self.assertEqual(store._table, expected)