Beispiel #1
0
    def test_fit_with_cache(self) -> None:
        """
        Fit an ``ItqFunctor`` constructed with two cache elements and check
        that the fitted mean vector and rotation are available both as
        attributes and as numpy-serialized bytes in the cache elements.
        """
        # Five 2D descriptors: (-2, -2), (-1, -1), (0, 0), (1, 1), (2, 2).
        training_set = []
        for idx in range(5):
            coord = -2. + idx
            elem = DescriptorMemoryElement('test', idx)
            elem.set_vector([coord, coord])
            training_set.append(elem)

        functor = ItqFunctor(DataMemoryElement(),
                             DataMemoryElement(),
                             bit_length=1,
                             random_seed=0)
        assert functor.mean_vec_cache_elem is not None
        assert functor.rotation_cache_elem is not None
        functor.fit(training_set)

        # TODO: Explanation as to why this is the expected result.
        # NOTE(review): the zero mean follows from the inputs being symmetric
        # about the origin. The rotation is presumably the unit vector along
        # the diagonal on which all inputs lie -- confirm against
        # ItqFunctor.fit.
        expected_mean = [0, 0]
        expected_rotation = [[1 / sqrt(2)], [1 / sqrt(2)]]

        # In-memory model attributes.
        numpy.testing.assert_array_almost_equal(functor.mean_vec,
                                                expected_mean)
        numpy.testing.assert_array_almost_equal(functor.rotation,
                                                expected_rotation)

        # Model content written to the mean-vector cache element.
        self.assertIsNotNone(functor.mean_vec_cache_elem)
        # noinspection PyTypeChecker
        cached_mean = numpy.load(
            BytesIO(functor.mean_vec_cache_elem.get_bytes()))
        numpy.testing.assert_array_almost_equal(cached_mean, expected_mean)

        # Model content written to the rotation cache element.
        self.assertIsNotNone(functor.rotation_cache_elem)
        # noinspection PyTypeChecker
        cached_rotation = numpy.load(
            BytesIO(functor.rotation_cache_elem.get_bytes()))
        numpy.testing.assert_array_almost_equal(cached_rotation,
                                                expected_rotation)
Beispiel #2
0
    def test_pickle_save_restore(self,
                                 m_cdg_setupNetwork: mock.MagicMock) -> None:
        """
        Round-trip a ``CaffeDescriptorGenerator`` through pickle and check
        that the restored instance has the same configuration.

        :param m_cdg_setupNetwork: Mock standing in for the network setup
            routine, so no caffe initialization is needed for this test.
        """
        expected_params: Dict[str, Any] = dict(
            network_prototxt=DataMemoryElement(),
            network_model=DataMemoryElement(),
            image_mean=DataMemoryElement(),
            return_layer='layer name',
            batch_size=777,
            use_gpu=False,
            gpu_device_id=8,
            network_is_bgr=False,
            data_layer='data-other',
            load_truncated_images=True,
            pixel_rescale=(.2, .8),
            input_scale=1.5,
            threads=9,
        )
        original = CaffeDescriptorGenerator(**expected_params)
        # Construction sets up the network once.
        self.assertEqual(m_cdg_setupNetwork.call_count, 1)

        restored = pickle.loads(pickle.dumps(original, -1))
        # The network should be set up again for the restored instance,
        # just like in the initial construction.
        self.assertEqual(m_cdg_setupNetwork.call_count, 2)

        self.assertIsInstance(restored, CaffeDescriptorGenerator)
        self.assertEqual(original.get_config(), restored.get_config())
 def test_set_bytes_when_readonly(self) -> None:
     """
     Setting bytes on a read-only element must raise ``ReadOnlyError`` and
     leave the originally stored bytes untouched.
     """
     initial_bytes = b"test bytes first set"
     replacement_bytes = b"the second set of bytes"
     element = DataMemoryElement(initial_bytes, readonly=True)
     self.assertEqual(element.get_bytes(), initial_bytes)

     with self.assertRaises(ReadOnlyError):
         element.set_bytes(replacement_bytes)

     # Content is unchanged after the rejected write.
     self.assertEqual(element.get_bytes(), initial_bytes)
Beispiel #4
0
    def test_save_model_with_writable_caches(self) -> None:
        """
        With both cache elements writable, ``save_model`` should write the
        numpy-serialized mean vector and rotation into their respective
        cache elements.
        """
        def to_npy_bytes(array: numpy.ndarray) -> bytes:
            # Serialize an array the same way ``numpy.save`` does to a file.
            buffer = BytesIO()
            # noinspection PyTypeChecker
            numpy.save(buffer, array)
            return buffer.getvalue()

        expected_mean_vec = numpy.array([1, 2, 3])
        expected_rotation = numpy.eye(3)
        expected_mean_vec_bytes = to_npy_bytes(expected_mean_vec)
        expected_rotation_bytes = to_npy_bytes(expected_rotation)

        # Manually install a model and writable caches on a fresh functor.
        functor = ItqFunctor()
        functor.mean_vec = expected_mean_vec
        functor.rotation = expected_rotation
        functor.mean_vec_cache_elem = DataMemoryElement(readonly=False)
        functor.rotation_cache_elem = DataMemoryElement(readonly=False)

        functor.save_model()

        self.assertEqual(functor.mean_vec_cache_elem.get_bytes(),
                         expected_mean_vec_bytes)
        self.assertEqual(functor.rotation_cache_elem.get_bytes(),
                         expected_rotation_bytes)
Beispiel #5
0
    def test_configuration(self) -> None:
        """
        Construct a ``FaissNearestNeighborsIndex`` with non-default values
        and check that every instance yielded by
        ``configuration_test_helper`` carries those same values.
        """
        index = FaissNearestNeighborsIndex(
            descriptor_set=MemoryDescriptorSet(),
            idx2uid_kvs=MemoryKeyValueStore(),
            uid2idx_kvs=MemoryKeyValueStore(),
            index_element=DataMemoryElement(),
            index_param_element=DataMemoryElement(),
            read_only=True,
            factory_string=u'some fact str',
            ivf_nprobe=88,
            use_gpu=False,
            gpu_id=99,
            random_seed=8,
        )

        # Attribute name -> type expected for each nested component.
        component_types = (
            ('_descriptor_set', MemoryDescriptorSet),
            ('_idx2uid_kvs', MemoryKeyValueStore),
            ('_uid2idx_kvs', MemoryKeyValueStore),
            ('_index_element', DataMemoryElement),
            ('_index_param_element', DataMemoryElement),
        )

        for inst in configuration_test_helper(index):
            for attr_name, attr_type in component_types:
                assert isinstance(getattr(inst, attr_name), attr_type)

            # Scalar parameters should match what was passed above.
            assert inst.read_only is True
            assert isinstance(inst.factory_string, str)
            assert inst.factory_string == 'some fact str'
            assert inst._ivf_nprobe == 88
            assert inst._use_gpu is False
            assert inst._gpu_id == 99
            assert inst.random_seed == 8
Beispiel #6
0
    def test_get_config(self, _m_cdg_setupNetwork: mock.MagicMock) -> None:
        """
        Check that ``get_config`` returns the constructor parameters, with
        the data-element parameters expressed as configuration dictionaries.

        :param _m_cdg_setupNetwork: Mock standing in for the network setup
            routine, so no caffe initialization is needed for this test.
        """
        expected_params: Dict[str, Any] = dict(
            network_prototxt=DataMemoryElement(),
            network_model=DataMemoryElement(),
            image_mean=DataMemoryElement(),
            return_layer='layer name',
            batch_size=777,
            use_gpu=False,
            gpu_device_id=8,
            network_is_bgr=False,
            data_layer='data-other',
            load_truncated_images=True,
            pixel_rescale=(.2, .8),
            input_scale=1.5,
            threads=14,
        )
        # Guard against this test silently missing a constructor parameter.
        default_params = CaffeDescriptorGenerator.get_default_config()
        assert set(default_params) == set(expected_params)
        generator = CaffeDescriptorGenerator(**expected_params)

        # The element-valued parameters are expected back as sub-configs.
        element_keys = ('network_prototxt', 'network_model', 'image_mean')
        expected_params.update({
            key: to_config_dict(cast(DataMemoryElement, expected_params[key]))
            for key in element_keys
        })
        assert generator.get_config() == expected_params
Beispiel #7
0
 def test_build_index_with_cache(self) -> None:
     """
     Building a ``LinearHashIndex`` that has a cache element should
     populate both the in-memory index and the cache element.
     """
     cache_elem = DataMemoryElement()
     hash_index = LinearHashIndex(cache_elem)
     # Bit vectors whose integer values are 2, 4, 3 and 1 respectively.
     bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
     # noinspection PyTypeChecker
     hash_index.build_index(bit_vectors)
     self.assertEqual(hash_index.index, {1, 2, 3, 4})
     self.assertFalse(cache_elem.is_empty())
Beispiel #8
0
    def test_add_data(self) -> None:
        """
        Adding a data element to a ``DataMemorySet`` should register it in
        the internal element map under the element's UUID.
        """
        element = DataMemoryElement(b"some bytes", 'text/plain', True)
        expected_map = {element.uuid(): element}

        data_set = DataMemorySet()
        data_set.add_data(element)
        self.assertEqual(data_set._element_map, expected_map)
    def test_added_descriptor_table_caching(self) -> None:
        """
        Check that additions to and removals from a ``MemoryDescriptorSet``
        are reflected in the pickled table stored in its writable cache
        element.
        """
        cache_elem = DataMemoryElement(readonly=False)
        initial_descrs = [random_descriptor() for _ in range(3)]
        expected_table = {d.uuid(): d for d in initial_descrs}

        dset = MemoryDescriptorSet(cache_elem)
        assert dset.cache_element is not None
        self.assertTrue(cache_elem.is_empty())

        # Adding descriptors should fill the table and write it to the cache.
        dset.add_many_descriptors(initial_descrs)
        self.assertFalse(cache_elem.is_empty())
        cached_table = pickle.loads(dset.cache_element.get_bytes())
        self.assertEqual(cached_table, expected_table)

        # A further single addition should also show up in the cache.
        added = random_descriptor()
        expected_table[added.uuid()] = added
        dset.add_descriptor(added)
        cached_table = pickle.loads(dset.cache_element.get_bytes())
        self.assertEqual(cached_table, expected_table)

        # As should a removal.
        removed = next(iter(expected_table.values()))
        del expected_table[removed.uuid()]
        dset.remove_descriptor(removed.uuid())
        cached_table = pickle.loads(dset.cache_element.get_bytes())
        self.assertEqual(cached_table, expected_table)
Beispiel #10
0
    def test_get_config(self) -> None:
        """
        Check ``MemoryDescriptorSet.get_config`` with no cache element, with
        an empty cache element, and with a cache element holding bytes.
        """
        dme_key = 'smqtk_dataprovider.impls.data_element.memory.DataMemoryElement'

        # No cache element, whether omitted or explicitly None, yields the
        # default configuration.
        no_arg_cfg = MemoryDescriptorSet().get_config()
        self.assertEqual(no_arg_cfg, MemoryDescriptorSet.get_default_config())

        none_arg_cfg = MemoryDescriptorSet(None).get_config()
        self.assertEqual(none_arg_cfg,
                         MemoryDescriptorSet.get_default_config())

        # Empty cache element: only the element type is recorded.
        blank_elem = DataMemoryElement()
        blank_cfg = MemoryDescriptorSet(blank_elem).get_config()
        expected_blank_cfg = merge_dict(
            MemoryDescriptorSet.get_default_config(),
            {'cache_element': {'type': dme_key}},
        )
        self.assertEqual(blank_cfg, expected_blank_cfg)

        # Cache element with content: the bytes appear in the config as a
        # string decoded with BYTES_CONFIG_ENCODING.
        table_pickle = pickle.dumps({1: 1, 2: 2, 3: 3}, -1)
        table_pickle_str = table_pickle.decode(BYTES_CONFIG_ENCODING)
        filled_elem = DataMemoryElement(bytes=table_pickle)
        filled_cfg = MemoryDescriptorSet(filled_elem).get_config()
        expected_filled_cfg = merge_dict(
            MemoryDescriptorSet.get_default_config(),
            {
                'cache_element': {
                    dme_key: {'bytes': table_pickle_str},
                    'type': dme_key,
                },
            },
        )
        self.assertEqual(filled_cfg, expected_filled_cfg)
Beispiel #11
0
 def test_load_as_matrix_empty_data(self):
     """
     An empty data element must be rejected with a ``ValueError`` instead
     of being loaded.
     """
     blank_element = DataMemoryElement(readonly=True,
                                       content_type='image/png')
     assert blank_element.is_empty()

     with pytest.raises(ValueError,
                        match="GdalImageReader cannot load 0-sized data"):
         GdalImageReader().load_as_matrix(blank_element)
Beispiel #12
0
    def test_load_as_matrix_invalid_bytes(self):
        """
        A data element whose bytes are not a valid image must fail to load
        with an ``IOError``.
        """
        bad_element = DataMemoryElement(content_type='image/png')
        bad_element.set_bytes(b"not valid bytes")

        expected_msg = ("Failed to identify image from bytes "
                        "provided by DataMemoryElement")
        reader = PilImageReader()
        with pytest.raises(IOError, match=expected_msg):
            reader.load_as_matrix(bad_element)
Beispiel #13
0
    def test_save_cache_build_index(self) -> None:
        """
        Building the index should write the indexed hash values to the
        cache element in a form readable by ``numpy.load``.
        """
        cache_elem = DataMemoryElement()
        self.assertTrue(cache_elem.is_empty())

        hash_index = LinearHashIndex(cache_elem)
        bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
        # noinspection PyTypeChecker
        hash_index.build_index(bit_vectors)
        self.assertFalse(cache_elem.is_empty())

        # Inspect the cached byte content.
        expected_cache = {1, 2, 3, 4}
        cached_array = numpy.load(BytesIO(cache_elem.get_bytes()))
        actual_cache = set(cached_array)
        self.assertSetEqual(expected_cache, actual_cache)
Beispiel #14
0
    def test_remove_many_with_cache(self) -> None:
        """
        Removing several keys from a store backed by a cache element should
        update the pickled table held in that element.
        """
        initial_table = {0: 0, 1: 1, 2: 2}
        cache_elem = DataMemoryElement(pickle.dumps(initial_table))
        store = MemoryKeyValueStore(cache_elem)
        # The store starts out with the table loaded from the cache.
        self.assertDictEqual(store._table, initial_table)

        store.remove_many([0, 2])

        cached_table = pickle.loads(cache_elem.get_bytes())
        self.assertDictEqual(cached_table, {1: 1})
Beispiel #15
0
    def test_cacheing_with_map(self) -> None:
        """
        Calling ``cache`` should pickle the current element map into the
        cache element.
        """
        cache_elem = DataMemoryElement()
        element_map = {0: 'a', 75: 'b', 124769: 'c'}

        data_set = DataMemorySet(cache_elem)
        # Install the map directly rather than going through the add API.
        data_set._element_map = element_map
        data_set.cache()

        self.assertFalse(cache_elem.is_empty())
        cached_map = pickle.loads(cache_elem.get_bytes())
        self.assertEqual(cached_map, element_map)
Beispiel #16
0
    def test_persistence_with_update_index(self) -> None:
        """
        Build an index, update it with further descriptors, then create a
        second index instance from the same backing components and check
        that nearest-neighbor queries against it still find every
        descriptor.
        """
        n1 = 100
        n2 = 10
        dim = 8
        set1 = {DescriptorMemoryElement(i) for i in range(n1)}
        set2 = {DescriptorMemoryElement(i)
                for i in range(n1, n1+n2)}
        # Plain loop: the vectors are assigned for their side effect only,
        # so there is no result list worth building.
        for d in (set1 | set2):
            d.set_vector(np.random.rand(dim))

        # Create index with persistent entities
        index_element = DataMemoryElement(
            content_type='application/octet-stream')
        index_param_element = DataMemoryElement(
            content_type='text/plain')
        index = self._make_inst(
            index_element=index_element,
            index_param_element=index_param_element)
        # Keep hold of the backing components so that a second instance can
        # be constructed over them below.
        descriptor_set = index._descriptor_set
        idx2uid_kvs = index._idx2uid_kvs
        uid2idx_kvs = index._uid2idx_kvs

        # Build initial index.
        index.build_index(set1)
        self.assertEqual(index.count(), len(set1))
        for d in set1:
            self.assertIn(d, index._descriptor_set)

        # Update and check that all intended descriptors are present in
        # index.
        index.update_index(set2)
        set_all = set1 | set2
        self.assertEqual(index.count(), len(set_all))
        for d in set_all:
            self.assertIn(d, index._descriptor_set)

        # Drop the first instance and make a new one from the same
        # components.
        del index
        index = self._make_inst(
            descriptor_set=descriptor_set,
            idx2uid_kvs=idx2uid_kvs,
            uid2idx_kvs=uid2idx_kvs,
            index_element=index_element,
            index_param_element=index_param_element)

        # Check that NN can return something from the updated set.
        # - nearest element to the query element when the query is in the
        #   index should be the query element.
        for q in set_all:
            n_elems, _ = index.nn(q)
            self.assertEqual(n_elems[0], q)
Beispiel #17
0
 def test_cacheing_no_map(self) -> None:
     """
     Caching a set that holds no elements should still write to the cache
     element; the written content is a pickled empty map.
     """
     data_set = DataMemorySet(DataMemoryElement())
     assert data_set.cache_element is not None
     data_set.cache()

     # Something was written, even though it is only an empty map.
     self.assertFalse(data_set.cache_element.is_empty())
     cached_map = pickle.loads(data_set.cache_element.get_bytes())
     self.assertEqual(cached_map, {})
Beispiel #18
0
 def test_remove_many_readonly(self) -> None:
     """ Removing many keys from a read-only store raises ReadOnlyError. """
     store = MemoryKeyValueStore(DataMemoryElement(readonly=True))
     with self.assertRaises(ReadOnlyError):
         store.remove_many([0])
Beispiel #19
0
    def test_remove_from_index_last_element_with_cache(self) -> None:
        """
        Removing the only indexed element should leave the index empty and
        the cache element cleared.
        """
        cache_elem = DataMemoryElement()
        ball_tree = SkLearnBallTreeHashIndex(cache_element=cache_elem,
                                             random_seed=0)
        # Single 256-bit hash vector encoding the integer 1.
        hash_vectors = np.ndarray((1, 256), bool)
        hash_vectors[0] = int_to_bit_vector_large(1, 256)

        ball_tree.build_index(hash_vectors)
        self.assertEqual(ball_tree.count(), 1)
        self.assertFalse(cache_elem.is_empty())

        ball_tree.remove_from_index(hash_vectors)
        self.assertEqual(ball_tree.count(), 0)
        self.assertTrue(cache_elem.is_empty())
Beispiel #20
0
 def test_add_many_readonly(self) -> None:
     """ Adding many pairs to a read-only store raises ReadOnlyError. """
     store = MemoryKeyValueStore(DataMemoryElement(readonly=True))
     with self.assertRaises(ReadOnlyError):
         store.add_many({0: 1})
Beispiel #21
0
 def test_caching_readonly_cache(self) -> None:
     """ Caching to a read-only cache element raises ReadOnlyError. """
     data_set = DataMemorySet(DataMemoryElement(readonly=True))
     with self.assertRaises(ReadOnlyError):
         data_set.cache()
Beispiel #22
0
 def test_save_model_with_cache(self, m_savez: mock.MagicMock) -> None:
     """
     Building the internal ball tree on an index that has a cache element
     should invoke the mocked save function exactly once.

     :param m_savez: Mock object whose calls are counted below.
         NOTE(review): presumably injected by a patch decorator outside
         this view -- confirm which function it replaces.
     """
     n_rows, n_bits = 1000, 256
     cache_elem = DataMemoryElement()
     ball_tree = SkLearnBallTreeHashIndex(cache_elem, random_seed=0)
     # Random matrix of 0/1 values, one row per hash code.
     hash_matrix = np.random.randint(0, 2, n_rows * n_bits) \
         .reshape(n_rows, n_bits)
     ball_tree._build_bt_internal(hash_matrix)
     self.assertTrue(m_savez.called)
     self.assertEqual(m_savez.call_count, 1)
Beispiel #23
0
    def test_remove_with_cache(self) -> None:
        """
        Removing a key should update both the in-memory table and the
        pickled table in the cache element.
        """
        initial_table = {0: 1, 'a': 'b'}
        remaining_table = {0: 1}

        cache_elem = DataMemoryElement(pickle.dumps(initial_table))
        store = MemoryKeyValueStore(cache_elem)
        # The store starts out with the table loaded from the cache.
        self.assertDictEqual(store._table, initial_table)

        store.remove('a')

        self.assertDictEqual(store._table, remaining_table)
        cached_table = pickle.loads(cache_elem.get_bytes())
        self.assertDictEqual(cached_table, remaining_table)
Beispiel #24
0
 def test_new_empty_cache(self) -> None:
     """
     Constructing a store with an empty cache element should retain that
     element and start with an empty table.
     """
     cache_elem = DataMemoryElement()
     store = MemoryKeyValueStore(cache_elem)
     self.assertEqual(store._cache_element, cache_elem)
     self.assertEqual(store._table, {})
Beispiel #25
0
 def iter_aug_img_data_elements(
 ) -> Generator[DataMemoryElement, None, None]:
     """
     Yield one in-memory data element per image in ``masked_images``, each
     holding that image saved in BMP format.
     """
     for image in masked_images:
         bmp_buffer = io.BytesIO()
         image.save(bmp_buffer, format="bmp")
         bmp_bytes = bmp_buffer.getvalue()
         yield DataMemoryElement(bmp_bytes, content_type='image/bmp')
Beispiel #26
0
    def test_add_with_caching(self) -> None:
        """
        Key-value pairs added to the store should be reflected in the
        pickled table held by the cache element.
        """
        cache_elem = DataMemoryElement()
        store = MemoryKeyValueStore(cache_elem)

        pairs_to_add = (('a', 'b'), ('foo', None), (0, 89))
        for key, value in pairs_to_add:
            store.add(key, value)

        expected_cache_dict = {'a': 'b', 'foo': None, 0: 89}
        cached_table = pickle.loads(cache_elem.get_bytes())
        self.assertEqual(cached_table, expected_cache_dict)
Beispiel #27
0
    def test_add_read_only(self) -> None:
        """
        Adding to a store whose cache element is read-only raises
        ``ReadOnlyError``.
        """
        store = MemoryKeyValueStore(DataMemoryElement(readonly=True))

        with self.assertRaises(ReadOnlyError):
            store.add('a', 'b')
        with self.assertRaises(ReadOnlyError):
            store.add('foo', None)
Beispiel #28
0
    def test_save_cache_remove_from_index(self) -> None:
        """
        Removing hash vectors from the index should update the values
        stored in the cache element accordingly.
        """
        cache_elem = DataMemoryElement()
        self.assertTrue(cache_elem.is_empty())

        def cached_values() -> set:
            # Current cache content, read back through numpy.
            return set(numpy.load(BytesIO(cache_elem.get_bytes())))

        hash_index = LinearHashIndex(cache_elem)
        initial_vectors = [
            [0, 1, 0],  # 2
            [0, 1, 1],  # 3
            [1, 0, 0],  # 4
            [1, 1, 0],  # 6
        ]
        # noinspection PyTypeChecker
        hash_index.build_index(initial_vectors)
        self.assertFalse(cache_elem.is_empty())
        self.assertSetEqual(cached_values(), {2, 3, 4, 6})

        vectors_to_remove = [
            [0, 1, 1],  # 3
            [1, 0, 0],  # 4
        ]
        # noinspection PyTypeChecker
        hash_index.remove_from_index(vectors_to_remove)
        self.assertFalse(cache_elem.is_empty())
        self.assertSetEqual(cached_values(), {2, 6})
Beispiel #29
0
    def test_init_with_cache(self) -> None:
        """
        Constructing a ``DataMemorySet`` over a cache element holding a
        pickled map should load that map as the element map.
        """
        element_map = {'a': 1, 'b': 2, 'c': 3}
        cache_elem = DataMemoryElement(bytes=pickle.dumps(element_map))

        data_set = DataMemorySet(cache_elem)

        self.assertEqual(data_set.cache_element, cache_elem)
        self.assertEqual(data_set.pickle_protocol, -1)
        self.assertEqual(data_set._element_map, element_map)
Beispiel #30
0
 def test_from_config(self) -> None:
     """
     Build a ``MemoryDescriptorSet`` from a configuration whose cache
     element carries pickled bytes, and check that both the cache element
     and the loaded table match.
     """
     dme_key = 'smqtk_dataprovider.impls.data_element.memory.DataMemoryElement'

     # Cache element holding a pickled table; its bytes are decoded to a
     # string for use in the configuration dictionary.
     table = {'a': 1, 'b': 2, 'c': 3}
     cache_elem = DataMemoryElement(bytes=pickle.dumps(table))
     cache_bytes_str = cache_elem.get_bytes().decode(BYTES_CONFIG_ENCODING)

     config = {
         'cache_element': {
             'type': dme_key,
             dme_key: {'bytes': cache_bytes_str},
         },
     }
     inst = MemoryDescriptorSet.from_config(config)

     self.assertEqual(inst.cache_element, cache_elem)
     self.assertEqual(inst._table, table)