def test_fit_with_cache(self) -> None:
    """
    Fit a functor constructed with two cache elements and check that the
    fitted model is available both on the functor and in the caches.
    """
    # Five 2D training points on the line ``y = x``: (-2, -2) ... (2, 2).
    train_set = []
    for idx in range(5):
        elem = DescriptorMemoryElement('test', idx)
        elem.set_vector([-2. + idx, -2. + idx])
        train_set.append(elem)

    functor = ItqFunctor(DataMemoryElement(), DataMemoryElement(),
                         bit_length=1, random_seed=0)
    assert functor.mean_vec_cache_elem is not None
    assert functor.rotation_cache_elem is not None
    functor.fit(train_set)

    # TODO: Explanation as to why this is the expected result.
    # NOTE(review): presumably follows from the training points being
    # symmetric about the origin along ``y = x`` -- confirm against the
    # ITQ implementation.
    numpy.testing.assert_array_almost_equal(functor.mean_vec, [0, 0])
    numpy.testing.assert_array_almost_equal(
        functor.rotation, [[1 / sqrt(2)], [1 / sqrt(2)]])

    # The same model should be readable back out of the mean-vector cache...
    self.assertIsNotNone(functor.mean_vec_cache_elem)
    mean_vec_buffer = BytesIO(functor.mean_vec_cache_elem.get_bytes())
    # noinspection PyTypeChecker
    numpy.testing.assert_array_almost_equal(
        numpy.load(mean_vec_buffer), [0, 0])

    # ... and out of the rotation cache.
    self.assertIsNotNone(functor.rotation_cache_elem)
    rotation_buffer = BytesIO(functor.rotation_cache_elem.get_bytes())
    # noinspection PyTypeChecker
    numpy.testing.assert_array_almost_equal(
        numpy.load(rotation_buffer), [[1 / sqrt(2)], [1 / sqrt(2)]])
def test_get_hash(self) -> None:
    """
    Check the single-bit hash produced for 2D inputs on either side of,
    and exactly on, the line ``y = -x`` when the model is set by hand.
    """
    # The following "rotation" matrix should cause any 2-feature descriptor
    # to the right of the line ``y = -x`` to be True, and to the left as
    # False. If on the line, should be True.
    itq = ItqFunctor(bit_length=1, random_seed=0)
    itq.mean_vec = numpy.array([0., 0.])
    itq.rotation = numpy.array([[1. / sqrt(2)], [1. / sqrt(2)]])

    # (input vector, expected hash bit) pairs. No fitting is involved, so
    # no training descriptors are required for this test.
    cases = (
        ([1, 1], True),
        ([-1, -1], False),
        ([-1, 1], True),        # on the line
        ([-1.001, 1], False),   # just left of the line
        ([-1, 1.001], True),    # just right of the line
        ([1, -1], True),        # on the line
        ([1, -1.001], False),   # just left of the line
        ([1.001, -1], True),    # just right of the line
    )
    for vec, expected_bit in cases:
        numpy.testing.assert_array_equal(
            itq.get_hash(numpy.array(vec)), [expected_bit],
            err_msg="Unexpected hash for input {}".format(vec))
def test_fit_short_descriptors_for_bit_length(self) -> None:
    """
    Fitting should raise a ``ValueError`` and leave the model unset when
    the descriptors have fewer dimensions than the requested bit length.
    """
    # Should error when input descriptors have fewer dimensions than set bit
    # length for output hash codes (limitation of PCA method currently
    # used).
    expected_msg = \
        "Input descriptors have fewer features than requested bit encoding"

    short_descriptors = []
    for idx in range(3):
        elem = DescriptorMemoryElement('test', idx)
        elem.set_vector([-1 + idx, -1 + idx])
        short_descriptors.append(elem)

    functor = ItqFunctor(bit_length=8)

    # Input given as a list.
    with self.assertRaisesRegex(ValueError, expected_msg):
        functor.fit(short_descriptors)
    self.assertIsNone(functor.mean_vec)
    self.assertIsNone(functor.rotation)

    # Should behave the same when input is an iterable
    with self.assertRaisesRegex(ValueError, expected_msg):
        functor.fit(iter(short_descriptors))
    self.assertIsNone(functor.mean_vec)
    self.assertIsNone(functor.rotation)
def test_save_model_with_writable_caches(self) -> None:
    """
    With both cache elements writable, ``save_model`` should store the
    numpy-serialized mean vector and rotation matrix in them.
    """
    def npy_bytes(array: numpy.ndarray) -> bytes:
        # Serialize ``array`` with ``numpy.save`` into an in-memory buffer.
        buffer = BytesIO()
        # noinspection PyTypeChecker
        numpy.save(buffer, array)
        return buffer.getvalue()

    mean_vec = numpy.array([1, 2, 3])
    rotation = numpy.eye(3)
    mean_vec_bytes = npy_bytes(mean_vec)
    rotation_bytes = npy_bytes(rotation)

    functor = ItqFunctor()
    functor.mean_vec = mean_vec
    functor.rotation = rotation
    functor.mean_vec_cache_elem = DataMemoryElement(readonly=False)
    functor.rotation_cache_elem = DataMemoryElement(readonly=False)
    functor.save_model()

    self.assertEqual(functor.mean_vec_cache_elem.get_bytes(),
                     mean_vec_bytes)
    self.assertEqual(functor.rotation_cache_elem.get_bytes(),
                     rotation_bytes)
def test_save_model_with_read_only_cache(self) -> None:
    """
    ``save_model`` should leave both cache elements empty whenever at
    least one of them is read-only.
    """
    # If one or both cache elements are read-only, no saving.
    functor = ItqFunctor()
    functor.mean_vec = numpy.array([1, 2, 3])
    functor.rotation = numpy.eye(3)

    # (mean-vec cache read-only?, rotation cache read-only?)
    readonly_combinations = (
        (True, False),   # read-only mean-vec cache
        (False, True),   # read-only rotation cache
        (True, True),    # Both read-only
    )
    for mean_vec_ro, rotation_ro in readonly_combinations:
        functor.mean_vec_cache_elem = DataMemoryElement(readonly=mean_vec_ro)
        functor.rotation_cache_elem = DataMemoryElement(readonly=rotation_ro)
        functor.save_model()
        self.assertEqual(functor.mean_vec_cache_elem.get_bytes(), b'')
        self.assertEqual(functor.rotation_cache_elem.get_bytes(), b'')
def test_norm_vector_n2(self) -> None:
    """
    With ``normalize=2``, ``_norm_vector`` should return the input scaled
    to unit L2 length.
    """
    functor = ItqFunctor(normalize=2)

    # Already unit length: expected back unchanged.
    unit_vec = numpy.array([1, 0])
    numpy.testing.assert_array_almost_equal(
        functor._norm_vector(unit_vec), [1, 0])

    # Length sqrt(2): each component is expected to be divided by sqrt(2).
    diagonal_vec = numpy.array([1, 1])
    numpy.testing.assert_array_almost_equal(
        functor._norm_vector(diagonal_vec),
        [1. / sqrt(2), 1. / sqrt(2)])
def test_fit_has_model(self) -> None:
    """
    ``fit`` should raise a ``RuntimeError`` when the functor already has a
    mean vector and rotation set.
    """
    # When trying to run fit where there is already a mean vector and
    # rotation set.
    functor = ItqFunctor()
    functor.mean_vec = 'sim vec'
    functor.rotation = 'sim rot'
    with self.assertRaisesRegex(
            RuntimeError, "Model components have already been loaded."):
        functor.fit([])
def _make_ftor_itq(
    self, bits: int = 32
) -> Tuple[ItqFunctor, Callable[[Iterable[DescriptorElement]], None]]:
    """
    Build an ITQ functor along with a function that fits it.

    :param bits: Value passed as the functor's ``bit_length``.
    :return: The new functor (seeded with ``self.RANDOM_SEED``) and a
        callable that fits that functor on an iterable of descriptors.
    """
    functor = ItqFunctor(bit_length=bits, random_seed=self.RANDOM_SEED)

    def fit_functor(d_iter: Iterable[DescriptorElement]) -> None:
        # Closure over ``functor``; any return value of ``fit`` is dropped.
        functor.fit(d_iter)

    return functor, fit_functor
def test_norm_vector_no_normalization(self) -> None:
    """
    With ``normalize=None``, ``_norm_vector`` should return values equal
    to its input.
    """
    functor = ItqFunctor(normalize=None)

    inputs = (
        numpy.array([0, 1]),
        numpy.array([[0, 1, 1, .4, .1]]),
        numpy.array([0] * 128),
    )
    for vec in inputs:
        numpy.testing.assert_array_equal(functor._norm_vector(vec), vec)
def test_save_model_no_caches(self) -> None:
    """
    ``save_model`` on a functor constructed without cache elements should
    leave both cache attributes as ``None``.
    """
    expected_mean_vec = numpy.array([1, 2, 3])
    expected_rotation = numpy.eye(3)

    # Cache variables should remain None after save.
    itq = ItqFunctor()
    itq.mean_vec = expected_mean_vec
    itq.rotation = expected_rotation
    itq.save_model()
    self.assertIsNone(itq.mean_vec_cache_elem)
    # Previously the mean-vec cache was asserted twice and the rotation
    # cache was never checked.
    self.assertIsNone(itq.rotation_cache_elem)
def test_get_config_no_cache(self) -> None:
    """
    ``get_config`` should echo the constructor parameters and report a
    ``None`` type for both caches when no cache elements were given.
    """
    functor = ItqFunctor(bit_length=1, itq_iterations=2, normalize=3,
                         random_seed=4)
    config = functor.get_config()

    expected_scalars = (
        ('bit_length', 1),
        ('itq_iterations', 2),
        ('normalize', 3),
        ('random_seed', 4),
    )
    for key, expected_value in expected_scalars:
        self.assertEqual(config[key], expected_value)

    for cache_key in ('mean_vec_cache', 'rotation_cache'):
        self.assertIsNone(config[cache_key]['type'])
def test_configuration(self) -> None:
    """
    Check that every instance produced by ``configuration_test_helper``
    from a configured index has the expected component types and settings.
    """
    source_index = LSHNearestNeighborIndex(
        lsh_functor=ItqFunctor(),
        descriptor_set=MemoryDescriptorSet(),
        hash2uuids_kvstore=MemoryKeyValueStore(),
        hash_index=LinearHashIndex(),
        distance_method='euclidean',
        read_only=True,
    )
    for inst in configuration_test_helper(source_index):  # type: LSHNearestNeighborIndex
        # Component types.
        assert isinstance(inst.lsh_functor, LshFunctor)
        assert isinstance(inst.descriptor_set, MemoryDescriptorSet)
        assert isinstance(inst.hash_index, LinearHashIndex)
        assert isinstance(inst.hash2uuids_kvstore, MemoryKeyValueStore)
        # Plain-value settings.
        assert inst.distance_method == 'euclidean'
        assert inst.read_only is True
def test_has_model(self) -> None:
    """
    ``has_model`` should be true only when both the mean vector and the
    rotation are set.
    """
    functor = ItqFunctor()
    # with no vector/rotation set, should return false.
    self.assertFalse(functor.has_model())

    # If only one of the two is None, then false should be returned.
    partial_models = (
        ('mean vec', None),
        (None, 'rotation'),
    )
    for mean_vec, rotation in partial_models:
        functor.mean_vec = mean_vec
        functor.rotation = rotation
        self.assertFalse(functor.has_model())

    # If both are not None, return true.
    functor.mean_vec = 'mean vec'
    functor.rotation = 'rotation'
    self.assertTrue(functor.has_model())
def test_fit(self) -> None:
    """
    Fit a functor constructed without cache elements and check the
    resulting model, and that the cache attributes stay ``None``.
    """
    # Five 2D training points on the line ``y = x``: (-2, -2) ... (2, 2).
    train_set = []
    for idx in range(5):
        elem = DescriptorMemoryElement('test', idx)
        elem.set_vector([-2. + idx, -2. + idx])
        train_set.append(elem)

    functor = ItqFunctor(bit_length=1, random_seed=0)
    functor.fit(train_set)

    # TODO: Explanation as to why this is the expected result.
    # NOTE(review): presumably follows from the training points being
    # symmetric about the origin along ``y = x`` -- confirm against the
    # ITQ implementation.
    numpy.testing.assert_array_almost_equal(functor.mean_vec, [0, 0])
    numpy.testing.assert_array_almost_equal(
        functor.rotation, [[1 / sqrt(2)], [1 / sqrt(2)]])

    # No cache elements were given, so none should exist after fitting.
    self.assertIsNone(functor.mean_vec_cache_elem)
    self.assertIsNone(functor.rotation_cache_elem)
def test_get_config_with_cache_elements(self) -> None:
    """
    ``get_config`` should echo the constructor parameters and include the
    configuration of both cache elements when they are set.
    """
    itq = ItqFunctor(bit_length=5, itq_iterations=6, normalize=7,
                     random_seed=8)
    itq.mean_vec_cache_elem = DataMemoryElement(b'cached vec bytes')
    itq.rotation_cache_elem = DataMemoryElement(b'cached rot bytes')

    c = itq.get_config()
    self.assertEqual(c['bit_length'], 5)
    self.assertEqual(c['itq_iterations'], 6)
    self.assertEqual(c['normalize'], 7)
    self.assertEqual(c['random_seed'], 8)

    dme_key = 'smqtk_dataprovider.impls.data_element.memory.DataMemoryElement'
    # Both caches should name the memory element implementation as their
    # type; previously only the mean-vec cache type was asserted.
    self.assertEqual(c['mean_vec_cache']['type'], dme_key)
    self.assertEqual(c['rotation_cache']['type'], dme_key)
    # Check using string encodings of set bytes (JSON compliant).
    self.assertEqual(c['mean_vec_cache'][dme_key]['bytes'],
                     'cached vec bytes')
    self.assertEqual(c['rotation_cache'][dme_key]['bytes'],
                     'cached rot bytes')