Ejemplo n.º 1
0
    def test_fit_with_cache(self) -> None:
        """
        Fit a 1-bit functor on five 2D descriptors with both cache elements
        supplied, then check the in-memory model as well as the arrays
        deserialized from each cache element's bytes.
        """
        training_set = []
        for idx in range(5):
            elem = DescriptorMemoryElement('test', idx)
            elem.set_vector([-2. + idx, -2. + idx])
            training_set.append(elem)

        # TODO: Explanation as to why this is the expected result.
        expected_mean = [0, 0]
        expected_rotation = [[1 / sqrt(2)], [1 / sqrt(2)]]

        functor = ItqFunctor(DataMemoryElement(),
                             DataMemoryElement(),
                             bit_length=1,
                             random_seed=0)
        assert functor.mean_vec_cache_elem is not None
        assert functor.rotation_cache_elem is not None
        functor.fit(training_set)

        # In-memory model components.
        numpy.testing.assert_array_almost_equal(functor.mean_vec,
                                                expected_mean)
        numpy.testing.assert_array_almost_equal(functor.rotation,
                                                expected_rotation)

        # Mean vector as read back from its cache element.
        self.assertIsNotNone(functor.mean_vec_cache_elem)
        mean_buffer = BytesIO(functor.mean_vec_cache_elem.get_bytes())
        # noinspection PyTypeChecker
        numpy.testing.assert_array_almost_equal(numpy.load(mean_buffer),
                                                expected_mean)

        # Rotation matrix as read back from its cache element.
        self.assertIsNotNone(functor.rotation_cache_elem)
        rotation_buffer = BytesIO(functor.rotation_cache_elem.get_bytes())
        # noinspection PyTypeChecker
        numpy.testing.assert_array_almost_equal(numpy.load(rotation_buffer),
                                                expected_rotation)
Ejemplo n.º 2
0
    def test_norm_vector_n2(self) -> None:
        """
        With ``normalize=2``, ``_norm_vector`` should yield each input
        divided by its Euclidean length.
        """
        functor = ItqFunctor(normalize=2)

        # Pairs of (input vector, expected normalized vector).
        cases = (
            ([1, 0], [1, 0]),
            ([1, 1], [1. / sqrt(2), 1. / sqrt(2)]),
        )
        for raw, expected in cases:
            normalized = functor._norm_vector(numpy.array(raw))
            numpy.testing.assert_array_almost_equal(normalized, expected)
Ejemplo n.º 3
0
 def test_fit_has_model(self) -> None:
     """
     Calling ``fit`` when a mean vector and rotation are already set
     should raise a ``RuntimeError``.
     """
     functor = ItqFunctor()
     # Placeholder values: only their presence matters here.
     functor.mean_vec = 'sim vec'
     functor.rotation = 'sim rot'
     with self.assertRaisesRegex(
             RuntimeError, "Model components have already been loaded."):
         functor.fit([])
Ejemplo n.º 4
0
    def test_norm_vector_no_normalization(self) -> None:
        """
        With ``normalize=None``, ``_norm_vector`` should return values equal
        to its input for 1D, 2D and all-zero arrays.
        """
        functor = ItqFunctor(normalize=None)

        samples = (
            numpy.array([0, 1]),
            numpy.array([[0, 1, 1, .4, .1]]),
            numpy.array([0] * 128),
        )
        for sample in samples:
            numpy.testing.assert_array_equal(functor._norm_vector(sample),
                                             sample)
Ejemplo n.º 5
0
    def test_configuration_with_caches(self) -> None:
        """
        Build a configuration whose cache elements carry numpy-serialized
        arrays as decoded strings, and check that ``from_config`` restores
        both the model components and the scalar parameters.
        """
        def _as_config_str(arr: numpy.ndarray) -> str:
            # Serialize with ``numpy.save`` and decode the raw bytes into the
            # string form used in the configuration dictionary.
            buf = BytesIO()
            # noinspection PyTypeChecker
            numpy.save(buf, arr)
            return buf.getvalue().decode(BYTES_CONFIG_ENCODING)

        mean_vec_str = _as_config_str(numpy.array([1, 2, 3]))
        rotation_str = _as_config_str(numpy.eye(3))

        dme_key = \
            'smqtk_dataprovider.impls.data_element.memory.DataMemoryElement'
        overrides = {
            'mean_vec_cache': {
                dme_key: {'bytes': mean_vec_str},
                'type': dme_key,
            },
            'rotation_cache': {
                dme_key: {'bytes': rotation_str},
                'type': dme_key,
            },
            'bit_length': 153,
            'itq_iterations': 7,
            'normalize': 2,
            'random_seed': 58,
        }
        config = merge_dict(ItqFunctor.get_default_config(), overrides)

        functor = ItqFunctor.from_config(config)

        # The model should have been loaded from the cache elements.
        numpy.testing.assert_equal(functor.mean_vec, [1, 2, 3])
        numpy.testing.assert_equal(functor.rotation,
                                   [[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        # Scalar parameters should match the overrides above.
        self.assertEqual(functor.bit_length, 153)
        self.assertEqual(functor.itq_iterations, 7)
        self.assertEqual(functor.normalize, 2)
        self.assertEqual(functor.random_seed, 58)
Ejemplo n.º 6
0
 def test_get_config_no_cache(self) -> None:
     """
     Without cache elements, ``get_config`` should echo the constructor
     parameters and report a ``None`` type for both cache slots.
     """
     params = {
         'bit_length': 1,
         'itq_iterations': 2,
         'normalize': 3,
         'random_seed': 4,
     }
     config = ItqFunctor(**params).get_config()

     for key, value in params.items():
         self.assertEqual(config[key], value)
     for cache_key in ('mean_vec_cache', 'rotation_cache'):
         self.assertIsNone(config[cache_key]['type'])
Ejemplo n.º 7
0
    def test_save_model_with_writable_caches(self) -> None:
        """
        When both cache elements are writable, ``save_model`` should leave
        each one holding the ``numpy.save`` serialization of the matching
        model component.
        """
        mean_vec = numpy.array([1, 2, 3])
        rotation = numpy.eye(3)

        # Reference serializations to compare the cache contents against.
        mean_buffer = BytesIO()
        rotation_buffer = BytesIO()
        # noinspection PyTypeChecker
        numpy.save(mean_buffer, mean_vec)
        # noinspection PyTypeChecker
        numpy.save(rotation_buffer, rotation)
        mean_bytes = mean_buffer.getvalue()
        rotation_bytes = rotation_buffer.getvalue()

        functor = ItqFunctor()
        functor.mean_vec = mean_vec
        functor.rotation = rotation
        functor.mean_vec_cache_elem = DataMemoryElement(readonly=False)
        functor.rotation_cache_elem = DataMemoryElement(readonly=False)

        functor.save_model()

        self.assertEqual(functor.mean_vec_cache_elem.get_bytes(), mean_bytes)
        self.assertEqual(functor.rotation_cache_elem.get_bytes(),
                         rotation_bytes)
Ejemplo n.º 8
0
    def test_fit(self) -> None:
        """
        Fit a 1-bit functor on five 2D descriptors without cache elements and
        check the resulting mean vector and rotation; both cache attributes
        should still be ``None`` afterwards.
        """
        training_set = []
        for idx in range(5):
            elem = DescriptorMemoryElement('test', idx)
            elem.set_vector([-2. + idx, -2. + idx])
            training_set.append(elem)

        functor = ItqFunctor(bit_length=1, random_seed=0)
        functor.fit(training_set)

        # TODO: Explanation as to why this is the expected result.
        expected_mean = [0, 0]
        expected_rotation = [[1 / sqrt(2)], [1 / sqrt(2)]]
        numpy.testing.assert_array_almost_equal(functor.mean_vec,
                                                expected_mean)
        numpy.testing.assert_array_almost_equal(functor.rotation,
                                                expected_rotation)
        self.assertIsNone(functor.mean_vec_cache_elem)
        self.assertIsNone(functor.rotation_cache_elem)
Ejemplo n.º 9
0
    def test_fit_short_descriptors_for_bit_length(self) -> None:
        """
        ``fit`` should raise a ``ValueError`` when the input descriptors have
        fewer dimensions than the configured bit length (limitation of the
        PCA method currently used), and should leave the model unset.
        """
        training_set = []
        for idx in range(3):
            elem = DescriptorMemoryElement('test', idx)
            elem.set_vector([-1 + idx, -1 + idx])
            training_set.append(elem)

        error_pattern = \
            "Input descriptors have fewer features than requested bit encoding"
        functor = ItqFunctor(bit_length=8)

        # Input given as a concrete list.
        with self.assertRaisesRegex(ValueError, error_pattern):
            functor.fit(training_set)
        self.assertIsNone(functor.mean_vec)
        self.assertIsNone(functor.rotation)

        # Should behave the same when input is a one-shot iterator.
        with self.assertRaisesRegex(ValueError, error_pattern):
            functor.fit(iter(training_set))
        self.assertIsNone(functor.mean_vec)
        self.assertIsNone(functor.rotation)
Ejemplo n.º 10
0
    def _make_ftor_itq(
        self,
        bits: int = 32
    ) -> Tuple[ItqFunctor, Callable[[Iterable[DescriptorElement]], None]]:
        """
        Create an ``ItqFunctor`` seeded with ``self.RANDOM_SEED`` together
        with a callable that fits it.

        :param bits: Bit length to construct the functor with.
        :return: The functor and a function that fits it on an iterable of
            descriptor elements.
        """
        functor = ItqFunctor(bit_length=bits, random_seed=self.RANDOM_SEED)

        def fit_functor(descriptors: Iterable[DescriptorElement]) -> None:
            # Discard whatever ``fit`` returns so the callable yields None.
            functor.fit(descriptors)

        return functor, fit_functor
Ejemplo n.º 11
0
    def test_get_config_with_cache_elements(self) -> None:
        """
        With cache elements set, ``get_config`` should report the constructor
        parameters plus, for each cache slot, the element type and its bytes
        in string form.
        """
        itq = ItqFunctor(bit_length=5,
                         itq_iterations=6,
                         normalize=7,
                         random_seed=8)
        itq.mean_vec_cache_elem = DataMemoryElement(b'cached vec bytes')
        itq.rotation_cache_elem = DataMemoryElement(b'cached rot bytes')

        c = itq.get_config()
        self.assertEqual(c['bit_length'], 5)
        self.assertEqual(c['itq_iterations'], 6)
        self.assertEqual(c['normalize'], 7)
        self.assertEqual(c['random_seed'], 8)
        dme_key = 'smqtk_dataprovider.impls.data_element.memory.DataMemoryElement'
        # Both cache slots should name the memory element implementation;
        # the rotation slot is checked alongside the mean-vector slot.
        self.assertEqual(c['mean_vec_cache']['type'], dme_key)
        self.assertEqual(c['rotation_cache']['type'], dme_key)
        # Check using string encodings of set bytes (JSON compliant).
        self.assertEqual(c['mean_vec_cache'][dme_key]['bytes'],
                         'cached vec bytes')
        self.assertEqual(c['rotation_cache'][dme_key]['bytes'],
                         'cached rot bytes')
Ejemplo n.º 12
0
 def test_configuration(self) -> None:
     """
     Run an ``LSHNearestNeighborIndex`` built around an ``ItqFunctor``
     through ``configuration_test_helper`` and check the attributes of
     every instance it yields.
     """
     index = LSHNearestNeighborIndex(
         lsh_functor=ItqFunctor(),
         descriptor_set=MemoryDescriptorSet(),
         hash2uuids_kvstore=MemoryKeyValueStore(),
         hash_index=LinearHashIndex(),
         distance_method='euclidean',
         read_only=True,
     )
     for inst in configuration_test_helper(index):  # type: LSHNearestNeighborIndex
         # Component types should match what was passed in above.
         assert isinstance(inst.lsh_functor, LshFunctor)
         assert isinstance(inst.descriptor_set, MemoryDescriptorSet)
         assert isinstance(inst.hash_index, LinearHashIndex)
         assert isinstance(inst.hash2uuids_kvstore, MemoryKeyValueStore)
         # Scalar options should match as well.
         assert inst.distance_method == 'euclidean'
         assert inst.read_only is True
Ejemplo n.º 13
0
    def test_save_model_no_caches(self) -> None:
        """
        ``save_model`` on a functor constructed without cache elements should
        leave both cache attributes as ``None``.
        """
        expected_mean_vec = numpy.array([1, 2, 3])
        expected_rotation = numpy.eye(3)

        # Cache variables should remain None after save.
        itq = ItqFunctor()
        itq.mean_vec = expected_mean_vec
        itq.rotation = expected_rotation
        itq.save_model()
        self.assertIsNone(itq.mean_vec_cache_elem)
        # Check the rotation cache too; the mean-vector attribute used to be
        # asserted twice, leaving this one unverified.
        self.assertIsNone(itq.rotation_cache_elem)
Ejemplo n.º 14
0
 def test_is_usable(self) -> None:
     """
     ``ItqFunctor.is_usable`` should report true; per the original note
     the implementation has no non-standard dependencies.
     """
     usable = ItqFunctor.is_usable()
     self.assertTrue(usable)
Ejemplo n.º 15
0
 def test_has_model(self) -> None:
     """
     ``has_model`` should be true only when both the mean vector and the
     rotation are set.
     """
     functor = ItqFunctor()
     # Freshly constructed: no vector/rotation set, so no model.
     self.assertFalse(functor.has_model())

     # (mean_vec, rotation, expected has_model) applied in sequence: the
     # two partially-set states first, then the fully-set state.
     states = (
         ('mean vec', None, False),
         (None, 'rotation', False),
         ('mean vec', 'rotation', True),
     )
     for mean_vec, rotation, expected in states:
         functor.mean_vec = mean_vec
         functor.rotation = rotation
         if expected:
             self.assertTrue(functor.has_model())
         else:
             self.assertFalse(functor.has_model())
Ejemplo n.º 16
0
    def test_get_hash(self) -> None:
        """
        Check ``get_hash`` output for points around the line ``y = -x`` using
        a manually assigned mean vector and rotation (no fitting involved).
        """
        # The following "rotation" matrix should cause any 2-feature descriptor
        # to the right of the line ``y = -x`` to be True, and to the left as
        # False. If on the line, should be True.
        itq = ItqFunctor(bit_length=1, random_seed=0)
        itq.mean_vec = numpy.array([0., 0.])
        itq.rotation = numpy.array([[1. / sqrt(2)], [1. / sqrt(2)]])

        # (input vector, expected single hash bit)
        cases = (
            # Clearly on either side of the line.
            ([1, 1], True),
            ([-1, -1], False),
            # On the line in the upper-left quadrant, then nudged to each
            # side of it.
            ([-1, 1], True),
            ([-1.001, 1], False),
            ([-1, 1.001], True),
            # On the line in the lower-right quadrant, then nudged to each
            # side of it.
            ([1, -1], True),
            ([1, -1.001], False),
            ([1.001, -1], True),
        )
        for vector, expected_bit in cases:
            numpy.testing.assert_array_equal(
                itq.get_hash(numpy.array(vector)), [expected_bit])
Ejemplo n.º 17
0
    def test_save_model_with_read_only_cache(self) -> None:
        """
        If one or both cache elements are read-only, ``save_model`` should
        write to neither of them.
        """
        functor = ItqFunctor()
        functor.mean_vec = numpy.array([1, 2, 3])
        functor.rotation = numpy.eye(3)

        # (mean-vec cache read-only?, rotation cache read-only?)
        readonly_combinations = (
            (True, False),   # read-only mean-vec cache
            (False, True),   # read-only rotation cache
            (True, True),    # both read-only
        )
        for mean_readonly, rotation_readonly in readonly_combinations:
            functor.mean_vec_cache_elem = \
                DataMemoryElement(readonly=mean_readonly)
            functor.rotation_cache_elem = \
                DataMemoryElement(readonly=rotation_readonly)
            functor.save_model()
            # Both fresh elements should still be empty.
            self.assertEqual(functor.mean_vec_cache_elem.get_bytes(), b'')
            self.assertEqual(functor.rotation_cache_elem.get_bytes(), b'')
Ejemplo n.º 18
0
 def test_default_configuration(self) -> None:
     """
     An instance built from the default configuration should report that
     same configuration back from ``get_config``.
     """
     default_config = ItqFunctor.get_default_config()
     round_tripped = ItqFunctor.from_config(default_config).get_config()
     self.assertEqual(round_tripped, default_config)