Esempio n. 1
0
    def test_none_set(self) -> None:
        # Check that passing None to set_vector clears a previously stored
        # vector.
        element = DescriptorMemoryElement(0)
        # Nothing has been set yet, so no vector should be reported.
        self.assertFalse(element.has_vector())

        # Store a vector and confirm it is reported and readable.
        element.set_vector(numpy.ones(16))
        self.assertTrue(element.has_vector())
        numpy.testing.assert_equal(element.vector(), numpy.ones(16))

        # Setting None should return the element to the "no vector" state.
        element.set_vector(None)
        self.assertFalse(element.has_vector())
        self.assertIsNone(element.vector())
Esempio n. 2
0
 def test_output_immutability(self) -> None:
     # Arrays handed out by vector() must not expose the stored data:
     # modifying a returned array should leave later reads untouched.
     source = numpy.ones(16)
     element = DescriptorMemoryElement(0)
     self.assertFalse(element.has_vector())
     element.set_vector(source)

     # Zero the first returned array in place.
     first_read = element.vector()
     assert first_read is not None
     first_read[:] = 0
     self.assertEqual(first_read.sum(), 0)

     # A second read should still yield the original sixteen ones.
     second_read = element.vector()
     assert second_read is not None
     self.assertEqual(second_read.sum(), 16)
Esempio n. 3
0
    def test_input_immutability(self) -> None:
        # Stored vectors must not follow later changes made to the array
        # (or parent matrix) they were originally set from.

        #
        # Case 1: a single stand-alone vector
        #
        vec = numpy.random.rand(16)
        # Immutable snapshot of the values before the source is modified.
        vec_truth = tuple(vec.copy())
        element = DescriptorMemoryElement(0)
        element.set_vector(vec)
        vec[:] = 0
        self.assertTrue((vec == 0).all())
        self.assertFalse(sum(vec_truth) == 0.)
        numpy.testing.assert_equal(element.vector(), vec_truth)

        #
        # Case 2: vectors that are rows of a shared matrix
        #
        matrix = numpy.random.rand(20, 16)
        rows = [matrix[3], matrix[15], matrix[19]]

        # Immutable snapshots (copies) of each row's original values.
        truths = [tuple(row.copy()) for row in rows]

        # One element per row, using UIDs 1, 2, 3.
        elements = []
        for uid, row in enumerate(rows, start=1):
            elem = DescriptorMemoryElement(uid)
            elem.set_vector(row)
            elements.append(elem)

        for row, elem in zip(rows, elements):
            numpy.testing.assert_equal(row, elem.vector())

        # Zeroing the matrix zeroes the row arrays, but the stored vectors
        # are expected to keep the original values.
        matrix[:, :] = 0.
        for row in rows:
            self.assertTrue((row == 0).all())
        for truth in truths:
            self.assertFalse(sum(truth) == 0.)
        for elem, truth in zip(elements, truths):
            numpy.testing.assert_equal(elem.vector(), truth)
Esempio n. 4
0
    def test_set_state_version_1(self) -> None:
        # Older state format: a (uid, numpy.save-serialized vector bytes)
        # tuple passed to __setstate__ should still be accepted.
        expected_uid = 'test-uid'
        expected_v = numpy.array([1, 2, 3])

        # Serialize the expected vector into an in-memory byte string.
        buffer = BytesIO()
        # noinspection PyTypeChecker
        numpy.save(buffer, expected_v)
        old_style_state = (expected_uid, buffer.getvalue())

        # The constructor UID is a throw-away value; __setstate__ is expected
        # to replace it.
        # noinspection PyTypeChecker
        element = DescriptorMemoryElement('unexpected-uid')
        element.__setstate__(old_style_state)

        self.assertEqual(element.uuid(), expected_uid)
        numpy.testing.assert_array_equal(element.vector(), expected_v)
Esempio n. 5
0
    def _random_euclidean(
        self,
        hash_ftor: LshFunctor,
        hash_idx: Optional[HashIndex],
        ftor_train_hook: Callable[[Iterable[DescriptorElement]],
                                  None] = lambda d: None
    ) -> None:
        # Shared body for euclidean-distance nearest-neighbor checks on an
        # LSH index built over random descriptors.
        #
        # :param hash_ftor: Hash functor passed to the LSH index for
        #   generating descriptor hash codes.
        # :param hash_idx: Hash index instance (or None) passed to the LSH
        #   index as ``hash_index``.
        # :param ftor_train_hook: Callable invoked with the generated
        #   descriptors before the index is built, for functors that need
        #   training. Defaults to a no-op.

        n_descriptors = 1000
        dim = 256

        # Seeded so the generated descriptors are reproducible.
        np.random.seed(self.RANDOM_SEED)
        descriptors = []
        for uid in range(n_descriptors):
            elem = DescriptorMemoryElement('random', uid)
            elem.set_vector(np.random.rand(dim))
            descriptors.append(elem)

        ftor_train_hook(descriptors)

        index = LSHNearestNeighborIndex(
            hash_ftor,
            MemoryDescriptorSet(),
            MemoryKeyValueStore(),
            hash_index=hash_idx,
            distance_method='euclidean',
        )
        index.build_index(descriptors)

        # Querying with an indexed descriptor and k=1 should return that
        # same descriptor.
        member_query = descriptors[255]
        neighbors, dists = index.nn(member_query, 1)
        self.assertEqual(neighbors[0], member_query)

        # Query with a slightly perturbed copy of an indexed vector: the
        # nearest neighbor should be the descriptor it was derived from.
        base_elem = descriptors[0]
        near_query = DescriptorMemoryElement('query', n_descriptors)
        base_vec = base_elem.vector()
        assert base_vec is not None
        perturbed = base_vec.copy()
        # Shift the first and last components by max(min value, 0.1) in
        # opposite directions.
        delta = max(perturbed.min(), 0.1)
        perturbed[0] += delta
        perturbed[dim - 1] -= delta
        near_query.set_vector(perturbed)
        neighbors, dists = index.nn(near_query, 1)
        self.assertFalse(np.array_equal(near_query.vector(),
                                        base_elem.vector()))
        self.assertEqual(neighbors[0], base_elem)

        # Fresh random query vector.
        random_query = DescriptorMemoryElement('query', n_descriptors + 1)
        random_query.set_vector(np.random.rand(dim))

        # For both a small k and k equal to the build-set size, returned
        # distances must be strictly increasing.
        for k in (10, n_descriptors):
            neighbors, dists = index.nn(random_query, k)
            for previous, current in zip(dists, dists[1:]):
                self.assertGreater(current, previous)