Beispiel #1
0
 def test_remove_from_index(self):
     # Removing hashes that are present must actually drop them from the
     # index set.
     hash_index = LinearHashIndex()
     hash_index.index = {0, 1, 2}
     # Bit vectors [0, 0] and [1, 0] are the integer hashes 0 and 2.
     to_remove = [[0, 0], [1, 0]]
     # noinspection PyTypeChecker
     hash_index.remove_from_index(to_remove)
     remaining = {1}
     self.assertSetEqual(hash_index.index, remaining)
Beispiel #2
0
 def test_build_index_with_cache(self):
     # Building with a cache element attached should fill the in-memory
     # index and leave the cache element non-empty.
     cache = DataMemoryElement()
     hash_index = LinearHashIndex(cache)
     bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
     # noinspection PyTypeChecker
     hash_index.build_index(bit_vectors)
     nose.tools.assert_equal(hash_index.index, {1, 2, 3, 4})
     nose.tools.assert_false(cache.is_empty())
Beispiel #3
0
    def test_save_cache_remove_from_index(self):
        # The cache element content must follow the index through a removal.
        cache = DataMemoryElement()
        self.assertTrue(cache.is_empty())

        def cached_hashes():
            # Decode the hash set currently serialized in the cache element.
            return set(numpy.load(BytesIO(cache.get_bytes())))

        hash_index = LinearHashIndex(cache)
        initial_vectors = [
            [0, 1, 0],  # 2
            [0, 1, 1],  # 3
            [1, 0, 0],  # 4
            [1, 1, 0],  # 6
        ]
        # noinspection PyTypeChecker
        hash_index.build_index(initial_vectors)
        self.assertFalse(cache.is_empty())
        self.assertSetEqual(cached_hashes(), {2, 3, 4, 6})

        removed_vectors = [
            [0, 1, 1],  # 3
            [1, 0, 0],  # 4
        ]
        # noinspection PyTypeChecker
        hash_index.remove_from_index(removed_vectors)
        self.assertFalse(cache.is_empty())
        self.assertSetEqual(cached_hashes(), {2, 6})
Beispiel #4
0
 def test_from_config_with_cache(self):
     # A config that names a cache element type should produce an index
     # with that kind of cache element attached and nothing indexed yet.
     config = LinearHashIndex.get_default_config()
     cache_type = \
         'smqtk.representation.data_element.memory_element.DataMemoryElement'
     config['cache_element']['type'] = cache_type
     hash_index = LinearHashIndex.from_config(config)
     self.assertIsInstance(hash_index.cache_element, DataMemoryElement)
     self.assertEqual(hash_index.index, set())
Beispiel #5
0
    def nn(self, d, n=1):
        """
        Return the nearest `N` neighbors to the given descriptor element.

        Candidate descriptors are collected through the hash index (hashes
        near the hash of ``d``) and then re-ranked by this index's distance
        function applied to the actual descriptor vectors.

        :param d: Descriptor element to compute the neighbors of.
        :type d: smqtk.representation.DescriptorElement

        :param n: Number of nearest neighbors to find.
        :type n: int

        :return: Two-element sequence: a tuple of the nearest N
            DescriptorElement instances, and a tuple of the distance values
            to those neighbors (same order, ascending distance).
        :rtype: (tuple[smqtk.representation.DescriptorElement], tuple[float])

        """
        super(LSHNearestNeighborIndex, self).nn(d, n)

        self._log.debug("generating hash for descriptor")
        d_v = d.vector()
        d_h = self.lsh_functor.get_hash(d_v)

        def comp_descr_dist(d2_v):
            return self._distance_function(d_v, d2_v)

        self._log.debug("getting near hashes")
        hi = self.hash_index
        # Make on-the-fly linear index if we weren't originally set with one
        if hi is None:
            hi = LinearHashIndex()
            # not calling ``build_index`` because we already have the int
            # hashes.
            with self._hash2uuid_lock:
                # Materialize the keys first: under Python 3 ``keys()`` is a
                # view, and ``numpy.array`` of a view is a 0-d object array
                # rather than an array of the hash values.
                hi.index = numpy.array(list(self._hash2uuid.keys()))
        # Hash-space distances are not needed; only the hashes themselves.
        hashes, _ = hi.nn(d_h, n)

        self._log.debug("getting UUIDs of descriptors for nearby hashes")
        neighbor_uuids = []
        with self._hash2uuid_lock:
            for h_int in map(bit_vector_to_int_large, hashes):
                # If descriptor hash not in our map, we effectively skip it
                neighbor_uuids.extend(self._hash2uuid.get(h_int, ()))
        self._log.debug("-- matched %d UUIDs", len(neighbor_uuids))

        self._log.debug("getting descriptors for neighbor_uuids")
        neighbors = \
            list(self.descriptor_index.get_many_descriptors(neighbor_uuids))

        self._log.debug("ordering descriptors via distance method '%s'",
                        self.distance_method)
        self._log.debug('-- getting element vectors')
        neighbor_vectors = elements_to_matrix(neighbors,
                                              report_interval=1.0)
        self._log.debug('-- calculating distances')
        # Concrete list so the result does not depend on ``map`` being eager
        # (Python 2) or lazy (Python 3).
        distances = list(map(comp_descr_dist, neighbor_vectors))
        self._log.debug('-- ordering')
        ordered = sorted(zip(neighbors, distances),
                         key=lambda p: p[1])
        self._log.debug('-- slicing top n=%d', n)
        # Return a real sequence, not a one-shot ``zip`` iterator, so callers
        # can index or re-iterate the result on Python 3 as they could on
        # Python 2.
        return list(zip(*(ordered[:n])))
Beispiel #6
0
 def test_update_index_no_index(self):
     # Updating an index that has nothing in it yet should give the same
     # result as calling build_index on it.
     hash_index = LinearHashIndex()
     new_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
     # noinspection PyTypeChecker
     hash_index.update_index(new_vectors)
     self.assertEqual(hash_index.index, {1, 2, 3, 4})
     self.assertIsNone(hash_index.cache_element)
Beispiel #7
0
 def test_remove_from_index(self):
     # Hashes that are present in the index must be gone after removal.
     hash_index = LinearHashIndex()
     hash_index.index = {0, 1, 2}
     doomed = [
         [0, 0],  # 0
         [1, 0],  # 2
     ]
     # noinspection PyTypeChecker
     hash_index.remove_from_index(doomed)
     self.assertSetEqual(hash_index.index, {1})
Beispiel #8
0
 def test_build_index_no_cache(self):
     # Building without a cache element fills the index and leaves the
     # cache element unset.
     hash_index = LinearHashIndex()
     bit_vectors = [
         [0, 1, 0],  # 2
         [1, 0, 0],  # 4
         [0, 1, 1],  # 3
         [0, 0, 1],  # 1
     ]
     # noinspection PyTypeChecker
     hash_index.build_index(bit_vectors)
     self.assertEqual(hash_index.index, {1, 2, 3, 4})
     self.assertIsNone(hash_index.cache_element)
Beispiel #9
0
    def test_save_cache(self):
        # Building an index with a cache element attached must write some
        # bytes into that element.
        cache = DataMemoryElement()
        nose.tools.assert_true(cache.is_empty())

        hash_index = LinearHashIndex(cache)
        bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
        # noinspection PyTypeChecker
        hash_index.build_index(bit_vectors)
        nose.tools.assert_false(cache.is_empty())
        cached_bytes = cache.get_bytes()
        nose.tools.assert_true(len(cached_bytes) > 0)
Beispiel #10
0
    def nn(self, d, n=1):
        """
        Return the nearest `N` neighbors to the given descriptor element.

        Candidate descriptors are collected through the hash index (hashes
        near the hash of ``d``) and then re-ranked by this index's distance
        function applied to the actual descriptor vectors.

        :param d: Descriptor element to compute the neighbors of.
        :type d: smqtk.representation.DescriptorElement

        :param n: Number of nearest neighbors to find.
        :type n: int

        :return: Two-element sequence: a tuple of the nearest N
            DescriptorElement instances, and a tuple of the distance values
            to those neighbors (same order, ascending distance).
        :rtype: (tuple[smqtk.representation.DescriptorElement], tuple[float])

        """
        super(LSHNearestNeighborIndex, self).nn(d, n)

        self._log.debug("generating hash for descriptor")
        d_v = d.vector()
        d_h = self.lsh_functor.get_hash(d_v)

        def comp_descr_dist(d2_v):
            return self._distance_function(d_v, d2_v)

        self._log.debug("getting near hashes")
        hi = self.hash_index
        if hi is None:
            # Make on-the-fly linear index
            hi = LinearHashIndex()
            # not calling ``build_index`` because we already have the int
            # hashes.
            hi.index = numpy.array(list(self.hash2uuids_kvstore.keys()))
        near_hashes, _ = hi.nn(d_h, n)

        self._log.debug("getting UUIDs of descriptors for nearby hashes")
        neighbor_uuids = []
        for h_int in map(bit_vector_to_int_large, near_hashes):
            # If descriptor hash not in our map, we effectively skip it
            #: :type: collections.Iterable
            near_uuids = self.hash2uuids_kvstore.get(h_int, ())
            neighbor_uuids.extend(near_uuids)
        self._log.debug("-- matched %d UUIDs", len(neighbor_uuids))

        self._log.debug("getting descriptors for neighbor_uuids")
        neighbors = \
            list(self.descriptor_index.get_many_descriptors(neighbor_uuids))

        self._log.debug("ordering descriptors via distance method '%s'",
                        self.distance_method)
        self._log.debug('-- getting element vectors')
        neighbor_vectors = elements_to_matrix(neighbors, report_interval=1.0)
        self._log.debug('-- calculating distances')
        # Concrete list so the result does not depend on ``map`` being eager
        # (Python 2) or lazy (Python 3).
        distances = list(map(comp_descr_dist, neighbor_vectors))
        self._log.debug('-- ordering')
        ordered = sorted(zip(neighbors, distances), key=lambda p: p[1])
        self._log.debug('-- slicing top n=%d', n)
        # Return a real sequence, not a one-shot ``zip`` iterator, so callers
        # can index or re-iterate the result on Python 3 as they could on
        # Python 2.
        return list(zip(*(ordered[:n])))
Beispiel #11
0
 def test_build_index_with_cache(self):
     # Building with a cache element attached should fill the in-memory
     # index and leave the cache element non-empty.
     cache = DataMemoryElement()
     hash_index = LinearHashIndex(cache)
     bit_vectors = [
         [0, 1, 0],  # 2
         [1, 0, 0],  # 4
         [0, 1, 1],  # 3
         [0, 0, 1],  # 1
     ]
     # noinspection PyTypeChecker
     hash_index.build_index(bit_vectors)
     self.assertEqual(hash_index.index, {1, 2, 3, 4})
     self.assertFalse(cache.is_empty())
Beispiel #12
0
 def test_remove_from_index_single_not_in_index(self):
     # Removing one hash that the index does not hold must raise KeyError
     # and leave the index as it was.
     hash_index = LinearHashIndex()
     hash_index.index = {0, 1, 2}
     missing = [[1, 0, 0]]  # 4
     with self.assertRaises(KeyError):
         hash_index.remove_from_index(missing)
     self.assertSetEqual(hash_index.index, {0, 1, 2})
Beispiel #13
0
 def test_remove_from_index_single_not_in_index(self):
     # A single hash absent from the index cannot be removed: expect a
     # KeyError and an unchanged index.
     starting_hashes = {0, 1, 2}
     hash_index = LinearHashIndex()
     hash_index.index = set(starting_hashes)
     absent_vector = [1, 0, 0]  # 4
     with self.assertRaises(KeyError):
         hash_index.remove_from_index([absent_vector])
     self.assertSetEqual(hash_index.index, starting_hashes)
Beispiel #14
0
    def test_get_config(self):
        hash_index = LinearHashIndex()
        expected = LinearHashIndex.get_default_config()

        # No cache element set: the instance config equals the default one.
        self.assertEqual(hash_index.get_config(), expected)

        # Cache element set: its type must show up in the config.
        hash_index.cache_element = DataMemoryElement()
        expected['cache_element']['type'] = 'DataMemoryElement'
        self.assertEqual(hash_index.get_config(), expected)
Beispiel #15
0
 def test_update_index_no_index(self):
     # Updating an index that has nothing in it yet should give the same
     # result as calling build_index on it.
     hash_index = LinearHashIndex()
     new_vectors = [
         [0, 1, 0],  # 2
         [1, 0, 0],  # 4
         [0, 1, 1],  # 3
         [0, 0, 1],  # 1
     ]
     # noinspection PyTypeChecker
     hash_index.update_index(new_vectors)
     self.assertEqual(hash_index.index, {1, 2, 3, 4})
     self.assertIsNone(hash_index.cache_element)
Beispiel #16
0
    def test_load_cache(self):
        # Two indices sharing one cache element should end up with the same
        # index content once the first has been built.
        shared_cache = DataMemoryElement()
        writer = LinearHashIndex(shared_cache)
        bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
        # noinspection PyTypeChecker
        writer.build_index(bit_vectors)

        # The second index is expected to load the cache on construction.
        reader = LinearHashIndex(shared_cache)

        nose.tools.assert_equal(writer.cache_element, reader.cache_element)
        nose.tools.assert_equal(writer.index, reader.index)
Beispiel #17
0
 def test_nn(self):
     hash_index = LinearHashIndex()
     # noinspection PyTypeChecker
     hash_index.build_index([[0, 1, 0], [1, 1, 0], [0, 1, 1], [0, 0, 1]])
     codes, dists = hash_index.nn([0, 0, 0], 4)
     # Codes are compared as sets within each distance tier, so the order
     # among equally distant codes is not checked.
     one_bit_away = {tuple(code) for code in codes[:2]}
     nose.tools.assert_equal(one_bit_away, {(0, 1, 0), (0, 0, 1)})
     two_bits_away = {tuple(code) for code in codes[2:]}
     nose.tools.assert_equal(two_bits_away, {(1, 1, 0), (0, 1, 1)})
     expected_dists = (1 / 3., 1 / 3., 2 / 3., 2 / 3.)
     numpy.testing.assert_array_almost_equal(dists, expected_dists)
Beispiel #18
0
 def test_update_index_add_hashes(self):
     # Updating a populated index must add the new hashes to the old ones.
     hash_index = LinearHashIndex()
     first_batch = [
         [0, 0],  # 0
         [0, 1],  # 1
     ]
     # noinspection PyTypeChecker
     hash_index.build_index(first_batch)
     self.assertSetEqual(hash_index.index, {0, 1})
     second_batch = [
         [1, 0],  # 2
         [1, 1],  # 3
     ]
     # noinspection PyTypeChecker
     hash_index.update_index(second_batch)
     self.assertSetEqual(hash_index.index, {0, 1, 2, 3})
Beispiel #19
0
    def test_save_cache_build_index(self):
        # After a build, the cache element must hold the serialized hashes.
        cache = DataMemoryElement()
        self.assertTrue(cache.is_empty())

        hash_index = LinearHashIndex(cache)
        bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
        # noinspection PyTypeChecker
        hash_index.build_index(bit_vectors)
        self.assertFalse(cache.is_empty())
        # Decode the cached bytes and compare against the expected hashes.
        cached_hashes = set(numpy.load(BytesIO(cache.get_bytes())))
        self.assertSetEqual({1, 2, 3, 4}, cached_hashes)
Beispiel #20
0
 def test_remove_from_index_one_of_many_not_in_index(self):
     # A removal batch in which one hash is missing from the index must
     # raise KeyError.
     hash_index = LinearHashIndex()
     hash_index.index = {0, 1, 2}
     batch = [
         [0, 0],  # 0
         [0, 1],  # 1
         [1, 1],  # 3 -- not in the index
     ]
     with self.assertRaises(KeyError):
         hash_index.remove_from_index(batch)
     # The failed removal must leave the index untouched.
     self.assertSetEqual(hash_index.index, {0, 1, 2})
Beispiel #21
0
    def test_load_cache(self):
        # Two indices sharing one cache element should end up with the same
        # index content once the first has been built.
        shared_cache = DataMemoryElement()
        writer = LinearHashIndex(shared_cache)
        bit_vectors = [
            [0, 1, 0],  # 2
            [1, 0, 0],  # 4
            [0, 1, 1],  # 3
            [0, 0, 1],  # 1
        ]
        # noinspection PyTypeChecker
        writer.build_index(bit_vectors)

        # The second index is expected to load the cache on construction.
        reader = LinearHashIndex(shared_cache)

        self.assertEqual(writer.cache_element, reader.cache_element)
        self.assertEqual(writer.index, reader.index)
Beispiel #22
0
    def test_save_cache_build_index(self):
        # After a build, the cache element must hold the serialized hashes.
        cache = DataMemoryElement()
        self.assertTrue(cache.is_empty())

        hash_index = LinearHashIndex(cache)
        bit_vectors = [
            [0, 1, 0],  # 2
            [1, 0, 0],  # 4
            [0, 1, 1],  # 3
            [0, 0, 1],  # 1
        ]
        # noinspection PyTypeChecker
        hash_index.build_index(bit_vectors)
        self.assertFalse(cache.is_empty())
        # Decode the cached bytes and compare against the expected hashes.
        cached_hashes = set(numpy.load(BytesIO(cache.get_bytes())))
        self.assertSetEqual({1, 2, 3, 4}, cached_hashes)
Beispiel #23
0
 def test_nn(self):
     hash_index = LinearHashIndex()
     indexed_vectors = [
         [0, 1, 0],
         [1, 1, 0],
         [0, 1, 1],
         [0, 0, 1],
     ]
     # noinspection PyTypeChecker
     hash_index.build_index(indexed_vectors)
     # noinspection PyTypeChecker
     codes, dists = hash_index.nn([0, 0, 0], 4)
     # Codes are compared as sets within each distance tier, so the order
     # among equally distant codes is not checked.
     one_bit_away = {tuple(code) for code in codes[:2]}
     self.assertEqual(one_bit_away, {(0, 1, 0), (0, 0, 1)})
     two_bits_away = {tuple(code) for code in codes[2:]}
     self.assertEqual(two_bits_away, {(1, 1, 0), (0, 1, 1)})
     expected_dists = (1/3., 1/3., 2/3., 2/3.)
     numpy.testing.assert_array_almost_equal(dists, expected_dists)
Beispiel #24
0
 def test_remove_from_index_one_of_many_not_in_index(self):
     # A removal batch in which one hash is missing from the index must
     # raise KeyError.
     starting_hashes = {0, 1, 2}
     hash_index = LinearHashIndex()
     hash_index.index = set(starting_hashes)
     batch = [
         [0, 0],  # 0
         [0, 1],  # 1
         [1, 1],  # 3 -- not in the index
     ]
     with self.assertRaises(KeyError):
         hash_index.remove_from_index(batch)
     # The failed removal must leave the index untouched.
     self.assertSetEqual(hash_index.index, starting_hashes)
Beispiel #25
0
    def test_update_index_with_hash_index(self):
        # Counterpart of `test_update_index_add_new_descriptors` that runs
        # with a linear hash index attached.
        descriptor_set = MemoryDescriptorSet()
        kvstore = MemoryKeyValueStore()
        linear_index = LinearHashIndex()
        nn_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                           kvstore, linear_index)

        first_batch = [DescriptorMemoryElement('t', uid)
                       for uid in range(5)]
        second_batch = [DescriptorMemoryElement('t', uid)
                        for uid in (5, 6)]
        # One-element vectors equal to the UUID keep the dummy hash
        # predictable.
        for descriptor in first_batch + second_batch:
            descriptor.set_vector(np.ones(1, float) * descriptor.uuid())

        # After the initial build only the first batch's hashes are present.
        nn_index.build_index(first_batch)
        self.assertSetEqual(linear_index.index, {0, 1, 2, 3, 4})

        # After the update the hash index must cover both batches.
        nn_index.update_index(second_batch)
        self.assertSetEqual(linear_index.index, {0, 1, 2, 3, 4, 5, 6})
Beispiel #26
0
    def test_save_cache_remove_from_index(self):
        # The cache element content must follow the index through a removal.
        cache = DataMemoryElement()
        self.assertTrue(cache.is_empty())

        def cached_hashes():
            # Decode the hash set currently serialized in the cache element.
            return set(numpy.load(BytesIO(cache.get_bytes())))

        hash_index = LinearHashIndex(cache)
        initial_vectors = [
            [0, 1, 0],  # 2
            [0, 1, 1],  # 3
            [1, 0, 0],  # 4
            [1, 1, 0],  # 6
        ]
        # noinspection PyTypeChecker
        hash_index.build_index(initial_vectors)
        self.assertFalse(cache.is_empty())
        self.assertSetEqual(cached_hashes(), {2, 3, 4, 6})

        removed_vectors = [
            [0, 1, 1],  # 3
            [1, 0, 0],  # 4
        ]
        # noinspection PyTypeChecker
        hash_index.remove_from_index(removed_vectors)
        self.assertFalse(cache.is_empty())
        self.assertSetEqual(cached_hashes(), {2, 6})
Beispiel #27
0
 def test_configuration(self):
     # Every instance yielded by the configuration helper must carry the
     # same component types and settings as the source instance.
     source = LSHNearestNeighborIndex(
         lsh_functor=ItqFunctor(),
         descriptor_set=MemoryDescriptorSet(),
         hash2uuids_kvstore=MemoryKeyValueStore(),
         hash_index=LinearHashIndex(),
         distance_method='euclidean',
         read_only=True,
     )
     expected_types = (
         ('lsh_functor', LshFunctor),
         ('descriptor_set', MemoryDescriptorSet),
         ('hash_index', LinearHashIndex),
         ('hash2uuids_kvstore', MemoryKeyValueStore),
     )
     for inst in configuration_test_helper(
             source):  # type: LSHNearestNeighborIndex
         for attr_name, attr_type in expected_types:
             assert isinstance(getattr(inst, attr_name), attr_type)
         assert inst.distance_method == 'euclidean'
         assert inst.read_only is True
Beispiel #28
0
 def test_update_index_add_hashes(self):
     # Updating a populated index must add the new hashes to the old ones.
     hash_index = LinearHashIndex()
     first_batch = [[0, 0], [0, 1]]
     # noinspection PyTypeChecker
     hash_index.build_index(first_batch)
     self.assertSetEqual(hash_index.index, {0, 1})
     second_batch = [[1, 0], [1, 1]]
     # noinspection PyTypeChecker
     hash_index.update_index(second_batch)
     self.assertSetEqual(hash_index.index, {0, 1, 2, 3})
Beispiel #29
0
    def test_get_config(self):
        hash_index = LinearHashIndex()
        expected = LinearHashIndex.get_default_config()

        # No cache element set: the instance config equals the default one.
        nose.tools.assert_equal(hash_index.get_config(), expected)

        # Cache element set: its type must show up in the config.
        hash_index.cache_element = DataMemoryElement()
        expected['cache_element']['type'] = 'DataMemoryElement'
        nose.tools.assert_equal(hash_index.get_config(), expected)
Beispiel #30
0
    def test_get_config(self):
        hash_index = LinearHashIndex()
        expected = LinearHashIndex.get_default_config()

        # No cache element set: the instance config equals the default one.
        self.assertEqual(hash_index.get_config(), expected)

        # Cache element set: its fully qualified type must show up in the
        # config.
        hash_index.cache_element = DataMemoryElement()
        cache_type = \
            'smqtk.representation.data_element.memory_element.DataMemoryElement'
        expected['cache_element']['type'] = cache_type
        self.assertEqual(hash_index.get_config(), expected)
Beispiel #31
0
    def test_build_index_fresh_build_with_hash_index(self):
        # A fresh build through the LSH index must also populate the
        # attached linear hash index.
        descriptor_set = MemoryDescriptorSet()
        kvstore = MemoryKeyValueStore()
        linear_index = LinearHashIndex()
        nn_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                           kvstore, linear_index)

        elements = [DescriptorMemoryElement('t', uid) for uid in range(5)]
        # One-element vectors holding the element's position keep the dummy
        # hash predictable.
        for position, element in enumerate(elements):
            element.set_vector(np.ones(1, float) * position)
        nn_index.build_index(elements)
        # The hash index receives hash vectors and the linear hash index
        # stores them as integers.
        self.assertEqual(linear_index.index, {0, 1, 2, 3, 4})
Beispiel #32
0
 def test_from_config_with_cache(self):
     # A config that names a cache element type should produce an index
     # with that kind of cache element attached and nothing indexed yet.
     config = LinearHashIndex.get_default_config()
     config['cache_element']['type'] = "DataMemoryElement"
     hash_index = LinearHashIndex.from_config(config)
     self.assertIsInstance(hash_index.cache_element, DataMemoryElement)
     self.assertEqual(hash_index.index, set())
Beispiel #33
0
 def test_build_index_no_input(self):
     # Building from an empty hash collection must be rejected with a
     # ValueError carrying the expected message.
     hash_index = LinearHashIndex()
     expected_message = "No hashes given to index"
     with nose.tools.assert_raises_regexp(ValueError, expected_message):
         hash_index.build_index([])
Beispiel #34
0
 def test_from_config_no_cache(self):
     # The default config is usable as-is and yields an index with no cache
     # element and nothing indexed.
     default_config = LinearHashIndex.get_default_config()
     hash_index = LinearHashIndex.from_config(default_config)
     self.assertIsNone(hash_index.cache_element)
     self.assertEqual(hash_index.index, set())
Beispiel #35
0
 def test_build_index_no_cache(self):
     # Building without a cache element fills the index and leaves the
     # cache element unset.
     hash_index = LinearHashIndex()
     bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
     # noinspection PyTypeChecker
     hash_index.build_index(bit_vectors)
     nose.tools.assert_equal(hash_index.index, {1, 2, 3, 4})
     nose.tools.assert_is_none(hash_index.cache_element)
Beispiel #36
0
 def test_from_config_no_cache(self):
     # The default config is usable as-is and yields an index with no cache
     # element and nothing indexed.
     default_config = LinearHashIndex.get_default_config()
     hash_index = LinearHashIndex.from_config(default_config)
     nose.tools.assert_is_none(hash_index.cache_element)
     nose.tools.assert_equal(hash_index.index, set())
Beispiel #37
0
 def test_from_config_with_cache(self):
     # A config that names a cache element type should produce an index
     # with that kind of cache element attached and nothing indexed yet.
     config = LinearHashIndex.get_default_config()
     config['cache_element']['type'] = "DataMemoryElement"
     hash_index = LinearHashIndex.from_config(config)
     nose.tools.assert_is_instance(hash_index.cache_element,
                                   DataMemoryElement)
     nose.tools.assert_equal(hash_index.index, set())
Beispiel #38
0
 def test_is_usable(self):
     # Expected to always hold, since this implementation does not have
     # special dependencies.
     usable = LinearHashIndex.is_usable()
     nose.tools.assert_true(usable)
Beispiel #39
0
 def test_default_config(self):
     # The default config has a single entry, and its cache element type
     # is unset.
     default_config = LinearHashIndex.get_default_config()
     nose.tools.assert_equal(len(default_config), 1)
     cache_type = default_config['cache_element']['type']
     nose.tools.assert_is_none(cache_type)
Beispiel #40
0
    def _nn(self, d, n=1):
        """
        Sub-class hook that finds the ``n`` indexed descriptors nearest to
        ``d``.

        Candidates are gathered through the hash index (hashes near the hash
        of ``d``) and then re-ranked with ``self._distance_function`` on the
        descriptor vectors.

        By the time this is called, the caller has already checked that ``d``
        holds a vector and that this index is not empty.

        :param d: Descriptor element to compute the neighbors of.
        :type d: smqtk.representation.DescriptorElement

        :param n: Number of nearest neighbors to find.
        :type n: int

        :return: Tuple of nearest N DescriptorElement instances, and a tuple of
            the distance values to those neighbors.
        :rtype: (tuple[smqtk.representation.DescriptorElement], tuple[float])

        """
        self._log.debug("generating hash for descriptor")
        query_vector = d.vector()
        query_hash = self.lsh_functor.get_hash(query_vector)

        with self._model_lock:
            self._log.debug("getting near hashes")
            hash_index = self.hash_index
            if hash_index is None:
                # No hash index configured: use a throw-away linear one.
                # Its ``index`` is assigned directly instead of going through
                # ``build_index`` because the kvstore keys are already the
                # integer hashes.
                hash_index = LinearHashIndex()
                known_hashes = list(self.hash2uuids_kvstore.keys())
                hash_index.index = numpy.array(known_hashes)
            near_hashes, _ = hash_index.nn(query_hash, n)

            self._log.debug("getting UUIDs of descriptors for nearby hashes")
            # Flatten the per-hash UUID sets into one list.  A hash missing
            # from the kvstore contributes nothing.
            neighbor_uuids = [
                uuid
                for hash_int in map(bit_vector_to_int_large, near_hashes)
                for uuid in self.hash2uuids_kvstore.get(hash_int, set())
            ]
            self._log.debug("-- matched %d UUIDs", len(neighbor_uuids))

            self._log.debug("getting descriptors for neighbor_uuids")
            candidates = list(
                self.descriptor_index.get_many_descriptors(neighbor_uuids)
            )

        # The model is no longer touched below, so the lock is released here.

        self._log.debug("ordering descriptors via distance method '%s'",
                        self.distance_method)
        self._log.debug('-- getting element vectors')
        candidate_vectors = elements_to_matrix(candidates,
                                               report_interval=1.0)
        self._log.debug('-- calculating distances')
        distances = [self._distance_function(query_vector, vector)
                     for vector in candidate_vectors]
        self._log.debug('-- ordering')
        ranked = list(zip(candidates, distances))
        ranked.sort(key=lambda pair: pair[1])
        self._log.debug('-- slicing top n=%d', n)
        closest = ranked[:n]
        return list(zip(*closest))
Beispiel #41
0
 def test_default_config(self):
     # The default config has a single entry, and its cache element type
     # is unset.
     default_config = LinearHashIndex.get_default_config()
     self.assertEqual(len(default_config), 1)
     cache_type = default_config['cache_element']['type']
     self.assertIsNone(cache_type)
Beispiel #42
0
 def test_build_index_no_input(self):
     # Building from an empty hash collection must raise ValueError.
     hash_index = LinearHashIndex()
     with self.assertRaises(ValueError):
         hash_index.build_index([])
Beispiel #43
0
 def test_update_index_no_input(self):
     # Updating with an empty hash collection must raise ValueError.
     hash_index = LinearHashIndex()
     with self.assertRaises(ValueError):
         hash_index.update_index([])
Beispiel #44
0
 def test_is_usable(self):
     # Expected to always hold, since this implementation does not have
     # special dependencies.
     usable = LinearHashIndex.is_usable()
     self.assertTrue(usable)
Beispiel #45
0
 def test_save_cache_readonly(self):
     # Building an index whose cache element is read-only must fail with a
     # ValueError that mentions the read-only state.
     readonly_cache = DataMemoryElement(readonly=True)
     hash_index = LinearHashIndex(readonly_cache)
     bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
     with nose.tools.assert_raises_regexp(ValueError, "is read-only"):
         hash_index.build_index(bit_vectors)
Beispiel #46
0
 def test_build_index_no_cache(self):
     # Building without a cache element fills the index and leaves the
     # cache element unset.
     hash_index = LinearHashIndex()
     bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
     # noinspection PyTypeChecker
     hash_index.build_index(bit_vectors)
     self.assertEqual(hash_index.index, {1, 2, 3, 4})
     self.assertIsNone(hash_index.cache_element)
Beispiel #47
0
 def _make_hi_linear(self):
     """Create a new ``LinearHashIndex`` built with default arguments."""
     linear_index = LinearHashIndex()
     return linear_index
Beispiel #48
0
    def _nn(self, d, n=1):
        """
        Sub-class hook that finds the ``n`` indexed descriptors nearest to
        ``d``.

        Candidates are gathered through the hash index (hashes near the hash
        of ``d``) and then re-ranked with ``self._distance_function`` on the
        descriptor vectors.

        By the time this is called, the caller has already checked that ``d``
        holds a vector and that this index is not empty.

        :param d: Descriptor element to compute the neighbors of.
        :type d: smqtk.representation.DescriptorElement

        :param n: Number of nearest neighbors to find.
        :type n: int

        :return: Tuple of nearest N DescriptorElement instances, and a tuple of
            the distance values to those neighbors.
        :rtype: (tuple[smqtk.representation.DescriptorElement], tuple[float])

        """
        self._log.debug("generating hash for descriptor")
        query_vector = d.vector()
        query_hash = self.lsh_functor.get_hash(query_vector)

        with self._model_lock:
            self._log.debug("getting near hashes")
            hash_index = self.hash_index
            if hash_index is None:
                # No hash index configured: use a throw-away linear one.
                # Its ``index`` is assigned directly instead of going through
                # ``build_index`` because the kvstore keys are already the
                # integer hashes.
                hash_index = LinearHashIndex()
                known_hashes = set(self.hash2uuids_kvstore.keys())
                hash_index.index = known_hashes
            near_hashes, _ = hash_index.nn(query_hash, n)

            self._log.debug("getting UUIDs of descriptors for nearby hashes")
            # Flatten the per-hash UUID sets into one list.  A hash missing
            # from the kvstore contributes nothing.
            neighbor_uuids = [
                uuid
                for hash_int in map(bit_vector_to_int_large, near_hashes)
                for uuid in self.hash2uuids_kvstore.get(hash_int, set())
            ]
            self._log.debug("-- matched %d UUIDs", len(neighbor_uuids))

            self._log.debug("getting descriptors for neighbor_uuids")
            candidates = list(
                self.descriptor_index.get_many_descriptors(neighbor_uuids)
            )

        # The model is no longer touched below, so the lock is released here.

        self._log.debug("ordering descriptors via distance method '%s'",
                        self.distance_method)
        self._log.debug('-- getting element vectors')
        candidate_vectors = elements_to_matrix(candidates,
                                               report_interval=1.0)
        self._log.debug('-- calculating distances')
        distances = [self._distance_function(query_vector, vector)
                     for vector in candidate_vectors]
        self._log.debug('-- ordering')
        ranked = list(zip(candidates, distances))
        ranked.sort(key=lambda pair: pair[1])
        self._log.debug('-- slicing top n=%d', n)
        closest = ranked[:n]
        return list(zip(*closest))