Esempio n. 1
0
    def _make_ftor_itq(self, bits=32):
        """
        Build an ITQ functor together with a callable that fits it.

        :param bits: Bit length handed to the ``ItqFunctor`` constructor.
        :return: Tuple of the functor and a one-argument function that fits
            that same functor on the data it is given.
        """
        functor = ItqFunctor(random_seed=self.RANDOM_SEED, bit_length=bits)

        def itq_fit(D):
            # Whatever ``fit`` returns is intentionally discarded.
            functor.fit(D)

        return functor, itq_fit
Esempio n. 2
0
    def nearestNeighborIndex(item, user, descriptorSet):
        """
        Get the nearest neighbor index from a given item and descriptor set.

        :param item: Item to find the nn index from, usually the item that the
            user is performing the nearest neighbors search on.
        :param user: The owner of the .smqtk folder.
        :param descriptorSet: The relevant descriptor set.
        :returns: A read-only LSH nearest neighbor index, or None when looking
            up any of the required model files in the .smqtk folder failed.
        """
        folder = ModelImporter.model('folder')

        # Every data element below shares the same API root and token.
        _GirderDataElement = functools.partial(GirderDataElement,
                                               api_root=getApiUrl(),
                                               token=getCurrentToken()['_id'])

        # The .smqtk folder lives next to the item; it is created if missing.
        smqtkFolder = folder.createFolder(folder.load(item['folderId'], user=user), '.smqtk',
                                          reuseExisting=True)

        try:
            meanVecFileId = localSmqtkFileIdFromName(smqtkFolder, 'mean_vec.npy')
            rotationFileId = localSmqtkFileIdFromName(smqtkFolder, 'rotation.npy')
            hash2uuidsFileId = localSmqtkFileIdFromName(smqtkFolder, 'hash2uuids.pickle')
        except Exception:
            # Deliberately best-effort: any lookup failure is reported as
            # "no index available" rather than propagated to the caller.
            logger.warning('SMQTK files didn\'t exist for performing NN on %s',
                           item['_id'])
            return None

        # TODO Should these be Girder data elements? Unnecessary HTTP requests.
        functor = ItqFunctor(mean_vec_cache=_GirderDataElement(meanVecFileId),
                             rotation_cache=_GirderDataElement(rotationFileId))

        hash2uuidsKV = MemoryKeyValueStore(_GirderDataElement(hash2uuidsFileId))

        return LSHNearestNeighborIndex(functor, descriptorSet,
                                       hash2uuidsKV, read_only=True)
Esempio n. 3
0
    def test_save_model_with_writable_caches(self):
        # With both cache elements writable, save_model is expected to leave
        # the numpy-serialized mean vector and rotation matrix in them.
        def npy_bytes(array):
            # Serialize ``array`` via ``numpy.save`` into an in-memory buffer.
            buf = six.BytesIO()
            # noinspection PyTypeChecker
            numpy.save(buf, array)
            return buf.getvalue()

        mean_vec = numpy.array([1, 2, 3])
        rotation = numpy.eye(3)
        mean_vec_bytes = npy_bytes(mean_vec)
        rotation_bytes = npy_bytes(rotation)

        itq = ItqFunctor()
        itq.mean_vec = mean_vec
        itq.rotation = rotation
        itq.mean_vec_cache_elem = DataMemoryElement(readonly=False)
        itq.rotation_cache_elem = DataMemoryElement(readonly=False)
        itq.save_model()

        self.assertEqual(itq.mean_vec_cache_elem.get_bytes(), mean_vec_bytes)
        self.assertEqual(itq.rotation_cache_elem.get_bytes(), rotation_bytes)
Esempio n. 4
0
    def test_fit_short_descriptors_for_bit_length(self):
        # Fitting is expected to raise a ValueError when the descriptors have
        # fewer dimensions (2 here) than the requested bit length (8 here),
        # and the model attributes must stay unset afterwards.
        expected_msg = ("Input descriptors have fewer features than requested "
                        "bit encoding")

        fit_descriptors = []
        for idx in range(3):
            elem = DescriptorMemoryElement(six.b('test'), idx)
            elem.set_vector([idx - 1, idx - 1])
            fit_descriptors.append(elem)

        itq = ItqFunctor(bit_length=8)

        # Input given as a concrete list.
        with self.assertRaisesRegex(ValueError, expected_msg):
            itq.fit(fit_descriptors)
        self.assertIsNone(itq.mean_vec)
        self.assertIsNone(itq.rotation)

        # Same expectation when the input is a one-shot iterator.
        with self.assertRaisesRegex(ValueError, expected_msg):
            itq.fit(iter(fit_descriptors))
        self.assertIsNone(itq.mean_vec)
        self.assertIsNone(itq.rotation)
Esempio n. 5
0
    def test_save_model_with_read_only_cache(self):
        # Whenever at least one cache element is read-only, nothing should be
        # written: both cache elements are expected to remain empty.
        itq = ItqFunctor()
        itq.mean_vec = numpy.array([1, 2, 3])
        itq.rotation = numpy.eye(3)

        # (mean-vec cache read-only?, rotation cache read-only?)
        readonly_cases = (
            (True, False),
            (False, True),
            (True, True),
        )
        for mean_vec_ro, rotation_ro in readonly_cases:
            itq.mean_vec_cache_elem = DataMemoryElement(readonly=mean_vec_ro)
            itq.rotation_cache_elem = DataMemoryElement(readonly=rotation_ro)
            itq.save_model()
            self.assertEqual(itq.mean_vec_cache_elem.get_bytes(), six.b(''))
            self.assertEqual(itq.rotation_cache_elem.get_bytes(), six.b(''))
Esempio n. 6
0
    def test_get_hash(self):
        # The model is assigned directly below, so no fitting (and therefore no
        # fit descriptors) is needed for this test.
        #
        # The following "rotation" matrix should cause any 2-feature descriptor
        # to the right of the line ``y = -x`` to be True, and to the left as
        # False. If on the line, should be True.
        itq = ItqFunctor(bit_length=1, random_seed=0)
        itq.mean_vec = numpy.array([0., 0.])
        itq.rotation = numpy.array([[1. / sqrt(2)], [1. / sqrt(2)]])

        # Points clearly on either side of the line.
        numpy.testing.assert_array_equal(itq.get_hash(numpy.array([1, 1])),
                                         [True])
        numpy.testing.assert_array_equal(itq.get_hash(numpy.array([-1, -1])),
                                         [False])

        # On the line in the upper-left quadrant, then nudged to each side.
        numpy.testing.assert_array_equal(itq.get_hash(numpy.array([-1, 1])),
                                         [True])
        numpy.testing.assert_array_equal(
            itq.get_hash(numpy.array([-1.001, 1])), [False])
        numpy.testing.assert_array_equal(
            itq.get_hash(numpy.array([-1, 1.001])), [True])

        # On the line in the lower-right quadrant, then nudged to each side.
        numpy.testing.assert_array_equal(itq.get_hash(numpy.array([1, -1])),
                                         [True])
        numpy.testing.assert_array_equal(
            itq.get_hash(numpy.array([1, -1.001])), [False])
        numpy.testing.assert_array_equal(
            itq.get_hash(numpy.array([1.001, -1])), [True])
Esempio n. 7
0
    def test_fit_with_cache(self):
        # Fit on five 2-feature descriptors, (-2, -2) through (2, 2), with
        # cache elements supplied; the fitted model should also be readable
        # back from those cache elements.
        fit_descriptors = []
        for idx in range(5):
            elem = DescriptorMemoryElement(six.b('test'), idx)
            elem.set_vector([idx - 2., idx - 2.])
            fit_descriptors.append(elem)

        mean_vec_cache = DataMemoryElement()
        rotation_cache = DataMemoryElement()
        itq = ItqFunctor(mean_vec_cache, rotation_cache,
                         bit_length=1, random_seed=0)
        itq.fit(fit_descriptors)

        # TODO: Explanation as to why this is the expected result.
        expected_mean_vec = [0, 0]
        expected_rotation = [[1 / sqrt(2)], [1 / sqrt(2)]]
        numpy.testing.assert_array_almost_equal(itq.mean_vec,
                                                expected_mean_vec)
        numpy.testing.assert_array_almost_equal(itq.rotation,
                                                expected_rotation)

        # The cached bytes should load back to the same model components.
        self.assertIsNotNone(itq.mean_vec_cache_elem)
        cached_mean_vec = numpy.load(
            six.BytesIO(itq.mean_vec_cache_elem.get_bytes()))
        numpy.testing.assert_array_almost_equal(cached_mean_vec,
                                                expected_mean_vec)

        self.assertIsNotNone(itq.rotation_cache_elem)
        cached_rotation = numpy.load(
            six.BytesIO(itq.rotation_cache_elem.get_bytes()))
        numpy.testing.assert_array_almost_equal(cached_rotation,
                                                expected_rotation)
Esempio n. 8
0
    def _nearestNeighborIndex(sid, descriptor_set):
        """
        Build the read-only nearest neighbor index belonging to a session.

        :param sid: ID of the session
        :param descriptor_set: The descriptor set corresponding to the session
            id, see _descriptorSetFromSessionId.
        :returns: Nearest neighbor index or None if no session exists
        :rtype: LSHNearestNeighborIndex|None
        """
        session = ModelImporter.model('item').findOne({'_id': ObjectId(sid)})
        if not session:
            return None

        # Only the folder's ID is known here, so a minimal folder document is
        # passed to the file lookup helper.
        smqtkFolder = {'_id': ObjectId(session['meta']['smqtk_folder_id'])}

        def _element(file_name):
            # Data element for the named file inside the session's folder.
            return smqtkDataElementFromGirderFileId(
                localSmqtkFileIdFromName(smqtkFolder, file_name))

        functor = ItqFunctor(_element('mean_vec.npy'),
                             _element('rotation.npy'))
        hash2uuidsKV = MemoryKeyValueStore(_element('hash2uuids.pickle'))

        return LSHNearestNeighborIndex(functor, descriptor_set, hash2uuidsKV,
                                       read_only=True)
Esempio n. 9
0
    def test_norm_vector_n2(self):
        # With ``normalize=2`` the expected outputs below are the inputs scaled
        # to unit Euclidean length.
        itq = ItqFunctor(normalize=2)

        cases = (
            ([1, 0], [1, 0]),
            ([1, 1], [1. / sqrt(2), 1. / sqrt(2)]),
        )
        for raw, expected in cases:
            normalized = itq._norm_vector(numpy.array(raw))
            numpy.testing.assert_array_almost_equal(normalized, expected)
Esempio n. 10
0
 def test_fit_has_model(self):
     # Fitting must be refused with a RuntimeError when a mean vector and
     # rotation are already set on the functor.
     itq = ItqFunctor()
     itq.mean_vec = 'sim vec'
     itq.rotation = 'sim rot'
     with self.assertRaisesRegex(
             RuntimeError, "Model components have already been loaded."):
         itq.fit([])
Esempio n. 11
0
 def test_get_config_no_cache(self):
     # Constructor parameters should round-trip through get_config, and
     # with no cache elements given both cache "type" entries are None.
     expected_params = (
         ('bit_length', 1),
         ('itq_iterations', 2),
         ('normalize', 3),
         ('random_seed', 4),
     )
     config = ItqFunctor(**dict(expected_params)).get_config()

     for key, value in expected_params:
         NT.assert_equal(config[key], value)
     for cache_key in ('mean_vec_cache', 'rotation_cache'):
         NT.assert_is_none(config[cache_key]['type'])
Esempio n. 12
0
    def test_norm_vector_no_normalization(self):
        # With ``normalize=None`` vectors should come back unchanged,
        # whatever their shape or content.
        itq = ItqFunctor(normalize=None)

        vectors = (
            numpy.array([0, 1]),
            numpy.array([[0, 1, 1, .4, .1]]),
            numpy.array([0] * 128),
        )
        for vec in vectors:
            numpy.testing.assert_array_equal(itq._norm_vector(vec), vec)
Esempio n. 13
0
    def test_save_model_no_caches(self):
        # Saving a model when no cache elements were configured should be a
        # no-op with respect to the cache attributes.
        expected_mean_vec = numpy.array([1, 2, 3])
        expected_rotation = numpy.eye(3)

        # Cache variables should remain None after save.
        itq = ItqFunctor()
        itq.mean_vec = expected_mean_vec
        itq.rotation = expected_rotation
        itq.save_model()
        self.assertIsNone(itq.mean_vec_cache_elem)
        # Check the rotation cache as well; previously the mean-vec cache was
        # asserted twice and the rotation cache was never verified.
        self.assertIsNone(itq.rotation_cache_elem)
Esempio n. 14
0
 def test_configuration(self):
     # Every instance produced by ``configuration_test_helper`` should
     # carry the same component types and settings as the original index.
     index = LSHNearestNeighborIndex(
         lsh_functor=ItqFunctor(),
         descriptor_set=MemoryDescriptorSet(),
         hash2uuids_kvstore=MemoryKeyValueStore(),
         hash_index=LinearHashIndex(),
         distance_method='euclidean',
         read_only=True,
     )
     expected_types = (
         ('lsh_functor', LshFunctor),
         ('descriptor_set', MemoryDescriptorSet),
         ('hash_index', LinearHashIndex),
         ('hash2uuids_kvstore', MemoryKeyValueStore),
     )
     for inst in configuration_test_helper(
             index):  # type: LSHNearestNeighborIndex
         for attr_name, attr_type in expected_types:
             assert isinstance(getattr(inst, attr_name), attr_type)
         assert inst.distance_method == 'euclidean'
         assert inst.read_only is True
Esempio n. 15
0
 def test_has_model(self):
     itq = ItqFunctor()
     # Nothing set yet: no model.
     self.assertFalse(itq.has_model())

     # Exactly one of the two components set: still no model.
     for mean_vec, rotation in (('mean vec', None), (None, 'rotation')):
         itq.mean_vec = mean_vec
         itq.rotation = rotation
         self.assertFalse(itq.has_model())

     # Both components set: the functor reports having a model.
     itq.mean_vec, itq.rotation = 'mean vec', 'rotation'
     self.assertTrue(itq.has_model())
Esempio n. 16
0
    def test_get_config_with_cache_elements(self):
        # With cache elements assigned, get_config should report the
        # constructor parameters plus the type and content of both caches.
        itq = ItqFunctor(bit_length=5, itq_iterations=6, normalize=7,
                         random_seed=8)
        itq.mean_vec_cache_elem = DataMemoryElement('cached vec bytes')
        itq.rotation_cache_elem = DataMemoryElement('cached rot bytes')

        c = itq.get_config()
        NT.assert_equal(c['bit_length'], 5)
        NT.assert_equal(c['itq_iterations'], 6)
        NT.assert_equal(c['normalize'], 7)
        NT.assert_equal(c['random_seed'], 8)
        NT.assert_equal(c['mean_vec_cache']['type'], "DataMemoryElement")
        NT.assert_equal(c['mean_vec_cache']['DataMemoryElement']['bytes'],
                        'cached vec bytes')
        # The rotation cache type is checked too, mirroring the mean-vec
        # cache; it was previously left unverified.
        NT.assert_equal(c['rotation_cache']['type'], "DataMemoryElement")
        NT.assert_equal(c['rotation_cache']['DataMemoryElement']['bytes'],
                        'cached rot bytes')
Esempio n. 17
0
    def test_fit(self):
        # Fit on five 2-feature descriptors, (-2, -2) through (2, 2), with no
        # cache elements configured.
        fit_descriptors = []
        for idx in range(5):
            elem = DescriptorMemoryElement('test', idx)
            elem.set_vector([idx - 2., idx - 2.])
            fit_descriptors.append(elem)

        itq = ItqFunctor(random_seed=0, bit_length=1)
        itq.fit(fit_descriptors)

        # TODO: Explanation as to why this is the expected result.
        expected_mean_vec = [0, 0]
        expected_rotation = [[1 / sqrt(2)], [1 / sqrt(2)]]
        numpy.testing.assert_array_almost_equal(itq.mean_vec,
                                                expected_mean_vec)
        numpy.testing.assert_array_almost_equal(itq.rotation,
                                                expected_rotation)

        # No caches were given, so none should have appeared after fitting.
        NT.assert_is_none(itq.mean_vec_cache_elem)
        NT.assert_is_none(itq.rotation_cache_elem)
Esempio n. 18
0
    def test_get_config_with_cache_elements(self):
        # With cache elements assigned, get_config should report the
        # constructor parameters plus the type and content of both caches.
        itq = ItqFunctor(bit_length=5, itq_iterations=6, normalize=7,
                         random_seed=8)
        itq.mean_vec_cache_elem = DataMemoryElement(b'cached vec bytes')
        itq.rotation_cache_elem = DataMemoryElement(b'cached rot bytes')

        c = itq.get_config()
        self.assertEqual(c['bit_length'], 5)
        self.assertEqual(c['itq_iterations'], 6)
        self.assertEqual(c['normalize'], 7)
        self.assertEqual(c['random_seed'], 8)
        self.assertEqual(c['mean_vec_cache']['type'], "DataMemoryElement")
        # The rotation cache type is checked too, mirroring the mean-vec
        # cache; it was previously left unverified.
        self.assertEqual(c['rotation_cache']['type'], "DataMemoryElement")
        # Check using string encodings of set bytes (JSON compliant).
        self.assertEqual(c['mean_vec_cache']['DataMemoryElement']['bytes'],
                         'cached vec bytes')
        self.assertEqual(c['rotation_cache']['DataMemoryElement']['bytes'],
                         'cached rot bytes')
Esempio n. 19
0
def compute_hash_codes(task, folderId, **kwargs):
    """
    Celery task for computing hash codes on a given folder (descriptor index).

    The resulting mapping is pickled and uploaded as ``hash2uuids.pickle``
    inside the folder's ``.smqtk`` sub-folder.

    :param task: Celery provided task object.
    :param folderId: The folder to compute hash codes for; it is used to
        infer the descriptor index and to locate the ``.smqtk`` folder.
    :param kwargs: Accepted but not used by this function.
    """
    task.job_manager.updateProgress(message='Computing Hash Codes',
                                    forceFlush=True)
    client = task.girder_client

    index = descriptorIndexFromFolderId(client, folderId)
    smqtkFolder = getCreateFolder(client, folderId, '.smqtk')

    # Existing model files the functor reads its model from.
    meanVecFileId = smqtkFileIdFromName(client, smqtkFolder, 'mean_vec.npy')
    rotationFileId = smqtkFileIdFromName(client, smqtkFolder, 'rotation.npy')

    # Fresh (overwritten) item/file that will receive the pickled mapping.
    hash2uuidsItem = createOverwriteItem(client, smqtkFolder['_id'],
                                         'hash2uuids.pickle')
    hash2uuidsFile = initializeItemWithFile(client, hash2uuidsItem)

    def _cache_element(file_id):
        # Data element for ``file_id`` using this job's API URL and token.
        return GirderDataElement(
            file_id,
            api_root=task.request.apiUrl,
            token=task.request.jobInfoSpec['headers']['Girder-Token'])

    functor = ItqFunctor(mean_vec_cache=_cache_element(meanVecFileId),
                         rotation_cache=_cache_element(rotationFileId))

    pairs = compute_functions.compute_hash_codes(index.iterkeys(),
                                                 index,
                                                 functor,
                                                 use_mp=False)

    # Invert each pair: the second member becomes the key, the first the
    # value.
    # NOTE(review): pairs sharing a second member overwrite one another here;
    # confirm that is what consumers of hash2uuids.pickle expect.
    inverted = {second: first for first, second in pairs}

    data = pickle.dumps(inverted)
    task.girder_client.uploadFileContents(hash2uuidsFile['_id'],
                                          six.BytesIO(data), len(data))
Esempio n. 20
0
def itq(task, folderId, **kwargs):
    """
    Celery task for training ITQ on a given folder.

    Training runs over every descriptor in the index. Because this task is
    typically invoked right after descriptors are computed, the index will
    often hold just the folder's own descriptors.

    :param task: Celery provided task object.
    :param folderId: The folder to train ITQ for, note this is only used to
        infer the descriptor index.
    :param kwargs: Accepted but not used by this function.
    :raises Exception: If the descriptor index is empty.
    """
    task.job_manager.updateProgress(message='Training ITQ', forceFlush=True)
    index = descriptorIndexFromFolderId(task.girder_client, folderId)

    if not index.count():
        # TODO SMQTK should account for this?
        raise Exception('Descriptor index is empty, cannot train ITQ.')

    smqtkFolder = getCreateFolder(task.girder_client, folderId, '.smqtk')

    def _fresh_model_file(file_name):
        # (Re)create the named item in the .smqtk folder and give it a file.
        item = createOverwriteItem(task.girder_client, smqtkFolder['_id'],
                                   file_name)
        return initializeItemWithFile(task.girder_client, item)

    def _cache_element(file_id):
        # Data element for ``file_id`` using this job's API URL and token.
        return GirderDataElement(
            file_id,
            api_root=task.request.apiUrl,
            token=task.request.jobInfoSpec['headers']['Girder-Token'])

    meanVecFile = _fresh_model_file('mean_vec.npy')
    rotationFile = _fresh_model_file('rotation.npy')

    functor = ItqFunctor(mean_vec_cache=_cache_element(meanVecFile['_id']),
                         rotation_cache=_cache_element(rotationFile['_id']))

    functor.fit(index.iterdescriptors(), use_multiprocessing=False)
Esempio n. 21
0
 def test_build_index_read_only(self):
     # Building on an index constructed with ``read_only=True`` is expected
     # to raise ReadOnlyError.
     read_only_index = LSHNearestNeighborIndex(ItqFunctor(),
                                               MemoryDescriptorIndex(),
                                               MemoryKeyValueStore(),
                                               read_only=True)
     with ntools.assert_raises(ReadOnlyError):
         read_only_index.build_index([])