Esempio n. 1
0
    def test_configuration(self) -> None:
        """
        Round-trip the factory through its default configuration.

        The default configuration should have no type selected and should
        contain an entry for the in-memory element implementation. After
        selecting that implementation, a factory built from the configuration
        should wrap that type with an empty type-config and produce elements
        carrying the requested UUID.
        """
        memory_impl_key = 'smqtk_descriptors.impls.descriptor_element.memory.DescriptorMemoryElement'

        default_config = DescriptorElementFactory.get_default_config()
        self.assertIsNone(default_config['type'])
        self.assertIn(memory_impl_key, default_config)

        # Select the in-memory implementation and build a factory from it.
        default_config['type'] = memory_impl_key
        built = DescriptorElementFactory.from_config(default_config)
        self.assertEqual(built._d_type.__name__, DescriptorMemoryElement.__name__)
        self.assertEqual(built._d_type_config, {})

        element = built.new_descriptor('foo')
        self.assertEqual(element.uuid(), 'foo')
 def test_get_config(self) -> None:
     """
     We should be able to get the configuration of the current factory.

     The expected configuration names the wrapped element type under
     ``"type"`` and maps that type's key to the parameters the factory was
     constructed with.
     """
     ctor_params = {
         'p1': 'some dir',
         'vec': 1
     }
     impl_key = f"{__name__}.{DummyElementImpl.__name__}"
     expected_config = {"type": impl_key, impl_key: ctor_params}

     reported = DescriptorElementFactory(DummyElementImpl, ctor_params).get_config()
     assert reported == expected_config
Esempio n. 3
0
    def test_generate_elements_all_preexisting_overwrite(self) -> None:
        """
        With ``overwrite=True``, descriptors should be computed and set for
        every input even though the generated (mocked) elements all report
        already having a vector.
        """
        # Three mocked data elements, each reporting a PNG content type.
        inputs = [mock.Mock(spec=DataElement) for _ in range(3)]
        for elem in inputs:
            elem.content_type.return_value = 'image/png'

        # Mocked element type, wrapped by a real factory.
        elem_type = mock.MagicMock(name="DescrElemType")
        factory = DescriptorElementFactory(elem_type, {})

        # Mocked element instance (what ``from_config`` returns on the mocked
        # type, i.e. what comes out of the factory).
        elem_inst = elem_type.from_config()
        # !!! Every element claims to already *have* a vector.
        elem_inst.has_vector.return_value = True

        # Drain the generator so every input is processed.
        generator = self.inst.generate_elements(
            inputs, descr_factory=factory, overwrite=True
        )
        list(generator)

        # No has-vector checks are expected because they come after the
        # overwrite short-circuit.
        assert elem_inst.has_vector.call_count == 0
        assert elem_inst.set_vector.call_count == 3

        # Complete iteration should cause the post-yield method to be called.
        self.inst._post_iterator_check.assert_called_once()
Esempio n. 4
0
    def test_no_params(self) -> None:
        """
        A factory built with an empty parameter dictionary should produce an
        element that records the given UUID, empty positional args, and empty
        keyword args.
        """
        empty_params: Dict[str, Any] = {}
        factory = DescriptorElementFactory(DummyElementImpl, empty_params)

        # A new DummyElementImpl instance should be constructed under the hood.
        made = factory.new_descriptor('uuid')

        assert isinstance(made, DummyElementImpl)
        self.assertEqual(made._uuid, 'uuid')
        self.assertEqual(made.args, ())
        self.assertEqual(made.kwds, {})
Esempio n. 5
0
    def test_with_params(self) -> None:
        """
        Parameters given to the factory at construction should show up as the
        keyword arguments of the element it creates, alongside the requested
        UUID and no positional args.
        """
        factory_params = {
            'p1': 'some dir',
            'vec': numpy.random.randint(0, 10, 10)
        }
        factory = DescriptorElementFactory(DummyElementImpl, factory_params)

        # A new DummyElementImpl instance should be constructed under the hood.
        made = factory.new_descriptor('uuid')

        assert isinstance(made, DummyElementImpl)
        self.assertEqual(made._uuid, 'uuid')
        self.assertEqual(made.args, ())
        self.assertEqual(made.kwds, factory_params)
    def test_call(self) -> None:
        """
        Same check as ``test_with_params`` but going through the factory's
        ``__call__`` entry point, which is given a type label in addition to
        the UUID.
        """
        factory_params = {
            'p1': 'some dir',
            'vec': numpy.random.randint(0, 10, 10)
        }
        factory = DescriptorElementFactory(DummyElementImpl, factory_params)

        # A new DummyElementImpl instance should be constructed under the hood.
        made = factory('type', 'uuid')

        assert isinstance(made, DummyElementImpl)
        self.assertEqual(made._type_label, 'type')
        self.assertEqual(made._uuid, 'uuid')
        self.assertEqual(made.args, ())
        self.assertEqual(made.kwds, factory_params)
Esempio n. 7
0
    def test_generate_elements_non_preexisting(self) -> None:
        """
        Test generating descriptor elements where none produced by the
        factory have existing vectors, i.e. all data elements are passed to
        the underlying generation method.
        """
        # Three mocked data elements, each reporting a PNG content type.
        inputs = [mock.Mock(spec=DataElement) for _ in range(3)]
        for elem in inputs:
            elem.content_type.return_value = 'image/png'

        # Mocked element type, wrapped by a real factory.
        elem_type = mock.MagicMock(name="DescrElemType")
        factory = DescriptorElementFactory(elem_type, {})

        # Mocked element instance (what ``from_config`` returns on the mocked
        # type, i.e. what comes out of the factory).
        elem_inst = elem_type.from_config()
        # !!! Every element claims to have *no* vector set.
        elem_inst.has_vector.return_value = False

        # Drain the generator so every input is processed.
        generator = self.inst.generate_elements(
            inputs, descr_factory=factory, overwrite=False
        )
        list(generator)

        assert elem_inst.has_vector.call_count == 3
        assert elem_inst.set_vector.call_count == 3
        # We know the dummy vectors that should have been iterated out.
        for expected_vector in ([0], [1], [2]):
            elem_inst.set_vector.assert_any_call(expected_vector)

        # Complete iteration should cause the post-yield method to be called.
        self.inst._post_iterator_check.assert_called_once()
Esempio n. 8
0
import abc
from collections import deque
import logging
from typing import Deque, Generator, Iterable, List, Optional, Tuple
import numpy as np

from smqtk_core import Configurable, Pluggable
from smqtk_dataprovider import ContentTypeValidator, DataElement
from smqtk_descriptors import DescriptorElement, DescriptorElementFactory
from smqtk_descriptors.impls.descriptor_element.memory import DescriptorMemoryElement

# Module-level factory built from the in-memory descriptor element type with
# an empty configuration.
# NOTE(review): presumably the default used when a caller does not supply its
# own factory -- confirm against usage further down the module.
DFLT_DESCRIPTOR_FACTORY = DescriptorElementFactory(DescriptorMemoryElement, {})
# Logger named after this module.
LOG = logging.getLogger(__name__)


class DescriptorGenerator(Configurable, Pluggable, ContentTypeValidator):
    """
    Base abstract Feature Descriptor interface.
    """
    @abc.abstractmethod
    def _generate_arrays(
            self, data_iter: Iterable[DataElement]) -> Iterable[np.ndarray]:
        """
        Inner template method that defines the generation of descriptor vectors
        for a given iterable of data elements.

        Pre-conditions:
          - Data elements input to this method have been validated to be of at
            least one of this class's reported ``valid_content_types``.

        :param collections.abc.Iterable[smqtk.representation.DataElement] data_iter:
Esempio n. 9
0
    def test_no_save_model_pickle(self):
        """
        Test model preservation across pickling even without model cache
        file paths set.

        An untrained classifier should pickle without any ``__LOCAL__``
        content. Once trained, its pickle state should carry the local model
        and labels, and a classifier restored from that pickle should
        classify a test vector the same as the original (to 5 places).
        """
        classifier = LibSvmClassifier(
            train_params={
                '-t': 0,  # linear kernel
                '-b': 1,  # enable probability estimates
                '-c': 2,  # SVM-C parameter C
                '-q': '',  # quiet mode
            },
            normalize=None,  # DO NOT normalize descriptors
        )
        self.assertIsNone(classifier.svm_model)
        # Empty model should not trigger __LOCAL__ content in pickle
        self.assertNotIn('__LOCAL__', classifier.__getstate__())
        _ = pickle.loads(pickle.dumps(classifier))

        # train arbitrary model (same as ``test_simple_classification``)
        DIM = 2
        N = 1000
        POS_LABEL = 'positive'
        NEG_LABEL = 'negative'
        d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})

        def make_element(iv):
            # ``iv`` is an (index, vector) pair as produced by ``enumerate``.
            i, v = iv
            d = d_factory.new_descriptor('test', i)
            d.set_vector(v)
            return d

        # Constructing artificial descriptors. Rows whose second component
        # falls strictly between 0.45 and 0.55 are dropped.
        x = numpy.random.rand(N, DIM)
        x_pos = x[x[:, 1] <= 0.45]
        x_neg = x[x[:, 1] >= 0.55]
        p = multiprocessing.pool.ThreadPool()
        try:
            d_pos = p.map(make_element, enumerate(x_pos))
            d_neg = p.map(make_element, enumerate(x_neg, start=N // 2))
        finally:
            # Always release the worker threads, even if element construction
            # raised.
            p.close()
            p.join()

        # Training
        classifier.train({POS_LABEL: d_pos, NEG_LABEL: d_neg})

        # Test original classifier
        # - Using classification method implemented by the subclass directly
        #   in order to test simplest scope possible.
        t_v = numpy.random.rand(DIM)
        c_expected = list(classifier._classify_arrays([t_v]))[0]

        # Should see __LOCAL__ content in pickle state now
        p_state = classifier.__getstate__()
        self.assertIn('__LOCAL__', p_state)
        self.assertIn('__LOCAL_LABELS__', p_state)
        self.assertIn('__LOCAL_MODEL__', p_state)
        self.assertGreater(len(p_state['__LOCAL_LABELS__']), 0)
        self.assertGreater(len(p_state['__LOCAL_MODEL__']), 0)

        # Restored classifier should classify the same test descriptor the
        # same.
        # - If this fails after a new parameter was added it's probably
        #   because the parameter was not restored during the __setstate__.
        #: :type: LibSvmClassifier
        classifier2 = pickle.loads(pickle.dumps(classifier))
        c_post_pickle = list(classifier2._classify_arrays([t_v]))[0]
        # There may be floating point error, so extract actual confidence
        # values and check post round
        c_pp_positive = c_post_pickle[POS_LABEL]
        c_pp_negative = c_post_pickle[NEG_LABEL]
        c_e_positive = c_expected[POS_LABEL]
        c_e_negative = c_expected[NEG_LABEL]
        self.assertAlmostEqual(c_e_positive, c_pp_positive, 5)
        self.assertAlmostEqual(c_e_negative, c_pp_negative, 5)
Esempio n. 10
0
    def test_simple_multiclass_classification(self):
        """
        simple LibSvmClassifier test - 3-class

        Test libSVM classification functionality using random constructed
        data, training the y=0.33 and y=.66 split
        """
        DIM = 2
        N = 1000
        P1_LABEL = 'p1'
        P2_LABEL = 'p2'
        P3_LABEL = 'p3'
        d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})

        def make_element(iv):
            # ``iv`` is an (index, vector) pair as produced by ``enumerate``.
            _i, _v = iv
            elem = d_factory.new_descriptor('test', _i)
            elem.set_vector(_v)
            return elem

        # Constructing artificial descriptors. Rows whose second component
        # falls in the gaps (0.30, 0.36) or (0.63, 0.69) are dropped.
        x = numpy.random.rand(N, DIM)
        x_p1 = x[x[:, 1] <= 0.30]
        x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
        x_p3 = x[x[:, 1] >= 0.69]

        # The pool is only needed to build the descriptor elements; release it
        # in ``finally`` so a failure here or in a later assertion cannot leak
        # the worker threads.
        p = multiprocessing.pool.ThreadPool()
        try:
            # ``di`` is the running start index so each class continues the
            # enumeration where the previous one stopped.
            di = 0
            d_p1 = p.map(make_element, enumerate(x_p1, di))
            di += len(d_p1)
            d_p2 = p.map(make_element, enumerate(x_p2, di))
            di += len(d_p2)
            d_p3 = p.map(make_element, enumerate(x_p3, di))
        finally:
            p.close()
            p.join()

        # Create/Train test classifier
        classifier = LibSvmClassifier(
            train_params={
                '-t': 0,  # linear kernel
                '-b': 1,  # enable probability estimates
                '-c': 2,  # SVM-C parameter C
                '-q': ''  # quiet mode
            },
            normalize=None,  # DO NOT normalize descriptors
        )
        classifier.train({P1_LABEL: d_p1, P2_LABEL: d_p2, P3_LABEL: d_p3})

        # Test classifier on freshly drawn data using the same bands.
        x = numpy.random.rand(N, DIM)
        x_p1 = x[x[:, 1] <= 0.30]
        x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
        x_p3 = x[x[:, 1] >= 0.69]

        # Test that examples expected to classify to certain classes are.
        c_map_p1 = list(classifier._classify_arrays(x_p1))
        for v, c_map in zip(x_p1, c_map_p1):
            assert c_map[P1_LABEL] > max(c_map[P2_LABEL], c_map[P3_LABEL]), \
                "Incorrect {} label: {} :: {}".format(P1_LABEL, v, c_map)

        c_map_p2 = list(classifier._classify_arrays(x_p2))
        for v, c_map in zip(x_p2, c_map_p2):
            assert c_map[P2_LABEL] > max(c_map[P1_LABEL], c_map[P3_LABEL]), \
                "Incorrect {} label: {} :: {}".format(P2_LABEL, v, c_map)

        c_map_p3 = list(classifier._classify_arrays(x_p3))
        for v, c_map in zip(x_p3, c_map_p3):
            assert c_map[P3_LABEL] > max(c_map[P1_LABEL], c_map[P2_LABEL]), \
                "Incorrect {} label: {} :: {}".format(P3_LABEL, v, c_map)
Esempio n. 11
0
    def test_simple_classification(self):
        """
        simple LibSvmClassifier test - 2-class

        Test libSVM classification functionality using random constructed
        data, training the y=0.5 split
        """
        DIM = 2
        N = 1000
        POS_LABEL = 'positive'
        NEG_LABEL = 'negative'
        d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})

        def make_element(iv):
            # ``iv`` is an (index, vector) pair as produced by ``enumerate``.
            _i, _v = iv
            elem = d_factory.new_descriptor('test', _i)
            elem.set_vector(_v)
            return elem

        # Constructing artificial descriptors. Rows whose second component
        # falls strictly between 0.45 and 0.55 are dropped.
        x = numpy.random.rand(N, DIM)
        x_pos = x[x[:, 1] <= 0.45]
        x_neg = x[x[:, 1] >= 0.55]

        # The pool is only needed to build the descriptor elements; release it
        # in ``finally`` so a failure here or in a later assertion cannot leak
        # the worker threads.
        p = multiprocessing.pool.ThreadPool()
        try:
            d_pos = p.map(make_element, enumerate(x_pos))
            d_neg = p.map(make_element, enumerate(x_neg, start=N // 2))
        finally:
            p.close()
            p.join()

        # Create/Train test classifier
        classifier = LibSvmClassifier(
            train_params={
                '-t': 0,  # linear kernel
                '-b': 1,  # enable probability estimates
                '-c': 2,  # SVM-C parameter C
                '-q': '',  # quiet mode
            },
            normalize=None,  # DO NOT normalize descriptors
        )
        classifier.train({POS_LABEL: d_pos, NEG_LABEL: d_neg})

        # Test classifier on freshly drawn data using the same bands.
        x = numpy.random.rand(N, DIM)
        x_pos = x[x[:, 1] <= 0.45]
        x_neg = x[x[:, 1] >= 0.55]

        # Test that examples expected to classify to the positive class are,
        # and same for those expected to be in the negative class.
        c_map_pos = list(classifier._classify_arrays(x_pos))
        for v, c_map in zip(x_pos, c_map_pos):
            assert c_map[POS_LABEL] > c_map[NEG_LABEL], \
                "Found False positive: {} :: {}" \
                .format(v, c_map)

        c_map_neg = list(classifier._classify_arrays(x_neg))
        for v, c_map in zip(x_neg, c_map_neg):
            assert c_map[NEG_LABEL] > c_map[POS_LABEL], \
                "Found False negative: {} :: {}" \
                .format(v, c_map)