def test_configuration(self) -> None:
    """
    Check the factory's default configuration and construction from it.

    The default configuration is expected to have no type selected while
    still containing a section keyed by the in-memory element's import
    path. After selecting that key, a factory built from the configuration
    should report a matching element type name, an empty type
    configuration, and produce elements carrying the requested UUID.
    """
    memory_impl_key = 'smqtk_descriptors.impls.descriptor_element.memory.DescriptorMemoryElement'
    config = DescriptorElementFactory.get_default_config()

    # By default nothing is selected, but the memory implementation is
    # listed as an available option.
    self.assertIsNone(config['type'])
    self.assertIn(memory_impl_key, config)

    # Select the memory implementation and build a factory from the result.
    config['type'] = memory_impl_key
    built_factory = DescriptorElementFactory.from_config(config)
    self.assertEqual(built_factory._d_type.__name__,
                     DescriptorMemoryElement.__name__)
    self.assertEqual(built_factory._d_type_config, {})

    element = built_factory.new_descriptor('foo')
    self.assertEqual(element.uuid(), 'foo')
def test_get_config(self) -> None:
    """
    The configuration of an existing factory should be retrievable.

    The returned dictionary is expected to name the wrapped element type
    under ``"type"`` and to carry the construction parameters under a key
    equal to that type's fully qualified name.
    """
    element_params = {
        'p1': 'some dir',
        'vec': 1,
    }
    dummy_key = f"{__name__}.{DummyElementImpl.__name__}"
    expected_config = {"type": dummy_key, dummy_key: element_params}

    actual_config = DescriptorElementFactory(
        DummyElementImpl, element_params
    ).get_config()

    assert actual_config == expected_config
def test_generate_elements_all_preexisting_overwrite(self) -> None:
    """
    Test that descriptors are computed even though the generated elements
    (mocked) report as having a vector.
    """
    # Three mocked data elements, each reporting a PNG content type.
    data_iter = []
    for _ in range(3):
        data_elem = mock.Mock(spec=DataElement)
        data_elem.content_type.return_value = 'image/png'
        data_iter.append(data_elem)

    # Mocked element type wrapped by a real factory.
    m_de_type = mock.MagicMock(name="DescrElemType")
    fact = DescriptorElementFactory(m_de_type, {})

    # The mocked element instance (what the mocked type's ``from_config``
    # returns).
    m_de_inst = m_de_type.from_config()
    # !!! Every element claims to already *have* a vector.
    m_de_inst.has_vector.return_value = True

    # Drain the generator completely so post-iteration hooks are triggered.
    for _ in self.inst.generate_elements(data_iter, descr_factory=fact,
                                         overwrite=True):
        pass

    # With overwrite enabled the has-vector check is short-circuited, so it
    # should never be consulted, while every element gets a vector set.
    assert m_de_inst.has_vector.call_count == 0
    assert m_de_inst.set_vector.call_count == 3

    # Complete iteration should cause post-yield method to be called.
    self.inst._post_iterator_check.assert_called_once()
def test_no_params(self) -> None:
    """
    A factory configured with no parameters should construct elements that
    receive only the UUID: no positional extras and no keyword arguments.
    """
    empty_params: Dict[str, Any] = {}
    uuid_value = 'uuid'

    # Should construct a new DEI instance under the hood somewhere
    created = DescriptorElementFactory(
        DummyElementImpl, empty_params
    ).new_descriptor(uuid_value)

    assert isinstance(created, DummyElementImpl)
    self.assertEqual(created._uuid, uuid_value)
    self.assertEqual(created.args, ())
    self.assertEqual(created.kwds, {})
def test_with_params(self) -> None:
    """
    A factory configured with parameters should forward them as keyword
    arguments to the element it constructs.
    """
    random_vec = numpy.random.randint(0, 10, 10)
    element_params = {
        'p1': 'some dir',
        'vec': random_vec,
    }
    uuid_value = 'uuid'
    factory = DescriptorElementFactory(DummyElementImpl, element_params)

    # Should construct a new DEI instance under the hood somewhere
    created = factory.new_descriptor(uuid_value)

    assert isinstance(created, DummyElementImpl)
    self.assertEqual(created._uuid, uuid_value)
    self.assertEqual(created.args, ())
    # The very same parameter dictionary given to the factory is the
    # expected keyword set.
    self.assertEqual(created.kwds, element_params)
def test_call(self) -> None:
    """
    Same scenario as ``test_with_params`` but going through the factory's
    ``__call__`` entry point, which additionally takes a type label.
    """
    random_vec = numpy.random.randint(0, 10, 10)
    element_params = {
        'p1': 'some dir',
        'vec': random_vec,
    }
    type_label = 'type'
    uuid_value = 'uuid'
    factory = DescriptorElementFactory(DummyElementImpl, element_params)

    # Should construct a new DEI instance under the hood somewhere
    created = factory(type_label, uuid_value)

    assert isinstance(created, DummyElementImpl)
    self.assertEqual(created._type_label, type_label)
    self.assertEqual(created._uuid, uuid_value)
    self.assertEqual(created.args, ())
    # The very same parameter dictionary given to the factory is the
    # expected keyword set.
    self.assertEqual(created.kwds, element_params)
def test_generate_elements_non_preexisting(self) -> None:
    """
    Test generating descriptor elements where none produced by the factory
    have existing vectors, i.e. all data elements are passed to underlying
    generation method.
    """
    # Three mocked data elements, each reporting a PNG content type.
    data_iter = []
    for _ in range(3):
        data_elem = mock.Mock(spec=DataElement)
        data_elem.content_type.return_value = 'image/png'
        data_iter.append(data_elem)

    # Mocked element type wrapped by a real factory.
    m_de_type = mock.MagicMock(name="DescrElemType")
    fact = DescriptorElementFactory(m_de_type, {})

    # The mocked element instance (what the mocked type's ``from_config``
    # returns).
    m_de_inst = m_de_type.from_config()
    # !!! Every element claims to have *no* vector set.
    m_de_inst.has_vector.return_value = False

    # Drain the generator completely so post-iteration hooks are triggered.
    for _ in self.inst.generate_elements(data_iter, descr_factory=fact,
                                         overwrite=False):
        pass

    # Each element is checked once and, lacking a vector, has one set.
    assert m_de_inst.has_vector.call_count == 3
    assert m_de_inst.set_vector.call_count == 3

    # We know the dummy vectors that should have been iterated out
    for expected_vec in ([0], [1], [2]):
        m_de_inst.set_vector.assert_any_call(expected_vec)

    # Complete iteration should cause post-yield method to be called.
    self.inst._post_iterator_check.assert_called_once()
import abc from collections import deque import logging from typing import Deque, Generator, Iterable, List, Optional, Tuple import numpy as np from smqtk_core import Configurable, Pluggable from smqtk_dataprovider import ContentTypeValidator, DataElement from smqtk_descriptors import DescriptorElement, DescriptorElementFactory from smqtk_descriptors.impls.descriptor_element.memory import DescriptorMemoryElement DFLT_DESCRIPTOR_FACTORY = DescriptorElementFactory(DescriptorMemoryElement, {}) LOG = logging.getLogger(__name__) class DescriptorGenerator(Configurable, Pluggable, ContentTypeValidator): """ Base abstract Feature Descriptor interface. """ @abc.abstractmethod def _generate_arrays( self, data_iter: Iterable[DataElement]) -> Iterable[np.ndarray]: """ Inner template method that defines the generation of descriptor vectors for a given iterable of data elements. Pre-conditions: - Data elements input to this method have been validated to be of at least one of this class's reported ``valid_content_types``. :param collections.abc.Iterable[smqtk.representation.DataElement] data_iter:
def test_no_save_model_pickle(self):
    """
    Test model preservation across pickling even without model cache file
    paths set.

    An untrained classifier must pickle without any ``__LOCAL__`` state.
    After training, the pickled state must carry the local model and label
    content, and a classifier restored from that pickle must classify a
    test vector the same as the original (to 5 decimal places).
    """
    classifier = LibSvmClassifier(
        train_params={
            '-t': 0,  # linear kernel
            '-b': 1,  # enable probability estimates
            '-c': 2,  # SVM-C parameter C
            '-q': '',  # quiet mode
        },
        normalize=None,  # DO NOT normalize descriptors
    )
    self.assertIsNone(classifier.svm_model)
    # Empty model should not trigger __LOCAL__ content in pickle
    self.assertNotIn('__LOCAL__', classifier.__getstate__())
    _ = pickle.loads(pickle.dumps(classifier))

    # train arbitrary model (same as ``test_simple_classification``)
    DIM = 2
    N = 1000
    POS_LABEL = 'positive'
    NEG_LABEL = 'negative'
    d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})

    def make_element(iv):
        i, v = iv
        d = d_factory.new_descriptor('test', i)
        d.set_vector(v)
        return d

    # Constructing artificial descriptors
    x = numpy.random.rand(N, DIM)
    x_pos = x[x[:, 1] <= 0.45]
    x_neg = x[x[:, 1] >= 0.55]
    p = multiprocessing.pool.ThreadPool()
    try:
        d_pos = p.map(make_element, enumerate(x_pos))
        d_neg = p.map(make_element, enumerate(x_neg, start=N // 2))
    finally:
        # Always release the worker threads, even if element construction
        # raised, so a failure here does not leak the pool.
        p.close()
        p.join()

    # Training
    classifier.train({POS_LABEL: d_pos, NEG_LABEL: d_neg})

    # Test original classifier
    # - Using classification method implemented by the subclass directly
    #   in order to test simplest scope possible.
    t_v = numpy.random.rand(DIM)
    c_expected = list(classifier._classify_arrays([t_v]))[0]

    # Should see __LOCAL__ content in pickle state now
    p_state = classifier.__getstate__()
    self.assertIn('__LOCAL__', p_state)
    self.assertIn('__LOCAL_LABELS__', p_state)
    self.assertIn('__LOCAL_MODEL__', p_state)
    self.assertGreater(len(p_state['__LOCAL_LABELS__']), 0)
    self.assertGreater(len(p_state['__LOCAL_MODEL__']), 0)

    # Restored classifier should classify the same test descriptor the
    # same.
    # - If this fails after a new parameter was added its probably because
    #   the parameter was not restored during the __setstate__.
    #: :type: LibSvmClassifier
    classifier2 = pickle.loads(pickle.dumps(classifier))
    c_post_pickle = list(classifier2._classify_arrays([t_v]))[0]
    # There may be floating point error, so extract actual confidence
    # values and check post round
    c_pp_positive = c_post_pickle[POS_LABEL]
    c_pp_negative = c_post_pickle[NEG_LABEL]
    c_e_positive = c_expected[POS_LABEL]
    c_e_negative = c_expected[NEG_LABEL]
    self.assertAlmostEqual(c_e_positive, c_pp_positive, 5)
    self.assertAlmostEqual(c_e_negative, c_pp_negative, 5)
def test_simple_multiclass_classification(self):
    """
    simple LibSvmClassifier test - 3-class

    Test libSVM classification functionality using random constructed
    data, training the y=0.33 and y=.66 split

    Training and test points are drawn uniformly at random and assigned to
    a class by their second coordinate, leaving gaps between the class
    bands. Each test point must receive its highest confidence for the
    class of the band it was drawn from.
    """
    DIM = 2
    N = 1000
    P1_LABEL = 'p1'
    P2_LABEL = 'p2'
    P3_LABEL = 'p3'
    d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})

    def make_element(iv):
        _i, _v = iv
        elem = d_factory.new_descriptor('test', _i)
        elem.set_vector(_v)
        return elem

    # Constructing artificial descriptors
    x = numpy.random.rand(N, DIM)
    x_p1 = x[x[:, 1] <= 0.30]
    x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
    x_p3 = x[x[:, 1] >= 0.69]

    # The pool is only needed to build descriptor elements; release it as
    # soon as that is done so a later failure cannot leak worker threads.
    p = multiprocessing.pool.ThreadPool()
    try:
        # ``di`` offsets the enumeration so UUIDs are unique across classes.
        di = 0
        d_p1 = p.map(make_element, enumerate(x_p1, di))
        di += len(d_p1)
        d_p2 = p.map(make_element, enumerate(x_p2, di))
        di += len(d_p2)
        d_p3 = p.map(make_element, enumerate(x_p3, di))
    finally:
        # Closing resources
        p.close()
        p.join()

    # Create/Train test classifier
    classifier = LibSvmClassifier(
        train_params={
            '-t': 0,  # linear kernel
            '-b': 1,  # enable probability estimates
            '-c': 2,  # SVM-C parameter C
            '-q': '',  # quiet mode
        },
        normalize=None,  # DO NOT normalize descriptors
    )
    classifier.train({P1_LABEL: d_p1, P2_LABEL: d_p2, P3_LABEL: d_p3})

    # Test classifier
    x = numpy.random.rand(N, DIM)
    x_p1 = x[x[:, 1] <= 0.30]
    x_p2 = x[(x[:, 1] >= 0.36) & (x[:, 1] <= 0.63)]
    x_p3 = x[x[:, 1] >= 0.69]

    # Test that examples expected to classify to certain classes are.
    c_map_p1 = list(classifier._classify_arrays(x_p1))
    for v, c_map in zip(x_p1, c_map_p1):
        assert c_map[P1_LABEL] > max(c_map[P2_LABEL], c_map[P3_LABEL]), \
            "Incorrect {} label: {} :: {}".format(P1_LABEL, v, c_map)

    c_map_p2 = list(classifier._classify_arrays(x_p2))
    for v, c_map in zip(x_p2, c_map_p2):
        assert c_map[P2_LABEL] > max(c_map[P1_LABEL], c_map[P3_LABEL]), \
            "Incorrect {} label: {} :: {}".format(P2_LABEL, v, c_map)

    c_map_p3 = list(classifier._classify_arrays(x_p3))
    for v, c_map in zip(x_p3, c_map_p3):
        assert c_map[P3_LABEL] > max(c_map[P1_LABEL], c_map[P2_LABEL]), \
            "Incorrect {} label: {} :: {}".format(P3_LABEL, v, c_map)
def test_simple_classification(self):
    """
    simple LibSvmClassifier test - 2-class

    Test libSVM classification functionality using random constructed
    data, training the y=0.5 split

    Training and test points are drawn uniformly at random and labelled by
    their second coordinate (<= 0.45 positive, >= 0.55 negative), leaving
    a gap around the split. Each test point must receive a higher
    confidence for its own label than for the other.
    """
    DIM = 2
    N = 1000
    POS_LABEL = 'positive'
    NEG_LABEL = 'negative'
    d_factory = DescriptorElementFactory(DescriptorMemoryElement, {})

    def make_element(iv):
        _i, _v = iv
        elem = d_factory.new_descriptor('test', _i)
        elem.set_vector(_v)
        return elem

    # Constructing artificial descriptors
    x = numpy.random.rand(N, DIM)
    x_pos = x[x[:, 1] <= 0.45]
    x_neg = x[x[:, 1] >= 0.55]

    # The pool is only needed to build descriptor elements; release it as
    # soon as that is done so a later failure cannot leak worker threads.
    p = multiprocessing.pool.ThreadPool()
    try:
        d_pos = p.map(make_element, enumerate(x_pos))
        d_neg = p.map(make_element, enumerate(x_neg, start=N // 2))
    finally:
        # Closing resources
        p.close()
        p.join()

    # Create/Train test classifier
    classifier = LibSvmClassifier(
        train_params={
            '-t': 0,  # linear kernel
            '-b': 1,  # enable probability estimates
            '-c': 2,  # SVM-C parameter C
            '-q': '',  # quiet mode
        },
        normalize=None,  # DO NOT normalize descriptors
    )
    classifier.train({POS_LABEL: d_pos, NEG_LABEL: d_neg})

    # Test classifier
    x = numpy.random.rand(N, DIM)
    x_pos = x[x[:, 1] <= 0.45]
    x_neg = x[x[:, 1] >= 0.55]

    # Test that examples expected to classify to the positive class are,
    # and same for those expected to be in the negative class.
    c_map_pos = list(classifier._classify_arrays(x_pos))
    for v, c_map in zip(x_pos, c_map_pos):
        assert c_map[POS_LABEL] > c_map[NEG_LABEL], \
            "Found False positive: {} :: {}" \
            .format(v, c_map)

    c_map_neg = list(classifier._classify_arrays(x_neg))
    for v, c_map in zip(x_neg, c_map_neg):
        assert c_map[NEG_LABEL] > c_map[POS_LABEL], \
            "Found False negative: {} :: {}" \
            .format(v, c_map)