Example #1
    def __setstate__(self, state):
        self.__dict__.update(state)

        self.svm_model_elem = \
            self.svm_model_uri and from_uri(self.svm_model_uri)
        self.svm_label_map_elem = \
            self.svm_label_map_uri and from_uri(self.svm_label_map_uri)

        # C libraries/pointers don't survive across processes.
        if '__LOCAL__' in state:
            # These would have gotten copied into the dict during the update
            # above. The instance doesn't need to keep them around after this.
            del self.__dict__['__LOCAL__']
            del self.__dict__['__LOCAL_LABELS__']
            del self.__dict__['__LOCAL_MODEL__']

            fd, fp = tempfile.mkstemp()
            try:
                os.close(fd)

                self.svm_label_map = state['__LOCAL_LABELS__']

                # write model to file, then load via libSVM
                with open(fp, 'wb') as model_f:
                    model_f.write(state['__LOCAL_MODEL__'])

                fp_bytes = fp.encode('utf8')
                self.svm_model = svmutil.svm_load_model(fp_bytes)

            finally:
                os.remove(fp)
        else:
            self.svm_model = None
            self._reload_model()
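
A hedged sketch of the ``__getstate__`` counterpart implied by the ``__LOCAL__`` keys consumed above; this illustrates the serialization idea only, is not the library's actual implementation, and assumes ``tempfile``, ``os`` and ``svmutil`` are available as in the method above.

    def __getstate__(self):
        # Sketch only: serialize the in-memory libSVM model to raw bytes so
        # the C-level pointer never has to cross process boundaries.
        state = self.__dict__.copy()
        # DataElement handles are rebuilt from the URIs in __setstate__.
        state['svm_model_elem'] = None
        state['svm_label_map_elem'] = None
        if self.svm_model is not None:
            fd, fp = tempfile.mkstemp()
            try:
                os.close(fd)
                svmutil.svm_save_model(fp.encode('utf8'), self.svm_model)
                with open(fp, 'rb') as model_f:
                    state['__LOCAL_MODEL__'] = model_f.read()
            finally:
                os.remove(fp)
            state['__LOCAL__'] = True
            state['__LOCAL_LABELS__'] = self.svm_label_map
        # The raw svm_model object itself cannot be pickled.
        state['svm_model'] = None
        return state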
Example #2
    def __setstate__(self, state):
        self.svm_model_uri = state['svm_model_uri']
        self.svm_label_map_uri = state['svm_label_map_uri']
        self.train_params = state['train_params']
        self.normalize = state['normalize']

        self.svm_model_elem = \
            self.svm_model_uri and from_uri(self.svm_model_uri)
        self.svm_label_map_elem = \
            self.svm_label_map_uri and from_uri(self.svm_label_map_uri)

        # C libraries/pointers don't survive across processes.
        if '__LOCAL__' in state:
            fd, fp = tempfile.mkstemp()
            try:
                os.close(fd)

                self.svm_label_map = state['__LOCAL_LABELS__']

                # write model to file, then load via libSVM
                with open(fp, 'wb') as model_f:
                    model_f.write(state['__LOCAL_MODEL__'])

                # NOTE: newer libSVM Python bindings may require a bytes path
                # here (cf. ``fp.encode('utf8')`` in the neighboring examples).
                self.svm_model = svmutil.svm_load_model(fp)

            finally:
                os.remove(fp)
        else:
            self.svm_model = None
            self._reload_model()
Example #3
    def __setstate__(self, state):
        self.svm_model_uri = state['svm_model_uri']
        self.svm_label_map_uri = state['svm_label_map_uri']
        self.train_params = state['train_params']
        self.normalize = state['normalize']

        self.svm_model_elem = \
            self.svm_model_uri and from_uri(self.svm_model_uri)
        self.svm_label_map_elem = \
            self.svm_label_map_uri and from_uri(self.svm_label_map_uri)

        # C libraries/pointers don't survive across processes.
        if '__LOCAL__' in state:
            fd, fp = tempfile.mkstemp()
            try:
                os.close(fd)

                self.svm_label_map = state['__LOCAL_LABELS__']

                # write model to file, then load via libSVM
                with open(fp, 'wb') as model_f:
                    model_f.write(state['__LOCAL_MODEL__'])

                fp_bytes = fp.encode('utf8')
                self.svm_model = svmutil.svm_load_model(fp_bytes)

            finally:
                os.remove(fp)
        else:
            self.svm_model = None
            self._reload_model()
Example #4
    def test_from_uri_plugin_level(self):
        # will be absolute path
        test_file_path = os.path.join(TEST_DATA_DIR, "test_file.dat")
        print("Test file path:", test_file_path)

        e = from_uri(test_file_path)
        self.assertIsInstance(e, DataFileElement)
        self.assertEqual(e._filepath, test_file_path)
        self.assertEqual(e.get_bytes(), six.b(''))

        e = from_uri('file://' + test_file_path)
        self.assertIsInstance(e, DataFileElement)
        self.assertEqual(e._filepath, test_file_path)
        self.assertEqual(e.get_bytes(), six.b(''))
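
A minimal standalone sketch of the resolution behavior these assertions exercise; the import path and the temporary file are assumptions for illustration, not part of the test module.

    import os
    import tempfile

    # Import path assumed for illustration.
    from smqtk.representation.data_element import from_uri

    fd, path = tempfile.mkstemp(suffix='.dat')
    os.close(fd)
    try:
        e_plain = from_uri(path)             # plain absolute path
        e_file = from_uri('file://' + path)  # explicit file:// scheme
        # Both resolve to file-backed elements over the same (empty) file.
        assert e_plain.get_bytes() == e_file.get_bytes() == b''
    finally:
        os.remove(path)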
Example #5
    def test_from_uri_plugin_level(self):
        # will be absolute path
        test_file_path = os.path.join(TEST_DATA_DIR, "test_file.dat")
        print("Test file path:", test_file_path)

        e = from_uri(test_file_path)
        self.assertIsInstance(e, DataFileElement)
        self.assertEqual(e._filepath, test_file_path)
        self.assertEqual(e.get_bytes(), six.b(''))

        e = from_uri('file://' + test_file_path)
        self.assertIsInstance(e, DataFileElement)
        self.assertEqual(e._filepath, test_file_path)
        self.assertEqual(e.get_bytes(), six.b(''))
Example #6
 def test_invalid_datatype(self, m_cdg_setupNetwork):
     # dummy network setup
     g = CaffeDescriptorGenerator(None, None, None)
     bad_element = from_uri(os.path.join(TEST_DATA_DIR,
                                         'test_file.dat'))
     nose.tools.assert_raises(ValueError, g.compute_descriptor,
                              bad_element)
Example #7
        def test_invalid_datatype(self, _m_cdg_setupNetwork):
            # Test that a data element with an incorrect content type raises an
            # exception.

            # Purposefully passing bad constructor parameters and ignoring
            # Caffe network setup (mocked above).
            # noinspection PyTypeChecker
            g = CaffeDescriptorGenerator(None, None, None)
            bad_element = from_uri(os.path.join(TEST_DATA_DIR,
                                                'test_file.dat'))
            self.assertRaises(ValueError, g.compute_descriptor, bad_element)
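
The ``_m_cdg_setupNetwork`` argument implies the test class patches the generator's network setup so dummy constructor arguments are accepted; a hedged sketch of that decoration (the patch target and import are assumptions):

    import unittest

    import mock  # ``unittest.mock`` under Python 3

    # Patch target assumed; the point is to bypass real Caffe initialization.
    @mock.patch.object(CaffeDescriptorGenerator, '_setup_network')
    class TestCaffeDescriptorGenerator(unittest.TestCase):

        def test_invalid_datatype(self, _m_cdg_setupNetwork):
            # noinspection PyTypeChecker
            g = CaffeDescriptorGenerator(None, None, None)
            ...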
Example #8
        def test_compute_descriptor_dummy_model(self):
            # Caffe dummy network interaction test (Lenna image)

            # Construct network with an empty model just to see that our
            # interaction with the Caffe API is successful. We expect a
            # zero-valued descriptor vector.
            g = CaffeDescriptorGenerator(self.dummy_net_topo_fp,
                                         self.dummy_caffe_model_fp,
                                         self.dummy_img_mean_fp,
                                         return_layer='fc', use_gpu=False)
            d = g.compute_descriptor(from_uri(self.lenna_image_fp))
            nose.tools.assert_almost_equal(d.vector().sum(), 0., 12)
Example #9
        def test_process_load_img(self):
            # using image shape, meaning no transformation should occur
            test_data_layer = 'data'
            test_transformer = \
                caffe.io.Transformer({test_data_layer: (1, 3, 512, 512)})

            lenna_elem = from_uri(self.lenna_image_fp)
            a_expected = numpy.asarray(PIL.Image.open(self.lenna_image_fp),
                                       numpy.float32)
            a = _process_load_img_array(
                (lenna_elem, test_transformer, test_data_layer, None, None))
            numpy.testing.assert_allclose(a, a_expected)
Example #10
        def test_compute_descriptor_dummy_model(self):
            # Caffe dummy network interaction test (Lenna image)

            # Construct network with an empty model just to see that our
            # interaction with the Caffe API is successful. We expect a
            # zero-valued descriptor vector.
            g = CaffeDescriptorGenerator(self.dummy_net_topo_fp,
                                         self.dummy_caffe_model_fp,
                                         self.dummy_img_mean_fp,
                                         return_layer='fc', use_gpu=False)
            d = g.compute_descriptor(from_uri(self.lenna_image_fp))
            self.assertAlmostEqual(d.vector().sum(), 0., 12)
Example #11
        def test_process_load_img(self):
            # using image shape, meaning no transformation should occur
            test_data_layer = 'data'
            test_transformer = \
                caffe.io.Transformer({test_data_layer: (1, 3, 512, 512)})

            lenna_elem = from_uri(self.lenna_image_fp)
            a_expected = numpy.asarray(PIL.Image.open(self.lenna_image_fp),
                                       numpy.float32)
            a = _process_load_img_array((
                lenna_elem, test_transformer, test_data_layer, None, None
            ))
            numpy.testing.assert_allclose(a, a_expected)
Example #12
 def test_compute_descriptor_from_url_lenna_description(self):
     # Caffe AlexNet interaction test (Lenna image)
     # This is a long test since it has to download data for remote URIs
     d = CaffeDescriptorGenerator(
         self.www_uri_alexnet_prototxt,
         self.www_uri_alexnet_caffemodel,
         self.www_uri_image_mean_proto,
         return_layer='fc7',
         use_gpu=False,
     )
     lenna_elem = from_uri(self.lenna_image_fp)
     expected_descr = numpy.load(self.lenna_alexnet_fc7_descr_fp)
     descr = d.compute_descriptor(lenna_elem).vector()
     numpy.testing.assert_allclose(descr, expected_descr, atol=1e-5)
Example #13
        def test_invalid_datatype(self, _m_cdg_setupNetwork):
            # Test that a data element with an incorrect content type raises an
            # exception.

            # Purposefully passing bad constructor parameters and ignoring
            # Caffe network setup (mocked above).
            # noinspection PyTypeChecker
            g = CaffeDescriptorGenerator(None, None, None)
            bad_element = from_uri(os.path.join(TEST_DATA_DIR, 'test_file.dat'))
            self.assertRaises(
                ValueError,
                g.compute_descriptor,
                bad_element
            )
Example #14
 def test_compute_descriptor_from_url_lenna_description(self):
     # Caffe AlexNet interaction test (Lenna image)
     # This is a long test since it has to download data for remote URIs
     d = CaffeDescriptorGenerator(
         self.www_uri_alexnet_prototxt,
         self.www_uri_alexnet_caffemodel,
         self.www_uri_image_mean_proto,
         return_layer='fc7',
         use_gpu=False,
     )
     lenna_elem = from_uri(self.lenna_image_fp)
     expected_descr = numpy.load(self.lenna_alexnet_fc7_descr_fp)
     descr = d.compute_descriptor(lenna_elem).vector()
     numpy.testing.assert_allclose(descr, expected_descr, atol=1e-5)
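
Putting the pieces of this test together, a minimal usage sketch of the descriptor pipeline it exercises; the file names and import paths below are placeholders and assumptions, not the test's fixture values.

    # Import paths assumed for illustration.
    from smqtk.algorithms.descriptor_generator.caffe_descriptor import \
        CaffeDescriptorGenerator
    from smqtk.representation.data_element import from_uri

    gen = CaffeDescriptorGenerator(
        'alexnet_deploy.prototxt',    # network definition (placeholder)
        'alexnet.caffemodel',         # trained weights (placeholder)
        'imagenet_mean.binaryproto',  # image mean (placeholder)
        return_layer='fc7',
        use_gpu=False,
    )
    descr_vec = gen.compute_descriptor(from_uri('lenna.png')).vector()
    print(descr_vec.shape)  # e.g. (4096,) for AlexNet fc7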
Example #15
    def __init__(self, index_to_label_uri):
        """
        Construct a new "classifier" that applies labels to input vector
        indices.

        We expect to be given a URI to a new-line separated text file where each
        line is a separate label in order and matching the dimensionality of an
        input descriptor.

        :param index_to_label_uri: URI to new-line separated sequence of labels.
        :type index_to_label_uri: str

        """
        super(IndexLabelClassifier, self).__init__()

        # load label vector
        self.index_to_label_uri = index_to_label_uri
        self.label_vector = [l.strip() for l in
                             from_uri(index_to_label_uri).to_buffered_reader()]
Example #16
    def __init__(self, index_to_label_uri):
        """
        Construct a new "classifier" that applies labels to input vector
        indices.

        We expect to be given a URI to a new-line separated text file where each
        line is a separate label in order and matching the dimensionality of an
        input descriptor.

        :param index_to_label_uri: URI to new-line separated sequence of labels.
        :type index_to_label_uri: str

        """
        super(IndexLabelClassifier, self).__init__()

        # load label vector
        self.index_to_label_uri = index_to_label_uri
        self.label_vector = [
            l.strip()
            for l in from_uri(index_to_label_uri).to_buffered_reader()
        ]
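
A hedged usage sketch of the constructor above; the label file name and its contents are made up for illustration.

    # One label per line, in descriptor-index order.
    with open('labels.txt', 'w') as f:
        f.write('cat\ndog\nbird\n')

    c = IndexLabelClassifier('labels.txt')
    # Under a binary buffered reader the stripped labels are bytes objects.
    print(c.label_vector)  # e.g. [b'cat', b'dog', b'bird']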
Example #17
    def __init__(self, index_uri=None, parameters_uri=None,
                 descriptor_cache_uri=None,
                 # Parameters for building an index
                 autotune=False, target_precision=0.95, sample_fraction=0.1,
                 distance_method='hik', random_seed=None):
        """
        Initialize FLANN index properties. Does not contain a query-able index
        until one is built via the ``build_index`` method, or loaded from
        existing model files.

        When using this algorithm in a multiprocessing environment, the model
        file path parameters must be specified due to needing to reload the
        FLANN index on separate processes. This is because FLANN is in C and
        its instances are not copied into processes.

        Documentation on index building parameters and their meaning can be
        found in the FLANN documentation PDF:

            http://www.cs.ubc.ca/research/flann/uploads/FLANN/flann_manual-1.8.4.pdf

        See the MATLAB section for detailed descriptions (python section will
        just point you to the MATLAB section).

        :param index_uri: Optional URI to where to load/store FLANN index
            when initialized and/or built.

            If not configured, no model files are written to or loaded from
            disk.
        :type index_uri: None | str

        :param parameters_uri: Optional file location to load/save FLANN
            index parameters determined at build time.

            If not configured, no model files are written to or loaded from
            disk.
        :type parameters_uri: None | str

        :param descriptor_cache_uri: Optional file location to load/store
            DescriptorElements in this index.

            If not configured, no model files are written to or loaded from
            disk.
        :type descriptor_cache_uri: None | str

        :param autotune: Whether or not to perform parameter auto-tuning when
            building the index. If this is False, then the `target_precision`
            and `sample_fraction` parameters are not used.
        :type autotune: bool

        :param target_precision: Target estimation accuracy when determining
            nearest neighbor when tuning parameters. This should be between
            [0,1] and represents percentage accuracy.
        :type target_precision: float

        :param sample_fraction: Sub-sample percentage of the total index to use
            when performing auto-tuning. Value should be in the range of [0,1]
            and represents percentage.
        :type sample_fraction: float

        :param distance_method: Method label of the distance function to use.
            See FLANN documentation manual for available methods. Common
            methods include "hik", "chi_square" (default), and "euclidean".
            When loading and existing index, this value is ignored in
            preference for the distance method used to build the loaded index.
        :type distance_method: str

        :param random_seed: Integer to use as the random number generator seed.
        :type random_seed: int

        """
        super(FlannNearestNeighborsIndex, self).__init__()

        self._index_uri = index_uri
        self._index_param_uri = parameters_uri
        self._descr_cache_uri = descriptor_cache_uri

        # Elements will be None if input URI is None
        self._index_elem = \
            self._index_uri and from_uri(self._index_uri)
        self._index_param_elem = \
            self._index_param_uri and from_uri(self._index_param_uri)
        self._descr_cache_elem = \
            self._descr_cache_uri and from_uri(self._descr_cache_uri)

        # parameters for building an index
        self._build_autotune = autotune
        self._build_target_precision = float(target_precision)
        self._build_sample_frac = float(sample_fraction)
        self._distance_method = str(distance_method)

        # Lock for model component access.  Using a multiprocessing lock due
        # to possible cases where another thread/process attempts to restore a
        # model before it's fully written.  A reordering of _build_index could
        # lessen the requirement to a `threading.RLock`.
        self._model_lock = multiprocessing.RLock()

        # In-order cache of descriptors we're indexing over.
        # - flann.nn_index will spit out indices to list
        #: :type: list[smqtk.representation.DescriptorElement] | None
        self._descr_cache = None

        # The flann instance with a built index. None before index load/build.
        #: :type: pyflann.index.FLANN or None
        self._flann = None
        # Flann index parameters determined during building. None before index
        # load/build.
        #: :type: dict
        self._flann_build_params = None

        #: :type: None | int
        self._rand_seed = None
        if random_seed:
            self._rand_seed = int(random_seed)

        # The process ID that the currently set FLANN instance was built/loaded
        # on. If this differs from the current process ID, the index should be
        # reloaded from cache.
        self._pid = None

        # Load the index/parameters if one exists
        if self._has_model_data():
            self._log.info("Found existing model data. Loading.")
            self._load_flann_model()
Example #18
    def __init__(
            self,
            index_uri=None,
            parameters_uri=None,
            descriptor_cache_uri=None,
            # Parameters for building an index
            autotune=False,
            target_precision=0.95,
            sample_fraction=0.1,
            distance_method='hik',
            random_seed=None):
        """
        Initialize FLANN index properties. Does not contain a query-able index
        until one is built via the ``build_index`` method, or loaded from
        existing model files.

        When using this algorithm in a multiprocessing environment, the model
        file path parameters must be specified due to needing to reload the
        FLANN index on separate processes. This is because FLANN is in C and
        its instances are not copied into processes.

        Documentation on index building parameters and their meaning can be
        found in the FLANN documentation PDF:

            http://www.cs.ubc.ca/research/flann/uploads/FLANN/flann_manual-1.8.4.pdf

        See the MATLAB section for detailed descriptions (python section will
        just point you to the MATLAB section).

        :param index_uri: Optional URI to where to load/store FLANN index
            when initialized and/or built.

            If not configured, no model files are written to or loaded from
            disk.
        :type index_uri: None | str

        :param parameters_uri: Optional file location to load/save FLANN
            index parameters determined at build time.

            If not configured, no model files are written to or loaded from
            disk.
        :type parameters_uri: None | str

        :param descriptor_cache_uri: Optional file location to load/store
            DescriptorElements in this index.

            If not configured, no model files are written to or loaded from
            disk.
        :type descriptor_cache_uri: None | str

        :param autotune: Whether or not to perform parameter auto-tuning when
            building the index. If this is False, then the `target_precision`
            and `sample_fraction` parameters are not used.
        :type autotune: bool

        :param target_precision: Target estimation accuracy when determining
            nearest neighbor when tuning parameters. This should be between
            [0,1] and represents percentage accuracy.
        :type target_precision: float

        :param sample_fraction: Sub-sample percentage of the total index to use
            when performing auto-tuning. Value should be in the range of [0,1]
            and represents percentage.
        :type sample_fraction: float

        :param distance_method: Method label of the distance function to use.
            See FLANN documentation manual for available methods. Common
            methods include "hik", "chi_square" (default), and "euclidean".
            When loading and existing index, this value is ignored in
            preference for the distance method used to build the loaded index.
        :type distance_method: str

        :param random_seed: Integer to use as the random number generator seed.
        :type random_seed: int

        """
        super(FlannNearestNeighborsIndex, self).__init__()

        self._index_uri = index_uri
        self._index_param_uri = parameters_uri
        self._descr_cache_uri = descriptor_cache_uri

        # Elements will be None if input URI is None
        self._index_elem = \
            self._index_uri and from_uri(self._index_uri)
        self._index_param_elem = \
            self._index_param_uri and from_uri(self._index_param_uri)
        self._descr_cache_elem = \
            self._descr_cache_uri and from_uri(self._descr_cache_uri)

        # parameters for building an index
        self._build_autotune = autotune
        self._build_target_precision = float(target_precision)
        self._build_sample_frac = float(sample_fraction)
        self._distance_method = str(distance_method)

        # Lock for model component access.  Using a multiprocessing lock due
        # to possible cases where another thread/process attempts to restore a
        # model before it's fully written.  A reordering of _build_index could
        # lessen the requirement to a `threading.RLock`.
        self._model_lock = multiprocessing.RLock()

        # In-order cache of descriptors we're indexing over.
        # - flann.nn_index will spit out indices to list
        #: :type: list[smqtk.representation.DescriptorElement] | None
        self._descr_cache = None

        # The flann instance with a built index. None before index load/build.
        #: :type: pyflann.index.FLANN or None
        self._flann = None
        # Flann index parameters determined during building. None before index
        # load/build.
        #: :type: dict
        self._flann_build_params = None

        #: :type: None | int
        self._rand_seed = None
        if random_seed:
            self._rand_seed = int(random_seed)

        # The process ID that the currently set FLANN instance was built/loaded
        # on. If this differs from the current process ID, the index should be
        # reloaded from cache.
        self._pid = None

        # Load the index/parameters if one exists
        if self._has_model_data():
            self._log.info("Found existing model data. Loading.")
            self._load_flann_model()
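
To make the long parameter docstring concrete, a minimal usage sketch; the URIs, the descriptor iterable, and the ``nn`` call are placeholders/assumptions rather than excerpts from the library's tests.

    idx = FlannNearestNeighborsIndex(
        index_uri='flann.index',             # where the FLANN index is persisted
        parameters_uri='flann.params',       # build-time parameters
        descriptor_cache_uri='descr.cache',  # indexed DescriptorElements
        autotune=True,                       # enables target_precision/sample_fraction
        target_precision=0.95,
        sample_fraction=0.1,
        distance_method='hik',
        random_seed=0,
    )
    idx.build_index(descriptor_iterable)          # iterable of DescriptorElement (placeholder)
    neighbors, dists = idx.nn(query_descr, n=10)  # assumed query signature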
Example #19
    def __init__(self, svm_model_uri=None, svm_label_map_uri=None,
                 train_params={
                     '-s': 0,  # C-SVC, assumed default if not provided
                     '-t': 0,  # linear kernel
                     '-b': 1,  # enable probability estimates
                     '-c': 2,  # SVM parameter C
                     # '-g': 0.0078125,  # initial gamma (1 / 128)
                 },
                 normalize=None,
                 n_jobs=4,
                 ):
        """
        Initialize the classifier with an empty or existing model.

        Model file paths are optional. If they are given and the file(s) exist,
        we will load them. If they do not, we treat the path(s) as the output
        path(s) for saving a model after calling ``train``. If this is None
        (default), no model is loaded nor output via training, thus any model
        trained will only exist in memory during the lifetime of this instance.

        :param svm_model_uri: Path to the libSVM model file.
        :type svm_model_uri: None | str

        :param svm_label_map_uri: Path to the pickle file containing this
            model's output labels.
        :type svm_label_map_uri: None | str

        :param train_params: SVM parameters used for training. See libSVM
            documentation for parameter flags and values.
        :type train_params: dict[basestring, int|float]

        :param normalize: Normalize input vectors to training and
            classification methods using ``numpy.linalg.norm``. This may either
            be ``None``, disabling normalization, or any valid value that
            could be passed to the ``ord`` parameter in ``numpy.linalg.norm``
            for 1D arrays. This is ``None`` by default (no normalization).
        :type normalize: None | int | float | str

        :param int|None n_jobs:
            Number of processes to use to parallelize prediction. If None or a
            negative value, all cores are used.

        """
        super(LibSvmClassifier, self).__init__()

        self.svm_model_uri = svm_model_uri
        self.svm_label_map_uri = svm_label_map_uri

        # Elements will be None if input URI is None
        #: :type: None | smqtk.representation.DataElement
        self.svm_model_elem = \
            svm_model_uri and from_uri(svm_model_uri)
        #: :type: None | smqtk.representation.DataElement
        self.svm_label_map_elem = \
            svm_label_map_uri and from_uri(svm_label_map_uri)

        self.train_params = train_params
        self.normalize = normalize
        self.n_jobs = n_jobs
        # Validate normalization parameter by trying it on a random vector
        if normalize is not None:
            self._norm_vector(numpy.random.rand(8))

        # generated parameters
        #: :type: svm.svm_model
        self.svm_model = None
        # dictionary mapping SVM integer labels to semantic labels
        #: :type: dict[int, collections.abc.Hashable]
        self.svm_label_map = {}

        self._reload_model()
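
A hedged usage sketch of the constructor above; the URIs are placeholders, and omitting them keeps any trained model in memory only, as the docstring describes.

    clf = LibSvmClassifier(
        svm_model_uri='model.libsvm',       # placeholder load/save path
        svm_label_map_uri='labels.pickle',  # placeholder load/save path
        train_params={'-s': 0, '-t': 0, '-b': 1, '-c': 2},
        normalize=2,   # L2-normalize vectors via numpy.linalg.norm(ord=2)
        n_jobs=4,
    )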
Example #20
    def _setup_network(self):
        """
        Initialize Caffe and the network
        """
        self._set_caffe_mode()

        # Questions:
        #   - ``caffe.TEST`` indicates phase of either TRAIN or TEST
        self._log.debug("Initializing network")
        network_prototxt_element = from_uri(self.network_prototxt_uri)
        network_model_element = from_uri(self.network_model_uri)
        self._log.debug("Loading Caffe network from network/model configs")
        self.network = caffe.Net(network_prototxt_element.write_temp(),
                                 caffe.TEST,
                                 weights=network_model_element.write_temp())
        network_prototxt_element.clean_temp()
        network_model_element.clean_temp()
        # Assuming the network has a 'data' layer and notion of data shape
        self.net_data_shape = self.network.blobs[self.data_layer].data.shape
        self._log.debug("Network data shape: %s", self.net_data_shape)

        # Creating input data transformer
        self._log.debug("Initializing data transformer")
        self.transformer = caffe.io.Transformer(
            {self.data_layer: self.network.blobs[self.data_layer].data.shape}
        )
        self._log.debug("Initializing data transformer -> %s",
                        self.transformer.inputs)

        self._log.debug("Loading image mean")
        image_mean_elem = from_uri(self.image_mean_uri)
        image_mean_bytes = image_mean_elem.get_bytes()
        try:
            a = numpy.load(io.BytesIO(image_mean_bytes))
            self._log.info("Loaded image mean from numpy bytes")
        except IOError:
            self._log.debug("Image mean file not a numpy array, assuming "
                            "URI to protobuf binary.")
            # noinspection PyUnresolvedReferences
            blob = caffe.proto.caffe_pb2.BlobProto()
            blob.ParseFromString(image_mean_bytes)
            a = numpy.array(caffe.io.blobproto_to_array(blob))
            assert a.shape[0] == 1, \
                "Input image mean blob protobuf consisted of more than one " \
                "image. Not sure how to handle this yet."
            a = a.reshape(a.shape[1:])
            self._log.info("Loaded image mean from protobuf bytes")
        assert a.shape[0] in [1, 3], \
            "Currently asserting that we either get 1 or 3 channel images. " \
            "Got a %d channel image." % a[0]
        # TODO: Instead of always using pixel mean, try to use image-mean if
        #       given. Might have to rescale if image/data layer shape is
        #       different.
        a_mean = a.mean(1).mean(1)
        self._log.debug("Initializing data transformer -- mean")
        self.transformer.set_mean(self.data_layer, a_mean)

        self._log.debug("Initializing data transformer -- transpose")
        self.transformer.set_transpose(self.data_layer, (2, 0, 1))
        if self.network_is_bgr:
            self._log.debug("Initializing data transformer -- channel swap")
            self.transformer.set_channel_swap(self.data_layer, (2, 1, 0))
        if self.input_scale:
            self._log.debug("Initializing data transformer -- input scale")
            self.transformer.set_input_scale(self.data_layer, self.input_scale)
Example #21
    def _setup_network(self):
        """
        Initialize Caffe and the network
        """
        self._set_caffe_mode()

        # Questions:
        #   - ``caffe.TEST`` indicates phase of either TRAIN or TEST
        self._log.debug("Initializing network")
        network_prototxt_element = from_uri(self.network_prototxt_uri)
        network_model_element = from_uri(self.network_model_uri)
        self._log.debug("Loading Caffe network from network/model configs")
        self.network = caffe.Net(network_prototxt_element.write_temp(),
                                 caffe.TEST,
                                 weights=network_model_element.write_temp())
        network_prototxt_element.clean_temp()
        network_model_element.clean_temp()
        # Assuming the network has a 'data' layer and notion of data shape
        self.net_data_shape = self.network.blobs[self.data_layer].data.shape
        self._log.debug("Network data shape: %s", self.net_data_shape)

        # Creating input data transformer
        self._log.debug("Initializing data transformer")
        self.transformer = caffe.io.Transformer(
            {self.data_layer: self.network.blobs[self.data_layer].data.shape})
        self._log.debug("Initializing data transformer -> %s",
                        self.transformer.inputs)

        self._log.debug("Loading image mean")
        image_mean_elem = from_uri(self.image_mean_uri)
        image_mean_bytes = image_mean_elem.get_bytes()
        try:
            a = numpy.load(io.BytesIO(image_mean_bytes))
            self._log.info("Loaded image mean from numpy bytes")
        except IOError:
            self._log.debug("Image mean file not a numpy array, assuming "
                            "URI to protobuf binary.")
            # noinspection PyUnresolvedReferences
            blob = caffe.proto.caffe_pb2.BlobProto()
            blob.ParseFromString(image_mean_bytes)
            a = numpy.array(caffe.io.blobproto_to_array(blob))
            assert a.shape[0] == 1, \
                "Input image mean blob protobuf consisted of more than one " \
                "image. Not sure how to handle this yet."
            a = a.reshape(a.shape[1:])
            self._log.info("Loaded image mean from protobuf bytes")
        assert a.shape[0] in [1, 3], \
            "Currently asserting that we either get 1 or 3 channel images. " \
            "Got a %d channel image." % a[0]
        # TODO: Instead of always using pixel mean, try to use image-mean if
        #       given. Might have to rescale if image/data layer shape is
        #       different.
        a_mean = a.mean(1).mean(1)
        self._log.debug("Initializing data transformer -- mean")
        self.transformer.set_mean(self.data_layer, a_mean)

        self._log.debug("Initializing data transformer -- transpose")
        self.transformer.set_transpose(self.data_layer, (2, 0, 1))
        if self.network_is_bgr:
            self._log.debug("Initializing data transformer -- channel swap")
            self.transformer.set_channel_swap(self.data_layer, (2, 1, 0))
        if self.input_scale:
            self._log.debug("Initializing data transformer -- input scale")
            self.transformer.set_input_scale(self.data_layer, self.input_scale)
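
The ``a.mean(1).mean(1)`` call above reduces a ``(channels, H, W)`` mean image to one mean value per channel; a small standalone check of that reduction:

    import numpy

    # Stand-in for the loaded mean image; shape (channels, H, W).
    a = numpy.random.rand(3, 256, 256).astype(numpy.float32)
    a_mean = a.mean(1).mean(1)   # average over H, then over W -> shape (3,)
    assert a_mean.shape == (3,)
    numpy.testing.assert_allclose(a_mean, a.mean(axis=(1, 2)), rtol=1e-5)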
Example #22
    def __init__(self, svm_model_uri=None, svm_label_map_uri=None,
                 train_params={
                     '-s': 0,  # C-SVC, assumed default if not provided
                     '-t': 0,  # linear kernel
                     '-b': 1,  # enable probability estimates
                     '-c': 2,  # SVM parameter C
                     # '-g': 0.0078125,  # initial gamma (1 / 128)
                 },
                 normalize=None,
                 ):
        """
        Initialize the classifier with an empty or existing model.

        Model file paths are optional. If they are given and the file(s) exist,
        we will load them. If they do not, we treat the path(s) as the output
        path(s) for saving a model after calling ``train``. If this is None
        (default), no model is loaded nor output via training, thus any model
        trained will only exist in memory during the lifetime of this instance.

        :param svm_model_uri: Path to the libSVM model file.
        :type svm_model_uri: None | str

        :param svm_label_map_uri: Path to the pickle file containing this
            model's output labels.
        :type svm_label_map_uri: None | str

        :param train_params: SVM parameters used for training. See libSVM
            documentation for parameter flags and values.
        :type train_params: dict[basestring, int|float]

        :param normalize: Normalize input vectors to training and
            classification methods using ``numpy.linalg.norm``. This may either
            be ``None``, disabling normalization, or any valid value that
            could be passed to the ``ord`` parameter in ``numpy.linalg.norm``
            for 1D arrays. This is ``None`` by default (no normalization).
        :type normalize: None | int | float | str

        """
        super(LibSvmClassifier, self).__init__()

        self.svm_model_uri = svm_model_uri
        self.svm_label_map_uri = svm_label_map_uri

        # Elements will be None if input URI is None
        #: :type: None | smqtk.representation.DataElement
        self.svm_model_elem = \
            svm_model_uri and from_uri(svm_model_uri)
        #: :type: None | smqtk.representation.DataElement
        self.svm_label_map_elem = \
            svm_label_map_uri and from_uri(svm_label_map_uri)

        self.train_params = train_params
        self.normalize = normalize
        # Validate normalization parameter by trying it on a random vector
        if normalize is not None:
            self._norm_vector(numpy.random.rand(8))

        # generated parameters
        #: :type: svm.svm_model
        self.svm_model = None
        # dictionary mapping SVM integer labels to semantic labels
        #: :type: dict[int, collections.Hashable]
        self.svm_label_map = None

        self._reload_model()