class TestMultiboxCoder(unittest.TestCase):
    """Shape and type checks for ``MultiboxCoder`` on CPU and GPU.

    ``grids``, ``aspect_ratios``, ``steps``, ``sizes``, ``n_fg_class``,
    ``nms_thresh`` and ``score_thresh`` are read from ``self`` but are not
    assigned in this class; they are presumably injected by a
    parameterizing decorator outside this view.
    """

    def setUp(self):
        """Build the coder under test and random inputs for it."""
        variance = (0.1, 0.2)
        self.coder = MultiboxCoder(
            self.grids, self.aspect_ratios, self.steps, self.sizes, variance)

        # Expected number of default boxes, accumulated per feature map.
        total = 0
        for grid, ratios in zip(self.grids, self.aspect_ratios):
            total += grid * grid * (len(ratios) + 1) * 2
        self.n_bbox = total

        # The creation order below is kept fixed so that a seeded random
        # state yields the same fixtures.
        n_gt = 5
        self.bbox = _random_array((n_gt, 4))
        self.label = np.random.randint(0, self.n_fg_class, size=n_gt)
        self.mb_loc = _random_array((self.n_bbox, 4))
        self.mb_conf = _random_array((self.n_bbox, self.n_fg_class + 1))

    @attr.gpu
    def test_to_cpu(self):
        # A GPU -> CPU round trip must leave the coder on NumPy.
        coder = self.coder
        coder.to_gpu()
        coder.to_cpu()
        self.assertEqual(coder.xp, np)

    @attr.gpu
    def test_to_gpu(self):
        # After to_gpu() the coder must use CuPy as its array module.
        coder = self.coder
        coder.to_gpu()
        self.assertEqual(coder.xp, cuda.cupy)

    def test_dafault_bbox(self):
        # One 4-vector per default box.
        expected_shape = (self.n_bbox, 4)
        self.assertEqual(self.coder._default_bbox.shape, expected_shape)

    def _check_encode(self, bbox, label):
        """Encode ground truth and verify output types and shapes."""
        array_type = self.coder.xp.ndarray
        encoded = self.coder.encode(bbox, label)
        mb_loc, mb_label = encoded

        self.assertIsInstance(mb_loc, array_type)
        self.assertEqual(mb_loc.shape, (self.n_bbox, 4))

        self.assertIsInstance(mb_label, array_type)
        self.assertEqual(mb_label.shape, (self.n_bbox,))

    def test_encode_cpu(self):
        self._check_encode(self.bbox, self.label)

    @attr.gpu
    def test_encode_gpu(self):
        self.coder.to_gpu()
        gpu_bbox = cuda.to_gpu(self.bbox)
        gpu_label = cuda.to_gpu(self.label)
        self._check_encode(gpu_bbox, gpu_label)

    def _check_decode(self, mb_loc, mb_conf):
        """Decode raw predictions and verify output types and shapes."""
        array_type = self.coder.xp.ndarray
        bbox, label, score = self.coder.decode(
            mb_loc, mb_conf, self.nms_thresh, self.score_thresh)

        self.assertIsInstance(bbox, array_type)
        self.assertEqual(bbox.ndim, 2)
        # At most one detection per (default box, foreground class) pair.
        upper_bound = self.n_bbox * self.n_fg_class
        self.assertLessEqual(bbox.shape[0], upper_bound)
        self.assertEqual(bbox.shape[1], 4)

        # label and score are 1-D and aligned with the rows of bbox.
        for per_box in (label, score):
            self.assertIsInstance(per_box, array_type)
            self.assertEqual(per_box.ndim, 1)
            self.assertEqual(per_box.shape[0], bbox.shape[0])

    def test_decode_cpu(self):
        self._check_decode(self.mb_loc, self.mb_conf)

    @attr.gpu
    def test_decode_gpu(self):
        self.coder.to_gpu()
        gpu_loc = cuda.to_gpu(self.mb_loc)
        gpu_conf = cuda.to_gpu(self.mb_conf)
        self._check_decode(gpu_loc, gpu_conf)
class SSD(chainer.Chain):
    """Base class of Single Shot Multibox Detector.

    This is a base class of Single Shot Multibox Detector [#]_.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        extractor: A link which extracts feature maps.
            This link must have :obj:`insize`, :obj:`grids` and
            :meth:`__call__`.

            * :obj:`insize`: An integer which indicates \
            the size of input images. Images are resized to this size before \
            feature extraction.
            * :obj:`grids`: An iterable of integer. Each integer indicates \
            the size of feature map. This value is used by \
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
            * :meth:`__call__`: A method which computes feature maps. \
            It must take a batched images and return batched feature maps.
        multibox: A link which computes :obj:`mb_locs` and :obj:`mb_confs`
            from feature maps.
            This link must have :obj:`n_class`, :obj:`aspect_ratios` and
            :meth:`__call__`.

            * :obj:`n_class`: An integer which indicates the number of \
            classes. \
            This value should include the background class.
            * :obj:`aspect_ratios`: An iterable of tuple of integer. \
            Each tuple indicates the aspect ratios of default bounding boxes \
            at each feature map. This value is used by \
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
            * :meth:`__call__`: A method which computes \
            :obj:`mb_locs` and :obj:`mb_confs`. \
            It must take a batched feature maps and \
            return :obj:`mb_locs` and :obj:`mb_confs`.
        steps (iterable of float): The step size for each feature map.
            This value is used by
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
        sizes (iterable of float): The base size of default bounding boxes
            for each feature map. This value is used by
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
        variance (tuple of floats): Two coefficients for decoding
            the locations of bounding boxes. This value is used by
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
            The default value is :obj:`(0.1, 0.2)`.
        mean: The value subtracted from each resized image in
            :meth:`_prepare`. It is applied with an in-place subtraction
            on a float32 image array.
            The default value is :obj:`0`.

    Parameters:
        nms_thresh (float): The threshold value
            for :func:`~chainercv.utils.non_maximum_suppression`.
            The default value is :obj:`0.45`.
            This value can be changed directly or by using :meth:`use_preset`.
        score_thresh (float): The threshold value for confidence score.
            A bounding box whose confidence score is lower than this value
            is suppressed.
            The default value is :obj:`0.6`.
            This value can be changed directly or by using :meth:`use_preset`.

    """

    def __init__(
            self, extractor, multibox,
            steps, sizes, variance=(0.1, 0.2),
            mean=0):
        self.mean = mean
        # Sets nms_thresh / score_thresh to the documented defaults.
        self.use_preset('visualize')

        super(SSD, self).__init__()
        with self.init_scope():
            self.extractor = extractor
            self.multibox = multibox

        self.coder = MultiboxCoder(
            extractor.grids, multibox.aspect_ratios, steps, sizes, variance)

    @property
    def insize(self):
        """The input image size reported by the extractor."""
        return self.extractor.insize

    @property
    def n_fg_class(self):
        """The number of classes excluding the background class."""
        return self.multibox.n_class - 1

    def to_cpu(self):
        """Move the links and the coder to CPU."""
        super(SSD, self).to_cpu()
        # The coder is moved explicitly in addition to the base-class call.
        self.coder.to_cpu()

    def to_gpu(self, device=None):
        """Move the links and the coder to the given GPU device."""
        super(SSD, self).to_gpu(device)
        # The coder is moved explicitly in addition to the base-class call.
        self.coder.to_gpu(device=device)

    def __call__(self, x):
        """Compute localization and classification from a batch of images.

        This method computes two variables, :obj:`mb_locs` and
        :obj:`mb_confs`. :func:`self.coder.decode` converts these variables
        to bounding box coordinates and confidence scores.
        These variables are also used in training SSD.

        Args:
            x (chainer.Variable): A variable holding a batch of images.
                The images are preprocessed by :meth:`_prepare`.

        Returns:
            tuple of chainer.Variable:
            This method returns two variables, :obj:`mb_locs` and
            :obj:`mb_confs`.

            * **mb_locs**: A variable of float arrays of shape \
                :math:`(B, K, 4)`, \
                where :math:`B` is the number of samples in the batch and \
                :math:`K` is the number of default bounding boxes.
            * **mb_confs**: A variable of float arrays of shape \
                :math:`(B, K, n\\_fg\\_class + 1)`.

        """

        return self.multibox(self.extractor(x))

    def _prepare(self, img):
        """Convert an image into the form fed to the extractor.

        The image is cast to float32, resized to
        :obj:`(insize, insize)` and :obj:`mean` is subtracted from it.

        Args:
            img (numpy.ndarray): An image array. :meth:`predict` passes
                images in CHW format.

        Returns:
            The preprocessed float32 image.

        """
        img = img.astype(np.float32)
        img = transforms.resize(img, (self.insize, self.insize))
        img -= self.mean
        return img

    def use_preset(self, preset):
        """Use the given preset during prediction.

        This method changes values of :obj:`nms_thresh` and
        :obj:`score_thresh`. These values are a threshold value
        used for non maximum suppression and a threshold value
        to discard low confidence proposals in :meth:`predict`,
        respectively.

        If the attributes need to be changed to something
        other than the values provided in the presets, please modify
        them by directly accessing the public attributes.

        Args:
            preset ({'visualize', 'evaluate'}): A string to determine the
                preset to use.

        Raises:
            ValueError: If :obj:`preset` is neither :obj:`'visualize'`
                nor :obj:`'evaluate'`.

        """

        if preset == 'visualize':
            self.nms_thresh = 0.45
            self.score_thresh = 0.6
        elif preset == 'evaluate':
            self.nms_thresh = 0.45
            self.score_thresh = 0.01
        else:
            raise ValueError('preset must be visualize or evaluate')

    def predict(self, imgs):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.
                An empty iterable yields three empty lists.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in a image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """

        x = []
        sizes = []
        for img in imgs:
            # Remember the original size so boxes can be mapped back to it.
            _, H, W = img.shape
            img = self._prepare(img)
            x.append(self.xp.array(img))
            sizes.append((H, W))

        if not x:
            # Stacking an empty list raises ValueError; with no images
            # there is simply nothing to detect.
            return [], [], []

        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            x = chainer.Variable(self.xp.stack(x))
            mb_locs, mb_confs = self(x)
        mb_locs, mb_confs = mb_locs.array, mb_confs.array

        bboxes = []
        labels = []
        scores = []
        for mb_loc, mb_conf, size in zip(mb_locs, mb_confs, sizes):
            bbox, label, score = self.coder.decode(
                mb_loc, mb_conf, self.nms_thresh, self.score_thresh)
            # Decoded boxes are in network-input coordinates; rescale them
            # to the coordinates of the original image.
            bbox = transforms.resize_bbox(
                bbox, (self.insize, self.insize), size)
            # Results are always handed back as CPU arrays.
            bboxes.append(chainer.backends.cuda.to_cpu(bbox))
            labels.append(chainer.backends.cuda.to_cpu(label))
            scores.append(chainer.backends.cuda.to_cpu(score))

        return bboxes, labels, scores
class TestMultiboxCoder(unittest.TestCase):
    """Checks the array types and shapes produced by ``MultiboxCoder``.

    The configuration attributes read from ``self`` (``grids``,
    ``aspect_ratios``, ``steps``, ``sizes``, ``n_fg_class``,
    ``nms_thresh``, ``score_thresh``) are not set here; presumably they
    are supplied by test parameterization defined outside this view.
    """

    def setUp(self):
        """Create the coder plus random ground truth and predictions."""
        self.coder = MultiboxCoder(
            self.grids,
            self.aspect_ratios,
            self.steps,
            self.sizes,
            (0.1, 0.2))

        # Number of default boxes contributed by each feature map.
        per_map = [
            grid * grid * (len(ratios) + 1) * 2
            for grid, ratios in zip(self.grids, self.aspect_ratios)]
        self.n_bbox = sum(per_map)

        # Fixtures are drawn in a fixed order: bbox, label, mb_loc, mb_conf.
        self.bbox = _random_array((5, 4))
        self.label = np.random.randint(0, self.n_fg_class, size=5)
        loc_shape = (self.n_bbox, 4)
        conf_shape = (self.n_bbox, self.n_fg_class + 1)
        self.mb_loc = _random_array(loc_shape)
        self.mb_conf = _random_array(conf_shape)

    @attr.gpu
    def test_to_cpu(self):
        # Moving to GPU and back must restore NumPy as the array module.
        self.coder.to_gpu()
        self.coder.to_cpu()
        array_module = self.coder.xp
        self.assertEqual(array_module, np)

    @attr.gpu
    def test_to_gpu(self):
        # Moving to GPU must switch the array module to CuPy.
        self.coder.to_gpu()
        array_module = self.coder.xp
        self.assertEqual(array_module, cuda.cupy)

    def test_dafault_bbox(self):
        # The default box table has one row of four values per box.
        default_bbox = self.coder._default_bbox
        self.assertEqual(default_bbox.shape, (self.n_bbox, 4))

    def _check_encode(self, bbox, label):
        """Run ``encode`` and assert on the types and shapes it returns."""
        xp = self.coder.xp
        n_bbox = self.n_bbox

        mb_loc, mb_label = self.coder.encode(bbox, label)

        self.assertIsInstance(mb_loc, xp.ndarray)
        self.assertEqual(mb_loc.shape, (n_bbox, 4))
        self.assertIsInstance(mb_label, xp.ndarray)
        self.assertEqual(mb_label.shape, (n_bbox,))

    def test_encode_cpu(self):
        bbox, label = self.bbox, self.label
        self._check_encode(bbox, label)

    @attr.gpu
    def test_encode_gpu(self):
        self.coder.to_gpu()
        self._check_encode(
            cuda.to_gpu(self.bbox),
            cuda.to_gpu(self.label))

    def _check_decode(self, mb_loc, mb_conf):
        """Run ``decode`` and assert on the types and shapes it returns."""
        xp = self.coder.xp
        decoded = self.coder.decode(
            mb_loc, mb_conf, self.nms_thresh, self.score_thresh)
        bbox, label, score = decoded

        # Boxes: a 2-D array with four columns and a bounded row count.
        self.assertIsInstance(bbox, xp.ndarray)
        self.assertEqual(bbox.ndim, 2)
        self.assertLessEqual(bbox.shape[0], self.n_bbox * self.n_fg_class)
        self.assertEqual(bbox.shape[1], 4)

        # Labels: 1-D, one entry per box.
        self.assertIsInstance(label, xp.ndarray)
        self.assertEqual(label.ndim, 1)
        self.assertEqual(label.shape[0], bbox.shape[0])

        # Scores: 1-D, one entry per box.
        self.assertIsInstance(score, xp.ndarray)
        self.assertEqual(score.ndim, 1)
        self.assertEqual(score.shape[0], bbox.shape[0])

    def test_decode_cpu(self):
        mb_loc, mb_conf = self.mb_loc, self.mb_conf
        self._check_decode(mb_loc, mb_conf)

    @attr.gpu
    def test_decode_gpu(self):
        self.coder.to_gpu()
        self._check_decode(
            cuda.to_gpu(self.mb_loc),
            cuda.to_gpu(self.mb_conf))