def __call__(self, src, label, segm):
    """Transform one training sample: image, boxes and encoded masks.

    Steps, in order: resize the image, rescale the boxes to match, apply a
    random horizontal flip to image/boxes/masks, convert the image to a
    normalized tensor and, when anchors are configured, build RPN targets.

    Parameters
    ----------
    src : image array with a 3-entry shape, unpacked here as (h, w, _)
    label : box array; its first four columns are used as ground-truth boxes
    segm : encoded masks accepted by ``cocomask.decode``

    Returns
    -------
    tuple
        ``(img, bbox, masks)`` when ``self._anchors`` is None, otherwise
        ``(img, bbox, masks, cls_target, box_target, box_mask)``.
    """
    # Resize the shorter side (optionally to a random length) while the
    # resize helper is given self._max_size as the bound.
    src_h, src_w, _ = src.shape
    short = (randint(self._short[0], self._short[1])
             if self._random_resize else self._short)
    img = timage.resize_short_within(src, short, self._max_size, interp=1)
    bbox = tbbox.resize(label, (src_w, src_h), (img.shape[1], img.shape[0]))

    # Random horizontal flip; boxes follow the image.
    img_h, img_w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    flipped_x = flips[0]
    bbox = tbbox.flip(bbox, (img_w, img_h), flip_x=flipped_x)

    # Decode the masks (h x w x n per the original note), bring each one to
    # the resized image size with nearest-neighbour interpolation and wrap
    # it in an NDArray (the original note says uint8 input is not accepted
    # downstream).
    decoded = cocomask.decode(segm)
    target_size = (img.shape[1], img.shape[0])
    resized_masks = [
        mx.nd.array(cv2.resize(decoded[:, :, idx], target_size,
                               interpolation=cv2.INTER_NEAREST))
        for idx in range(decoded.shape[-1])
    ]
    # n * (im_height, im_width) -> (n, im_height, im_width)
    masks = mx.nd.stack(*resized_masks, axis=0)
    if flipped_x:
        # Mirror along the last (width) axis to stay aligned with the image.
        masks = mx.nd.flip(masks, axis=2)

    # To tensor, then normalize with the configured statistics.
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype), masks

    # Generate RPN targets here so CPU workers can help reduce the workload.
    gt_bboxes = mx.nd.array(bbox[:, :4])
    data_shape = (1, 3, img.shape[1], img.shape[2])
    if self._multi_stage:
        # One feature symbol / anchor tensor per stage: crop every anchor
        # tensor to its inferred feature-map size and concatenate them all.
        oshapes = [feat_sym.infer_shape(data=data_shape)[1][0]
                   for feat_sym in self._feat_sym]
        per_stage = [
            anchor[:, :, :oshape[2], :oshape[3], :].reshape((-1, 4))
            for anchor, oshape in zip(self._anchors, oshapes)
        ]
        anchors = mx.nd.concat(*per_stage, dim=0)
    else:
        oshape = self._feat_sym.infer_shape(data=data_shape)[1][0]
        anchors = self._anchors[:, :, :oshape[2], :oshape[3], :].reshape((-1, 4))

    cls_target, box_target, box_mask = self._target_generator(
        gt_bboxes, anchors, img.shape[2], img.shape[1])
    return img, bbox.astype(img.dtype), masks, cls_target, box_target, box_mask
Example #2
0
 def __call__(self, src):
     """Turn a validation image into a normalized network input tensor.

     Ground-truth is not touched here; only the image is resized, converted
     with ``to_tensor`` and normalized with ``self._mean`` / ``self._std``.
     """
     # Unpacking fails unless src.shape has exactly three entries.
     _height, _width, _channels = src.shape
     # resize shorter side but keep in max_size
     resized = timage.resize_short_within(
         src, self._short, self._max_size, interp=1)
     tensor = mx.nd.image.to_tensor(resized)  # Converts from 0-255 to 0-1
     return mx.nd.image.normalize(tensor, mean=self._mean, std=self._std)
def transform_gt_bbox(img_path, model, bbox):
    """Rescale ground-truth boxes to the size the image has after resizing.

    The image is read only to learn its original size and the size produced
    by ``timage.resize_short_within`` with the preset of the given model.

    Parameters
    ----------
    img_path : str
        Path of the image the boxes belong to.
    model : str
        Either ``'yolo'`` (short=416, max_size=1024) or ``'rcnn'``
        (short=600, max_size=1000).
    bbox : boxes in the coordinates of the original image, as accepted by
        ``tbbox.resize``.

    Returns
    -------
    The boxes returned by ``tbbox.resize`` for the resized image size.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported names.
    """
    # Validate before any I/O so an unsupported model fails fast with a clear
    # message instead of an UnboundLocalError further down.
    if model == 'yolo':
        short, max_size = 416, 1024
    elif model == 'rcnn':
        short, max_size = 600, 1000
    else:
        raise ValueError(
            "Unsupported model {!r}; expected 'yolo' or 'rcnn'".format(model))

    img = mx.image.imread(img_path)
    h, w, _ = img.shape
    resized_img = timage.resize_short_within(img, short, max_size)
    return tbbox.resize(bbox, (w, h),
                        (resized_img.shape[1], resized_img.shape[0]))
 def preprocess(raw_image_buf,
                short=512,
                max_size=768,
                mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)):
     """Decode an image buffer and build a batched, normalized tensor.

     Parameters
     ----------
     raw_image_buf : encoded image data accepted by ``mx.img.imdecode``
     short, max_size : int
         Forwarded to ``timage.resize_short_within`` (with ``mult_base=32``).
     mean, std : iterable of float
         Normalization statistics.

     Returns
     -------
     tuple
         ``(tensor, orig_image)``: the normalized tensor with a leading
         axis added by ``expand_dims(0)``, and the decoded, un-resized image.
     """
     orig_image = mx.img.imdecode(raw_image_buf)
     resized = timage.resize_short_within(
         orig_image, short, max_size, mult_base=32)
     tensor = mx.nd.image.to_tensor(resized)
     normalized = mx.nd.image.normalize(tensor, mean=mean, std=std)
     batch = normalized.expand_dims(0)
     return batch, orig_image
def transform_test(imgs,
                   short=600,
                   max_size=1000,
                   mean=(0.485, 0.456, 0.406),
                   std=(0.229, 0.224, 0.225)):
    """Transform image(s) to normalized tensors usable as network input.

    Accepts one NDArray or any iterable of NDArrays (lists, tuples and
    one-shot iterators such as generators). Unlike ``load_test`` in this
    file, the display image returned next to each tensor is captured
    *before* resizing, so it keeps the size of the input image.

    Parameters
    ----------
    imgs : NDArray or iterable of NDArray
        Image(s) to be transformed.
    short : int
        Resize image short side to this `short` and keep aspect ratio.
    max_size : int, optional
        Maximum longer side length to fit image.
    mean : iterable of float
        Mean pixel values.
    std : iterable of float
        Standard deviations of pixel values.

    Returns
    -------
    (mxnet.NDArray, numpy.ndarray) or (list, list)
        For exactly one image: the network input tensor (with a leading
        batch axis added by ``expand_dims(0)``) and the un-resized,
        un-normalized image as a uint8 numpy array.
        Otherwise two lists (tensors, originals); use `zip()` to pair them.

    Raises
    ------
    AssertionError
        If any element is not an ``mx.nd.NDArray``.
    """
    if isinstance(imgs, mx.nd.NDArray):
        imgs = [imgs]
    else:
        # The input is walked twice (validation, then conversion). A one-shot
        # iterator would be drained by the first pass and silently yield no
        # results, so materialize it once.
        imgs = list(imgs)
    for im in imgs:
        assert isinstance(im, mx.nd.NDArray), "Expect NDArray, got {}".format(
            type(im))

    tensors = []
    origs = []
    for img in imgs:
        # Keep the display copy at the input size (taken before resizing).
        orig_img = img.asnumpy().astype('uint8')
        img = timage.resize_short_within(img, short, max_size)
        img = mx.nd.image.to_tensor(img)
        img = mx.nd.image.normalize(img, mean=mean, std=std)
        tensors.append(img.expand_dims(0))
        origs.append(orig_img)
    if len(tensors) == 1:
        return tensors[0], origs[0]
    return tensors, origs
Example #6
0
def load_test(filenames,
              short,
              max_size=1024,
              mean=(0.485, 0.456, 0.406),
              std=(0.229, 0.224, 0.225)):
    """Load image file(s) and convert them to normalized network inputs.

    Parameters
    ----------
    filenames : str or list of str
        Image filename(s) to be loaded.
    short : int
        Resize image short side to this `short` and keep aspect ratio.
    max_size : int, optional
        Maximum longer side length to fit image.
    mean : iterable of float
        Mean pixel values.
    std : iterable of float
        Standard deviations of pixel values.

    Returns
    -------
    (mxnet.NDArray, numpy.ndarray) or (list, list)
        For exactly one file: the network input tensor (with a leading batch
        axis added by ``expand_dims(0)``) and the resized, un-normalized
        image as a uint8 numpy array for display.
        Otherwise two lists (tensors, images); use `zip()` to pair them.
    """
    paths = [filenames] if isinstance(filenames, str) else filenames

    tensors, origs = [], []
    for path in paths:
        resized = timage.resize_short_within(
            mx.image.imread(path), short, max_size)
        # Display copy is taken after resizing, before normalization.
        display_img = resized.asnumpy().astype('uint8')
        tensor = mx.nd.image.to_tensor(resized)
        tensor = mx.nd.image.normalize(tensor, mean=mean, std=std)
        tensors.append(tensor.expand_dims(0))
        origs.append(display_img)

    if len(tensors) != 1:
        return tensors, origs
    return tensors[0], origs[0]
Example #7
0
def load_test_from_numpy(np_array, short=720, max_size=1280, mean=(0.485, 0.456, 0.406),
              std=(0.229, 0.224, 0.225),ctx=mx.cpu()):
    """Convert one in-memory image array to a normalized network input.

    Parameters
    ----------
    np_array : array-like image, wrapped with ``mx.nd.array``
    short, max_size : int
        Forwarded to ``timage.resize_short_within``.
    mean, std : iterable of float
        Normalization statistics.
    ctx : passed through to ``mx.nd.image.to_tensor``

    Returns
    -------
    tuple
        ``(tensor, image)``: the normalized tensor with a leading axis added
        by ``expand_dims(0)``, and the resized, un-normalized image as a
        uint8 numpy array.
    """
    resized = timage.resize_short_within(mx.nd.array(np_array), short, max_size)
    # Display copy is taken after resizing, before normalization.
    display_img = resized.asnumpy().astype('uint8')
    tensor = mx.nd.image.to_tensor(resized,ctx=ctx)
    tensor = mx.nd.image.normalize(tensor, mean=mean, std=std)
    # Exactly one image is processed, so a single pair is always returned.
    return tensor.expand_dims(0), display_img
    def __call__(self, src):
        """Resize an image, replicate it into three planes and normalize it.

        ``src`` is transposed with axes (1, 2, 0) first
        (presumably channel-first, single-channel input -- TODO confirm).

        Returns
        -------
        tuple
            ``(img, img_info)`` where ``img`` is the normalized stack of
            three copies of the squeezed, resized image and ``img_info``
            holds its last two dimensions plus the height scale factor.
        """
        transposed = src.transpose([1, 2, 0])
        orig_h, _, _ = transposed.shape
        resized = timage.resize_short_within(
            transposed, self._short, self._max_size, interp=1)
        # Ratio of resized height to original height.
        im_scale = float(resized.shape[0]) / orig_h

        # Three identical planes stacked along a new leading axis.
        planes = [resized.squeeze() for _ in range(3)]
        stacked = mx.nd.stack(*planes, axis=0)
        normalized = mx.nd.image.normalize(stacked, mean=0.5, std=0.22)
        img_info = mx.nd.array(
            [normalized.shape[-2], normalized.shape[-1], im_scale])
        return normalized, img_info
    def inference(self,
                  input_image_file,
                  labels,
                  architecture_file,
                  weights_file,
                  args=None):
        """Run an exported network on one image file and publish the result.

        Parameters
        ----------
        input_image_file : path of the image, read with ``mx.image.imread``
        labels : forwarded unchanged in the emitted result under ``'labels'``
        architecture_file, weights_file : files passed to
            ``gluon.nn.SymbolBlock.imports`` (input name ``'data'``)
        args : optional mapping overriding the defaults ``threshold`` (0.5)
            and ``print_top_n`` (10)

        The outputs (class ids, scores, boxes) are emitted through
        ``self.thread.data`` and the first ``print_top_n`` entries are
        written to the debug log. Nothing is returned in the visible code.
        """
        default_args = {
            'threshold': 0.5,
            'print_top_n': 10,
        }
        # Overlay caller-supplied options on a copy of the defaults.
        tmp_args = default_args.copy()
        if args:
            tmp_args.update(args)
        args = Map(tmp_args)
        # NOTE(review): 'threshold' is not read anywhere in the visible code.
        logger.debug('Try loading network from files "{}" and "{}"'.format(
            architecture_file, weights_file))

        self.checkAborted()

        # Warnings raised while loading / running the network are suppressed.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            ctx = self.getContext()
            net = gluon.nn.SymbolBlock.imports(architecture_file, ['data'],
                                               weights_file,
                                               ctx=ctx)
            # NOTE(review): class_names is assigned but not used below.
            class_names = labels
            net.collect_params().reset_ctx(ctx)
            img = mx.image.imread(input_image_file)
            img = timage.resize_short_within(img,
                                             608,
                                             max_size=1024,
                                             mult_base=1)

            self.checkAborted()
            self.thread.update.emit(None, -1, -1)

            def make_tensor(img):
                # (0, 1, 2) is the identity permutation, so the transpose
                # leaves the layout unchanged; only a leading batch axis is
                # added and the data is cast to float32.
                np_array = np.expand_dims(np.transpose(img, (0, 1, 2)),
                                          axis=0).astype(np.float32)
                return mx.nd.array(np_array)

            # No to_tensor/normalize step here: presumably the exported
            # network does its own preprocessing -- TODO confirm.
            image = img.asnumpy().astype('uint8')
            x = make_tensor(image)
            cid, score, bbox = net(x)

            # Publish the raw outputs as plain Python lists.
            self.thread.data.emit({
                'files': {
                    'input_image_file': input_image_file,
                    'architecture_file': architecture_file,
                    'weights_file': weights_file,
                },
                'imgsize': [image.shape[0], image.shape[1]],
                'classid': cid.asnumpy().tolist(),
                'score': score.asnumpy().tolist(),
                'bbox': bbox.asnumpy().tolist(),
                'labels': labels,
            })
            self.thread.update.emit(None, -1, -1)

            # Log the first n_top entries of the first batch element.
            n_top = args.print_top_n
            classes = cid[0][:n_top].asnumpy().astype(
                'int32').flatten().tolist()
            scores = score[0][:n_top].asnumpy().astype(
                'float32').flatten().tolist()
            # NOTE(review): if the network returns fewer than n_top entries,
            # indexing by range(n_top) would raise IndexError -- verify.
            result_str = '\n'.join([
                'class: {}, score: {}'.format(classes[i], scores[i])
                for i in range(n_top)
            ])
            logger.debug('Top {} inference results:\n {}'.format(
                n_top, result_str))