Example #1
 def __call__(self, pic):
     """
     Args:
         pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
     Returns:
         Tensor: Converted image.
     """
     return F.to_tensor(pic)
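
A minimal usage sketch; this assumes the __call__ above belongs to a torchvision.transforms.ToTensor-style class:

import numpy as np
from torchvision import transforms

to_tensor = transforms.ToTensor()  # owns a __call__ like the one above
pic = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
tensor = to_tensor(pic)            # HWC uint8 -> CHW float32 in [0, 1]
print(tensor.shape, tensor.dtype)  # torch.Size([3, 480, 640]) torch.float32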
Example #2
def transform_test(imgs,
                   short=416,
                   max_size=1024,
                   stride=1,
                   mean=(0.485, 0.456, 0.406),
                   std=(0.229, 0.224, 0.225)):
    """A util function to transform all images to tensors as network input by applying
    normalizations. This function support 1 NDArray or iterable of NDArrays.

    Parameters
    ----------
    imgs : numpy.ndarray or iterable of numpy.ndarray
        Image(s) to be transformed.
    short : int, default=416
        Resize image short side to this `short` and keep aspect ratio.
    max_size : int, optional
        Maximum longer side length to fit image.
        This is to limit the input image shape. The aspect ratio is kept intact because
        arbitrary input sizes are supported in our YOLO implementation.
    stride : int, optional, default is 1
        The stride constraint due to precise alignment of bounding box prediction module.
        Image's width and height must be multiples of `stride`. Use `stride = 1` to
        relax this constraint.
    mean : iterable of float
        Mean pixel values.
    std : iterable of float
        Standard deviations of pixel values.

    Returns
    -------
    (torch.Tensor, numpy.ndarray) or list of such tuples
        A (1, 3, H, W) torch.Tensor as input to the network, and a numpy.ndarray as the
        original un-normalized color image for display.
        If multiple images are supplied, two lists are returned. You can use
        `zip()` to collapse them.

    """
    if isinstance(imgs, np.ndarray):
        imgs = [imgs]
    for im in imgs:
        assert isinstance(im, np.ndarray), "Expected numpy.ndarray, got {}".format(
            type(im))

    tensors = []
    origs = []
    for img in imgs:
        img = timage.resize_short_within(img,
                                         short,
                                         max_size,
                                         mult_base=stride)
        orig_img = img.astype('uint8')
        img = vf.to_tensor(img)
        img = vf.normalize(img, mean=mean, std=std)
        tensors.append(img.unsqueeze(0))
        origs.append(orig_img)
    if len(tensors) == 1:
        return tensors[0], origs[0]
    return tensors, origs
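
A sketch of the single-image path, assuming timage.resize_short_within accepts a numpy array in this port:

import numpy as np

img = np.random.randint(0, 256, (375, 500, 3), dtype=np.uint8)
x, orig = transform_test(img, short=416, max_size=1024, stride=32)
print(x.shape)     # (1, 3, H, W) torch.Tensor; H and W are multiples of stride
print(orig.shape)  # (H, W, 3) uint8 image kept for display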
Example #3
    def __call__(self, src, label):
        """Apply transform to validation image/label."""
        # resize
        h, w, _ = src.shape
        img = timage.imresize(src, self._width, self._height, interp=9)
        bbox = tbbox.resize(label, in_size=(w, h), out_size=(self._width, self._height))

        img = vf.to_tensor(img)
        img = vf.normalize(img, mean=self._mean, std=self._std)
        return img, bbox.astype(type_map[img.dtype])
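
A hypothetical usage sketch; the class name ValTransform and its constructor are assumptions, and labels are assumed to be (N, 5) arrays of [xmin, ymin, xmax, ymax, class_id]:

import numpy as np

transform = ValTransform(width=512, height=512)      # hypothetical class/ctor
src = np.random.randint(0, 256, (300, 400, 3), dtype=np.uint8)
label = np.array([[48.0, 60.0, 200.0, 180.0, 1.0]])  # one ground-truth box
img, bbox = transform(src, label)
print(img.shape)  # torch.Size([3, 512, 512])
print(bbox)       # coordinates rescaled to the 512x512 canvas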
Example #4
    def __call__(self, src, label):
        """Apply transform to training image/label."""
        # random color jittering
        img = eximage.random_color_distort(src)

        # random expansion with prob 0.5
        if np.random.uniform(0, 1) > 0.5:
            img, expand = timage.random_expand(
                img, fill=[m * 255 for m in self._mean])
            bbox = tbbox.translate(label,
                                   x_offset=expand[0],
                                   y_offset=expand[1])
        else:
            img, bbox = img, label

        # random cropping
        h, w, _ = img.shape
        bbox, crop = exbbox.random_crop_with_constraints(bbox, (w, h))
        x0, y0, w, h = crop
        img = timage.fixed_crop(img, x0, y0, w, h)

        # resize with random interpolation
        h, w, _ = img.shape
        interp = np.random.randint(0, 5)
        img = timage.imresize(img, self._width, self._height, interp=interp)
        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

        # random horizontal flip
        h, w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

        # to tensor
        img = vf.to_tensor(img / 255)
        img = vf.normalize(img, mean=self._mean, std=self._std)

        if self._target_generator is None:
            # numpy's astype cannot take a torch dtype, so cast explicitly
            return img, bbox.astype(np.float32)

        # generate training target so cpu workers can help reduce the workload on gpu
        gt_bboxes = torch.from_numpy(bbox[np.newaxis, :, :4])
        gt_ids = torch.from_numpy(bbox[np.newaxis, :, 4:5])
        if self._mixup:
            gt_mixratio = torch.from_numpy(bbox[np.newaxis, :, -1:])
        else:
            gt_mixratio = None
        objectness, center_targets, scale_targets, weights, class_targets = self._target_generator(
            self._height, self._width, self._feat_maps, self._anchors,
            self._offsets, gt_bboxes, gt_ids, gt_mixratio)
        return (img, objectness[0], center_targets[0], scale_targets[0],
                weights[0], class_targets[0], gt_bboxes[0])
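
A hedged sketch of wiring this transform into a torch DataLoader so the target generation above runs in CPU worker processes; DetectionDataset and TrainTransform are hypothetical names:

from torch.utils.data import DataLoader

dataset = DetectionDataset(transform=TrainTransform(width=416, height=416))  # hypothetical
# batch_size=1 sidesteps collating variable-length gt_bboxes; real training
# code would pad boxes or supply a custom collate_fn instead.
loader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=4)
img, objness, center_t, scale_t, weights, class_t, gt_bboxes = next(iter(loader))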
Example #5
def transform_test(imgs,
                   short=600,
                   max_size=1000,
                   mean=(0.485, 0.456, 0.406),
                   std=(0.229, 0.224, 0.225)):
    """A util function to transform all images to tensors as network input by applying
    normalizations. This function support 1 NDArray or iterable of NDArrays.

    Parameters
    ----------
    imgs : numpy.ndarray or iterable of numpy.ndarray
        Image(s) to be transformed.
    short : int, optional, default is 600
        Resize image short side to this `short` and keep aspect ratio.
    max_size : int, optional, default is 1000
        Maximum longer side length to fit image.
        This is to limit the input image shape and avoid processing overly large images.
    mean : iterable of float
        Mean pixel values.
    std : iterable of float
        Standard deviations of pixel values.

    Returns
    -------
    (torch.Tensor, numpy.ndarray) or list of such tuples
        A (1, 3, H, W) torch.Tensor as input to the network, and a numpy.ndarray as the
        original un-normalized color image for display.
        If multiple images are supplied, two lists are returned. You can use
        `zip()` to collapse them.

    """
    if isinstance(imgs, np.ndarray):
        imgs = [imgs]
    for im in imgs:
        assert isinstance(im, np.ndarray), "Expected numpy.ndarray, got {}".format(
            type(im))

    tensors = []
    origs = []
    for img in imgs:
        img = timage.resize_short_within(img, short, max_size)
        orig_img = img.astype('uint8')
        img = vf.to_tensor(img)
        img = vf.normalize(img, mean=mean, std=std)
        tensors.append(img.unsqueeze(0))
        origs.append(orig_img)
    if len(tensors) == 1:
        return tensors[0], origs[0]
    return tensors, origs
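
A sketch of the multi-image path: passing a list returns two lists, which zip() pairs back up per image, as the docstring notes:

import numpy as np

imgs = [np.random.randint(0, 256, (h, 400, 3), dtype=np.uint8) for h in (300, 375)]
tensors, origs = transform_test(imgs)
for x, orig in zip(tensors, origs):
    print(x.shape, orig.shape)  # (1, 3, H, W) tensor and (H, W, 3) uint8 image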
Example #6
    def __call__(self, src, label, mask):
        """Apply transform to validation image/label."""
        # resize shorter side but keep in max_size
        h, _, _ = src.shape
        img = timage.resize_short_within(src,
                                         self._short,
                                         self._max_size,
                                         interp=1)
        # no scaling ground-truth, return image scaling ratio instead
        im_scale = float(img.shape[0]) / h

        img = vf.to_tensor(img)
        img = vf.normalize(img, mean=self._mean, std=self._std)
        return img, torch.tensor([img.shape[-2], img.shape[-1], im_scale],
                                 dtype=torch.float32)
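
A hypothetical usage sketch; MaskRCNNValTransform and its constructor are assumptions. The label and mask arguments are accepted but unused by this validation path:

import numpy as np

transform = MaskRCNNValTransform(short=800, max_size=1333)  # hypothetical
src = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
img, im_info = transform(src, label=None, mask=None)
print(im_info)  # tensor([H, W, im_scale]) describing the resized input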
Example #7
    def __call__(self, src, label):
        """Apply transform to validation image/label."""
        # resize shorter side but keep in max_size
        h, w, _ = src.shape
        img = timage.resize_short_within(src,
                                         self._short,
                                         self._max_size,
                                         interp=1)
        # no scaling ground-truth, return image scaling ratio instead
        bbox = tbbox.resize(label, (w, h), (img.shape[1], img.shape[0]))
        im_scale = h / float(img.shape[0])

        img = vf.to_tensor(img)
        img = vf.normalize(img, mean=self._mean, std=self._std)
        return img, bbox.astype('float32'), torch.tensor([im_scale],
                                                         dtype=torch.float32)
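
A hypothetical usage sketch; RCNNValTransform is an assumed name. Because im_scale is the original height over the resized height, multiplying resized-image coordinates by it maps them back to the source image:

import numpy as np

transform = RCNNValTransform(short=600, max_size=1000)  # hypothetical
src = np.random.randint(0, 256, (375, 500, 3), dtype=np.uint8)
label = np.array([[10.0, 20.0, 100.0, 150.0, 0.0]])
img, bbox, im_scale = transform(src, label)
restored = bbox[:, :4] * im_scale.item()  # back to source-image coordinates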