Example #1
    def __call__(
            self,
            img: np.ndarray,
            mode: Optional[Union[NumpyPadMode, str]] = None) -> np.ndarray:
        """
        Args:
            img: data to be transformed, assuming `img` is channel-first
                and padding doesn't apply to the channel dim.
            mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``,
                ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
                One of the listed string values or a user supplied function. Defaults to ``self.mode``.
                See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
        """
        new_size = compute_divisible_spatial_size(spatial_shape=img.shape[1:],
                                                  k=self.k)
        spatial_pad = SpatialPad(spatial_size=new_size,
                                 method=Method.SYMMETRIC,
                                 mode=mode or self.mode)

        return spatial_pad(img)
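
This `__call__` appears to belong to MONAI's `DivisiblePad` transform (an assumption based on the `self.k` attribute). A minimal usage sketch, under that assumption:

# hypothetical usage sketch; assumes MONAI's DivisiblePad transform
import numpy as np
from monai.transforms import DivisiblePad

img = np.zeros((1, 30, 30))  # channel-first: 1 channel, 30x30 spatial
padder = DivisiblePad(k=16)  # pad each spatial dim up to a multiple of 16
padded = padder(img)
print(padded.shape)          # (1, 32, 32): 30 is rounded up to 32, the next multiple of 16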
Example #2
    def compute_bounding_box(self, img: np.ndarray):
        """
        Compute the start points and end points of bounding box to crop.
        And adjust bounding box coords to be divisible by `k`.

        """
        box_start, box_end = generate_spatial_bounding_box(
            img, self.select_fn, self.channel_indices, self.margin)
        box_start_ = np.asarray(box_start, dtype=np.int16)
        box_end_ = np.asarray(box_end, dtype=np.int16)
        orig_spatial_size = box_end_ - box_start_
        # make the spatial size divisible by `k`
        spatial_size = np.asarray(
            compute_divisible_spatial_size(spatial_shape=orig_spatial_size,
                                           k=self.k_divisible))
        # update box_start and box_end, expanding the box symmetrically
        box_start_ = box_start_ - (spatial_size - orig_spatial_size) // 2
        box_end_ = box_start_ + spatial_size
        return box_start_, box_end_
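
The sketch below assumes `compute_bounding_box` is a method of MONAI's `CropForeground` transform, whose attributes (`select_fn`, `channel_indices`, `margin`, `k_divisible`) match the ones used above:

# hypothetical usage sketch; assumes MONAI's CropForeground transform
import numpy as np
from monai.transforms import CropForeground

img = np.zeros((1, 10, 10))
img[0, 3:7, 3:7] = 1  # a 4x4 foreground patch
cropper = CropForeground(select_fn=lambda x: x > 0, margin=0, k_divisible=4)
box_start, box_end = cropper.compute_bounding_box(img)
print(box_start, box_end)  # [3 3] [7 7]: the 4x4 box is already divisible by 4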
Example #3
# imports needed by this snippet; the MONAI import paths are assumed and may vary by version
from typing import List, Sequence, Tuple, Union

import torch
import torch.nn.functional as F
from torch import Tensor

from monai.transforms import SpatialPad
from monai.transforms.utils import compute_divisible_spatial_size, convert_pad_mode
from monai.utils import PytorchPadMode, ensure_tuple_rep

def pad_images(
    input_images: Union[List[Tensor], Tensor],
    spatial_dims: int,
    size_divisible: Union[int, Sequence[int]],
    mode: Union[PytorchPadMode, str] = PytorchPadMode.CONSTANT,
    **kwargs,
) -> Tuple[Tensor, List[List[int]]]:
    """
    Pad the input images so that the output spatial sizes are divisible by `size_divisible`.
    The images are padded at the end of each spatial dim, producing a (B, C, H, W) or
    (B, C, H, W, D) Tensor whose padded size (H, W) or (H, W, D) is divisible by `size_divisible`.
    The default padding is constant padding with value 0.0.

    Args:
        input_images: it can be 1) a tensor sized (B, C, H, W) or (B, C, H, W, D),
            or 2) a list of image tensors, where each image i may have a different size
            (C, H_i, W_i) or (C, H_i, W_i, D_i).
        spatial_dims: number of spatial dimensions of the images, 2D or 3D.
        size_divisible: int or Sequence[int]; the expected divisor of each output spatial dim.
            If an int, the same `size_divisible` will be applied to all the input spatial dimensions.
        mode: available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
            One of the listed string values or a user supplied function. Defaults to ``"constant"``.
            See also: https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
        kwargs: other arguments for the `torch.nn.functional.pad` function.

    Returns:
        - images, a (B, C, H, W) or (B, C, H, W, D) Tensor
        - image_sizes, the original spatial size of each image
    """
    size_divisible = ensure_tuple_rep(size_divisible, spatial_dims)

    # If input_images: Tensor
    if isinstance(input_images, Tensor):
        orig_size = list(input_images.shape[-spatial_dims:])
        new_size = compute_divisible_spatial_size(spatial_shape=orig_size,
                                                  k=size_divisible)
        # per-dim (pad_before, pad_after) pairs; padding is applied at the end only
        all_pad_width = [(0, max(sp_i - orig_size[i], 0))
                         for i, sp_i in enumerate(new_size)]
        # flatten into F.pad's format: pairs reversed, last spatial dim first
        pt_pad_width = [
            val for sublist in all_pad_width for val in sublist[::-1]
        ][::-1]
        if max(pt_pad_width) == 0:
            # if there is no need to pad
            return input_images, [orig_size] * input_images.shape[0]
        mode_: str = convert_pad_mode(dst=input_images, mode=mode)
        return F.pad(input_images, pt_pad_width, mode=mode_,
                     **kwargs), [orig_size] * input_images.shape[0]

    # If input_images: List[Tensor]
    image_sizes = [img.shape[-spatial_dims:] for img in input_images]
    in_channels = input_images[0].shape[0]
    dtype = input_images[0].dtype
    device = input_images[0].device

    # compute max_spatial_size
    image_sizes_t = torch.tensor(image_sizes)
    max_spatial_size_t, _ = torch.max(image_sizes_t, dim=0)

    if len(max_spatial_size_t) != spatial_dims or len(
            size_divisible) != spatial_dims:
        raise ValueError(
            "Require len(max_spatial_size_t) == spatial_dims == len(size_divisible)."
        )

    max_spatial_size = compute_divisible_spatial_size(
        spatial_shape=list(max_spatial_size_t), k=size_divisible)

    # allocate memory for the padded images
    images = torch.zeros([len(image_sizes), in_channels] + max_spatial_size,
                         dtype=dtype,
                         device=device)

    # Use `SpatialPad` to match sizes; padding at the end will not affect boxes
    padder = SpatialPad(spatial_size=max_spatial_size,
                        method="end",
                        mode=mode,
                        **kwargs)
    for idx, img in enumerate(input_images):
        images[idx, ...] = padder(img)

    return images, [list(ss) for ss in image_sizes]
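
A small usage sketch for `pad_images`; the import path is assumed from MONAI's detection utilities and may vary by version:

# hypothetical usage sketch; import path assumed, check your MONAI version
import torch
from monai.apps.detection.utils.detector_utils import pad_images

imgs = [torch.rand(3, 100, 120), torch.rand(3, 90, 130)]  # two 2D images, 3 channels each
batch, image_sizes = pad_images(imgs, spatial_dims=2, size_divisible=32)
print(batch.shape)   # torch.Size([2, 3, 128, 160]): max sizes (100, 130) rounded up to multiples of 32
print(image_sizes)   # [[100, 120], [90, 130]]: the original spatial sizes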