Esempio n. 1
0
def roi_pool(input, boxes, output_size, spatial_scale=1.0):
    """
    Performs Region of Interest (RoI) Pool operator described in Fast R-CNN

    Arguments:
        input (Tensor[N, C, H, W]): input tensor
        boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2)
            format where the regions will be taken from. If a single Tensor is passed,
            then the first column should contain the batch index. If a list of Tensors
            is passed, then each Tensor will correspond to the boxes for an element i
            in a batch
        output_size (int or Tuple[int, int]): the size of the output after the cropping
            is performed, as (height, width). A single int is expanded to
            (output_size, output_size)
        spatial_scale (float): a scaling factor that maps the input coordinates to
            the box coordinates. Default: 1.0

    Returns:
        output (Tensor[K, C, output_size[0], output_size[1]])
    """
    # The documented contract allows a bare int, but the traced path below
    # indexes output_size[0] / output_size[1]; expand an int to a
    # (height, width) pair up front so both paths receive a 2-sequence.
    if isinstance(output_size, int):
        output_size = (output_size, output_size)
    rois = boxes
    if not isinstance(rois, torch.Tensor):
        # A list of per-image box tensors is converted to the single-tensor
        # RoI layout by the helper.
        rois = convert_boxes_to_roi_format(rois)
    # TODO: Change this to support backwards, which we
    #       do not currently support when JIT tracing.
    if torch._C._get_tracing_state():
        _lazy_import()
        # The registered op returns two values; only the first (the pooled
        # output) is returned to the caller.
        output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale,
                                                   output_size[0],
                                                   output_size[1])
        return output
    return _RoIPoolFunction.apply(input, rois, output_size, spatial_scale)
Esempio n. 2
0
def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1):
    """
    Performs Region of Interest (RoI) Align operator described in Mask R-CNN

    Arguments:
        input (Tensor[N, C, H, W]): input tensor
        boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2)
            format where the regions will be taken from. If a single Tensor is passed,
            then the first column should contain the batch index. If a list of Tensors
            is passed, then each Tensor will correspond to the boxes for an element i
            in a batch
        output_size (int or Tuple[int, int]): the size of the output after the cropping
            is performed, as (height, width). A single int is expanded to
            (output_size, output_size)
        spatial_scale (float): a scaling factor that maps the input coordinates to
            the box coordinates. Default: 1.0
        sampling_ratio (int): number of sampling points in the interpolation grid
            used to compute the output value of each pooled output bin. If > 0,
            then exactly sampling_ratio x sampling_ratio grid points are used. If
            <= 0, then an adaptive number of grid points are used (computed as
            ceil(roi_width / pooled_w), and likewise for height). Default: -1

    Returns:
        output (Tensor[K, C, output_size[0], output_size[1]])
    """
    # The documented contract allows a bare int, but the op call below
    # indexes output_size[0] / output_size[1]; expand an int to a
    # (height, width) pair so that call does not raise TypeError.
    if isinstance(output_size, int):
        output_size = (output_size, output_size)
    rois = boxes
    if not isinstance(rois, torch.Tensor):
        # A list of per-image box tensors is converted to the single-tensor
        # RoI layout by the helper.
        rois = convert_boxes_to_roi_format(rois)
    _lazy_import()
    return torch.ops.torchvision.roi_align(input, rois, spatial_scale,
                                           output_size[0], output_size[1],
                                           sampling_ratio)
Esempio n. 3
0
def nms(boxes, scores, iou_threshold):
    """
    Run non-maximum suppression (NMS) over a set of boxes, using
    intersection-over-union (IoU) as the overlap measure.

    A box is dropped when a higher scoring box overlaps it with an
    IoU greater than iou_threshold; this is applied iteratively.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes to perform NMS on. They
        are expected to be in (x1, y1, x2, y2) format
    scores : Tensor[N]
        scores for each one of the boxes
    iou_threshold : float
        discards all overlapping
        boxes with IoU > iou_threshold

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices
        of the elements that have been kept
        by NMS, sorted in decreasing order of scores
    """
    # _lazy_import() is called before the torchvision op is looked up.
    _lazy_import()
    kept_indices = torch.ops.torchvision.nms(boxes, scores, iou_threshold)
    return kept_indices
Esempio n. 4
0
 def forward(ctx, input, rois, output_size, spatial_scale):
     """
     Forward pass of RoI pooling.

     Records on ``ctx`` the values that the backward pass reads
     (output size, spatial scale, input shape, and the ``rois`` /
     ``argmax`` tensors) and returns the pooled output.

     Arguments:
         ctx: autograd context object used to stash state for backward
         input (Tensor): input feature map; its ``size()`` is stored
         rois (Tensor): regions of interest passed to the extension op
         output_size (int or Tuple[int, int]): pooled (height, width);
             normalized through ``_pair`` before use
         spatial_scale (float): scale factor forwarded to the extension op

     Returns:
         output (Tensor): first value returned by ``roi_pool_forward``
     """
     # Normalize once and use the normalized value everywhere. Previously the
     # raw argument was indexed below, which fails when an int is passed even
     # though the _pair()-ed value was what got stored on ctx.
     output_size = _pair(output_size)
     ctx.output_size = output_size
     ctx.spatial_scale = spatial_scale
     ctx.input_shape = input.size()
     _C = _lazy_import()
     output, argmax = _C.roi_pool_forward(input, rois, spatial_scale,
                                          output_size[0], output_size[1])
     # argmax is saved alongside rois because backward passes both to
     # roi_pool_backward.
     ctx.save_for_backward(rois, argmax)
     return output
Esempio n. 5
0
 def backward(ctx, grad_output):
     """
     Backward pass of RoI pooling.

     Reads the tensors and attributes stored on ``ctx`` and hands them,
     together with ``grad_output``, to the extension's
     ``roi_pool_backward``.

     Returns:
         A 4-tuple: the computed input gradient followed by three ``None``
         placeholders (presumably one per remaining forward() argument).
     """
     saved_rois, saved_argmax = ctx.saved_tensors
     pooled_size = ctx.output_size
     scale = ctx.spatial_scale
     # The stored input shape is unpacked into its four dimensions, which the
     # extension takes as separate arguments.
     batch, channels, height, width = ctx.input_shape
     ext = _lazy_import()
     grad_input = ext.roi_pool_backward(
         grad_output, saved_rois, saved_argmax, scale,
         pooled_size[0], pooled_size[1],
         batch, channels, height, width)
     return (grad_input, None, None, None)
Esempio n. 6
0
 def backward(ctx, grad_output):
     """
     Backward pass of RoI align.

     Reads the saved ``rois`` tensor and the attributes stored on ``ctx``
     and hands them, together with ``grad_output``, to the extension's
     ``roi_align_backward``.

     Returns:
         A 5-tuple: the computed input gradient followed by four ``None``
         placeholders (presumably one per remaining forward() argument).
     """
     # Exactly one tensor is expected in saved_tensors.
     (saved_rois,) = ctx.saved_tensors
     pooled_size = ctx.output_size
     scale = ctx.spatial_scale
     n_samples = ctx.sampling_ratio
     # The stored input shape is unpacked into its four dimensions, which the
     # extension takes as separate arguments.
     batch, channels, height, width = ctx.input_shape
     ext = _lazy_import()
     grad_input = ext.roi_align_backward(
         grad_output, saved_rois, scale,
         pooled_size[0], pooled_size[1],
         batch, channels, height, width, n_samples)
     return (grad_input, None, None, None, None)
Esempio n. 7
0
 def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio):
     """
     Forward pass of RoI align.

     Records on ``ctx`` the values that the backward pass reads (the
     ``roi`` tensor, output size, spatial scale, sampling ratio and input
     shape) and returns the result of the extension's ``roi_align_forward``.

     Arguments:
         ctx: autograd context object used to stash state for backward
         input (Tensor): input feature map; its ``size()`` is stored
         roi (Tensor): regions of interest passed to the extension op
         output_size (int or Tuple[int, int]): pooled (height, width);
             normalized through ``_pair`` before use
         spatial_scale (float): scale factor forwarded to the extension op
         sampling_ratio (int): sampling setting forwarded to the extension op

     Returns:
         output (Tensor): value returned by ``roi_align_forward``
     """
     # Normalize once and use the normalized value everywhere. Previously the
     # raw argument was indexed below, which fails when an int is passed even
     # though the _pair()-ed value was what got stored on ctx.
     output_size = _pair(output_size)
     ctx.save_for_backward(roi)
     ctx.output_size = output_size
     ctx.spatial_scale = spatial_scale
     ctx.sampling_ratio = sampling_ratio
     ctx.input_shape = input.size()
     _C = _lazy_import()
     output = _C.roi_align_forward(input, roi, spatial_scale,
                                   output_size[0], output_size[1],
                                   sampling_ratio)
     return output
Esempio n. 8
0
def nms(boxes, scores, iou_threshold):
    """
    Run non-maximum suppression (NMS) over a set of boxes, using
    intersection-over-union (IoU) as the overlap measure.

    A box is dropped when a higher scoring box overlaps it with an
    IoU greater than iou_threshold; this is applied iteratively.

    Arguments:
        boxes (Tensor[N, 4]): boxes to perform NMS on
        scores (Tensor[N]): scores for each one of the boxes
        iou_threshold (float): discards all overlapping
            boxes with IoU > iou_threshold

    Returns:
        keep (Tensor): int64 tensor with the indices
            of the elements that have been kept
            by NMS, sorted in decreasing order of scores
    """
    # The object returned by _lazy_import() exposes the nms implementation.
    return _lazy_import().nms(boxes, scores, iou_threshold)