Example 1
    def forward(self, image_list, feature_maps):
        # type: (ImageList, List[Tensor])
        if torchvision._is_tracing():
            # For onnx export, Python int can only be traced as Constant
            from torch.onnx import operators
            grid_sizes = list([
                operators.shape_as_tensor(feature_map)[-2:]
                for feature_map in feature_maps
            ])
            image_size = operators.shape_as_tensor(image_list.tensors)[-2:]
            strides = [image_size / g for g in grid_sizes]
        else:
            grid_sizes = list(
                [feature_map.shape[-2:] for feature_map in feature_maps])
            image_size = image_list.tensors.shape[-2:]
            strides = [[int(image_size[0] / g[0]),
                        int(image_size[1] / g[1])] for g in grid_sizes]

        dtype, device = feature_maps[0].dtype, feature_maps[0].device
        self.set_cell_anchors(dtype, device)
        anchors_over_all_feature_maps = self.cached_grid_anchors(
            grid_sizes, strides)
        anchors = torch.jit.annotate(List[List[torch.Tensor]], [])
        for i, (image_height,
                image_width) in enumerate(image_list.image_sizes):
            anchors_in_image = []
            for anchors_per_feature_map in anchors_over_all_feature_maps:
                anchors_in_image.append(anchors_per_feature_map)
            anchors.append(anchors_in_image)
        anchors = [
            torch.cat(anchors_per_image) for anchors_per_image in anchors
        ]
        # Clear the cache in case memory leaks.
        self._cache.clear()
        return anchors
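A quick eager-mode check of the stride computation above, with illustrative sizes: the eager branch turns the ratio of image size to grid size into plain Python ints, while the tracing branch keeps the same division as a tensor op so it is recorded in the exported graph.

image_size = (800, 1216)  # H, W of the padded image batch (illustrative)
grid_size = (50, 76)      # H, W of one feature map (illustrative)
strides = [int(image_size[0] / grid_size[0]),
           int(image_size[1] / grid_size[1])]
print(strides)            # [16, 16]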
Example 2
from typing import List

import torch
from torch import Tensor
from torch.onnx import operators


def decode_batch_loop_helper(bboxes, probs, criteria, max_output):
    # NOTE: max_output is expected to be a 1-element LongTensor (see the torch.cat below)
    bboxes_out = torch.jit.annotate(List[Tensor], [])
    scores_out = torch.jit.annotate(List[Tensor], [])
    labels_out = torch.jit.annotate(List[Tensor], [])
    for i in range(probs.size(1)):
        # skip background
        if i != 0:
            scores_per_label = probs[:, i]
            mask = scores_per_label > 0.05
            bboxes_masked, scores_masked = bboxes[
                mask, :], scores_per_label[mask]
            print('decode single iter scores masked:', scores_masked,
                  scores_masked.shape)

            num_selected = operators.shape_as_tensor(
                scores_masked)[0].unsqueeze(0)
            k = torch.min(torch.cat((max_output, num_selected), 0))
            _, sorted_idx = scores_masked.topk(k, dim=0)
            bboxes_masked = bboxes_masked[sorted_idx]
            scores_masked = scores_masked[sorted_idx]

            out_idx = torch.ops.roi_ops.nms(bboxes_masked, scores_masked,
                                            criteria)

            bboxes_out.append(bboxes_masked[out_idx])
            scores_out.append(scores_masked[out_idx])
            labels_out.append(torch.full(out_idx.shape, i, dtype=torch.long))
            print('decode single iter output:', scores_out[-1], labels_out[-1])
    # return top max_output
    bboxes_out = torch.cat(bboxes_out, dim=0)
    labels_out = torch.cat(labels_out, dim=0)
    scores_out = torch.cat(scores_out, dim=0)

    return bboxes_out, labels_out, scores_out
Example 3
def topk(x, k, dim=None, **kwargs):
    from torch.onnx import operators, is_in_onnx_export

    if dim is None:
        dim = x.dim() - 1

    if is_in_onnx_export():
        n = operators.shape_as_tensor(x)[dim].unsqueeze(0)
        if not isinstance(k, torch.Tensor):
            k = torch.tensor([k], dtype=torch.long)
        # Workaround for ONNXRuntime: convert values to int to get minimum.
        n = torch.min(torch.cat((k, n), dim=0).int()).long()
        # ONNX OpSet 10 does not support non-floating point input for TopK.
        original_dtype = x.dtype
        require_cast = original_dtype not in {
            torch.float16, torch.float32, torch.float64
        }
        if require_cast:
            x = x.to(torch.float32)
        values, keep = torch.topk(x, n, dim=dim, **kwargs)
        if require_cast:
            values = values.to(original_dtype)
    else:
        values, keep = torch.topk(
            x, min(int(k), x.shape[dim]), dim=dim, **kwargs)
    return values, keep
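A minimal eager-mode usage sketch for the topk helper above; the score tensor is illustrative, and a k larger than the tensor length is clamped instead of raising:

import torch

scores = torch.rand(30)            # 30 illustrative detection scores
values, keep = topk(scores, 100)   # k > 30, so it is clamped to the tensor length
print(values.shape, keep.shape)    # torch.Size([30]) torch.Size([30])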
Example 4
    def forward(self, feats, rois, roi_scale_factor=None):
        from torch.onnx import operators

        if len(feats) == 1:
            return self.roi_layers[0](feats[0], rois)

        num_levels = len(feats)
        target_lvls = self.map_roi_levels(rois, num_levels)
        if roi_scale_factor is not None:
            rois = self.roi_rescale(rois, roi_scale_factor)

        indices = []
        roi_feats = []
        for level, (feat, extractor) in enumerate(zip(feats, self.roi_layers)):
            # Explicit casting to int is required for ONNXRuntime.
            level_indices = torch.nonzero(
                (target_lvls == level).int()).view(-1)
            level_rois = rois[level_indices]
            indices.append(level_indices)

            level_feats = extractor(feat, level_rois)
            roi_feats.append(level_feats)
        # Concatenate roi features from different pyramid levels
        # and rearrange them to match original ROIs order.
        indices = torch.cat(indices, dim=0)
        k = operators.shape_as_tensor(indices)
        _, indices = topk(indices, k, dim=0, largest=False)
        roi_feats = torch.cat(roi_feats, dim=0)[indices]

        return roi_feats
Example 5
def _resize_image_and_masks_onnx(image, self_min_size, self_max_size, target):
    # type: (Tensor, float, float, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
    from torch.onnx import operators
    im_shape = operators.shape_as_tensor(image)[-2:]
    min_size = torch.min(im_shape).to(dtype=torch.float32)
    max_size = torch.max(im_shape).to(dtype=torch.float32)
    scale_factor = torch.min(self_min_size / min_size,
                             self_max_size / max_size)

    image = torch.nn.functional.interpolate(image[None],
                                            scale_factor=scale_factor,
                                            mode='bilinear',
                                            recompute_scale_factor=True,
                                            align_corners=False)[0]

    if target is None:
        return image, target

    if "masks" in target:
        mask = target["masks"]
        mask = F.interpolate(mask[:, None].float(),
                             scale_factor=scale_factor,
                             recompute_scale_factor=True)[:, 0].byte()
        target["masks"] = mask
    return image, target
Example 6
    def forward_export(self, imgs):
        from torch.onnx import operators
        img_shape = operators.shape_as_tensor(imgs[0])
        imgs_per_gpu = int(imgs[0].size(0))
        assert imgs_per_gpu == 1
        self.img_metas[0][0]['img_shape'] = img_shape[2:4]
        return self.simple_test(imgs[0], self.img_metas[0], postprocess=False)
Example 7
def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):
    # type: (Tensor, int) -> Tuple[int, int]
    from torch.onnx import operators
    num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)
    pre_nms_top_n = torch.min(torch.cat(
        (torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype),
         num_anchors), 0))

    return num_anchors, pre_nms_top_n
Example 8
    def decode_batch_with_multi_label_nms_trace(self,
                                                bboxes_in,
                                                scores_in,
                                                criteria=0.45,
                                                max_output=200,
                                                device=0):
        bboxes, probs = self.scale_back_batch(bboxes_in, scores_in, device)

        torch.ops.load_library(
            os.path.join(os.path.dirname(__file__), 'lib',
                         'custom_ops.cpython-37m-x86_64-linux-gnu.so'))

        # bboxes shape  [batch, box num, 4]
        # probs shape   [batch, box num, label num]
        probs = probs.permute(0, 2, 1)
        # probs shape   [batch, label num, box num]

        # remove background
        probs = probs[:, 1:, :]
        selected_indices = torch.ops.roi_ops.multi_label_nms(
            bboxes, probs, torch.full((1, ), max_output, dtype=torch.long),
            torch.full((1, ), criteria, dtype=torch.float),
            torch.full((1, ), 0.05, dtype=torch.float))

        labels = selected_indices[:, 1]
        box_indices = selected_indices[:, 2]
        scores_out = probs.reshape(-1)[labels *
                                       operators.shape_as_tensor(probs)[2] +
                                       box_indices]

        # return top max_output
        num_selected = operators.shape_as_tensor(scores_out)[0].unsqueeze(0)
        k = torch.min(
            torch.cat(
                (torch.tensor([max_output], dtype=torch.long), num_selected),
                0))
        _, max_ids = scores_out.topk(k, dim=0)

        bboxes = bboxes.squeeze(0)[
            box_indices.index_select(0, max_ids), :].unsqueeze(0)
        labels = labels.index_select(0, max_ids).unsqueeze(0) + 1
        scores_out = scores_out.index_select(0, max_ids).unsqueeze(0)

        return bboxes, labels, scores_out
Example 9
def _onnx_get_num_anchors_and_pre_nms_top_n(
        ob: Tensor, orig_pre_nms_top_n: int) -> Tuple[int, int]:
    from torch.onnx import operators

    num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)
    pre_nms_top_n = torch.min(
        torch.cat((torch.tensor([orig_pre_nms_top_n],
                                dtype=num_anchors.dtype), num_anchors), 0))

    # for mypy we cast at runtime
    return cast(int, num_anchors), cast(int, pre_nms_top_n)
Example 10
File: rpn.py Project: delldu/Vision
def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):
    from torch.onnx import operators
    num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)
    # TODO : remove cast to IntTensor/num_anchors.dtype when
    #        ONNX Runtime version is updated with ReduceMin int64 support
    pre_nms_top_n = torch.min(
        torch.cat((torch.tensor([orig_pre_nms_top_n],
                                dtype=num_anchors.dtype), num_anchors),
                  0).to(torch.int32)).to(num_anchors.dtype)

    return num_anchors, pre_nms_top_n
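Examples 7, 9 and 10 are variants of the same export-time pattern: read the anchor count with shape_as_tensor so it stays dynamic in the trace, then take a tensor-level minimum against the configured top-n (Example 10 additionally casts to int32 because older ONNX Runtime versions lacked int64 support for ReduceMin). A standalone sketch of the basic pattern, with illustrative names and sizes:

import torch
from torch.onnx import operators


def clamp_pre_nms_top_n(objectness, top_n):
    # anchor count along dim 1, kept as a 1-element tensor so the trace stays shape-dynamic
    num_anchors = operators.shape_as_tensor(objectness)[1].unsqueeze(0)
    # tensor-level min, so ONNX records the comparison instead of baking in a Python int
    return torch.min(
        torch.cat((torch.tensor([top_n], dtype=num_anchors.dtype), num_anchors), 0))


ob = torch.zeros(1, 5000)                   # illustrative (N, num_anchors) objectness
print(int(clamp_pre_nms_top_n(ob, 2000)))   # 2000
print(int(clamp_pre_nms_top_n(ob, 8000)))   # 5000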
Example 11
    def detections_to_keep_onnx(self, scores):
        from torch.onnx import operators
        number_of_detections = operators.shape_as_tensor(scores)
        number_to_keep = torch.min(
            torch.cat((torch.tensor([self.detections_per_img],
                                    dtype=torch.long), number_of_detections),
                      0))

        _, keep = torch.topk(scores, number_to_keep, dim=0, sorted=True)

        return keep
Example 12
    def forward_export(self, imgs):
        from torch.onnx.operators import shape_as_tensor
        assert self.img_metas, 'Error: forward_export should be called inside forward_export_context'

        img_shape = shape_as_tensor(imgs[0])
        imgs_per_gpu = int(imgs[0].size(0))
        assert imgs_per_gpu == 1
        assert len(self.img_metas[0]
                   ) == imgs_per_gpu, f'self.img_metas={self.img_metas}'
        self.img_metas[0][0]['img_shape'] = img_shape[2:4]

        return self.simple_test(imgs[0], self.img_metas[0], postprocess=False)
Example 13
def _resize_image_onnx(image, self_min_size, self_max_size):
    # type: (Tensor, float, float) -> Tensor
    from torch.onnx import operators
    im_shape = operators.shape_as_tensor(image)[-2:]
    min_size = torch.min(im_shape).to(dtype=torch.float32)
    max_size = torch.max(im_shape).to(dtype=torch.float32)
    scale_factor = torch.min(self_min_size / min_size, self_max_size / max_size)

    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode="bilinear", recompute_scale_factor=True,
        align_corners=False)[0]

    return image
Example 14
def anchor_generator_forward_patch(self, image_list_tensors,
                                   image_list_image_sizes, feature_maps):

    if torchvision._is_tracing():
        from torch.onnx import operators
        grid_sizes = list([
            operators.shape_as_tensor(feature_map)[-2:]
            for feature_map in feature_maps
        ])
        image_size = operators.shape_as_tensor(image_list_tensors)[-2:]
        strides = [image_size / g for g in grid_sizes]
    else:
        grid_sizes = list(
            [feature_map.shape[-2:] for feature_map in feature_maps])
        image_size = image_list_tensors.shape[-2:]
        strides = [[int(image_size[0] / g[0]),
                    int(image_size[1] / g[1])] for g in grid_sizes]
    # The eager branch above triggers "TracerWarning: Converting a tensor to a
    # Python integer" during tracing, hence the shape_as_tensor branch.

    dtype, device = feature_maps[0].dtype, feature_maps[0].device
    self.set_cell_anchors(dtype, device)
    # return self.cell_anchors

    # Ignore the cache: when exporting we only run a single batch.
    # anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)
    anchors_over_all_feature_maps = self.grid_anchors(grid_sizes, strides)
    # return anchors_over_all_feature_maps

    anchors = torch.jit.annotate(List[List[torch.Tensor]], [])
    # for i, (image_height, image_width) in enumerate(image_list.image_sizes):
    # The number of images is treated as constant here: we loop over this dimension, so N cannot be a dynamic dimension.
    for hw in image_list_image_sizes:
        anchors_in_image = []
        for anchors_per_feature_map in anchors_over_all_feature_maps:
            anchors_in_image.append(anchors_per_feature_map)
        anchors.append(anchors_in_image)
    anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]
    return anchors
Example 15
def get_shape_from_feature_map(x):
    """Get spatial resolution of input feature map considering exporting to
    onnx mode.

    Args:
        x (torch.Tensor): Input tensor, shape (N, C, H, W)
    Returns:
        torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2)
    """
    if torch.onnx.is_in_onnx_export():
        from torch.onnx.operators import shape_as_tensor
        img_shape = shape_as_tensor(x)[2:].flip(0).view(1, 1, 2).to(
            x.device).float()
    else:
        img_shape = torch.tensor(x.shape[2:]).flip(0).view(1, 1, 2).to(
            x.device).float()
    return img_shape
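A quick eager-mode check of the helper above; the feature-map shape is illustrative:

import torch

feat = torch.zeros(1, 256, 25, 38)       # N, C, H, W (illustrative)
print(get_shape_from_feature_map(feat))  # tensor([[[38., 25.]]]), i.e. (width, height) with shape (1, 1, 2)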
Example 16
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        # different behavior during training and during testing:
        # during training, post_nms_top_n is over *all* the proposals combined, while
        # during testing, it is over the proposals for each image
        # NOTE: it should be per image, and not per batch. However, to be consistent
        # with Detectron, the default is per batch (see Issue #672)
        if self.training and self.fpn_post_nms_per_batch:
            objectness = torch.cat(
                [boxlist.get_field("objectness") for boxlist in boxlists],
                dim=0)
            box_sizes = [len(boxlist) for boxlist in boxlists]
            post_nms_top_n = min(self.fpn_post_nms_top_n, len(objectness))
            _, inds_sorted = torch.topk(objectness,
                                        post_nms_top_n,
                                        dim=0,
                                        sorted=True)
            inds_mask = torch.zeros_like(objectness, dtype=torch.uint8)
            inds_mask[inds_sorted] = 1
            inds_mask = inds_mask.split(box_sizes)
            for i in range(num_images):
                boxlists[i] = boxlists[i][inds_mask[i]]
        else:
            for i in range(num_images):
                objectness = boxlists[i].get_field("objectness")

                if self.onnx_export:
                    from torch.onnx import operators
                    objectness_len = operators.shape_as_tensor(objectness)
                    post_nms_top_n = torch.min(
                        torch.cat(
                            (torch.tensor([self.fpn_post_nms_top_n],
                                          dtype=torch.long), objectness_len),
                            0))
                else:
                    post_nms_top_n = min(self.fpn_post_nms_top_n,
                                         len(objectness))

                _, inds_sorted = torch.topk(objectness,
                                            post_nms_top_n,
                                            dim=0,
                                            sorted=True)
                boxlists[i] = boxlists[i][inds_sorted]
        return boxlists
Example 17
            def forward(self, images_tensors, images_image_sizes, features):

                features = list(features.values())

                objectness, pred_bbox_deltas = self.head(features)

                num_anchors_per_level = [
                    o[0].numel() for o in objectness
                ]  # number of anchors per feature level before concat

                from torch.onnx.operators import shape_as_tensor
                num_anchors_per_level_shape_tensors = [
                    shape_as_tensor(o[0]) for o in objectness
                ]
                num_anchors_per_level_fixed = [
                    s[0] * s[1] * s[2]
                    for s in num_anchors_per_level_shape_tensors
                ]

                # print(num_anchors_per_level_shape_tensors)
                # num_anchors_per_level_fixed = [s.prod() for s in num_anchors_per_level_shape_tensors]
                # Could not find an implementation for the node ReduceProd(11)
                # print(num_anchors_per_level_fixed) # A list of tensors

                objectness, pred_bbox_deltas = concat_box_prediction_layers(
                    objectness, pred_bbox_deltas)

                anchors = self.anchor_generator(images_tensors,
                                                images_image_sizes, features)

                proposals = self.box_coder.decode(pred_bbox_deltas.detach(),
                                                  anchors)
                num_images = len(images_image_sizes)
                proposals = proposals.view(num_images, -1, 4)

                # PSX exporting debug
                boxes, scores = self.rpn.filter_proposals(
                    proposals, objectness, images_image_sizes,
                    num_anchors_per_level_fixed)

                return boxes
Example 18
from torch import Tensor


def _get_shape_onnx(image: Tensor) -> Tensor:
    from torch.onnx import operators

    return operators.shape_as_tensor(image)[-2:]
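Despite the name, the helper also works outside export, since shape_as_tensor simply returns the input's shape as a LongTensor; a quick eager-mode check, assuming _get_shape_onnx from Example 18 is in scope:

import torch

img = torch.zeros(3, 480, 640)       # C, H, W (illustrative)
hw = _get_shape_onnx(img)            # tensor([480, 640])
print(torch.min(hw), torch.max(hw))  # tensor(480) tensor(640)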
Example 19
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        if self.onnx_export:
            from torch.onnx import operators
            num_anchors = operators.shape_as_tensor(objectness)[1].unsqueeze(0)

            pre_nms_top_n = torch.min(
                torch.cat((torch.tensor([self.pre_nms_top_n],
                                        dtype=torch.long), num_anchors), 0))
        else:
            pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        if self.onnx_export:
            # NOTE: for now only batch == 1 is supported for ONNX export.
            assert topk_idx.size(0) == 1
            topk_idx = topk_idx.squeeze(0)
            box_regression = box_regression.index_select(1, topk_idx)
        else:
            box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        if self.onnx_export:
            concat_anchors = concat_anchors.reshape(N, -1, 4).index_select(
                1, topk_idx)
        else:
            concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx,
                                                              topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size,
                                         self.onnx_export)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Example 20
    def decode_batch_with_nms_trace(self,
                                    bboxes_in,
                                    scores_in,
                                    criteria=0.45,
                                    max_output=200,
                                    device=0):
        bboxes, probs = self.scale_back_batch(bboxes_in, scores_in, device)

        torch.ops.load_library(
            os.path.join(os.path.dirname(__file__), 'lib',
                         'custom_ops.cpython-37m-x86_64-linux-gnu.so'))

        assert bboxes.size(0) == 1, 'batch size must be 1'
        bboxes = bboxes.squeeze(0)
        probs = probs.squeeze(0)
        # for each label
        bboxes_out = []
        scores_out = []
        labels_out = []
        # bboxes shape  [box num, 4]
        # probs shape   [box num, label num]
        for i in range(probs.size(1)):
            # skip background
            if i == 0:
                continue

            scores_per_label = probs[:, i]
            mask = scores_per_label > 0.05
            bboxes_masked, scores_masked = bboxes[
                mask, :], scores_per_label[mask]
            # print('decode single iter scores masked:', scores_masked, scores_masked.shape)

            num_selected = operators.shape_as_tensor(
                scores_masked)[0].unsqueeze(0)
            k = torch.min(
                torch.cat((torch.tensor([max_output],
                                        dtype=torch.long), num_selected), 0))
            _, sorted_idx = scores_masked.topk(k, dim=0)
            bboxes_masked = bboxes_masked[sorted_idx]
            scores_masked = scores_masked[sorted_idx]

            out_idx = torch.ops.roi_ops.nms(bboxes_masked, scores_masked,
                                            criteria)

            bboxes_out.append(bboxes_masked[out_idx])
            scores_out.append(scores_masked[out_idx])
            labels_out.append(torch.full_like(out_idx, i, dtype=torch.long))
            # print('decode single iter output:', scores_out[-1], labels_out[-1])
        # return top max_output
        bboxes_out = torch.cat(bboxes_out, dim=0)
        labels_out = torch.cat(labels_out, dim=0)
        scores_out = torch.cat(scores_out, dim=0)

        num_selected = operators.shape_as_tensor(scores_out)[0].unsqueeze(0)
        k = torch.min(
            torch.cat(
                (torch.tensor([max_output], dtype=torch.long), num_selected),
                0))
        _, max_ids = scores_out.topk(k, dim=0)

        return bboxes_out[max_ids, :].unsqueeze(0), labels_out[
            max_ids].unsqueeze(0), scores_out[max_ids].unsqueeze(0)
Example 21
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression, cls):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        ###
        # show heat map
        ###
        # import matplotlib.pyplot as plt
        # import cv2
        # import numpy as np
        # img = cv2.imread("/home/w/workspace/onnx/maskrcnn-benchmark/demo/test_yolo.jpg")
        # img = cv2.resize(img, (416, 416))
        # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # temp = objectness[:, 0].cpu()[0].numpy() * 255
        # temp = temp.astype(np.uint8)
        # temp = cv2.resize(temp, (416, 416))
        # img = cv2.addWeighted(img, 0.5, temp, 0.5, 1)
        #
        # plt.imshow(img)
        # plt.show()

        ###
        # show heat map end
        ###

        N, AXC, H, W = cls.shape

        C = int(AXC / A)

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        cls = permute_and_flatten(cls, N, A, C, H, W)

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        if self.onnx_export:
            from torch.onnx import operators
            num_anchors = operators.shape_as_tensor(objectness)[1].unsqueeze(0)

            pre_nms_top_n = torch.min(
                torch.cat((torch.tensor([self.pre_nms_top_n],
                                        dtype=torch.long), num_anchors), 0))
        else:
            pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        if self.onnx_export:
            # NOTE: for now only batch == 1 is supported for ONNX export.
            assert topk_idx.size(0) == 1
            topk_idx = topk_idx.squeeze(0)
            box_regression = box_regression.index_select(1, topk_idx)
        else:
            box_regression = box_regression[batch_idx, topk_idx]
            cls = cls[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        if self.onnx_export:
            concat_anchors = concat_anchors.reshape(N, -1, 4).index_select(
                1, topk_idx)
        else:
            concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx,
                                                              topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)
        cls = torch.argmax(cls, -1) + 1
        result = []
        for proposal, score, c, im_shape in zip(proposals, objectness, cls,
                                                image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("scores", score)
            boxlist.add_field("labels", c)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size,
                                         self.onnx_export)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="scores",
            )
            result.append(boxlist)
        return result
Example 22
    def forward(self, images, features, targets=None):
        # type: (ImageList, Dict[str, Tensor], Optional[List[Dict[str, Tensor]]])
        """
        Arguments:
            images (ImageList): images for which we want to compute the predictions
            features (List[Tensor]): features computed from the images that are
                used for computing the predictions. Each tensor in the list
                correspond to different feature levels
            targets (List[Dict[Tensor]]): ground-truth boxes present in the image (optional).
                If provided, each element in the dict should contain a field `boxes`,
                with the locations of the ground-truth boxes.

        Returns:
            boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per
                image.
            losses (Dict[Tensor]): the losses for the model during training. During
                testing, it is an empty dict.
        """
        # RPN uses all feature maps that are available
        features = list(features.values())
        objectness, pred_bbox_deltas = self.head(features)
        anchors = self.anchor_generator(images, features)

        num_images = len(anchors)
        if torchvision._is_tracing():
            # For ONNX export (Split in _get_top_n_idx)
            from torch.onnx.operators import shape_as_tensor
            num_anchors_per_level_shape_tensors = [
                shape_as_tensor(o[0]) for o in objectness
            ]
            num_anchors_per_level = [
                s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors
            ]
            # tensor.prod() => ReduceProd, which cannot be run by the current runtime.
            # The elementwise product above is a naive workaround.
        else:
            num_anchors_per_level = [o[0].numel() for o in objectness]

        objectness, pred_bbox_deltas = \
            concat_box_prediction_layers(objectness, pred_bbox_deltas)
        # apply pred_bbox_deltas to anchors to obtain the decoded proposals
        # note that we detach the deltas because Faster R-CNN does not backprop through
        # the proposals
        proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
        proposals = proposals.view(num_images, -1, 4)
        boxes, scores = self.filter_proposals(proposals, objectness,
                                              images.image_sizes,
                                              num_anchors_per_level)

        losses = {}
        if self.training:
            assert targets is not None
            labels, matched_gt_boxes = self.assign_targets_to_anchors(
                anchors, targets)
            regression_targets = self.box_coder.encode(matched_gt_boxes,
                                                       anchors)
            loss_objectness, loss_rpn_box_reg = self.compute_loss(
                objectness, pred_bbox_deltas, labels, regression_targets)
            losses = {
                "loss_objectness": loss_objectness,
                "loss_rpn_box_reg": loss_rpn_box_reg,
            }
        return boxes, losses
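Example 22 (like Example 17) avoids tensor.prod() when counting anchors per level, because the resulting ReduceProd node was not supported by the runtime in use; multiplying the shape elements gives the same value using only Mul nodes. A minimal sketch with an illustrative objectness map:

import torch
from torch.onnx.operators import shape_as_tensor

o = torch.zeros(3, 50, 76)        # A, H, W of one objectness map (illustrative)
s = shape_as_tensor(o)            # tensor([ 3, 50, 76])
num_anchors = s[0] * s[1] * s[2]  # same value as s.prod(), but exports as Mul instead of ReduceProd
print(int(num_anchors))           # 11400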