Beispiel #1
0
def transform_proposals_seg(dataset_dict,
                            image_shape,
                            transforms,
                            *,
                            proposal_topk,
                            min_box_size=0):
    """
    Apply transformations to the proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly
            contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode"
        image_shape (tuple): height, width
        transforms (TransformList):
        proposal_topk (int): only keep top-K scoring proposals
        min_box_size (int): proposals with either side smaller than this
            threshold are removed

    The input dict is modified in-place, with abovementioned keys removed. A new
    key "proposals" will be added. Its value is an `Instances`
    object which contains the transformed proposals in its field
    "proposal_boxes" and "objectness_logits".
    """
    boxes = dataset_dict["proposals"].proposal_boxes.tensor.cpu().numpy()
    boxes = transforms.apply_box(boxes)
    boxes = Boxes(boxes)
    objectness_logits = dataset_dict["proposals"].objectness_logits

    oh_labels = dataset_dict["proposals"].oh_labels
    superpixels = dataset_dict["superpixels"].cpu().numpy()

    boxes.clip(image_shape)

    # keep = boxes.unique_boxes()
    # boxes = boxes[keep]
    # objectness_logits = objectness_logits[keep]

    keep = boxes.nonempty(threshold=min_box_size)
    boxes = boxes[keep]
    objectness_logits = objectness_logits[keep]
    oh_labels = oh_labels[keep]

    proposals = Instances(image_shape)
    proposals.proposal_boxes = boxes[:proposal_topk]
    proposals.objectness_logits = objectness_logits[:proposal_topk]
    proposals.oh_labels = oh_labels[:proposal_topk]
    dataset_dict["proposals"] = proposals

    # for tfm in transforms:
    # if isinstance(tfm, HFlipTransform):
    # superpixels = tfm.apply_segmentation(superpixels)

    superpixels = transforms.apply_segmentation(superpixels.astype("float32"))
    dataset_dict["superpixels"] = torch.as_tensor(
        np.ascontiguousarray(superpixels.astype("int32")))
Beispiel #2
0
def transform_proposals(dataset_dict,
                        image_shape,
                        transforms,
                        *,
                        proposal_topk,
                        min_box_size=0):
    """
    Apply transformations to the proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly
            contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode"
        image_shape (tuple): height, width
        transforms (TransformList):
        proposal_topk (int): only keep top-K scoring proposals
        min_box_size (int): proposals with either side smaller than this
            threshold are removed

    The input dict is modified in-place, with abovementioned keys removed. A new
    key "proposals" will be added. Its value is an `Instances`
    object which contains the transformed proposals in its field
    "proposal_boxes" and "objectness_logits".
    """
    if "proposal_boxes" in dataset_dict:
        # Transform proposal boxes
        boxes = transforms.apply_box(
            BoxMode.convert(
                dataset_dict.pop("proposal_boxes"),
                dataset_dict.pop("proposal_bbox_mode"),
                BoxMode.XYXY_ABS,
            ))
        boxes = Boxes(boxes)
        objectness_logits = torch.as_tensor(
            dataset_dict.pop("proposal_objectness_logits").astype("float32"))

        boxes.clip(image_shape)

        keep = boxes.unique_boxes()
        boxes = boxes[keep]
        objectness_logits = objectness_logits[keep]

        keep = boxes.nonempty(threshold=min_box_size)
        boxes = boxes[keep]
        objectness_logits = objectness_logits[keep]

        proposals = Instances(image_shape)
        proposals.proposal_boxes = boxes[:proposal_topk]
        proposals.objectness_logits = objectness_logits[:proposal_topk]
        dataset_dict["proposals"] = proposals
Beispiel #3
0
def transform_proposals(dataset_dict,
                        image_shape,
                        transforms,
                        *,
                        proposal_topk,
                        min_box_size=0):
    """
    Apply transformations to the proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly
            contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode"
        image_shape (tuple): height, width
        transforms (TransformList):
        proposal_topk (int): only keep top-K scoring proposals
        min_box_size (int): proposals with either side smaller than this
            threshold are removed

    The input dict is modified in-place, with abovementioned keys removed. A new
    key "proposals" will be added. Its value is an `Instances`
    object which contains the transformed proposals in its field
    "proposal_boxes" and "objectness_logits".
    """

    if "proposal_file" in dataset_dict:
        return transform_proposals_seg(dataset_dict,
                                       image_shape,
                                       transforms,
                                       proposal_topk=proposal_topk)

    boxes = dataset_dict["proposals"].proposal_boxes.tensor.cpu().numpy()
    boxes = transforms.apply_box(boxes)
    boxes = Boxes(boxes)
    objectness_logits = dataset_dict["proposals"].objectness_logits

    boxes.clip(image_shape)

    # keep = boxes.unique_boxes()
    # boxes = boxes[keep]
    # objectness_logits = objectness_logits[keep]

    keep = boxes.nonempty(threshold=min_box_size)
    boxes = boxes[keep]
    objectness_logits = objectness_logits[keep]

    proposals = Instances(image_shape)
    proposals.proposal_boxes = boxes[:proposal_topk]
    proposals.objectness_logits = objectness_logits[:proposal_topk]
    dataset_dict["proposals"] = proposals
Beispiel #4
0
def find_top_rpn_proposals(
    proposals,
    pred_objectness_logits,
    images,
    nms_thresh,
    pre_nms_topk,
    post_nms_topk,
    min_box_side_len,
    training,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
    highest scoring proposals among all the feature maps if `training` is True,
    otherwise, returns the highest `post_nms_topk` scoring proposals for each
    feature map.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4).
            All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
        images (ImageList): Input images as an :class:`ImageList`.
        nms_thresh (float): IoU threshold to use for NMS
        pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is per
            feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is total,
            over all feature maps.
        min_box_side_len (float): minimum proposal box side length in pixels (absolute units
            wrt input images).
        training (bool): True if proposals are to be used in training, otherwise False.
            This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..."
            comment.

    Returns:
        proposals (list[Instances]): list of N Instances. The i-th Instances
            stores post_nms_topk object proposals for image i.
    """
    image_sizes = images.image_sizes  # in (h, w) order
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchor for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, proposals_i, logits_i in zip(itertools.count(), proposals,
                                               pred_objectness_logits):
        Hi_Wi_A = logits_i.shape[1]
        num_proposals_i = min(pre_nms_topk, Hi_Wi_A)

        # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812)
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i[batch_idx, :num_proposals_i]
        topk_idx = idx[batch_idx, :num_proposals_i]

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None],
                                       topk_idx]  # N x topk x 4

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(
            torch.full((num_proposals_i, ),
                       level_id,
                       dtype=torch.int64,
                       device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results = []
    for n, image_size in enumerate(image_sizes):
        boxes = Boxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        valid_mask = torch.isfinite(
            boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img)
        if not valid_mask.all():
            boxes = boxes[valid_mask]
            scores_per_img = scores_per_img[valid_mask]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_side_len)
        lvl = level_ids
        if keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = boxes[keep], scores_per_img[
                keep], level_ids[keep]

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
        # During training, topk is over the proposals from *all* images in the training batch.
        # During testing, it is over the proposals for each image separately.
        # As a result, the training behavior becomes batch-dependent,
        # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size.
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
def find_top_rpn_proposals(
    proposals: List[torch.Tensor],
    pred_objectness_logits: List[torch.Tensor],
    image_sizes: List[Tuple[int, int]],
    nms_thresh: float,
    pre_nms_topk: int,
    post_nms_topk: int,
    min_box_size: float,
    training: bool,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
    highest scoring proposals among all the feature maps for each image.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4).
            All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
        image_sizes (list[tuple]): sizes (h, w) for each image
        nms_thresh (float): IoU threshold to use for NMS
        pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is per
            feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is total,
            over all feature maps.
        min_box_size (float): minimum proposal box side length in pixels (absolute units
            wrt input images).
        training (bool): True if proposals are to be used in training, otherwise False.
            This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..."
            comment.

    Returns:
        list[Instances]: list of N Instances. The i-th Instances
            stores post_nms_topk object proposals for image i, sorted by their
            objectness score in descending order.
    """
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchor for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, (proposals_i, logits_i) in enumerate(
            zip(proposals, pred_objectness_logits)):
        Hi_Wi_A = logits_i.shape[1]
        if isinstance(Hi_Wi_A, torch.Tensor):  # it's a tensor in tracing
            num_proposals_i = torch.clamp(Hi_Wi_A, max=pre_nms_topk)
        else:
            num_proposals_i = min(Hi_Wi_A, pre_nms_topk)

        # sort is faster than topk: https://github.com/pytorch/pytorch/issues/22812
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i.narrow(1, 0, num_proposals_i)
        topk_idx = idx.narrow(1, 0, num_proposals_i)

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None],
                                       topk_idx]  # N x topk x 4

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(
            torch.full((num_proposals_i, ),
                       level_id,
                       dtype=torch.int64,
                       device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results: List[Instances] = []
    for n, image_size in enumerate(image_sizes):
        boxes = Boxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        lvl = level_ids

        valid_mask = torch.isfinite(
            boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img)
        if not valid_mask.all():
            if training:
                raise FloatingPointError(
                    "Predicted boxes or scores contain Inf/NaN. Training has diverged."
                )
            boxes = boxes[valid_mask]
            scores_per_img = scores_per_img[valid_mask]
            lvl = lvl[valid_mask]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_size)
        if _is_tracing() or keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = boxes[keep], scores_per_img[
                keep], lvl[keep]

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
        # During training, topk is over the proposals from *all* images in the training batch.
        # During testing, it is over the proposals for each image separately.
        # As a result, the training behavior becomes batch-dependent,
        # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size.
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]  # keep is already sorted

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
Beispiel #6
0
def boxes_fusion_single_image(boxes,
                              scores,
                              classes,
                              image_shape,
                              nms_thresh=0.5,
                              topk_per_image=-1,
                              method='nms',
                              device="cpu"):
    assert method in ["nms", "wbf"], f"Not implemented method {method}"
    assert len(scores) == len(boxes) and len(scores) == len(classes), \
        f"Length of boxes, scores, classes is not equal!"

    # normalize the boxes
    for i, boxes_per_img in enumerate(boxes):
        boxes_per_img = Boxes(boxes_per_img)
        boxes_per_img.clip(image_shape)
        # filter the width or height < threshold boxes
        keep = boxes_per_img.nonempty(1.0)
        boxes_per_img = boxes_per_img[keep]

        boxes_per_img = boxes_per_img.tensor.cpu().numpy()
        boxes_per_img[:, 0::2] = boxes_per_img[:, 0::2] / image_shape[1]
        boxes_per_img[:, 1::2] = boxes_per_img[:, 1::2] / image_shape[0]

        boxes[i] = boxes_per_img
        scores[i] = scores[i][keep].cpu().numpy()
        classes[i] = classes[i][keep].cpu().numpy()

    # weights = [1.2, 1.2, 1.1, 1.1, 1.0, 1.0]
    if method == 'nms':
        boxes, scores, classes = weighted_boxes_fusion(
            boxes,
            scores,
            classes,
            # weights=weights,
            iou_thr=nms_thresh)
    else:  # "wbf"
        boxes, scores, classes = weighted_boxes_fusion(
            boxes,
            scores,
            classes,
            # weights=weights,
            iou_thr=nms_thresh,  # wbf higher than nms performance better
        )

    if topk_per_image >= 0:
        boxes, scores, classes = boxes[:
                                       topk_per_image], scores[:
                                                               topk_per_image], classes[:
                                                                                        topk_per_image]

    # resize to image shape
    boxes[:, 0::2] = boxes[:, 0::2] * image_shape[1]
    boxes[:, 1::2] = boxes[:, 1::2] * image_shape[0]

    # to tensor
    boxes = torch.from_numpy(boxes).to(device=device)
    scores = torch.from_numpy(scores).to(device=device)
    classes = torch.from_numpy(classes).to(device=device)

    result = Instances(image_shape)
    boxes = Boxes(boxes)
    boxes.clip(image_shape)
    result.pred_boxes = boxes
    result.scores = scores
    result.pred_classes = classes

    return result