Example #1
    def fast_rcnn_inference_single_image(boxes, scores, image_shape,
                                         score_thresh, nms_thresh,
                                         topk_per_image, box_features):
        valid_mask = torch.isfinite(boxes).all(
            dim=1) & torch.isfinite(scores).all(dim=1)
        if not valid_mask.all():
            boxes = boxes[valid_mask]
            scores = scores[valid_mask]
            box_features = box_features[valid_mask]

        scores = scores[:, :-1]
        num_bbox_reg_classes = boxes.shape[1] // 4
        # Convert to Boxes to use the `clip` function ...
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, 4)  # (R * C) x 4
        max_conf = torch.zeros((boxes.shape[0])).cuda()
        for cls_ind in range(0, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            # dets = torch.cat([boxes, cls_scores.view(-1, 1)], 1)
            keep = nms(boxes, cls_scores, 0.3)
            max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                         cls_scores[keep], max_conf[keep])
        keep_boxes = torch.where(max_conf >= 0.2)[0]
        if len(keep_boxes) < 36:
            keep_boxes = torch.argsort(max_conf, descending=True)[:36]
        elif len(keep_boxes) > 36:
            keep_boxes = keep_boxes[:36]
        boxes, scores = boxes[keep_boxes], scores[keep_boxes]
        box_features = box_features[keep_boxes]
        result = Instances(image_shape)
        result.pred_boxes = Boxes(boxes)
        result.scores = scores
        result.pred_classes = keep_boxes
        return result, keep_boxes, box_features
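A toy illustration of the fixed-size selection above (pure PyTorch, hypothetical numbers): boxes whose best post-NMS class confidence clears the threshold are kept, and the set is padded or truncated so that exactly topk regions come back, a convention often used when a fixed number of region features per image is required.

    import torch

    max_conf = torch.tensor([0.9, 0.1, 0.4, 0.05])   # best per-box confidence
    keep_boxes = torch.where(max_conf >= 0.2)[0]      # -> [0, 2]
    min_boxes = 3                                     # 36 in the code above
    if len(keep_boxes) < min_boxes:
        keep_boxes = torch.argsort(max_conf, descending=True)[:min_boxes]
    elif len(keep_boxes) > min_boxes:
        keep_boxes = keep_boxes[:min_boxes]
    print(keep_boxes)                                 # -> [0, 2, 1]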
Example #2
    def _get_class_predictions(self, boxes, scores, image_shape):

        num_bbox_reg_classes = boxes.shape[1] // 4

        # Convert to Boxes to use the `clip` function ...
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

        # Filter results based on detection scores
        filter_mask = scores > self.class_score_thresh_test

        # R' x 2. First column contains indices of the R predictions;
        # Second column contains indices of classes.
        class_inds = filter_mask.nonzero()
        if num_bbox_reg_classes == 1:
            boxes = boxes[class_inds[:, 0], 0]
        else:
            boxes = boxes[filter_mask]
        scores = scores[filter_mask]

        # Apply per-class NMS
        keep_class = batched_nms(boxes, scores, class_inds[:, 1],
                                 self.class_nms_thresh_test)
        if self.topk_per_image_test >= 0:
            keep_class = keep_class[:self.topk_per_image_test]

        boxes, scores, class_inds = boxes[keep_class], scores[
            keep_class], class_inds[keep_class]

        return boxes, scores, class_inds
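A small, self-contained illustration of the score-threshold filtering used above (pure PyTorch, toy numbers): nonzero() on the boolean mask yields (proposal index, class index) pairs, which is why the second column can later serve as the predicted class labels.

    import torch

    scores = torch.tensor([[0.9, 0.1],
                           [0.2, 0.6],
                           [0.3, 0.4]])
    filter_mask = scores > 0.5            # R x K boolean mask
    class_inds = filter_mask.nonzero()    # -> [[0, 0], [1, 1]]
    proposal_idx = class_inds[:, 0]       # -> [0, 1]
    class_idx = class_inds[:, 1]          # -> [0, 1]
    kept_scores = scores[filter_mask]     # -> [0.9, 0.6]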
Example #3
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image):
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    idxs = torch.arange(num_objs).cuda() * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = max_classes[keep]

    return result, keep
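The "Select max scores" block above gathers one box per proposal out of the flattened (R*C) x 4 tensor by computing a flat index. A minimal check of that index arithmetic (pure PyTorch, toy shapes):

    import torch

    R, C = 5, 3
    boxes = torch.arange(R * C * 4, dtype=torch.float32).view(R, C, 4)
    max_classes = torch.tensor([0, 2, 1, 1, 0])

    flat = boxes.view(-1, 4)
    idxs = torch.arange(R) * C + max_classes
    # Flat-index gathering matches advanced indexing with (row, class) pairs.
    assert torch.equal(flat[idxs], boxes[torch.arange(R), max_classes])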
Example #4
def relationnet_inference(boxes, scores, targets, image_shapes):
     """
     Args:
         boxes (Tensor): (batch_images, first_n, num_classes, 4)
         scores (Tensor): (batch_images, first_n, num_classes, num_thresh)
         targets (Tensor): (batch_images, first_n, num_classes, num_thresh)
         image_shapes (List[Tuple]): A list of (height, width) tuples for each image in the batch.
     Return:
         result (List[Instances]): 
           - pred_boxes (Boxes): (num_pred, 4)
           - scores (Tensor): (num_pred, num_classes)
           - pred_classes (Tensor): (num_pred,)
         filter_indices (Tensor)
     """
     thresh_idx = int(
         torch.where(self.iou_thresh == self.nms_thresh_test)[0][0])
     batch_images, first_n, num_classes = boxes.shape[:3]
     scores = scores[..., thresh_idx]
     results, filter_indices = [], []
     for batch_idx in range(batch_images):
         filter_idx = targets[batch_idx, :, :, thresh_idx].nonzero()  # (N, 2): (box index, class index)
         result = Instances(image_shapes[batch_idx])
         mask_idx = filter_idx.split(1, dim=1)
         pred_boxes = Boxes(boxes[batch_idx, ...][mask_idx].view(-1, 4))
         pred_boxes.clip(image_shapes[batch_idx])
         result.pred_boxes = pred_boxes
         result.scores = scores[batch_idx, ...][mask_idx]
         result.pred_classes = filter_idx[:, 1]
         results.append(result)
         filter_indices.append(filter_idx)
     return results, filter_indices
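A sketch of the tuple-indexing trick used above (pure PyTorch, toy shapes): splitting an (N, 2) index tensor column-wise produces a tuple of index tensors that can be used directly for advanced indexing into a (first_n, num_classes, 4) box tensor.

    import torch

    x = torch.arange(24.).view(3, 2, 4)           # (first_n, num_classes, 4)
    filter_idx = torch.tensor([[0, 1], [2, 0]])   # (box index, class index) pairs
    mask_idx = filter_idx.split(1, dim=1)         # tuple of two (N, 1) tensors
    picked = x[mask_idx].view(-1, 4)              # boxes (0, 1) and (2, 0)
    assert torch.equal(picked, x[filter_idx[:, 0], filter_idx[:, 1]])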
Example #5
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    Tscores = scores
    #print (scores)
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    #print (scores)

    # Apply per-class NMS
    uniclass = torch.zeros(len(filter_inds[:, 1].tolist())).cuda()
    #keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    keep = batched_nms(boxes, scores, uniclass, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    #print(filter_inds[:, 0])
    #print(torch.ByteTensor([0,1,0,0,1]))
    #print(filter_inds[:, 1])
    #print(keep)
    #print(Tscores[filter_inds[:, 0]])
    #print (scores)
    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    #result.scores = scores
    result.scores = Tscores[filter_inds[:, 0]]
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #6
def fast_rcnn_inference_single_image(boxes, scores, attr_scores, image_shape,
                                     score_thresh, nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Make sure boxes and scores don't contain infinite or Nan
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) \
                                                  & torch.isfinite(attr_scores).all(dim=1)

    # Get scores from finite boxes and scores
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        attr_scores = attr_scores[valid_mask]

    scores = scores[:, :-1]  # Drop the background class scores (last column)
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # If using Attributes class:
    # attributes = Attributes(attributes.reshape(-1, 295))
    # attributes = attributes.tensor.view(-1, num_bbox_reg_classes, 295)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()

    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, attr_scores, filter_inds = (
        boxes[keep], scores[keep], attr_scores[keep], filter_inds[keep])

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.attr_scores = attr_scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #7
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     light=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    if type(light) == type(boxes):
        # print(light)
        light = Boxes(light.reshape(-1, 4))
        # light.clip(image_shape)
        light = light.tensor.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
        if type(light) == type(boxes):
            light = light[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
        if type(light) == type(boxes):
            light = light[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    if type(light) == type(boxes):
        light = light[keep]
    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    if type(light) == type(boxes):
        result.pred_light = Boxes(light)
    return result, filter_inds[:, 0]
Example #8
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     allow_oob=False):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    if not allow_oob:
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    else:
        boxes = boxes.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    from torchvision.ops import nms

    keep = nms(boxes, scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #9
def fast_rcnn_inference_single_image(
    boxes,
    scores,
    image_shape: Tuple[int, int],
    score_thresh: float,
    nms_thresh: float,
    topk_per_image: int,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)

    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    if len(category_disabler.global_cat_mask) > 0:
        print('<<<<<< category disabler activated >>>>>>')
        scores *= torch.tensor(category_disabler.global_cat_mask).cuda()
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # 1. Filter results based on detection scores. It can make NMS more efficient
    #    by filtering out low-confidence detections.
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # 2. Apply NMS for each class independently.
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #10
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # 1. Filter results based on detection scores. It can make NMS more efficient
    #    by filtering out low-confidence detections.
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]


    # 2. Apply NMS for each class independently.
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    # DIOU NMS commented for now
    # keep = batched_diou_nms(boxes, scores, filter_inds[:, 1], nms_thresh) \
    #        if global_cfg.MODEL.ROI_BOX_HEAD.NMS_TYPE == "diou_nms" \
    #        else \
    #        batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
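batched_nms above runs NMS independently per class in a single pass. A common way to implement this (and the one torchvision uses for moderate box counts) is to offset the boxes of each class so that boxes of different classes can never overlap, then run one class-agnostic NMS. A small sketch, assuming torchvision is installed:

    import torch
    from torchvision.ops import batched_nms, nms

    boxes = torch.tensor([[0., 0., 10., 10.],
                          [1., 1., 11., 11.],
                          [0., 0., 10., 10.]])
    scores = torch.tensor([0.9, 0.8, 0.7])
    classes = torch.tensor([0, 0, 1])     # third box belongs to another class

    offsets = classes.to(boxes) * (boxes.max() + 1)
    keep_manual = nms(boxes + offsets[:, None], scores, iou_threshold=0.5)
    keep_builtin = batched_nms(boxes, scores, classes, iou_threshold=0.5)
    # Both keep box 0 (box 1 is suppressed by it) and box 2 (different class).
    assert torch.equal(keep_manual.sort()[0], keep_builtin.sort()[0])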
Example #11
def transform_proposals_seg(dataset_dict,
                            image_shape,
                            transforms,
                            *,
                            proposal_topk,
                            min_box_size=0):
    """
    Apply transformations to the proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly
            contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode"
        image_shape (tuple): height, width
        transforms (TransformList):
        proposal_topk (int): only keep top-K scoring proposals
        min_box_size (int): proposals with either side smaller than this
            threshold are removed

    The input dict is modified in-place, with abovementioned keys removed. A new
    key "proposals" will be added. Its value is an `Instances`
    object which contains the transformed proposals in its field
    "proposal_boxes" and "objectness_logits".
    """
    boxes = dataset_dict["proposals"].proposal_boxes.tensor.cpu().numpy()
    boxes = transforms.apply_box(boxes)
    boxes = Boxes(boxes)
    objectness_logits = dataset_dict["proposals"].objectness_logits

    oh_labels = dataset_dict["proposals"].oh_labels
    superpixels = dataset_dict["superpixels"].cpu().numpy()

    boxes.clip(image_shape)

    # keep = boxes.unique_boxes()
    # boxes = boxes[keep]
    # objectness_logits = objectness_logits[keep]

    keep = boxes.nonempty(threshold=min_box_size)
    boxes = boxes[keep]
    objectness_logits = objectness_logits[keep]
    oh_labels = oh_labels[keep]

    proposals = Instances(image_shape)
    proposals.proposal_boxes = boxes[:proposal_topk]
    proposals.objectness_logits = objectness_logits[:proposal_topk]
    proposals.oh_labels = oh_labels[:proposal_topk]
    dataset_dict["proposals"] = proposals

    # for tfm in transforms:
    # if isinstance(tfm, HFlipTransform):
    # superpixels = tfm.apply_segmentation(superpixels)

    superpixels = transforms.apply_segmentation(superpixels.astype("float32"))
    dataset_dict["superpixels"] = torch.as_tensor(
        np.ascontiguousarray(superpixels.astype("int32")))
Example #12
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image, fc_box_features=None,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    probs = scores.clone().detach()
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]

    # Compact all fc layers into a single tensor to work nicely with Instance class for now
    if fc_box_features is not None:
        fc_box_features = [fc_layer_box_features[filter_inds[:, 0]] for fc_layer_box_features in fc_box_features]
        # will need to know number of layers and dimensions to unpack
        fc_box_features = torch.cat(fc_box_features, dim=1)
        result.fc_box_features = fc_box_features

    probs = probs[filter_inds[:, 0]]
    result.probs = probs

    return result, filter_inds[:, 0]
Example #13
    def regress_and_classify(self, image: np.ndarray, tracklets: List[Tracklet]) -> Tuple[np.ndarray, np.ndarray]:
        # Convert boxes to proposals
        height, width = image.shape[:2]
        image = self.transform_gen.get_transform(image).apply_image(image)
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        # Size of the resized image that is fed to the detector
        feat_height, feat_width = image.shape[1:3]
        scale_x = feat_width / width
        scale_y = feat_height / height
        proposal_boxes = Boxes(torch.tensor([tracklet.last_detection.box for tracklet in tracklets]))

        # Scale proposals to the same size as boxes
        proposal_boxes.scale(scale_x, scale_y)
        proposals = Instances((feat_height, feat_width), proposal_boxes=proposal_boxes)

        inputs = {"image": image, "height": height, "width": width, "proposals": proposals}

        images = self.model.preprocess_image([inputs])
        features = self.model.backbone(images.tensor)
        proposals = [inputs["proposals"].to(self.model.device)]

        # Extract features, perform RoI pooling and perform regression/classification for each RoI
        features_list = [features[f] for f in self.model.roi_heads.in_features]

        box_features = self.model.roi_heads.box_pooler(features_list, [x.proposal_boxes for x in proposals])
        box_features = self.model.roi_heads.box_head(box_features)
        pred_class_logits, pred_proposal_deltas = self.model.roi_heads.box_predictor(box_features)
        del box_features

        raw_outputs = FastRCNNOutputs(
            self.model.roi_heads.box_predictor.box2box_transform,
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            self.model.roi_heads.box_predictor.smooth_l1_beta,
        )

        # Convert raw outputs to predicted boxes and scores
        boxes = raw_outputs.predict_boxes()[0]
        scores = raw_outputs.predict_probs()[0]

        num_bbox_reg_classes = boxes.shape[1] // 4
        boxes = Boxes(boxes.reshape(-1, 4))
        # Scale regressed boxes to the same size as original image
        boxes.clip((feat_height, feat_width))
        boxes.scale(1 / scale_x, 1 / scale_y)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)
        boxes = boxes[:, 0, :]
        scores = scores[:, 0]

        pred_boxes = boxes.detach().cpu().numpy()
        scores = scores.detach().cpu().numpy()
        return pred_boxes, scores
Example #14
def transform_proposals(dataset_dict,
                        image_shape,
                        transforms,
                        *,
                        proposal_topk,
                        min_box_size=0):
    """
    Apply transformations to the proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly
            contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode"
        image_shape (tuple): height, width
        transforms (TransformList):
        proposal_topk (int): only keep top-K scoring proposals
        min_box_size (int): proposals with either side smaller than this
            threshold are removed

    The input dict is modified in-place, with abovementioned keys removed. A new
    key "proposals" will be added. Its value is an `Instances`
    object which contains the transformed proposals in its field
    "proposal_boxes" and "objectness_logits".
    """
    if "proposal_boxes" in dataset_dict:
        # Transform proposal boxes
        boxes = transforms.apply_box(
            BoxMode.convert(
                dataset_dict.pop("proposal_boxes"),
                dataset_dict.pop("proposal_bbox_mode"),
                BoxMode.XYXY_ABS,
            ))
        boxes = Boxes(boxes)
        objectness_logits = torch.as_tensor(
            dataset_dict.pop("proposal_objectness_logits").astype("float32"))

        boxes.clip(image_shape)

        keep = boxes.unique_boxes()
        boxes = boxes[keep]
        objectness_logits = objectness_logits[keep]

        keep = boxes.nonempty(threshold=min_box_size)
        boxes = boxes[keep]
        objectness_logits = objectness_logits[keep]

        proposals = Instances(image_shape)
        proposals.proposal_boxes = boxes[:proposal_topk]
        proposals.objectness_logits = objectness_logits[:proposal_topk]
        dataset_dict["proposals"] = proposals
Example #15
def transform_proposals(dataset_dict,
                        image_shape,
                        transforms,
                        *,
                        proposal_topk,
                        min_box_size=0):
    """
    Apply transformations to the proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly
            contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode"
        image_shape (tuple): height, width
        transforms (TransformList):
        proposal_topk (int): only keep top-K scoring proposals
        min_box_size (int): proposals with either side smaller than this
            threshold are removed

    The input dict is modified in-place, with abovementioned keys removed. A new
    key "proposals" will be added. Its value is an `Instances`
    object which contains the transformed proposals in its field
    "proposal_boxes" and "objectness_logits".
    """

    if "proposal_file" in dataset_dict:
        return transform_proposals_seg(dataset_dict,
                                       image_shape,
                                       transforms,
                                       proposal_topk=proposal_topk)

    boxes = dataset_dict["proposals"].proposal_boxes.tensor.cpu().numpy()
    boxes = transforms.apply_box(boxes)
    boxes = Boxes(boxes)
    objectness_logits = dataset_dict["proposals"].objectness_logits

    boxes.clip(image_shape)

    # keep = boxes.unique_boxes()
    # boxes = boxes[keep]
    # objectness_logits = objectness_logits[keep]

    keep = boxes.nonempty(threshold=min_box_size)
    boxes = boxes[keep]
    objectness_logits = objectness_logits[keep]

    proposals = Instances(image_shape)
    proposals.proposal_boxes = boxes[:proposal_topk]
    proposals.objectness_logits = objectness_logits[:proposal_topk]
    dataset_dict["proposals"] = proposals
Example #16
def fast_rcnn_inference_single_image(image_shape,
                                     boxes,
                                     scores,
                                     classes=None,
                                     score_thresh=0.05,
                                     nms_thresh=0.5,
                                     topk_per_image=1000):
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    replace_cls = classes is not None
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    if replace_cls:
        classes = classes[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    if replace_cls:
        result.pred_classes = classes[keep]
    else:
        result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #17
def fast_rcnn_inference_single_image(
        boxes, scores, image_shape, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    class_distr_scores = scores.clone()
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    num_objs = torch.arange(num_objs)
    if torch.cuda.is_available():
        num_objs = num_objs.cuda()
    idxs = num_objs * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    class_distr_scores = class_distr_scores[keep]
    # we set the background probability to 0
    class_distr_scores[:, -1] = 0.0
    result.scores = class_distr_scores

    return result, keep
Example #18
def trend_rcnn_inference_single_image(boxes, scores, attributes, image_shape,
                                      score_thresh, nms_thresh, topk_per_image,
                                      attr_score_thresh, num_attr_classes,
                                      max_attr_pred):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        attributes = attributes[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    #print("Printing the number of classes in the box: ", num_bbox_reg_classes)
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    num_attr_reg_classes = attributes.shape[1] // num_attr_classes
    # [ANMOL] num_attr_reg_classes is the number of object classes; here it is 46
    attributes = attributes.view(-1, num_attr_reg_classes, num_attr_classes)
    # [ANMOL] attributes reshaped to [proposals, object class, attribute class]

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # filter mask shape is same as score shape: [proposals, obj classes]
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    # There can be more rows here than proposals, because several class scores
    # of the same proposal may exceed the threshold. It would be interesting to
    # check how class-agnostic attribute classification behaves in that case;
    # it might fail. In the current example: R=1000, but R'=45806.
    #print("filter ind shape: ", filter_inds.shape)

    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    # Before this, scores had shape [R, num_classes]; after masking it becomes [R'].

    if num_attr_reg_classes == 1:
        attributes = attributes[filter_inds[:, 0], 0]
    else:
        attributes = attributes[filter_mask]
    # Both branches produce attributes of shape [R', attr_classes].

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds, attributes = boxes[keep], scores[
        keep], filter_inds[keep], attributes[keep]

    attributes[attributes < attr_score_thresh] = 0
    attr_scores_sorted, attr_indices = torch.sort(attributes,
                                                  1,
                                                  descending=True)
    attr_indices[attr_scores_sorted < attr_score_thresh] = 294
    attributes_inds = attr_indices[:, 0:max_attr_pred]
    #del attr_indices

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.attr_scores = attributes
    result.attr_classes = attributes_inds
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
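A toy illustration of the attribute post-processing above (the threshold, padding index, and count below are placeholders): attribute scores below the threshold are zeroed, each row is sorted in descending order, sub-threshold entries are mapped to the padding index, and only the top max_attr_pred attributes are kept.

    import torch

    attr_score_thresh, max_attr_pred, pad_idx = 0.5, 2, 294
    attributes = torch.tensor([[0.9, 0.2, 0.7],
                               [0.1, 0.3, 0.2]])
    attributes[attributes < attr_score_thresh] = 0
    attr_scores_sorted, attr_indices = torch.sort(attributes, 1, descending=True)
    attr_indices[attr_scores_sorted < attr_score_thresh] = pad_idx
    print(attr_indices[:, :max_attr_pred])   # -> [[0, 2], [294, 294]]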
Example #19
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     class_logits=None,
                                     estimate_uncertainty=False,
                                     variance=torch.Tensor([])):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.

    # Get box ID with predicted class label: [box id, class label]
    filter_inds = filter_mask.nonzero()

    import numpy as np
    class_id = np.argmax(scores.cpu().numpy(), axis=1)
    class_id = np.array([np.arange(scores.shape[0]), class_id])
    class_id = np.swapaxes(class_id, 1, 0)
    boxes_one_class = boxes[class_id[:, 0], class_id[:, 1], :].cpu().numpy()
    scores_one_class = np.max(scores.cpu().numpy(), axis=1)

    if class_logits is not None:
        class_logits = class_logits[filter_inds[:, 0]]
        predicted_probs = scores[filter_inds[:, 0]]

    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores_filtered = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores_filtered, filter_inds[:, 1], nms_thresh)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]

    boxes_final, scores_final, filter_inds_final = boxes[
        keep], scores_filtered[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes_final)
    result.scores = scores_final
    result.pred_classes = filter_inds_final[:, 1]
    # Jamie
    # Save out logits
    if class_logits is not None:
        #result.class_logits = class_logits[filter_inds_final[:,0]]
        result.class_logits = class_logits[keep]
        result.prob_score = predicted_probs[keep]
        #class_logits = class_logits[filter_inds_final[:,0]]
        #result.class_logits = class_logits[keep]

    if estimate_uncertainty:
        # std from 1000 proposals
        #stds = nms_calc_uncertainty(boxes_final.cpu().numpy(), scores_final.cpu().numpy(), boxes_one_class, scores_one_class, 0.75)
        # std from bbox with class confidence score higher than threshold
        stds = nms_calc_uncertainty(boxes_final.cpu().numpy(),
                                    scores_final.cpu().numpy(),
                                    boxes.cpu().numpy(),
                                    scores_filtered.cpu().numpy(), 0.9)
        result.stds = torch.Tensor(stds).cuda()

    if len(variance) > 0:
        result.vars = variance[keep]

    return result, filter_inds_final[:, 0]
Example #20
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     objness_scores,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     use_unknown=False,
                                     num_classes=80,
                                     reverse_label_converter=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)

    if reverse_label_converter is not None:
        ignore_void = reverse_label_converter[-1] == -1
    else:
        ignore_void = scores.shape[1] == num_classes + 1

    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        objness_scores = objness_scores[valid_mask]

    original_scores = scores.clone()
    if ignore_void:
        scores = scores[:, :-1]
    else:
        scores = scores[:, :-2]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    if scores.shape[1] > num_classes:
        filter_mask = scores[:, :-1] > score_thresh
    else:
        filter_mask = scores > score_thresh  # R x K
    if use_unknown:
        new_filter_mask = filter_mask.sum(-1) < 1
        if original_scores.shape[1] > num_classes + 1 or not ignore_void:
            new_filter_mask = torch.logical_and(
                new_filter_mask,
                original_scores.argmax(-1) == num_classes)
        objness_scores = objness_scores.sigmoid()
        obj_th = 0.500
        unknown_filter_mask = torch.logical_and(new_filter_mask,
                                                objness_scores > obj_th)
        unknown_filter_inds = unknown_filter_mask.nonzero()
        unknown_boxes = boxes[unknown_filter_inds[:, 0], 0]
        unknown_scores = objness_scores[unknown_filter_inds[:, 0]]
        keep = nms(unknown_boxes, unknown_scores, nms_thresh)
        keep = keep[:int(topk_per_image * 0.5)]
        unknown_boxes = unknown_boxes[keep]
        unknown_scores = unknown_scores[keep]
        unknown_filter_inds = unknown_filter_inds[keep]

    if scores.shape[1] > num_classes:
        scores = scores[:, :-1]

    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    if use_unknown:
        boxes = torch.cat((boxes, unknown_boxes), dim=0)
        scores = torch.cat((scores, unknown_scores), dim=0)
        if ignore_void:
            classes = torch.cat((filter_inds[:, 1], -torch.ones(
                len(unknown_scores), device=filter_inds.device).long()),
                                dim=0)
        else:
            classes = torch.cat((filter_inds[:, 1], -2 * torch.ones(
                len(unknown_scores), device=filter_inds.device).long()),
                                dim=0)

    else:
        classes = filter_inds[:, -1]
    if reverse_label_converter is not None:
        classes = reverse_label_converter.to(classes.device)[classes]

    boxes = boxes[:topk_per_image]
    scores = scores[:topk_per_image]
    classes = classes[:topk_per_image]

    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = classes
    inds = filter_inds[:, 0]
    if use_unknown:
        inds = torch.cat((inds, unknown_filter_inds[:, 0]))
    inds = inds[:topk_per_image]
    return result, inds
Example #21
def eopsn_inference_single_image(
    boxes, scores, image_shape, objness_scores, score_thresh, nms_thresh, topk_per_image,
    use_unknown=False, num_classes=80, reverse_label_converter=None
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)


    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        objness_scores = objness_scores[valid_mask]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K

    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    classes = filter_inds[:,-1]
    classes[classes > len(reverse_label_converter)-1] = -1
    filter_inds[:,-1] = classes

    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    classes = filter_inds[:,-1]
    if reverse_label_converter is not None:
        classes = reverse_label_converter.to(classes.device)[classes]

    boxes = boxes[:topk_per_image]
    scores = scores[:topk_per_image]
    classes = classes[:topk_per_image]

    inds = filter_inds[:,0]
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = classes
    inds = inds[:topk_per_image]
    return result, inds
Example #22
def find_top_rpn_proposals(
    proposals,
    pred_objectness_logits,
    images,
    nms_thresh,
    pre_nms_topk,
    post_nms_topk,
    min_box_side_len,
    training,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
    highest scoring proposals among all the feature maps if `training` is True,
    otherwise, returns the highest `post_nms_topk` scoring proposals for each
    feature map.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4).
            All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
        images (ImageList): Input images as an :class:`ImageList`.
        nms_thresh (float): IoU threshold to use for NMS
        pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is per
            feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is total,
            over all feature maps.
        min_box_side_len (float): minimum proposal box side length in pixels (absolute units
            wrt input images).
        training (bool): True if proposals are to be used in training, otherwise False.
            This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..."
            comment.

    Returns:
        proposals (list[Instances]): list of N Instances. The i-th Instances
            stores post_nms_topk object proposals for image i.
    """
    image_sizes = images.image_sizes  # in (h, w) order
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchor for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, proposals_i, logits_i in zip(itertools.count(), proposals,
                                               pred_objectness_logits):
        Hi_Wi_A = logits_i.shape[1]
        num_proposals_i = min(pre_nms_topk, Hi_Wi_A)

        # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812)
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i[batch_idx, :num_proposals_i]
        topk_idx = idx[batch_idx, :num_proposals_i]

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None],
                                       topk_idx]  # N x topk x 4

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(
            torch.full((num_proposals_i, ),
                       level_id,
                       dtype=torch.int64,
                       device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results = []
    for n, image_size in enumerate(image_sizes):
        boxes = Boxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        valid_mask = torch.isfinite(
            boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img)
        if not valid_mask.all():
            boxes = boxes[valid_mask]
            scores_per_img = scores_per_img[valid_mask]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_side_len)
        lvl = level_ids
        if keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = boxes[keep], scores_per_img[
                keep], level_ids[keep]

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
        # During training, topk is over the proposals from *all* images in the training batch.
        # During testing, it is over the proposals for each image separately.
        # As a result, the training behavior becomes batch-dependent,
        # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size.
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
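A quick check of the sort-then-slice pattern used in step 1 above (the inline comment notes that sort can be faster than topk): slicing the sorted logits and indices gives the same result as topk.

    import torch

    logits = torch.tensor([[0.1, 0.9, 0.5, 0.7]])
    k = 2
    batch_idx = torch.arange(logits.shape[0])
    sorted_logits, idx = logits.sort(descending=True, dim=1)
    topk_scores = sorted_logits[batch_idx, :k]
    topk_idx = idx[batch_idx, :k]
    ref_scores, ref_idx = logits.topk(k, dim=1)
    assert torch.equal(topk_scores, ref_scores) and torch.equal(topk_idx, ref_idx)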
Example #23
    def forward_for_single_feature_map(self, locations, box_cls, reg_pred,
                                       image_sizes):
        N, C, H, W = box_cls.shape

        # put in the same format as locations
        box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
        box_cls = box_cls.reshape(N, -1, C).sigmoid()
        box_regression = reg_pred.view(N, 4, H, W).permute(0, 2, 3, 1)
        box_regression = box_regression.reshape(N, -1, 4)
        # ctrness = ctrness.view(N, 1, H, W).permute(0, 2, 3, 1)
        # ctrness = ctrness.reshape(N, -1).sigmoid()

        # if self.thresh_with_ctr is True, we multiply the classification
        # scores with centerness scores before applying the threshold.
        # if self.thresh_with_ctr:
        #     box_cls = box_cls * ctrness[:, :, None]
        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        # if not self.thresh_with_ctr:
        #     box_cls = box_cls * ctrness[:, :, None]

        results = []
        for i in range(N):
            per_box_cls = box_cls[i]
            per_candidate_inds = candidate_inds[i]
            per_box_cls = per_box_cls[per_candidate_inds]

            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]

            per_box_regression = box_regression[i]
            per_box_regression = per_box_regression[per_box_loc]
            per_locations = locations[per_box_loc]

            per_pre_nms_top_n = pre_nms_top_n[i]

            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]

            detections = torch.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ],
                                     dim=1)

            boxlist = Instances(image_sizes[i])
            boxes = Boxes(detections)
            boxes.clip(image_sizes[i])
            boxlist.pred_boxes = boxes
            boxlist.scores = torch.sqrt(per_box_cls)
            boxlist.pred_classes = per_class
            boxlist.locations = per_locations

            results.append(boxlist)

        return results
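The detections stack above decodes each location's predicted distances (left, top, right, bottom) into box corners, in the style of FCOS-like dense detectors. A tiny worked example (pure PyTorch, toy numbers):

    import torch

    locations = torch.tensor([[50., 60.]])       # (x, y) of one feature location
    reg = torch.tensor([[10., 20., 30., 40.]])   # predicted (l, t, r, b) distances
    boxes = torch.stack([locations[:, 0] - reg[:, 0],
                         locations[:, 1] - reg[:, 1],
                         locations[:, 0] + reg[:, 2],
                         locations[:, 1] + reg[:, 3]], dim=1)
    print(boxes)                                 # -> [[40., 40., 80., 100.]]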
Example #24
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     vp_bins=None,
                                     vp=None,
                                     vp_res=None,
                                     rotated_box_training=False,
                                     h=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    if not rotated_box_training or len(boxes) == 0:
        keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    else:
        # BBox with encoding ctr_x,ctr_y,w,l
        if vp is not None and vp_bins is not None:
            _vp = vp.view(-1, num_bbox_reg_classes, vp_bins)  # R x C x bins
            _vp = _vp[filter_mask]
            if len(_vp) > 0:
                _, vp_max = torch.max(_vp, 1)
                vp_filtered = vp_max
                if vp_res is not None:
                    _vp_res = vp_res.view(-1, num_bbox_reg_classes, vp_bins)
                    _vp_res = _vp_res[filter_mask]
                    vp_res_filtered = list()
                    for i, k in enumerate(vp_max):
                        vp_res_filtered.append(_vp_res[i, k])
                else:
                    vp_filtered = _vp
            rboxes = []
            for i in range(boxes.shape[0]):
                box = boxes[i]
                if vp_res is not None:
                    angle = anglecorrection(
                        vp_res_filtered[i] * 180 / math.pi).to(box.device)
                else:
                    angle = bin2ang(vp_filtered[i], vp_bins).to(box.device)
                box = torch.cat((box, angle))
                rboxes.append(box)
            rboxes = torch.cat(rboxes).reshape(-1, 5).to(vp_filtered.device)
            #keep = nms_rotated(rboxes, scores, nms_thresh)
            keep = batched_nms_rotated(rboxes, scores, filter_inds[:, 1],
                                       nms_thresh)
        else:
            boxes[:, :, 2] = boxes[:, :, 2] + boxes[:, :, 0]  #x2
            boxes[:, :, 3] = boxes[:, :, 3] + boxes[:, :, 1]  #y2
            keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    if vp is not None and vp_bins is not None:
        vp = vp.view(-1, num_bbox_reg_classes, vp_bins)  # R x C x bins
        vp = vp[filter_mask]
        vp = vp[keep]
        if vp_res is not None:
            vp_res = vp_res.view(-1, num_bbox_reg_classes, vp_bins)
            vp_res = vp_res[filter_mask]
            vp_res = vp_res[keep]
        if len(vp) > 0:
            _, vp_max = torch.max(vp, 1)
            result.viewpoint = vp_max
            if vp_res is not None:
                vp_res_filtered = list()
                for i, k in enumerate(vp_max):
                    vp_res_filtered.append(vp_res[i, k])
                # This is directly the predicted yaw orientation.
                result.viewpoint_residual = torch.tensor(vp_res_filtered).to(
                    vp_max.device)
        else:
            result.viewpoint = vp
            result.viewpoint_residual = vp_res
    if h is not None:
        h = h.view(-1, num_bbox_reg_classes, 2)  # R x C x bins
        h = h[filter_mask]
        h = h[keep]
        result.height = h
    return result, filter_inds[:, 0]
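Exemple #24 attaches an orientation to each box by picking the most likely viewpoint bin and, when available, refining it with a per-bin residual; the helpers `bin2ang` and `anglecorrection` are not shown in this listing. A hypothetical bin-to-angle helper, assuming `vp_bins` uniformly spaced bins covering [-pi, pi), might look like the following sketch:

import math

import torch

# Hypothetical stand-in for `bin2ang` (not shown above): returns the centre
# angle of a bin as a 1-element tensor, ready to be concatenated onto a
# (ctr_x, ctr_y, w, l) box to form a rotated box (ctr_x, ctr_y, w, l, angle).
def bin_center_angle(bin_idx: int, vp_bins: int) -> torch.Tensor:
    bin_width = 2.0 * math.pi / vp_bins
    return torch.tensor([-math.pi + (bin_idx + 0.5) * bin_width])

print(bin_center_angle(0, 8))  # ~tensor([-2.7489])
print(bin_center_angle(4, 8))  # ~tensor([0.3927])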
Exemple #25
0
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """

    # Keep untouched copies of the raw predictions so the caller can recover
    # the full per-class scores/boxes of every kept detection later.
    all_scores = scores.clone()
    all_scores = torch.unsqueeze(all_scores, 0)
    all_boxes = boxes.clone()
    all_boxes = torch.unsqueeze(all_boxes, 0)

    # Proposal-row index of every (box, class) score entry, shape R x (K + 1).
    pred_inds = torch.unsqueeze(torch.arange(scores.size(0),
                                             device=scores.device,
                                             dtype=torch.long),
                                dim=1).repeat(1, scores.size(1))

    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        pred_inds = pred_inds[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    pred_inds = pred_inds[:, :-1]

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    pred_inds = pred_inds[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    pred_inds = pred_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    result.pred_inds = pred_inds
    return result, filter_inds[:, 0], all_scores, all_boxes
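This variant additionally returns `pred_inds`, the proposal row each kept detection came from, together with untouched copies of the raw scores and boxes. A standalone illustration, with assumed toy sizes, of how such a row index survives masking and can be used to re-read the original score rows:

import torch

# Toy illustration (sizes are assumptions): each surviving detection keeps the
# index of the proposal row it came from, so the untouched score matrix can be
# re-indexed after thresholding.
R, K = 6, 3                                            # proposals, fg classes
all_scores = torch.rand(R, K + 1)                      # includes background col
pred_inds = torch.arange(R).unsqueeze(1).repeat(1, K)  # (R, K) row indices
filter_mask = all_scores[:, :-1] > 0.5                 # per-(box, class) mask
kept_rows = pred_inds[filter_mask]                     # 1-D surviving row ids
full_rows = all_scores[kept_rows]                      # (num_kept, K + 1)
print(kept_rows.tolist(), tuple(full_rows.shape))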
Exemple #26
0
def fast_rcnn_inference_single_image_with_overlap(
    boxes,
    scores,
    overlap_boxes,
    overlap_probs,
    image_shape,
    score_thresh,
    nms_thresh,
    topk_per_image,
    allow_oob=False,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        overlap_boxes = overlap_boxes[valid_mask]
        overlap_probs = overlap_probs[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    if not allow_oob:
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

        assert (overlap_boxes.size(1) == 4
                ), "overlap boxes prediction has no category, but: {}".format(
                    overlap_boxes.size())
        overlap_boxes = Boxes(overlap_boxes)
        overlap_boxes.clip(image_shape)
        overlap_boxes = overlap_boxes.tensor
    else:
        boxes = boxes.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
        overlap_boxes = overlap_boxes[filter_inds[:, 0]]
    else:
        boxes = boxes[filter_mask]
        overlap_boxes = overlap_boxes[filter_inds[:, 0]]
    scores = scores[filter_mask]
    overlap_probs = overlap_probs[filter_mask]

    # Apply per-class NMS
    self_defined_nms_on = True  # False -> fall back to torchvision nms below
    if self_defined_nms_on:
        boxes = np.ascontiguousarray(boxes.cpu())
        scores = np.ascontiguousarray(scores.cpu())
        overlap_probs = np.ascontiguousarray(overlap_probs.cpu())
        overlap_boxes = np.ascontiguousarray(overlap_boxes.cpu())

        keep = batched_noh_nms(boxes,
                               scores,
                               overlap_probs,
                               overlap_boxes,
                               Nt=nms_thresh,
                               thresh=0.01,
                               method=3)

        boxes = torch.from_numpy(boxes).cuda()
        scores = torch.from_numpy(scores).cuda()
        overlap_probs = torch.from_numpy(overlap_probs).cuda()
        overlap_boxes = torch.from_numpy(overlap_boxes).cuda()
        keep = keep[scores[keep].argsort(descending=True)]
    else:
        from torchvision.ops import nms

        keep = nms(boxes, scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, overlap_boxes, overlap_probs, filter_inds = (
        boxes[keep],
        scores[keep],
        overlap_boxes[keep],
        overlap_probs[keep],
        filter_inds[keep],
    )

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.overlap_boxes = Boxes(overlap_boxes)
    result.overlap_probs = overlap_probs
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
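Exemple #26 hands the detections to a NumPy-side `batched_noh_nms` (an NMS variant that also receives the predicted overlap boxes and probabilities) and then moves the arrays back to the GPU. That routine is external to this listing; purely to illustrate the kind of CPU-side suppression loop it replaces, here is a minimal greedy IoU NMS in NumPy that ignores the overlap predictions:

import numpy as np

# Minimal greedy NMS, shown only as a sketch of the CPU-side routine; the real
# batched_noh_nms additionally uses overlap_boxes / overlap_probs.
def greedy_nms(boxes: np.ndarray, scores: np.ndarray, iou_thresh: float) -> np.ndarray:
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]            # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= iou_thresh]  # drop everything that overlaps too much
    return np.asarray(keep, dtype=np.int64)

# keep = greedy_nms(boxes_np, scores_np, iou_thresh=0.5)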
Exemple #27
0
    def test_caffe2_pytorch_eq(self):
        ims_per_batch = 8
        post_nms_topk = 100
        detections_per_im = 10
        num_class = 80
        score_thresh = 0.05
        nms_thresh = 0.5

        image_shapes = [torch.Size([800, 800])] * ims_per_batch
        batch_splits = [post_nms_topk] * ims_per_batch

        # NOTE: There are still some minor implementation differences
        # (e.g. ordering when scores are equal across classes) that cause
        # the test to fail for some seeds.
        # Thus a fixed seed is set to make sure this test passes consistently.
        rng = torch.Generator()
        rng.manual_seed(42)
        boxes = []
        for n in batch_splits:
            box = 1000.0 * 0.5 * torch.rand(n, num_class, 4,
                                            generator=rng) + 0.001
            box[:, :, -2:] += box[:, :, :2]
            box = box.view(n, num_class * 4)
            boxes.append(box)
        scores = [
            torch.rand(n, num_class + 1, generator=rng) for n in batch_splits
        ]

        ref_results, ref_kept_indices = fast_rcnn_inference(
            boxes,
            scores,
            image_shapes,
            score_thresh=score_thresh,
            nms_thresh=nms_thresh,
            topk_per_image=detections_per_im)
        for result, kept_index, score in zip(ref_results, ref_kept_indices,
                                             scores):
            torch.testing.assert_allclose(
                score[kept_index, result.pred_classes],
                result.scores,
            )

        # clip is done in BBoxTransformOp
        c2_boxes = []
        for box, image_shape in zip(boxes, image_shapes):
            num_bbox_reg_classes = box.shape[1] // 4
            clipped_box = Boxes(box.reshape(-1, 4))
            clipped_box.clip(image_shape)
            clipped_box = clipped_box.tensor.view(-1, num_bbox_reg_classes * 4)
            c2_boxes.append(clipped_box)

        c2_boxes = cat(c2_boxes)
        c2_scores = cat(scores)
        c2_batch_splits = torch.Tensor(batch_splits)

        nms_outputs = torch.ops._caffe2.BoxWithNMSLimit(
            c2_scores,
            c2_boxes,
            c2_batch_splits,
            score_thresh=float(score_thresh),
            nms=float(nms_thresh),
            detections_per_im=int(detections_per_im),
            soft_nms_enabled=False,
            soft_nms_method="linear",
            soft_nms_sigma=0.5,
            soft_nms_min_score_thres=0.001,
            rotated=False,
            cls_agnostic_bbox_reg=False,
            input_boxes_include_bg_cls=False,
            output_classes_include_bg_cls=False,
            legacy_plus_one=False,
        )
        roi_score_nms, roi_bbox_nms, roi_class_nms, roi_batch_splits_nms, roi_keeps_nms, roi_keeps_size_nms = nms_outputs  # noqa

        roi_score_nms = roi_score_nms.split(
            roi_batch_splits_nms.int().tolist())
        roi_bbox_nms = roi_bbox_nms.split(roi_batch_splits_nms.int().tolist())
        roi_class_nms = roi_class_nms.split(
            roi_batch_splits_nms.int().tolist())
        roi_keeps_nms = roi_keeps_nms.split(
            roi_batch_splits_nms.int().tolist())

        for _score_nms, _class_nms, _keeps_nms, _score in zip(
                roi_score_nms, roi_class_nms, roi_keeps_nms, scores):
            torch.testing.assert_allclose(
                _score[_keeps_nms.to(torch.int64),
                       _class_nms.to(torch.int64)],
                _score_nms,
            )

        for ref, s, b, c in zip(ref_results, roi_score_nms, roi_bbox_nms,
                                roi_class_nms):
            s1, i1 = s.sort()
            s2, i2 = ref.scores.sort()
            torch.testing.assert_allclose(s1, s2)
            torch.testing.assert_allclose(b[i1], ref.pred_boxes.tensor[i2])
            torch.testing.assert_allclose(
                c.to(torch.int64)[i1], ref.pred_classes[i2])

        for ref, k in zip(ref_kept_indices, roi_keeps_nms):
            # NOTE: order might be different due to implementation
            ref_set = set(ref.tolist())
            k_set = set(k.tolist())
            self.assertEqual(ref_set, k_set)
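The random boxes in the test above are made well-formed by sampling (x1, y1) together with positive offsets and then adding the first two columns onto the last two, which guarantees x2 >= x1 and y2 >= y1. A standalone sketch of just that construction:

import torch

# Sketch of the well-formed random-box construction used in the test above.
g = torch.Generator().manual_seed(42)
box = 1000.0 * 0.5 * torch.rand(4, 3, 4, generator=g) + 0.001  # n=4, classes=3
box[:, :, -2:] += box[:, :, :2]          # x2 = x1 + dx, y2 = y1 + dy
assert (box[:, :, 2] >= box[:, :, 0]).all()
assert (box[:, :, 3] >= box[:, :, 1]).all()
print(box.view(4, 3 * 4).shape)          # flattened to n x (num_class * 4)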
Exemple #28
0
def fsod_fast_rcnn_inference_single_image(pred_cls, boxes, scores, image_shape,
                                          score_thresh, nms_thresh,
                                          topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fsod_fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fsod_fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)

    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        pred_cls = pred_cls[valid_mask]

    scores = scores[:, :-1]

    # Scores, boxes and class ids arrive class-major (all boxes of class 0
    # first, then class 1, ...); reshuffle them into the usual box-major
    # R x C layout before the standard filtering path.
    cls_num = pred_cls.unique().shape[0]
    box_num = int(scores.shape[0] / cls_num)

    scores = scores.reshape(cls_num, box_num).permute(1, 0)
    boxes = boxes.reshape(cls_num, box_num, 4).permute(1, 0, 2).reshape(box_num, -1)
    pred_cls = pred_cls.reshape(cls_num, box_num).permute(1, 0)

    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]
    pred_cls = pred_cls[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    #boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    boxes, scores = boxes[keep], scores[keep]
    filter_inds, pred_cls = filter_inds[keep], pred_cls[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    #result.pred_classes = filter_inds[:, 1]
    result.pred_classes = pred_cls

    return result, filter_inds[:, 0]
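In the few-shot variant above, the per-class predictions are stored class-major and permuted back into a box-major layout before thresholding and NMS. A tiny numeric illustration of that reshuffle, with assumed sizes:

import torch

# Toy class-major -> box-major reshuffle (sizes are assumptions).
cls_num, box_num = 3, 4
scores_class_major = torch.arange(cls_num * box_num, dtype=torch.float32)
scores_box_major = scores_class_major.reshape(cls_num, box_num).permute(1, 0)
print(tuple(scores_box_major.shape))  # (4, 3): one row per box, one column per class
print(scores_box_major[0])            # tensor([0., 4., 8.])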
Exemple #29
0
def fast_rcnn_inference_single_image_recon_recls(boxes,
                                                 scores,
                                                 image_shape,
                                                 score_thresh,
                                                 nms_thresh,
                                                 topk_per_image,
                                                 features,
                                                 mask_pooler,
                                                 mask_head,
                                                 recon_net=None,
                                                 alpha=2,
                                                 recls=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]

    scores = scores[filter_mask]

    # apply recon net
    mask_features = mask_pooler(features, [Boxes(boxes)])
    if mask_head.cfg.MODEL.ROI_HEADS.NAME == "StandardROIHeads":
        pred_mask_logits = mask_head(mask_features)
    else:
        results = Instances(image_shape)
        results.pred_classes = filter_inds[:, 1]

        pred_mask_logits, _, = mask_head(mask_features, [results])

    n = 1
    if recls and pred_mask_logits[0][1].size(0) != 0:
        if recls.rescoring:
            pred_visible_mask_logits = pred_mask_logits[1][1] if len(
                pred_mask_logits) > 1 else pred_mask_logits[0][1]
            pred_visible_mask_logits = get_pred_masks_logits_by_cls(
                pred_visible_mask_logits, filter_inds[:, 1])
            if recls.attention_mode == "mask":
                recls_logits = recls(mask_features * F.avg_pool2d(
                    (pred_visible_mask_logits > 0).float(), 2))
            else:
                recls_logits = recls(mask_features *
                                     F.avg_pool2d(pred_visible_mask_logits, 2))

            recls_prob = torch.softmax(recls_logits, dim=1)

            indices = torch.arange(recls_prob.size(0),
                                   device=recls_prob.device)
            # filter_inds[:, 1] = torch.argmax(recls_logits, dim=1)
            # scores = scores * (recls_logits[0][indices, filter_inds[:, 1]] * 0.3 + 0.7)
            scores = scores * (recls_prob[indices, filter_inds[:, 1]] * 0.4 +
                               0.6)
            n += 1

    if recon_net and pred_mask_logits[0][0].size(0):
        if recon_net.rescoring:
            mode = "normal"

            select = 1 if len(pred_mask_logits) == 2 else 0
            indices = torch.arange(pred_mask_logits[select][0].size(0),
                                   device=pred_mask_logits[select][0].device)
            pred_masks = (pred_mask_logits[select][0][indices,
                                                      filter_inds[:, 1]] >
                          0).unsqueeze(1).float()
            similiarity, recon_logits = get_similarity(pred_masks,
                                                       recon_net,
                                                       filter_inds,
                                                       post_process=mode)

            # similiarity_filter_l = ((scores > 0.6) * (similiarity > 0.8)).nonzero()
            # similiarity_filter_s = ((scores > 0.6) * (similiarity < 0.5)).nonzero()
            # if 64 > len(similiarity_filter_l) > 0:
            #     vis.images(cat([pred_masks[similiarity_filter_l[:, 0]], recon_logits[similiarity_filter_l[:, 0]]], dim=0),
            #                win_name="large similiarity:{}".format(len(similiarity_filter_l)),
            #                nrow=len(similiarity_filter_l[:, 0]))
            # if 64 > len(similiarity_filter_s) > 0:
            #     vis.images(cat([pred_masks[similiarity_filter_s[:, 0]], recon_logits[similiarity_filter_s[:, 0]]], dim=0),
            #                win_name="small similiarity:{}".format(len(similiarity_filter_s)),
            #                nrow=len(similiarity_filter_s[:, 0]))

            # Apply per-class NMS
            # print("sorted simi:{}".format(sorted(np.array(similiarity.cpu()))))
            # print("Scores changed")
            scores = scores * torch.relu(
                torch.log(
                    torch.FloatTensor([alpha]).to(similiarity.device) -
                    similiarity) /
                torch.log(torch.FloatTensor([alpha]).to(similiarity.device)))
            n += 1

    scores = scores**(1 / n)  # geometric mean of the n score factors above
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    results = Instances(image_shape)
    results.pred_boxes = Boxes(boxes)
    results.scores = scores
    results.pred_classes = filter_inds[:, 1]

    return results, filter_inds[:, 0]
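Exemple #29 multiplies the classification score by up to two extra factors (a re-classification probability term and a reconstruction-similarity term relu(log(alpha - sim) / log(alpha))) and then takes the n-th root, i.e. the geometric mean of the n factors, before NMS. A small numeric sketch of the similarity factor alone, with illustrative values:

import torch

# Numeric sketch of the similarity rescoring: sim = 0 leaves the score
# untouched (factor 1), while sim -> alpha - 1 drives the factor to 0.
alpha = 2.0
sim = torch.tensor([0.0, 0.5, 0.9, 0.99])
factor = torch.relu(torch.log(torch.tensor(alpha) - sim) / torch.log(torch.tensor(alpha)))
score = torch.tensor([0.8, 0.8, 0.8, 0.8])
n = 2                                   # classification score * similarity factor
rescored = (score * factor) ** (1 / n)  # geometric mean of the two terms
print(factor)    # ~1.00, ~0.59, ~0.14, ~0.01
print(rescored)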
Exemple #30
0
def find_top_rpn_proposals(
    proposals: List[torch.Tensor],
    pred_objectness_logits: List[torch.Tensor],
    image_sizes: List[Tuple[int, int]],
    nms_thresh: float,
    pre_nms_topk: int,
    post_nms_topk: int,
    min_box_size: float,
    training: bool,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
    highest scoring proposals among all the feature maps for each image.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4).
            All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
        image_sizes (list[tuple]): sizes (h, w) for each image
        nms_thresh (float): IoU threshold to use for NMS
        pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is per
            feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is total,
            over all feature maps.
        min_box_size (float): minimum proposal box side length in pixels (absolute units
            wrt input images).
        training (bool): True if proposals are to be used in training, otherwise False.
            This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..."
            comment.

    Returns:
        list[Instances]: list of N Instances. The i-th Instances
            stores post_nms_topk object proposals for image i, sorted by their
            objectness score in descending order.
    """
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchor for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, (proposals_i, logits_i) in enumerate(
            zip(proposals, pred_objectness_logits)):
        Hi_Wi_A = logits_i.shape[1]
        if isinstance(Hi_Wi_A, torch.Tensor):  # it's a tensor in tracing
            num_proposals_i = torch.clamp(Hi_Wi_A, max=pre_nms_topk)
        else:
            num_proposals_i = min(Hi_Wi_A, pre_nms_topk)

        # sort is faster than topk: https://github.com/pytorch/pytorch/issues/22812
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i.narrow(1, 0, num_proposals_i)
        topk_idx = idx.narrow(1, 0, num_proposals_i)

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None],
                                       topk_idx]  # N x topk x 4

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(
            torch.full((num_proposals_i, ),
                       level_id,
                       dtype=torch.int64,
                       device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results: List[Instances] = []
    for n, image_size in enumerate(image_sizes):
        boxes = Boxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        lvl = level_ids

        valid_mask = torch.isfinite(
            boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img)
        if not valid_mask.all():
            if training:
                raise FloatingPointError(
                    "Predicted boxes or scores contain Inf/NaN. Training has diverged."
                )
            boxes = boxes[valid_mask]
            scores_per_img = scores_per_img[valid_mask]
            lvl = lvl[valid_mask]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_size)
        if _is_tracing() or keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = boxes[keep], scores_per_img[
                keep], lvl[keep]

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
        # During training, topk is over the proposals from *all* images in the training batch.
        # During testing, it is over the proposals for each image separately.
        # As a result, the training behavior becomes batch-dependent,
        # and the configuration "POST_NMS_TOPK_TRAIN" ends up depending on the batch size.
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]  # keep is already sorted

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
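A usage sketch for `find_top_rpn_proposals`, assuming detectron2 is installed and the function above (with its imports) is defined in the current module; the two FPN levels, the 800x800 image size, and the box construction are illustrative:

import torch

# Illustrative inputs: proposals_i has shape (N, Hi*Wi*A, 4) and logits_i has
# shape (N, Hi*Wi*A), with N = 1 image and two feature levels.
def _random_boxes(n, size=800.0):
    xy = torch.rand(1, n, 2) * (size - 100.0)
    wh = torch.rand(1, n, 2) * 90.0 + 10.0
    return torch.cat([xy, xy + wh], dim=-1)  # well-formed x1, y1, x2, y2

proposals = [_random_boxes(1000), _random_boxes(250)]
logits = [torch.randn(1, 1000), torch.randn(1, 250)]
out = find_top_rpn_proposals(
    proposals, logits, [(800, 800)],
    nms_thresh=0.7, pre_nms_topk=500, post_nms_topk=100,
    min_box_size=0.0, training=False,
)
print(len(out), out[0].proposal_boxes.tensor.shape, out[0].objectness_logits.shape)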