def transform_proposals_seg(dataset_dict,
                            image_shape,
                            transforms,
                            *,
                            proposal_topk,
                            min_box_size=0):
    """
    Transform the proposals and the superpixel map held in ``dataset_dict``.

    Args:
        dataset_dict (dict): must contain
            * "proposals": an object exposing ``proposal_boxes.tensor``,
              ``objectness_logits`` and ``oh_labels``;
            * "superpixels": an object supporting ``.cpu().numpy()``
              (presumably a torch tensor of superpixel ids -- TODO confirm).
        image_shape (tuple): height, width
        transforms (TransformList): provides ``apply_box`` and
            ``apply_segmentation``.
        proposal_topk (int): only the first ``proposal_topk`` surviving
            proposals are kept.
        min_box_size (int): threshold handed to ``Boxes.nonempty``.

    The dict is modified in-place: "proposals" is replaced by a new
    `Instances` carrying "proposal_boxes", "objectness_logits" and
    "oh_labels"; "superpixels" is replaced by the transformed map converted
    to an int32 tensor. Nothing is returned.
    """
    source = dataset_dict["proposals"]
    warped_boxes = Boxes(
        transforms.apply_box(source.proposal_boxes.tensor.cpu().numpy()))
    logits = source.objectness_logits
    labels = source.oh_labels
    superpixel_map = dataset_dict["superpixels"].cpu().numpy()

    # Note: no duplicate-box removal (unique_boxes) is applied on this path.
    warped_boxes.clip(image_shape)
    valid = warped_boxes.nonempty(threshold=min_box_size)
    warped_boxes = warped_boxes[valid]
    logits = logits[valid]
    labels = labels[valid]

    result = Instances(image_shape)
    result.proposal_boxes = warped_boxes[:proposal_topk]
    result.objectness_logits = logits[:proposal_topk]
    result.oh_labels = labels[:proposal_topk]
    dataset_dict["proposals"] = result

    # The map goes through the transforms as float32 and is cast back to
    # int32 before being stored.
    warped_map = transforms.apply_segmentation(
        superpixel_map.astype("float32"))
    dataset_dict["superpixels"] = torch.as_tensor(
        np.ascontiguousarray(warped_map.astype("int32")))
def transform_proposals(dataset_dict,
                        image_shape,
                        transforms,
                        *,
                        proposal_topk,
                        min_box_size=0):
    """
    Apply transformations to the raw proposals in dataset_dict, if any.

    Args:
        dataset_dict (dict): a dict read from the dataset, possibly containing
            the fields "proposal_boxes", "proposal_objectness_logits" and
            "proposal_bbox_mode".
        image_shape (tuple): height, width
        transforms (TransformList): provides ``apply_box``.
        proposal_topk (int): only the first ``proposal_topk`` surviving
            proposals are kept.
        min_box_size (int): threshold handed to ``Boxes.nonempty``.

    When "proposal_boxes" is absent the dict is left untouched. Otherwise the
    three fields above are popped and a new key "proposals" is added: an
    `Instances` object with fields "proposal_boxes" and "objectness_logits".
    """
    # NOTE(review): another function with this same name is defined further
    # down; if both live in one module the later definition wins -- confirm.
    if "proposal_boxes" not in dataset_dict:
        return

    raw_boxes = dataset_dict.pop("proposal_boxes")
    source_mode = dataset_dict.pop("proposal_bbox_mode")
    xyxy_boxes = BoxMode.convert(raw_boxes, source_mode, BoxMode.XYXY_ABS)
    candidates = Boxes(transforms.apply_box(xyxy_boxes))
    scores = torch.as_tensor(
        dataset_dict.pop("proposal_objectness_logits").astype("float32"))

    candidates.clip(image_shape)

    # First pass: selection returned by Boxes.unique_boxes()
    # (presumably removes duplicate boxes -- confirm against Boxes).
    distinct = candidates.unique_boxes()
    candidates, scores = candidates[distinct], scores[distinct]

    # Second pass: selection returned by Boxes.nonempty().
    large_enough = candidates.nonempty(threshold=min_box_size)
    candidates, scores = candidates[large_enough], scores[large_enough]

    result = Instances(image_shape)
    result.proposal_boxes = candidates[:proposal_topk]
    result.objectness_logits = scores[:proposal_topk]
    dataset_dict["proposals"] = result
def transform_proposals(dataset_dict,
                        image_shape,
                        transforms,
                        *,
                        proposal_topk,
                        min_box_size=0):
    """
    Apply transformations to the proposals stored in ``dataset_dict``.

    Args:
        dataset_dict (dict): must contain "proposals", an object exposing
            ``proposal_boxes.tensor`` and ``objectness_logits``. If the key
            "proposal_file" is present, the work is delegated to
            ``transform_proposals_seg`` (which additionally handles
            "oh_labels" and "superpixels").
        image_shape (tuple): height, width
        transforms (TransformList): provides ``apply_box``.
        proposal_topk (int): only the first ``proposal_topk`` surviving
            proposals are kept.
        min_box_size (int): threshold handed to ``Boxes.nonempty``.

    The input dict is modified in-place: "proposals" is replaced by a new
    `Instances` object holding the transformed proposals in its fields
    "proposal_boxes" and "objectness_logits".
    """
    if "proposal_file" in dataset_dict:
        # Forward min_box_size as well; it used to be dropped here, so the
        # delegated path always filtered with the default threshold of 0.
        return transform_proposals_seg(dataset_dict,
                                       image_shape,
                                       transforms,
                                       proposal_topk=proposal_topk,
                                       min_box_size=min_box_size)

    source = dataset_dict["proposals"]
    boxes = source.proposal_boxes.tensor.cpu().numpy()
    boxes = Boxes(transforms.apply_box(boxes))
    objectness_logits = source.objectness_logits

    # Note: no duplicate-box removal (unique_boxes) is applied on this path.
    boxes.clip(image_shape)
    keep = boxes.nonempty(threshold=min_box_size)
    boxes = boxes[keep]
    objectness_logits = objectness_logits[keep]

    proposals = Instances(image_shape)
    proposals.proposal_boxes = boxes[:proposal_topk]
    proposals.objectness_logits = objectness_logits[:proposal_topk]
    dataset_dict["proposals"] = proposals
def find_top_rpn_proposals(
    proposals,
    pred_objectness_logits,
    images,
    nms_thresh,
    pre_nms_topk,
    post_nms_topk,
    min_box_side_len,
    training,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    clip proposals, remove small boxes, and apply per-level NMS. For each
    image, keep the first `post_nms_topk` proposals returned by NMS across
    all feature maps.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4).
            All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
        images (ImageList): Input images as an :class:`ImageList`; only its
            ``image_sizes`` attribute is read.
        nms_thresh (float): IoU threshold to use for NMS
        pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is per
            feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is total,
            over all feature maps.
        min_box_side_len (float): minimum proposal box side length in pixels (absolute units
            wrt input images).
        training (bool): accepted for interface compatibility; this
            implementation does not read it.

    Returns:
        proposals (list[Instances]): list of N Instances. The i-th Instances
            stores at most post_nms_topk object proposals for image i.
    """
    image_sizes = images.image_sizes  # in (h, w) order
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchor for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, (proposals_i, logits_i) in enumerate(
            zip(proposals, pred_objectness_logits)):
        Hi_Wi_A = logits_i.shape[1]
        num_proposals_i = min(pre_nms_topk, Hi_Wi_A)

        # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i[batch_idx, :num_proposals_i]
        topk_idx = idx[batch_idx, :num_proposals_i]

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx]  # N x topk x 4

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(
            torch.full((num_proposals_i, ),
                       level_id,
                       dtype=torch.int64,
                       device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results = []
    for n, image_size in enumerate(image_sizes):
        boxes = Boxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        lvl = level_ids

        valid_mask = torch.isfinite(
            boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img)
        if not valid_mask.all():
            boxes = boxes[valid_mask]
            scores_per_img = scores_per_img[valid_mask]
            # Keep the level ids aligned with the surviving boxes; without
            # this, the later `lvl[keep]` / batched_nms calls would see a
            # level vector whose length no longer matches the boxes.
            lvl = lvl[valid_mask]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_side_len)
        if keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = (boxes[keep], scores_per_img[keep],
                                          lvl[keep])

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
        # During training, topk is over the proposals from *all* images in the training batch.
        # During testing, it is over the proposals for each image separately.
        # As a result, the training behavior becomes batch-dependent,
        # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size.
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
def find_top_rpn_proposals(
    proposals: List[torch.Tensor],
    pred_objectness_logits: List[torch.Tensor],
    image_sizes: List[Tuple[int, int]],
    nms_thresh: float,
    pre_nms_topk: int,
    post_nms_topk: int,
    min_box_size: float,
    training: bool,
):
    """
    Pick the best RPN proposals per image.

    Per feature level the `pre_nms_topk` highest-scoring proposals are
    gathered; per image the gathered boxes are clipped, boxes rejected by
    ``Boxes.nonempty(threshold=min_box_size)`` are dropped, a level-wise
    batched NMS is run and the first `post_nms_topk` survivors are kept.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape
            (N, Hi*Wi*A, 4). All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i
            has shape (N, Hi*Wi*A).
        image_sizes (list[tuple]): sizes (h, w) for each image
        nms_thresh (float): IoU threshold to use for NMS
        pre_nms_topk (int): number of top k scoring proposals to keep before
            applying NMS; applied per feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after
            applying NMS; applied per image over all feature maps.
        min_box_size (float): minimum proposal box side length in pixels
            (absolute units wrt input images).
        training (bool): when True, non-finite boxes or scores raise instead
            of being filtered out.

    Returns:
        list[Instances]: list of N Instances. The i-th Instances stores at
            most post_nms_topk object proposals for image i, in the order
            returned by ``batched_nms`` (expected to be descending score).

    Raises:
        FloatingPointError: if `training` is True and any gathered box or
            score is Inf/NaN.
    """
    device = proposals[0].device
    image_rows = torch.arange(len(image_sizes), device=device)

    # Stage 1: per level, keep the best-scoring candidates of every image.
    scores_by_level = []  # one N x topk tensor per level
    boxes_by_level = []  # one N x topk x 4 tensor per level
    ids_by_level = []  # one (topk,) tensor per level
    for level, (level_boxes, level_logits) in enumerate(
            zip(proposals, pred_objectness_logits)):
        candidates = level_logits.shape[1]
        if isinstance(candidates, torch.Tensor):
            # under tracing the shape entry is a tensor
            keep_count = torch.clamp(candidates, max=pre_nms_topk)
        else:
            keep_count = min(candidates, pre_nms_topk)

        # sort is faster than topk: https://github.com/pytorch/pytorch/issues/22812
        ranked_logits, ranked_idx = level_logits.sort(descending=True, dim=1)
        best_scores = ranked_logits.narrow(1, 0, keep_count)
        best_idx = ranked_idx.narrow(1, 0, keep_count)
        best_boxes = level_boxes[image_rows[:, None], best_idx]

        boxes_by_level.append(best_boxes)
        scores_by_level.append(best_scores)
        ids_by_level.append(
            torch.full((keep_count, ),
                       level,
                       dtype=torch.int64,
                       device=device))

    # Stage 2: merge the levels.
    all_scores = cat(scores_by_level, dim=1)
    all_boxes = cat(boxes_by_level, dim=1)
    all_level_ids = cat(ids_by_level, dim=0)

    # Stage 3: per image, filter, run level-wise NMS and truncate.
    per_image: List[Instances] = []
    for img_idx, image_size in enumerate(image_sizes):
        img_boxes = Boxes(all_boxes[img_idx])
        img_scores = all_scores[img_idx]
        img_levels = all_level_ids

        finite = torch.isfinite(
            img_boxes.tensor).all(dim=1) & torch.isfinite(img_scores)
        if not finite.all():
            if training:
                raise FloatingPointError(
                    "Predicted boxes or scores contain Inf/NaN. Training has diverged."
                )
            img_boxes = img_boxes[finite]
            img_scores = img_scores[finite]
            img_levels = img_levels[finite]
        img_boxes.clip(image_size)

        # Drop boxes rejected by nonempty(); under tracing the filter is
        # always applied so that .item() is never evaluated.
        big_enough = img_boxes.nonempty(threshold=min_box_size)
        if _is_tracing() or big_enough.sum().item() != len(img_boxes):
            img_boxes = img_boxes[big_enough]
            img_scores = img_scores[big_enough]
            img_levels = img_levels[big_enough]

        survivors = batched_nms(img_boxes.tensor, img_scores, img_levels,
                                nms_thresh)
        # Unlike Detectron1 (https://github.com/facebookresearch/Detectron/issues/459),
        # the post-NMS top-k is taken per image, so the result does not
        # depend on the batch size.
        survivors = survivors[:post_nms_topk]  # already sorted

        picked = Instances(image_size)
        picked.proposal_boxes = img_boxes[survivors]
        picked.objectness_logits = img_scores[survivors]
        per_image.append(picked)
    return per_image
def boxes_fusion_single_image(boxes,
                              scores,
                              classes,
                              image_shape,
                              nms_thresh=0.5,
                              topk_per_image=-1,
                              method='nms',
                              device="cpu"):
    """
    Fuse several sets of detections for one image into a single `Instances`.

    Args:
        boxes (list): one entry per detection set; each entry is wrapped in
            ``Boxes`` (presumably an (K, 4) XYXY tensor in absolute pixels
            -- TODO confirm against callers).
        scores (list): per-set score tensors, aligned with ``boxes``.
        classes (list): per-set class tensors, aligned with ``boxes``.
        image_shape (tuple): height, width; used to clip and to
            normalise / de-normalise box coordinates.
        nms_thresh (float): passed as ``iou_thr`` to the fusion routine.
        topk_per_image (int): if >= 0, keep only the first
            ``topk_per_image`` fused detections.
        method (str): "nms" or "wbf". NOTE(review): both values currently
            run ``weighted_boxes_fusion``; no separate NMS backend is wired
            up in this function.
        device: device on which the returned tensors are placed.

    Returns:
        Instances: with fields "pred_boxes", "scores" and "pred_classes".

    The input lists are not modified.
    """
    assert method in ["nms", "wbf"], f"Not implemented method {method}"
    assert len(scores) == len(boxes) and len(scores) == len(classes), \
        "Length of boxes, scores, classes is not equal!"

    # Build fresh lists rather than overwriting the caller's entries with
    # filtered, normalised numpy arrays.
    norm_boxes, kept_scores, kept_classes = [], [], []
    for i, raw_boxes in enumerate(boxes):
        boxes_per_img = Boxes(raw_boxes)
        boxes_per_img.clip(image_shape)
        # filter the width or height < threshold boxes
        keep = boxes_per_img.nonempty(1.0)
        boxes_per_img = boxes_per_img[keep].tensor.cpu().numpy()
        # normalise: x coordinates by width, y coordinates by height
        boxes_per_img[:, 0::2] = boxes_per_img[:, 0::2] / image_shape[1]
        boxes_per_img[:, 1::2] = boxes_per_img[:, 1::2] / image_shape[0]

        norm_boxes.append(boxes_per_img)
        kept_scores.append(scores[i][keep].cpu().numpy())
        kept_classes.append(classes[i][keep].cpu().numpy())

    # Both supported methods issue this same call (see the docstring note).
    fused_boxes, fused_scores, fused_classes = weighted_boxes_fusion(
        norm_boxes, kept_scores, kept_classes, iou_thr=nms_thresh)

    if topk_per_image >= 0:
        fused_boxes = fused_boxes[:topk_per_image]
        fused_scores = fused_scores[:topk_per_image]
        fused_classes = fused_classes[:topk_per_image]

    # resize to image shape
    fused_boxes[:, 0::2] = fused_boxes[:, 0::2] * image_shape[1]
    fused_boxes[:, 1::2] = fused_boxes[:, 1::2] * image_shape[0]

    # to tensor
    fused_boxes = torch.from_numpy(fused_boxes).to(device=device)
    fused_scores = torch.from_numpy(fused_scores).to(device=device)
    fused_classes = torch.from_numpy(fused_classes).to(device=device)

    result = Instances(image_shape)
    pred_boxes = Boxes(fused_boxes)
    pred_boxes.clip(image_shape)
    result.pred_boxes = pred_boxes
    result.scores = fused_scores
    result.pred_classes = fused_classes
    return result