def transform_proposals(dataset_dict, image_shape, transforms, *, proposal_topk, min_box_size=0): if "proposal_boxes" in dataset_dict: boxes = transforms.apply_box( BoxMode.convert( dataset_dict.pop("proposal_boxes"), dataset_dict.pop("proposal_bbox_mode"), BoxMode.XYXY_ABS, )) boxes = Boxes(boxes) objectness_logits = torch.as_tensor( dataset_dict.pop("proposal_objectness_logits").astype("float32")) boxes.clip(image_shape) keep = boxes.nonempty(threshold=min_box_size) boxes = boxes[keep] objectness_logits = objectness_logits[keep] proposals = Instances(image_shape) proposals.proposal_boxes = boxes[:proposal_topk] proposals.objectness_logits = objectness_logits[:proposal_topk] dataset_dict["proposals"] = proposals
def _create_proposals_from_boxes(self, boxes, image_sizes): boxes = [Boxes(b.detach()) for b in boxes] proposals = [] for boxes_per_image, image_size in zip(boxes, image_sizes): boxes_per_image.clip(image_size) if self.training: boxes_per_image = boxes_per_image[boxes_per_image.nonempty()] prop = Instances(image_size) prop.proposal_boxes = boxes_per_image proposals.append(prop) return proposals
def add_ground_truth_to_proposals_single_image(gt_boxes, proposals): device = proposals.objectness_logits.device gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) gt_proposal = Instances(proposals.image_size) gt_proposal.proposal_boxes = gt_boxes gt_proposal.objectness_logits = gt_logits new_proposals = Instances.cat([proposals, gt_proposal]) return new_proposals
def find_top_rpn_proposals( proposals: List[torch.Tensor], pred_objectness_logits: List[torch.Tensor], image_sizes: List[Tuple[int, int]], nms_thresh: float, pre_nms_topk: int, post_nms_topk: int, min_box_size: int, training: bool, ): num_images = len(image_sizes) device = proposals[0].device topk_scores = [] topk_proposals = [] level_ids = [] batch_idx = torch.arange(num_images, device=device) for level_id, proposals_i, logits_i in zip( itertools.count(), proposals, pred_objectness_logits ): Hi_Wi_A = logits_i.shape[1] num_proposals_i = min(pre_nms_topk, Hi_Wi_A) logits_i, idx = logits_i.sort(descending=True, dim=1) topk_scores_i = logits_i[batch_idx, :num_proposals_i] topk_idx = idx[batch_idx, :num_proposals_i] topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] topk_proposals.append(topk_proposals_i) topk_scores.append(topk_scores_i) level_ids.append( torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device) ) topk_scores = cat(topk_scores, dim=1) topk_proposals = cat(topk_proposals, dim=1) level_ids = cat(level_ids, dim=0) results = [] for n, image_size in enumerate(image_sizes): boxes = Boxes(topk_proposals[n]) scores_per_img = topk_scores[n] lvl = level_ids valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) if not valid_mask.all(): if training: raise FloatingPointError( "Predicted boxes or scores contain Inf/NaN. Training has diverged." ) boxes = boxes[valid_mask] scores_per_img = scores_per_img[valid_mask] lvl = lvl[valid_mask] boxes.clip(image_size) keep = boxes.nonempty(threshold=min_box_size) if keep.sum().item() != len(boxes): boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep] keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh) keep = keep[:post_nms_topk] res = Instances(image_size) res.proposal_boxes = boxes[keep] res.objectness_logits = scores_per_img[keep] results.append(res) return results