def select_instances(self, proposals):
    """
    Sub-sample foreground proposals for the subsequent mask segmentation.

    Implements the random sampling and the instance-level sampling
    proposed by CondInst (conference version), CondInst (journal version)
    and the BoxInst paper.

    Notes:
        1. Random sampling selects instances at random ``per batch`` from
           the foreground pixels. It is adopted by setting
           ``self.max_proposals`` to a positive number and
           ``self.topk_proposals_per_im`` to -1. The default setting is
           500 max proposals in CondInst (conference version).
        2. Instance-level sampling selects instances ``per image`` by the
           top-k score of the foreground pixels, while every instance
           (ground-truth object) keeps at least one proposal. The score of
           a pixel is the max class score across all classes multiplied by
           the centerness score. It is adopted by setting
           ``self.max_proposals`` to -1 and ``self.topk_proposals_per_im``
           to a positive number. The default setting is 64 proposals per
           image in CondInst (journal version) and the BoxInst paper.
        3. If ``self.max_proposals`` is set but not exceeded, the per-image
           strategy is still applied whenever
           ``self.topk_proposals_per_im`` is not -1.

    Args:
        proposals (Instances): foreground proposals of the whole batch.
            The per-image strategy reads the fields ``im_inds``,
            ``gt_inds``, ``pred_logits`` and ``pred_centerness``.

    Returns:
        Instances: the selected proposals. The input is returned unchanged
        when it is empty or when neither strategy applies.
    """
    # Nothing to sample from. Without this guard the per-image branch would
    # fail while computing the number of images from an empty ``im_inds``.
    if len(proposals) == 0:
        return proposals

    if self.max_proposals != -1 and len(proposals) > self.max_proposals:
        # Random sampling per batch.
        inds = torch.randperm(len(proposals), device=self.device)
        return proposals[inds[:self.max_proposals]]

    if self.topk_proposals_per_im != -1:
        # Instance-balanced sampling per image.
        instances_list_per_gt = []
        # Images located after the last image that owns a proposal are not
        # visited, because the count is derived from the largest index.
        num_images = int(proposals.im_inds.max()) + 1
        for im_id in range(num_images):
            instances_per_image = proposals[proposals.im_inds == im_id]
            if len(instances_per_image) == 0:
                instances_list_per_gt.append(instances_per_image)
                continue

            unique_gt_inds = instances_per_image.gt_inds.unique()
            # Split the per-image budget evenly between the ground-truth
            # objects, but always keep at least one proposal per object.
            num_instances_per_gt = max(
                int(self.topk_proposals_per_im / len(unique_gt_inds)), 1)

            for gt_ind in unique_gt_inds:
                instances_per_gt = instances_per_image[
                    instances_per_image.gt_inds == gt_ind]
                if len(instances_per_gt) > num_instances_per_gt:
                    # "balanced_with_max_score" strategy: rank by the best
                    # class score. The alternative
                    # "balanced_with_class_score" strategy would instead
                    # use the score at the ground-truth class index
                    # (``instances_per_gt.gt_cls[0]``).
                    scores = instances_per_gt.pred_logits.sigmoid().max(
                        dim=1)[0]
                    ctrness_pred = (
                        instances_per_gt.pred_centerness.sigmoid()[:, 0])
                    inds = (scores * ctrness_pred).topk(
                        k=num_instances_per_gt, dim=0)[1]
                    instances_per_gt = instances_per_gt[inds]
                instances_list_per_gt.append(instances_per_gt)
        return Instances.cat(instances_list_per_gt)

    return proposals
def proposals_inference(self, box_cls, box_delta, box_center, box_param,
                        shifts, images):
    """
    Run single-image inference for every image and merge the results.

    Each head output is first flattened with ``permute_to_N_HWA_K`` so that
    it becomes a list with one tensor per level, each of shape
    (N, Hi x Wi, K), where K is ``self.num_classes`` for ``box_cls``, 4 for
    ``box_delta``, 1 for ``box_center`` and ``self.num_gen_params`` for
    ``box_param``.

    Args:
        box_cls, box_delta, box_center, box_param (list[Tensor]): per-level
            head outputs for the whole batch.
        shifts (list): one entry per image; each entry is iterated per
            level to build the level index of every location.
        images: object exposing ``image_sizes``, indexed by image.

    Returns:
        Instances: the per-image results of ``self.inference_single_image``
        concatenated with ``Instances.cat``.
    """
    cls_levels = [permute_to_N_HWA_K(t, self.num_classes) for t in box_cls]
    reg_levels = [permute_to_N_HWA_K(t, 4) for t in box_delta]
    ctr_levels = [permute_to_N_HWA_K(t, 1) for t in box_center]
    param_levels = [
        permute_to_N_HWA_K(t, self.num_gen_params) for t in box_param
    ]

    results = []
    for img_idx, shifts_per_image in enumerate(shifts):
        image_size = images.image_sizes[img_idx]

        def of_this_image(levels, idx=img_idx):
            # Pick the slice belonging to the current image on each level.
            return [per_level[idx] for per_level in levels]

        cls_per_image = of_this_image(cls_levels)
        reg_per_image = of_this_image(reg_levels)
        ctr_per_image = of_this_image(ctr_levels)
        param_per_image = of_this_image(param_levels)

        # One long tensor per level, filled with that level's index.
        level_ids = []
        for level, loc in enumerate(shifts_per_image):
            level_ids.append(
                loc.new_ones(len(loc), dtype=torch.long) * level)

        results.append(
            self.inference_single_image(
                cls_per_image, reg_per_image, ctr_per_image,
                param_per_image, shifts_per_image, tuple(image_size),
                level_ids, img_idx))

    return Instances.cat(results)
def merge_branch_instances(instances, num_branch, nms_thrsh, topk_per_image):
    """
    Merge detection results from different branches of TridentNet.

    Return detection results by applying non-maximum suppression (NMS) on
    bounding boxes and keep the unsuppressed boxes and other instances
    (e.g. mask) if any.

    Args:
        instances (list[Instances]): A list of N * num_branch instances
            that store detection results. Contain N images and each image
            has num_branch instances. The result of branch ``j`` for image
            ``i`` is read from index ``i + N * j``.
        num_branch (int): Number of branches used for merging detection
            results for each image. When it is 1 the input list is returned
            as is.
        nms_thrsh (float): The threshold to use for box non-maximum
            suppression. Value in [0, 1].
        topk_per_image (int): The number of top scoring detections to
            return. Set < 0 to return all detections.

    Returns:
        results: (list[Instances]): A list of N instances, one for each
            image in the batch, that stores the topk most confident
            detections after merging results from multiple branches.
    """
    if num_branch == 1:
        return instances

    batch_size = len(instances) // num_branch
    results = []
    for i in range(batch_size):
        instance = Instances.cat(
            [instances[i + batch_size * j] for j in range(num_branch)])

        # Apply per-class NMS.
        # NOTE(review): the truncation below assumes ``keep`` is ordered by
        # decreasing score, as torchvision's batched_nms returns it;
        # confirm for the batched_nms imported by this file.
        keep = batched_nms(instance.pred_boxes.tensor, instance.scores,
                           instance.pred_classes, nms_thrsh)

        # A negative ``topk_per_image`` means "keep everything". Slicing
        # with a negative bound would instead drop detections from the
        # tail (e.g. ``keep[:-1]`` removes the last one), so only truncate
        # for non-negative values.
        if topk_per_image >= 0:
            keep = keep[:topk_per_image]

        results.append(instance[keep])

    return results
def add_ground_truth_to_proposals_single_image(gt_boxes, proposals):
    """
    Append the ground-truth boxes of one image to that image's proposals.

    Args:
        Same as `add_ground_truth_to_proposals`, but with gt_boxes and
        proposals per image. ``proposals`` must carry the fields
        ``objectness_logits`` and ``image_size``.

    Returns:
        Same as `add_ground_truth_to_proposals`, but for only one image:
        the proposals followed by one extra entry per ground-truth box.
    """
    # Concatenation requires both sides to expose the same fields, so the
    # ground-truth boxes get an objectness logit too. It is the logit of a
    # probability just below 1, i.e. P(object) is approximately 1.
    near_one = 1.0 - 1e-10
    gt_logit_value = math.log(near_one / (1 - near_one))

    target_device = proposals.objectness_logits.device

    gt_proposal = Instances(proposals.image_size)
    gt_proposal.proposal_boxes = gt_boxes
    gt_proposal.objectness_logits = gt_logit_value * torch.ones(
        len(gt_boxes), device=target_device)

    return Instances.cat([proposals, gt_proposal])