def __init__(
    self,
    box2box_transform,
    pred_class_logits,
    pred_proposal_deltas,
    proposals,
    smooth_l1_beta=0,
):
    self.box2box_transform = box2box_transform
    self.num_preds_per_image = [len(p) for p in proposals]
    self.pred_class_logits = pred_class_logits
    self.pred_proposal_deltas = pred_proposal_deltas
    self.smooth_l1_beta = smooth_l1_beta
    self.image_shapes = [x.image_size for x in proposals]

    if len(proposals):
        box_type = type(proposals[0].proposal_boxes)
        # cat(..., dim=0) concatenates over all images in the batch
        self.proposals = box_type.cat([p.proposal_boxes for p in proposals])
        assert (
            not self.proposals.tensor.requires_grad
        ), "Proposals should not require gradients!"

        # The following fields should exist only when training.
        if proposals[0].has("gt_boxes"):
            self.gt_boxes = box_type.cat([p.gt_boxes for p in proposals])
            assert proposals[0].has("gt_classes")
            self.gt_classes = cat([p.gt_classes for p in proposals], dim=0)
    else:
        self.proposals = Boxes(
            torch.zeros(0, 4, device=self.pred_proposal_deltas.device)
        )
    self._no_instances = len(proposals) == 0  # no instances found
def _create_proposals_from_boxes(self, boxes, image_sizes):
    # Just like RPN, the proposals should not have gradients
    boxes = [Boxes(b.detach()) for b in boxes]
    proposals = []
    for boxes_per_image, image_size in zip(boxes, image_sizes):
        boxes_per_image.clip(image_size)
        if self.training:
            # do not filter empty boxes at inference time,
            # because the scores from each stage need to be aligned and added later
            boxes_per_image = boxes_per_image[boxes_per_image.nonempty()]
        prop = Instances(image_size)
        prop.proposal_boxes = boxes_per_image
        proposals.append(prop)
    return proposals
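# Illustrative sketch (not from the original code): the Boxes.clip / Boxes.nonempty
# calls used above, shown on toy data. Assumes detectron2 is installed; the numbers
# are made up for demonstration.
import torch
from detectron2.structures import Boxes

b = Boxes(torch.tensor([[-5.0, -5.0, 20.0, 20.0],     # sticks out of the image
                        [30.0, 30.0, 30.0, 40.0]]))   # zero-width (empty) box
b.clip((100, 100))        # clamp coordinates in place to the (h, w) image size
print(b.tensor)           # first box becomes [0, 0, 20, 20]
print(b.nonempty())       # tensor([ True, False]) -> the empty box would be filtered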
def label_and_sample_proposals(
    self, proposals: List[Instances], targets: List[Instances]
) -> List[Instances]:
    gt_boxes = [x.gt_boxes for x in targets]
    # Append the ground-truth boxes to the proposals. This trick guarantees that some
    # proposals match the ground truth early in training; without it, no RoI may be
    # matched and the loss can degenerate to zero.
    if self.proposal_append_gt:
        proposals = add_ground_truth_to_proposals(gt_boxes, proposals)

    proposals_with_gt = []
    num_fg_samples = []
    num_bg_samples = []
    for proposals_per_image, targets_per_image in zip(proposals, targets):
        has_gt = len(targets_per_image) > 0
        match_quality_matrix = pairwise_iou(
            targets_per_image.gt_boxes, proposals_per_image.proposal_boxes
        )
        matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix)
        sampled_idxs, gt_classes = self._sample_proposals(
            matched_idxs, matched_labels, targets_per_image.gt_classes
        )

        # Set target attributes of the sampled proposals:
        proposals_per_image = proposals_per_image[sampled_idxs]
        proposals_per_image.gt_classes = gt_classes

        # We index all the attributes of targets that start with "gt_"
        # and have not been added to proposals yet (="gt_classes").
        if has_gt:
            sampled_targets = matched_idxs[sampled_idxs]
            for (trg_name, trg_value) in targets_per_image.get_fields().items():
                if trg_name.startswith("gt_") and not proposals_per_image.has(trg_name):
                    proposals_per_image.set(trg_name, trg_value[sampled_targets])
        else:
            gt_boxes = Boxes(
                targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 4))
            )
            proposals_per_image.gt_boxes = gt_boxes

        num_bg_samples.append((gt_classes == self.num_classes).sum().item())
        num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1])
        proposals_with_gt.append(proposals_per_image)

    return proposals_with_gt
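# Illustrative sketch (not from the original code): how the proposal matcher used by
# label_and_sample_proposals assigns proposals to ground truth. Assumes detectron2 is
# installed; the 0.5 IoU threshold mirrors the common box-head default, and all boxes
# are made up for demonstration.
import torch
from detectron2.structures import Boxes, pairwise_iou
from detectron2.modeling.matcher import Matcher

gt = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0]]))            # one ground-truth box
props = Boxes(torch.tensor([[0.0, 0.0, 9.0, 9.0],             # IoU 0.81 -> foreground
                            [20.0, 20.0, 30.0, 30.0]]))       # IoU 0.0  -> background

matcher = Matcher(thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False)
iou = pairwise_iou(gt, props)                                  # (num_gt, num_proposals)
matched_idxs, matched_labels = matcher(iou)
print(matched_idxs, matched_labels)                            # tensor([0, 0]) tensor([1, 0])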
def collate_fn(self, batch):
    img_list, box_list, cls_list, mask_list, meta_list = zip(*batch)
    assert len(img_list) == len(cls_list) == len(box_list) == len(mask_list) == len(meta_list)

    batched_inputs = []
    for i in range(len(img_list)):
        _dict = {}
        _dict['file_name'] = meta_list[i]['file_name']
        _dict['height'] = meta_list[i]['height']
        _dict['width'] = meta_list[i]['width']
        _dict['image'] = img_list[i]
        # image_size is the (height, width) of the image after resizing
        _dict['instances'] = Instances(
            image_size=(meta_list[i]['resize'][0], meta_list[i]['resize'][1])
        )
        _dict['instances'].gt_boxes = Boxes(box_list[i])
        _dict['instances'].gt_classes = cls_list[i].long()
        _dict['instances'].gt_masks = BitMasks(mask_list[i])
        batched_inputs.append(_dict)
    return batched_inputs
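# Illustrative sketch (not from the original code): what a single element of the
# `batched_inputs` list built by collate_fn looks like. Assumes detectron2 is installed;
# the file name and all tensor values are made up for demonstration.
import torch
from detectron2.structures import Instances, Boxes, BitMasks

h, w = 480, 640                                        # size after resizing
inst = Instances(image_size=(h, w))
inst.gt_boxes = Boxes(torch.tensor([[10.0, 20.0, 100.0, 200.0]]))
inst.gt_classes = torch.tensor([3]).long()
inst.gt_masks = BitMasks(torch.zeros((1, h, w), dtype=torch.bool))

sample = {
    "file_name": "000001.jpg",
    "height": 480,                                     # original image height
    "width": 640,                                      # original image width
    "image": torch.zeros(3, h, w),                     # CHW image tensor
    "instances": inst,
}
# A detectron2 model's forward() consumes a list of such dicts: model([sample, ...])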
def _match_and_label_boxes(self, proposals, stage, targets):
    num_fg_samples, num_bg_samples = [], []
    for proposals_per_image, targets_per_image in zip(proposals, targets):
        match_quality_matrix = pairwise_iou(
            targets_per_image.gt_boxes, proposals_per_image.proposal_boxes
        )
        # proposal_labels are 0 or 1
        matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix)
        if len(targets_per_image) > 0:
            gt_classes = targets_per_image.gt_classes[matched_idxs]
            # Label unmatched proposals (0 label from matcher) as background (label=num_classes)
            gt_classes[proposal_labels == 0] = self.num_classes
            gt_boxes = targets_per_image.gt_boxes[matched_idxs]
        else:
            gt_classes = torch.zeros_like(matched_idxs) + self.num_classes
            gt_boxes = Boxes(
                targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 4))
            )
        proposals_per_image.gt_classes = gt_classes
        proposals_per_image.gt_boxes = gt_boxes

        num_fg_samples.append((proposal_labels == 1).sum().item())
        num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1])
    return proposals
def label_and_sample_anchors(
    self, anchors: List[Boxes], gt_instances: List[Instances]
):
    anchors = Boxes.cat(anchors)

    gt_boxes = [x.gt_boxes for x in gt_instances]
    image_sizes = [x.image_size for x in gt_instances]
    del gt_instances

    gt_labels = []
    matched_gt_boxes = []
    for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes):
        """
        image_size_i: (h, w) for the i-th image
        gt_boxes_i: ground-truth boxes for the i-th image
        """
        match_quality_matrix = pairwise_iou(gt_boxes_i.to('cpu'), anchors.to('cpu'))
        matched_idxs, gt_labels_i = self.anchor_matcher(match_quality_matrix)
        # Matching is memory-expensive and may result in CPU tensors. But the result is small
        gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device)
        del match_quality_matrix

        if self.boundary_threshold >= 0:
            anchors_inside_image = anchors.inside_box(image_size_i, self.boundary_threshold)
            gt_labels_i[~anchors_inside_image] = -1

        # A vector of labels (-1, 0, 1) for each anchor
        gt_labels_i = self._subsample_labels(gt_labels_i)

        if len(gt_boxes_i) == 0:
            # These values won't be used anyway since the anchor is labeled as background
            matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
        else:
            matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor

        gt_labels.append(gt_labels_i)  # N,AHW
        matched_gt_boxes.append(matched_gt_boxes_i)
    return gt_labels, matched_gt_boxes
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image
):
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
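# Illustrative sketch (not from the original code): calling fast_rcnn_inference_single_image
# on toy predictions. `boxes` is R x (C*4) (class-specific regression) and `scores` is
# R x (C+1), with the background class in the last column, matching the comments above.
# Assumes the function and its detectron2 imports are in scope; all values are made up.
import torch

R, C = 5, 3                                            # 5 proposals, 3 foreground classes
xy = torch.rand(R, C, 2) * 200                         # top-left corners
wh = torch.rand(R, C, 2) * 100 + 1                     # positive widths/heights
boxes = torch.cat([xy, xy + wh], dim=2).reshape(R, C * 4)
scores = torch.softmax(torch.rand(R, C + 1), dim=1)
image_shape = (480, 640)                               # (height, width)

result, kept_idx = fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh=0.05, nms_thresh=0.5, topk_per_image=100
)
print(len(result), result.pred_classes, kept_idx)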
def find_top_rpn_proposals(
    proposals: List[torch.Tensor],
    pred_objectness_logits: List[torch.Tensor],
    image_sizes: List[Tuple[int, int]],
    nms_thresh: float,
    pre_nms_topk: int,
    post_nms_topk: int,
    min_box_side_len: int,
    training: bool,
):
    """
    Returns:
        list[Instances]: list of N Instances. The i-th Instances stores post_nms_topk object
            proposals for image i, sorted by their objectness score in descending order.
    """
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchor for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, proposals_i, logits_i in zip(
        itertools.count(), proposals, pred_objectness_logits
    ):
        Hi_Wi_A = logits_i.shape[1]
        num_proposals_i = min(pre_nms_topk, Hi_Wi_A)

        # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812)
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i[batch_idx, :num_proposals_i]
        topk_idx = idx[batch_idx, :num_proposals_i]

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx]  # N x topk x 4

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(
            torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device)
        )

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results = []
    for n, image_size in enumerate(image_sizes):
        boxes = Boxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        lvl = level_ids

        valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img)
        if not valid_mask.all():
            if training:
                raise FloatingPointError(
                    "Predicted boxes or scores contain Inf/NaN. Training has diverged."
                )
            boxes = boxes[valid_mask]
            scores_per_img = scores_per_img[valid_mask]
            lvl = lvl[valid_mask]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_side_len)
        if keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep]

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        keep = keep[:post_nms_topk]  # keep is already sorted

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
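# Illustrative sketch (not from the original code): a toy call to find_top_rpn_proposals
# with two feature levels and a single image. Assumes the function above and its
# detectron2 imports (Boxes, Instances, cat, batched_nms, itertools) are in scope;
# all anchor coordinates and objectness logits are random, made up for demonstration.
import torch

num_images = 1

def _toy_level(n_anchors):
    # proposals: N x (Hi*Wi*A) x 4 in XYXY form; objectness logits: N x (Hi*Wi*A)
    xy = torch.rand(num_images, n_anchors, 2) * 300
    wh = torch.rand(num_images, n_anchors, 2) * 100 + 1
    return torch.cat([xy, xy + wh], dim=2), torch.randn(num_images, n_anchors)

(p1, l1), (p2, l2) = _toy_level(50), _toy_level(30)
results = find_top_rpn_proposals(
    [p1, p2], [l1, l2], image_sizes=[(480, 640)],
    nms_thresh=0.7, pre_nms_topk=1000, post_nms_topk=100,
    min_box_side_len=0, training=False,
)
print(len(results[0]), results[0].objectness_logits[:3])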