def _get_ground_truth_per_level(self):
    gt_objectness_logits = []
    gt_anchor_deltas = []
    for image_idx, (image_size_i, anchors_i, gt_boxes_i) in enumerate(
        zip(self.image_sizes, self.anchors, self.gt_boxes)
    ):
        gt_objectness_logits_i = []
        gt_anchor_deltas_i = []
        for lvl_anchors in anchors_i:
            match_quality_matrix_lvl = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, lvl_anchors)
            matched_idxs_lvl, gt_objectness_logits_lvl = retry_if_cuda_oom(self.anchor_matcher)(
                match_quality_matrix_lvl
            )
            # Matching is memory-expensive and may result in CPU tensors. But the result is small
            gt_objectness_logits_lvl = gt_objectness_logits_lvl.to(device=gt_boxes_i.device)
            del match_quality_matrix_lvl

            if len(gt_boxes_i) == 0:
                # These values won't be used anyway since the anchors are labeled as background
                gt_anchor_deltas_lvl = torch.zeros_like(lvl_anchors.tensor)
            else:
                matched_gt_boxes_lvl = gt_boxes_i[matched_idxs_lvl]
                gt_anchor_deltas_lvl = self.box2box_transform.get_deltas(
                    lvl_anchors.tensor, matched_gt_boxes_lvl.tensor
                )

            gt_objectness_logits_i.append(gt_objectness_logits_lvl)
            gt_anchor_deltas_i.append(gt_anchor_deltas_lvl)

        # Concatenate the per-level results into one tensor per image
        gt_anchor_deltas_i = torch.cat(gt_anchor_deltas_i)
        gt_objectness_logits_i = torch.cat(gt_objectness_logits_i)
        gt_objectness_logits.append(gt_objectness_logits_i)
        gt_anchor_deltas.append(gt_anchor_deltas_i)

    return gt_objectness_logits, gt_anchor_deltas
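# A minimal, hypothetical sketch in plain PyTorch (not part of the function above): the
# per-level variant concatenates level-wise results in level order, so the inverse operation
# is a torch.split over the known anchor counts per level. The numbers below are illustrative.
import torch

num_anchors_per_level = [16, 4, 1]               # e.g. three FPN levels of decreasing resolution
labels_per_image = torch.randint(-1, 2, (21,))   # concatenated labels, length = sum of the above
labels_per_level = torch.split(labels_per_image, num_anchors_per_level)
assert [len(x) for x in labels_per_level] == num_anchors_per_level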
def label_and_sample_anchors(
    self, anchors: List[Boxes], gt_instances: List[Instances]
) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
    """
    Args:
        anchors (list[Boxes]): anchors for each feature map.
        gt_instances: the ground-truth instances for each image.

    Returns:
        list[Tensor]:
            List of #img tensors. i-th element is a vector of labels whose length is
            the total number of anchors across all feature maps R = sum(Hi * Wi * A).
            Label values are in {-1, 0, 1}, with meanings:
            -1 = ignore; 0 = negative class; 1 = positive class.
        list[Tensor]:
            i-th element is a Rx4 tensor. The values are the matched gt boxes for each
            anchor. Values are undefined for those anchors not labeled as 1.
    """
    anchors = Boxes.cat(anchors)

    gt_boxes = [x.gt_boxes for x in gt_instances]
    image_sizes = [x.image_size for x in gt_instances]
    del gt_instances

    gt_labels = []
    matched_gt_boxes = []
    for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes):
        """
        image_size_i: (h, w) for the i-th image
        gt_boxes_i: ground-truth boxes for i-th image
        """
        match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors)
        matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix)
        # Matching is memory-expensive and may result in CPU tensors. But the result is small
        gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device)
        del match_quality_matrix

        if self.anchor_boundary_thresh >= 0:
            # Discard anchors that go out of the boundaries of the image
            # NOTE: This is legacy functionality that is turned off by default in Detectron2
            anchors_inside_image = anchors.inside_box(image_size_i, self.anchor_boundary_thresh)
            gt_labels_i[~anchors_inside_image] = -1

        # A vector of labels (-1, 0, 1) for each anchor
        gt_labels_i = self._subsample_labels(gt_labels_i)

        if len(gt_boxes_i) == 0:
            # These values won't be used anyway since the anchor is labeled as background
            matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
        else:
            # TODO wasted indexing computation for ignored boxes
            matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor

        gt_labels.append(gt_labels_i)  # N,AHW
        matched_gt_boxes.append(matched_gt_boxes_i)
    return gt_labels, matched_gt_boxes
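# Standalone sketch of the matching step above, in plain PyTorch (no Detectron2 imports).
# The 0.3/0.7 thresholds are illustrative only; the real values come from the Matcher config,
# and the real matcher also handles low-quality matches and subsampling.
import torch

def pairwise_iou_xyxy(gt, anchors):
    # gt: (M, 4), anchors: (R, 4), both in (x1, y1, x2, y2) format
    area_gt = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])
    area_an = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
    lt = torch.max(gt[:, None, :2], anchors[None, :, :2])  # (M, R, 2)
    rb = torch.min(gt[:, None, 2:], anchors[None, :, 2:])  # (M, R, 2)
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_gt[:, None] + area_an[None, :] - inter)

gt = torch.tensor([[0., 0., 10., 10.]])
anchors = torch.tensor([[1., 1., 9., 9.], [20., 20., 30., 30.], [0., 0., 5., 5.]])
iou = pairwise_iou_xyxy(gt, anchors)          # (M, R) match quality matrix
matched_vals, matched_idxs = iou.max(dim=0)   # best gt for every anchor
labels = torch.full_like(matched_idxs, -1)    # -1 = ignore
labels[matched_vals < 0.3] = 0                # negative
labels[matched_vals >= 0.7] = 1               # positive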
def _get_ground_truth(self):
    """
    Returns:
        gt_objectness_logits: list of N tensors. Tensor i is a vector whose length is the
            total number of anchors in image i (i.e., len(anchors[i])). Label values are
            in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
        gt_anchor_deltas: list of N tensors. Tensor i has shape (len(anchors[i]), 4).
    """
    gt_objectness_logits = []
    gt_anchor_deltas = []
    # Concatenate anchors from all feature maps into a single Boxes per image.
    # anchors_i is a list[Boxes] holding the anchors of every feature map of the i-th image;
    # concatenating them yields one Boxes object per image.
    anchors = [Boxes.cat(anchors_i) for anchors_i in self.anchors]
    for image_size_i, anchors_i, gt_boxes_i in zip(self.image_sizes, anchors, self.gt_boxes):
        """
        image_size_i: (h, w) for the i-th image
        anchors_i: anchors for i-th image
        gt_boxes_i: ground-truth boxes for i-th image
        """
        match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors_i)
        # [N, ]
        matched_idxs, gt_objectness_logits_i = retry_if_cuda_oom(self.anchor_matcher)(
            match_quality_matrix
        )
        # Matching is memory-expensive and may result in CPU tensors. But the result is small
        gt_objectness_logits_i = gt_objectness_logits_i.to(device=gt_boxes_i.device)
        del match_quality_matrix

        if self.boundary_threshold >= 0:
            # Discard anchors that go out of the boundaries of the image
            # NOTE: This is legacy functionality that is turned off by default in Detectron2
            anchors_inside_image = anchors_i.inside_box(image_size_i, self.boundary_threshold)
            gt_objectness_logits_i[~anchors_inside_image] = -1

        if len(gt_boxes_i) == 0:
            # These values won't be used anyway since the anchor is labeled as background
            gt_anchor_deltas_i = torch.zeros_like(anchors_i.tensor)
        else:
            # TODO wasted computation for ignored boxes
            matched_gt_boxes = gt_boxes_i[matched_idxs]  # [N, 4]
            gt_anchor_deltas_i = self.box2box_transform.get_deltas(
                anchors_i.tensor, matched_gt_boxes.tensor
            )

        gt_objectness_logits.append(gt_objectness_logits_i)
        gt_anchor_deltas.append(gt_anchor_deltas_i)

    return gt_objectness_logits, gt_anchor_deltas
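# Plain-PyTorch sketch of the standard Faster R-CNN box parameterization that
# box2box_transform.get_deltas implements, shown here with unit weights so the formula
# is visible. get_deltas_xyxy is an illustrative name, not Detectron2 API.
import torch

def get_deltas_xyxy(anchors, gt):
    # anchors, gt: (R, 4) in (x1, y1, x2, y2) format; returns (R, 4) regression targets
    aw, ah = anchors[:, 2] - anchors[:, 0], anchors[:, 3] - anchors[:, 1]
    ax, ay = anchors[:, 0] + 0.5 * aw, anchors[:, 1] + 0.5 * ah
    gw, gh = gt[:, 2] - gt[:, 0], gt[:, 3] - gt[:, 1]
    gx, gy = gt[:, 0] + 0.5 * gw, gt[:, 1] + 0.5 * gh
    return torch.stack(
        [(gx - ax) / aw, (gy - ay) / ah, torch.log(gw / aw), torch.log(gh / ah)], dim=1
    )

anchors = torch.tensor([[0., 0., 10., 10.]])
gt = torch.tensor([[1., 1., 11., 11.]])
deltas = get_deltas_xyxy(anchors, gt)   # tensor([[0.1, 0.1, 0.0, 0.0]])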
def label_and_sample_anchors(
    self, anchors: List[RotatedBoxes], gt_instances: List[Instances]
) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
    """
    Args:
        anchors (list[RotatedBoxes]): anchors for each feature map.
        gt_instances: the ground-truth instances for each image.

    Returns:
        list[Tensor]:
            List of #img tensors. i-th element is a vector of labels whose length is
            the total number of anchors across feature maps. Label values are in
            {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
        list[Tensor]:
            i-th element is a Nx5 tensor, where N is the total number of anchors across
            feature maps. The values are the matched gt boxes for each anchor. Values are
            undefined for those anchors not labeled as 1.
    """
    anchors = RotatedBoxes.cat(anchors)

    gt_boxes = [x.gt_boxes for x in gt_instances]
    del gt_instances

    gt_labels = []
    matched_gt_boxes = []
    for gt_boxes_i in gt_boxes:
        """
        gt_boxes_i: ground-truth boxes for i-th image
        """
        match_quality_matrix = retry_if_cuda_oom(pairwise_iou_rotated)(gt_boxes_i, anchors)
        matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix)
        # Matching is memory-expensive and may result in CPU tensors. But the result is small
        gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device)

        # A vector of labels (-1, 0, 1) for each anchor
        gt_labels_i = self._subsample_labels(gt_labels_i)

        if len(gt_boxes_i) == 0:
            # These values won't be used anyway since the anchor is labeled as background
            matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
        else:
            # TODO wasted indexing computation for ignored boxes
            matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor

        gt_labels.append(gt_labels_i)  # N,AHW
        matched_gt_boxes.append(matched_gt_boxes_i)
    return gt_labels, matched_gt_boxes
def detector_postprocess(results, output_height, output_width, mask_threshold=0.5):
    """
    Resize the output instances.
    The input images are often resized when entering an object detector.
    As a result, we often need the outputs of the detector in a different
    resolution from its inputs.

    This function will resize the raw outputs of an R-CNN detector
    to produce outputs according to the desired output resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the detector sees.
            This object might be modified in-place.
        output_height, output_width: the desired output resolution.

    Returns:
        Instances: the resized output from the model, based on the output resolution
    """
    scale_x, scale_y = (
        output_width / results.image_size[1],
        output_height / results.image_size[0],
    )
    results = Instances((output_height, output_width), **results.get_fields())

    if results.has("pred_boxes"):
        output_boxes = results.pred_boxes
    elif results.has("proposal_boxes"):
        output_boxes = results.proposal_boxes

    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)

    results = results[output_boxes.nonempty()]

    if results.has("pred_masks"):
        results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
            results.pred_masks[:, 0, :, :],  # N, 1, M, M
            results.pred_boxes,
            results.image_size,
            threshold=mask_threshold,
        )

    if results.has("pred_keypoints"):
        results.pred_keypoints[:, :, 0] *= scale_x
        results.pred_keypoints[:, :, 1] *= scale_y

    return results
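# Illustrative sketch of the rescaling performed above, in plain PyTorch: boxes predicted
# at the network's input resolution (results.image_size) are mapped to the requested output
# resolution and clipped, as Boxes.scale/clip do. The sizes below are made up.
import torch

input_h, input_w = 800, 1066           # resolution the detector saw
output_h, output_w = 480, 640          # desired output resolution
scale_x, scale_y = output_w / input_w, output_h / input_h

boxes = torch.tensor([[100., 50., 500., 700.]])   # (x1, y1, x2, y2) at input resolution
boxes[:, 0::2] *= scale_x                         # scale x coordinates
boxes[:, 1::2] *= scale_y                         # scale y coordinates
boxes[:, 0::2].clamp_(min=0, max=output_w)        # clip to the output image, like Boxes.clip
boxes[:, 1::2].clamp_(min=0, max=output_h)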
def to_bitmasks(self, boxes: torch.Tensor, height, width, threshold=0.5):
    """
    Args:
        see documentation of :func:`paste_masks_in_image`.
    """
    from detectron2.layers import paste_masks_in_image

    paste = retry_if_cuda_oom(paste_masks_in_image)
    bitmasks = paste(
        self.tensor,
        boxes,
        (height, width),
        threshold=threshold,
    )
    return BitMasks(bitmasks)
def to_bitmasks(self, boxes: torch.Tensor, height, width, threshold=0.5):
    """
    Args: see documentation of :func:`paste_masks_in_image`.
    """
    from detectron2.layers.mask_ops import paste_masks_in_image, _paste_masks_tensor_shape

    if torch.jit.is_tracing():
        if isinstance(height, torch.Tensor):
            paste_func = _paste_masks_tensor_shape
        else:
            paste_func = paste_masks_in_image
    else:
        paste_func = retry_if_cuda_oom(paste_masks_in_image)
    bitmasks = paste_func(self.tensor, boxes.tensor, (height, width), threshold=threshold)
    return BitMasks(bitmasks)
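# Simplified, conceptual sketch of what the pasting step does for a single mask: resize the
# MxM soft mask to its box size, threshold it, and write it into a full-image canvas. This is
# only an illustration; the real paste_masks_in_image handles sub-pixel alignment and batching.
import torch
import torch.nn.functional as F

def paste_one_mask(mask, box, height, width, threshold=0.5):
    # mask: (M, M) float in [0, 1]; box: (4,) in (x1, y1, x2, y2) image coordinates
    x1, y1, x2, y2 = [int(v) for v in box.tolist()]
    x1, y1 = max(x1, 0), max(y1, 0)
    x2, y2 = min(x2, width), min(y2, height)
    canvas = torch.zeros(height, width, dtype=torch.bool)
    if x2 <= x1 or y2 <= y1:
        return canvas
    resized = F.interpolate(
        mask[None, None], size=(y2 - y1, x2 - x1), mode="bilinear", align_corners=False
    )[0, 0]
    canvas[y1:y2, x1:x2] = resized >= threshold
    return canvas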
def detector_postprocess(
    results: Instances, output_height: int, output_width: int, mask_threshold: float = 0.5
):
    """
    Resize the output instances.
    The input images are often resized when entering an object detector.
    As a result, we often need the outputs of the detector in a different
    resolution from its inputs.

    This function will resize the raw outputs of an R-CNN detector
    to produce outputs according to the desired output resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the detector sees.
            This object might be modified in-place.
        output_height, output_width: the desired output resolution.

    Returns:
        Instances: the resized output from the model, based on the output resolution
    """
    # Change to 'if is_tracing' after PT1.7
    if isinstance(output_height, torch.Tensor):
        # Converts integer tensors to float temporaries to ensure true
        # division is performed when computing scale_x and scale_y.
        output_width_tmp = output_width.float()
        output_height_tmp = output_height.float()
        new_size = torch.stack([output_height, output_width])
    else:
        new_size = (output_height, output_width)
        output_width_tmp = output_width
        output_height_tmp = output_height

    scale_x, scale_y = (
        output_width_tmp / results.image_size[1],
        output_height_tmp / results.image_size[0],
    )
    results = Instances(new_size, **results.get_fields())

    if results.has("pred_boxes"):
        output_boxes = results.pred_boxes
    elif results.has("proposal_boxes"):
        output_boxes = results.proposal_boxes
    else:
        output_boxes = None
    assert output_boxes is not None, "Predictions must contain boxes!"

    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)

    results = results[output_boxes.nonempty()]

    if results.has("pred_masks"):
        results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
            results.pred_masks[:, 0, :, :],  # N, 1, M, M
            results.pred_boxes,
            results.image_size,
            threshold=mask_threshold,
        )

    if results.has("pred_keypoints"):
        results.pred_keypoints[:, :, 0] *= scale_x
        results.pred_keypoints[:, :, 1] *= scale_y

    return results
def label_and_sample_anchors(self, anchors: List[Boxes], gt_instances: List[Instances]):
    """
    Args:
        anchors (list[Boxes]): anchors for each feature map.
        gt_instances: the ground-truth instances for each image.

    Returns:
        list[Tensor]:
            List of #img tensors. i-th element is a vector of labels whose length is
            the total number of anchors across feature maps. Label values are in
            {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
        list[Tensor]:
            i-th element is a Nx4 tensor, where N is the total number of anchors across
            feature maps. The values are the matched gt boxes for each anchor.
            Values are undefined for those anchors not labeled as 1.
    """
    anchors = Boxes.cat(anchors)  # a single Boxes object holding all anchors of an image

    # list: each element holds the gt boxes of one image.
    gt_boxes = [x.gt_boxes for x in gt_instances]
    image_sizes = [x.image_size for x in gt_instances]
    del gt_instances

    gt_labels = []
    matched_gt_boxes = []
    for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes):
        """
        image_size_i: (h, w) for the i-th image
        gt_boxes_i: ground-truth boxes for i-th image
        """
        match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors)
        # For every anchor, get its gt label and the index of its matched gt box.
        # gt_label == 1 covers two cases:
        #   1. the anchor's IoU with a gt box exceeds the matcher's foreground threshold
        #   2. the anchor has the highest IoU with some gt box
        # When the two rules disagree, the second one takes precedence.
        matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(
            match_quality_matrix
        )  # both have length N (one entry per anchor)
        # Matching is memory-expensive and may result in CPU tensors. But the result is small
        gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device)
        del match_quality_matrix

        if self.boundary_threshold >= 0:
            # Discard anchors that go out of the boundaries of the image
            # NOTE: This is legacy functionality that is turned off by default in Detectron2
            anchors_inside_image = anchors.inside_box(image_size_i, self.boundary_threshold)
            gt_labels_i[~anchors_inside_image] = -1

        # A vector of labels (-1, 0, 1) for each anchor
        gt_labels_i = self._subsample_labels(gt_labels_i)

        if len(gt_boxes_i) == 0:
            # These values won't be used anyway since the anchor is labeled as background
            matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
        else:
            # TODO wasted indexing computation for ignored boxes
            matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor  # matched gt boxes for all anchors in the image

        gt_labels.append(gt_labels_i)  # N,AHW
        matched_gt_boxes.append(matched_gt_boxes_i)
    return gt_labels, matched_gt_boxes
def detector_postprocess(
    results,
    output_height,
    output_width,
    mask_threshold=0.5,
    box_score_threshold=0.7,
    nms=False,
):
    """
    Resize the output instances.
    The input images are often resized when entering an object detector.
    As a result, we often need the outputs of the detector in a different
    resolution from its inputs.

    This function will resize the raw outputs of an R-CNN detector
    to produce outputs according to the desired output resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the detector sees.
            This object might be modified in-place.
        output_height, output_width: the desired output resolution.

    Returns:
        Instances: the resized output from the model, based on the output resolution
    """
    # Converts integer tensors to float temporaries to ensure true division
    # is performed when computing scale_x and scale_y.
    if isinstance(output_width, torch.Tensor):
        output_width_tmp = output_width.float()
    else:
        output_width_tmp = output_width

    if isinstance(output_height, torch.Tensor):
        output_height_tmp = output_height.float()
    else:
        output_height_tmp = output_height

    scale_x, scale_y = (
        output_width_tmp / results.image_size[1],
        output_height_tmp / results.image_size[0],
    )

    tmp_dict = results.get_fields()
    selected = tmp_dict["scores"] >= box_score_threshold
    for key in tmp_dict.keys():
        tmp_dict[key] = tmp_dict[key][selected]
    results = Instances((output_height, output_width), **tmp_dict)

    if results.has("pred_boxes"):
        output_boxes = results.pred_boxes
    elif results.has("proposal_boxes"):
        output_boxes = results.proposal_boxes

    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)

    results = results[output_boxes.nonempty()]

    if results.has("pred_masks"):
        results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
            results.pred_masks[:, 0, :, :],  # N, 1, M, M
            results.pred_boxes,
            results.image_size,
            threshold=mask_threshold,
            nms=nms,
        )

    if results.has("pred_keypoints"):
        results.pred_keypoints[:, :, 0] *= scale_x
        results.pred_keypoints[:, :, 1] *= scale_y

    return results
def detector_postprocess_with_anchor(results, output_height, output_width, mask_threshold=0.5):
    """
    Resize the output instances.
    The input images are often resized when entering an object detector.
    As a result, we often need the outputs of the detector in a different
    resolution from its inputs.

    This function will resize the raw outputs of an R-CNN detector
    to produce outputs according to the desired output resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the detector sees.
            This object might be modified in-place.
        output_height, output_width: the desired output resolution.

    Returns:
        Instances: the resized output from the model, based on the output resolution
    """
    scale_x, scale_y = (
        output_width / results.image_size[1],
        output_height / results.image_size[0],
    )
    results = Instances((output_height, output_width), **results.get_fields())

    if results.has("pred_boxes"):
        output_boxes = results.pred_boxes
    elif results.has("proposal_boxes"):
        output_boxes = results.proposal_boxes
    else:
        raise KeyError(
            "key {pred_boxes/proposal_boxes} not found! "
            "Please check your output boxes."
        )

    # add: also rescale and clip the anchors and proposals carried in the results
    if results.has("anchors"):
        valid_mask = torch.isfinite(results.anchors.tensor).all(dim=1)
        if not valid_mask.all():
            print(results.anchors.tensor)
        anchor_boxes = results.anchors
        anchor_boxes.scale(scale_x, scale_y)
        anchor_boxes.clip(results.image_size)

    if results.has("proposals"):
        valid_mask = torch.isfinite(results.proposals.tensor).all(dim=1)
        if not valid_mask.all():
            print(results.proposals.tensor)
        proposal_boxes = results.proposals
        proposal_boxes.scale(scale_x, scale_y)
        proposal_boxes.clip(results.image_size)

    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)

    results = results[output_boxes.nonempty()]

    if results.has("pred_masks"):
        results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
            results.pred_masks[:, 0, :, :],  # N, 1, M, M
            results.pred_boxes,
            results.image_size,
            threshold=mask_threshold,
        )

    if results.has("pred_keypoints"):
        results.pred_keypoints[:, :, 0] *= scale_x
        results.pred_keypoints[:, :, 1] *= scale_y

    return results
def _get_ground_truth(self):
    """
    Returns:
        gt_objectness_logits: list of N tensors. Tensor i is a vector whose length is the
            total number of anchors in image i (i.e., len(anchors[i])). Label values are
            in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
        gt_anchor_deltas: list of N tensors. Tensor i has shape (len(anchors[i]), 4).
    """
    gt_objectness_logits = []
    gt_anchor_deltas = []
    matched_gt_boxes = []
    matched_idx_all = []
    # Concatenate anchors from all feature maps into a single Boxes per image
    anchors = [Boxes.cat(anchors_i) for anchors_i in self.anchors]
    for image_i, (image_size_i, anchors_i, gt_boxes_i) in enumerate(
        zip(self.image_sizes, anchors, self.gt_boxes)
    ):
        """
        image_size_i: (h, w) for the i-th image
        anchors_i: anchors for i-th image
        gt_boxes_i: ground-truth boxes for i-th image
        """
        match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors_i)
        matched_idxs, gt_objectness_logits_i = retry_if_cuda_oom(self.anchor_matcher)(
            match_quality_matrix
        )
        matched_idx_all.append(matched_idxs)
        # Matching is memory-expensive and may result in CPU tensors. But the result is small
        gt_objectness_logits_i = gt_objectness_logits_i.to(device=gt_boxes_i.device)
        del match_quality_matrix

        if self.boundary_threshold >= 0:
            # Discard anchors that go out of the boundaries of the image
            # NOTE: This is legacy functionality that is turned off by default in Detectron2
            anchors_inside_image = anchors_i.inside_box(image_size_i, self.boundary_threshold)
            gt_objectness_logits_i[~anchors_inside_image] = -1

        if len(gt_boxes_i) == 0:
            # These values won't be used anyway since the anchor is labeled as background
            gt_anchor_deltas_i = torch.zeros_like(anchors_i.tensor)
            matched_gt_boxes_i = torch.zeros_like(anchors_i.tensor)
        else:
            # TODO wasted computation for ignored boxes
            matched_gt_boxes_i = gt_boxes_i[matched_idxs]
            gt_anchor_deltas_i = self.box2box_transform.get_deltas(
                anchors_i.tensor, matched_gt_boxes_i.tensor
            )

        gt_objectness_logits.append(gt_objectness_logits_i)
        gt_anchor_deltas.append(gt_anchor_deltas_i)
        matched_gt_boxes.append(matched_gt_boxes_i)

    if self.paa:
        with torch.no_grad():
            temp_ious = self.ious
            self.ious = None
            losses = self.losses(
                use_resample=False,
                loss_sum=False,
                gt_objectness_logits=gt_objectness_logits,
                gt_anchor_deltas=gt_anchor_deltas,
                matched_gt_boxes=matched_gt_boxes,
            )
            self.ious = temp_ious

            N = len(gt_objectness_logits)
            gt_objectness_logits = torch.cat(gt_objectness_logits).view(N, -1)

            # code to reshape losses (L x N x H x W x A) to
            # (N x L x H x W x A) to align with gts and anchors
            num_anchors_per_map = [
                len(anchors_per_level) for anchors_per_level in self.anchors[0]
            ]
            gt_objectness_logits_tmp = torch.split(
                gt_objectness_logits, num_anchors_per_map, dim=1
            )
            gt_objectness_logits_tmp = cat(
                [x.flatten() for x in gt_objectness_logits_tmp], dim=0
            )
            pos_idx = gt_objectness_logits_tmp == 1
            ignore_idx = gt_objectness_logits_tmp == -1
            if ignore_idx.sum().item() > 0:
                print("For PAA, anchors with ignore label are turned into negatives")
                gt_objectness_logits_tmp[ignore_idx] = 0

            loc_loss = losses["loss_rpn_loc"].sum(1)
            loc_loss_full = torch.full(
                (gt_objectness_logits_tmp.numel(),), float("inf")
            ).to(device=pos_idx.device)
            loc_loss_full[pos_idx] = loc_loss

            num_anchors_per_map_N = [
                len(anchors_per_level) * N for anchors_per_level in self.anchors[0]
            ]
            cls_loss = torch.split(losses["loss_rpn_cls"], num_anchors_per_map_N, dim=0)
            cls_loss = torch.cat([cl.view(N, -1) for cl in cls_loss], dim=1)
            loc_loss_full = torch.split(loc_loss_full, num_anchors_per_map_N, dim=0)
            loc_loss_full = torch.cat([ll.view(N, -1) for ll in loc_loss_full], dim=1)
            # end of code to reshape/align losses with gts/anchors

            combined_loss = cls_loss + loc_loss_full

            gt_box_labels = [
                torch.full((gt_boxes_i.tensor.shape[0],), 1).to(torch.long)
                for gt_boxes_i in self.gt_boxes
            ]
            (
                gt_objectness_logits,
                gt_anchor_deltas,
                matched_gt_boxes,
            ) = self.paa.compute_paa(
                self.gt_boxes,
                gt_box_labels,
                self.anchors,
                gt_objectness_logits,
                combined_loss,
                matched_idx_all,
            )
            matched_gt_boxes = [Boxes(gt_boxes_i) for gt_boxes_i in matched_gt_boxes]

    return gt_objectness_logits, gt_anchor_deltas, matched_gt_boxes
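# Hypothetical, simplified stand-in for the loss-based reassignment performed by
# self.paa.compute_paa: the real PAA fits a two-component probability model over the combined
# scores per gt box, whereas this sketch just keeps the k lowest-loss candidates per image as
# positives. It only illustrates the data flow of combined_loss, not the actual algorithm.
import torch

def simple_loss_based_assignment(combined_loss, candidate_mask, k=9):
    # combined_loss: (N, R) per-anchor cls + loc loss; candidate_mask: (N, R) bool,
    # True where an anchor was initially matched to some gt box.
    labels = torch.zeros_like(combined_loss, dtype=torch.long)
    masked = combined_loss.masked_fill(~candidate_mask, float("inf"))
    k = min(k, masked.shape[1])
    _, topk_idx = (-masked).topk(k, dim=1)   # indices of the k smallest losses per image
    labels.scatter_(1, topk_idx, 1)
    labels[~candidate_mask] = 0              # never promote anchors that were not candidates
    return labels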
def _get_ground_truth(self):
    """
    Returns:
        gt_objectness_logits: list of N tensors. Tensor i is a vector whose length is the
            total number of anchors in image i (i.e., len(anchors[i])). Label values are
            in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
        gt_anchor_deltas: list of N tensors. Tensor i has shape (len(anchors[i]), 4).
    """
    gt_objectness_logits = []
    gt_anchor_deltas = []
    # Concatenate anchors from all feature maps into a single Boxes per image
    anchors = [Boxes.cat(anchors_i) for anchors_i in self.anchors]
    for image_idx, (image_size_i, anchors_i, gt_boxes_i) in enumerate(
        zip(self.image_sizes, anchors, self.gt_boxes)
    ):
        """
        image_size_i: (h, w) for the i-th image
        anchors_i: anchors for i-th image
        gt_boxes_i: ground-truth boxes for i-th image
        """
        match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors_i)
        matched_idxs, gt_objectness_logits_i = retry_if_cuda_oom(self.anchor_matcher)(
            match_quality_matrix
        )

        if self.ignore_ioa:
            if self.ignore_gt_boxes[image_idx].tensor.numel() > 0:
                if self.ignore_gt_boxes[image_idx].tensor.size(0) > 0:
                    ignore_overlaps = retry_if_cuda_oom(pairwise_ioa)(
                        self.ignore_gt_boxes[image_idx], anchors_i
                    )
                    ignore_overlaps_vals, _ = ignore_overlaps.max(dim=0)
                    gt_objectness_logits_i[
                        (gt_objectness_logits_i != 1) & (ignore_overlaps_vals > 0.5)
                    ] = -1

        if self.ignore_ambiguous_sample and match_quality_matrix.size(0) > 1:
            matched_vals, sorted_idx = match_quality_matrix.sort(0, descending=True)
            if len(gt_boxes_i) > 1:
                # overlap_iou = matched_vals[1, :]
                overlap_gt_idx = sorted_idx[1, :]
                gt_density_matrix = pairwise_iou(gt_boxes_i, gt_boxes_i)
                sorted_matrix, _ = gt_density_matrix.sort(0, descending=True)
                gt_density = sorted_matrix[1, :]
                gt_ioa_matrix = retry_if_cuda_oom(pairwise_ioa)(gt_boxes_i, gt_boxes_i)
                ioa_vals, _ = gt_ioa_matrix.sort(0, descending=True)
                gt_ioa = ioa_vals[1, :]
                overlap_iog = calculate_iog(gt_boxes_i.tensor[overlap_gt_idx], anchors_i.tensor)
                gt_objectness_logits_i[
                    (overlap_iog > 0.5)
                    & (overlap_iog > gt_ioa[matched_idxs])
                    & (gt_density[matched_idxs] > 0.5)
                    & (gt_objectness_logits_i == 1)
                ] = -1

        # Matching is memory-expensive and may result in CPU tensors. But the result is small
        gt_objectness_logits_i = gt_objectness_logits_i.to(device=gt_boxes_i.device)
        del match_quality_matrix

        if self.boundary_threshold >= 0:
            # Discard anchors that go out of the boundaries of the image
            # NOTE: This is legacy functionality that is turned off by default in Detectron2
            anchors_inside_image = anchors_i.inside_box(image_size_i, self.boundary_threshold)
            gt_objectness_logits_i[~anchors_inside_image] = -1

        if len(gt_boxes_i) == 0:
            # These values won't be used anyway since the anchor is labeled as background
            gt_anchor_deltas_i = torch.zeros_like(anchors_i.tensor)
        else:
            # TODO wasted computation for ignored boxes
            matched_gt_boxes = gt_boxes_i[matched_idxs]
            gt_anchor_deltas_i = self.box2box_transform.get_deltas(
                anchors_i.tensor, matched_gt_boxes.tensor
            )

        gt_objectness_logits.append(gt_objectness_logits_i)
        gt_anchor_deltas.append(gt_anchor_deltas_i)

    return gt_objectness_logits, gt_anchor_deltas
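# Plain-PyTorch sketch of the ignore-region test in the ignore_ioa branch above: IoA here is
# the intersection divided by the anchor's own area, and non-positive anchors covered by an
# ignore region for more than 0.5 of their area are marked -1 (ignored). The boxes are made up.
import torch

def pairwise_ioa_xyxy(ignore_boxes, anchors):
    # ignore_boxes: (M, 4), anchors: (R, 4), both in (x1, y1, x2, y2) format
    area_an = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
    lt = torch.max(ignore_boxes[:, None, :2], anchors[None, :, :2])
    rb = torch.min(ignore_boxes[:, None, 2:], anchors[None, :, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / area_an[None, :]

ignore_boxes = torch.tensor([[0., 0., 8., 8.]])
anchors = torch.tensor([[0., 0., 4., 4.], [6., 6., 14., 14.]])
labels = torch.tensor([0, 0])
ioa, _ = pairwise_ioa_xyxy(ignore_boxes, anchors).max(dim=0)
labels[(labels != 1) & (ioa > 0.5)] = -1   # first anchor becomes "ignore", second stays negative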