def _get_ground_truth_per_level(self):
        gt_objectness_logits = []
        gt_anchor_deltas = []

        for image_idx, (image_size_i, anchors_i, gt_boxes_i) in enumerate(
            zip(self.image_sizes, self.anchors, self.gt_boxes)
        ):
            gt_objectness_logits_i = []
            gt_anchor_deltas_i = []
            for lvl_anchors in anchors_i:
                match_quality_matrix_lvl = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, lvl_anchors)
                matched_idxs_lvl, gt_objectness_logits_lvl = retry_if_cuda_oom(self.anchor_matcher)(
                    match_quality_matrix_lvl
                )
                gt_objectness_logits_lvl = gt_objectness_logits_lvl.to(device=gt_boxes_i.device)
                del match_quality_matrix_lvl

                if len(gt_boxes_i) == 0:
                    gt_anchor_deltas_lvl = torch.zeros_like(lvl_anchors.tensor)
                else:
                    matched_gt_boxes_lvl = gt_boxes_i[matched_idxs_lvl]
                    gt_anchor_deltas_lvl = self.box2box_transform.get_deltas(
                        lvl_anchors.tensor, matched_gt_boxes_lvl.tensor
                    )
                gt_objectness_logits_i.append(gt_objectness_logits_lvl)
                gt_anchor_deltas_i.append(gt_anchor_deltas_lvl)

            gt_anchor_deltas_i = torch.cat(gt_anchor_deltas_i)
            gt_objectness_logits_i = torch.cat(gt_objectness_logits_i)

            gt_objectness_logits.append(gt_objectness_logits_i)
            gt_anchor_deltas.append(gt_anchor_deltas_i)

        return gt_objectness_logits, gt_anchor_deltas
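A minimal sketch of the retry_if_cuda_oom pattern the snippet above relies on, assuming detectron2 is installed; the box coordinates are made up:

import torch
from detectron2.structures import Boxes, pairwise_iou
from detectron2.utils.memory import retry_if_cuda_oom

gt = Boxes(torch.tensor([[0.0, 0.0, 50.0, 50.0], [10.0, 10.0, 80.0, 60.0]]))
anchors = Boxes(torch.tensor([[0.0, 0.0, 40.0, 40.0], [20.0, 20.0, 90.0, 70.0]]))

# On a CUDA OOM, retry_if_cuda_oom clears the cache and retries once; if that
# still fails, it retries with the inputs moved to CPU.
iou = retry_if_cuda_oom(pairwise_iou)(gt, anchors)  # shape (2, 2)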
Example #2
    def label_and_sample_anchors(
        self, anchors: List[Boxes], gt_instances: List[Instances]
    ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
        """
        Args:
            anchors (list[Boxes]): anchors for each feature map.
            gt_instances: the ground-truth instances for each image.

        Returns:
            list[Tensor]:
                List of #img tensors. i-th element is a vector of labels whose length is
                the total number of anchors across all feature maps R = sum(Hi * Wi * A).
                Label values are in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative
                class; 1 = positive class.
            list[Tensor]:
                i-th element is a Rx4 tensor. The values are the matched gt boxes for each
                anchor. Values are undefined for those anchors not labeled as 1.
        """
        anchors = Boxes.cat(anchors)

        gt_boxes = [x.gt_boxes for x in gt_instances]
        image_sizes = [x.image_size for x in gt_instances]
        del gt_instances

        gt_labels = []
        matched_gt_boxes = []
        for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes):
            """
            image_size_i: (h, w) for the i-th image
            gt_boxes_i: ground-truth boxes for i-th image
            """

            match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i,
                                                                   anchors)
            matched_idxs, gt_labels_i = retry_if_cuda_oom(
                self.anchor_matcher)(match_quality_matrix)
            # Matching is memory-expensive and may result in CPU tensors. But the result is small
            gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device)
            del match_quality_matrix

            if self.anchor_boundary_thresh >= 0:
                # Discard anchors that go out of the boundaries of the image
                # NOTE: This is legacy functionality that is turned off by default in Detectron2
                anchors_inside_image = anchors.inside_box(
                    image_size_i, self.anchor_boundary_thresh)
                gt_labels_i[~anchors_inside_image] = -1

            # A vector of labels (-1, 0, 1) for each anchor
            gt_labels_i = self._subsample_labels(gt_labels_i)

            if len(gt_boxes_i) == 0:
                # These values won't be used anyway since the anchor is labeled as background
                matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
            else:
                # TODO wasted indexing computation for ignored boxes
                matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor

            gt_labels.append(gt_labels_i)  # N,AHW
            matched_gt_boxes.append(matched_gt_boxes_i)
        return gt_labels, matched_gt_boxes
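A minimal sketch of the {-1, 0, 1} label convention produced by the anchor matcher used above, assuming detectron2's Matcher with the default RPN thresholds; the IoU values are made up:

import torch
from detectron2.modeling.matcher import Matcher

# Default RPN settings: IoU < 0.3 -> negative (0), 0.3..0.7 -> ignore (-1),
# IoU >= 0.7 -> positive (1); allow_low_quality_matches additionally promotes
# the best-matching anchor of each gt box to positive.
matcher = Matcher(thresholds=[0.3, 0.7], labels=[0, -1, 1],
                  allow_low_quality_matches=True)

iou = torch.tensor([[0.1, 0.5, 0.9],    # rows: gt boxes
                    [0.2, 0.4, 0.3]])   # cols: anchors
matched_idxs, gt_labels = matcher(iou)
# matched_idxs gives the gt index each anchor is matched to;
# gt_labels holds the {-1, 0, 1} labels described in the docstring above.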
Example #3
    def _get_ground_truth(self):
        """
        Returns:
            gt_objectness_logits: list of N tensors. Tensor i is a vector whose length is the
                total number of anchors in image i (i.e., len(anchors[i])). Label values are
                in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
            gt_anchor_deltas: list of N tensors. Tensor i has shape (len(anchors[i]), 4).
        """
        gt_objectness_logits = []
        gt_anchor_deltas = []
        # Concatenate anchors from all feature maps into a single Boxes per image
        # anchors_i is a list[Boxes]: the anchors from every feature map of the i-th image;
        # the anchors of all feature maps are concatenated per image below.
        anchors = [Boxes.cat(anchors_i) for anchors_i in self.anchors]
        for image_size_i, anchors_i, gt_boxes_i in zip(self.image_sizes, anchors, self.gt_boxes):
            """
            image_size_i: (h, w) for the i-th image
            anchors_i: anchors for i-th image
            gt_boxes_i: ground-truth boxes for i-th image
            """
            match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors_i)
            # [N, ]
            matched_idxs, gt_objectness_logits_i = retry_if_cuda_oom(self.anchor_matcher)(
                match_quality_matrix
            )
            # Matching is memory-expensive and may result in CPU tensors. But the result is small
            gt_objectness_logits_i = gt_objectness_logits_i.to(device=gt_boxes_i.device)
            del match_quality_matrix

            if self.boundary_threshold >= 0:
                # Discard anchors that go out of the boundaries of the image
                # NOTE: This is legacy functionality that is turned off by default in Detectron2
                anchors_inside_image = anchors_i.inside_box(image_size_i, self.boundary_threshold)
                gt_objectness_logits_i[~anchors_inside_image] = -1

            if len(gt_boxes_i) == 0:
                # These values won't be used anyway since the anchor is labeled as background
                gt_anchor_deltas_i = torch.zeros_like(anchors_i.tensor)
            else:
                # TODO wasted computation for ignored boxes
                matched_gt_boxes = gt_boxes_i[matched_idxs]
                # [N, 4]
                gt_anchor_deltas_i = self.box2box_transform.get_deltas(
                    anchors_i.tensor, matched_gt_boxes.tensor
                )

            gt_objectness_logits.append(gt_objectness_logits_i)
            gt_anchor_deltas.append(gt_anchor_deltas_i)

        return gt_objectness_logits, gt_anchor_deltas
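A minimal sketch of the get_deltas call above, assuming detectron2's Box2BoxTransform with unit weights; the coordinates are made up:

import torch
from detectron2.modeling.box_regression import Box2BoxTransform

box2box = Box2BoxTransform(weights=(1.0, 1.0, 1.0, 1.0))
anchor = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
matched_gt = torch.tensor([[1.0, 1.0, 11.0, 11.0]])
# Deltas use the Faster R-CNN (dx, dy, dw, dh) parameterization; a pure
# one-pixel shift gives dx = dy = 0.1 and dw = dh = 0 here.
deltas = box2box.get_deltas(anchor, matched_gt)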
Example #4
    def label_and_sample_anchors(
        self, anchors: List[RotatedBoxes], gt_instances: List[Instances]
    ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
        """
        Args:
            anchors (list[RotatedBoxes]): anchors for each feature map.
            gt_instances: the ground-truth instances for each image.

        Returns:
            list[Tensor]:
                List of #img tensors. i-th element is a vector of labels whose length is
                the total number of anchors across feature maps. Label values are in {-1, 0, 1},
                with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
            list[Tensor]:
                i-th element is a Nx5 tensor, where N is the total number of anchors across
                feature maps.  The values are the matched gt boxes for each anchor.
                Values are undefined for those anchors not labeled as 1.
        """
        anchors = RotatedBoxes.cat(anchors)

        gt_boxes = [x.gt_boxes for x in gt_instances]
        del gt_instances

        gt_labels = []
        matched_gt_boxes = []
        for gt_boxes_i in gt_boxes:
            """
            gt_boxes_i: ground-truth boxes for i-th image
            """
            match_quality_matrix = retry_if_cuda_oom(pairwise_iou_rotated)(
                gt_boxes_i, anchors)
            matched_idxs, gt_labels_i = retry_if_cuda_oom(
                self.anchor_matcher)(match_quality_matrix)
            # Matching is memory-expensive and may result in CPU tensors. But the result is small
            gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device)

            # A vector of labels (-1, 0, 1) for each anchor
            gt_labels_i = self._subsample_labels(gt_labels_i)

            if len(gt_boxes_i) == 0:
                # These values won't be used anyway since the anchor is labeled as background
                matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
            else:
                # TODO wasted indexing computation for ignored boxes
                matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor

            gt_labels.append(gt_labels_i)  # N,AHW
            matched_gt_boxes.append(matched_gt_boxes_i)
        return gt_labels, matched_gt_boxes
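A minimal sketch of the rotated-box inputs used above; the (cx, cy, w, h, angle-in-degrees) format is detectron2's RotatedBoxes convention, the import path for pairwise_iou_rotated is an assumption, and the values are made up:

import torch
from detectron2.structures import RotatedBoxes, pairwise_iou_rotated  # import path assumed
from detectron2.utils.memory import retry_if_cuda_oom

gt = RotatedBoxes(torch.tensor([[50.0, 50.0, 40.0, 20.0, 30.0]]))       # (cx, cy, w, h, angle)
anchors = RotatedBoxes(torch.tensor([[50.0, 50.0, 40.0, 20.0, 0.0],
                                     [50.0, 50.0, 40.0, 20.0, 45.0]]))
iou = retry_if_cuda_oom(pairwise_iou_rotated)(gt, anchors)  # shape (1, 2)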
Example #5
def detector_postprocess(results,
                         output_height,
                         output_width,
                         mask_threshold=0.5):
    """
    Resize the output instances.
    The input images are often resized when entering an object detector.
    As a result, we often need the outputs of the detector in a different
    resolution from its inputs.

    This function will resize the raw outputs of an R-CNN detector
    to produce outputs according to the desired output resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the detector sees.
            This object might be modified in-place.
        output_height, output_width: the desired output resolution.

    Returns:
        Instances: the resized output from the model, based on the output resolution
    """
    scale_x, scale_y = (output_width / results.image_size[1],
                        output_height / results.image_size[0])
    results = Instances((output_height, output_width), **results.get_fields())

    if results.has("pred_boxes"):
        output_boxes = results.pred_boxes
    elif results.has("proposal_boxes"):
        output_boxes = results.proposal_boxes

    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)

    results = results[output_boxes.nonempty()]

    if results.has("pred_masks"):
        results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
            results.pred_masks[:, 0, :, :],  # N, 1, M, M
            results.pred_boxes,
            results.image_size,
            threshold=mask_threshold,
        )

    if results.has("pred_keypoints"):
        results.pred_keypoints[:, :, 0] *= scale_x
        results.pred_keypoints[:, :, 1] *= scale_y

    return results
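A minimal usage sketch of detector_postprocess as defined above; the Instances fields and the image sizes are made up:

import torch
from detectron2.structures import Boxes, Instances

raw = Instances((800, 1216))  # resolution the detector actually saw
raw.pred_boxes = Boxes(torch.tensor([[10.0, 20.0, 110.0, 220.0]]))
raw.scores = torch.tensor([0.9])
raw.pred_classes = torch.tensor([0])

# Rescale the predictions back to the original 480x640 image.
out = detector_postprocess(raw, output_height=480, output_width=640)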
Example #6
    def to_bitmasks(self, boxes: torch.Tensor, height, width, threshold=0.5):
        """
        Args: see documentation of :func:`paste_masks_in_image`.
        """
        from detectron2.layers import paste_masks_in_image

        paste = retry_if_cuda_oom(paste_masks_in_image)
        bitmasks = paste(
            self.tensor,
            boxes,
            (height, width),
            threshold=threshold,
        )
        return BitMasks(bitmasks)
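A minimal sketch of calling paste_masks_in_image directly with the same OOM guard as above, assuming detectron2 is installed; the mask and box are made up:

import torch
from detectron2.layers import paste_masks_in_image
from detectron2.structures import Boxes
from detectron2.utils.memory import retry_if_cuda_oom

masks = torch.rand(1, 28, 28)                        # one 28x28 soft mask
boxes = Boxes(torch.tensor([[5.0, 5.0, 60.0, 80.0]]))
paste = retry_if_cuda_oom(paste_masks_in_image)
bitmask = paste(masks, boxes, (100, 100), threshold=0.5)  # (1, 100, 100) binary mask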
Example #7
    def to_bitmasks(self, boxes: torch.Tensor, height, width, threshold=0.5):
        """
        Args: see documentation of :func:`paste_masks_in_image`.
        """
        from detectron2.layers.mask_ops import paste_masks_in_image, _paste_masks_tensor_shape

        if torch.jit.is_tracing():
            if isinstance(height, torch.Tensor):
                paste_func = _paste_masks_tensor_shape
            else:
                paste_func = paste_masks_in_image
        else:
            paste_func = retry_if_cuda_oom(paste_masks_in_image)
        bitmasks = paste_func(self.tensor,
                              boxes.tensor, (height, width),
                              threshold=threshold)
        return BitMasks(bitmasks)
Example #8
def detector_postprocess(results: Instances,
                         output_height: int,
                         output_width: int,
                         mask_threshold: float = 0.5):
    """
    Resize the output instances.
    The input images are often resized when entering an object detector.
    As a result, we often need the outputs of the detector in a different
    resolution from its inputs.

    This function will resize the raw outputs of an R-CNN detector
    to produce outputs according to the desired output resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the detector sees.
            This object might be modified in-place.
        output_height, output_width: the desired output resolution.

    Returns:
        Instances: the resized output from the model, based on the output resolution
    """
    # Change to 'if is_tracing' after PT1.7
    if isinstance(output_height, torch.Tensor):
        # Converts integer tensors to float temporaries to ensure true
        # division is performed when computing scale_x and scale_y.
        output_width_tmp = output_width.float()
        output_height_tmp = output_height.float()
        new_size = torch.stack([output_height, output_width])
    else:
        new_size = (output_height, output_width)
        output_width_tmp = output_width
        output_height_tmp = output_height

    scale_x, scale_y = (
        output_width_tmp / results.image_size[1],
        output_height_tmp / results.image_size[0],
    )
    results = Instances(new_size, **results.get_fields())

    if results.has("pred_boxes"):
        output_boxes = results.pred_boxes
    elif results.has("proposal_boxes"):
        output_boxes = results.proposal_boxes
    else:
        output_boxes = None
    assert output_boxes is not None, "Predictions must contain boxes!"

    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)

    results = results[output_boxes.nonempty()]

    if results.has("pred_masks"):
        results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
            results.pred_masks[:, 0, :, :],  # N, 1, M, M
            results.pred_boxes,
            results.image_size,
            threshold=mask_threshold,
        )

    if results.has("pred_keypoints"):
        results.pred_keypoints[:, :, 0] *= scale_x
        results.pred_keypoints[:, :, 1] *= scale_y

    return results
Example #9
    def label_and_sample_anchors(self, anchors: List[Boxes],
                                 gt_instances: List[Instances]):
        """
        Args:
            anchors (list[Boxes]): anchors for each feature map.
            gt_instances: the ground-truth instances for each image.

        Returns:
            list[Tensor]:
                List of #img tensors. i-th element is a vector of labels whose length is
                the total number of anchors across feature maps. Label values are in {-1, 0, 1},
                with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
            list[Tensor]:
                i-th element is a Nx4 tensor, where N is the total number of anchors across
                feature maps.  The values are the matched gt boxes for each anchor.
                Values are undefined for those anchors not labeled as 1.
        """
        anchors = Boxes.cat(anchors)  # a single Boxes holding all anchors of an image

        # list[Boxes]: each element is the ground-truth boxes of one image.
        gt_boxes = [x.gt_boxes for x in gt_instances]
        image_sizes = [x.image_size for x in gt_instances]
        del gt_instances

        gt_labels = []
        matched_gt_boxes = []
        for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes):
            """
            image_size_i: (h, w) for the i-th image
            gt_boxes_i: ground-truth boxes for i-th image
            """

            match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i,
                                                                   anchors)
            # Get the matched gt index and the {-1, 0, 1} label of every anchor.
            # An anchor is labeled 1 (positive) in two situations:
            #   1. its IoU with a gt box exceeds the positive threshold, or
            #   2. it is the highest-IoU anchor for some gt box (low-quality
            #      matching), which is applied on top of rule 1.
            matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(
                match_quality_matrix)  # both have length equal to the number of anchors
            # Matching is memory-expensive and may result in CPU tensors. But the result is small
            gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device)
            del match_quality_matrix

            if self.boundary_threshold >= 0:
                # Discard anchors that go out of the boundaries of the image
                # NOTE: This is legacy functionality that is turned off by default in Detectron2
                anchors_inside_image = anchors.inside_box(
                    image_size_i, self.boundary_threshold)
                gt_labels_i[~anchors_inside_image] = -1

            # A vector of labels (-1, 0, 1) for each anchor
            gt_labels_i = self._subsample_labels(gt_labels_i)

            if len(gt_boxes_i) == 0:
                # These values won't be used anyway since the anchor is labeled as background
                matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
            else:
                # TODO wasted indexing computation for ignored boxes
                matched_gt_boxes_i = gt_boxes_i[
                    matched_idxs].tensor  # matched gt boxes for every anchor in the image

            gt_labels.append(gt_labels_i)  # N,AHW
            matched_gt_boxes.append(matched_gt_boxes_i)
        return gt_labels, matched_gt_boxes
Example #10
def detector_postprocess(
    results,
    output_height,
    output_width,
    mask_threshold=0.5,
    box_score_threshold=0.7,
    nms=False,
):
    """
    Resize the output instances.
    The input images are often resized when entering an object detector.
    As a result, we often need the outputs of the detector in a different
    resolution from its inputs.
    This function will resize the raw outputs of an R-CNN detector
    to produce outputs according to the desired output resolution.
    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the detector sees.
            This object might be modified in-place.
        output_height, output_width: the desired output resolution.
    Returns:
        Instances: the resized output from the model, based on the output resolution
    """

    # Converts integer tensors to float temporaries
    #   to ensure true division is performed when
    #   computing scale_x and scale_y.
    if isinstance(output_width, torch.Tensor):
        output_width_tmp = output_width.float()
    else:
        output_width_tmp = output_width

    if isinstance(output_height, torch.Tensor):
        output_height_tmp = output_height.float()
    else:
        output_height_tmp = output_height

    scale_x, scale_y = (
        output_width_tmp / results.image_size[1],
        output_height_tmp / results.image_size[0],
    )
    tmp_dict = results.get_fields()
    selected = tmp_dict["scores"] >= box_score_threshold
    for key in tmp_dict.keys():
        tmp_dict[key] = tmp_dict[key][selected]
    results = Instances((output_height, output_width), **tmp_dict)

    if results.has("pred_boxes"):
        output_boxes = results.pred_boxes
    elif results.has("proposal_boxes"):
        output_boxes = results.proposal_boxes

    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)

    results = results[output_boxes.nonempty()]

    if results.has("pred_masks"):
        results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
            results.pred_masks[:, 0, :, :],  # N, 1, M, M
            results.pred_boxes,
            results.image_size,
            threshold=mask_threshold,
            nms=nms,
        )

    if results.has("pred_keypoints"):
        results.pred_keypoints[:, :, 0] *= scale_x
        results.pred_keypoints[:, :, 1] *= scale_y

    return results
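A minimal sketch of the score filtering done above through get_fields(); Instances also supports boolean indexing over all of its fields at once, and the values here are made up:

import torch
from detectron2.structures import Boxes, Instances

res = Instances((480, 640))
res.pred_boxes = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0],
                                     [5.0, 5.0, 50.0, 50.0]]))
res.scores = torch.tensor([0.4, 0.9])
res = res[res.scores >= 0.7]  # keeps only the second instance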
Example #11
def detector_postprocess_with_anchor(results,
                                     output_height,
                                     output_width,
                                     mask_threshold=0.5):
    """
    Resize the output instances.
    The input images are often resized when entering an object detector.
    As a result, we often need the outputs of the detector in a different
    resolution from its inputs.

    This function will resize the raw outputs of an R-CNN detector
    to produce outputs according to the desired output resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the detector sees.
            This object might be modified in-place.
        output_height, output_width: the desired output resolution.

    Returns:
        Instances: the resized output from the model, based on the output resolution
    """
    scale_x, scale_y = (output_width / results.image_size[1],
                        output_height / results.image_size[0])
    results = Instances((output_height, output_width), **results.get_fields())

    if results.has("pred_boxes"):
        output_boxes = results.pred_boxes
    elif results.has("proposal_boxes"):
        output_boxes = results.proposal_boxes
    else:
        raise KeyError("key{pred_boxes/proposal_boxes} not found!"
                       "Please check your output boxes.")

    # add
    if results.has("anchors"):
        valid_mask = torch.isfinite(results.anchors.tensor).all(dim=1)
        if not valid_mask.all():
            print(results.anchors.tensor)
        anchor_boxes = results.anchors
        anchor_boxes.scale(scale_x, scale_y)
        anchor_boxes.clip(results.image_size)
    if results.has("proposals"):
        valid_mask = torch.isfinite(results.proposals.tensor).all(dim=1)
        if not valid_mask.all():
            print(results.proposals.tensor)
        proposal_boxes = results.proposals
        proposal_boxes.scale(scale_x, scale_y)
        proposal_boxes.clip(results.image_size)

    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)

    results = results[output_boxes.nonempty()]

    if results.has("pred_masks"):
        results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
            results.pred_masks[:, 0, :, :],  # N, 1, M, M
            results.pred_boxes,
            results.image_size,
            threshold=mask_threshold,
        )

    if results.has("pred_keypoints"):
        results.pred_keypoints[:, :, 0] *= scale_x
        results.pred_keypoints[:, :, 1] *= scale_y

    return results
Example #12
    def _get_ground_truth(self):
        """
        Returns:
            gt_objectness_logits: list of N tensors. Tensor i is a vector whose length is the
                total number of anchors in image i (i.e., len(anchors[i])). Label values are
                in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
            gt_anchor_deltas: list of N tensors. Tensor i has shape (len(anchors[i]), 4).
        """
        gt_objectness_logits = []
        gt_anchor_deltas = []
        matched_gt_boxes = []
        matched_idx_all = []
        # Concatenate anchors from all feature maps into a single Boxes per image
        anchors = [Boxes.cat(anchors_i) for anchors_i in self.anchors]
        for image_i, (image_size_i, anchors_i, gt_boxes_i) in enumerate(
                zip(self.image_sizes, anchors, self.gt_boxes)):
            """
            image_size_i: (h, w) for the i-th image
            anchors_i: anchors for i-th image
            gt_boxes_i: ground-truth boxes for i-th image
            """
            match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i,
                                                                   anchors_i)
            matched_idxs, gt_objectness_logits_i = retry_if_cuda_oom(
                self.anchor_matcher)(match_quality_matrix)
            matched_idx_all.append(matched_idxs)
            # Matching is memory-expensive and may result in CPU tensors. But the result is small
            gt_objectness_logits_i = gt_objectness_logits_i.to(
                device=gt_boxes_i.device)
            del match_quality_matrix

            if self.boundary_threshold >= 0:
                # Discard anchors that go out of the boundaries of the image
                # NOTE: This is legacy functionality that is turned off by default in Detectron2
                anchors_inside_image = anchors_i.inside_box(
                    image_size_i, self.boundary_threshold)
                gt_objectness_logits_i[~anchors_inside_image] = -1

            if len(gt_boxes_i) == 0:
                # These values won't be used anyway since the anchor is labeled as background
                gt_anchor_deltas_i = torch.zeros_like(anchors_i.tensor)
                matched_gt_boxes_i = torch.zeros_like(anchors_i.tensor)
            else:
                # TODO wasted computation for ignored boxes
                matched_gt_boxes_i = gt_boxes_i[matched_idxs]
                gt_anchor_deltas_i = self.box2box_transform.get_deltas(
                    anchors_i.tensor, matched_gt_boxes_i.tensor)

            gt_objectness_logits.append(gt_objectness_logits_i)
            gt_anchor_deltas.append(gt_anchor_deltas_i)
            matched_gt_boxes.append(matched_gt_boxes_i)

        if self.paa:
            with torch.no_grad():
                temp_ious = self.ious
                self.ious = None
                losses = self.losses(use_resample=False,
                                     loss_sum=False,
                                     gt_objectness_logits=gt_objectness_logits,
                                     gt_anchor_deltas=gt_anchor_deltas,
                                     matched_gt_boxes=matched_gt_boxes)
                self.ious = temp_ious

                N = len(gt_objectness_logits)
                gt_objectness_logits = torch.cat(gt_objectness_logits).view(
                    N, -1)

                # code to reshape losses (L x N x H x W x A) to
                # (N x L x H x W x A) to align with gts and anchors
                num_anchors_per_map = [
                    len(anchors_per_level)
                    for anchors_per_level in self.anchors[0]
                ]
                gt_objectness_logits_tmp = torch.split(gt_objectness_logits,
                                                       num_anchors_per_map,
                                                       dim=1)
                gt_objectness_logits_tmp = cat(
                    [x.flatten() for x in gt_objectness_logits_tmp], dim=0)
                pos_idx = gt_objectness_logits_tmp == 1
                ignore_idx = gt_objectness_logits_tmp == -1
                if ignore_idx.sum().item() > 0:
                    print(
                        "For PAA, anchors with ignore label are turned into negatives"
                    )
                    gt_objectness_logits_tmp[ignore_idx] = 0
                loc_loss = losses["loss_rpn_loc"].sum(1)
                loc_loss_full = torch.full(
                    (gt_objectness_logits_tmp.numel(), ),
                    float('inf')).to(device=pos_idx.device)
                loc_loss_full[pos_idx] = loc_loss

                num_anchors_per_map_N = [
                    len(anchors_per_level) * N
                    for anchors_per_level in self.anchors[0]
                ]
                cls_loss = torch.split(losses["loss_rpn_cls"],
                                       num_anchors_per_map_N,
                                       dim=0)
                cls_loss = torch.cat([cl.view(N, -1) for cl in cls_loss],
                                     dim=1)
                loc_loss_full = torch.split(loc_loss_full,
                                            num_anchors_per_map_N,
                                            dim=0)
                loc_loss_full = torch.cat(
                    [ll.view(N, -1) for ll in loc_loss_full], dim=1)
                # end of code to reshape/align losses with gts/anchors

                combined_loss = cls_loss + loc_loss_full
                gt_box_labels = [
                    torch.full((gt_boxes_i.tensor.shape[0], ),
                               1).to(torch.long)
                    for gt_boxes_i in self.gt_boxes
                ]
                (gt_objectness_logits, gt_anchor_deltas,
                 matched_gt_boxes) = self.paa.compute_paa(
                     self.gt_boxes, gt_box_labels, self.anchors,
                     gt_objectness_logits, combined_loss, matched_idx_all)
                matched_gt_boxes = [
                    Boxes(gt_boxes_i) for gt_boxes_i in matched_gt_boxes
                ]

        return gt_objectness_logits, gt_anchor_deltas, matched_gt_boxes
    def _get_ground_truth(self):
        """
        Returns:
            gt_objectness_logits: list of N tensors. Tensor i is a vector whose length is the
                total number of anchors in image i (i.e., len(anchors[i])). Label values are
                in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
            gt_anchor_deltas: list of N tensors. Tensor i has shape (len(anchors[i]), 4).
        """
        gt_objectness_logits = []
        gt_anchor_deltas = []
        # Concatenate anchors from all feature maps into a single Boxes per image
        anchors = [Boxes.cat(anchors_i) for anchors_i in self.anchors]
        for image_idx, (image_size_i, anchors_i, gt_boxes_i) in enumerate(
            zip(self.image_sizes, anchors, self.gt_boxes)
        ):
            """
            image_size_i: (h, w) for the i-th image
            anchors_i: anchors for i-th image
            gt_boxes_i: ground-truth boxes for i-th image
            """
            match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors_i)
            matched_idxs, gt_objectness_logits_i = retry_if_cuda_oom(self.anchor_matcher)(
                match_quality_matrix
            )
            if self.ignore_ioa:
                if self.ignore_gt_boxes[image_idx].tensor.numel() > 0:
                    if self.ignore_gt_boxes[image_idx].tensor.size(0) > 0:
                        ignore_overlaps = retry_if_cuda_oom(pairwise_ioa)(
                            self.ignore_gt_boxes[image_idx], anchors_i
                        )
                        ignore_overlaps_vals, _ = ignore_overlaps.max(dim=0)
                        gt_objectness_logits_i[
                            (gt_objectness_logits_i != 1) & (ignore_overlaps_vals > 0.5)
                        ] = -1
            # Matching is memory-expensive and may result in CPU tensors. But the result is small

            if self.ignore_ambiguous_sample and match_quality_matrix.size(0) > 1:
                matched_vals, sorted_idx = match_quality_matrix.sort(0, descending=True)
                if len(gt_boxes_i) > 1:
                    # overlap_iou = matched_vals[1, :]
                    overlap_gt_idx = sorted_idx[1, :]
                    gt_density_matrix = pairwise_iou(gt_boxes_i, gt_boxes_i)
                    sorted_matrix, _ = gt_density_matrix.sort(0, descending=True)
                    gt_density = sorted_matrix[1, :]

                    gt_ioa_matrix = retry_if_cuda_oom(pairwise_ioa)(gt_boxes_i, gt_boxes_i)
                    ioa_vals, _ = gt_ioa_matrix.sort(0, descending=True)
                    gt_ioa = ioa_vals[1, :]

                    overlap_iog = calculate_iog(gt_boxes_i.tensor[overlap_gt_idx], anchors_i.tensor)
                    gt_objectness_logits_i[
                        (overlap_iog > 0.5)
                        & (overlap_iog > gt_ioa[matched_idxs])
                        & (gt_density[matched_idxs] > 0.5)
                        & (gt_objectness_logits_i == 1)
                    ] = -1

            gt_objectness_logits_i = gt_objectness_logits_i.to(device=gt_boxes_i.device)
            del match_quality_matrix

            if self.boundary_threshold >= 0:
                # Discard anchors that go out of the boundaries of the image
                # NOTE: This is legacy functionality that is turned off by default in Detectron2
                anchors_inside_image = anchors_i.inside_box(image_size_i, self.boundary_threshold)
                gt_objectness_logits_i[~anchors_inside_image] = -1

            if len(gt_boxes_i) == 0:
                # These values won't be used anyway since the anchor is labeled as background
                gt_anchor_deltas_i = torch.zeros_like(anchors_i.tensor)
            else:
                # TODO wasted computation for ignored boxes
                matched_gt_boxes = gt_boxes_i[matched_idxs]
                gt_anchor_deltas_i = self.box2box_transform.get_deltas(
                    anchors_i.tensor, matched_gt_boxes.tensor
                )

            gt_objectness_logits.append(gt_objectness_logits_i)
            gt_anchor_deltas.append(gt_anchor_deltas_i)

        return gt_objectness_logits, gt_anchor_deltas
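A minimal sketch of the ignore-region test above, assuming pairwise_ioa is the helper exported by detectron2.structures (intersection over the area of the second argument's boxes); the boxes are made up:

import torch
from detectron2.structures import Boxes, pairwise_ioa

ignore_regions = Boxes(torch.tensor([[0.0, 0.0, 100.0, 100.0]]))
anchors = Boxes(torch.tensor([[10.0, 10.0, 30.0, 30.0],      # fully covered
                              [90.0, 90.0, 200.0, 200.0]]))  # barely covered
ioa = pairwise_ioa(ignore_regions, anchors)  # fraction of each anchor covered
max_ioa, _ = ioa.max(dim=0)
# Anchors with max_ioa > 0.5 that are not already positives are set to -1 above.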