Example #1
 def test_find_rpn_proposals_inf(self):
     N, Hi, Wi, A = 3, 3, 3, 3
     proposals = [torch.rand(N, Hi * Wi * A, 4)]
     pred_logits = [torch.rand(N, Hi * Wi * A)]
     # Inject non-finite scores to exercise the invalid-value handling path.
     pred_logits[0][1][3:5].fill_(float("inf"))
     find_top_rpn_proposals(proposals, pred_logits, [(10, 10)], 0.5, 1000,
                            1000, 0, False)
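Presumably this exercises `find_top_rpn_proposals`'s handling of non-finite scores: with the final argument `training=False`, the `inf` entries are expected to be filtered out rather than raising the `FloatingPointError` that detectron2 raises during training.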
Example #2
 def func(proposal, logit, image_size):
     r = find_top_rpn_proposals([proposal], [logit], [image_size], 0.7,
                                1000, 1000, 0, False)[0]
     # Normalize image_size to a tensor so the wrapper returns only tensors
     # (outside tracing, r.image_size may be a plain (h, w) tuple).
     size = r.image_size
     if not isinstance(size, torch.Tensor):
         size = torch.tensor(size)
     return (size, r.proposal_boxes.tensor, r.objectness_logits)
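A wrapper like this, returning only tensors, is the shape `torch.jit.trace` expects. Below is a minimal sketch of how `func` might be traced, reusing the definition above; the shapes, the tensor-valued `image_size`, and the import path are assumptions based on detectron2's layout, not part of the original example:

    import torch
    from detectron2.modeling.proposal_generator.proposal_utils import (
        find_top_rpn_proposals,
    )

    N, Hi, Wi, A = 3, 50, 50, 9  # assumed shapes
    proposal = torch.rand(N, Hi * Wi * A, 4)
    logit = torch.rand(N, Hi * Wi * A)

    # Pass image_size as a tensor so it can flow through the traced graph;
    # check_trace=False because NMS makes output shapes input-dependent.
    traced = torch.jit.trace(
        func, (proposal, logit, torch.tensor([100, 100])), check_trace=False
    )
    size, boxes, scores = traced(proposal, logit, torch.tensor([100, 100]))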
Example #3
    def predict_proposals(
            self,
            anchors,
            pred_objectness_logits: List[torch.Tensor],
            pred_anchor_deltas: List[torch.Tensor],
            image_sizes: List[Tuple[int, int]],
    ):
        """
        Decode all the predicted box regression deltas to proposals. Find the top proposals
        by applying NMS and removing boxes that are too small.

        Returns:
            proposals (list[Instances]): list of N Instances. The i-th Instances
                stores post_nms_topk object proposals for image i, sorted by their
                objectness score in descending order.
        """
        # The proposals are treated as fixed for approximate joint training with roi heads.
        # This approach ignores the derivative w.r.t. the proposal boxes' coordinates,
        # which are also network responses, so it is only approximate.
        pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas)
        return find_top_rpn_proposals(
            pred_proposals,
            pred_objectness_logits,
            image_sizes,
            self.nms_thresh,
            self.pre_nms_topk[self.training],
            self.post_nms_topk[self.training],
            self.min_box_size,
            self.training,
        )
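`_decode_proposals` is not shown above; in detectron2 it applies the predicted deltas to the anchors via `Box2BoxTransform.apply_deltas`. A simplified sketch of that standard Faster R-CNN decoding for a single level, assuming unit transform weights and XYXY anchors:

    import math
    import torch

    def decode_deltas(deltas, anchors):
        # anchors: (R, 4) XYXY boxes; deltas: (R, 4) as (dx, dy, dw, dh).
        widths = anchors[:, 2] - anchors[:, 0]
        heights = anchors[:, 3] - anchors[:, 1]
        ctr_x = anchors[:, 0] + 0.5 * widths
        ctr_y = anchors[:, 1] + 0.5 * heights

        dx, dy, dw, dh = deltas.unbind(dim=1)
        # Clamp so exp() cannot overflow (detectron2 uses log(1000/16)).
        dw = dw.clamp(max=math.log(1000.0 / 16))
        dh = dh.clamp(max=math.log(1000.0 / 16))

        pred_ctr_x = dx * widths + ctr_x
        pred_ctr_y = dy * heights + ctr_y
        pred_w = torch.exp(dw) * widths
        pred_h = torch.exp(dh) * heights

        # Back to XYXY corners.
        return torch.stack(
            (
                pred_ctr_x - 0.5 * pred_w,
                pred_ctr_y - 0.5 * pred_h,
                pred_ctr_x + 0.5 * pred_w,
                pred_ctr_y + 0.5 * pred_h,
            ),
            dim=1,
        )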
Example #4
    def forward(self, images, features, gt_instances=None, sizes=None):
        """
        Args:
            images (ImageList): input images of length `N`
            features (dict[str: Tensor]): input data as a mapping from feature
                map name to tensor. Axis 0 represents the number of images `N` in
                the input data; axes 1-3 are channels, height, and width, which may
                vary between feature maps (e.g., if a feature pyramid is used).
            gt_instances (list[Instances], optional): a length `N` list of `Instances`s.
                Each `Instances` stores ground-truth instances for the corresponding image.

        Returns:
            proposals: None. Top proposals (with fields "proposal_boxes" and
                "objectness_logits") are computed below but not returned by
                this variant.
            loss: dict[str: Tensor]; empty in inference mode
        """

        features = [features[f] for f in self.in_features]
        pred_in_object_logits = []
        pred_offsets = []
        for x in features:
            t = F.relu(self.conv(x))
            pred_in_object_logits.append(self.in_object_logits(t))
            pred_offsets.append(self.rescale(self.offsets(t), t, images))

        outputs = PointsProposalOutputs(images,
                                        pred_in_object_logits,
                                        pred_offsets,
                                        sizes,
                                        strides=self.strides)

        if self.training:
            # losses = {k: v * self.loss_weight for k, v in outputs.losses().items()}
            losses = outputs.losses()
        else:
            losses = {}

        with torch.no_grad():
            # Find the top proposals by applying NMS and removing boxes that
            # are too small. The proposals are treated as fixed for approximate
            # joint training with roi heads. This approach ignores the derivative
            # w.r.t. the proposal boxes' coordinates, which are also network
            # responses, so it is only approximate.
            proposals = find_top_rpn_proposals(
                outputs.predict_proposals(),
                outputs.predict_objectness_logits(),
                images,
                self.nms_thresh,
                self.pre_nms_topk[self.training],
                self.post_nms_topk[self.training],
                self.min_box_side_len,
                self.training,
            )

        # Note: the proposals computed above are discarded; only queued
        # visualization images are cleared before returning.
        storage = get_event_storage()
        storage.clear_images()
        return None, losses
Example #5
    def forward(self, images, features, gt_instances=None):
        """
        Args:
            images (ImageList): input images of length `N`
            features (dict[str: Tensor]): input data as a mapping from feature
                map name to tensor. Axis 0 represents the number of images `N` in
                the input data; axes 1-3 are channels, height, and width, which may
                vary between feature maps (e.g., if a feature pyramid is used).
            gt_instances (list[Instances], optional): a length `N` list of `Instances`s.
                Each `Instances` stores ground-truth instances for the corresponding image.

        Returns:
            proposals: list[Instances]: contains fields "proposal_boxes", "objectness_logits"
            loss: dict[str: Tensor]; empty in inference mode
        """
        features = [features[f] for f in self.in_features]

        pred_objectness_logits, pred_deltas = self.init_head(features)
        # NOTE: this synchronize is likely a debugging/profiling leftover; it
        # is not required for correctness.
        torch.cuda.synchronize()

        point_centers, strides = self.get_center_grid(features)
        point_centers = point_centers.to(pred_deltas[0].device)
        strides = strides.to(pred_deltas[0].device)
        pred_boxes = self.init_head.points2bbox(point_centers, pred_deltas)
        if self.training:
            # (N, H*W*L), (N, H*W*L, 4)
            gt_labels, gt_boxes = self.label_and_sample_points(
                point_centers, gt_instances, strides)
        else:
            gt_labels, gt_boxes = None, None

        outputs = RepPointsGeneratorResult(
            pred_objectness_logits,
            pred_boxes,
            gt_labels,
            gt_boxes
        )

        if self.training:
            losses = {k: v * self.loss_weight for k, v in outputs.losses(strides).items()}
        else:
            losses = {}

        proposals = outputs.predict_proposals()
        logits = outputs.predict_objectness_logits()
        if self.debug:
            storage = get_event_storage()
            start = 0
            for i, f in enumerate(features):
                h, w = f.shape[-2:]
                centers = point_centers[start:start + h * w].view(h, w, 2)
                stride = strides[start:start + h * w].view(h, w)
                storage.put_image("centers_x-%d" % i, (centers[..., 0:1] / centers[..., 0:1].max()).permute(2, 0, 1))
                storage.put_image("centers_y-%d" % i, (centers[..., 1:] / centers[..., 1:].max()).permute(2, 0, 1))
                storage.put_image("strides-%d" % i, (stride[None] / 64).float())

                # gt_labels is only populated in training mode; this debug
                # block assumes self.training is True.
                gt_label = gt_labels[0, start:start + h * w].view(1, h, w)
                storage.put_image("gt-labels-%d" % i, gt_label.float())

                storage.put_image("pred-logits-%d" % i, torch.sigmoid(logits[i][0].view(1, h, w)))

                start += h * w
            # storage.clear_images()

        with torch.no_grad():
            # Find the top proposals by applying NMS and removing boxes that
            # are too small. The proposals are treated as fixed for approximate
            # joint training with roi heads. This approach ignores the derivative
            # w.r.t. the proposal boxes' coordinates, which are also network
            # responses, so it is only approximate.
            proposals = find_top_rpn_proposals(
                proposals,
                logits,
                images,
                self.nms_thresh,
                self.pre_nms_topk[self.training],
                self.post_nms_topk[self.training],
                self.min_box_side_len,
                self.training,
            )

        return proposals, losses
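`get_center_grid` is not shown; it presumably enumerates one point center per feature-map location at each pyramid level, plus a matching per-point stride. A hypothetical sketch under that assumption (the stride values and the (x, y) layout are guesses, not from the original):

    from typing import List, Tuple

    import torch

    def get_center_grid(features: List[torch.Tensor],
                        strides: Tuple[int, ...] = (8, 16, 32, 64, 128)):
        # Hypothetical: returns (sum_l H_l*W_l, 2) pixel-space centers and a
        # per-point stride vector, concatenated over pyramid levels.
        centers, point_strides = [], []
        for f, s in zip(features, strides):
            h, w = f.shape[-2:]
            ys = (torch.arange(h, dtype=torch.float32) + 0.5) * s
            xs = (torch.arange(w, dtype=torch.float32) + 0.5) * s
            gy, gx = torch.meshgrid(ys, xs, indexing="ij")
            centers.append(torch.stack((gx, gy), dim=-1).reshape(-1, 2))
            point_strides.append(torch.full((h * w,), float(s)))
        return torch.cat(centers), torch.cat(point_strides)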