Example #1
    def preprocess_image(self, batched_inputs):
        r"""
        Normalize, pad and batch the input images.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        # images = [img / 255 for img in images]
        images = [self.normalizer(img / 255.0) for img in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)
        return images
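Example #1 shows the common preprocess_image pattern. Below is a minimal, self-contained sketch of how that pipeline can be exercised outside a full model; the toy batched_inputs, the mean/std values, and size_divisibility=32 are assumptions for illustration, while ImageList.from_tensors is the actual detectron2 API.

import torch
from detectron2.structures import ImageList

# Toy batch: two images of different sizes, as a DatasetMapper would produce.
batched_inputs = [
    {"image": torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8)},
    {"image": torch.randint(0, 256, (3, 400, 500), dtype=torch.uint8)},
]

# Assumed per-channel statistics; a real model reads them from its config.
pixel_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
pixel_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

def normalizer(img):
    return (img - pixel_mean) / pixel_std

images = [normalizer(x["image"].float() / 255.0) for x in batched_inputs]
# size_divisibility=32 is a typical backbone requirement, assumed here.
images = ImageList.from_tensors(images, 32)
print(images.tensor.shape)   # padded batch: torch.Size([2, 3, 480, 640])
print(images.image_sizes)    # per-image (H, W) before padding: [(480, 640), (400, 500)]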
Example #2
    def test_rroi_heads(self):
        torch.manual_seed(121)
        cfg = RCNNConfig()
        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
        # PROPOSAL_GENERATOR: "RRPN"
        # ROI_HEADS: "RROIHeads"
        # ROI_BOX_HEAD.NAME: "FastRCNNConvFCHead"

        def build_box_head(cfg, input_shape):
            return FastRCNNConvFCHead(cfg, input_shape)
        cfg.build_box_head = build_box_head

        cfg.MODEL.RESNETS.DEPTH = 50
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
        backbone = build_backbone(cfg)
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}

        image_shape = (15, 15)
        gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32)
        gt_instance0 = Instances(image_shape)
        gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
        gt_instance0.gt_classes = torch.tensor([2, 1])
        gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32)
        gt_instance1 = Instances(image_shape)
        gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
        gt_instance1.gt_classes = torch.tensor([1, 2])
        gt_instances = [gt_instance0, gt_instance1]

        # currently using DefaultAnchorGenerator in RRPN
        proposal_generator = RRPN(cfg, backbone.output_shape())
        roi_heads = RROIHeads(cfg, backbone.output_shape())

        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(images, features, gt_instances)
            _, detector_losses = roi_heads(images, features, proposals, gt_instances)

        expected_losses = {
            "loss_cls": torch.tensor(4.381618499755859),
            "loss_box_reg": torch.tensor(0.0011829272843897343),
        }
        for name in expected_losses.keys():
            err_msg = "detector_losses[{}] = {}, expected losses = {}".format(
                name, detector_losses[name], expected_losses[name]
            )
            self.assertTrue(torch.allclose(detector_losses[name], expected_losses[name]), err_msg)
Example #3
    def preprocess_image(self, batched_inputs):
        """
        Normalize, pad and batch the input images.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility,
                                        pad_ref_long=True,
                                        pad_value=0.0)
        return images
Example #4
    def forward(self, images, features, gt_instances=None):
        """
        See :class:`RPN.forward`.
        """
        num_branch = self.num_branch if self.training or not self.trident_fast else 1
        # Duplicate images and gt_instances for all branches in TridentNet.
        all_images = ImageList(torch.cat([images.tensor] * num_branch),
                               images.image_sizes * num_branch)
        all_gt_instances = gt_instances * num_branch if gt_instances is not None else None

        return super(TridentRPN, self).forward(all_images, features,
                                               all_gt_instances)
Example #5
    def _caffe2_preprocess_image(self, inputs):
        """
        Caffe2 implementation of preprocess_image, which is called inside each MetaArch's forward.
        It normalizes the input images, and the final caffe2 graph assumes the
        inputs have been batched already.
        """
        data, im_info = inputs
        data = alias(data, "data")
        im_info = alias(im_info, "im_info")
        normalized_data = self._wrapped_model.normalizer(data)
        normalized_data = alias(normalized_data, "normalized_data")

        # Pack (data, im_info) into ImageList which is recognized by self.inference.
        images = ImageList(tensor=normalized_data, image_sizes=im_info)
        return images
Example #6
    def test_roi_heads(self):
        torch.manual_seed(121)
        cfg = RCNNConfig()
        # PROPOSAL_GENERATOR: "RPN"
        # ROI_HEADS: "StandardROIHeads"
        # ROI_BOX_HEAD: "FastRCNNConvFCHead"
        cfg.MODEL.RESNETS.DEPTH = 50
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)

        def build_box_head(cfg, input_shape):
            return FastRCNNConvFCHead(cfg, input_shape)
        cfg.build_box_head = build_box_head

        backbone = build_backbone(cfg)
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}

        image_shape = (15, 15)
        gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
        gt_instance0 = Instances(image_shape)
        gt_instance0.gt_boxes = Boxes(gt_boxes0)
        gt_instance0.gt_classes = torch.tensor([2, 1])
        gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32)
        gt_instance1 = Instances(image_shape)
        gt_instance1.gt_boxes = Boxes(gt_boxes1)
        gt_instance1.gt_classes = torch.tensor([1, 2])
        gt_instances = [gt_instance0, gt_instance1]

        proposal_generator = RPN(cfg, backbone.output_shape())
        roi_heads = StandardROIHeads(cfg, backbone.output_shape())

        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(images, features, gt_instances)
            _, detector_losses = roi_heads(images, features, proposals, gt_instances)

        expected_losses = {
            "loss_cls": torch.tensor(4.4236516953),
            "loss_box_reg": torch.tensor(0.0091214813),
        }
        for name in expected_losses.keys():
            self.assertTrue(torch.allclose(detector_losses[name], expected_losses[name]))
Example #7
    def forward(self, batched_inputs):
        """
        Args:
            Same as in :class:`GeneralizedRCNN.forward`

        Returns:
            list[dict]:
                Each dict is the output for one input image.
                The dict contains one key "proposals" whose value is a
                :class:`Instances` with keys "proposal_boxes" and "objectness_logits".
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)
        features = self.backbone(images.tensor)

        if "instances" in batched_inputs[0]:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
        elif "targets" in batched_inputs[0]:
            log_first_n(
                logging.WARN,
                "'targets' in the model inputs is now renamed to 'instances'!",
                n=10)
            gt_instances = [
                x["targets"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_instances = None
        proposals, proposal_losses = self.proposal_generator(
            images, features, gt_instances)
        # In training, the proposals are not useful at all but we generate them anyway.
        # This makes RPN-only models about 5% slower.
        if self.training:
            return proposal_losses

        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                proposals, batched_inputs, images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"proposals": r})
        return processed_results
Example #8
    def preprocess_image(self, batched_inputs):
        """
        Normalize, pad and batch the input images.
        """
        images = [x["image"].float().to(self.device) for x in batched_inputs]
        images = [self.normalizer(img) for img in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)

        images_whwh = list()
        for bi in batched_inputs:
            h, w = bi["image"].shape[-2:]
            images_whwh.append(
                torch.tensor([w, h, w, h],
                             dtype=torch.float32,
                             device=self.device))
        images_whwh = torch.stack(images_whwh)

        return images, images_whwh
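The images_whwh tensor built here is the kind of per-image scale that is typically used to map box predictions given in normalized coordinates back to pixel coordinates. A minimal sketch of that use follows, under the assumption that predictions are (cx, cy, w, h) values in [0, 1]; the helper name and box layout are illustrative, not part of the snippet above.

import torch

def rescale_normalized_boxes(pred_boxes, images_whwh):
    """
    pred_boxes:  (B, N, 4) boxes as normalized (cx, cy, w, h) in [0, 1].
    images_whwh: (B, 4) per-image [w, h, w, h] as built in preprocess_image.
    Returns absolute (x1, y1, x2, y2) boxes in pixels.
    """
    cx, cy, w, h = pred_boxes.unbind(-1)
    xyxy = torch.stack([cx - 0.5 * w, cy - 0.5 * h,
                        cx + 0.5 * w, cy + 0.5 * h], dim=-1)
    return xyxy * images_whwh[:, None, :]

# Toy check: one 640x480 (w x h) image, one box covering its center quarter.
whwh = torch.tensor([[640.0, 480.0, 640.0, 480.0]])
boxes = torch.tensor([[[0.5, 0.5, 0.5, 0.5]]])
print(rescale_normalized_boxes(boxes, whwh))  # tensor([[[160., 120., 480., 360.]]])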
Example #9
    def test_rpn_scriptability(self):
        cfg = RCNNConfig()
        proposal_generator = RPN(cfg, {"res4": ShapeSpec(channels=1024, stride=16)}).eval()
        num_images = 2
        images_tensor = torch.rand(num_images, 30, 40)
        image_sizes = [(32, 32), (30, 40)]
        images = ImageList(images_tensor, image_sizes)
        features = {"res4": torch.rand(num_images, 1024, 1, 2)}

        fields = {"proposal_boxes": "Boxes", "objectness_logits": "Tensor"}
        proposal_generator_ts = export_torchscript_with_instances(proposal_generator, fields)  # noqa

        proposals, _ = proposal_generator(images, features)
        proposals_ts, _ = proposal_generator_ts(images, features)

        for proposal, proposal_ts in zip(proposals, proposals_ts):
            self.assertEqual(proposal.image_size, proposal_ts.image_size)
            self.assertTrue(
                torch.equal(proposal.proposal_boxes.tensor, proposal_ts.proposal_boxes.tensor)
            )
            self.assertTrue(torch.equal(proposal.objectness_logits, proposal_ts.objectness_logits))
Example #10
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.

                For now, each item in the list is a dict that contains:

                * "image": Tensor, image in (C, H, W) format.
                * "sem_seg": semantic segmentation ground truth.
                * Other information that's included in the original dicts, such as:
                  "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            list[dict]: Each dict is the output for one input image.
                The dict contains one key "sem_seg" whose value is a
                Tensor of the output resolution that represents the
                per-pixel segmentation prediction.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)

        # step_rate: a float, calculated by current_step/total_step,
        #         This parameter is used for Scheduled Drop Path.
        step_rate = self.iter * 1.0 / self.max_iter
        self.iter += 1
        features, expt_flops, real_flops = self.backbone(
            images.tensor, step_rate)

        if "sem_seg" in batched_inputs[0]:
            targets = [x["sem_seg"].to(self.device) for x in batched_inputs]
            targets = ImageList.from_tensors(
                targets, self.backbone.size_divisibility, False,
                self.sem_seg_head.ignore_value).tensor
        else:
            targets = None

        results, losses = self.sem_seg_head(features, targets)
        # calculate flops
        real_flops += self.sem_seg_head.flops
        # remove grad, avoid adding flops to the loss sum
        real_flops = real_flops.detach().requires_grad_(False)
        expt_flops = expt_flops.detach().requires_grad_(False)
        flops = {'real_flops': real_flops, 'expt_flops': expt_flops}
        # use budget constraint for training
        if self.training:
            if self.constrain_on and step_rate >= self.unupdate_rate:
                warm_up_rate = min(1.0,
                                   (step_rate - self.unupdate_rate) / 0.02)
                loss_budget = self.budget_constrint(expt_flops,
                                                    warm_up_rate=warm_up_rate)
                losses.update({'loss_budget': loss_budget})

            losses.update(flops)
            return losses

        processed_results = []
        for result, input_per_image, image_size in zip(results, batched_inputs,
                                                       images.image_sizes):
            height = input_per_image.get("height")
            width = input_per_image.get("width")
            r = sem_seg_postprocess(result, image_size, height, width)
            processed_results.append({"sem_seg": r, "flops": flops})
        return processed_results
Example #11
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.

                For now, each item in the list is a dict that contains:

                * "image": Tensor, image in (C, H, W) format.
                * "instances": Instances
                * "sem_seg": semantic segmentation ground truth.
                * Other information that's included in the original dicts, such as:
                  "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            list[dict]:
                each dict is the results for one image. The dict contains the following keys:

                * "instances": see :meth:`GeneralizedRCNN.forward` for its format.
                * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format.
                * "panoptic_seg": available when `PANOPTIC_FPN.COMBINE.ENABLED`.
                  See the return value of
                  :func:`combine_semantic_and_instance_outputs` for its format.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images,
                                        self.backbone.size_divisibility)
        features = self.backbone(images.tensor)

        if "proposals" in batched_inputs[0]:
            proposals = [
                x["proposals"].to(self.device) for x in batched_inputs
            ]
            proposal_losses = {}

        if "sem_seg" in batched_inputs[0]:
            gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs]
            gt_sem_seg = ImageList.from_tensors(
                gt_sem_seg,
                self.backbone.size_divisibility,
                pad_value=self.sem_seg_head.ignore_value).tensor
        else:
            gt_sem_seg = None
        sem_seg_results, sem_seg_losses = self.sem_seg_head(
            features, gt_sem_seg)

        if "instances" in batched_inputs[0]:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_instances = None
        if self.proposal_generator:
            proposals, proposal_losses = self.proposal_generator(
                images, features, gt_instances)
        detector_results, detector_losses = self.roi_heads(
            images, features, proposals, gt_instances)

        if self.training:
            losses = {}
            losses.update(sem_seg_losses)
            losses.update({
                k: v * self.instance_loss_weight
                for k, v in detector_losses.items()
            })
            losses.update(proposal_losses)
            return losses

        processed_results = []
        for sem_seg_result, detector_result, input_per_image, image_size in zip(
                sem_seg_results, detector_results, batched_inputs,
                images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height,
                                            width)
            detector_r = detector_postprocess(detector_result, height, width)

            processed_results.append({
                "sem_seg": sem_seg_r,
                "instances": detector_r
            })

            if self.combine_on:
                panoptic_r = combine_semantic_and_instance_outputs(
                    detector_r,
                    sem_seg_r.argmax(dim=0),
                    self.combine_overlap_threshold,
                    self.combine_stuff_area_limit,
                    self.combine_instances_confidence_threshold,
                )
                processed_results[-1]["panoptic_seg"] = panoptic_r
        return processed_results
Example #12
    def preprocess_image(self, batched_inputs, training):
        """
        Normalize, pad and batch the input images.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        bs = len(images)
        images = [self.normalizer(x) for x in images]

        images = ImageList.from_tensors(images,
                                        size_divisibility=0,
                                        pad_ref_long=True)

        # sync image size for all gpus
        comm.synchronize()
        if training and self.iter % self.change_iter == 0:
            if self.iter < self.max_iter - 20000:
                meg = torch.LongTensor(1).to(self.device)
                comm.synchronize()
                if comm.is_main_process():
                    size = np.random.choice(self.multi_size)
                    meg.fill_(size)

                if comm.get_world_size() > 1:
                    comm.synchronize()
                    dist.broadcast(meg, 0)
                self.size = meg.item()

                comm.synchronize()
            else:
                self.size = 608

        if training:

            # resize image inputs
            modes = ['bilinear', 'nearest', 'bicubic', 'area']
            mode = modes[random.randrange(4)]
            if mode == 'bilinear' or mode == 'bicubic':
                images.tensor = F.interpolate(images.tensor,
                                              size=[self.size, self.size],
                                              mode=mode,
                                              align_corners=False)
            else:
                images.tensor = F.interpolate(images.tensor,
                                              size=[self.size, self.size],
                                              mode=mode)

            if "instances" in batched_inputs[0]:
                gt_instances = [
                    x["instances"].to(self.device) for x in batched_inputs
                ]
            elif "targets" in batched_inputs[0]:
                log_first_n(
                    logging.WARN,
                    "'targets' in the model inputs is now renamed to 'instances'!",
                    n=10)
                gt_instances = [
                    x["targets"].to(self.device) for x in batched_inputs
                ]
            else:
                gt_instances = None

            targets = [
                torch.cat([
                    instance.gt_classes.float().unsqueeze(-1),
                    instance.gt_boxes.tensor
                ],
                          dim=-1) for instance in gt_instances
            ]
            labels = torch.zeros((bs, 100, 5))
            for i, target in enumerate(targets):
                labels[i][:target.shape[0]] = target
            labels[:, :, 1:] = labels[:, :, 1:] / 512. * self.size
        else:
            labels = None

        self.iter += 1
        return images, labels
Example #13
    def test_rpn(self):
        torch.manual_seed(121)
        cfg = RCNNConfig()
        # PROPOSAL_GENERATOR: "RPN"
        # ANCHOR_GENERATOR: "DefaultAnchorGenerator"
        cfg.MODEL.RESNETS.DEPTH = 50
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1)
        backbone = build_backbone(cfg)
        proposal_generator = RPN(cfg, backbone.output_shape())
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        image_shape = (15, 15)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
        gt_instances = Instances(image_shape)
        gt_instances.gt_boxes = Boxes(gt_boxes)
        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, [gt_instances[0], gt_instances[1]]
            )

        expected_losses = {
            "loss_rpn_cls": torch.tensor(0.0804563984),
            "loss_rpn_loc": torch.tensor(0.0990132466),
        }
        for name in expected_losses.keys():
            err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
                name, proposal_losses[name], expected_losses[name]
            )
            self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg)

        expected_proposal_boxes = [
            Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])),
            Boxes(
                torch.tensor(
                    [
                        [0, 0, 30, 20],
                        [0, 0, 16.7862777710, 13.1362524033],
                        [0, 0, 30, 13.3173446655],
                        [0, 0, 10.8602609634, 20],
                        [7.7165775299, 0, 27.3875980377, 20],
                    ]
                )
            ),
        ]

        expected_objectness_logits = [
            torch.tensor([0.1225359365, -0.0133192837]),
            torch.tensor([0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783, -0.0428492837]),
        ]

        for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip(
            proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits
        ):
            self.assertEqual(len(proposal), len(expected_proposal_box))
            self.assertEqual(proposal.image_size, im_size)
            self.assertTrue(
                torch.allclose(proposal.proposal_boxes.tensor, expected_proposal_box.tensor)
            )
            self.assertTrue(torch.allclose(proposal.objectness_logits, expected_objectness_logit))
Example #14
    def test_rrpn(self):
        torch.manual_seed(121)
        cfg = RCNNConfig()
        # PROPOSAL_GENERATOR: "RRPN"
        # ANCHOR_GENERATOR: "RotatedAnchorGenerator"
        cfg.MODEL.RESNETS.DEPTH = 50
        cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
        cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]]
        cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]]
        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
        # cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
        backbone = build_backbone(cfg)
        # currently using DefaultAnchorGenerator in RRPN
        proposal_generator = RRPN(cfg, backbone.output_shape())
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        image_shape = (15, 15)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32)
        gt_instances = Instances(image_shape)
        gt_instances.gt_boxes = RotatedBoxes(gt_boxes)
        with EventStorage():  # capture events in a new storage to discard them
            proposals, proposal_losses = proposal_generator(
                images, features, [gt_instances[0], gt_instances[1]]
            )

        expected_losses = {
            "loss_rpn_cls": torch.tensor(0.043263837695121765),
            "loss_rpn_loc": torch.tensor(0.14432406425476074),
        }
        for name in expected_losses.keys():
            err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
                name, proposal_losses[name], expected_losses[name]
            )
            self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg)

        expected_proposal_boxes = [
            RotatedBoxes(
                torch.tensor(
                    [
                        [0.60189795, 1.24095452, 61.98131943, 18.03621292, -4.07244873],
                        [15.64940453, 1.69624567, 59.59749603, 16.34339333, 2.62692475],
                        [-3.02982378, -2.69752932, 67.90952301, 59.62455750, 59.97010040],
                        [16.71863365, 1.98309708, 35.61507797, 32.81484985, 62.92267227],
                        [0.49432933, -7.92979717, 67.77606201, 62.93098450, -1.85656738],
                        [8.00880814, 1.36017394, 121.81007385, 32.74150467, 50.44297409],
                        [16.44299889, -4.82221127, 63.39775848, 61.22503662, 54.12270737],
                        [5.00000000, 5.00000000, 10.00000000, 10.00000000, -0.76943970],
                        [17.64130402, -0.98095351, 61.40377808, 16.28918839, 55.53118134],
                        [0.13016054, 4.60568953, 35.80157471, 32.30180359, 62.52872086],
                        [-4.26460743, 0.39604485, 124.30079651, 31.84611320, -1.58203125],
                        [7.52815342, -0.91636634, 62.39784622, 15.45565224, 60.79549789],
                    ]
                )
            ),
            RotatedBoxes(
                torch.tensor(
                    [
                        [0.07734215, 0.81635046, 65.33510590, 17.34688377, -1.51821899],
                        [-3.41833067, -3.11320257, 64.17595673, 60.55617905, 58.27033234],
                        [20.67383385, -6.16561556, 63.60531998, 62.52315903, 54.85546494],
                        [15.00000000, 10.00000000, 30.00000000, 20.00000000, -0.18218994],
                        [9.22646523, -6.84775209, 62.09895706, 65.46472931, -2.74307251],
                        [15.00000000, 4.93451595, 30.00000000, 9.86903191, -0.60272217],
                        [8.88342094, 2.65560246, 120.95362854, 32.45022202, 55.75970078],
                        [16.39088631, 2.33887148, 34.78761292, 35.61492920, 60.81977463],
                        [9.78298569, 10.00000000, 19.56597137, 20.00000000, -0.86660767],
                        [1.28576660, 5.49873352, 34.93610382, 33.22600174, 60.51599884],
                        [17.58912468, -1.63270092, 62.96052551, 16.45713997, 52.91245270],
                        [5.64749718, -1.90428460, 62.37649155, 16.19474792, 61.09543991],
                        [0.82255805, 2.34931135, 118.83985901, 32.83671188, 56.50753784],
                        [-5.33874989, 1.64404404, 125.28501892, 33.35424042, -2.80731201],
                    ]
                )
            ),
        ]

        expected_objectness_logits = [
            torch.tensor(
                [
                    0.10111768,
                    0.09112845,
                    0.08466332,
                    0.07589971,
                    0.06650183,
                    0.06350251,
                    0.04299347,
                    0.01864817,
                    0.00986163,
                    0.00078543,
                    -0.04573630,
                    -0.04799230,
                ]
            ),
            torch.tensor(
                [
                    0.11373727,
                    0.09377633,
                    0.05281663,
                    0.05143715,
                    0.04040275,
                    0.03250912,
                    0.01307789,
                    0.01177734,
                    0.00038105,
                    -0.00540255,
                    -0.01194804,
                    -0.01461012,
                    -0.03061717,
                    -0.03599222,
                ]
            ),
        ]

        torch.set_printoptions(precision=8, sci_mode=False)

        for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip(
            proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits
        ):
            self.assertEqual(len(proposal), len(expected_proposal_box))
            self.assertEqual(proposal.image_size, im_size)
            # It seems that there's some randomness in the result across different machines:
            # This test can be run on a local machine for 100 times with exactly the same result,
            # However, a different machine might produce slightly different results,
            # thus the atol here.
            err_msg = "computed proposal boxes = {}, expected {}".format(
                proposal.proposal_boxes.tensor, expected_proposal_box.tensor
            )
            self.assertTrue(
                torch.allclose(
                    proposal.proposal_boxes.tensor, expected_proposal_box.tensor, atol=1e-5
                ),
                err_msg,
            )

            err_msg = "computed objectness logits = {}, expected {}".format(
                proposal.objectness_logits, expected_objectness_logit
            )
            self.assertTrue(
                torch.allclose(proposal.objectness_logits, expected_objectness_logit, atol=1e-5),
                err_msg,
            )
Example #15
def f(image_tensor, image_shape: List[Tuple[int, int]]):
    return ImageList(image_tensor, image_shape)
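Example #15 omits its imports; here is a self-contained sketch of the same helper with dummy inputs (the tensor shape and image sizes are assumptions). ImageList is TorchScript-compatible in recent detectron2 releases, which is presumably why the constructor call is wrapped in a standalone function.

from typing import List, Tuple

import torch
from detectron2.structures import ImageList


def f(image_tensor, image_shape: List[Tuple[int, int]]):
    return ImageList(image_tensor, image_shape)


# Dummy inputs (assumed): a padded batch of two images and their true sizes.
image_tensor = torch.rand(2, 3, 10, 20)
image_sizes = [(10, 20), (8, 16)]

image_list = f(image_tensor, image_sizes)
print(len(image_list), image_list.image_sizes)   # 2 [(10, 20), (8, 16)]

# The same function can also be compiled with TorchScript if the installed
# detectron2 version makes ImageList scriptable:
scripted_f = torch.jit.script(f)
print(scripted_f(image_tensor, image_sizes).image_sizes)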