Example #1
    def test_fast_rcnn_rotated(self):
        torch.manual_seed(132)
        box_head_output_size = 8

        box_predictor = RotatedFastRCNNOutputLayers(
            ShapeSpec(channels=box_head_output_size),
            box2box_transform=Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1)),
            num_classes=5,
        )
        feature_pooled = torch.rand(2, box_head_output_size)
        predictions = box_predictor(feature_pooled)
        proposal_boxes = torch.tensor(
            [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]],
            dtype=torch.float32)
        gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]],
                                dtype=torch.float32)
        proposal = Instances((10, 10))
        proposal.proposal_boxes = RotatedBoxes(proposal_boxes)
        proposal.gt_boxes = RotatedBoxes(gt_boxes)
        proposal.gt_classes = torch.tensor([1, 2])

        with EventStorage():  # capture events in a new storage to discard them
            losses = box_predictor.losses(predictions, [proposal])

        # Note: the expected losses are slightly different even though
        # the boxes are essentially the same as in the FastRCNNOutput test, because
        # bbox_pred in FastRCNNOutputLayers has different Linear layers/initialization
        # between the two cases.
        expected_losses = {
            "loss_cls": torch.tensor(1.7920907736),
            "loss_box_reg": torch.tensor(4.0410838127),
        }
        for name in expected_losses.keys():
            assert torch.allclose(losses[name], expected_losses[name])
Example #2
    def test_reconstruction(self):
        weights = (5, 5, 10, 10, 1)
        b2b_transform = Box2BoxTransformRotated(weights=weights)
        src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10)
        dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10)

        devices = [torch.device("cpu")]
        if torch.cuda.is_available():
            devices.append(torch.device("cuda"))
        for device in devices:
            src_boxes = src_boxes.to(device=device)
            dst_boxes = dst_boxes.to(device=device)
            deltas = b2b_transform.get_deltas(src_boxes, dst_boxes)
            dst_boxes_reconstructed = b2b_transform.apply_deltas(
                deltas, src_boxes)
            assert torch.allclose(dst_boxes[:, :4],
                                  dst_boxes_reconstructed[:, :4],
                                  atol=1e-5)
            # angle difference has to be normalized
            assert torch.allclose(
                (dst_boxes[:, 4] - dst_boxes_reconstructed[:, 4] + 180.0) %
                360.0 - 180.0,
                torch.zeros_like(dst_boxes[:, 4]),
                atol=1e-4,
            )
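Both reconstruction tests call a random_rotated_boxes helper that is not shown in these
snippets. A plausible sketch, consistent with how the tests invoke it (a 5-element mean
box, a length jitter, an angle jitter, and a count N), is given below; the exact helper
in the original test file may differ.

import torch

def random_rotated_boxes(mean_box, std_length, std_angle, N):
    # N boxes of the form (cx, cy, w, h, angle), jittered around mean_box:
    # the first four coordinates by up to std_length, the angle by up to std_angle.
    return torch.cat(
        [torch.rand(N, 4) * std_length, torch.rand(N, 1) * std_angle], dim=1
    ) + torch.tensor(mean_box, dtype=torch.float)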
Example #3
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super().__init__(cfg, input_shape)
        self.box2box_transform = Box2BoxTransformRotated(
            weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        if self.boundary_threshold >= 0:
            raise NotImplementedError(
                "boundary_threshold is a legacy option not implemented for RRPN."
            )
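The guard above means the rotated RPN only runs with the boundary check disabled.
Assuming detectron2's standard config keys, the legacy option is read from
cfg.MODEL.RPN.BOUNDARY_THRESH and must stay negative:

# Assumption: boundary_threshold comes from cfg.MODEL.RPN.BOUNDARY_THRESH.
# Any value >= 0 triggers the NotImplementedError raised in __init__ above.
cfg = get_cfg()
cfg.MODEL.RPN.BOUNDARY_THRESH = -1  # disabled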
Example #4
    def test_fast_rcnn_rotated(self):
        torch.manual_seed(132)
        cfg = get_cfg()
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
        box2box_transform = Box2BoxTransformRotated(
            weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS)

        box_head_output_size = 8
        num_classes = 5
        cls_agnostic_bbox_reg = False

        box_predictor = FastRCNNOutputLayers(box_head_output_size,
                                             num_classes,
                                             cls_agnostic_bbox_reg,
                                             box_dim=5)
        feature_pooled = torch.rand(2, box_head_output_size)
        pred_class_logits, pred_proposal_deltas = box_predictor(feature_pooled)
        image_shape = (10, 10)
        proposal_boxes = torch.tensor(
            [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]],
            dtype=torch.float32)
        gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]],
                                dtype=torch.float32)
        result = Instances(image_shape)
        result.proposal_boxes = RotatedBoxes(proposal_boxes)
        result.gt_boxes = RotatedBoxes(gt_boxes)
        result.gt_classes = torch.tensor([1, 2])
        proposals = []
        proposals.append(result)
        smooth_l1_beta = cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA

        outputs = FastRCNNOutputs(box2box_transform, pred_class_logits,
                                  pred_proposal_deltas, proposals,
                                  smooth_l1_beta)
        with EventStorage():  # capture events in a new storage to discard them
            losses = outputs.losses()

        # Note: the expected losses are slightly different even though
        # the boxes are essentially the same as in the FastRCNNOutput test, because
        # bbox_pred in FastRCNNOutputLayers has different Linear layers/initialization
        # between the two cases.
        expected_losses = {
            "loss_cls": torch.tensor(1.7920907736),
            "loss_box_reg": torch.tensor(4.0410838127),
        }
        for name in expected_losses.keys():
            assert torch.allclose(losses[name], expected_losses[name])
Example #5
    def __init__(self, cfg):
        super().__init__()
        # fmt: off
        self.num_classes              = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features              = cfg.MODEL.RETINANET.IN_FEATURES
        # Loss parameters:
        self.focal_loss_alpha         = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma         = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta      = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        self.box_reg_loss_type        = cfg.MODEL.RETINANET.BBOX_REG_LOSS_TYPE
        # Inference parameters:
        self.score_threshold          = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates          = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold            = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # Vis parameters
        self.vis_period               = cfg.VIS_PERIOD
        self.input_format             = cfg.INPUT.FORMAT
        # fmt: on

        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = RetinaNetHead(cfg, feature_shapes)
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransformRotated(weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS)
        self.anchor_matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))

        """
        In Detectron1, the loss is normalized by the number of foreground samples in the batch.
        When the batch size is 1 per GPU, #foreground has a large variance and
        using it leads to lower performance. Here we maintain an EMA of #foreground to
        stabilize the normalizer.
        """
        self.loss_normalizer = 100  # initialize with any reasonable #fg that's not too small
        self.loss_normalizer_momentum = 0.9
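The docstring describes the EMA normalizer, but the update itself happens later, in the
loss computation. A minimal sketch of that update, assuming num_foreground is the current
iteration's foreground count (the method name here is hypothetical):

    def _update_loss_normalizer(self, num_foreground):
        # Exponential moving average of #foreground, used to normalize the
        # losses instead of the high-variance per-batch count.
        m = self.loss_normalizer_momentum  # 0.9
        self.loss_normalizer = m * self.loss_normalizer + (1 - m) * max(num_foreground, 1)
        return self.loss_normalizer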
Example #6
    def test_reconstruction(self):
        weights = (5, 5, 10, 10, 1)
        b2b_transform = Box2BoxTransformRotated(weights=weights)
        src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10)
        dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10)

        devices = [torch.device("cpu")]
        if torch.cuda.is_available():
            devices.append(torch.device("cuda"))
        for device in devices:
            src_boxes = src_boxes.to(device=device)
            dst_boxes = dst_boxes.to(device=device)
            deltas = b2b_transform.get_deltas(src_boxes, dst_boxes)
            dst_boxes_reconstructed = b2b_transform.apply_deltas(
                deltas, src_boxes)
            assert torch.allclose(dst_boxes,
                                  dst_boxes_reconstructed,
                                  atol=1e-5)
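This is an older snapshot of the reconstruction test from Example #2. It compares all
five box coordinates directly, including the angle, which can fail spuriously when the
reconstructed angle differs from the target by a multiple of 360 degrees; that is why
Example #2 wraps the angle difference into (-180, 180] before checking it. A small
illustration of the wrapping:

import torch

# Angles of 179.9 and -179.9 degrees describe nearly the same rotation,
# but their raw difference is 359.8; wrapping brings it back near zero.
a = torch.tensor([179.9])
b = torch.tensor([-179.9])
raw = a - b                              # tensor([359.8000])
wrapped = (raw + 180.0) % 360.0 - 180.0  # approximately tensor([-0.2000])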
Example #7
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # fmt: on

        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]

        self.head = RetinaNetHead(cfg, feature_shapes)

        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransformRotated(
            weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #8
    @classmethod
    def from_config(cls, cfg, input_shape):
        args = super().from_config(cfg, input_shape)
        args["box2box_transform"] = Box2BoxTransformRotated(
            weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS)
        return args
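from_config is the hook used by detectron2's @configurable constructor pattern: it maps
a config object to the explicit constructor arguments, here swapping in the rotated box
transform. Assuming the surrounding class is RotatedFastRCNNOutputLayers as in Example #1,
a config-driven construction would look roughly like:

cfg = get_cfg()
cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
# Calling the class with (cfg, input_shape) routes through from_config,
# which injects the Box2BoxTransformRotated built above.
predictor = RotatedFastRCNNOutputLayers(cfg, ShapeSpec(channels=8))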