def test_fast_rcnn_rotated(self):
    """Check rotated Fast R-CNN losses against hard-coded reference values.

    Builds a ``RotatedFastRCNNOutputLayers`` predictor, feeds it two random
    pooled feature vectors, and compares the classification and box
    regression losses for one image with two proposals to fixed numbers.
    """
    # The reference losses below were recorded for this exact seed and this
    # exact sequence of operations; presumably the predictor's parameter
    # initialization draws from the same RNG, so it must be built before
    # the features are sampled.
    torch.manual_seed(132)
    feature_dim = 8

    box_predictor = RotatedFastRCNNOutputLayers(
        ShapeSpec(channels=feature_dim),
        box2box_transform=Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1)),
        num_classes=5,
    )
    predictions = box_predictor(torch.rand(2, feature_dim))

    # One image of size (10, 10) holding two proposals and their matched
    # ground truth; each box row has five values.
    proposal = Instances((10, 10))
    proposal.proposal_boxes = RotatedBoxes(
        torch.tensor(
            [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]],
            dtype=torch.float32,
        )
    )
    proposal.gt_boxes = RotatedBoxes(
        torch.tensor(
            [[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]],
            dtype=torch.float32,
        )
    )
    proposal.gt_classes = torch.tensor([1, 2])

    # Events emitted while computing the losses go to a throwaway storage.
    with EventStorage():
        losses = box_predictor.losses(predictions, [proposal])

    # Note: these values differ slightly from the ones in the
    # FastRCNNOutput test even though the boxes are essentially the same,
    # because bbox_pred in FastRCNNOutputLayers has different Linear
    # layers/initialization between the two cases.
    expected_losses = {
        "loss_cls": torch.tensor(1.7920907736),
        "loss_box_reg": torch.tensor(4.0410838127),
    }
    for name, expected in expected_losses.items():
        assert torch.allclose(losses[name], expected)
def test_reconstruction(self):
    """Round-trip rotated boxes through ``get_deltas`` / ``apply_deltas``.

    For the CPU, and for CUDA when it is available, the deltas that map
    ``src_boxes`` to ``dst_boxes`` are computed and re-applied to
    ``src_boxes``; the result must match ``dst_boxes``.
    """
    b2b_transform = Box2BoxTransformRotated(weights=(5, 5, 10, 10, 1))
    src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10)
    dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10)

    device_names = ["cpu"]
    if torch.cuda.is_available():
        device_names.append("cuda")

    for device in [torch.device(device_name) for device_name in device_names]:
        src = src_boxes.to(device=device)
        dst = dst_boxes.to(device=device)

        deltas = b2b_transform.get_deltas(src, dst)
        reconstructed = b2b_transform.apply_deltas(deltas, src)

        # The first four columns are compared directly.
        assert torch.allclose(dst[:, :4], reconstructed[:, :4], atol=1e-5)

        # angle difference has to be normalized: wrap it into [-180, 180)
        # so that angles differing by a multiple of 360 count as equal.
        angle_error = (dst[:, 4] - reconstructed[:, 4] + 180.0) % 360.0 - 180.0
        assert torch.allclose(
            angle_error,
            torch.zeros_like(dst[:, 4]),
            atol=1e-4,
        )
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """Initialize via the parent class, then switch to the rotated box transform.

    Args:
        cfg: config node; ``cfg.MODEL.RPN.BBOX_REG_WEIGHTS`` is read here.
        input_shape: mapping from feature name to its ``ShapeSpec``,
            forwarded unchanged to the parent constructor.

    Raises:
        NotImplementedError: if ``self.boundary_threshold`` (expected to be
            set by the parent constructor) is non-negative.
    """
    super().__init__(cfg, input_shape)

    regression_weights = cfg.MODEL.RPN.BBOX_REG_WEIGHTS
    self.box2box_transform = Box2BoxTransformRotated(weights=regression_weights)

    # Only a negative threshold is accepted; the legacy boundary check has
    # no rotated-box implementation.
    if self.boundary_threshold >= 0:
        message = "boundary_threshold is a legacy option not implemented for RRPN."
        raise NotImplementedError(message)
def test_fast_rcnn_rotated(self):
    """Check rotated Fast R-CNN losses computed through ``FastRCNNOutputs``.

    Builds a ``FastRCNNOutputLayers`` predictor with ``box_dim=5``, runs it
    on two random pooled feature vectors, and compares the resulting
    losses for one image with two proposals to fixed reference values.
    """
    # The reference losses depend on this seed and on the order of the
    # operations that follow it; keep both unchanged.
    torch.manual_seed(132)

    cfg = get_cfg()
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
    box2box_transform = Box2BoxTransformRotated(
        weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS
    )

    feature_dim = 8
    class_count = 5
    class_agnostic = False
    box_predictor = FastRCNNOutputLayers(
        feature_dim, class_count, class_agnostic, box_dim=5
    )
    pred_class_logits, pred_proposal_deltas = box_predictor(
        torch.rand(2, feature_dim)
    )

    # One image of size (10, 10) holding two proposals and their matched
    # ground truth; each box row has five values.
    instances = Instances((10, 10))
    instances.proposal_boxes = RotatedBoxes(
        torch.tensor(
            [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]],
            dtype=torch.float32,
        )
    )
    instances.gt_boxes = RotatedBoxes(
        torch.tensor(
            [[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]],
            dtype=torch.float32,
        )
    )
    instances.gt_classes = torch.tensor([1, 2])
    proposals = [instances]

    outputs = FastRCNNOutputs(
        box2box_transform,
        pred_class_logits,
        pred_proposal_deltas,
        proposals,
        cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA,
    )

    # Events emitted while computing the losses go to a throwaway storage.
    with EventStorage():
        losses = outputs.losses()

    # Note: these values differ slightly from the ones in the
    # FastRCNNOutput test even though the boxes are essentially the same,
    # because bbox_pred in FastRCNNOutputLayers has different Linear
    # layers/initialization between the two cases.
    expected_losses = {
        "loss_cls": torch.tensor(1.7920907736),
        "loss_box_reg": torch.tensor(4.0410838127),
    }
    for name, expected in expected_losses.items():
        assert torch.allclose(losses[name], expected)
def __init__(self, cfg):
    """Build the detector's hyper-parameters and sub-modules from ``cfg``.

    Args:
        cfg: config node. Reads ``MODEL.RETINANET.*``,
            ``TEST.DETECTIONS_PER_IMAGE``, ``VIS_PERIOD``, ``INPUT.FORMAT``,
            ``MODEL.PIXEL_MEAN`` and ``MODEL.PIXEL_STD``; it is also passed
            to the backbone, head and anchor-generator builders.
    """
    super().__init__()
    retinanet_cfg = cfg.MODEL.RETINANET

    self.num_classes = retinanet_cfg.NUM_CLASSES
    self.in_features = retinanet_cfg.IN_FEATURES

    # Loss parameters
    self.focal_loss_alpha = retinanet_cfg.FOCAL_LOSS_ALPHA
    self.focal_loss_gamma = retinanet_cfg.FOCAL_LOSS_GAMMA
    self.smooth_l1_loss_beta = retinanet_cfg.SMOOTH_L1_LOSS_BETA
    self.box_reg_loss_type = retinanet_cfg.BBOX_REG_LOSS_TYPE

    # Inference parameters
    self.score_threshold = retinanet_cfg.SCORE_THRESH_TEST
    self.topk_candidates = retinanet_cfg.TOPK_CANDIDATES_TEST
    self.nms_threshold = retinanet_cfg.NMS_THRESH_TEST
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE

    # Visualization parameters
    self.vis_period = cfg.VIS_PERIOD
    self.input_format = cfg.INPUT.FORMAT

    # Sub-modules: the head and the anchor generator are both built for
    # the backbone outputs selected by ``in_features``.
    self.backbone = build_backbone(cfg)
    all_shapes = self.backbone.output_shape()
    feature_shapes = [all_shapes[feature_name] for feature_name in self.in_features]
    self.head = RetinaNetHead(cfg, feature_shapes)
    self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

    # Matching and loss
    self.box2box_transform = Box2BoxTransformRotated(
        weights=retinanet_cfg.BBOX_REG_WEIGHTS
    )
    self.anchor_matcher = Matcher(
        retinanet_cfg.IOU_THRESHOLDS,
        retinanet_cfg.IOU_LABELS,
        allow_low_quality_matches=True,
    )

    # Per-channel normalization statistics, stored as buffers and reshaped
    # to (C, 1, 1).
    for buffer_name, channel_values in (
        ("pixel_mean", cfg.MODEL.PIXEL_MEAN),
        ("pixel_std", cfg.MODEL.PIXEL_STD),
    ):
        self.register_buffer(buffer_name, torch.Tensor(channel_values).view(-1, 1, 1))

    # In Detectron1, loss is normalized by number of foreground samples in
    # the batch. When batch size is 1 per GPU, #foreground has a large
    # variance and using it leads to lower performance. Here we maintain an
    # EMA of #foreground to stabilize the normalizer.
    self.loss_normalizer = 100  # initialize with any reasonable #fg that's not too small
    self.loss_normalizer_momentum = 0.9
def test_reconstruction(self):
    """Round-trip rotated boxes through ``get_deltas`` / ``apply_deltas``.

    For the CPU, and for CUDA when it is available, the deltas that map
    ``src_boxes`` to ``dst_boxes`` are computed and re-applied to
    ``src_boxes``; the result must match ``dst_boxes``.

    The first four columns are compared directly. The last column is an
    angle, so it is compared modulo 360: two angles that differ by a full
    turn describe the same box and must not fail the test.
    """
    weights = (5, 5, 10, 10, 1)
    b2b_transform = Box2BoxTransformRotated(weights=weights)
    src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10)
    dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10)

    devices = [torch.device("cpu")]
    if torch.cuda.is_available():
        devices.append(torch.device("cuda"))

    for device in devices:
        src_boxes = src_boxes.to(device=device)
        dst_boxes = dst_boxes.to(device=device)
        deltas = b2b_transform.get_deltas(src_boxes, dst_boxes)
        dst_boxes_reconstructed = b2b_transform.apply_deltas(deltas, src_boxes)

        assert torch.allclose(
            dst_boxes[:, :4], dst_boxes_reconstructed[:, :4], atol=1e-5
        )
        # Wrap the angle difference into [-180, 180) before comparing with
        # zero. The tolerance is looser than for the other columns because
        # the wrap-around arithmetic on values near 180 costs some float32
        # precision.
        angle_error = (
            dst_boxes[:, 4] - dst_boxes_reconstructed[:, 4] + 180.0
        ) % 360.0 - 180.0
        assert torch.allclose(
            angle_error,
            torch.zeros_like(dst_boxes[:, 4]),
            atol=1e-4,
        )
def __init__(self, cfg):
    """Build the detector's hyper-parameters and sub-modules from ``cfg``.

    Args:
        cfg: config node. Reads ``MODEL.DEVICE``, ``MODEL.RETINANET.*``,
            ``TEST.DETECTIONS_PER_IMAGE``, ``MODEL.PIXEL_MEAN`` and
            ``MODEL.PIXEL_STD``; it is also passed to the backbone, head
            and anchor-generator builders.
    """
    super().__init__()

    self.device = torch.device(cfg.MODEL.DEVICE)

    # fmt: off
    self.num_classes              = cfg.MODEL.RETINANET.NUM_CLASSES
    self.in_features              = cfg.MODEL.RETINANET.IN_FEATURES
    # Loss parameters:
    self.focal_loss_alpha         = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
    self.focal_loss_gamma         = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
    self.smooth_l1_loss_beta      = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
    # Inference parameters:
    self.score_threshold          = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
    self.topk_candidates          = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
    self.nms_threshold            = cfg.MODEL.RETINANET.NMS_THRESH_TEST
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
    # fmt: on

    # The head and the anchor generator are both built for the backbone
    # outputs selected by ``in_features``.
    self.backbone = build_backbone(cfg)
    backbone_shape = self.backbone.output_shape()
    feature_shapes = [backbone_shape[f] for f in self.in_features]
    self.head = RetinaNetHead(cfg, feature_shapes)
    self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

    # Matching and loss
    self.box2box_transform = Box2BoxTransformRotated(
        weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS
    )
    self.matcher = Matcher(
        cfg.MODEL.RETINANET.IOU_THRESHOLDS,
        cfg.MODEL.RETINANET.IOU_LABELS,
        allow_low_quality_matches=True,
    )

    # Per-channel statistics reshaped to (C, 1, 1). The channel count is
    # inferred from the config (-1) instead of being fixed at 3, so a
    # PIXEL_MEAN / PIXEL_STD of any length is accepted; for three channels
    # the result is unchanged.
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1)
    # The two tensors are captured by the closure; they are not registered
    # as buffers on the module.
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def from_config(cls, cfg, input_shape):
    """Return the parent's constructor arguments with a rotated box transform.

    Args:
        cfg: config node; ``cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS`` is
            read here.
        input_shape: forwarded unchanged to the parent ``from_config``.

    Returns:
        The object returned by the parent ``from_config``, with its
        ``"box2box_transform"`` entry set to a ``Box2BoxTransformRotated``.
    """
    init_kwargs = super().from_config(cfg, input_shape)
    rotated_transform = Box2BoxTransformRotated(
        weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS
    )
    init_kwargs["box2box_transform"] = rotated_transform
    return init_kwargs