def test_rroi_heads(self): torch.manual_seed(121) cfg = get_cfg() cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN" cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator" cfg.MODEL.ROI_HEADS.NAME = "RROIHeads" cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead" cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated" cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1) backbone = build_backbone(cfg) num_images = 2 images_tensor = torch.rand(num_images, 20, 30) image_sizes = [(10, 10), (20, 30)] images = ImageList(images_tensor, image_sizes) num_channels = 1024 features = {"res4": torch.rand(num_images, num_channels, 1, 2)} image_shape = (15, 15) gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32) gt_instance0 = Instances(image_shape) gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0) gt_instance0.gt_classes = torch.tensor([2, 1]) gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32) gt_instance1 = Instances(image_shape) gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1) gt_instance1.gt_classes = torch.tensor([1, 2]) gt_instances = [gt_instance0, gt_instance1] proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) roi_heads = build_roi_heads(cfg, backbone.output_shape()) with EventStorage(): # capture events in a new storage to discard them proposals, proposal_losses = proposal_generator( images, features, gt_instances) _, detector_losses = roi_heads(images, features, proposals, gt_instances) expected_losses = { "loss_cls": torch.tensor(4.381443977355957), "loss_box_reg": torch.tensor(0.0011560433777049184), } for name in expected_losses.keys(): err_msg = "detector_losses[{}] = {}, expected losses = {}".format( name, detector_losses[name], expected_losses[name]) self.assertTrue( torch.allclose(detector_losses[name], expected_losses[name]), err_msg)
def get_regular_bitmask_instances(h, w): inst = Instances((h, w)) inst.gt_boxes = Boxes(torch.rand(3, 4)) inst.gt_boxes.tensor[:, 2:] += inst.gt_boxes.tensor[:, :2] inst.gt_classes = torch.tensor([3, 4, 5]).to(dtype=torch.int64) inst.gt_masks = BitMasks((torch.rand(3, h, w) > 0.5)) return inst
def annotations_to_instances_rotated(annos, image_size): """ Create an :class:`Instances` object used by the models, from instance annotations in the dataset dict. Compared to `annotations_to_instances`, this function is for rotated boxes only Args: annos (list[dict]): a list of instance annotations in one image, each element for one instance. image_size (tuple): height, width Returns: Instances: Containing fields "gt_boxes", "gt_classes", if they can be obtained from `annos`. This is the format that builtin models expect. """ boxes = [obj["bbox"] for obj in annos] target = Instances(image_size) boxes = target.gt_boxes = RotatedBoxes(boxes) boxes.clip(image_size) classes = [obj["category_id"] for obj in annos] classes = torch.tensor(classes, dtype=torch.int64) target.gt_classes = classes return target
def test_roi_heads(self): torch.manual_seed(121) cfg = get_cfg() cfg.MODEL.ROI_HEADS.NAME = "StandardROIHeads" cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5) backbone = build_backbone(cfg) num_images = 2 images_tensor = torch.rand(num_images, 20, 30) image_sizes = [(10, 10), (20, 30)] images = ImageList(images_tensor, image_sizes) num_channels = 1024 features = {"res4": torch.rand(num_images, num_channels, 1, 2)} image_shape = (15, 15) gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) gt_instance0 = Instances(image_shape) gt_instance0.gt_boxes = Boxes(gt_boxes0) gt_instance0.gt_classes = torch.tensor([2, 1]) gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32) gt_instance1 = Instances(image_shape) gt_instance1.gt_boxes = Boxes(gt_boxes1) gt_instance1.gt_classes = torch.tensor([1, 2]) gt_instances = [gt_instance0, gt_instance1] proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) roi_heads = build_roi_heads(cfg, backbone.output_shape()) with EventStorage(): # capture events in a new storage to discard them proposals, proposal_losses = proposal_generator( images, features, gt_instances) _, detector_losses = roi_heads(images, features, proposals, gt_instances) expected_losses = { "loss_cls": torch.tensor(4.4236516953), "loss_box_reg": torch.tensor(0.0091214813), } for name in expected_losses.keys(): self.assertTrue( torch.allclose(detector_losses[name], expected_losses[name]))
def test_fast_rcnn_rotated(self): torch.manual_seed(132) cfg = get_cfg() cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1) box2box_transform = Box2BoxTransformRotated(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS) box_head_output_size = 8 num_classes = 5 cls_agnostic_bbox_reg = False box_predictor = FastRCNNOutputLayers( box_head_output_size, num_classes, cls_agnostic_bbox_reg, box_dim=5 ) feature_pooled = torch.rand(2, box_head_output_size) pred_class_logits, pred_proposal_deltas = box_predictor(feature_pooled) image_shape = (10, 10) proposal_boxes = torch.tensor( [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]], dtype=torch.float32 ) gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32) result = Instances(image_shape) result.proposal_boxes = RotatedBoxes(proposal_boxes) result.gt_boxes = RotatedBoxes(gt_boxes) result.gt_classes = torch.tensor([1, 2]) proposals = [] proposals.append(result) smooth_l1_beta = cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA outputs = RotatedFastRCNNOutputs( box2box_transform, pred_class_logits, pred_proposal_deltas, proposals, smooth_l1_beta ) with EventStorage(): # capture events in a new storage to discard them losses = outputs.losses() # Note: the expected losses are slightly different even if # the boxes are essentially the same as in the FastRCNNOutput test, because # bbox_pred in FastRCNNOutputLayers have different Linear layers/initialization # between the two cases. expected_losses = { "loss_cls": torch.tensor(1.7920907736), "loss_box_reg": torch.tensor(4.0410838127), } for name in expected_losses.keys(): assert torch.allclose(losses[name], expected_losses[name])
def test_fast_rcnn(self): torch.manual_seed(132) cfg = get_cfg() cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5) box2box_transform = Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS) box_head_output_size = 8 num_classes = 5 cls_agnostic_bbox_reg = False box_predictor = FastRCNNOutputLayers( box_head_output_size, num_classes, cls_agnostic_bbox_reg, box_dim=4 ) feature_pooled = torch.rand(2, box_head_output_size) pred_class_logits, pred_proposal_deltas = box_predictor(feature_pooled) image_shape = (10, 10) proposal_boxes = torch.tensor([[0.8, 1.1, 3.2, 2.8], [2.3, 2.5, 7, 8]], dtype=torch.float32) gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) result = Instances(image_shape) result.proposal_boxes = Boxes(proposal_boxes) result.gt_boxes = Boxes(gt_boxes) result.gt_classes = torch.tensor([1, 2]) proposals = [] proposals.append(result) smooth_l1_beta = cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA outputs = FastRCNNOutputs( box2box_transform, pred_class_logits, pred_proposal_deltas, proposals, smooth_l1_beta ) with EventStorage(): # capture events in a new storage to discard them losses = outputs.losses() expected_losses = { "loss_cls": torch.tensor(1.7951188087), "loss_box_reg": torch.tensor(4.0357131958), } for name in expected_losses.keys(): assert torch.allclose(losses[name], expected_losses[name])
def get_empty_instance(h, w): inst = Instances((h, w)) inst.gt_boxes = Boxes(torch.rand(0, 4)) inst.gt_classes = torch.tensor([]).to(dtype=torch.int64) inst.gt_masks = BitMasks(torch.rand(0, h, w)) return inst
def annotations_to_instances(annos, image_size, mask_format="polygon"): """ Create an :class:`Instances` object used by the models, from instance annotations in the dataset dict. Args: annos (list[dict]): a list of instance annotations in one image, each element for one instance. image_size (tuple): height, width Returns: Instances: It will contain fields "gt_boxes", "gt_classes", "gt_masks", "gt_keypoints", if they can be obtained from `annos`. This is the format that builtin models expect. """ boxes = [ BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos ] target = Instances(image_size) boxes = target.gt_boxes = Boxes(boxes) boxes.clip(image_size) classes = [obj["category_id"] for obj in annos] classes = torch.tensor(classes, dtype=torch.int64) target.gt_classes = classes if len(annos) and "segmentation" in annos[0]: segms = [obj["segmentation"] for obj in annos] if mask_format == "polygon": masks = PolygonMasks(segms) else: assert mask_format == "bitmask", mask_format masks = [] for segm in segms: if isinstance(segm, list): # polygon masks.append(polygons_to_bitmask(segm, *image_size)) elif isinstance(segm, dict): # COCO RLE masks.append(mask_util.decode(segm)) elif isinstance(segm, np.ndarray): assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( segm.ndim) # mask array masks.append(segm) else: raise ValueError( "Cannot convert segmentation of type '{}' to BitMasks!" "Supported types are: polygons as list[list[float] or ndarray]," " COCO-style RLE as a dict, or a full-image segmentation mask " "as a 2D ndarray.".format(type(segm))) # torch.from_numpy does not support array with negative stride. masks = BitMasks( torch.stack([ torch.from_numpy(np.ascontiguousarray(x)) for x in masks ])) target.gt_masks = masks if len(annos) and "keypoints" in annos[0]: kpts = [obj.get("keypoints", []) for obj in annos] target.gt_keypoints = Keypoints(kpts) return target