Example #1
    def __init__(self, cfg):
        super().__init__()

        # get the device of the model
        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.num_classes = cfg.MODEL.TENSOR_MASK.NUM_CLASSES
        self.in_features = cfg.MODEL.TENSOR_MASK.IN_FEATURES
        self.anchor_sizes = cfg.MODEL.ANCHOR_GENERATOR.SIZES
        self.num_levels = len(cfg.MODEL.ANCHOR_GENERATOR.SIZES)
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_GAMMA
        # Inference parameters:
        self.score_threshold = cfg.MODEL.TENSOR_MASK.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.TENSOR_MASK.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.TENSOR_MASK.NMS_THRESH_TEST
        self.nms_type = cfg.MODEL.NMS_TYPE
        self.detections_im = cfg.TEST.DETECTIONS_PER_IMAGE
        # Mask parameters:
        self.mask_on = cfg.MODEL.MASK_ON
        self.mask_loss_weight = cfg.MODEL.TENSOR_MASK.MASK_LOSS_WEIGHT
        self.mask_pos_weight = torch.tensor(
            cfg.MODEL.TENSOR_MASK.POSITIVE_WEIGHT,
            dtype=torch.float32,
            device=self.device)
        self.bipyramid_on = cfg.MODEL.TENSOR_MASK.BIPYRAMID_ON
        # fmt: on

        # build the backbone
        self.backbone = cfg.build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        feature_strides = [x.stride for x in feature_shapes]
        # build anchors
        self.anchor_generator = TensorMaskAnchorGenerator(cfg, feature_shapes)
        self.num_anchors = self.anchor_generator.num_cell_anchors[0]
        anchors_min_level = cfg.MODEL.ANCHOR_GENERATOR.SIZES[0]
        self.mask_sizes = [
            size // feature_strides[0] for size in anchors_min_level
        ]
        self.min_anchor_size = min(anchors_min_level) - feature_strides[0]

        # head of the TensorMask
        self.head = TensorMaskHead(cfg, self.num_levels, self.num_anchors,
                                   self.mask_sizes, feature_shapes)
        # box transform
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.TENSOR_MASK.BBOX_REG_WEIGHTS)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #2
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.image_size = cfg.MODEL.SSD.IMAGE_SIZE
        self.num_classes = cfg.MODEL.SSD.NUM_CLASSES
        self.in_features = cfg.MODEL.SSD.IN_FEATURES
        self.extra_layer_arch = cfg.MODEL.SSD.EXTRA_LAYER_ARCH[str(self.image_size)]
        self.l2norm_scale = cfg.MODEL.SSD.L2NORM_SCALE
        # Loss parameters:
        self.loss_alpha = cfg.MODEL.SSD.LOSS_ALPHA
        self.smooth_l1_loss_beta = cfg.MODEL.SSD.SMOOTH_L1_LOSS_BETA
        self.negative_positive_ratio = cfg.MODEL.SSD.NEGATIVE_POSITIVE_RATIO
        # Inference parameters:
        self.score_threshold = cfg.MODEL.SSD.SCORE_THRESH_TEST
        self.nms_threshold = cfg.MODEL.SSD.NMS_THRESH_TEST
        self.nms_type = cfg.MODEL.NMS_TYPE
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # fmt: on

        self.backbone = cfg.build_backbone(
            cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]

        # build extra layers
        self.extra_layers = self._make_extra_layers(
            feature_shapes[-1].channels, self.extra_layer_arch)
        extra_layer_channels = [c for c in self.extra_layer_arch if isinstance(c, int)]
        feature_shapes += [ShapeSpec(channels=c) for c in extra_layer_channels[1::2]]

        # ssd head
        self.head = SSDHead(cfg, feature_shapes)
        self.l2norm = L2Norm(512, self.l2norm_scale)
        self.default_box_generator = cfg.build_default_box_generator(cfg)
        self.default_boxes = self.default_box_generator()

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.SSD.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.SSD.IOU_THRESHOLDS,
            cfg.MODEL.SSD.IOU_LABELS,
            allow_low_quality_matches=False,
        )

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)

        # Initialization
        self._init_weights()
Example #3
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        self.num_classes = cfg.MODEL.EFFICIENTDET.NUM_CLASSES
        self.in_features = cfg.MODEL.EFFICIENTDET.IN_FEATURES
        self.freeze_bn = cfg.MODEL.EFFICIENTDET.FREEZE_BN
        self.freeze_backbone = cfg.MODEL.EFFICIENTDET.FREEZE_BACKBONE
        self.input_size = cfg.MODEL.BIFPN.INPUT_SIZE
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.EFFICIENTDET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.EFFICIENTDET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.EFFICIENTDET.SMOOTH_L1_LOSS_BETA
        self.box_loss_weight = cfg.MODEL.EFFICIENTDET.BOX_LOSS_WEIGHT
        self.regress_norm = cfg.MODEL.EFFICIENTDET.REG_NORM
        # Inference parameters:
        self.score_threshold = cfg.MODEL.EFFICIENTDET.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.EFFICIENTDET.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.EFFICIENTDET.NMS_THRESH_TEST
        self.nms_type = cfg.MODEL.NMS_TYPE
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE

        self.backbone = cfg.build_backbone(
            cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = EfficientDetHead(cfg, feature_shapes)
        self.anchor_generator = cfg.build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.EFFICIENTDET.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.EFFICIENTDET.IOU_THRESHOLDS,
            cfg.MODEL.EFFICIENTDET.IOU_LABELS,
            allow_low_quality_matches=False,
        )

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x / 255. - pixel_mean) / pixel_std

        if self.freeze_bn:
            for layer in self.modules():
                if isinstance(layer, nn.BatchNorm2d):
                    layer.eval()

        if self.freeze_backbone:
            for name, params in self.named_parameters():
                if name.startswith("backbone.bottom_up"):
                    params.requires_grad = False

        self.to(self.device)
Example #4
    def get_outputs_converter(predict_net, init_net):
        self = types.SimpleNamespace()
        serialized_anchor_generator = io.BytesIO(
            get_pb_arg_vals(predict_net, "serialized_anchor_generator", None))
        self.anchor_generator = torch.load(serialized_anchor_generator)
        bbox_reg_weights = get_pb_arg_floats(predict_net, "bbox_reg_weights",
                                             None)
        self.box2box_transform = Box2BoxTransform(
            weights=tuple(bbox_reg_weights))
        self.score_threshold = get_pb_arg_valf(predict_net, "score_threshold",
                                               None)
        self.topk_candidates = get_pb_arg_vali(predict_net, "topk_candidates",
                                               None)
        self.nms_threshold = get_pb_arg_valf(predict_net, "nms_threshold",
                                             None)
        self.max_detections_per_image = get_pb_arg_vali(
            predict_net, "max_detections_per_image", None)

        # hack to reuse inference code from RetinaNet
        self.inference = functools.partial(meta_arch.RetinaNet.inference, self)
        self.inference_single_image = functools.partial(
            meta_arch.RetinaNet.inference_single_image, self)

        def f(batched_inputs, c2_inputs, c2_results):
            image_sizes = [[int(im[0]), int(im[1])]
                           for im in c2_inputs["im_info"]]

            num_features = len(
                [x for x in c2_results.keys() if x.startswith("box_cls_")])
            box_cls = [
                c2_results["box_cls_{}".format(i)] for i in range(num_features)
            ]
            box_delta = [
                c2_results["box_delta_{}".format(i)]
                for i in range(num_features)
            ]

            # For each feature level, the dummy features should have the same
            # batch size and spatial dimensions as box_cls and box_delta.
            dummy_features = [
                box_delta[i].clone()[:, 0:0, :, :] for i in range(num_features)
            ]
            anchors = self.anchor_generator(dummy_features)

            # self.num_classes can be inferred from the channel counts:
            # box_delta has num_anchors * 4 channels and box_cls has
            # num_anchors * num_classes channels.
            self.num_classes = box_cls[0].shape[1] // (box_delta[0].shape[1] //
                                                       4)

            results = self.inference(box_cls, box_delta, anchors, image_sizes)
            return meta_arch.GeneralizedRCNN._postprocess(
                results, batched_inputs, image_sizes)

        return f
Example #5
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.nms_type = cfg.MODEL.NMS_TYPE
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # fmt: on

        self.backbone = cfg.build_backbone(
            cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = RetinaNetHead(cfg, feature_shapes)
        self.anchor_generator = cfg.build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
        """
        In Detectron1, loss is normalized by number of foreground samples in the batch.
        When batch size is 1 per GPU, #foreground has a large variance and
        using it lead to lower performance. Here we maintain an EMA of #foreground to
        stabilize the normalizer.
        """
        self.loss_normalizer = 100  # initialize with any reasonable #fg that's not too small
        self.loss_normalizer_momentum = 0.9
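
The EMA update itself happens in the loss computation, which is not included in
this example. A minimal standalone sketch of the idea, with made-up per-batch
foreground counts (the names mirror loss_normalizer and
loss_normalizer_momentum above; this illustrates the update rule, it is not the
model's actual losses() code):

    # Hypothetical illustration of the EMA loss normalizer described above.
    loss_normalizer = 100.0   # same starting value as in the example
    momentum = 0.9

    for num_foreground in [12, 340, 5, 97]:  # made-up per-batch #fg counts
        # EMA update: keep most of the running estimate and mix in a little
        # of the new observation, so one outlier batch barely moves it.
        loss_normalizer = momentum * loss_normalizer + (1 - momentum) * num_foreground
        print(f"#fg={num_foreground:4d}  normalizer={loss_normalizer:7.2f}")
    # Losses are then divided by max(loss_normalizer, 1) instead of the raw
    # per-batch foreground count, which stabilizes the gradient scale.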
Example #6
    def test_reconstruction(self):
        weights = (5, 5, 10, 10)
        b2b_tfm = Box2BoxTransform(weights=weights)
        src_boxes = random_boxes([10, 10, 20, 20], 1, 10)
        dst_boxes = random_boxes([10, 10, 20, 20], 1, 10)

        devices = [torch.device("cpu")]
        if torch.cuda.is_available():
            devices.append(torch.device("cuda"))
        for device in devices:
            src_boxes = src_boxes.to(device=device)
            dst_boxes = dst_boxes.to(device=device)
            deltas = b2b_tfm.get_deltas(src_boxes, dst_boxes)
            dst_boxes_reconstructed = b2b_tfm.apply_deltas(deltas, src_boxes)
            assert torch.allclose(dst_boxes, dst_boxes_reconstructed)
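
The roundtrip in this test holds because get_deltas and apply_deltas are exact
inverses of the standard R-CNN box parameterization. A minimal single-box
sketch of that encoding (Box2BoxTransform itself operates on batched Nx4
tensors and clamps the scale deltas; encode/decode here are illustrative names,
not the library API):

    import torch

    def encode(src, dst, weights=(5, 5, 10, 10)):
        # Deltas from src box to dst box, both in (x1, y1, x2, y2) format.
        wx, wy, ww, wh = weights
        sw, sh = src[2] - src[0], src[3] - src[1]        # source width/height
        scx, scy = src[0] + 0.5 * sw, src[1] + 0.5 * sh  # source center
        dw, dh = dst[2] - dst[0], dst[3] - dst[1]
        dcx, dcy = dst[0] + 0.5 * dw, dst[1] + 0.5 * dh
        return torch.stack([
            wx * (dcx - scx) / sw,    # weighted, width-normalized center shift
            wy * (dcy - scy) / sh,
            ww * torch.log(dw / sw),  # weighted log scale change
            wh * torch.log(dh / sh),
        ])

    def decode(deltas, src, weights=(5, 5, 10, 10)):
        # Invert encode(): recover the dst box from deltas and the src box.
        wx, wy, ww, wh = weights
        sw, sh = src[2] - src[0], src[3] - src[1]
        scx, scy = src[0] + 0.5 * sw, src[1] + 0.5 * sh
        cx = deltas[0] / wx * sw + scx
        cy = deltas[1] / wy * sh + scy
        w = torch.exp(deltas[2] / ww) * sw
        h = torch.exp(deltas[3] / wh) * sh
        return torch.stack([cx - 0.5 * w, cy - 0.5 * h,
                            cx + 0.5 * w, cy + 0.5 * h])

    src = torch.tensor([10., 10., 20., 20.])
    dst = torch.tensor([12., 11., 22., 19.])
    assert torch.allclose(decode(encode(src, dst), src), dst)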
Example #7
    def test_fast_rcnn_empty_batch(self, device="cpu"):
        cfg = RCNNConfig()
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
        box2box_transform = Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS)

        logits = torch.randn(0, 100, requires_grad=True, device=device)
        deltas = torch.randn(0, 4, requires_grad=True, device=device)

        smooth_l1_beta = cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA
        outputs = FastRCNNOutputs(
            box2box_transform, logits, deltas, [], smooth_l1_beta
        )
        with EventStorage():  # capture events in a new storage to discard them
            losses = outputs.losses()

        for value in losses.values():
            self.assertTrue(torch.allclose(value, torch.zeros_like(value)))
Example #8
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.num_classes = cfg.MODEL.YOLOF.DECODER.NUM_CLASSES
        self.in_features = cfg.MODEL.YOLOF.ENCODER.IN_FEATURES
        self.pos_ignore_thresh = cfg.MODEL.YOLOF.POS_IGNORE_THRESHOLD
        self.neg_ignore_thresh = cfg.MODEL.YOLOF.NEG_IGNORE_THRESHOLD
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.YOLOF.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.YOLOF.FOCAL_LOSS_GAMMA
        # Inference parameters:
        self.score_threshold = cfg.MODEL.YOLOF.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.YOLOF.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.YOLOF.NMS_THRESH_TEST
        self.nms_type = cfg.MODEL.NMS_TYPE
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # fmt: on

        self.backbone = cfg.build_backbone(
            cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.encoder = cfg.build_encoder(cfg, backbone_shape)
        self.decoder = cfg.build_decoder(cfg)
        self.anchor_generator = cfg.build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.YOLOF.BBOX_REG_WEIGHTS,
            add_ctr_clamp=cfg.MODEL.YOLOF.ADD_CTR_CLAMP,
            ctr_clamp=cfg.MODEL.YOLOF.CTR_CLAMP)
        self.matcher = UniformMatcher(cfg.MODEL.YOLOF.MATCHER_TOPK)

        self.register_buffer(
            "pixel_mean",
            torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1))
        self.register_buffer(
            "pixel_std",
            torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1))
        self.to(self.device)
Example #9
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.nms_type = cfg.MODEL.NMS_TYPE
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # fmt: on

        self.backbone = cfg.build_backbone(
            cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = RetinaNetHead(cfg, feature_shapes)
        self.anchor_generator = cfg.build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #10
    def test_fast_rcnn(self):
        torch.manual_seed(132)
        cfg = RCNNConfig()
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
        box2box_transform = Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS)

        box_head_output_size = 8
        num_classes = 5
        cls_agnostic_bbox_reg = False

        box_predictor = FastRCNNOutputLayers(
            box_head_output_size, num_classes, cls_agnostic_bbox_reg, box_dim=4
        )
        feature_pooled = torch.rand(2, box_head_output_size)
        pred_class_logits, pred_proposal_deltas = box_predictor(feature_pooled)
        image_shape = (10, 10)
        proposal_boxes = torch.tensor([[0.8, 1.1, 3.2, 2.8], [2.3, 2.5, 7, 8]], dtype=torch.float32)
        gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
        result = Instances(image_shape)
        result.proposal_boxes = Boxes(proposal_boxes)
        result.gt_boxes = Boxes(gt_boxes)
        result.gt_classes = torch.tensor([1, 2])
        proposals = []
        proposals.append(result)
        smooth_l1_beta = cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA

        outputs = FastRCNNOutputs(
            box2box_transform, pred_class_logits, pred_proposal_deltas, proposals, smooth_l1_beta
        )
        with EventStorage():  # capture events in a new storage to discard them
            losses = outputs.losses()

        expected_losses = {
            "loss_cls": torch.tensor(1.7951188087),
            "loss_box_reg": torch.tensor(4.0357131958),
        }
        for name in expected_losses.keys():
            assert torch.allclose(losses[name], expected_losses[name])