コード例 #1
0
    def test_serialize_deserialize(self, include_mask):
        """Round-trips a Mask R-CNN model through `get_config`/`from_config`.

        Args:
            include_mask: When truthy, the model is built with a mask head,
                a mask sampler and a mask ROI aligner; otherwise all three
                are passed as None.
        """
        min_level, max_level = 3, 7
        num_classes = 2

        spec = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
        resnet_backbone = resnet.ResNet(model_id=50, input_specs=spec)
        fpn_decoder = fpn.FPN(min_level=min_level,
                              max_level=max_level,
                              input_specs=resnet_backbone.output_specs)
        rpn = dense_prediction_heads.RPNHead(min_level=min_level,
                                             max_level=max_level,
                                             num_anchors_per_location=3)
        det_head = instance_heads.DetectionHead(num_classes=num_classes)
        proposal_generator = roi_generator.MultilevelROIGenerator()
        proposal_sampler = roi_sampler.ROISampler()
        box_aligner = roi_aligner.MultilevelROIAligner()
        det_generator = detection_generator.DetectionGenerator()

        # Mask components default to "absent" and are only created on demand.
        mask_branch = None
        mask_target_sampler = None
        mask_aligner = None
        if include_mask:
            mask_branch = instance_heads.MaskHead(num_classes=num_classes,
                                                  upsample_factor=2)
            mask_target_sampler = mask_sampler.MaskSampler(
                mask_target_size=28, num_sampled_masks=1)
            mask_aligner = roi_aligner.MultilevelROIAligner(crop_size=14)

        original = maskrcnn_model.MaskRCNNModel(
            backbone=resnet_backbone,
            decoder=fpn_decoder,
            rpn_head=rpn,
            detection_head=det_head,
            roi_generator=proposal_generator,
            roi_sampler=proposal_sampler,
            roi_aligner=box_aligner,
            detection_generator=det_generator,
            mask_head=mask_branch,
            mask_sampler=mask_target_sampler,
            mask_roi_aligner=mask_aligner,
            min_level=min_level,
            max_level=max_level,
            num_scales=3,
            aspect_ratios=[1.0],
            anchor_size=3)

        rebuilt = maskrcnn_model.MaskRCNNModel.from_config(
            original.get_config())

        # The rebuilt model must also be expressible as JSON.
        rebuilt.to_json()

        # A faithful round trip yields the same config on both models.
        self.assertAllEqual(original.get_config(), rebuilt.get_config())
コード例 #2
0
def build_maskrcnn(input_specs: tf.keras.layers.InputSpec,
                   model_config: maskrcnn_cfg.MaskRCNN,
                   l2_regularizer: Optional[
                       tf.keras.regularizers.Regularizer] = None,
                   backbone: Optional[tf.keras.Model] = None,
                   decoder: Optional[tf.keras.Model] = None) -> tf.keras.Model:
    """Builds a Mask R-CNN model from a model config.

    Args:
        input_specs: Input spec of the model. Its shape without the leading
            dimension is used to create the symbolic input that the backbone
            is called on.
        model_config: Config providing the backbone, anchor, head, ROI
            generator/sampler/aligner, detection generator and (optional)
            mask settings.
        l2_regularizer: Optional regularizer forwarded to the backbone and
            decoder factories and passed as `kernel_regularizer` to the heads.
        backbone: Optional pre-built backbone. When falsy, one is built from
            `model_config.backbone`.
        decoder: Optional pre-built decoder. When falsy, one is built by the
            decoder factory from `model_config`.

    Returns:
        The assembled `maskrcnn_model.MaskRCNNModel`. When
        `model_config.roi_sampler.cascade_iou_thresholds` is truthy, the model
        receives a list of detection heads (base head plus one per threshold);
        otherwise it receives the single base head. The ROI sampler is always
        passed as a list.
    """
    norm_activation_config = model_config.norm_activation
    if not backbone:
        backbone = backbones.factory.build_backbone(
            input_specs=input_specs,
            backbone_config=model_config.backbone,
            norm_activation_config=norm_activation_config,
            l2_regularizer=l2_regularizer)
    backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))

    if not decoder:
        decoder = decoders.factory.build_decoder(
            input_specs=backbone.output_specs,
            model_config=model_config,
            l2_regularizer=l2_regularizer)

    rpn_head_config = model_config.rpn_head
    roi_generator_config = model_config.roi_generator
    roi_sampler_config = model_config.roi_sampler
    roi_aligner_config = model_config.roi_aligner
    detection_head_config = model_config.detection_head
    generator_config = model_config.detection_generator
    cascade_iou_thresholds = roi_sampler_config.cascade_iou_thresholds
    num_anchors_per_location = (len(model_config.anchor.aspect_ratios) *
                                model_config.anchor.num_scales)

    rpn_head = dense_prediction_heads.RPNHead(
        min_level=model_config.min_level,
        max_level=model_config.max_level,
        num_anchors_per_location=num_anchors_per_location,
        num_convs=rpn_head_config.num_convs,
        num_filters=rpn_head_config.num_filters,
        use_separable_conv=rpn_head_config.use_separable_conv,
        activation=norm_activation_config.activation,
        use_sync_bn=norm_activation_config.use_sync_bn,
        norm_momentum=norm_activation_config.norm_momentum,
        norm_epsilon=norm_activation_config.norm_epsilon,
        kernel_regularizer=l2_regularizer)

    # Every detection head (base and cascade) shares these settings; only the
    # layer name differs. Keeping them in one place prevents the copies from
    # drifting apart.
    detection_head_kwargs = dict(
        num_classes=model_config.num_classes,
        num_convs=detection_head_config.num_convs,
        num_filters=detection_head_config.num_filters,
        use_separable_conv=detection_head_config.use_separable_conv,
        num_fcs=detection_head_config.num_fcs,
        fc_dims=detection_head_config.fc_dims,
        class_agnostic_bbox_pred=(
            detection_head_config.class_agnostic_bbox_pred),
        activation=norm_activation_config.activation,
        use_sync_bn=norm_activation_config.use_sync_bn,
        norm_momentum=norm_activation_config.norm_momentum,
        norm_epsilon=norm_activation_config.norm_epsilon,
        kernel_regularizer=l2_regularizer)

    detection_head = instance_heads.DetectionHead(
        name='detection_head', **detection_head_kwargs)

    # The decoder is truth-tested again because it may still be falsy after
    # the factory call; in that case the calls below are skipped.
    # Presumably these calls build the decoder and RPN head up front; confirm.
    if decoder:
        decoder_features = decoder(backbone_features)
        rpn_head(decoder_features)

    if cascade_iou_thresholds:
        # One additional head per cascade threshold, named
        # detection_head_1, detection_head_2, ...
        detection_head_cascade = [detection_head]
        for cascade_num in range(1, len(cascade_iou_thresholds) + 1):
            detection_head_cascade.append(
                instance_heads.DetectionHead(
                    name='detection_head_{}'.format(cascade_num),
                    **detection_head_kwargs))
        detection_head = detection_head_cascade

    roi_generator_obj = roi_generator.MultilevelROIGenerator(
        pre_nms_top_k=roi_generator_config.pre_nms_top_k,
        pre_nms_score_threshold=roi_generator_config.pre_nms_score_threshold,
        pre_nms_min_size_threshold=(
            roi_generator_config.pre_nms_min_size_threshold),
        nms_iou_threshold=roi_generator_config.nms_iou_threshold,
        num_proposals=roi_generator_config.num_proposals,
        test_pre_nms_top_k=roi_generator_config.test_pre_nms_top_k,
        test_pre_nms_score_threshold=(
            roi_generator_config.test_pre_nms_score_threshold),
        test_pre_nms_min_size_threshold=(
            roi_generator_config.test_pre_nms_min_size_threshold),
        test_nms_iou_threshold=roi_generator_config.test_nms_iou_threshold,
        test_num_proposals=roi_generator_config.test_num_proposals,
        use_batched_nms=roi_generator_config.use_batched_nms)

    # The first sampler is fully driven by the config.
    roi_sampler_cascade = [
        roi_sampler.ROISampler(
            mix_gt_boxes=roi_sampler_config.mix_gt_boxes,
            num_sampled_rois=roi_sampler_config.num_sampled_rois,
            foreground_fraction=roi_sampler_config.foreground_fraction,
            foreground_iou_threshold=(
                roi_sampler_config.foreground_iou_threshold),
            background_iou_high_threshold=(
                roi_sampler_config.background_iou_high_threshold),
            background_iou_low_threshold=(
                roi_sampler_config.background_iou_low_threshold))
    ]
    # Initialize additional ROI samplers for the cascade heads. Each one uses
    # its cascade threshold as both the foreground and the upper background
    # IoU threshold, and skips subsampling.
    if cascade_iou_thresholds:
        for iou in cascade_iou_thresholds:
            roi_sampler_cascade.append(
                roi_sampler.ROISampler(
                    mix_gt_boxes=False,
                    num_sampled_rois=roi_sampler_config.num_sampled_rois,
                    foreground_iou_threshold=iou,
                    background_iou_high_threshold=iou,
                    background_iou_low_threshold=0.0,
                    skip_subsampling=True))

    roi_aligner_obj = roi_aligner.MultilevelROIAligner(
        crop_size=roi_aligner_config.crop_size,
        sample_offset=roi_aligner_config.sample_offset)

    detection_generator_obj = detection_generator.DetectionGenerator(
        apply_nms=generator_config.apply_nms,
        pre_nms_top_k=generator_config.pre_nms_top_k,
        pre_nms_score_threshold=generator_config.pre_nms_score_threshold,
        nms_iou_threshold=generator_config.nms_iou_threshold,
        max_num_detections=generator_config.max_num_detections,
        nms_version=generator_config.nms_version,
        use_cpu_nms=generator_config.use_cpu_nms,
        soft_nms_sigma=generator_config.soft_nms_sigma)

    if model_config.include_mask:
        mask_head_config = model_config.mask_head
        mask_roi_aligner_config = model_config.mask_roi_aligner

        # NOTE(review): unlike the RPN and detection heads, `use_sync_bn` is
        # not forwarded to the mask head here. Confirm whether that is
        # intentional before changing it.
        mask_head = instance_heads.MaskHead(
            num_classes=model_config.num_classes,
            upsample_factor=mask_head_config.upsample_factor,
            num_convs=mask_head_config.num_convs,
            num_filters=mask_head_config.num_filters,
            use_separable_conv=mask_head_config.use_separable_conv,
            activation=norm_activation_config.activation,
            norm_momentum=norm_activation_config.norm_momentum,
            norm_epsilon=norm_activation_config.norm_epsilon,
            kernel_regularizer=l2_regularizer,
            class_agnostic=mask_head_config.class_agnostic)

        # The mask target size is the aligner crop size scaled by the head's
        # upsample factor.
        mask_sampler_obj = mask_sampler.MaskSampler(
            mask_target_size=(mask_roi_aligner_config.crop_size *
                              mask_head_config.upsample_factor),
            num_sampled_masks=model_config.mask_sampler.num_sampled_masks)

        mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(
            crop_size=mask_roi_aligner_config.crop_size,
            sample_offset=mask_roi_aligner_config.sample_offset)
    else:
        mask_head = None
        mask_sampler_obj = None
        mask_roi_aligner_obj = None

    return maskrcnn_model.MaskRCNNModel(
        backbone=backbone,
        decoder=decoder,
        rpn_head=rpn_head,
        detection_head=detection_head,
        roi_generator=roi_generator_obj,
        roi_sampler=roi_sampler_cascade,
        roi_aligner=roi_aligner_obj,
        detection_generator=detection_generator_obj,
        mask_head=mask_head,
        mask_sampler=mask_sampler_obj,
        mask_roi_aligner=mask_roi_aligner_obj,
        class_agnostic_bbox_pred=(
            detection_head_config.class_agnostic_bbox_pred),
        cascade_class_ensemble=detection_head_config.cascade_class_ensemble,
        min_level=model_config.min_level,
        max_level=model_config.max_level,
        num_scales=model_config.anchor.num_scales,
        aspect_ratios=model_config.anchor.aspect_ratios,
        anchor_size=model_config.anchor.anchor_size)
コード例 #3
0
    def test_build_model(self, include_mask, use_separable_conv,
                         build_anchor_boxes, is_training):
        """Builds a Mask R-CNN model and calls it once as a smoke test.

        Args:
            include_mask: When truthy, the mask head, mask sampler and mask
                ROI aligner are created and ground-truth masks are supplied;
                otherwise None is passed for all of them.
            use_separable_conv: Forwarded to the FPN decoder.
            build_anchor_boxes: When truthy, multilevel anchor boxes are
                generated and tiled along a new leading axis of size two;
                otherwise None is passed to the model.
            is_training: Forwarded to the model call as `training`.
        """
        batch_size = 2
        image_size = 384
        num_classes = 3
        min_level, max_level = 3, 7
        num_scales = 3
        aspect_ratios = [1.0]
        anchor_size = 3
        num_anchors_per_location = num_scales * len(aspect_ratios)

        images = np.random.rand(batch_size, image_size, image_size, 3)
        image_shape = np.array([[image_size, image_size]] * batch_size)

        anchor_boxes = None
        if build_anchor_boxes:
            anchor_boxes = anchor.Anchor(
                min_level=min_level,
                max_level=max_level,
                num_scales=num_scales,
                aspect_ratios=aspect_ratios,
                anchor_size=anchor_size,
                image_size=(image_size, image_size)).multilevel_boxes
            # Give every level's boxes a leading axis repeated per image.
            for level in anchor_boxes:
                per_image = tf.expand_dims(anchor_boxes[level], axis=0)
                anchor_boxes[level] = tf.tile(per_image,
                                              [batch_size, 1, 1, 1])

        resnet_backbone = resnet.ResNet(model_id=50)
        fpn_decoder = fpn.FPN(input_specs=resnet_backbone.output_specs,
                              min_level=min_level,
                              max_level=max_level,
                              use_separable_conv=use_separable_conv)
        rpn = dense_prediction_heads.RPNHead(
            min_level=min_level,
            max_level=max_level,
            num_anchors_per_location=num_anchors_per_location,
            num_convs=1)
        det_head = instance_heads.DetectionHead(num_classes=num_classes)
        proposal_generator = roi_generator.MultilevelROIGenerator()
        proposal_sampler = roi_sampler.ROISampler()
        box_aligner = roi_aligner.MultilevelROIAligner()
        det_generator = detection_generator.DetectionGenerator()

        mask_branch = None
        mask_target_sampler = None
        mask_aligner = None
        if include_mask:
            mask_branch = instance_heads.MaskHead(num_classes=num_classes,
                                                  upsample_factor=2)
            mask_target_sampler = mask_sampler.MaskSampler(
                mask_target_size=28, num_sampled_masks=1)
            mask_aligner = roi_aligner.MultilevelROIAligner(crop_size=14)

        model = maskrcnn_model.MaskRCNNModel(
            backbone=resnet_backbone,
            decoder=fpn_decoder,
            rpn_head=rpn,
            detection_head=det_head,
            roi_generator=proposal_generator,
            roi_sampler=proposal_sampler,
            roi_aligner=box_aligner,
            detection_generator=det_generator,
            mask_head=mask_branch,
            mask_sampler=mask_target_sampler,
            mask_roi_aligner=mask_aligner,
            min_level=min_level,
            max_level=max_level,
            num_scales=num_scales,
            aspect_ratios=aspect_ratios,
            anchor_size=anchor_size)

        # Rows of -1 fill the unused ground-truth slots.
        gt_boxes = np.array(
            [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
             [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
            dtype=np.float32)
        gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
        gt_masks = (np.ones((batch_size, 3, 100, 100))
                    if include_mask else None)

        # Only checks that the call succeeds; outputs are verified in
        # test_forward.
        model(images,
              image_shape,
              anchor_boxes,
              gt_boxes,
              gt_classes,
              gt_masks,
              training=is_training)
コード例 #4
0
    def test_checkpoint(self, include_mask):
        """Checks `checkpoint_items` and partial checkpoint loading.

        Args:
            include_mask: When truthy, the model is built with mask
                components, `mask_head` is expected among the checkpoint
                items, and a backbone-plus-mask-head partial restore is also
                exercised.
        """
        min_level, max_level = 3, 7
        num_classes = 2

        spec = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
        backbone = resnet.ResNet(model_id=50, input_specs=spec)
        decoder = fpn.FPN(min_level=min_level,
                          max_level=max_level,
                          input_specs=backbone.output_specs)
        rpn_head = dense_prediction_heads.RPNHead(min_level=min_level,
                                                  max_level=max_level,
                                                  num_anchors_per_location=3)
        detection_head = instance_heads.DetectionHead(num_classes=num_classes)
        proposal_generator = roi_generator.MultilevelROIGenerator()
        proposal_sampler = roi_sampler.ROISampler()
        box_aligner = roi_aligner.MultilevelROIAligner()
        det_generator = detection_generator.DetectionGenerator()

        mask_head = None
        mask_target_sampler = None
        mask_aligner = None
        if include_mask:
            mask_head = instance_heads.MaskHead(num_classes=num_classes,
                                                upsample_factor=2)
            mask_target_sampler = mask_sampler.MaskSampler(
                mask_target_size=28, num_sampled_masks=1)
            mask_aligner = roi_aligner.MultilevelROIAligner(crop_size=14)

        model = maskrcnn_model.MaskRCNNModel(
            backbone=backbone,
            decoder=decoder,
            rpn_head=rpn_head,
            detection_head=detection_head,
            roi_generator=proposal_generator,
            roi_sampler=proposal_sampler,
            roi_aligner=box_aligner,
            detection_generator=det_generator,
            mask_head=mask_head,
            mask_sampler=mask_target_sampler,
            mask_roi_aligner=mask_aligner,
            min_level=min_level,
            max_level=max_level,
            num_scales=3,
            aspect_ratios=[1.0],
            anchor_size=3)

        # The detection head is expected back wrapped in a list.
        expected_items = {
            'backbone': backbone,
            'decoder': decoder,
            'rpn_head': rpn_head,
            'detection_head': [detection_head],
        }
        if include_mask:
            expected_items['mask_head'] = mask_head
        self.assertAllEqual(expected_items, model.checkpoint_items)

        # Save a full checkpoint, then load subsets of it.
        full_ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items)
        save_dir = self.create_tempdir().full_path
        full_ckpt.save(os.path.join(save_dir, 'ckpt'))
        latest_path = tf.train.latest_checkpoint(save_dir)

        backbone_only = tf.train.Checkpoint(backbone=backbone)
        backbone_status = backbone_only.read(latest_path)
        backbone_status.expect_partial().assert_existing_objects_matched()

        if include_mask:
            backbone_and_mask = tf.train.Checkpoint(backbone=backbone,
                                                    mask_head=mask_head)
            mask_status = backbone_and_mask.restore(latest_path)
            mask_status.expect_partial().assert_existing_objects_matched()
コード例 #5
0
    def test_forward(self, strategy, include_mask, build_anchor_boxes,
                     training, use_cascade_heads):
        """Runs a forward pass and checks which output keys are present.

        Args:
            strategy: Distribution strategy; the model, its inputs and the
                model call are all created inside `strategy.scope()`.
            include_mask: When truthy, the mask head, mask sampler and mask
                ROI aligner are created and ground-truth masks are supplied.
            build_anchor_boxes: When truthy, multilevel anchor boxes are
                generated and passed to the model; otherwise None is passed.
            training: Forwarded to the model call; also selects whether the
                training or the inference output keys are asserted.
            use_cascade_heads: When truthy, one extra detection head and one
                extra ROI sampler are built per cascade IoU threshold, and
                class-agnostic box prediction and class ensembling are
                enabled.
        """
        num_classes = 3
        min_level = 3
        max_level = 4
        num_scales = 3
        aspect_ratios = [1.0]
        anchor_size = 3
        if use_cascade_heads:
            cascade_iou_thresholds = [0.6]
            class_agnostic_bbox_pred = True
            cascade_class_ensemble = True
        else:
            cascade_iou_thresholds = None
            class_agnostic_bbox_pred = False
            cascade_class_ensemble = False

        image_size = (256, 256)
        images = np.random.rand(2, image_size[0], image_size[1], 3)
        image_shape = np.array([[224, 100], [100, 224]])
        with strategy.scope():
            if build_anchor_boxes:
                anchor_boxes = anchor.Anchor(
                    min_level=min_level,
                    max_level=max_level,
                    num_scales=num_scales,
                    aspect_ratios=aspect_ratios,
                    anchor_size=anchor_size,
                    image_size=image_size).multilevel_boxes
            else:
                anchor_boxes = None
            num_anchors_per_location = len(aspect_ratios) * num_scales

            input_specs = tf.keras.layers.InputSpec(
                shape=[None, None, None, 3])
            backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
            decoder = fpn.FPN(min_level=min_level,
                              max_level=max_level,
                              input_specs=backbone.output_specs)
            rpn_head = dense_prediction_heads.RPNHead(
                min_level=min_level,
                max_level=max_level,
                num_anchors_per_location=num_anchors_per_location)
            detection_head = instance_heads.DetectionHead(
                num_classes=num_classes,
                class_agnostic_bbox_pred=class_agnostic_bbox_pred)
            roi_generator_obj = roi_generator.MultilevelROIGenerator()

            # The model receives the whole sampler list. Previously only the
            # last sampler created was passed, so the cascade configuration
            # was never exercised.
            roi_sampler_cascade = [roi_sampler.ROISampler()]
            if cascade_iou_thresholds:
                # Each cascade stage needs its own detection head and its own
                # sampler, so both lists grow by one entry per threshold.
                detection_head = [detection_head]
                for iou in cascade_iou_thresholds:
                    detection_head.append(
                        instance_heads.DetectionHead(
                            num_classes=num_classes,
                            class_agnostic_bbox_pred=class_agnostic_bbox_pred))
                    roi_sampler_cascade.append(
                        roi_sampler.ROISampler(
                            mix_gt_boxes=False,
                            foreground_iou_threshold=iou,
                            background_iou_high_threshold=iou,
                            background_iou_low_threshold=0.0,
                            skip_subsampling=True))
            roi_aligner_obj = roi_aligner.MultilevelROIAligner()
            detection_generator_obj = detection_generator.DetectionGenerator()
            if include_mask:
                mask_head = instance_heads.MaskHead(num_classes=num_classes,
                                                    upsample_factor=2)
                mask_sampler_obj = mask_sampler.MaskSampler(
                    mask_target_size=28, num_sampled_masks=1)
                mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(
                    crop_size=14)
            else:
                mask_head = None
                mask_sampler_obj = None
                mask_roi_aligner_obj = None
            model = maskrcnn_model.MaskRCNNModel(
                backbone,
                decoder,
                rpn_head,
                detection_head,
                roi_generator_obj,
                roi_sampler_cascade,
                roi_aligner_obj,
                detection_generator_obj,
                mask_head,
                mask_sampler_obj,
                mask_roi_aligner_obj,
                class_agnostic_bbox_pred=class_agnostic_bbox_pred,
                cascade_class_ensemble=cascade_class_ensemble,
                min_level=min_level,
                max_level=max_level,
                num_scales=num_scales,
                aspect_ratios=aspect_ratios,
                anchor_size=anchor_size)

            # Rows of -1 fill the unused ground-truth slots.
            gt_boxes = np.array(
                [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
                 [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
                dtype=np.float32)
            gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
            if include_mask:
                gt_masks = np.ones((2, 3, 100, 100))
            else:
                gt_masks = None

            results = model(images,
                            image_shape,
                            anchor_boxes,
                            gt_boxes,
                            gt_classes,
                            gt_masks,
                            training=training)

        # RPN outputs are expected in both modes.
        self.assertIn('rpn_boxes', results)
        self.assertIn('rpn_scores', results)
        if training:
            self.assertIn('class_targets', results)
            self.assertIn('box_targets', results)
            self.assertIn('class_outputs', results)
            self.assertIn('box_outputs', results)
            if include_mask:
                self.assertIn('mask_outputs', results)
        else:
            self.assertIn('detection_boxes', results)
            self.assertIn('detection_scores', results)
            self.assertIn('detection_classes', results)
            self.assertIn('num_detections', results)
            if include_mask:
                self.assertIn('detection_masks', results)