def test_serialize_deserialize(self, include_mask):
  """Checks that a model rebuilt from its own config reports the same config.

  Args:
    include_mask: Whether the model under test is built with a mask head,
      mask sampler and mask ROI aligner (otherwise all three are None).
  """
  min_level, max_level = 3, 7

  image_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
  backbone = resnet.ResNet(model_id=50, input_specs=image_specs)
  decoder = fpn.FPN(
      min_level=min_level,
      max_level=max_level,
      input_specs=backbone.output_specs)
  rpn_head = dense_prediction_heads.RPNHead(
      min_level=min_level,
      max_level=max_level,
      num_anchors_per_location=3)
  detection_head = instance_heads.DetectionHead(num_classes=2)
  box_roi_generator = roi_generator.MultilevelROIGenerator()
  box_roi_sampler = roi_sampler.ROISampler()
  box_roi_aligner = roi_aligner.MultilevelROIAligner()
  box_detection_generator = detection_generator.DetectionGenerator()

  # Mask components stay None unless the mask branch is requested.
  mask_head = None
  mask_target_sampler = None
  mask_feature_aligner = None
  if include_mask:
    mask_head = instance_heads.MaskHead(num_classes=2, upsample_factor=2)
    mask_target_sampler = mask_sampler.MaskSampler(
        mask_target_size=28, num_sampled_masks=1)
    mask_feature_aligner = roi_aligner.MultilevelROIAligner(crop_size=14)

  model = maskrcnn_model.MaskRCNNModel(
      backbone,
      decoder,
      rpn_head,
      detection_head,
      box_roi_generator,
      box_roi_sampler,
      box_roi_aligner,
      box_detection_generator,
      mask_head,
      mask_target_sampler,
      mask_feature_aligner,
      min_level=min_level,
      max_level=max_level,
      num_scales=3,
      aspect_ratios=[1.0],
      anchor_size=3)

  original_config = model.get_config()
  new_model = maskrcnn_model.MaskRCNNModel.from_config(original_config)

  # Validate that the config can be forced to JSON.
  new_model.to_json()

  # If the serialization was successful, the new config should match the old.
  self.assertAllEqual(model.get_config(), new_model.get_config())
def build_maskrcnn(input_specs: tf.keras.layers.InputSpec,
                   model_config: maskrcnn_cfg.MaskRCNN,
                   l2_regularizer: Optional[
                       tf.keras.regularizers.Regularizer] = None,
                   backbone: Optional[tf.keras.Model] = None,
                   decoder: Optional[tf.keras.Model] = None) -> tf.keras.Model:
  """Builds Mask R-CNN model.

  Args:
    input_specs: Input spec of the images; `input_specs.shape[1:]` is used to
      create the symbolic input the backbone is called on.
    model_config: Mask R-CNN config supplying the backbone, anchor, head,
      ROI generator/sampler/aligner, detection generator and mask settings.
    l2_regularizer: Optional regularizer handed to the backbone and decoder
      factories and used as `kernel_regularizer` of every head.
    backbone: Optional pre-built backbone. When falsy, one is built from
      `model_config.backbone`.
    decoder: Optional pre-built decoder. When falsy, one is built with
      `decoders.factory.build_decoder`.

  Returns:
    A `maskrcnn_model.MaskRCNNModel` assembled from the components above.
  """
  norm_cfg = model_config.norm_activation

  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_cfg,
        l2_regularizer=l2_regularizer)
  # Call the backbone on a symbolic input so its features can be fed to the
  # decoder and RPN head further down.
  backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))

  if not decoder:
    decoder = decoders.factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  anchor_cfg = model_config.anchor
  rpn_cfg = model_config.rpn_head
  proposal_cfg = model_config.roi_generator
  sampler_cfg = model_config.roi_sampler
  aligner_cfg = model_config.roi_aligner
  box_head_cfg = model_config.detection_head
  detection_cfg = model_config.detection_generator
  cascade_thresholds = sampler_cfg.cascade_iou_thresholds

  # Normalization/activation arguments shared by the RPN and detection heads.
  shared_head_kwargs = dict(
      activation=norm_cfg.activation,
      use_sync_bn=norm_cfg.use_sync_bn,
      norm_momentum=norm_cfg.norm_momentum,
      norm_epsilon=norm_cfg.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  def _make_detection_head(name):
    """Creates one detection head; all cascade stages share these settings."""
    return instance_heads.DetectionHead(
        num_classes=model_config.num_classes,
        num_convs=box_head_cfg.num_convs,
        num_filters=box_head_cfg.num_filters,
        use_separable_conv=box_head_cfg.use_separable_conv,
        num_fcs=box_head_cfg.num_fcs,
        fc_dims=box_head_cfg.fc_dims,
        class_agnostic_bbox_pred=box_head_cfg.class_agnostic_bbox_pred,
        name=name,
        **shared_head_kwargs)

  rpn_head = dense_prediction_heads.RPNHead(
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_anchors_per_location=(
          len(anchor_cfg.aspect_ratios) * anchor_cfg.num_scales),
      num_convs=rpn_cfg.num_convs,
      num_filters=rpn_cfg.num_filters,
      use_separable_conv=rpn_cfg.use_separable_conv,
      **shared_head_kwargs)

  detection_head = _make_detection_head('detection_head')

  # The decoder and RPN head are only invoked here when a truthy decoder is
  # available.
  if decoder:
    decoder_features = decoder(backbone_features)
    rpn_head(decoder_features)

  # With cascade IoU thresholds configured, `detection_head` becomes a list:
  # the first-stage head followed by one extra head per threshold.
  if cascade_thresholds:
    detection_head = [detection_head] + [
        _make_detection_head('detection_head_{}'.format(stage))
        for stage in range(1, len(cascade_thresholds) + 1)
    ]

  roi_generator_obj = roi_generator.MultilevelROIGenerator(
      pre_nms_top_k=proposal_cfg.pre_nms_top_k,
      pre_nms_score_threshold=proposal_cfg.pre_nms_score_threshold,
      pre_nms_min_size_threshold=proposal_cfg.pre_nms_min_size_threshold,
      nms_iou_threshold=proposal_cfg.nms_iou_threshold,
      num_proposals=proposal_cfg.num_proposals,
      test_pre_nms_top_k=proposal_cfg.test_pre_nms_top_k,
      test_pre_nms_score_threshold=proposal_cfg.test_pre_nms_score_threshold,
      test_pre_nms_min_size_threshold=(
          proposal_cfg.test_pre_nms_min_size_threshold),
      test_nms_iou_threshold=proposal_cfg.test_nms_iou_threshold,
      test_num_proposals=proposal_cfg.test_num_proposals,
      use_batched_nms=proposal_cfg.use_batched_nms)

  # The first-stage sampler is always present.
  roi_sampler_cascade = [
      roi_sampler.ROISampler(
          mix_gt_boxes=sampler_cfg.mix_gt_boxes,
          num_sampled_rois=sampler_cfg.num_sampled_rois,
          foreground_fraction=sampler_cfg.foreground_fraction,
          foreground_iou_threshold=sampler_cfg.foreground_iou_threshold,
          background_iou_high_threshold=(
              sampler_cfg.background_iou_high_threshold),
          background_iou_low_threshold=(
              sampler_cfg.background_iou_low_threshold))
  ]
  # Initialize additional ROI samplers for cascade heads, one per threshold.
  if cascade_thresholds:
    roi_sampler_cascade.extend(
        roi_sampler.ROISampler(
            mix_gt_boxes=False,
            num_sampled_rois=sampler_cfg.num_sampled_rois,
            foreground_iou_threshold=iou,
            background_iou_high_threshold=iou,
            background_iou_low_threshold=0.0,
            skip_subsampling=True) for iou in cascade_thresholds)

  roi_aligner_obj = roi_aligner.MultilevelROIAligner(
      crop_size=aligner_cfg.crop_size,
      sample_offset=aligner_cfg.sample_offset)

  detection_generator_obj = detection_generator.DetectionGenerator(
      apply_nms=detection_cfg.apply_nms,
      pre_nms_top_k=detection_cfg.pre_nms_top_k,
      pre_nms_score_threshold=detection_cfg.pre_nms_score_threshold,
      nms_iou_threshold=detection_cfg.nms_iou_threshold,
      max_num_detections=detection_cfg.max_num_detections,
      nms_version=detection_cfg.nms_version,
      use_cpu_nms=detection_cfg.use_cpu_nms,
      soft_nms_sigma=detection_cfg.soft_nms_sigma)

  # Mask components stay None unless the config asks for the mask branch.
  mask_head = None
  mask_sampler_obj = None
  mask_roi_aligner_obj = None
  if model_config.include_mask:
    mask_head_cfg = model_config.mask_head
    mask_aligner_cfg = model_config.mask_roi_aligner
    # NOTE(review): unlike the RPN and detection heads, `use_sync_bn` is not
    # forwarded to the mask head here -- confirm this is intended.
    mask_head = instance_heads.MaskHead(
        num_classes=model_config.num_classes,
        upsample_factor=mask_head_cfg.upsample_factor,
        num_convs=mask_head_cfg.num_convs,
        num_filters=mask_head_cfg.num_filters,
        use_separable_conv=mask_head_cfg.use_separable_conv,
        activation=norm_cfg.activation,
        norm_momentum=norm_cfg.norm_momentum,
        norm_epsilon=norm_cfg.norm_epsilon,
        kernel_regularizer=l2_regularizer,
        class_agnostic=mask_head_cfg.class_agnostic)

    # The mask target size is the aligner crop size times the upsample factor.
    mask_sampler_obj = mask_sampler.MaskSampler(
        mask_target_size=(
            mask_aligner_cfg.crop_size * mask_head_cfg.upsample_factor),
        num_sampled_masks=model_config.mask_sampler.num_sampled_masks)

    mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(
        crop_size=mask_aligner_cfg.crop_size,
        sample_offset=mask_aligner_cfg.sample_offset)

  return maskrcnn_model.MaskRCNNModel(
      backbone=backbone,
      decoder=decoder,
      rpn_head=rpn_head,
      detection_head=detection_head,
      roi_generator=roi_generator_obj,
      roi_sampler=roi_sampler_cascade,
      roi_aligner=roi_aligner_obj,
      detection_generator=detection_generator_obj,
      mask_head=mask_head,
      mask_sampler=mask_sampler_obj,
      mask_roi_aligner=mask_roi_aligner_obj,
      class_agnostic_bbox_pred=box_head_cfg.class_agnostic_bbox_pred,
      cascade_class_ensemble=box_head_cfg.cascade_class_ensemble,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_scales=anchor_cfg.num_scales,
      aspect_ratios=anchor_cfg.aspect_ratios,
      anchor_size=anchor_cfg.anchor_size)
def test_build_model(self, include_mask, use_separable_conv,
                     build_anchor_boxes, is_training):
  """Builds a Mask R-CNN model and runs a single call on random images.

  Only checks that construction and the call complete; the outputs are not
  inspected here.

  Args:
    include_mask: Whether to build the mask head, sampler and ROI aligner.
    use_separable_conv: Forwarded to the FPN decoder.
    build_anchor_boxes: Whether to pass pre-built, batch-tiled anchor boxes
      to the model instead of None.
    is_training: Value of the `training` argument of the model call.
  """
  batch_size = 2
  image_size = 384
  num_classes = 3
  resnet_model_id = 50
  anchor_kwargs = dict(
      min_level=3,
      max_level=7,
      num_scales=3,
      aspect_ratios=[1.0],
      anchor_size=3)
  num_anchors_per_location = (
      anchor_kwargs['num_scales'] * len(anchor_kwargs['aspect_ratios']))

  images = np.random.rand(batch_size, image_size, image_size, 3)
  image_shape = np.array([[image_size, image_size]] * batch_size)

  anchor_boxes = None
  if build_anchor_boxes:
    anchor_boxes = anchor.Anchor(
        image_size=(image_size, image_size),
        **anchor_kwargs).multilevel_boxes
    # Add a leading batch axis to every level and repeat it per image.
    for level in anchor_boxes:
      batched = tf.expand_dims(anchor_boxes[level], axis=0)
      anchor_boxes[level] = tf.tile(batched, [batch_size, 1, 1, 1])

  backbone = resnet.ResNet(model_id=resnet_model_id)
  decoder = fpn.FPN(
      input_specs=backbone.output_specs,
      min_level=anchor_kwargs['min_level'],
      max_level=anchor_kwargs['max_level'],
      use_separable_conv=use_separable_conv)
  rpn_head = dense_prediction_heads.RPNHead(
      min_level=anchor_kwargs['min_level'],
      max_level=anchor_kwargs['max_level'],
      num_anchors_per_location=num_anchors_per_location,
      num_convs=1)
  detection_head = instance_heads.DetectionHead(num_classes=num_classes)
  box_roi_generator = roi_generator.MultilevelROIGenerator()
  box_roi_sampler = roi_sampler.ROISampler()
  box_roi_aligner = roi_aligner.MultilevelROIAligner()
  box_detection_generator = detection_generator.DetectionGenerator()

  # Mask components stay None unless the mask branch is requested.
  mask_head = None
  mask_target_sampler = None
  mask_feature_aligner = None
  if include_mask:
    mask_head = instance_heads.MaskHead(
        num_classes=num_classes, upsample_factor=2)
    mask_target_sampler = mask_sampler.MaskSampler(
        mask_target_size=28, num_sampled_masks=1)
    mask_feature_aligner = roi_aligner.MultilevelROIAligner(crop_size=14)

  model = maskrcnn_model.MaskRCNNModel(
      backbone,
      decoder,
      rpn_head,
      detection_head,
      box_roi_generator,
      box_roi_sampler,
      box_roi_aligner,
      box_detection_generator,
      mask_head,
      mask_target_sampler,
      mask_feature_aligner,
      **anchor_kwargs)

  # Rows of -1 fill the unused box/class slots of each image.
  gt_boxes = np.array(
      [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
       [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
      dtype=np.float32)
  gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
  gt_masks = np.ones((2, 3, 100, 100)) if include_mask else None

  # Results will be checked in test_forward.
  model(
      images,
      image_shape,
      anchor_boxes,
      gt_boxes,
      gt_classes,
      gt_masks,
      training=is_training)
def test_checkpoint(self, include_mask):
  """Verifies `checkpoint_items` and partial restore from a saved checkpoint.

  Args:
    include_mask: Whether the model is built with a mask head, in which case
      `mask_head` is also expected among the checkpoint items and a
      backbone + mask-head partial restore is exercised as well.
  """
  min_level, max_level = 3, 7

  image_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
  backbone = resnet.ResNet(model_id=50, input_specs=image_specs)
  decoder = fpn.FPN(
      min_level=min_level,
      max_level=max_level,
      input_specs=backbone.output_specs)
  rpn_head = dense_prediction_heads.RPNHead(
      min_level=min_level,
      max_level=max_level,
      num_anchors_per_location=3)
  detection_head = instance_heads.DetectionHead(num_classes=2)
  box_roi_generator = roi_generator.MultilevelROIGenerator()
  box_roi_sampler = roi_sampler.ROISampler()
  box_roi_aligner = roi_aligner.MultilevelROIAligner()
  box_detection_generator = detection_generator.DetectionGenerator()

  # Mask components stay None unless the mask branch is requested.
  mask_head = None
  mask_target_sampler = None
  mask_feature_aligner = None
  if include_mask:
    mask_head = instance_heads.MaskHead(num_classes=2, upsample_factor=2)
    mask_target_sampler = mask_sampler.MaskSampler(
        mask_target_size=28, num_sampled_masks=1)
    mask_feature_aligner = roi_aligner.MultilevelROIAligner(crop_size=14)

  model = maskrcnn_model.MaskRCNNModel(
      backbone,
      decoder,
      rpn_head,
      detection_head,
      box_roi_generator,
      box_roi_sampler,
      box_roi_aligner,
      box_detection_generator,
      mask_head,
      mask_target_sampler,
      mask_feature_aligner,
      min_level=min_level,
      max_level=max_level,
      num_scales=3,
      aspect_ratios=[1.0],
      anchor_size=3)

  # The single detection head is expected back wrapped in a list.
  expected_items = {
      'backbone': backbone,
      'decoder': decoder,
      'rpn_head': rpn_head,
      'detection_head': [detection_head],
  }
  if include_mask:
    expected_items['mask_head'] = mask_head
  self.assertAllEqual(expected_items, model.checkpoint_items)

  # Test save and load checkpoints.
  full_ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items)
  save_dir = self.create_tempdir().full_path
  full_ckpt.save(os.path.join(save_dir, 'ckpt'))
  latest_path = tf.train.latest_checkpoint(save_dir)

  # A backbone-only checkpoint object must match against the saved file.
  backbone_only_ckpt = tf.train.Checkpoint(backbone=backbone)
  backbone_only_status = backbone_only_ckpt.read(latest_path)
  backbone_only_status.expect_partial().assert_existing_objects_matched()

  if include_mask:
    backbone_and_mask_ckpt = tf.train.Checkpoint(
        backbone=backbone, mask_head=mask_head)
    backbone_and_mask_status = backbone_and_mask_ckpt.restore(latest_path)
    backbone_and_mask_status.expect_partial().assert_existing_objects_matched()
def test_forward(self, strategy, include_mask, build_anchor_boxes, training,
                 use_cascade_heads):
  """Runs a forward pass and checks which keys appear in the outputs.

  With `use_cascade_heads` the model is given one detection head and one ROI
  sampler per cascade stage, the same way `build_maskrcnn` wires them up.
  Previously the list of samplers was built but never used: the model only
  received the last sampler created plus a single detection head, so no
  cascade was exercised.

  Args:
    strategy: Distribution strategy whose scope the model is built and
      called in.
    include_mask: Whether to build the mask head, sampler and ROI aligner.
    build_anchor_boxes: Whether to pass pre-built anchor boxes to the model
      instead of None.
    training: Value of the `training` argument of the model call; selects
      which output keys are asserted.
    use_cascade_heads: Whether to add a second cascade stage (IoU threshold
      0.6) with class-agnostic box prediction and class ensembling.
  """
  num_classes = 3
  min_level = 3
  max_level = 4
  num_scales = 3
  aspect_ratios = [1.0]
  anchor_size = 3
  if use_cascade_heads:
    cascade_iou_thresholds = [0.6]
    class_agnostic_bbox_pred = True
    cascade_class_ensemble = True
  else:
    cascade_iou_thresholds = None
    class_agnostic_bbox_pred = False
    cascade_class_ensemble = False

  image_size = (256, 256)
  images = np.random.rand(2, image_size[0], image_size[1], 3)
  image_shape = np.array([[224, 100], [100, 224]])
  with strategy.scope():
    if build_anchor_boxes:
      anchor_boxes = anchor.Anchor(
          min_level=min_level,
          max_level=max_level,
          num_scales=num_scales,
          aspect_ratios=aspect_ratios,
          anchor_size=anchor_size,
          image_size=image_size).multilevel_boxes
    else:
      anchor_boxes = None
    num_anchors_per_location = len(aspect_ratios) * num_scales

    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
    backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
    decoder = fpn.FPN(
        min_level=min_level,
        max_level=max_level,
        input_specs=backbone.output_specs)
    rpn_head = dense_prediction_heads.RPNHead(
        min_level=min_level,
        max_level=max_level,
        num_anchors_per_location=num_anchors_per_location)
    detection_head = instance_heads.DetectionHead(
        num_classes=num_classes,
        class_agnostic_bbox_pred=class_agnostic_bbox_pred)
    roi_generator_obj = roi_generator.MultilevelROIGenerator()

    # Stage 0 always uses a default sampler; each cascade threshold adds one
    # more (detection head, sampler) pair.
    roi_sampler_cascade = [roi_sampler.ROISampler()]
    if cascade_iou_thresholds:
      detection_head_cascade = [detection_head]
      for iou in cascade_iou_thresholds:
        detection_head_cascade.append(
            instance_heads.DetectionHead(
                num_classes=num_classes,
                class_agnostic_bbox_pred=class_agnostic_bbox_pred))
        roi_sampler_cascade.append(
            roi_sampler.ROISampler(
                mix_gt_boxes=False,
                foreground_iou_threshold=iou,
                background_iou_high_threshold=iou,
                background_iou_low_threshold=0.0,
                skip_subsampling=True))
      detection_head = detection_head_cascade

    roi_aligner_obj = roi_aligner.MultilevelROIAligner()
    detection_generator_obj = detection_generator.DetectionGenerator()
    if include_mask:
      mask_head = instance_heads.MaskHead(
          num_classes=num_classes, upsample_factor=2)
      mask_sampler_obj = mask_sampler.MaskSampler(
          mask_target_size=28, num_sampled_masks=1)
      mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
    else:
      mask_head = None
      mask_sampler_obj = None
      mask_roi_aligner_obj = None

    # Pass the whole sampler cascade, not just the last sampler created.
    model = maskrcnn_model.MaskRCNNModel(
        backbone,
        decoder,
        rpn_head,
        detection_head,
        roi_generator_obj,
        roi_sampler_cascade,
        roi_aligner_obj,
        detection_generator_obj,
        mask_head,
        mask_sampler_obj,
        mask_roi_aligner_obj,
        class_agnostic_bbox_pred=class_agnostic_bbox_pred,
        cascade_class_ensemble=cascade_class_ensemble,
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=anchor_size)

    # Rows of -1 fill the unused box/class slots of each image.
    gt_boxes = np.array(
        [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
         [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
        dtype=np.float32)
    gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
    if include_mask:
      gt_masks = np.ones((2, 3, 100, 100))
    else:
      gt_masks = None

    results = model(
        images,
        image_shape,
        anchor_boxes,
        gt_boxes,
        gt_classes,
        gt_masks,
        training=training)

  # RPN outputs are expected in both modes.
  self.assertIn('rpn_boxes', results)
  self.assertIn('rpn_scores', results)
  if training:
    self.assertIn('class_targets', results)
    self.assertIn('box_targets', results)
    self.assertIn('class_outputs', results)
    self.assertIn('box_outputs', results)
    if include_mask:
      self.assertIn('mask_outputs', results)
  else:
    self.assertIn('detection_boxes', results)
    self.assertIn('detection_scores', results)
    self.assertIn('detection_classes', results)
    self.assertIn('num_detections', results)
    if include_mask:
      self.assertIn('detection_masks', results)