def test_serialize_deserialize(self): """Validate the network can be serialized and deserialized.""" num_classes = 3 min_level = 3 max_level = 7 num_scales = 3 aspect_ratios = [1.0] num_anchors_per_location = num_scales * len(aspect_ratios) backbone = resnet.ResNet(model_id=50) decoder = fpn.FPN(input_specs=backbone.output_specs, min_level=min_level, max_level=max_level) head = dense_prediction_heads.RetinaNetHead( min_level=min_level, max_level=max_level, num_classes=num_classes, num_anchors_per_location=num_anchors_per_location) generator = detection_generator.MultilevelDetectionGenerator( max_num_detections=10) model = retinanet_model.RetinaNetModel(backbone=backbone, decoder=decoder, head=head, detection_generator=generator) config = model.get_config() new_model = retinanet_model.RetinaNetModel.from_config(config) # Validate that the config can be forced to JSON. _ = new_model.to_json() # If the serialization was successful, the new config should match the old. self.assertAllEqual(model.get_config(), new_model.get_config())
def test_serialize_deserialize(self):
  # Create a network object that sets all of its config options.
  kwargs = dict(
      input_specs=resnet.ResNet(model_id=50).output_specs,
      min_level=3,
      max_level=7,
      num_filters=256,
      use_separable_conv=False,
      use_sync_bn=False,
      activation='relu',
      norm_momentum=0.99,
      norm_epsilon=0.001,
      kernel_initializer='VarianceScaling',
      kernel_regularizer=None,
      bias_regularizer=None,
  )
  network = fpn.FPN(**kwargs)

  expected_config = dict(kwargs)
  self.assertEqual(network.get_config(), expected_config)

  # Create another network object from the first object's config.
  new_network = fpn.FPN.from_config(network.get_config())

  # Validate that the config can be forced to JSON.
  _ = new_network.to_json()

  # If the serialization was successful, the new config should match the old.
  self.assertAllEqual(network.get_config(), new_network.get_config())
def test_checkpoint(self, include_mask):
  input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
  backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
  decoder = fpn.FPN(
      min_level=3, max_level=7, input_specs=backbone.output_specs)
  rpn_head = dense_prediction_heads.RPNHead(
      min_level=3, max_level=7, num_anchors_per_location=3)
  detection_head = instance_heads.DetectionHead(num_classes=2)
  roi_generator_obj = roi_generator.MultilevelROIGenerator()
  roi_sampler_obj = roi_sampler.ROISampler()
  roi_aligner_obj = roi_aligner.MultilevelROIAligner()
  detection_generator_obj = detection_generator.DetectionGenerator()
  if include_mask:
    mask_head = instance_heads.MaskHead(num_classes=2, upsample_factor=2)
    mask_sampler_obj = mask_sampler.MaskSampler(
        mask_target_size=28, num_sampled_masks=1)
    mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
  else:
    mask_head = None
    mask_sampler_obj = None
    mask_roi_aligner_obj = None
  model = maskrcnn_model.MaskRCNNModel(
      backbone, decoder, rpn_head, detection_head, roi_generator_obj,
      roi_sampler_obj, roi_aligner_obj, detection_generator_obj, mask_head,
      mask_sampler_obj, mask_roi_aligner_obj)

  expect_checkpoint_items = dict(
      backbone=backbone,
      decoder=decoder,
      rpn_head=rpn_head,
      detection_head=detection_head)
  if include_mask:
    expect_checkpoint_items['mask_head'] = mask_head
  self.assertAllEqual(expect_checkpoint_items, model.checkpoint_items)

  # Test save and load checkpoints.
  ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items)
  save_dir = self.create_tempdir().full_path
  ckpt.save(os.path.join(save_dir, 'ckpt'))

  partial_ckpt = tf.train.Checkpoint(backbone=backbone)
  partial_ckpt.restore(
      tf.train.latest_checkpoint(save_dir)
  ).expect_partial().assert_existing_objects_matched()

  if include_mask:
    partial_ckpt_mask = tf.train.Checkpoint(
        backbone=backbone, mask_head=mask_head)
    partial_ckpt_mask.restore(
        tf.train.latest_checkpoint(save_dir)
    ).expect_partial().assert_existing_objects_matched()
def test_num_params(self, num_classes, min_level, max_level, num_scales,
                    aspect_ratios, resnet_model_id, use_separable_conv,
                    fpn_num_filters, head_num_convs, head_num_filters,
                    expected_num_params):
  num_anchors_per_location = num_scales * len(aspect_ratios)
  image_size = 384
  images = np.random.rand(2, image_size, image_size, 3)
  image_shape = np.array([[image_size, image_size], [image_size, image_size]])

  anchor_boxes = anchor.Anchor(
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=3,
      image_size=(image_size, image_size)).multilevel_boxes
  for l in anchor_boxes:
    anchor_boxes[l] = tf.tile(
        tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1])

  backbone = resnet.ResNet(model_id=resnet_model_id)
  decoder = fpn.FPN(
      input_specs=backbone.output_specs,
      min_level=min_level,
      max_level=max_level,
      num_filters=fpn_num_filters,
      use_separable_conv=use_separable_conv)
  head = dense_prediction_heads.RetinaNetHead(
      min_level=min_level,
      max_level=max_level,
      num_classes=num_classes,
      num_anchors_per_location=num_anchors_per_location,
      use_separable_conv=use_separable_conv,
      num_convs=head_num_convs,
      num_filters=head_num_filters)
  generator = detection_generator.MultilevelDetectionGenerator(
      max_num_detections=10)
  model = retinanet_model.RetinaNetModel(
      backbone=backbone,
      decoder=decoder,
      head=head,
      detection_generator=generator)

  _ = model(images, image_shape, anchor_boxes, training=True)
  self.assertEqual(expected_num_params, model.count_params())
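# Note (illustrative sketch, not part of the original tests): several tests in
# this file repeat the same tiling idiom, because anchor.Anchor.multilevel_boxes
# returns unbatched per-level tensors while the models here are fed a batch of
# two images. The hypothetical helper below captures that idiom; the name
# `_tile_anchor_boxes_to_batch` is an assumption chosen for illustration.
def _tile_anchor_boxes_to_batch(anchor_boxes, batch_size):
  """Adds a leading batch dimension to each per-level anchor tensor (sketch)."""
  return {
      level: tf.tile(tf.expand_dims(boxes, axis=0), [batch_size, 1, 1, 1])
      for level, boxes in anchor_boxes.items()
  }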
def test_serialize_deserialize(self, include_mask):
  input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
  backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
  decoder = fpn.FPN(
      min_level=3, max_level=7, input_specs=backbone.output_specs)
  rpn_head = dense_prediction_heads.RPNHead(
      min_level=3, max_level=7, num_anchors_per_location=3)
  detection_head = instance_heads.DetectionHead(num_classes=2)
  roi_generator_obj = roi_generator.MultilevelROIGenerator()
  roi_sampler_obj = roi_sampler.ROISampler()
  roi_aligner_obj = roi_aligner.MultilevelROIAligner()
  detection_generator_obj = detection_generator.DetectionGenerator()
  if include_mask:
    mask_head = instance_heads.MaskHead(num_classes=2, upsample_factor=2)
    mask_sampler_obj = mask_sampler.MaskSampler(
        mask_target_size=28, num_sampled_masks=1)
    mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
  else:
    mask_head = None
    mask_sampler_obj = None
    mask_roi_aligner_obj = None
  model = maskrcnn_model.MaskRCNNModel(
      backbone, decoder, rpn_head, detection_head, roi_generator_obj,
      roi_sampler_obj, roi_aligner_obj, detection_generator_obj, mask_head,
      mask_sampler_obj, mask_roi_aligner_obj,
      min_level=3,
      max_level=7,
      num_scales=3,
      aspect_ratios=[1.0],
      anchor_size=3)

  config = model.get_config()
  new_model = maskrcnn_model.MaskRCNNModel.from_config(config)

  # Validate that the config can be forced to JSON.
  _ = new_model.to_json()

  # If the serialization was successful, the new config should match the old.
  self.assertAllEqual(model.get_config(), new_model.get_config())
def construct_model_and_anchors(image_size, use_gt_boxes_for_masks):
  num_classes = 3
  min_level = 3
  max_level = 4
  num_scales = 3
  aspect_ratios = [1.0]

  anchor_boxes = anchor.Anchor(
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=3,
      image_size=image_size).multilevel_boxes
  num_anchors_per_location = len(aspect_ratios) * num_scales

  input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
  backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
  decoder = fpn.FPN(
      min_level=min_level, max_level=max_level,
      input_specs=backbone.output_specs)
  rpn_head = dense_prediction_heads.RPNHead(
      min_level=min_level,
      max_level=max_level,
      num_anchors_per_location=num_anchors_per_location)
  detection_head = instance_heads.DetectionHead(num_classes=num_classes)
  roi_generator_obj = roi_generator.MultilevelROIGenerator()
  roi_sampler_obj = roi_sampler.ROISampler()
  roi_aligner_obj = roi_aligner.MultilevelROIAligner()
  detection_generator_obj = detection_generator.DetectionGenerator()
  mask_head = deep_instance_heads.DeepMaskHead(
      num_classes=num_classes, upsample_factor=2)
  mask_sampler_obj = mask_sampler.MaskSampler(
      mask_target_size=28, num_sampled_masks=1)
  mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
  model = maskrcnn_model.DeepMaskRCNNModel(
      backbone,
      decoder,
      rpn_head,
      detection_head,
      roi_generator_obj,
      roi_sampler_obj,
      roi_aligner_obj,
      detection_generator_obj,
      mask_head,
      mask_sampler_obj,
      mask_roi_aligner_obj,
      use_gt_boxes_for_masks=use_gt_boxes_for_masks)
  return model, anchor_boxes
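# Usage sketch (illustrative only, not part of the original tests): a training
# forward pass through the model returned by construct_model_and_anchors. The
# call signature is assumed to match the MaskRCNNModel forward tests elsewhere
# in this file; the image size, batch, and ground-truth values mirror those
# tests rather than any particular parameterization.
def _example_deep_maskrcnn_forward():
  model, anchor_boxes = construct_model_and_anchors(
      image_size=(256, 256), use_gt_boxes_for_masks=True)
  images = np.random.rand(2, 256, 256, 3)
  image_shape = np.array([[224, 100], [100, 224]])
  gt_boxes = np.array(
      [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
       [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
      dtype=np.float32)
  gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
  gt_masks = np.ones((2, 3, 100, 100))
  return model(images, image_shape, anchor_boxes, gt_boxes, gt_classes,
               gt_masks, training=True)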
def test_serialize_deserialize(self): """Validate the network can be serialized and deserialized.""" num_classes = 3 backbone = backbones.ResNet(model_id=50) decoder = fpn.FPN(input_specs=backbone.output_specs, min_level=3, max_level=7) head = segmentation_heads.SegmentationHead(num_classes, level=3) model = segmentation_model.SegmentationModel(backbone=backbone, decoder=decoder, head=head) config = model.get_config() new_model = segmentation_model.SegmentationModel.from_config(config) # Validate that the config can be forced to JSON. _ = new_model.to_json() # If the serialization was successful, the new config should match the old. self.assertAllEqual(model.get_config(), new_model.get_config())
def test_segmentation_network_creation(self, input_size, level):
  """Test for creation of a segmentation network."""
  num_classes = 10
  inputs = np.random.rand(2, input_size, input_size, 3)
  tf.keras.backend.set_image_data_format('channels_last')
  backbone = backbones.ResNet(model_id=50)

  decoder = fpn.FPN(
      input_specs=backbone.output_specs, min_level=2, max_level=7)
  head = segmentation_heads.SegmentationHead(num_classes, level=level)

  model = segmentation_model.SegmentationModel(
      backbone=backbone, decoder=decoder, head=head)

  logits = model(inputs)
  self.assertAllEqual(
      [2, input_size // (2**level), input_size // (2**level), num_classes],
      logits.numpy().shape)
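# Note (added for clarity, not from the original file): the segmentation head
# predicts at output stride 2**level, so the logits keep one channel per class
# at that reduced resolution. For example, with a hypothetical input_size=128
# and level=3 the expected shape above works out to
# [2, 128 // 8, 128 // 8, 10] == [2, 16, 16, 10].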
def test_network_creation(self, input_size, min_level, max_level,
                          use_separable_conv):
  """Test creation of FPN."""
  tf.keras.backend.set_image_data_format('channels_last')

  inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

  backbone = resnet.ResNet(model_id=50)
  network = fpn.FPN(
      input_specs=backbone.output_specs,
      min_level=min_level,
      max_level=max_level,
      use_separable_conv=use_separable_conv)

  endpoints = backbone(inputs)
  feats = network(endpoints)

  for level in range(min_level, max_level + 1):
    self.assertIn(str(level), feats)
    self.assertAllEqual(
        [1, input_size // 2**level, input_size // 2**level, 256],
        feats[str(level)].shape.as_list())
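# Note (added for clarity, not from the original file): each pyramid level
# halves the spatial size relative to the one below it, and 256 channels is the
# FPN default num_filters (see the kwargs in the FPN serialize test above). As
# a worked example with a hypothetical input_size=256, min_level=3,
# max_level=7, the loop checks [1, 32, 32, 256] at level 3 down to
# [1, 2, 2, 256] at level 7.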
def test_forward(self, strategy, include_mask, build_anchor_boxes, training,
                 use_cascade_heads):
  num_classes = 3
  min_level = 3
  max_level = 4
  num_scales = 3
  aspect_ratios = [1.0]
  anchor_size = 3
  if use_cascade_heads:
    cascade_iou_thresholds = [0.6]
    class_agnostic_bbox_pred = True
    cascade_class_ensemble = True
  else:
    cascade_iou_thresholds = None
    class_agnostic_bbox_pred = False
    cascade_class_ensemble = False
  image_size = (256, 256)
  images = np.random.rand(2, image_size[0], image_size[1], 3)
  image_shape = np.array([[224, 100], [100, 224]])
  with strategy.scope():
    if build_anchor_boxes:
      anchor_boxes = anchor.Anchor(
          min_level=min_level,
          max_level=max_level,
          num_scales=num_scales,
          aspect_ratios=aspect_ratios,
          anchor_size=anchor_size,
          image_size=image_size).multilevel_boxes
    else:
      anchor_boxes = None
    num_anchors_per_location = len(aspect_ratios) * num_scales

    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
    backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
    decoder = fpn.FPN(
        min_level=min_level,
        max_level=max_level,
        input_specs=backbone.output_specs)
    rpn_head = dense_prediction_heads.RPNHead(
        min_level=min_level,
        max_level=max_level,
        num_anchors_per_location=num_anchors_per_location)
    detection_head = instance_heads.DetectionHead(
        num_classes=num_classes,
        class_agnostic_bbox_pred=class_agnostic_bbox_pred)
    roi_generator_obj = roi_generator.MultilevelROIGenerator()

    roi_sampler_cascade = []
    roi_sampler_obj = roi_sampler.ROISampler()
    roi_sampler_cascade.append(roi_sampler_obj)
    if cascade_iou_thresholds:
      for iou in cascade_iou_thresholds:
        roi_sampler_obj = roi_sampler.ROISampler(
            mix_gt_boxes=False,
            foreground_iou_threshold=iou,
            background_iou_high_threshold=iou,
            background_iou_low_threshold=0.0,
            skip_subsampling=True)
        roi_sampler_cascade.append(roi_sampler_obj)

    roi_aligner_obj = roi_aligner.MultilevelROIAligner()
    detection_generator_obj = detection_generator.DetectionGenerator()
    if include_mask:
      mask_head = instance_heads.MaskHead(
          num_classes=num_classes, upsample_factor=2)
      mask_sampler_obj = mask_sampler.MaskSampler(
          mask_target_size=28, num_sampled_masks=1)
      mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
    else:
      mask_head = None
      mask_sampler_obj = None
      mask_roi_aligner_obj = None
    model = maskrcnn_model.MaskRCNNModel(
        backbone,
        decoder,
        rpn_head,
        detection_head,
        roi_generator_obj,
        roi_sampler_obj,
        roi_aligner_obj,
        detection_generator_obj,
        mask_head,
        mask_sampler_obj,
        mask_roi_aligner_obj,
        class_agnostic_bbox_pred=class_agnostic_bbox_pred,
        cascade_class_ensemble=cascade_class_ensemble,
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=anchor_size)

    gt_boxes = np.array(
        [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
         [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
        dtype=np.float32)
    gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
    if include_mask:
      gt_masks = np.ones((2, 3, 100, 100))
    else:
      gt_masks = None

    results = model(
        images, image_shape, anchor_boxes, gt_boxes, gt_classes, gt_masks,
        training=training)

    self.assertIn('rpn_boxes', results)
    self.assertIn('rpn_scores', results)
    if training:
      self.assertIn('class_targets', results)
      self.assertIn('box_targets', results)
      self.assertIn('class_outputs', results)
      self.assertIn('box_outputs', results)
      if include_mask:
        self.assertIn('mask_outputs', results)
    else:
      self.assertIn('detection_boxes', results)
      self.assertIn('detection_scores', results)
      self.assertIn('detection_classes', results)
      self.assertIn('num_detections', results)
      if include_mask:
        self.assertIn('detection_masks', results)
def test_forward(self, strategy, training, shared_backbone, shared_decoder,
                 generate_panoptic_masks):
  num_classes = 3
  min_level = 2
  max_level = 6
  num_scales = 3
  aspect_ratios = [1.0]
  anchor_size = 3
  segmentation_resnet_model_id = 101
  aspp_dilation_rates = [6, 12, 18]
  aspp_decoder_level = 2
  fpn_decoder_level = 2
  class_agnostic_bbox_pred = False
  cascade_class_ensemble = False
  image_size = (256, 256)
  images = tf.random.normal([2, image_size[0], image_size[1], 3])
  image_info = tf.convert_to_tensor(
      [[[224, 100], [224, 100], [1, 1], [0, 0]],
       [[224, 100], [224, 100], [1, 1], [0, 0]]])
  shared_decoder = shared_decoder and shared_backbone
  with strategy.scope():
    anchor_boxes = anchor.Anchor(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=anchor_size,
        image_size=image_size).multilevel_boxes
    num_anchors_per_location = len(aspect_ratios) * num_scales

    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
    backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
    decoder = fpn.FPN(
        min_level=min_level,
        max_level=max_level,
        input_specs=backbone.output_specs)
    rpn_head = dense_prediction_heads.RPNHead(
        min_level=min_level,
        max_level=max_level,
        num_anchors_per_location=num_anchors_per_location)
    detection_head = instance_heads.DetectionHead(
        num_classes=num_classes,
        class_agnostic_bbox_pred=class_agnostic_bbox_pred)
    roi_generator_obj = roi_generator.MultilevelROIGenerator()
    roi_sampler_cascade = []
    roi_sampler_obj = roi_sampler.ROISampler()
    roi_sampler_cascade.append(roi_sampler_obj)
    roi_aligner_obj = roi_aligner.MultilevelROIAligner()
    detection_generator_obj = detection_generator.DetectionGenerator()

    if generate_panoptic_masks:
      panoptic_segmentation_generator_obj = (
          panoptic_segmentation_generator.PanopticSegmentationGenerator(
              output_size=list(image_size),
              max_num_detections=100,
              stuff_classes_offset=90))
    else:
      panoptic_segmentation_generator_obj = None

    mask_head = instance_heads.MaskHead(
        num_classes=num_classes, upsample_factor=2)
    mask_sampler_obj = mask_sampler.MaskSampler(
        mask_target_size=28, num_sampled_masks=1)
    mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)

    if shared_backbone:
      segmentation_backbone = None
    else:
      segmentation_backbone = resnet.ResNet(
          model_id=segmentation_resnet_model_id)
    if not shared_decoder:
      feature_fusion = 'deeplabv3plus'
      level = aspp_decoder_level
      segmentation_decoder = aspp.ASPP(
          level=level, dilation_rates=aspp_dilation_rates)
    else:
      feature_fusion = 'panoptic_fpn_fusion'
      level = fpn_decoder_level
      segmentation_decoder = None
    segmentation_head = segmentation_heads.SegmentationHead(
        num_classes=2,  # stuff and common class for things.
        level=level,
        feature_fusion=feature_fusion,
        decoder_min_level=min_level,
        decoder_max_level=max_level,
        num_convs=2)

    model = panoptic_maskrcnn_model.PanopticMaskRCNNModel(
        backbone,
        decoder,
        rpn_head,
        detection_head,
        roi_generator_obj,
        roi_sampler_obj,
        roi_aligner_obj,
        detection_generator_obj,
        panoptic_segmentation_generator_obj,
        mask_head,
        mask_sampler_obj,
        mask_roi_aligner_obj,
        segmentation_backbone=segmentation_backbone,
        segmentation_decoder=segmentation_decoder,
        segmentation_head=segmentation_head,
        class_agnostic_bbox_pred=class_agnostic_bbox_pred,
        cascade_class_ensemble=cascade_class_ensemble,
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=anchor_size)

    gt_boxes = tf.convert_to_tensor(
        [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
         [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
        dtype=tf.float32)
    gt_classes = tf.convert_to_tensor([[2, 1, -1], [1, -1, -1]],
                                      dtype=tf.int32)
    gt_masks = tf.ones((2, 3, 100, 100))

    results = model(
        images, image_info, anchor_boxes, gt_boxes, gt_classes, gt_masks,
        training=training)

    self.assertIn('rpn_boxes', results)
    self.assertIn('rpn_scores', results)
    if training:
      self.assertIn('class_targets', results)
      self.assertIn('box_targets', results)
      self.assertIn('class_outputs', results)
      self.assertIn('box_outputs', results)
      self.assertIn('mask_outputs', results)
    else:
      self.assertIn('detection_boxes', results)
      self.assertIn('detection_scores', results)
      self.assertIn('detection_classes', results)
      self.assertIn('num_detections', results)
      self.assertIn('detection_masks', results)
      self.assertIn('segmentation_outputs', results)
      self.assertAllEqual(
          [2, image_size[0] // (2**level), image_size[1] // (2**level), 2],
          results['segmentation_outputs'].numpy().shape)
      if generate_panoptic_masks:
        self.assertIn('panoptic_outputs', results)
        self.assertIn('category_mask', results['panoptic_outputs'])
        self.assertIn('instance_mask', results['panoptic_outputs'])
        self.assertAllEqual(
            [2, image_size[0], image_size[1]],
            results['panoptic_outputs']['category_mask'].numpy().shape)
        self.assertAllEqual(
            [2, image_size[0], image_size[1]],
            results['panoptic_outputs']['instance_mask'].numpy().shape)
      else:
        self.assertNotIn('panoptic_outputs', results)
def test_forward(self, strategy, image_size, training, has_att_heads):
  """Tests the forward pass of an R50-FPN RetinaNet."""
  tf.keras.backend.set_image_data_format('channels_last')
  num_classes = 3
  min_level = 3
  max_level = 7
  num_scales = 3
  aspect_ratios = [1.0]
  num_anchors_per_location = num_scales * len(aspect_ratios)

  images = np.random.rand(2, image_size[0], image_size[1], 3)
  image_shape = np.array(
      [[image_size[0], image_size[1]], [image_size[0], image_size[1]]])

  with strategy.scope():
    anchor_gen = anchor.build_anchor_generator(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=3)
    anchor_boxes = anchor_gen(image_size)
    for l in anchor_boxes:
      anchor_boxes[l] = tf.tile(
          tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1])

    backbone = resnet.ResNet(model_id=50)
    decoder = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level)

    if has_att_heads:
      attribute_heads = {'depth': ('regression', 1)}
    else:
      attribute_heads = None

    head = dense_prediction_heads.RetinaNetHead(
        min_level=min_level,
        max_level=max_level,
        num_classes=num_classes,
        attribute_heads=attribute_heads,
        num_anchors_per_location=num_anchors_per_location)
    generator = detection_generator.MultilevelDetectionGenerator(
        max_num_detections=10)
    model = retinanet_model.RetinaNetModel(
        backbone=backbone,
        decoder=decoder,
        head=head,
        detection_generator=generator)

    model_outputs = model(images, image_shape, anchor_boxes, training=training)

    if training:
      cls_outputs = model_outputs['cls_outputs']
      box_outputs = model_outputs['box_outputs']
      att_outputs = model_outputs['att_outputs']
      for level in range(min_level, max_level + 1):
        self.assertIn(str(level), cls_outputs)
        self.assertIn(str(level), box_outputs)
        self.assertAllEqual([
            2, image_size[0] // 2**level, image_size[1] // 2**level,
            num_classes * num_anchors_per_location
        ], cls_outputs[str(level)].numpy().shape)
        self.assertAllEqual([
            2, image_size[0] // 2**level, image_size[1] // 2**level,
            4 * num_anchors_per_location
        ], box_outputs[str(level)].numpy().shape)
        if has_att_heads:
          for att in att_outputs.values():
            self.assertAllEqual([
                2, image_size[0] // 2**level, image_size[1] // 2**level,
                1 * num_anchors_per_location
            ], att[str(level)].numpy().shape)
    else:
      self.assertIn('detection_boxes', model_outputs)
      self.assertIn('detection_scores', model_outputs)
      self.assertIn('detection_classes', model_outputs)
      self.assertIn('detection_attributes', model_outputs)
      self.assertIn('num_detections', model_outputs)
      self.assertAllEqual([2, 10, 4],
                          model_outputs['detection_boxes'].numpy().shape)
      self.assertAllEqual([2, 10],
                          model_outputs['detection_scores'].numpy().shape)
      self.assertAllEqual([2, 10],
                          model_outputs['detection_classes'].numpy().shape)
      self.assertAllEqual([2],
                          model_outputs['num_detections'].numpy().shape)
      if has_att_heads:
        self.assertAllEqual(
            [2, 10, 1],
            model_outputs['detection_attributes']['depth'].numpy().shape)
def test_serialize_deserialize(self, shared_backbone, shared_decoder):
  input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
  backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
  decoder = fpn.FPN(
      min_level=3, max_level=7, input_specs=backbone.output_specs)
  rpn_head = dense_prediction_heads.RPNHead(
      min_level=3, max_level=7, num_anchors_per_location=3)
  detection_head = instance_heads.DetectionHead(num_classes=2)
  roi_generator_obj = roi_generator.MultilevelROIGenerator()
  roi_sampler_obj = roi_sampler.ROISampler()
  roi_aligner_obj = roi_aligner.MultilevelROIAligner()
  detection_generator_obj = detection_generator.DetectionGenerator()
  panoptic_segmentation_generator_obj = (
      panoptic_segmentation_generator.PanopticSegmentationGenerator(
          output_size=[None, None],
          max_num_detections=100,
          stuff_classes_offset=90))
  segmentation_resnet_model_id = 101
  aspp_dilation_rates = [6, 12, 18]
  min_level = 2
  max_level = 6
  aspp_decoder_level = 2
  fpn_decoder_level = 2
  shared_decoder = shared_decoder and shared_backbone
  mask_head = instance_heads.MaskHead(num_classes=2, upsample_factor=2)
  mask_sampler_obj = mask_sampler.MaskSampler(
      mask_target_size=28, num_sampled_masks=1)
  mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)

  if shared_backbone:
    segmentation_backbone = None
  else:
    segmentation_backbone = resnet.ResNet(
        model_id=segmentation_resnet_model_id)
  if not shared_decoder:
    feature_fusion = 'deeplabv3plus'
    level = aspp_decoder_level
    segmentation_decoder = aspp.ASPP(
        level=level, dilation_rates=aspp_dilation_rates)
  else:
    feature_fusion = 'panoptic_fpn_fusion'
    level = fpn_decoder_level
    segmentation_decoder = None
  segmentation_head = segmentation_heads.SegmentationHead(
      num_classes=2,  # stuff and common class for things.
      level=level,
      feature_fusion=feature_fusion,
      decoder_min_level=min_level,
      decoder_max_level=max_level,
      num_convs=2)

  model = panoptic_maskrcnn_model.PanopticMaskRCNNModel(
      backbone,
      decoder,
      rpn_head,
      detection_head,
      roi_generator_obj,
      roi_sampler_obj,
      roi_aligner_obj,
      detection_generator_obj,
      panoptic_segmentation_generator_obj,
      mask_head,
      mask_sampler_obj,
      mask_roi_aligner_obj,
      segmentation_backbone=segmentation_backbone,
      segmentation_decoder=segmentation_decoder,
      segmentation_head=segmentation_head,
      min_level=min_level,
      max_level=max_level,
      num_scales=3,
      aspect_ratios=[1.0],
      anchor_size=3)

  config = model.get_config()
  new_model = panoptic_maskrcnn_model.PanopticMaskRCNNModel.from_config(config)

  # Validate that the config can be forced to JSON.
  _ = new_model.to_json()

  # If the serialization was successful, the new config should match the old.
  self.assertAllEqual(model.get_config(), new_model.get_config())
def test_checkpoint(self, shared_backbone, shared_decoder):
  input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
  backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
  decoder = fpn.FPN(
      min_level=3, max_level=7, input_specs=backbone.output_specs)
  rpn_head = dense_prediction_heads.RPNHead(
      min_level=3, max_level=7, num_anchors_per_location=3)
  detection_head = instance_heads.DetectionHead(num_classes=2)
  roi_generator_obj = roi_generator.MultilevelROIGenerator()
  roi_sampler_obj = roi_sampler.ROISampler()
  roi_aligner_obj = roi_aligner.MultilevelROIAligner()
  detection_generator_obj = detection_generator.DetectionGenerator()
  panoptic_segmentation_generator_obj = (
      panoptic_segmentation_generator.PanopticSegmentationGenerator(
          output_size=[None, None],
          max_num_detections=100,
          stuff_classes_offset=90))
  segmentation_resnet_model_id = 101
  aspp_dilation_rates = [6, 12, 18]
  min_level = 2
  max_level = 6
  aspp_decoder_level = 2
  fpn_decoder_level = 2
  shared_decoder = shared_decoder and shared_backbone
  mask_head = instance_heads.MaskHead(num_classes=2, upsample_factor=2)
  mask_sampler_obj = mask_sampler.MaskSampler(
      mask_target_size=28, num_sampled_masks=1)
  mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)

  if shared_backbone:
    segmentation_backbone = None
  else:
    segmentation_backbone = resnet.ResNet(
        model_id=segmentation_resnet_model_id)
  if not shared_decoder:
    feature_fusion = 'deeplabv3plus'
    level = aspp_decoder_level
    segmentation_decoder = aspp.ASPP(
        level=level, dilation_rates=aspp_dilation_rates)
  else:
    feature_fusion = 'panoptic_fpn_fusion'
    level = fpn_decoder_level
    segmentation_decoder = None
  segmentation_head = segmentation_heads.SegmentationHead(
      num_classes=2,  # stuff and common class for things.
      level=level,
      feature_fusion=feature_fusion,
      decoder_min_level=min_level,
      decoder_max_level=max_level,
      num_convs=2)

  model = panoptic_maskrcnn_model.PanopticMaskRCNNModel(
      backbone,
      decoder,
      rpn_head,
      detection_head,
      roi_generator_obj,
      roi_sampler_obj,
      roi_aligner_obj,
      detection_generator_obj,
      panoptic_segmentation_generator_obj,
      mask_head,
      mask_sampler_obj,
      mask_roi_aligner_obj,
      segmentation_backbone=segmentation_backbone,
      segmentation_decoder=segmentation_decoder,
      segmentation_head=segmentation_head,
      min_level=min_level,
      max_level=max_level,
      num_scales=3,
      aspect_ratios=[1.0],
      anchor_size=3)

  expect_checkpoint_items = dict(
      backbone=backbone,
      decoder=decoder,
      rpn_head=rpn_head,
      detection_head=[detection_head])
  expect_checkpoint_items['mask_head'] = mask_head
  if not shared_backbone:
    expect_checkpoint_items['segmentation_backbone'] = segmentation_backbone
  if not shared_decoder:
    expect_checkpoint_items['segmentation_decoder'] = segmentation_decoder
  expect_checkpoint_items['segmentation_head'] = segmentation_head
  self.assertAllEqual(expect_checkpoint_items, model.checkpoint_items)

  # Test save and load checkpoints.
  ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items)
  save_dir = self.create_tempdir().full_path
  ckpt.save(os.path.join(save_dir, 'ckpt'))

  partial_ckpt = tf.train.Checkpoint(backbone=backbone)
  partial_ckpt.read(
      tf.train.latest_checkpoint(save_dir)
  ).expect_partial().assert_existing_objects_matched()

  partial_ckpt_mask = tf.train.Checkpoint(
      backbone=backbone, mask_head=mask_head)
  partial_ckpt_mask.restore(
      tf.train.latest_checkpoint(save_dir)
  ).expect_partial().assert_existing_objects_matched()

  if not shared_backbone:
    partial_ckpt_segmentation = tf.train.Checkpoint(
        segmentation_backbone=segmentation_backbone,
        segmentation_decoder=segmentation_decoder,
        segmentation_head=segmentation_head)
  elif not shared_decoder:
    partial_ckpt_segmentation = tf.train.Checkpoint(
        segmentation_decoder=segmentation_decoder,
        segmentation_head=segmentation_head)
  else:
    partial_ckpt_segmentation = tf.train.Checkpoint(
        segmentation_head=segmentation_head)

  partial_ckpt_segmentation.restore(
      tf.train.latest_checkpoint(save_dir)
  ).expect_partial().assert_existing_objects_matched()
def test_forward(self, include_mask, training):
  num_classes = 3
  min_level = 3
  max_level = 4
  num_scales = 3
  aspect_ratios = [1.0]
  image_size = (256, 256)
  images = np.random.rand(2, image_size[0], image_size[1], 3)
  image_shape = np.array([[224, 100], [100, 224]])
  anchor_boxes = anchor.Anchor(
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=3,
      image_size=image_size).multilevel_boxes
  num_anchors_per_location = len(aspect_ratios) * num_scales

  input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
  backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
  decoder = fpn.FPN(
      min_level=min_level, max_level=max_level,
      input_specs=backbone.output_specs)
  rpn_head = dense_prediction_heads.RPNHead(
      min_level=min_level,
      max_level=max_level,
      num_anchors_per_location=num_anchors_per_location)
  detection_head = instance_heads.DetectionHead(num_classes=num_classes)
  roi_generator_obj = roi_generator.MultilevelROIGenerator()
  roi_sampler_obj = roi_sampler.ROISampler()
  roi_aligner_obj = roi_aligner.MultilevelROIAligner()
  detection_generator_obj = detection_generator.DetectionGenerator()
  if include_mask:
    mask_head = instance_heads.MaskHead(
        num_classes=num_classes, upsample_factor=2)
    mask_sampler_obj = mask_sampler.MaskSampler(
        mask_target_size=28, num_sampled_masks=1)
    mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
  else:
    mask_head = None
    mask_sampler_obj = None
    mask_roi_aligner_obj = None
  model = maskrcnn_model.MaskRCNNModel(
      backbone, decoder, rpn_head, detection_head, roi_generator_obj,
      roi_sampler_obj, roi_aligner_obj, detection_generator_obj, mask_head,
      mask_sampler_obj, mask_roi_aligner_obj)

  gt_boxes = np.array(
      [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
       [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
      dtype=np.float32)
  gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
  if include_mask:
    gt_masks = np.ones((2, 3, 100, 100))
  else:
    gt_masks = None

  results = model(
      images, image_shape, anchor_boxes, gt_boxes, gt_classes, gt_masks,
      training=training)

  self.assertIn('rpn_boxes', results)
  self.assertIn('rpn_scores', results)
  if training:
    self.assertIn('class_targets', results)
    self.assertIn('box_targets', results)
    self.assertIn('class_outputs', results)
    self.assertIn('box_outputs', results)
    if include_mask:
      self.assertIn('mask_outputs', results)
  else:
    self.assertIn('detection_boxes', results)
    self.assertIn('detection_scores', results)
    self.assertIn('detection_classes', results)
    self.assertIn('num_detections', results)
    if include_mask:
      self.assertIn('detection_masks', results)
def test_build_model(self, use_separable_conv, build_anchor_boxes,
                     shared_backbone, shared_decoder, is_training=True):
  num_classes = 3
  min_level = 3
  max_level = 7
  num_scales = 3
  aspect_ratios = [1.0]
  anchor_size = 3
  resnet_model_id = 50
  segmentation_resnet_model_id = 50
  segmentation_output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  aspp_decoder_level = int(np.math.log2(segmentation_output_stride))
  fpn_decoder_level = 3
  num_anchors_per_location = num_scales * len(aspect_ratios)
  image_size = 128
  images = np.random.rand(2, image_size, image_size, 3)
  image_shape = np.array([[image_size, image_size], [image_size, image_size]])
  shared_decoder = shared_decoder and shared_backbone
  if build_anchor_boxes:
    anchor_boxes = anchor.Anchor(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=3,
        image_size=(image_size, image_size)).multilevel_boxes
    for l in anchor_boxes:
      anchor_boxes[l] = tf.tile(
          tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1])
  else:
    anchor_boxes = None

  backbone = resnet.ResNet(model_id=resnet_model_id)
  decoder = fpn.FPN(
      input_specs=backbone.output_specs,
      min_level=min_level,
      max_level=max_level,
      use_separable_conv=use_separable_conv)
  rpn_head = dense_prediction_heads.RPNHead(
      min_level=min_level,
      max_level=max_level,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=1)
  detection_head = instance_heads.DetectionHead(num_classes=num_classes)
  roi_generator_obj = roi_generator.MultilevelROIGenerator()
  roi_sampler_obj = roi_sampler.ROISampler()
  roi_aligner_obj = roi_aligner.MultilevelROIAligner()
  detection_generator_obj = detection_generator.DetectionGenerator()
  mask_head = instance_heads.MaskHead(
      num_classes=num_classes, upsample_factor=2)
  mask_sampler_obj = mask_sampler.MaskSampler(
      mask_target_size=28, num_sampled_masks=1)
  mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)

  if shared_backbone:
    segmentation_backbone = None
  else:
    segmentation_backbone = resnet.ResNet(
        model_id=segmentation_resnet_model_id)
  if not shared_decoder:
    level = aspp_decoder_level
    segmentation_decoder = aspp.ASPP(
        level=level, dilation_rates=aspp_dilation_rates)
  else:
    level = fpn_decoder_level
    segmentation_decoder = None
  segmentation_head = segmentation_heads.SegmentationHead(
      num_classes=2,  # stuff and common class for things.
      level=level,
      num_convs=2)

  model = panoptic_maskrcnn_model.PanopticMaskRCNNModel(
      backbone,
      decoder,
      rpn_head,
      detection_head,
      roi_generator_obj,
      roi_sampler_obj,
      roi_aligner_obj,
      detection_generator_obj,
      mask_head,
      mask_sampler_obj,
      mask_roi_aligner_obj,
      segmentation_backbone=segmentation_backbone,
      segmentation_decoder=segmentation_decoder,
      segmentation_head=segmentation_head,
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=anchor_size)

  gt_boxes = np.array(
      [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
       [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
      dtype=np.float32)
  gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
  gt_masks = np.ones((2, 3, 100, 100))

  # Results will be checked in test_forward.
  _ = model(
      images, image_shape, anchor_boxes, gt_boxes, gt_classes, gt_masks,
      training=is_training)
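# Note (added for clarity, not from the original file): with
# segmentation_output_stride = 16, int(np.math.log2(16)) == 4, so the ASPP
# branch above decodes pyramid level 4, i.e. the feature map downsampled by
# 2**4 = 16 relative to the input, matching the level/stride convention used
# by the shape checks throughout these tests.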
def test_build_model(self, use_separable_conv, build_anchor_boxes,
                     shared_backbone, shared_decoder, is_training=True):
  num_classes = 3
  min_level = 2
  max_level = 6
  num_scales = 3
  aspect_ratios = [1.0]
  anchor_size = 3
  resnet_model_id = 50
  segmentation_resnet_model_id = 50
  aspp_dilation_rates = [6, 12, 18]
  aspp_decoder_level = 2
  fpn_decoder_level = 2
  num_anchors_per_location = num_scales * len(aspect_ratios)
  image_size = 128
  images = tf.random.normal([2, image_size, image_size, 3])
  image_info = tf.convert_to_tensor(
      [[[image_size, image_size], [image_size, image_size], [1, 1], [0, 0]],
       [[image_size, image_size], [image_size, image_size], [1, 1], [0, 0]]])
  shared_decoder = shared_decoder and shared_backbone
  if build_anchor_boxes or not is_training:
    anchor_boxes = anchor.Anchor(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=3,
        image_size=(image_size, image_size)).multilevel_boxes
    for l in anchor_boxes:
      anchor_boxes[l] = tf.tile(
          tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1])
  else:
    anchor_boxes = None

  backbone = resnet.ResNet(model_id=resnet_model_id)
  decoder = fpn.FPN(
      input_specs=backbone.output_specs,
      min_level=min_level,
      max_level=max_level,
      use_separable_conv=use_separable_conv)
  rpn_head = dense_prediction_heads.RPNHead(
      min_level=min_level,
      max_level=max_level,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=1)
  detection_head = instance_heads.DetectionHead(num_classes=num_classes)
  roi_generator_obj = roi_generator.MultilevelROIGenerator()
  roi_sampler_obj = roi_sampler.ROISampler()
  roi_aligner_obj = roi_aligner.MultilevelROIAligner()
  detection_generator_obj = detection_generator.DetectionGenerator()
  panoptic_segmentation_generator_obj = (
      panoptic_segmentation_generator.PanopticSegmentationGenerator(
          output_size=[image_size, image_size],
          max_num_detections=100,
          stuff_classes_offset=90))
  mask_head = instance_heads.MaskHead(
      num_classes=num_classes, upsample_factor=2)
  mask_sampler_obj = mask_sampler.MaskSampler(
      mask_target_size=28, num_sampled_masks=1)
  mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)

  if shared_backbone:
    segmentation_backbone = None
  else:
    segmentation_backbone = resnet.ResNet(
        model_id=segmentation_resnet_model_id)
  if not shared_decoder:
    feature_fusion = 'deeplabv3plus'
    level = aspp_decoder_level
    segmentation_decoder = aspp.ASPP(
        level=level, dilation_rates=aspp_dilation_rates)
  else:
    feature_fusion = 'panoptic_fpn_fusion'
    level = fpn_decoder_level
    segmentation_decoder = None
  segmentation_head = segmentation_heads.SegmentationHead(
      num_classes=2,  # stuff and common class for things.
      level=level,
      feature_fusion=feature_fusion,
      decoder_min_level=min_level,
      decoder_max_level=max_level,
      num_convs=2)

  model = panoptic_maskrcnn_model.PanopticMaskRCNNModel(
      backbone,
      decoder,
      rpn_head,
      detection_head,
      roi_generator_obj,
      roi_sampler_obj,
      roi_aligner_obj,
      detection_generator_obj,
      panoptic_segmentation_generator_obj,
      mask_head,
      mask_sampler_obj,
      mask_roi_aligner_obj,
      segmentation_backbone=segmentation_backbone,
      segmentation_decoder=segmentation_decoder,
      segmentation_head=segmentation_head,
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=anchor_size)

  gt_boxes = tf.convert_to_tensor(
      [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
       [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
      dtype=tf.float32)
  gt_classes = tf.convert_to_tensor([[2, 1, -1], [1, -1, -1]], dtype=tf.int32)
  gt_masks = tf.ones((2, 3, 100, 100))

  # Results will be checked in test_forward.
  _ = model(
      images, image_info, anchor_boxes, gt_boxes, gt_classes, gt_masks,
      training=is_training)
def test_build_model(self, use_separable_conv, build_anchor_boxes, is_training,
                     has_att_heads):
  num_classes = 3
  min_level = 3
  max_level = 7
  num_scales = 3
  aspect_ratios = [1.0]
  anchor_size = 3
  fpn_num_filters = 256
  head_num_convs = 4
  head_num_filters = 256
  num_anchors_per_location = num_scales * len(aspect_ratios)
  image_size = 384
  images = np.random.rand(2, image_size, image_size, 3)
  image_shape = np.array([[image_size, image_size], [image_size, image_size]])

  if build_anchor_boxes:
    anchor_boxes = anchor.Anchor(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=anchor_size,
        image_size=(image_size, image_size)).multilevel_boxes
    for l in anchor_boxes:
      anchor_boxes[l] = tf.tile(
          tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1])
  else:
    anchor_boxes = None

  if has_att_heads:
    attribute_heads = [dict(name='depth', type='regression', size=1)]
  else:
    attribute_heads = None

  backbone = resnet.ResNet(model_id=50)
  decoder = fpn.FPN(
      input_specs=backbone.output_specs,
      min_level=min_level,
      max_level=max_level,
      num_filters=fpn_num_filters,
      use_separable_conv=use_separable_conv)
  head = dense_prediction_heads.RetinaNetHead(
      min_level=min_level,
      max_level=max_level,
      num_classes=num_classes,
      attribute_heads=attribute_heads,
      num_anchors_per_location=num_anchors_per_location,
      use_separable_conv=use_separable_conv,
      num_convs=head_num_convs,
      num_filters=head_num_filters)
  generator = detection_generator.MultilevelDetectionGenerator(
      max_num_detections=10)
  model = retinanet_model.RetinaNetModel(
      backbone=backbone,
      decoder=decoder,
      head=head,
      detection_generator=generator,
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=anchor_size)

  _ = model(images, image_shape, anchor_boxes, training=is_training)
def test_build_model(self, include_mask, use_separable_conv,
                     build_anchor_boxes, is_training):
  num_classes = 3
  min_level = 3
  max_level = 7
  num_scales = 3
  aspect_ratios = [1.0]
  anchor_size = 3
  resnet_model_id = 50
  num_anchors_per_location = num_scales * len(aspect_ratios)
  image_size = 384
  images = np.random.rand(2, image_size, image_size, 3)
  image_shape = np.array([[image_size, image_size], [image_size, image_size]])

  if build_anchor_boxes:
    anchor_boxes = anchor.Anchor(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=3,
        image_size=(image_size, image_size)).multilevel_boxes
    for l in anchor_boxes:
      anchor_boxes[l] = tf.tile(
          tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1])
  else:
    anchor_boxes = None

  backbone = resnet.ResNet(model_id=resnet_model_id)
  decoder = fpn.FPN(
      input_specs=backbone.output_specs,
      min_level=min_level,
      max_level=max_level,
      use_separable_conv=use_separable_conv)
  rpn_head = dense_prediction_heads.RPNHead(
      min_level=min_level,
      max_level=max_level,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=1)
  detection_head = instance_heads.DetectionHead(num_classes=num_classes)
  roi_generator_obj = roi_generator.MultilevelROIGenerator()
  roi_sampler_obj = roi_sampler.ROISampler()
  roi_aligner_obj = roi_aligner.MultilevelROIAligner()
  detection_generator_obj = detection_generator.DetectionGenerator()
  if include_mask:
    mask_head = instance_heads.MaskHead(
        num_classes=num_classes, upsample_factor=2)
    mask_sampler_obj = mask_sampler.MaskSampler(
        mask_target_size=28, num_sampled_masks=1)
    mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
  else:
    mask_head = None
    mask_sampler_obj = None
    mask_roi_aligner_obj = None
  model = maskrcnn_model.MaskRCNNModel(
      backbone, decoder, rpn_head, detection_head, roi_generator_obj,
      roi_sampler_obj, roi_aligner_obj, detection_generator_obj, mask_head,
      mask_sampler_obj, mask_roi_aligner_obj,
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=anchor_size)

  gt_boxes = np.array(
      [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
       [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
      dtype=np.float32)
  gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
  if include_mask:
    gt_masks = np.ones((2, 3, 100, 100))
  else:
    gt_masks = None

  # Results will be checked in test_forward.
  _ = model(
      images, image_shape, anchor_boxes, gt_boxes, gt_classes, gt_masks,
      training=is_training)
def test_forward(self, strategy, training, shared_backbone, shared_decoder):
  num_classes = 3
  min_level = 3
  max_level = 4
  num_scales = 3
  aspect_ratios = [1.0]
  anchor_size = 3
  segmentation_resnet_model_id = 101
  segmentation_output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  aspp_decoder_level = int(np.math.log2(segmentation_output_stride))
  fpn_decoder_level = 3
  class_agnostic_bbox_pred = False
  cascade_class_ensemble = False
  image_size = (256, 256)
  images = np.random.rand(2, image_size[0], image_size[1], 3)
  image_shape = np.array([[224, 100], [100, 224]])
  shared_decoder = shared_decoder and shared_backbone
  with strategy.scope():
    anchor_boxes = anchor.Anchor(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=anchor_size,
        image_size=image_size).multilevel_boxes
    num_anchors_per_location = len(aspect_ratios) * num_scales

    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
    backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
    decoder = fpn.FPN(
        min_level=min_level,
        max_level=max_level,
        input_specs=backbone.output_specs)
    rpn_head = dense_prediction_heads.RPNHead(
        min_level=min_level,
        max_level=max_level,
        num_anchors_per_location=num_anchors_per_location)
    detection_head = instance_heads.DetectionHead(
        num_classes=num_classes,
        class_agnostic_bbox_pred=class_agnostic_bbox_pred)
    roi_generator_obj = roi_generator.MultilevelROIGenerator()
    roi_sampler_cascade = []
    roi_sampler_obj = roi_sampler.ROISampler()
    roi_sampler_cascade.append(roi_sampler_obj)
    roi_aligner_obj = roi_aligner.MultilevelROIAligner()
    detection_generator_obj = detection_generator.DetectionGenerator()
    mask_head = instance_heads.MaskHead(
        num_classes=num_classes, upsample_factor=2)
    mask_sampler_obj = mask_sampler.MaskSampler(
        mask_target_size=28, num_sampled_masks=1)
    mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)

    if shared_backbone:
      segmentation_backbone = None
    else:
      segmentation_backbone = resnet.ResNet(
          model_id=segmentation_resnet_model_id)
    if not shared_decoder:
      level = aspp_decoder_level
      segmentation_decoder = aspp.ASPP(
          level=level, dilation_rates=aspp_dilation_rates)
    else:
      level = fpn_decoder_level
      segmentation_decoder = None
    segmentation_head = segmentation_heads.SegmentationHead(
        num_classes=2,  # stuff and common class for things.
        level=level,
        num_convs=2)

    model = panoptic_maskrcnn_model.PanopticMaskRCNNModel(
        backbone,
        decoder,
        rpn_head,
        detection_head,
        roi_generator_obj,
        roi_sampler_obj,
        roi_aligner_obj,
        detection_generator_obj,
        mask_head,
        mask_sampler_obj,
        mask_roi_aligner_obj,
        segmentation_backbone=segmentation_backbone,
        segmentation_decoder=segmentation_decoder,
        segmentation_head=segmentation_head,
        class_agnostic_bbox_pred=class_agnostic_bbox_pred,
        cascade_class_ensemble=cascade_class_ensemble,
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=anchor_size)

    gt_boxes = np.array(
        [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
         [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
        dtype=np.float32)
    gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
    gt_masks = np.ones((2, 3, 100, 100))

    results = model(
        images, image_shape, anchor_boxes, gt_boxes, gt_classes, gt_masks,
        training=training)

    self.assertIn('rpn_boxes', results)
    self.assertIn('rpn_scores', results)
    if training:
      self.assertIn('class_targets', results)
      self.assertIn('box_targets', results)
      self.assertIn('class_outputs', results)
      self.assertIn('box_outputs', results)
      self.assertIn('mask_outputs', results)
    else:
      self.assertIn('detection_boxes', results)
      self.assertIn('detection_scores', results)
      self.assertIn('detection_classes', results)
      self.assertIn('num_detections', results)
      self.assertIn('detection_masks', results)
      self.assertIn('segmentation_outputs', results)
      self.assertAllEqual(
          [2, image_size[0] // (2**level), image_size[1] // (2**level), 2],
          results['segmentation_outputs'].numpy().shape)