def build_retinanet(
    input_specs: tf.keras.layers.InputSpec,
    model_config: retinanet_cfg.RetinaNet,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:
  """Assembles a RetinaNet detection model from a config object.

  The backbone and the decoder come from their factories. The prediction head
  and the detection generator are constructed directly from the `head`,
  `anchor`, `norm_activation` and `detection_generator` sections of
  `model_config`.

  Args:
    input_specs: Input spec of the model. It is handed to the backbone factory,
      and its shape without the leading dimension is used to create the
      symbolic `tf.keras.Input` the backbone is called on.
    model_config: RetinaNet configuration all hyper-parameters are read from.
    l2_regularizer: Regularizer forwarded to the backbone, to the decoder and,
      as `kernel_regularizer`, to the head. Defaults to None.

  Returns:
    The assembled `retinanet_model.RetinaNetModel`.
  """
  norm_cfg = model_config.norm_activation

  feature_extractor = backbones.factory.build_backbone(
      input_specs=input_specs,
      backbone_config=model_config.backbone,
      norm_activation_config=norm_cfg,
      l2_regularizer=l2_regularizer)
  # NOTE(review): presumably this symbolic call builds the backbone so that
  # `output_specs` is available to the decoder factory below -- confirm.
  feature_extractor(tf.keras.Input(input_specs.shape[1:]))

  feature_decoder = decoders.factory.build_decoder(
      input_specs=feature_extractor.output_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer)

  head_cfg = model_config.head
  nms_cfg = model_config.detection_generator
  anchor_cfg = model_config.anchor

  # One anchor per (aspect ratio, scale) pair at every feature-map location.
  anchors_per_location = len(anchor_cfg.aspect_ratios) * anchor_cfg.num_scales

  # Attribute head configs are passed on as plain dicts; an unset (falsy)
  # config value results in an empty list.
  attribute_head_dicts = [
      att_cfg.as_dict() for att_cfg in (head_cfg.attribute_heads or [])
  ]

  prediction_head = dense_prediction_heads.RetinaNetHead(
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_classes=model_config.num_classes,
      num_anchors_per_location=anchors_per_location,
      num_convs=head_cfg.num_convs,
      num_filters=head_cfg.num_filters,
      attribute_heads=attribute_head_dicts,
      use_separable_conv=head_cfg.use_separable_conv,
      activation=norm_cfg.activation,
      use_sync_bn=norm_cfg.use_sync_bn,
      norm_momentum=norm_cfg.norm_momentum,
      norm_epsilon=norm_cfg.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  post_processor = detection_generator.MultilevelDetectionGenerator(
      apply_nms=nms_cfg.apply_nms,
      pre_nms_top_k=nms_cfg.pre_nms_top_k,
      pre_nms_score_threshold=nms_cfg.pre_nms_score_threshold,
      nms_iou_threshold=nms_cfg.nms_iou_threshold,
      max_num_detections=nms_cfg.max_num_detections,
      use_batched_nms=nms_cfg.use_batched_nms,
      use_cpu_nms=nms_cfg.use_cpu_nms)

  return retinanet_model.RetinaNetModel(
      feature_extractor,
      feature_decoder,
      prediction_head,
      post_processor,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_scales=anchor_cfg.num_scales,
      aspect_ratios=anchor_cfg.aspect_ratios,
      anchor_size=anchor_cfg.anchor_size)
def test_serialize_deserialize(self):
  """Checks that a RetinaNetModel survives a config round trip.

  A ResNet-50 / FPN RetinaNet is built, re-created from its own config, dumped
  to JSON, and the configs of both models are compared.
  """
  level_range = dict(min_level=3, max_level=7)
  scales_per_level = 3
  ratios = [1.0]

  resnet_backbone = resnet.ResNet(model_id=50)
  fpn_decoder = fpn.FPN(
      input_specs=resnet_backbone.output_specs, **level_range)
  retina_head = dense_prediction_heads.RetinaNetHead(
      num_classes=3,
      num_anchors_per_location=scales_per_level * len(ratios),
      **level_range)
  post_processor = detection_generator.MultilevelDetectionGenerator(
      max_num_detections=10)

  original = retinanet_model.RetinaNetModel(
      backbone=resnet_backbone,
      decoder=fpn_decoder,
      head=retina_head,
      detection_generator=post_processor)

  restored = retinanet_model.RetinaNetModel.from_config(
      original.get_config())

  # The restored model must be expressible as JSON.
  restored.to_json()

  # A successful round trip leaves the configuration unchanged.
  self.assertAllEqual(original.get_config(), restored.get_config())
def test_num_params(self, num_classes, min_level, max_level, num_scales,
                    aspect_ratios, resnet_model_id, use_separable_conv,
                    fpn_num_filters, head_num_convs, head_num_filters,
                    expected_num_params):
  """Checks the parameter count of a RetinaNet after one training-mode call.

  The model is assembled from a ResNet backbone, an FPN decoder and a
  RetinaNet head using the given hyper-parameters, run once on a random batch
  of two 384x384 images, and `count_params()` is compared with
  `expected_num_params`.
  """
  batch_size = 2
  image_size = 384
  anchors_per_location = num_scales * len(aspect_ratios)

  images = np.random.rand(batch_size, image_size, image_size, 3)
  image_shape = np.array([[image_size, image_size]] * batch_size)

  anchor_boxes = anchor.Anchor(
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=3,
      image_size=(image_size, image_size)).multilevel_boxes
  # Give every level's boxes a leading batch axis and repeat them per image.
  for level in anchor_boxes:
    with_batch_axis = tf.expand_dims(anchor_boxes[level], axis=0)
    anchor_boxes[level] = tf.tile(with_batch_axis, [batch_size, 1, 1, 1])

  resnet_backbone = resnet.ResNet(model_id=resnet_model_id)
  fpn_decoder = fpn.FPN(
      input_specs=resnet_backbone.output_specs,
      min_level=min_level,
      max_level=max_level,
      num_filters=fpn_num_filters,
      use_separable_conv=use_separable_conv)
  retina_head = dense_prediction_heads.RetinaNetHead(
      min_level=min_level,
      max_level=max_level,
      num_classes=num_classes,
      num_anchors_per_location=anchors_per_location,
      use_separable_conv=use_separable_conv,
      num_convs=head_num_convs,
      num_filters=head_num_filters)
  post_processor = detection_generator.MultilevelDetectionGenerator(
      max_num_detections=10)

  model = retinanet_model.RetinaNetModel(
      backbone=resnet_backbone,
      decoder=fpn_decoder,
      head=retina_head,
      detection_generator=post_processor)

  # The model is called once before its parameters are counted.
  model(images, image_shape, anchor_boxes, training=True)

  self.assertEqual(expected_num_params, model.count_params())
def test_serialize_deserialize(self):
  """Checks that MultilevelDetectionGenerator survives a config round trip.

  The generator's config must equal the constructor arguments, and a
  generator re-created through `from_config` must report the same config.
  """
  init_args = dict(
      apply_nms=True,
      pre_nms_top_k=1000,
      pre_nms_score_threshold=0.1,
      nms_iou_threshold=0.5,
      max_num_detections=10,
      use_batched_nms=False,
  )
  original = detection_generator.MultilevelDetectionGenerator(**init_args)

  # Compare against a copy so the expectation is independent of `init_args`.
  self.assertEqual(original.get_config(), dict(init_args))

  restored = detection_generator.MultilevelDetectionGenerator.from_config(
      original.get_config())

  self.assertAllEqual(original.get_config(), restored.get_config())
def test_forward(self, strategy, image_size, training, has_att_heads):
  """Runs a forward pass of a R50-FPN RetinaNet and checks output shapes.

  In training mode the per-level class, box and (optionally) attribute
  outputs are checked. Otherwise the post-processed detection outputs are
  checked.

  Args:
    strategy: Object whose `scope()` context the model is built and run in.
    image_size: Indexable holding the image height and width.
    training: Whether the model is called with `training=True`.
    has_att_heads: Whether the head is given a 'depth' regression attribute.
  """
  tf.keras.backend.set_image_data_format('channels_last')

  batch_size = 2
  max_detections = 10
  num_classes = 3
  min_level, max_level = 3, 7
  num_scales = 3
  aspect_ratios = [1.0]
  anchors_per_location = num_scales * len(aspect_ratios)
  height, width = image_size[0], image_size[1]

  images = np.random.rand(batch_size, height, width, 3)
  image_shape = np.array([[height, width]] * batch_size)

  # NOTE(review): the extent of the strategy scope is reconstructed from the
  # statement order; everything up to the forward pass is assumed to run
  # inside it -- confirm against the upstream file.
  with strategy.scope():
    anchor_gen = anchor.build_anchor_generator(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=3)
    anchor_boxes = anchor_gen(image_size)
    # Give every level's boxes a leading batch axis and repeat them per image.
    for lvl in anchor_boxes:
      with_batch_axis = tf.expand_dims(anchor_boxes[lvl], axis=0)
      anchor_boxes[lvl] = tf.tile(with_batch_axis, [batch_size, 1, 1, 1])

    resnet_backbone = resnet.ResNet(model_id=50)
    fpn_decoder = fpn.FPN(
        input_specs=resnet_backbone.output_specs,
        min_level=min_level,
        max_level=max_level)

    attribute_heads = {'depth': ('regression', 1)} if has_att_heads else None
    retina_head = dense_prediction_heads.RetinaNetHead(
        min_level=min_level,
        max_level=max_level,
        num_classes=num_classes,
        attribute_heads=attribute_heads,
        num_anchors_per_location=anchors_per_location)
    post_processor = detection_generator.MultilevelDetectionGenerator(
        max_num_detections=max_detections)
    model = retinanet_model.RetinaNetModel(
        backbone=resnet_backbone,
        decoder=fpn_decoder,
        head=retina_head,
        detection_generator=post_processor)

    model_outputs = model(
        images, image_shape, anchor_boxes, training=training)

  def expected_level_shape(level, depth):
    # The expected feature map at `level` is the image floor-divided by
    # 2**level along each spatial axis.
    stride = 2**level
    return [batch_size, height // stride, width // stride, depth]

  if training:
    cls_outputs = model_outputs['cls_outputs']
    box_outputs = model_outputs['box_outputs']
    att_outputs = model_outputs['att_outputs']
    for level in range(min_level, max_level + 1):
      key = str(level)
      self.assertIn(key, cls_outputs)
      self.assertIn(key, box_outputs)
      self.assertAllEqual(
          expected_level_shape(level, num_classes * anchors_per_location),
          cls_outputs[key].numpy().shape)
      self.assertAllEqual(
          expected_level_shape(level, 4 * anchors_per_location),
          box_outputs[key].numpy().shape)
      if has_att_heads:
        for att in att_outputs.values():
          self.assertAllEqual(
              expected_level_shape(level, 1 * anchors_per_location),
              att[key].numpy().shape)
    return

  # Inference mode: the outputs are post-processed detections.
  for output_name in ('detection_boxes', 'detection_scores',
                      'detection_classes', 'detection_attributes',
                      'num_detections'):
    self.assertIn(output_name, model_outputs)

  self.assertAllEqual(
      [batch_size, max_detections, 4],
      model_outputs['detection_boxes'].numpy().shape)
  self.assertAllEqual(
      [batch_size, max_detections],
      model_outputs['detection_scores'].numpy().shape)
  self.assertAllEqual(
      [batch_size, max_detections],
      model_outputs['detection_classes'].numpy().shape)
  self.assertAllEqual(
      [batch_size], model_outputs['num_detections'].numpy().shape)
  if has_att_heads:
    depth_attribute = model_outputs['detection_attributes']['depth']
    self.assertAllEqual(
        [batch_size, max_detections, 1], depth_attribute.numpy().shape)
def test_build_model(self, use_separable_conv, build_anchor_boxes,
                     is_training, has_att_heads):
  """Builds a R50-FPN RetinaNet and runs a single forward pass.

  There are no explicit assertions; the test passes when model construction
  and the call complete without raising.

  Args:
    use_separable_conv: Forwarded to both the FPN decoder and the head.
    build_anchor_boxes: If true, anchors are generated here and passed to the
      model; otherwise None is passed in their place.
    is_training: Value of the `training` argument of the model call.
    has_att_heads: Whether the head is given a 'depth' regression attribute.
  """
  batch_size = 2
  image_size = 384
  min_level, max_level = 3, 7
  num_scales = 3
  aspect_ratios = [1.0]
  anchor_size = 3
  anchors_per_location = num_scales * len(aspect_ratios)

  images = np.random.rand(batch_size, image_size, image_size, 3)
  image_shape = np.array([[image_size, image_size]] * batch_size)

  anchor_boxes = None
  if build_anchor_boxes:
    anchor_boxes = anchor.Anchor(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=anchor_size,
        image_size=(image_size, image_size)).multilevel_boxes
    # Give every level's boxes a leading batch axis and repeat them per image.
    for level in anchor_boxes:
      with_batch_axis = tf.expand_dims(anchor_boxes[level], axis=0)
      anchor_boxes[level] = tf.tile(with_batch_axis, [batch_size, 1, 1, 1])

  attribute_heads = None
  if has_att_heads:
    attribute_heads = [dict(name='depth', type='regression', size=1)]

  resnet_backbone = resnet.ResNet(model_id=50)
  fpn_decoder = fpn.FPN(
      input_specs=resnet_backbone.output_specs,
      min_level=min_level,
      max_level=max_level,
      num_filters=256,
      use_separable_conv=use_separable_conv)
  retina_head = dense_prediction_heads.RetinaNetHead(
      min_level=min_level,
      max_level=max_level,
      num_classes=3,
      attribute_heads=attribute_heads,
      num_anchors_per_location=anchors_per_location,
      use_separable_conv=use_separable_conv,
      num_convs=4,
      num_filters=256)
  post_processor = detection_generator.MultilevelDetectionGenerator(
      max_num_detections=10)

  model = retinanet_model.RetinaNetModel(
      backbone=resnet_backbone,
      decoder=fpn_decoder,
      head=retina_head,
      detection_generator=post_processor,
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=anchor_size)

  model(images, image_shape, anchor_boxes, training=is_training)
def testDetectionsOutputShape(self, use_batched_nms):
  """Checks the output shapes of MultilevelDetectionGenerator.

  Random class and box predictions for levels 4 to 6 are passed through the
  generator, and the shapes of the boxes, scores, classes and valid-detection
  counts are verified.

  Args:
    use_batched_nms: Forwarded to the generator's `use_batched_nms` argument.
  """
  batch_size = 1
  num_classes = 4
  max_num_detections = 100
  min_level, max_level = 4, 6
  num_scales = 2
  aspect_ratios = [1.0, 2.0]
  anchor_scale = 2.0
  output_size = [64, 64]

  generator_args = dict(
      apply_nms=True,
      pre_nms_top_k=5000,
      pre_nms_score_threshold=0.01,
      nms_iou_threshold=0.5,
      max_num_detections=max_num_detections,
      use_batched_nms=use_batched_nms,
  )

  anchor_generator = anchor.build_anchor_generator(
      min_level, max_level, num_scales, aspect_ratios, anchor_scale)
  anchor_boxes = anchor_generator(output_size)

  # (level key, first row, one-past-last row, spatial size) of each level's
  # slice of the flat 84-row prediction arrays.
  level_layout = (('4', 0, 64, 8), ('5', 64, 80, 4), ('6', 80, 84, 2))

  def split_by_level(flat_outputs, depth):
    # Cuts the flat array into per-level float32 tensors of shape
    # [1, size, size, depth].
    return {
        level: tf.reshape(
            tf.convert_to_tensor(flat_outputs[start:stop], dtype=tf.float32),
            [1, size, size, depth])
        for level, start, stop, size in level_layout
    }

  # Random 84 x num_classes class outputs in [-1.5, 1.5).
  cls_outputs_all = (np.random.rand(84, num_classes) - 0.5) * 3
  # Random 84 boxes.
  box_outputs_all = np.random.rand(84, 4)

  class_outputs = split_by_level(cls_outputs_all, num_classes)
  box_outputs = split_by_level(box_outputs_all, 4)

  image_info = tf.constant(
      [[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], dtype=tf.float32)

  generator = detection_generator.MultilevelDetectionGenerator(
      **generator_args)
  # Only row 1 of `image_info` ([100, 100]) is handed to the generator.
  results = generator(
      box_outputs, class_outputs, anchor_boxes, image_info[:, 1, :])

  per_detection_shape = (batch_size, max_num_detections)
  self.assertEqual(
      results['detection_boxes'].numpy().shape, per_detection_shape + (4,))
  self.assertEqual(
      results['detection_scores'].numpy().shape, per_detection_shape)
  self.assertEqual(
      results['detection_classes'].numpy().shape, per_detection_shape)
  self.assertEqual(results['num_detections'].numpy().shape, (batch_size,))
def testDetectionsOutputShape(self, nms_version, has_att_heads, use_cpu_nms,
                              soft_nms_sigma):
  """Checks the output shapes of MultilevelDetectionGenerator.

  Random class, box and (optionally) attribute predictions for levels 4 to 6
  are passed through the generator, and the shapes of the resulting tensors
  are verified.

  Args:
    nms_version: Forwarded to the generator's `nms_version` argument.
    has_att_heads: Whether a 'depth' attribute output is fed to the generator
      and checked in the results.
    use_cpu_nms: Forwarded to the generator's `use_cpu_nms` argument.
    soft_nms_sigma: Forwarded to the generator's `soft_nms_sigma` argument.
  """
  batch_size = 1
  num_classes = 4
  max_num_detections = 10
  min_level, max_level = 4, 6
  num_scales = 2
  aspect_ratios = [1.0, 2.0]
  anchor_scale = 2.0
  output_size = [64, 64]

  generator_args = dict(
      apply_nms=True,
      pre_nms_top_k=5000,
      pre_nms_score_threshold=0.01,
      nms_iou_threshold=0.5,
      max_num_detections=max_num_detections,
      nms_version=nms_version,
      use_cpu_nms=use_cpu_nms,
      soft_nms_sigma=soft_nms_sigma,
  )

  anchor_generator = anchor.build_anchor_generator(
      min_level, max_level, num_scales, aspect_ratios, anchor_scale)
  anchor_boxes = anchor_generator(output_size)

  # (level key, first row, one-past-last row, spatial size) of each level's
  # slice of the flat 84-row prediction arrays.
  level_layout = (('4', 0, 64, 8), ('5', 64, 80, 4), ('6', 80, 84, 2))

  def split_by_level(flat_outputs, depth):
    # Cuts the flat array into per-level float32 tensors of shape
    # [1, size, size, depth].
    return {
        level: tf.reshape(
            tf.convert_to_tensor(flat_outputs[start:stop], dtype=tf.float32),
            [1, size, size, depth])
        for level, start, stop, size in level_layout
    }

  # Random 84 x num_classes class outputs in [-1.5, 1.5).
  cls_outputs_all = (np.random.rand(84, num_classes) - 0.5) * 3
  # Random 84 boxes.
  box_outputs_all = np.random.rand(84, 4)

  class_outputs = split_by_level(cls_outputs_all, num_classes)
  box_outputs = split_by_level(box_outputs_all, 4)

  att_outputs = None
  if has_att_heads:
    # Random single-channel attribute values.
    att_outputs_all = np.random.rand(84, 1)
    att_outputs = {'depth': split_by_level(att_outputs_all, 1)}

  image_info = tf.constant(
      [[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], dtype=tf.float32)

  generator = detection_generator.MultilevelDetectionGenerator(
      **generator_args)
  # Only row 1 of `image_info` ([100, 100]) is handed to the generator.
  results = generator(
      box_outputs, class_outputs, anchor_boxes, image_info[:, 1, :],
      att_outputs)

  per_detection_shape = (batch_size, max_num_detections)
  self.assertEqual(
      results['detection_boxes'].numpy().shape, per_detection_shape + (4,))
  self.assertEqual(
      results['detection_scores'].numpy().shape, per_detection_shape)
  self.assertEqual(
      results['detection_classes'].numpy().shape, per_detection_shape)
  self.assertEqual(results['num_detections'].numpy().shape, (batch_size,))
  if has_att_heads:
    for att in results['detection_attributes'].values():
      self.assertEqual(att.numpy().shape, per_detection_shape + (1,))