def test_forward(self, use_separable_conv, use_sync_bn, has_att_heads): if has_att_heads: attribute_heads = [dict(name='depth', type='regression', size=1)] else: attribute_heads = None retinanet_head = dense_prediction_heads.RetinaNetHead( min_level=3, max_level=4, num_classes=3, num_anchors_per_location=3, num_convs=2, num_filters=256, attribute_heads=attribute_heads, use_separable_conv=use_separable_conv, activation='relu', use_sync_bn=use_sync_bn, norm_momentum=0.99, norm_epsilon=0.001, kernel_regularizer=None, bias_regularizer=None, ) features = { '3': np.random.rand(2, 128, 128, 16), '4': np.random.rand(2, 64, 64, 16), } scores, boxes, attributes = retinanet_head(features) self.assertAllEqual(scores['3'].numpy().shape, [2, 128, 128, 9]) self.assertAllEqual(scores['4'].numpy().shape, [2, 64, 64, 9]) self.assertAllEqual(boxes['3'].numpy().shape, [2, 128, 128, 12]) self.assertAllEqual(boxes['4'].numpy().shape, [2, 64, 64, 12]) if has_att_heads: for att in attributes.values(): self.assertAllEqual(att['3'].numpy().shape, [2, 128, 128, 3]) self.assertAllEqual(att['4'].numpy().shape, [2, 64, 64, 3])
def test_serialize_deserialize(self): """Validate the network can be serialized and deserialized.""" num_classes = 3 min_level = 3 max_level = 7 num_scales = 3 aspect_ratios = [1.0] num_anchors_per_location = num_scales * len(aspect_ratios) backbone = resnet.ResNet(model_id=50) decoder = fpn.FPN( input_specs=backbone.output_specs, min_level=min_level, max_level=max_level) head = dense_prediction_heads.RetinaNetHead( min_level=min_level, max_level=max_level, num_classes=num_classes, num_anchors_per_location=num_anchors_per_location) generator = detection_generator.MultilevelDetectionGenerator( max_num_detections=10) model = retinanet_model.RetinaNetModel( backbone=backbone, decoder=decoder, head=head, detection_generator=generator, min_level=min_level, max_level=max_level, num_scales=num_scales, aspect_ratios=aspect_ratios, anchor_size=3) config = model.get_config() new_model = retinanet_model.RetinaNetModel.from_config(config) # Validate that the config can be forced to JSON. _ = new_model.to_json() # If the serialization was successful, the new config should match the old. self.assertAllEqual(model.get_config(), new_model.get_config())
def test_serialize_deserialize(self): retinanet_head = dense_prediction_heads.RetinaNetHead( min_level=3, max_level=7, num_classes=3, num_anchors_per_location=9, num_convs=2, num_filters=16, attribute_heads=None, use_separable_conv=False, activation='relu', use_sync_bn=False, norm_momentum=0.99, norm_epsilon=0.001, kernel_regularizer=None, bias_regularizer=None, ) config = retinanet_head.get_config() new_retinanet_head = ( dense_prediction_heads.RetinaNetHead.from_config(config)) self.assertAllEqual(retinanet_head.get_config(), new_retinanet_head.get_config())
def build_retinanet( input_specs: tf.keras.layers.InputSpec, model_config: retinanet_cfg.RetinaNet, l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, backbone: Optional[tf.keras.Model] = None, decoder: Optional[tf.keras.regularizers.Regularizer] = None ) -> tf.keras.Model: """Builds RetinaNet model.""" norm_activation_config = model_config.norm_activation if not backbone: backbone = backbones.factory.build_backbone( input_specs=input_specs, backbone_config=model_config.backbone, norm_activation_config=norm_activation_config, l2_regularizer=l2_regularizer) backbone_features = backbone(tf.keras.Input(input_specs.shape[1:])) if not decoder: decoder = decoders.factory.build_decoder( input_specs=backbone.output_specs, model_config=model_config, l2_regularizer=l2_regularizer) head_config = model_config.head generator_config = model_config.detection_generator num_anchors_per_location = (len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales) head = dense_prediction_heads.RetinaNetHead( min_level=model_config.min_level, max_level=model_config.max_level, num_classes=model_config.num_classes, num_anchors_per_location=num_anchors_per_location, num_convs=head_config.num_convs, num_filters=head_config.num_filters, attribute_heads=[ cfg.as_dict() for cfg in (head_config.attribute_heads or []) ], share_classification_heads=head_config.share_classification_heads, use_separable_conv=head_config.use_separable_conv, activation=norm_activation_config.activation, use_sync_bn=norm_activation_config.use_sync_bn, norm_momentum=norm_activation_config.norm_momentum, norm_epsilon=norm_activation_config.norm_epsilon, kernel_regularizer=l2_regularizer) # Builds decoder and head so that their trainable weights are initialized if decoder: decoder_features = decoder(backbone_features) _ = head(decoder_features) detection_generator_obj = detection_generator.MultilevelDetectionGenerator( apply_nms=generator_config.apply_nms, pre_nms_top_k=generator_config.pre_nms_top_k, pre_nms_score_threshold=generator_config.pre_nms_score_threshold, nms_iou_threshold=generator_config.nms_iou_threshold, max_num_detections=generator_config.max_num_detections, nms_version=generator_config.nms_version, use_cpu_nms=generator_config.use_cpu_nms, soft_nms_sigma=generator_config.soft_nms_sigma, tflite_post_processing_config=generator_config.tflite_post_processing. as_dict()) model = retinanet_model.RetinaNetModel( backbone, decoder, head, detection_generator_obj, min_level=model_config.min_level, max_level=model_config.max_level, num_scales=model_config.anchor.num_scales, aspect_ratios=model_config.anchor.aspect_ratios, anchor_size=model_config.anchor.anchor_size) return model
def test_build_model(self, use_separable_conv, build_anchor_boxes, is_training, has_att_heads): num_classes = 3 min_level = 3 max_level = 7 num_scales = 3 aspect_ratios = [1.0] anchor_size = 3 fpn_num_filters = 256 head_num_convs = 4 head_num_filters = 256 num_anchors_per_location = num_scales * len(aspect_ratios) image_size = 384 images = np.random.rand(2, image_size, image_size, 3) image_shape = np.array([[image_size, image_size], [image_size, image_size]]) if build_anchor_boxes: anchor_boxes = anchor.Anchor( min_level=min_level, max_level=max_level, num_scales=num_scales, aspect_ratios=aspect_ratios, anchor_size=anchor_size, image_size=(image_size, image_size)).multilevel_boxes for l in anchor_boxes: anchor_boxes[l] = tf.tile( tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1]) else: anchor_boxes = None if has_att_heads: attribute_heads = [dict(name='depth', type='regression', size=1)] else: attribute_heads = None backbone = resnet.ResNet(model_id=50) decoder = fpn.FPN(input_specs=backbone.output_specs, min_level=min_level, max_level=max_level, num_filters=fpn_num_filters, use_separable_conv=use_separable_conv) head = dense_prediction_heads.RetinaNetHead( min_level=min_level, max_level=max_level, num_classes=num_classes, attribute_heads=attribute_heads, num_anchors_per_location=num_anchors_per_location, use_separable_conv=use_separable_conv, num_convs=head_num_convs, num_filters=head_num_filters) generator = detection_generator.MultilevelDetectionGenerator( max_num_detections=10) model = retinanet_model.RetinaNetModel(backbone=backbone, decoder=decoder, head=head, detection_generator=generator, min_level=min_level, max_level=max_level, num_scales=num_scales, aspect_ratios=aspect_ratios, anchor_size=anchor_size) _ = model(images, image_shape, anchor_boxes, training=is_training)
def test_forward(self, strategy, image_size, training, has_att_heads, output_intermediate_features, soft_nms_sigma): """Test for creation of a R50-FPN RetinaNet.""" tf.keras.backend.set_image_data_format('channels_last') num_classes = 3 min_level = 3 max_level = 7 num_scales = 3 aspect_ratios = [1.0] num_anchors_per_location = num_scales * len(aspect_ratios) images = np.random.rand(2, image_size[0], image_size[1], 3) image_shape = np.array([[image_size[0], image_size[1]], [image_size[0], image_size[1]]]) with strategy.scope(): anchor_gen = anchor.build_anchor_generator( min_level=min_level, max_level=max_level, num_scales=num_scales, aspect_ratios=aspect_ratios, anchor_size=3) anchor_boxes = anchor_gen(image_size) for l in anchor_boxes: anchor_boxes[l] = tf.tile( tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1]) backbone = resnet.ResNet(model_id=50) decoder = fpn.FPN(input_specs=backbone.output_specs, min_level=min_level, max_level=max_level) if has_att_heads: attribute_heads = [ dict(name='depth', type='regression', size=1) ] else: attribute_heads = None head = dense_prediction_heads.RetinaNetHead( min_level=min_level, max_level=max_level, num_classes=num_classes, attribute_heads=attribute_heads, num_anchors_per_location=num_anchors_per_location) generator = detection_generator.MultilevelDetectionGenerator( max_num_detections=10, nms_version='v1', use_cpu_nms=soft_nms_sigma is not None, soft_nms_sigma=soft_nms_sigma) model = retinanet_model.RetinaNetModel( backbone=backbone, decoder=decoder, head=head, detection_generator=generator) model_outputs = model( images, image_shape, anchor_boxes, output_intermediate_features=output_intermediate_features, training=training) if training: cls_outputs = model_outputs['cls_outputs'] box_outputs = model_outputs['box_outputs'] for level in range(min_level, max_level + 1): self.assertIn(str(level), cls_outputs) self.assertIn(str(level), box_outputs) self.assertAllEqual([ 2, image_size[0] // 2**level, image_size[1] // 2**level, num_classes * num_anchors_per_location ], cls_outputs[str(level)].numpy().shape) self.assertAllEqual([ 2, image_size[0] // 2**level, image_size[1] // 2**level, 4 * num_anchors_per_location ], box_outputs[str(level)].numpy().shape) if has_att_heads: att_outputs = model_outputs['attribute_outputs'] for att in att_outputs.values(): self.assertAllEqual([ 2, image_size[0] // 2**level, image_size[1] // 2**level, 1 * num_anchors_per_location ], att[str(level)].numpy().shape) else: self.assertIn('detection_boxes', model_outputs) self.assertIn('detection_scores', model_outputs) self.assertIn('detection_classes', model_outputs) self.assertIn('num_detections', model_outputs) self.assertAllEqual([2, 10, 4], model_outputs['detection_boxes'].numpy().shape) self.assertAllEqual( [2, 10], model_outputs['detection_scores'].numpy().shape) self.assertAllEqual( [2, 10], model_outputs['detection_classes'].numpy().shape) self.assertAllEqual([ 2, ], model_outputs['num_detections'].numpy().shape) if has_att_heads: self.assertIn('detection_attributes', model_outputs) self.assertAllEqual([2, 10, 1], model_outputs['detection_attributes'] ['depth'].numpy().shape) if output_intermediate_features: for l in range(2, 6): self.assertIn('backbone_{}'.format(l), model_outputs) self.assertAllEqual([ 2, image_size[0] // 2**l, image_size[1] // 2**l, backbone.output_specs[str(l)].as_list()[-1] ], model_outputs['backbone_{}'.format(l)].numpy().shape) for l in range(min_level, max_level + 1): self.assertIn('decoder_{}'.format(l), model_outputs) self.assertAllEqual([ 2, image_size[0] // 2**l, image_size[1] // 2**l, decoder.output_specs[str(l)].as_list()[-1] ], model_outputs['decoder_{}'.format(l)].numpy().shape)