def test_fpn_decoder_creation(self, num_filters, use_separable_conv):
  """Test creation of FPN decoder."""
  min_level = 3
  max_level = 7
  input_specs = {}
  for level in range(min_level, max_level):
    input_specs[str(level)] = tf.TensorShape(
        [1, 128 // (2**level), 128 // (2**level), 3])

  network = decoders.FPN(
      input_specs=input_specs,
      num_filters=num_filters,
      use_separable_conv=use_separable_conv,
      use_sync_bn=True)

  model_config = configs.retinanet.RetinaNet()
  model_config.min_level = min_level
  model_config.max_level = max_level
  model_config.num_classes = 10
  model_config.input_size = [None, None, 3]
  model_config.decoder = decoders_cfg.Decoder(
      type='fpn',
      fpn=decoders_cfg.FPN(
          num_filters=num_filters, use_separable_conv=use_separable_conv))

  factory_network = factory.build_decoder(
      input_specs=input_specs, model_config=model_config)

  network_config = network.get_config()
  factory_network_config = factory_network.get_config()

  self.assertEqual(network_config, factory_network_config)
@dataclasses.dataclass
class RetinaNet(hyperparams.Config):
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3
  max_level: int = 7
  anchor: Anchor = Anchor()
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(type='fpn', fpn=decoders.FPN())
  head: RetinaNetHead = RetinaNetHead()
  detection_generator: DetectionGenerator = DetectionGenerator()
  norm_activation: common.NormActivation = common.NormActivation()
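# A minimal usage sketch (not from the original file): since RetinaNet is a
# plain dataclass-style config, any field can be overridden at construction
# time. The values below are illustrative, not recommended settings.
retinanet_config = RetinaNet(
    num_classes=91,
    input_size=[640, 640, 3],
    anchor=Anchor(anchor_size=3))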
@dataclasses.dataclass
class MaskRCNN(hyperparams.Config):
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 2
  max_level: int = 6
  anchor: Anchor = Anchor()
  include_mask: bool = True
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(type='fpn', fpn=decoders.FPN())
  rpn_head: RPNHead = RPNHead()
  detection_head: DetectionHead = DetectionHead()
  roi_generator: ROIGenerator = ROIGenerator()
  roi_sampler: ROISampler = ROISampler()
  roi_aligner: ROIAligner = ROIAligner()
  detection_generator: DetectionGenerator = DetectionGenerator()
  mask_head: Optional[MaskHead] = MaskHead()
  mask_sampler: Optional[MaskSampler] = MaskSampler()
  mask_roi_aligner: Optional[MaskROIAligner] = MaskROIAligner()
  norm_activation: common.NormActivation = common.NormActivation(
      norm_momentum=0.997, norm_epsilon=0.0001, use_sync_bn=True)
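# A hedged sketch (assumption: the Optional mask fields exist so the mask
# branch can be disabled by setting them to None, yielding a box-only
# Faster R-CNN style config from the same class).
fasterrcnn_config = MaskRCNN(
    num_classes=91,
    input_size=[1024, 1024, 3],
    include_mask=False,
    mask_head=None,
    mask_sampler=None,
    mask_roi_aligner=None)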
def test_builder(self, backbone_type, decoder_type, input_size,
                 quantize_detection_head, quantize_detection_decoder):
  num_classes = 2
  input_specs = tf.keras.layers.InputSpec(
      shape=[None, input_size[0], input_size[1], 3])

  if backbone_type == 'spinenet_mobile':
    backbone_config = backbones.Backbone(
        type=backbone_type,
        spinenet_mobile=backbones.SpineNetMobile(
            model_id='49',
            stochastic_depth_drop_rate=0.2,
            min_level=3,
            max_level=7,
            use_keras_upsampling_2d=True))
  elif backbone_type == 'mobilenet':
    backbone_config = backbones.Backbone(
        type=backbone_type,
        mobilenet=backbones.MobileNet(
            model_id='MobileNetV2', filter_size_scale=1.0))
  else:
    raise ValueError(
        'backbone_type {} is not supported'.format(backbone_type))

  if decoder_type == 'identity':
    decoder_config = decoders.Decoder(type=decoder_type)
  elif decoder_type == 'fpn':
    decoder_config = decoders.Decoder(
        type=decoder_type,
        fpn=decoders.FPN(
            num_filters=128,
            use_separable_conv=True,
            use_keras_layer=True))
  else:
    raise ValueError(
        'decoder_type {} is not supported'.format(decoder_type))

  model_config = retinanet_cfg.RetinaNet(
      num_classes=num_classes,
      input_size=[input_size[0], input_size[1], 3],
      backbone=backbone_config,
      decoder=decoder_config,
      head=retinanet_cfg.RetinaNetHead(
          attribute_heads=None, use_separable_conv=True))

  l2_regularizer = tf.keras.regularizers.l2(5e-5)

  # Build the original float32 RetinaNet model.
  model = factory.build_retinanet(
      input_specs=input_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer)

  # Call the model with a dummy input to build the head part.
  dummy_input = tf.zeros([1] + model_config.input_size)
  model(dummy_input, training=True)

  # Build the QAT model from the original model with the quantization config.
  qat_model = qat_factory.build_qat_retinanet(
      model=model,
      quantization=common.Quantization(
          quantize_detection_decoder=quantize_detection_decoder,
          quantize_detection_head=quantize_detection_head),
      model_config=model_config)

  if quantize_detection_head:
    # The head becomes a RetinaNetHeadQuantized when quantization is applied.
    self.assertIsInstance(
        qat_model.head, qat_dense_prediction_heads.RetinaNetHeadQuantized)
  else:
    # The head stays a RetinaNetHead when quantization is not applied to it.
    self.assertIsInstance(
        qat_model.head, dense_prediction_heads.RetinaNetHead)
    self.assertNotIsInstance(
        qat_model.head, qat_dense_prediction_heads.RetinaNetHeadQuantized)

  if decoder_type == 'fpn':
    if quantize_detection_decoder:
      # The FPN decoder becomes a plain Keras functional model after
      # quantization is applied.
      self.assertNotIsInstance(qat_model.decoder, fpn.FPN)
    else:
      self.assertIsInstance(qat_model.decoder, fpn.FPN)
def seg_resnetfpn_pascal() -> cfg.ExperimentConfig:
  """Image segmentation on PASCAL VOC with ResNet-FPN."""
  train_batch_size = 256
  eval_batch_size = 32
  steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=21,
              input_size=[512, 512, 3],
              min_level=3,
              max_level=7,
              backbone=backbones.Backbone(
                  type='resnet', resnet=backbones.ResNet(model_id=50)),
              decoder=decoders.Decoder(type='fpn', fpn=decoders.FPN()),
              head=SegmentationHead(level=3, num_convs=3),
              norm_activation=common.NormActivation(
                  activation='swish', use_sync_bn=True)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
              is_training=True,
              global_batch_size=train_batch_size,
              aug_scale_min=0.2,
              aug_scale_max=1.5),
          validation_data=DataConfig(
              input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              resize_eval_groundtruth=False,
              groundtruth_padded_size=[512, 512],
              drop_remainder=False),
      ),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=450 * steps_per_epoch,
          validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 450 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
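# A usage sketch (assumption: cfg.ExperimentConfig follows the Model Garden
# hyperparams.Config API, whose override() accepts nested dicts). This derives
# a smaller-batch variant of the experiment without editing the factory itself.
pascal_config = seg_resnetfpn_pascal()
pascal_config.override({
    'task': {
        'train_data': {'global_batch_size': 64},
        'validation_data': {'global_batch_size': 8},
    }
})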
def maskrcnn_mobilenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN with MobileNet backbone."""
  steps_per_epoch = 232
  coco_val_samples = 5000
  train_batch_size = 512
  eval_batch_size = 512

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(model_id='MobileNetV2')),
              decoder=decoders.Decoder(
                  type='fpn',
                  fpn=decoders.FPN(num_filters=128, use_separable_conv=True)),
              rpn_head=RPNHead(
                  use_separable_conv=True,
                  num_filters=128),  # 1/2 of original channels.
              detection_head=DetectionHead(
                  use_separable_conv=True,
                  num_filters=128,
                  fc_dims=512),  # 1/2 of original channels.
              mask_head=MaskHead(
                  use_separable_conv=True,
                  num_filters=128),  # 1/2 of original channels.
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  activation='relu6',
                  norm_momentum=0.99,
                  norm_epsilon=0.001,
                  use_sync_bn=True),
              num_classes=91,
              input_size=[512, 512, 3],
              min_level=3,
              max_level=6,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 350,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          steps_per_epoch * 320, steps_per_epoch * 340
                      ],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
      ])
  return config
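# A consumption sketch (assumption: the factories above are registered with
# official.core.exp_factory under their function names via
# exp_factory.register_config_factory, as is the Model Garden convention).
from official.core import exp_factory

experiment_config = exp_factory.get_exp_config('maskrcnn_mobilenet_coco')
experiment_config.validate()  # Enforces the `restrictions` listed above.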