def test_mobilenet_creation(self, model_id, filter_size_scale):
  """Checks the factory-built MobileNet matches a directly built one."""
  # Build the backbone directly through the class constructor.
  direct = backbones.MobileNet(
      model_id=model_id,
      filter_size_scale=filter_size_scale,
      norm_momentum=0.99,
      norm_epsilon=1e-5)

  # Build the same backbone through the config-driven factory.
  backbone_config = backbones_cfg.Backbone(
      type='mobilenet',
      mobilenet=backbones_cfg.MobileNet(
          model_id=model_id, filter_size_scale=filter_size_scale))
  norm_activation_config = common_cfg.NormActivation(
      norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
  built = factory.build_backbone(
      input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
      backbone_config=backbone_config,
      norm_activation_config=norm_activation_config)

  # Identical get_config() output implies identical architectures.
  self.assertEqual(direct.get_config(), built.get_config())
def test_spinenet_creation(self, model_id):
  """Test creation of SpineNet models.

  Builds a SpineNet backbone directly and via the config-driven factory,
  then checks that both produce identical Keras configs.
  """
  input_size = 128
  min_level = 3
  max_level = 7
  input_specs = tf.keras.layers.InputSpec(
      shape=[None, input_size, input_size, 3])
  # NOTE(review): `model_id` is not forwarded to this direct construction,
  # while the factory path below does use it; the config comparison can
  # therefore only hold when the parameterized `model_id` matches the
  # SpineNet defaults — confirm intended.
  network = backbones.SpineNet(
      input_specs=input_specs,
      min_level=min_level,
      max_level=max_level,
      norm_momentum=0.99,
      norm_epsilon=1e-5)
  backbone_config = backbones_cfg.Backbone(
      type='spinenet',
      spinenet=backbones_cfg.SpineNet(model_id=model_id))
  norm_activation_config = common_cfg.NormActivation(
      norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
  factory_network = factory.build_backbone(
      input_specs=tf.keras.layers.InputSpec(
          shape=[None, input_size, input_size, 3]),
      backbone_config=backbone_config,
      norm_activation_config=norm_activation_config)
  network_config = network.get_config()
  factory_network_config = factory_network.get_config()
  self.assertEqual(network_config, factory_network_config)
def test_efficientnet_creation(self, model_id, se_ratio):
  """Checks the factory-built EfficientNet matches a directly built one."""
  direct = backbones.EfficientNet(
      model_id=model_id,
      se_ratio=se_ratio,
      norm_momentum=0.99,
      norm_epsilon=1e-5)

  built = factory.build_backbone(
      input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
      backbone_config=backbones_cfg.Backbone(
          type='efficientnet',
          efficientnet=backbones_cfg.EfficientNet(
              model_id=model_id, se_ratio=se_ratio)),
      norm_activation_config=common_cfg.NormActivation(
          norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False))

  # Identical get_config() output implies identical architectures.
  self.assertEqual(direct.get_config(), built.get_config())
def detr_coco() -> cfg.ExperimentConfig:
  """Config to get results that matches the paper."""
  train_batch_size = 64
  eval_batch_size = 64
  num_steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  train_steps = 500 * num_steps_per_epoch  # 500 epochs
  decay_at = train_steps - 100 * num_steps_per_epoch  # 400 epochs

  train_data = coco.COCODataConfig(
      tfds_name='coco/2017',
      tfds_split='train',
      is_training=True,
      global_batch_size=train_batch_size,
      shuffle_buffer_size=1000,
  )
  validation_data = coco.COCODataConfig(
      tfds_name='coco/2017',
      tfds_split='validation',
      is_training=False,
      global_batch_size=eval_batch_size,
      drop_remainder=False)

  task = DetrTask(
      init_checkpoint='',
      init_checkpoint_modules='backbone',
      model=Detr(
          num_classes=81,
          input_size=[1333, 1333, 3],
          norm_activation=common.NormActivation()),
      losses=Losses(),
      train_data=train_data,
      validation_data=validation_data)

  optimizer_config = optimization.OptimizationConfig({
      'optimizer': {
          'type': 'detr_adamw',
          'detr_adamw': {
              'weight_decay_rate': 1e-4,
              'global_clipnorm': 0.1,
              # Avoid AdamW legacy behavior.
              'gradient_clip_norm': 0.0
          }
      },
      'learning_rate': {
          'type': 'stepwise',
          'stepwise': {
              'boundaries': [decay_at],
              'values': [0.0001, 1.0e-05]
          }
      },
  })

  trainer = cfg.TrainerConfig(
      train_steps=train_steps,
      validation_steps=-1,
      steps_per_loop=10000,
      summary_interval=10000,
      checkpoint_interval=10000,
      validation_interval=10000,
      max_to_keep=1,
      best_checkpoint_export_subdir='best_ckpt',
      best_checkpoint_eval_metric='AP',
      optimizer_config=optimizer_config)

  return cfg.ExperimentConfig(
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
      ])
def testBuildCenterNet(self):
  """CenterNet forward pass yields correctly shaped raw outputs."""
  backbone = hourglass.build_hourglass(
      input_specs=tf.keras.layers.InputSpec(shape=[None, 512, 512, 3]),
      backbone_config=backbones.Backbone(type='hourglass'),
      norm_activation_config=common.NormActivation(use_sync_bn=True))

  # Output name -> number of channels for each prediction task.
  task_outputs = {
      'ct_heatmaps': 90,
      'ct_offset': 2,
      'ct_size': 2,
  }
  head = centernet_head.CenterNetHead(
      task_outputs=task_outputs,
      input_specs=backbone.output_specs,
      input_levels=['2_0', '2'])

  model = centernet_model.CenterNetModel(
      backbone=backbone,
      head=head,
      detection_generator=detection_generator.CenterNetDetectionGenerator())

  outputs = model(tf.zeros((5, 512, 512, 3)))
  raw = outputs['raw_output']
  self.assertLen(raw, 3)
  # One prediction per input level, each at 1/4 spatial resolution.
  for name, channels in task_outputs.items():
    self.assertLen(raw[name], 2)
    self.assertEqual(raw[name][0].shape, (5, 128, 128, channels))
class AssembleNetPlusModel(video_classification.VideoClassificationModel):
  """The AssembleNet model config."""
  model_type: str = 'assemblenet_plus'
  # Defaults to the AssembleNet++ 3D backbone.
  backbone: Backbone3D = Backbone3D(type='assemblenet_plus')
  norm_activation: common.NormActivation = common.NormActivation(
      norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=True)
  max_pool_predictions: bool = False
class VideoSSLModel(VideoClassificationModel):
  """The model config."""
  # Whether to normalize features before projection — presumably L2
  # normalization; verify against the model builder.
  normalize_feature: bool = False
  # Projection MLP defaults: 3 hidden layers of width 2048, 128-d output.
  hidden_dim: int = 2048
  hidden_layer_num: int = 3
  projection_dim: int = 128
  hidden_norm_activation: common.NormActivation = common.NormActivation(
      use_sync_bn=False, norm_momentum=0.997, norm_epsilon=1.0e-05)
def test_hourglass(self):
  """Hourglass backbone maps 512x512 inputs to 128x128x256 endpoints."""
  net = hourglass.build_hourglass(
      input_specs=tf.keras.layers.InputSpec(shape=[None, 512, 512, 3]),
      backbone_config=backbones.Backbone(type='hourglass'),
      norm_activation_config=common.NormActivation(use_sync_bn=True))

  features = net(np.zeros((2, 512, 512, 3), dtype=np.float32))
  # Both endpoints are at 1/4 input resolution with 256 channels.
  for endpoint in ('2_0', '2'):
    self.assertEqual(features[endpoint].shape, (2, 128, 128, 256))
def seg_unet3d_test() -> cfg.ExperimentConfig:
  """Image segmentation on a dummy dataset with 3D UNet for testing purpose."""
  train_batch_size = 2
  eval_batch_size = 2
  steps_per_epoch = 10

  model = SemanticSegmentationModel3D(
      num_classes=2,
      input_size=[32, 32, 32],
      num_channels=2,
      backbone=backbones.Backbone(
          type='unet_3d', unet_3d=backbones.UNet3D(model_id=2)),
      decoder=decoders.Decoder(
          type='unet_3d_decoder',
          unet_3d_decoder=decoders.UNet3DDecoder(model_id=2)),
      head=SegmentationHead3D(num_convs=0, num_classes=2),
      norm_activation=common.NormActivation(
          activation='relu', use_sync_bn=False))

  def _split(path, batch_size, training):
    # Both splits share shape/class settings; only path/batch/flag differ.
    return DataConfig(
        input_path=path,
        num_classes=2,
        input_size=[32, 32, 32],
        num_channels=2,
        is_training=training,
        global_batch_size=batch_size)

  task = SemanticSegmentation3DTask(
      model=model,
      train_data=_split('train.tfrecord', train_batch_size, True),
      validation_data=_split('val.tfrecord', eval_batch_size, False),
      losses=Losses(loss_type='adaptive'))

  trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      train_steps=10,
      validation_steps=10,
      validation_interval=steps_per_epoch,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'sgd',
          },
          'learning_rate': {
              'type': 'constant',
              'constant': {
                  'learning_rate': 0.000001
              }
          }
      }))

  return cfg.ExperimentConfig(
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
class VideoClassificationModel(hyperparams.Config):
  """The model config."""
  model_type: str = 'video_classification'
  # Defaults to a 3D ResNet-50 backbone.
  backbone: backbones_3d.Backbone3D = backbones_3d.Backbone3D(
      type='resnet_3d', resnet_3d=backbones_3d.ResNet3D50())
  norm_activation: common.NormActivation = common.NormActivation(
      use_sync_bn=False)
  dropout_rate: float = 0.2
  aggregate_endpoints: bool = False
  # Optional backbone endpoint names to request — presumably; verify
  # against the model builder.
  require_endpoints: Optional[Tuple[str, ...]] = None
class MovinetModel(video_classification.VideoClassificationModel):
  """The MoViNet model config."""
  model_type: str = 'movinet'
  backbone: Backbone3D = Backbone3D()
  norm_activation: common.NormActivation = common.NormActivation(
      activation=None,  # legacy flag, not used.
      norm_momentum=0.99,
      norm_epsilon=1e-3,
      use_sync_bn=True)
  activation: str = 'swish'
  # Whether the model also returns internal states — presumably for
  # streaming inference; verify against the MoViNet builder.
  output_states: bool = False
class ImageClassificationModel(hyperparams.Config):
  """Image classification model config."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=lambda: [224, 224])
  # Defaults to a Darknet backbone.
  backbone: backbones.Backbone = backbones.Backbone(
      type='darknet', darknet=backbones.Darknet())
  dropout_rate: float = 0.0
  norm_activation: common.NormActivation = common.NormActivation()
  # Adds a Batch Normalization layer pre-GlobalAveragePooling in classification.
  add_head_batch_norm: bool = False
  kernel_initializer: str = 'VarianceScaling'
class Detr(hyperparams.Config):
  """DETR detection model config."""
  num_queries: int = 100
  hidden_size: int = 256
  num_classes: int = 91  # 0: background
  num_encoder_layers: int = 6
  num_decoder_layers: int = 6
  input_size: List[int] = dataclasses.field(default_factory=list)
  # ResNet-50 backbone with non-trainable batch norm (bn_trainable=False).
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet',
      resnet=backbones.ResNet(model_id=50, bn_trainable=False))
  norm_activation: common.NormActivation = common.NormActivation()
class SemanticSegmentationModel(hyperparams.Config):
  """Semantic segmentation model config."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3
  max_level: int = 6
  head: SegmentationHead = SegmentationHead()
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  # Identity decoder by default (backbone features used as-is).
  decoder: decoders.Decoder = decoders.Decoder(type='identity')
  # Optional auxiliary mask-scoring head; disabled (None) by default.
  mask_scoring_head: Optional[MaskScoringHead] = None
  norm_activation: common.NormActivation = common.NormActivation()
class SimCLRModel(hyperparams.Config):
  """SimCLR model config."""
  input_size: List[int] = dataclasses.field(default_factory=list)
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  # 3-layer projection MLP with 128-d output; ft_proj_idx is presumably the
  # projection layer used for fine-tuning — verify in the model code.
  projection_head: ProjectionHead = ProjectionHead(
      proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1)
  supervised_head: SupervisedHead = SupervisedHead(num_classes=1001)
  norm_activation: common.NormActivation = common.NormActivation(
      norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)
  # Defaults to pretraining mode.
  mode: str = simclr_model.PRETRAIN
  backbone_trainable: bool = True
class ImageClassificationModel(hyperparams.Config):
  """The model config."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  dropout_rate: float = 0.0
  norm_activation: common.NormActivation = common.NormActivation(
      use_sync_bn=False)
  # Adds a BatchNormalization layer pre-GlobalAveragePooling in classification
  add_head_batch_norm: bool = False
  kernel_initializer: str = 'random_uniform'
class DbofModel(hyperparams.Config):
  """The model config."""
  cluster_size: int = 3000
  hidden_size: int = 2000
  add_batch_norm: bool = True
  sample_random_frames: bool = True
  use_context_gate_cluster_layer: bool = False
  # Bottleneck size for the context-gate cluster layer; 0 presumably
  # disables the bottleneck — verify against the model builder.
  context_gate_cluster_bottleneck_size: int = 0
  pooling_method: str = 'average'
  yt8m_agg_classifier_model: str = 'MoeModel'
  agg_model: hyperparams.Config = MoeModel()
  norm_activation: common.NormActivation = common.NormActivation(
      activation='relu', use_sync_bn=False)
class RetinaNet(hyperparams.Config):
  """RetinaNet detection model config."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3
  max_level: int = 7
  anchor: Anchor = Anchor()
  # Defaults: ResNet backbone with an FPN decoder.
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(type='fpn', fpn=decoders.FPN())
  head: RetinaNetHead = RetinaNetHead()
  detection_generator: DetectionGenerator = DetectionGenerator()
  norm_activation: common.NormActivation = common.NormActivation()
class SemanticSegmentationModel3D(hyperparams.Config):
  """Semantic segmentation model config."""
  num_classes: int = 0
  num_channels: int = 1
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3
  max_level: int = 6
  head: SegmentationHead3D = SegmentationHead3D()
  # Defaults: 3D UNet encoder with the matching 3D UNet decoder.
  backbone: backbones.Backbone = backbones.Backbone(
      type='unet_3d', unet_3d=backbones.UNet3D())
  decoder: decoders.Decoder = decoders.Decoder(
      type='unet_3d_decoder', unet_3d_decoder=decoders.UNet3DDecoder())
  norm_activation: common.NormActivation = common.NormActivation()
def video_classification_ucf101() -> cfg.ExperimentConfig:
  """Video classification on UCF-101 with resnet."""
  train_split = DataConfig(
      name='ucf101',
      num_classes=101,
      is_training=True,
      split='train',
      drop_remainder=True,
      num_examples=9537,
      temporal_stride=2,
      feature_shape=(32, 224, 224, 3))
  train_split.tfds_name = 'ucf101'
  train_split.tfds_split = 'train'

  # NOTE(review): the eval split is constructed with is_training=True,
  # mirroring the original configuration — confirm this is intended.
  eval_split = DataConfig(
      name='ucf101',
      num_classes=101,
      is_training=True,
      split='test',
      drop_remainder=False,
      num_examples=3783,
      temporal_stride=2,
      feature_shape=(32, 224, 224, 3))
  eval_split.tfds_name = 'ucf101'
  eval_split.tfds_split = 'test'

  task = VideoClassificationTask(
      model=VideoClassificationModel(
          backbone=backbones_3d.Backbone3D(
              type='resnet_3d', resnet_3d=backbones_3d.ResNet3D50()),
          norm_activation=common.NormActivation(
              norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
      losses=Losses(l2_weight_decay=1e-4),
      train_data=train_split,
      validation_data=eval_split)

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
      ])
  add_trainer(
      config,
      train_batch_size=64,
      eval_batch_size=16,
      learning_rate=0.8,
      train_epochs=100)
  return config
class PanopticDeeplab(hyperparams.Config):
  """Panoptic Deeplab model config."""
  num_classes: int = 2
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3
  max_level: int = 6
  norm_activation: common.NormActivation = common.NormActivation()
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(type='aspp')
  semantic_head: SemanticHead = SemanticHead()
  instance_head: InstanceHead = InstanceHead()
  # Whether the semantic and instance heads share one decoder — presumably;
  # verify against the model builder.
  shared_decoder: bool = False
  generate_panoptic_masks: bool = True
  post_processor: PanopticDeeplabPostProcessor = PanopticDeeplabPostProcessor()
class SimCLRMTModelConfig(hyperparams.Config):
  """Model config for multi-task SimCLR model."""
  input_size: List[int] = dataclasses.field(default_factory=list)
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  backbone_trainable: bool = True
  projection_head: simclr_configs.ProjectionHead = simclr_configs.ProjectionHead(
      proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1)
  norm_activation: common.NormActivation = common.NormActivation(
      norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)
  # Per-task head configs; empty by default.
  heads: Tuple[SimCLRMTHeadConfig, ...] = ()
  # L2 weight decay is used in the model, not in task.
  # Note that this can not be used together with lars optimizer.
  l2_weight_decay: float = 0.0
  init_checkpoint: str = ''
  # backbone_projection or backbone
  init_checkpoint_modules: str = 'backbone_projection'
class Yolo(hyperparams.Config):
  """YOLO detection model config."""
  input_size: Optional[List[int]] = dataclasses.field(
      default_factory=lambda: [512, 512, 3])
  # Defaults: CSP-DarkNet-53 backbone with a YOLOv4 regular decoder.
  backbone: backbones.Backbone = backbones.Backbone(
      type='darknet', darknet=backbones.Darknet(model_id='cspdarknet53'))
  decoder: decoders.Decoder = decoders.Decoder(
      type='yolo_decoder',
      yolo_decoder=decoders.YoloDecoder(version='v4', type='regular'))
  head: YoloHead = YoloHead()
  detection_generator: YoloDetectionGenerator = YoloDetectionGenerator()
  loss: YoloLoss = YoloLoss()
  norm_activation: common.NormActivation = common.NormActivation(
      activation='mish',
      use_sync_bn=True,
      norm_momentum=0.99,
      norm_epsilon=0.001)
  num_classes: int = 80
  anchor_boxes: AnchorBoxes = AnchorBoxes()
  # Presumably toggles DarkNet-style model behavior; verify against the
  # YOLO model builder.
  darknet_based_model: bool = False
class MaskRCNN(hyperparams.Config):
  """Mask R-CNN model config."""
  num_classes: int = 0
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 2
  max_level: int = 6
  anchor: Anchor = Anchor()
  include_mask: bool = True
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(type='fpn', fpn=decoders.FPN())
  rpn_head: RPNHead = RPNHead()
  detection_head: DetectionHead = DetectionHead()
  roi_generator: ROIGenerator = ROIGenerator()
  roi_sampler: ROISampler = ROISampler()
  roi_aligner: ROIAligner = ROIAligner()
  detection_generator: DetectionGenerator = DetectionGenerator()
  # Mask-branch components — presumably only used when include_mask is
  # True; verify against the model builder.
  mask_head: Optional[MaskHead] = MaskHead()
  mask_sampler: Optional[MaskSampler] = MaskSampler()
  mask_roi_aligner: Optional[MaskROIAligner] = MaskROIAligner()
  norm_activation: common.NormActivation = common.NormActivation(
      norm_momentum=0.997, norm_epsilon=0.0001, use_sync_bn=True)
def video_classification_kinetics700_2020() -> cfg.ExperimentConfig:
  """Video classification on Kinetics 700 2020 with resnet."""
  model = VideoClassificationModel(
      backbone=backbones_3d.Backbone3D(
          type='resnet_3d', resnet_3d=backbones_3d.ResNet3D50()),
      norm_activation=common.NormActivation(
          norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False))

  task = VideoClassificationTask(
      model=model,
      losses=Losses(l2_weight_decay=1e-4),
      train_data=kinetics700_2020(is_training=True),
      validation_data=kinetics700_2020(is_training=False))

  experiment = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
      ])
  add_trainer(experiment, train_batch_size=1024, eval_batch_size=64)
  return experiment
def simclr_finetuning_imagenet() -> cfg.ExperimentConfig:
  """SimCLR fine-tuning on ImageNet."""
  train_batch_size = 1024
  eval_batch_size = 1024
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  pretrain_model_base = ''

  model = SimCLRModel(
      mode=simclr_model.FINETUNE,
      backbone_trainable=True,
      input_size=[224, 224, 3],
      backbone=backbones.Backbone(
          type='resnet', resnet=backbones.ResNet(model_id=50)),
      projection_head=ProjectionHead(
          proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1),
      supervised_head=SupervisedHead(num_classes=1001, zero_init=True),
      norm_activation=common.NormActivation(
          norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False))

  task = SimCLRFinetuneTask(
      model=model,
      loss=ClassificationLosses(),
      evaluation=Evaluation(),
      train_data=DataConfig(
          parser=Parser(mode=simclr_model.FINETUNE),
          input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
          is_training=True,
          global_batch_size=train_batch_size),
      validation_data=DataConfig(
          parser=Parser(mode=simclr_model.FINETUNE),
          input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
          is_training=False,
          global_batch_size=eval_batch_size),
      init_checkpoint=pretrain_model_base,
      # all, backbone_projection or backbone
      init_checkpoint_modules='backbone_projection')

  optimizer_config = optimization.OptimizationConfig({
      'optimizer': {
          'type': 'lars',
          'lars': {
              'momentum': 0.9,
              'weight_decay_rate': 0.0,
              'exclude_from_weight_decay': ['batch_normalization', 'bias']
          }
      },
      'learning_rate': {
          'type': 'cosine',
          'cosine': {
              # 0.01 × BatchSize / 512
              'initial_learning_rate': 0.01 * train_batch_size / 512,
              'decay_steps': 60 * steps_per_epoch
          }
      }
  })

  trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      train_steps=60 * steps_per_epoch,
      validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_epoch,
      optimizer_config=optimizer_config)

  return cfg.ExperimentConfig(
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
def simclr_pretraining_imagenet() -> cfg.ExperimentConfig:
  """SimCLR pretraining on ImageNet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size

  model = SimCLRModel(
      mode=simclr_model.PRETRAIN,
      backbone_trainable=True,
      input_size=[224, 224, 3],
      backbone=backbones.Backbone(
          type='resnet', resnet=backbones.ResNet(model_id=50)),
      projection_head=ProjectionHead(
          proj_output_dim=128, num_proj_layers=3, ft_proj_idx=1),
      supervised_head=SupervisedHead(num_classes=1001),
      norm_activation=common.NormActivation(
          norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=True))

  task = SimCLRPretrainTask(
      model=model,
      loss=ContrastiveLoss(),
      evaluation=Evaluation(),
      train_data=DataConfig(
          parser=Parser(mode=simclr_model.PRETRAIN),
          decoder=Decoder(decode_label=True),
          input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
          is_training=True,
          global_batch_size=train_batch_size),
      validation_data=DataConfig(
          parser=Parser(mode=simclr_model.PRETRAIN),
          decoder=Decoder(decode_label=True),
          input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
          is_training=False,
          global_batch_size=eval_batch_size),
  )

  optimizer_config = optimization.OptimizationConfig({
      'optimizer': {
          'type': 'lars',
          'lars': {
              'momentum': 0.9,
              'weight_decay_rate': 0.000001,
              'exclude_from_weight_decay': ['batch_normalization', 'bias']
          }
      },
      'learning_rate': {
          'type': 'cosine',
          'cosine': {
              # 0.2 * BatchSize / 256
              'initial_learning_rate': 0.2 * train_batch_size / 256,
              # train_steps - warmup_steps
              'decay_steps': 475 * steps_per_epoch
          }
      },
      'warmup': {
          'type': 'linear',
          'linear': {
              # 5% of total epochs
              'warmup_steps': 25 * steps_per_epoch
          }
      }
  })

  trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      train_steps=500 * steps_per_epoch,
      validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_epoch,
      optimizer_config=optimizer_config)

  return cfg.ExperimentConfig(
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig:
  """COCO object detection with mobile RetinaNet."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  input_size = 384

  model = RetinaNet(
      backbone=backbones.Backbone(
          type='spinenet_mobile',
          spinenet_mobile=backbones.SpineNetMobile(
              model_id='49',
              stochastic_depth_drop_rate=0.2,
              min_level=3,
              max_level=7,
              use_keras_upsampling_2d=False)),
      decoder=decoders.Decoder(type='identity', identity=decoders.Identity()),
      head=RetinaNetHead(num_filters=48, use_separable_conv=True),
      anchor=Anchor(anchor_size=3),
      norm_activation=common.NormActivation(
          use_sync_bn=True, activation='swish'),
      num_classes=91,
      input_size=[input_size, input_size, 3],
      min_level=3,
      max_level=7)

  task = RetinaNetTask(
      annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                   'instances_val2017.json'),
      model=model,
      losses=Losses(l2_weight_decay=3e-5),
      train_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
          is_training=True,
          global_batch_size=train_batch_size,
          parser=Parser(
              aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)),
      validation_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
          is_training=False,
          global_batch_size=eval_batch_size))

  optimizer_config = optimization.OptimizationConfig({
      'optimizer': {
          'type': 'sgd',
          'sgd': {
              'momentum': 0.9
          }
      },
      'learning_rate': {
          'type': 'stepwise',
          'stepwise': {
              'boundaries': [575 * steps_per_epoch, 590 * steps_per_epoch],
              'values': [
                  0.32 * train_batch_size / 256.0,
                  0.032 * train_batch_size / 256.0,
                  0.0032 * train_batch_size / 256.0
              ],
          }
      },
      'warmup': {
          'type': 'linear',
          'linear': {
              'warmup_steps': 2000,
              'warmup_learning_rate': 0.0067
          }
      }
  })

  trainer = cfg.TrainerConfig(
      train_steps=600 * steps_per_epoch,
      validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_epoch,
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      optimizer_config=optimizer_config)

  return cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
      ])
def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size

  task = RetinaNetTask(
      init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
      init_checkpoint_modules='backbone',
      annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                   'instances_val2017.json'),
      model=RetinaNet(
          num_classes=91,
          input_size=[640, 640, 3],
          norm_activation=common.NormActivation(use_sync_bn=False),
          min_level=3,
          max_level=7),
      losses=Losses(l2_weight_decay=1e-4),
      train_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
          is_training=True,
          global_batch_size=train_batch_size,
          parser=Parser(
              aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.2)),
      validation_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
          is_training=False,
          global_batch_size=eval_batch_size))

  # Step-decayed learning rate scaled linearly with the batch size.
  lr_values = [
      0.32 * train_batch_size / 256.0,
      0.032 * train_batch_size / 256.0,
      0.0032 * train_batch_size / 256.0,
  ]
  trainer = cfg.TrainerConfig(
      train_steps=72 * steps_per_epoch,
      validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_epoch,
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'sgd',
              'sgd': {
                  'momentum': 0.9
              }
          },
          'learning_rate': {
              'type': 'stepwise',
              'stepwise': {
                  'boundaries': [57 * steps_per_epoch, 67 * steps_per_epoch],
                  'values': lr_values,
              }
          },
          'warmup': {
              'type': 'linear',
              'linear': {
                  'warmup_steps': 500,
                  'warmup_learning_rate': 0.0067
              }
          }
      }))

  return cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
def cascadercnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Cascade RCNN-RS with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000
  train_batch_size = 256
  eval_batch_size = 8

  model = MaskRCNN(
      backbone=backbones.Backbone(
          type='spinenet',
          spinenet=backbones.SpineNet(
              model_id='49',
              min_level=3,
              max_level=7,
          )),
      decoder=decoders.Decoder(type='identity', identity=decoders.Identity()),
      roi_sampler=ROISampler(cascade_iou_thresholds=[0.6, 0.7]),
      detection_head=DetectionHead(
          class_agnostic_bbox_pred=True, cascade_class_ensemble=True),
      anchor=Anchor(anchor_size=3),
      norm_activation=common.NormActivation(
          use_sync_bn=True, activation='swish'),
      num_classes=91,
      input_size=[640, 640, 3],
      min_level=3,
      max_level=7,
      include_mask=True)

  task = MaskRCNNTask(
      annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                   'instances_val2017.json'),
      model=model,
      losses=Losses(l2_weight_decay=0.00004),
      train_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
          is_training=True,
          global_batch_size=train_batch_size,
          parser=Parser(
              aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.5)),
      validation_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
          is_training=False,
          global_batch_size=eval_batch_size,
          drop_remainder=False))

  optimizer_config = optimization.OptimizationConfig({
      'optimizer': {
          'type': 'sgd',
          'sgd': {
              'momentum': 0.9
          }
      },
      'learning_rate': {
          'type': 'stepwise',
          'stepwise': {
              'boundaries': [steps_per_epoch * 475, steps_per_epoch * 490],
              'values': [0.32, 0.032, 0.0032],
          }
      },
      'warmup': {
          'type': 'linear',
          'linear': {
              'warmup_steps': 2000,
              'warmup_learning_rate': 0.0067
          }
      }
  })

  trainer = cfg.TrainerConfig(
      train_steps=steps_per_epoch * 500,
      validation_steps=coco_val_samples // eval_batch_size,
      validation_interval=steps_per_epoch,
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      optimizer_config=optimizer_config)

  return cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])