def video_classification_ucf101() -> cfg.ExperimentConfig:
  """Video classification on UCF-101 with resnet."""
  train_dataset = DataConfig(
      name='ucf101',
      num_classes=101,
      is_training=True,
      split='train',
      drop_remainder=True,
      num_examples=9537,
      temporal_stride=2,
      feature_shape=(32, 224, 224, 3))
  train_dataset.tfds_name = 'ucf101'
  train_dataset.tfds_split = 'train'
  validation_dataset = DataConfig(
      name='ucf101',
      num_classes=101,
      is_training=True,
      split='test',
      drop_remainder=False,
      num_examples=3783,
      temporal_stride=2,
      feature_shape=(32, 224, 224, 3))
  validation_dataset.tfds_name = 'ucf101'
  validation_dataset.tfds_split = 'test'
  task = VideoClassificationTask(
      model=VideoClassificationModel(
          backbone=backbones_3d.Backbone3D(
              type='resnet_3d', resnet_3d=backbones_3d.ResNet3D50()),
          norm_activation=common.NormActivation(
              norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
      losses=Losses(l2_weight_decay=1e-4),
      train_data=train_dataset,
      validation_data=validation_dataset)
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
      ])
  add_trainer(
      config,
      train_batch_size=64,
      eval_batch_size=16,
      learning_rate=0.8,
      train_epochs=100)
  return config

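# A minimal usage sketch (not part of the original config code). It assumes
# this factory is registered with official.core.exp_factory under the
# hypothetical experiment name 'video_classification_ucf101'; override() and
# validate() are assumed to be the standard hyperparams.Config helpers for
# tweaking a retrieved config and re-checking its declared `restrictions`.
def _example_load_ucf101_config() -> cfg.ExperimentConfig:
  from official.core import exp_factory  # Assumed import path in Model Garden.

  example_config = exp_factory.get_exp_config('video_classification_ucf101')
  # Switch off bfloat16 and re-validate the restrictions declared above.
  example_config.override({'runtime': {'mixed_precision_dtype': 'float32'}})
  example_config.validate()
  return example_config
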
def test_configure_optimizer(self, mixed_precision_dtype, loss_scale):
  config = cfg.ExperimentConfig(
      task=cfg.TaskConfig(model=bert.PretrainerConfig()),
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype=mixed_precision_dtype, loss_scale=loss_scale),
      trainer=trainer_lib.ProgressiveTrainerConfig(
          export_checkpoint=True,
          export_checkpoint_interval=1,
          export_only_final_stage_ckpt=False))
  task = TestPolicy(None, config.task)
  trainer = trainer_lib.ProgressiveTrainer(config, task, self.get_temp_dir())
  if mixed_precision_dtype != 'float16':
    self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)
  elif mixed_precision_dtype == 'float16' and loss_scale is None:
    self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)

  metrics = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
  self.assertIn('training_loss', metrics)

def token_drop_bert_pretraining() -> cfg.ExperimentConfig:
  """BERT pretraining with token dropping."""
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(enable_xla=True),
      task=masked_lm.TokenDropMaskedLMConfig(
          model=bert.PretrainerConfig(
              encoder=encoders.EncoderConfig(
                  any=encoder_config.TokenDropBertEncoderConfig(
                      vocab_size=30522, num_layers=1, token_keep_k=64),
                  type='any')),
          train_data=pretrain_dataloader.BertPretrainDataConfig(),
          validation_data=pretrain_dataloader.BertPretrainDataConfig(
              is_training=False)),
      trainer=cfg.TrainerConfig(
          train_steps=1000000,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adamw',
                  'adamw': {
                      'weight_decay_rate': 0.01,
                      'exclude_from_weight_decay':
                          ['LayerNorm', 'layer_norm', 'bias'],
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 1e-4,
                      'end_learning_rate': 0.0,
                  }
              },
              'warmup': {
                  'type': 'polynomial'
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def video_classification_kinetics600() -> cfg.ExperimentConfig:
  """Video classification on Kinetics 600 with resnet."""
  train_dataset = kinetics600(is_training=True)
  validation_dataset = kinetics600(is_training=False)
  task = VideoClassificationTask(
      model=VideoClassificationModel(
          backbone=backbones_3d.Backbone3D(
              type='resnet_3d', resnet_3d=backbones_3d.ResNet3D50()),
          norm_activation=common.NormActivation(
              norm_momentum=0.9, norm_epsilon=1e-5)),
      losses=Losses(l2_weight_decay=1e-4),
      train_data=train_dataset,
      validation_data=validation_dataset)
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
      ])
  add_trainer(config, train_batch_size=1024, eval_batch_size=64)
  return config

def test_configure_optimizer(self, mixed_precision_dtype, loss_scale):
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype=mixed_precision_dtype, loss_scale=loss_scale),
      trainer=cfg.TrainerConfig(
          optimizer_config=cfg.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd'
              },
              'learning_rate': {
                  'type': 'constant'
              },
          })))
  trainer = self.create_test_trainer(config)
  if mixed_precision_dtype != 'float16':
    self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)
  elif mixed_precision_dtype == 'float16' and loss_scale is None:
    self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)
  else:
    self.assertIsInstance(trainer.optimizer,
                          tf.keras.mixed_precision.LossScaleOptimizer)

  metrics = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
  self.assertIn('training_loss', metrics)

def default_config() -> Config:
  return Config(
      runtime=cfg.RuntimeConfig(),
      task=Task(
          model=ModelConfig(
              embedding_dim=8,
              vocab_sizes=vocab_sizes,
              bottom_mlp=[64, 32, 4],
              top_mlp=[64, 32, 1]),
          loss=Loss(label_smoothing=0.0),
          train_data=DataConfig(
              global_batch_size=train_batch_size,
              is_training=True,
              sharding=True),
          validation_data=DataConfig(
              global_batch_size=eval_batch_size,
              is_training=False,
              sharding=False)),
      trainer=TrainerConfig(
          train_steps=2 * steps_per_epoch,
          validation_interval=steps_per_epoch,
          validation_steps=NUM_EVAL_EXAMPLES // eval_batch_size,
          enable_metrics_in_training=True,
          optimizer_config=OptimizationConfig()),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
      ])

def image_classification_imagenet() -> cfg.ExperimentConfig:
  """Image classification on imagenet with resnet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(enable_xla=True),
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='resnet', resnet=backbones.ResNet(model_id=50)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          30 * steps_per_epoch, 60 * steps_per_epoch,
                          80 * steps_per_epoch
                      ],
                      'values': [
                          0.1 * train_batch_size / 256,
                          0.01 * train_batch_size / 256,
                          0.001 * train_batch_size / 256,
                          0.0001 * train_batch_size / 256,
                      ]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

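# For reference, a small sketch of the schedule arithmetic used above, assuming
# the conventional ImageNet count of 1,281,167 train examples (the actual
# IMAGENET_TRAIN_EXAMPLES constant is defined elsewhere in this module): one
# epoch is 1281167 // 4096 = 312 steps, the stepwise boundaries land at epochs
# 30/60/80, and the base learning rate follows the linear scaling rule
# 0.1 * batch_size / 256.
def _example_imagenet_schedule_math() -> None:
  train_batch_size = 4096
  assumed_train_examples = 1281167  # Assumption; see IMAGENET_TRAIN_EXAMPLES.
  steps_per_epoch = assumed_train_examples // train_batch_size  # 312 steps.
  base_learning_rate = 0.1 * train_batch_size / 256  # 1.6 after scaling.
  boundaries = [epoch * steps_per_epoch for epoch in (30, 60, 80)]
  print(steps_per_epoch, base_learning_rate, boundaries)
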
def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet using Mobile SpineNet backbone."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  input_size = 384
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
      task=RetinaNetTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              backbone=backbones.Backbone(
                  type='spinenet_mobile',
                  spinenet_mobile=backbones.SpineNetMobile(
                      model_id='49',
                      stochastic_depth_drop_rate=0.2,
                      min_level=3,
                      max_level=7)),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              head=RetinaNetHead(num_filters=48, use_separable_conv=True),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[input_size, input_size, 3],
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=3e-5),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=600 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [575 * steps_per_epoch,
                                     590 * steps_per_epoch],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet_mobile.min_level',
          'task.model.max_level == task.model.backbone.spinenet_mobile.max_level',
      ])
  return config

def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=RetinaNetTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=72 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [57 * steps_per_epoch,
                                     67 * steps_per_epoch],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def panoptic_fpn_coco() -> cfg.ExperimentConfig:
  """COCO panoptic segmentation with Panoptic Mask R-CNN."""
  train_batch_size = 64
  eval_batch_size = 8
  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

  # The COCO panoptic dataset has category ids in the range [0, 200] inclusive:
  # id 0 is unused and represents the background class, ids 1-91 are the 91
  # "thing" categories, and ids 92-200 are the 109 "stuff" categories. For the
  # segmentation task we keep id=0 for the background and map all thing
  # categories to id=1; the remaining 109 stuff categories are shifted down by
  # an offset of 90 (num_thing_categories - 1), so they occupy ids 2-110.
  num_panoptic_categories = 201
  num_thing_categories = 91
  num_semantic_segmentation_classes = 111

  is_thing = [False]
  for idx in range(1, num_panoptic_categories):
    is_thing.append(True if idx <= num_thing_categories else False)

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype='float32', enable_xla=True),
      task=PanopticMaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',  # pylint: disable=line-too-long
          init_checkpoint_modules=['backbone'],
          model=PanopticMaskRCNN(
              num_classes=91,
              input_size=[1024, 1024, 3],
              panoptic_segmentation_generator=PanopticSegmentationGenerator(
                  output_size=[640, 640], rescale_predictions=True),
              stuff_classes_offset=90,
              segmentation_model=SEGMENTATION_MODEL(
                  num_classes=num_semantic_segmentation_classes,
                  head=SEGMENTATION_HEAD(
                      level=2,
                      num_convs=0,
                      num_filters=128,
                      decoder_min_level=2,
                      decoder_max_level=6,
                      feature_fusion='panoptic_fpn_fusion'))),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              parser=Parser(
                  segmentation_resize_eval_groundtruth=False,
                  segmentation_groundtruth_padded_size=[640, 640]),
              drop_remainder=False),
          annotation_file=os.path.join(_COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          segmentation_evaluation=semantic_segmentation.Evaluation(
              report_per_class_iou=False, report_train_mean_iou=False),
          panoptic_quality_evaluator=PanopticQualityEvaluator(
              num_categories=num_panoptic_categories,
              ignored_label=0,
              is_thing=is_thing,
              rescale_predictions=True)),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=validation_steps,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

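# A small sketch of the category-id remapping described in the comment inside
# panoptic_fpn_coco() above (the helper name is illustrative, not from the
# original code): background keeps id 0, every "thing" id (1..91) collapses to
# 1, and "stuff" ids (92..200) are shifted down by stuff_classes_offset=90 so
# they occupy 2..110, giving 111 semantic segmentation classes in total.
def _example_panoptic_fpn_semantic_id(category_id: int) -> int:
  num_thing_categories = 91
  stuff_classes_offset = 90
  if category_id == 0:
    return 0  # Background stays at id 0.
  if category_id <= num_thing_categories:
    return 1  # All thing categories map to a single foreground id.
  return category_id - stuff_classes_offset  # Stuff: 92..200 -> 2..110.
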
def wmt_transformer_large() -> cfg.ExperimentConfig:
  """WMT Transformer Large.

  Please refer to tensorflow_models/official/nlp/data/train_sentencepiece.py
  to generate sentencepiece_model and pass
  --params_override=task.sentencepiece_model_path='YOUR_PATH'
  to the train script.
  """
  learning_rate = 2.0
  hidden_size = 1024
  learning_rate *= (hidden_size**-0.5)
  warmup_steps = 16000
  train_steps = 300000
  token_batch_size = 24576
  encdecoder = translation.EncDecoder(
      num_attention_heads=16, intermediate_size=hidden_size * 4)
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(enable_xla=True),
      task=translation.TranslationConfig(
          model=translation.ModelConfig(
              encoder=encdecoder,
              decoder=encdecoder,
              embedding_width=hidden_size,
              padded_decode=True,
              decode_max_length=100),
          train_data=wmt_dataloader.WMTDataConfig(
              tfds_name='wmt14_translate/de-en',
              tfds_split='train',
              src_lang='en',
              tgt_lang='de',
              is_training=True,
              global_batch_size=token_batch_size,
              static_batch=True,
              max_seq_length=64),
          validation_data=wmt_dataloader.WMTDataConfig(
              tfds_name='wmt14_translate/de-en',
              tfds_split='test',
              src_lang='en',
              tgt_lang='de',
              is_training=False,
              global_batch_size=32,
              static_batch=True,
              max_seq_length=100,
          ),
          sentencepiece_model_path=None,
      ),
      trainer=cfg.TrainerConfig(
          train_steps=train_steps,
          validation_steps=-1,
          steps_per_loop=1000,
          summary_interval=1000,
          checkpoint_interval=5000,
          validation_interval=5000,
          max_to_keep=1,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adam',
                  'adam': {
                      'beta_2': 0.997,
                      'epsilon': 1e-9,
                  },
              },
              'learning_rate': {
                  'type': 'power',
                  'power': {
                      'initial_learning_rate': learning_rate,
                      'power': -0.5,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': warmup_steps,
                      'warmup_learning_rate': 0.0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.sentencepiece_model_path != None',
      ])
  return config

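# A quick sketch of the learning-rate scaling used in wmt_transformer_large()
# above: the base rate 2.0 is multiplied by hidden_size**-0.5, i.e. divided by
# sqrt(1024), so the 'power' schedule starts from 2.0 / 32 = 0.0625 and, with
# power=-0.5, decays roughly as step**-0.5 after the 16000-step linear warmup.
def _example_wmt_initial_learning_rate() -> float:
  learning_rate = 2.0
  hidden_size = 1024
  return learning_rate * hidden_size**-0.5  # == 0.0625
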
def cascadercnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Cascade RCNN-RS with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000
  train_batch_size = 256
  eval_batch_size = 8

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49',
                      min_level=3,
                      max_level=7,
                  )),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              roi_sampler=ROISampler(cascade_iou_thresholds=[0.6, 0.7]),
              detection_head=DetectionHead(
                  class_agnostic_bbox_pred=True, cascade_class_ensemble=True),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.5)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [steps_per_epoch * 475,
                                     steps_per_epoch * 490],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
  return config

def maskrcnn_mobilenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN with MobileNet backbone."""
  steps_per_epoch = 232
  coco_val_samples = 5000
  train_batch_size = 512
  eval_batch_size = 512

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(model_id='MobileNetV2')),
              decoder=decoders.Decoder(
                  type='fpn',
                  fpn=decoders.FPN(num_filters=128, use_separable_conv=True)),
              rpn_head=RPNHead(
                  use_separable_conv=True,
                  num_filters=128),  # 1/2 of original channels.
              detection_head=DetectionHead(
                  use_separable_conv=True,
                  num_filters=128,
                  fc_dims=512),  # 1/2 of original channels.
              mask_head=MaskHead(
                  use_separable_conv=True,
                  num_filters=128),  # 1/2 of original channels.
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  activation='relu6',
                  norm_momentum=0.99,
                  norm_epsilon=0.001,
                  use_sync_bn=True),
              num_classes=91,
              input_size=[512, 512, 3],
              min_level=3,
              max_level=6,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 350,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [steps_per_epoch * 320,
                                     steps_per_epoch * 340],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
      ])
  return config

def panoptic_deeplab_coco() -> cfg.ExperimentConfig:
  """COCO panoptic segmentation with Panoptic Deeplab."""
  train_steps = 200000
  train_batch_size = 64
  eval_batch_size = 1
  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

  num_panoptic_categories = 201
  num_thing_categories = 91
  ignore_label = 0

  is_thing = [False]
  for idx in range(1, num_panoptic_categories):
    is_thing.append(True if idx <= num_thing_categories else False)

  input_size = [640, 640, 3]
  output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  level = int(np.math.log2(output_stride))

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype='bfloat16', enable_xla=True),
      task=PanopticDeeplabTask(
          init_checkpoint='gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/resnet50_v1/ckpt-436800',  # pylint: disable=line-too-long
          init_checkpoint_modules=['backbone'],
          model=PanopticDeeplab(
              num_classes=num_panoptic_categories,
              input_size=input_size,
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=50,
                      stem_type=stem_type,
                      output_stride=output_stride,
                      multigrid=multigrid,
                      se_ratio=0.25,
                      last_stage_repeats=1,
                      stochastic_depth_drop_rate=0.2)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      num_filters=256,
                      pool_kernel_size=input_size[:2],
                      dilation_rates=aspp_dilation_rates,
                      use_depthwise_convolution=True,
                      dropout_rate=0.1)),
              semantic_head=SemanticHead(
                  level=level,
                  num_convs=1,
                  num_filters=256,
                  kernel_size=5,
                  use_depthwise_convolution=True,
                  upsample_factor=1,
                  low_level=[3, 2],
                  low_level_num_filters=[64, 32],
                  fusion_num_output_filters=256,
                  prediction_kernel_size=1),
              instance_head=InstanceHead(
                  level=level,
                  num_convs=1,
                  num_filters=32,
                  kernel_size=5,
                  use_depthwise_convolution=True,
                  upsample_factor=1,
                  low_level=[3, 2],
                  low_level_num_filters=[32, 16],
                  fusion_num_output_filters=128,
                  prediction_kernel_size=1),
              shared_decoder=False,
              generate_panoptic_masks=True,
              post_processor=PanopticDeeplabPostProcessor(
                  output_size=input_size[:2],
                  center_score_threshold=0.1,
                  thing_class_ids=list(range(1, num_thing_categories)),
                  label_divisor=256,
                  stuff_area_limit=4096,
                  ignore_label=ignore_label,
                  nms_kernel=41,
                  keep_k_centers=200,
                  rescale_predictions=True)),
          losses=Losses(
              label_smoothing=0.0,
              ignore_label=ignore_label,
              l2_weight_decay=0.0,
              top_k_percent_pixels=0.2,
              segmentation_loss_weight=1.0,
              center_heatmap_loss_weight=200,
              center_offset_loss_weight=0.01),
          train_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_scale_min=0.5,
                  aug_scale_max=1.5,
                  aug_rand_hflip=True,
                  aug_type=common.Augmentation(
                      type='autoaug',
                      autoaug=common.AutoAugment(
                          augmentation_name='panoptic_deeplab_policy')),
                  sigma=8.0,
                  small_instance_area_threshold=4096,
                  small_instance_weight=3.0)),
          validation_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              parser=Parser(
                  resize_eval_groundtruth=False,
                  groundtruth_padded_size=[640, 640],
                  aug_scale_min=1.0,
                  aug_scale_max=1.0,
                  aug_rand_hflip=False,
                  aug_type=None,
                  sigma=8.0,
                  small_instance_area_threshold=4096,
                  small_instance_weight=3.0),
              drop_remainder=False),
          evaluation=Evaluation(
              ignored_label=ignore_label,
              max_instances_per_category=256,
              offset=256 * 256 * 256,
              is_thing=is_thing,
              rescale_predictions=True,
              report_per_class_pq=False,
              report_per_class_iou=False,
              report_train_mean_iou=False)),
      trainer=cfg.TrainerConfig(
          train_steps=train_steps,
          validation_steps=validation_steps,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adam',
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.0005,
                      'decay_steps': train_steps,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def maskrcnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='spinenet', spinenet=backbones.SpineNet(model_id='49')),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(use_sync_bn=True),
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=256,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=8)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 350,
          validation_steps=coco_val_samples // 8,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [steps_per_epoch * 320,
                                     steps_per_epoch * 340],
                      'values': [0.28, 0.028, 0.0028],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN."""
  steps_per_epoch = 500
  coco_val_samples = 5000
  train_batch_size = 64
  eval_batch_size = 8

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype='bfloat16', enable_xla=True),
      task=MaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              num_classes=91, input_size=[1024, 1024, 3], include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

def scaled_yolo() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv4-csp and v4."""
  train_batch_size = 256
  eval_batch_size = 8
  train_epochs = 300
  warmup_epochs = 3
  validation_interval = 5
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  max_num_instances = 300

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YoloTask(
          smart_bias_lr=0.1,
          init_checkpoint_modules='',
          annotation_file=None,
          weight_decay=0.0,
          model=Yolo(
              darknet_based_model=False,
              norm_activation=common.NormActivation(
                  activation='mish',
                  use_sync_bn=True,
                  norm_epsilon=0.001,
                  norm_momentum=0.97),
              head=YoloHead(smart_bias=True),
              loss=YoloLoss(use_scaled_loss=True),
              anchor_boxes=AnchorBoxes(
                  anchors_per_scale=3,
                  boxes=[
                      Box(box=[12, 16]),
                      Box(box=[19, 36]),
                      Box(box=[40, 28]),
                      Box(box=[36, 75]),
                      Box(box=[76, 55]),
                      Box(box=[72, 146]),
                      Box(box=[142, 110]),
                      Box(box=[192, 243]),
                      Box(box=[459, 401])
                  ])),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              dtype='float32',
              parser=Parser(
                  aug_rand_saturation=0.7,
                  aug_rand_brightness=0.4,
                  aug_rand_hue=0.015,
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  random_pad=False,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
                  mosaic=Mosaic(
                      mosaic_crop_mode='scale',
                      mosaic_frequency=1.0,
                      mixup_frequency=0.0,
                  ))),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=True,
              dtype='float32',
              parser=Parser(
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
              ))),
      trainer=cfg.TrainerConfig(
          train_steps=train_epochs * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=5 * steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema': {
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
                  'dynamic_decay': True,
              },
              'optimizer': {
                  'type': 'sgd_torch',
                  'sgd_torch': {
                      'momentum': 0.937,
                      'momentum_start': 0.8,
                      'nesterov': True,
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'weight_decay': 0.0005,
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 0.01,
                      'alpha': 0.2,
                      'decay_steps': train_epochs * steps_per_epoch,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config

class MultiTaskExperimentConfig(hyperparams.Config):
  """An experiment config for multi-task training and multi-task evaluation."""
  task: MultiTaskConfig = MultiTaskConfig()
  trainer: MultiTaskTrainerConfig = MultiTaskTrainerConfig()
  runtime: cfg.RuntimeConfig = cfg.RuntimeConfig()

def panoptic_maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO panoptic segmentation with Panoptic Mask R-CNN."""
  train_batch_size = 64
  eval_batch_size = 8
  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=PanopticMaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',  # pylint: disable=line-too-long
          init_checkpoint_modules=['backbone'],
          model=PanopticMaskRCNN(
              num_classes=91,
              input_size=[1024, 1024, 3],
              segmentation_model=SEGMENTATION_MODEL(
                  num_classes=91, head=SEGMENTATION_HEAD(level=3))),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False),
          annotation_file=os.path.join(_COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json')),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=validation_steps,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config