def test_configure_optimizer(self, mixed_precision_dtype, loss_scale):
    """Checks which optimizer class the trainer builds, then runs training.

    An SGD optimizer with a constant learning rate is requested. The
    trainer's optimizer must be a `LossScaleOptimizer` only when
    `mixed_precision_dtype` is 'float16' and `loss_scale` is not None; in
    every other case it must be a plain `tf.keras.optimizers.SGD`.
    Afterwards `trainer.train` is invoked with an int32 tensor holding 5 and
    the returned metrics must contain 'training_loss'.

    Args:
      mixed_precision_dtype: Value forwarded to
        `RuntimeConfig.mixed_precision_dtype`.
      loss_scale: Value forwarded to `RuntimeConfig.loss_scale`.
    """
    runtime_config = cfg.RuntimeConfig(
        mixed_precision_dtype=mixed_precision_dtype,
        loss_scale=loss_scale)
    optimization_params = {
        'optimizer': {'type': 'sgd'},
        'learning_rate': {'type': 'constant'},
    }
    trainer_config = cfg.TrainerConfig(
        optimizer_config=cfg.OptimizationConfig(optimization_params))
    experiment = cfg.ExperimentConfig(
        runtime=runtime_config, trainer=trainer_config)
    trainer = self.create_test_trainer(experiment)

    # Loss scaling is only expected for float16 with an explicit loss scale.
    expects_loss_scaling = (
        mixed_precision_dtype == 'float16' and loss_scale is not None)
    if expects_loss_scaling:
        expected_type = (
            tf.keras.mixed_precision.experimental.LossScaleOptimizer)
    else:
        expected_type = tf.keras.optimizers.SGD
    self.assertIsInstance(trainer.optimizer, expected_type)

    train_metrics = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
    self.assertIn('training_loss', train_metrics)
def dcn_criteo_tb_config() -> Config:
    """Returns the ranking `Config` for the `dcn_criteo_tb` setup.

    The model uses `interaction='cross'` with 13 dense features and an
    embedding dimension of 64. Training runs for `steps_per_epoch` steps and
    validates every `steps_per_epoch // 2` steps. Vocabulary sizes, batch
    sizes and step counts are read from module-level values.

    Returns:
      A `Config` carrying restrictions that require `is_training` to be set
      on both the training and validation data configs.
    """
    runtime = cfg.RuntimeConfig()

    model = ModelConfig(
        num_dense_features=13,
        vocab_sizes=vocab_sizes,
        bottom_mlp=[512, 256, 64],
        embedding_dim=64,
        top_mlp=[1024, 1024, 512, 256, 1],
        interaction='cross',
    )
    loss = Loss(label_smoothing=0.0)
    train_data = DataConfig(
        global_batch_size=train_batch_size,
        is_training=True,
        sharding=True,
    )
    validation_data = DataConfig(
        global_batch_size=eval_batch_size,
        is_training=False,
        sharding=False,
    )
    task = Task(
        model=model,
        loss=loss,
        train_data=train_data,
        validation_data=validation_data,
    )

    trainer = TrainerConfig(
        train_steps=steps_per_epoch,
        validation_interval=steps_per_epoch // 2,
        validation_steps=NUM_EVAL_EXAMPLES // eval_batch_size,
        enable_metrics_in_training=True,
        optimizer_config=OptimizationConfig(),
    )

    restrictions = [
        'task.train_data.is_training != None',
        'task.validation_data.is_training != None',
    ]
    return Config(
        runtime=runtime,
        task=task,
        trainer=trainer,
        restrictions=restrictions,
    )
class Config(hyperparams.Config):
    """Experiment configuration used to train the RankingModel.

    Out of the box it describes a DLRM model on the criteo dataset.

    Attributes:
      runtime: A `RuntimeConfig` instance.
      task: A `Task` instance; the default uses an embedding dimension of 8
        with small bottom/top MLPs and the module-level batch sizes.
      trainer: A `TrainerConfig` instance; the default trains for
        `2 * steps_per_epoch` steps and validates every `steps_per_epoch`
        steps.
      restrictions: Optional list of restriction strings, declared as a
        `dataclasses.InitVar`; defaults to None.
    """

    # NOTE(review): the defaults below are instances created once at class
    # definition time. If this class is processed by `@dataclasses.dataclass`
    # (the decorator is not visible here), Python 3.11+ rejects unhashable
    # defaults and `dataclasses.field(default_factory=...)` would be needed
    # -- TODO confirm.
    runtime: cfg.RuntimeConfig = cfg.RuntimeConfig()

    task: Task = Task(
        model=ModelConfig(
            embedding_dim=8,
            vocab_sizes=vocab_sizes,
            bottom_mlp=[64, 32, 8],
            top_mlp=[64, 32, 1],
        ),
        loss=Loss(label_smoothing=0.0),
        train_data=DataConfig(
            global_batch_size=train_batch_size,
            is_training=True,
        ),
        validation_data=DataConfig(
            global_batch_size=eval_batch_size,
            is_training=False,
        ),
    )

    trainer: TrainerConfig = TrainerConfig(
        train_steps=2 * steps_per_epoch,
        validation_interval=steps_per_epoch,
        validation_steps=NUM_EVAL_EXAMPLES // eval_batch_size,
        enable_metrics_in_training=True,
        optimizer_config=OptimizationConfig(),
    )

    restrictions: dataclasses.InitVar[Optional[List[str]]] = None
def video_classification() -> cfg.ExperimentConfig:
    """Builds the general-purpose video classification experiment config.

    Uses a default `VideoClassificationTask` and a default `TrainerConfig`
    with 'bfloat16' mixed precision.

    Returns:
      An `ExperimentConfig` whose restrictions require `is_training` to be
      set on both data configs and the train/validation `num_classes` to
      match.
    """
    runtime = cfg.RuntimeConfig(mixed_precision_dtype='bfloat16')
    task = VideoClassificationTask()
    trainer = cfg.TrainerConfig()
    restrictions = [
        'task.train_data.is_training != None',
        'task.validation_data.is_training != None',
        'task.train_data.num_classes == task.validation_data.num_classes',
    ]
    return cfg.ExperimentConfig(
        runtime=runtime,
        task=task,
        trainer=trainer,
        restrictions=restrictions,
    )
def default_config() -> Config:
    """Returns the default ranking `Config`.

    Training runs for `2 * steps_per_epoch` steps and validates every
    `steps_per_epoch` steps. Vocabulary sizes, batch sizes and step counts
    are read from module-level values.

    Returns:
      A `Config` carrying restrictions that require `is_training` to be set
      on both the training and validation data configs.
    """
    runtime = cfg.RuntimeConfig()

    # NOTE(review): bottom_mlp ends in 4 while embedding_dim is 8; the
    # `Config` class defaults use bottom_mlp=[64, 32, 8]. Confirm the
    # mismatch is intended.
    model = ModelConfig(
        embedding_dim=8,
        vocab_sizes=vocab_sizes,
        bottom_mlp=[64, 32, 4],
        top_mlp=[64, 32, 1],
    )
    loss = Loss(label_smoothing=0.0)
    train_data = DataConfig(
        global_batch_size=train_batch_size,
        is_training=True,
        sharding=True,
    )
    validation_data = DataConfig(
        global_batch_size=eval_batch_size,
        is_training=False,
        sharding=False,
    )
    task = Task(
        model=model,
        loss=loss,
        train_data=train_data,
        validation_data=validation_data,
    )

    trainer = TrainerConfig(
        train_steps=2 * steps_per_epoch,
        validation_interval=steps_per_epoch,
        validation_steps=NUM_EVAL_EXAMPLES // eval_batch_size,
        enable_metrics_in_training=True,
        optimizer_config=OptimizationConfig(),
    )

    restrictions = [
        'task.train_data.is_training != None',
        'task.validation_data.is_training != None',
    ]
    return Config(
        runtime=runtime,
        task=task,
        trainer=trainer,
        restrictions=restrictions,
    )
def video_classification_kinetics600() -> cfg.ExperimentConfig:
    """Video classification on Kinetics-600 with a 3D ResNet-50 backbone.

    The train and validation data configs come from `kinetics600(...)`.
    After the experiment config is built, `add_trainer` is called on it with
    a train batch size of 1024 and an eval batch size of 64; its return
    value is ignored, so it is presumably expected to fill in the trainer
    settings in place.

    Returns:
      The `ExperimentConfig` after `add_trainer` has been applied.
    """
    train_split = kinetics600(is_training=True)
    eval_split = kinetics600(is_training=False)

    resnet_backbone = backbones_3d.Backbone3D(
        type='resnet_3d', resnet_3d=backbones_3d.ResNet3D50())
    normalization = common.NormActivation(
        norm_momentum=0.9, norm_epsilon=1e-5)
    classifier = VideoClassificationModel(
        backbone=resnet_backbone, norm_activation=normalization)

    task = VideoClassificationTask(
        model=classifier,
        losses=Losses(l2_weight_decay=1e-4),
        train_data=train_split,
        validation_data=eval_split,
    )

    experiment = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
        task=task,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
            'task.train_data.num_classes == task.validation_data.num_classes',
        ],
    )
    add_trainer(experiment, train_batch_size=1024, eval_batch_size=64)
    return experiment
def retinanet_spinenet_coco() -> cfg.ExperimentConfig:
    """COCO object detection with RetinaNet on a SpineNet-49 backbone.

    Trains for 350 epochs' worth of steps at a global batch size of 256 with
    SGD (momentum 0.9). The stepwise learning rate changes at the steps
    corresponding to epochs 320 and 340, after a 2000-step linear warmup.
    Validation, summaries and checkpoints all happen once per epoch.

    Returns:
      The assembled `ExperimentConfig`.
    """
    global_train_batch = 256
    global_eval_batch = 8
    epoch_steps = COCO_TRIAN_EXAMPLES // global_train_batch
    image_size = 640

    runtime = cfg.RuntimeConfig(mixed_precision_dtype='float32')

    annotation_path = os.path.join(
        COCO_INPUT_PATH_BASE, 'instances_val2017.json')
    detector = RetinaNet(
        backbone=backbones.Backbone(
            type='spinenet', spinenet=backbones.SpineNet(model_id='49')),
        decoder=decoders.Decoder(
            type='identity', identity=decoders.Identity()),
        anchor=Anchor(anchor_size=3),
        norm_activation=common.NormActivation(use_sync_bn=True),
        num_classes=91,
        input_size=[image_size, image_size, 3],
        min_level=3,
        max_level=7,
    )
    weight_decay = Losses(l2_weight_decay=4e-5)
    training_input = DataConfig(
        input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
        is_training=True,
        global_batch_size=global_train_batch,
        parser=Parser(
            aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0),
    )
    validation_input = DataConfig(
        input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
        is_training=False,
        global_batch_size=global_eval_batch,
    )
    task = RetinaNetTask(
        annotation_file=annotation_path,
        model=detector,
        losses=weight_decay,
        train_data=training_input,
        validation_data=validation_input,
    )

    # Learning rates are scaled linearly with the batch size relative to 256.
    optimizer_settings = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {'momentum': 0.9},
        },
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {
                'boundaries': [
                    epoch * epoch_steps for epoch in (320, 340)
                ],
                'values': [
                    base_lr * global_train_batch / 256.0
                    for base_lr in (0.28, 0.028, 0.0028)
                ],
            },
        },
        'warmup': {
            'type': 'linear',
            'linear': {
                'warmup_steps': 2000,
                'warmup_learning_rate': 0.0067,
            },
        },
    }
    trainer = cfg.TrainerConfig(
        train_steps=350 * epoch_steps,
        validation_steps=COCO_VAL_EXAMPLES // global_eval_batch,
        validation_interval=epoch_steps,
        steps_per_loop=epoch_steps,
        summary_interval=epoch_steps,
        checkpoint_interval=epoch_steps,
        optimizer_config=optimization.OptimizationConfig(optimizer_settings),
    )

    return cfg.ExperimentConfig(
        runtime=runtime,
        task=task,
        trainer=trainer,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
        ],
    )
def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig:
    """COCO object detection with RetinaNet using the default model parts.

    The 'backbone' module is initialised from a ResNet-50 ImageNet
    checkpoint. Trains for 72 epochs' worth of steps at a global batch size
    of 256 with SGD (momentum 0.9). The stepwise learning rate changes at
    the steps corresponding to epochs 57 and 67, after a 500-step linear
    warmup. Validation, summaries and checkpoints happen once per epoch.

    Returns:
      The assembled `ExperimentConfig`.
    """
    global_train_batch = 256
    global_eval_batch = 8
    epoch_steps = COCO_TRIAN_EXAMPLES // global_train_batch

    runtime = cfg.RuntimeConfig(mixed_precision_dtype='bfloat16')

    backbone_checkpoint = (
        'gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080')
    annotation_path = os.path.join(
        COCO_INPUT_PATH_BASE, 'instances_val2017.json')
    detector = RetinaNet(
        num_classes=91,
        input_size=[640, 640, 3],
        min_level=3,
        max_level=7,
    )
    weight_decay = Losses(l2_weight_decay=1e-4)
    training_input = DataConfig(
        input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
        is_training=True,
        global_batch_size=global_train_batch,
        parser=Parser(
            aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0),
    )
    validation_input = DataConfig(
        input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
        is_training=False,
        global_batch_size=global_eval_batch,
    )
    task = RetinaNetTask(
        init_checkpoint=backbone_checkpoint,
        init_checkpoint_modules='backbone',
        annotation_file=annotation_path,
        model=detector,
        losses=weight_decay,
        train_data=training_input,
        validation_data=validation_input,
    )

    # Learning rates are scaled linearly with the batch size relative to 256.
    optimizer_settings = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {'momentum': 0.9},
        },
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {
                'boundaries': [
                    epoch * epoch_steps for epoch in (57, 67)
                ],
                'values': [
                    base_lr * global_train_batch / 256.0
                    for base_lr in (0.28, 0.028, 0.0028)
                ],
            },
        },
        'warmup': {
            'type': 'linear',
            'linear': {
                'warmup_steps': 500,
                'warmup_learning_rate': 0.0067,
            },
        },
    }
    trainer = cfg.TrainerConfig(
        train_steps=72 * epoch_steps,
        validation_steps=COCO_VAL_EXAMPLES // global_eval_batch,
        validation_interval=epoch_steps,
        steps_per_loop=epoch_steps,
        summary_interval=epoch_steps,
        checkpoint_interval=epoch_steps,
        optimizer_config=optimization.OptimizationConfig(optimizer_settings),
    )

    return cfg.ExperimentConfig(
        runtime=runtime,
        task=task,
        trainer=trainer,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
        ],
    )
def yolo_v4_coco() -> cfg.ExperimentConfig:
    """COCO object detection with YOLO (base 'v4').

    The schedule constants are written for a batch size of 64 (1,200,000
    steps, learning-rate changes at steps 400,000 and 450,000, 1,000 warmup
    steps, initial learning rate 0.00261) and are rescaled to
    `train_batch_size`: step counts are multiplied by
    `64 / train_batch_size` and learning rates by `train_batch_size / 64`.

    Returns:
      The assembled `ExperimentConfig`. The data configs do not set an
      `input_path`.
    """
    train_batch_size = 1
    eval_batch_size = 1
    base_default = 1200000
    # Floor division keeps the step count an int; true division produced a
    # float that was then passed as `train_steps`.
    num_batches = base_default * 64 // train_batch_size

    optimizer_settings = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {'momentum': 0.9},
        },
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {
                # Same fractions of the schedule as steps 400k/450k out of
                # `base_default`, computed with exact integer arithmetic.
                'boundaries': [
                    400000 * num_batches // base_default,
                    450000 * num_batches // base_default,
                ],
                'values': [
                    0.00261 * train_batch_size / 64,
                    0.000261 * train_batch_size / 64,
                    0.0000261 * train_batch_size / 64,
                ],
            },
        },
        'warmup': {
            'type': 'linear',
            'linear': {
                # Rescale the 1000-step warmup like every other step count.
                # The previous divisor was `num_batches` (the total number
                # of steps), which always evaluated to 0 warmup steps.
                'warmup_steps': 1000 * 64 // train_batch_size,
                'warmup_learning_rate': 0,
            },
        },
    }

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(
            # Mixed precision is currently disabled:
            #   mixed_precision_dtype='float16',
            #   loss_scale='dynamic',
            num_gpus=2),
        task=YoloTask(
            model=Yolo(base='v4'),
            train_data=DataConfig(
                # input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
                is_training=True,
                global_batch_size=train_batch_size,
                parser=Parser(),
                shuffle_buffer_size=2),
            validation_data=DataConfig(
                # input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
                is_training=False,
                global_batch_size=eval_batch_size,
                shuffle_buffer_size=2)),
        trainer=cfg.TrainerConfig(
            steps_per_loop=2000,
            summary_interval=8000,
            checkpoint_interval=10000,
            train_steps=num_batches,
            validation_steps=1000,
            validation_interval=10,
            optimizer_config=optimization.OptimizationConfig(
                optimizer_settings)),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
        ])
    return config
def maskrcnn_spinenet_coco() -> cfg.ExperimentConfig:
    """COCO object detection with Mask R-CNN on a SpineNet-49 backbone.

    Uses a fixed 463 steps per epoch and trains for 350 epochs' worth of
    steps with SGD (momentum 0.9). The stepwise learning rate changes at the
    steps corresponding to epochs 320 and 340, after a 2000-step linear
    warmup. Validation, summaries and checkpoints happen once per epoch.

    Returns:
      The assembled `ExperimentConfig`.
    """
    epoch_steps = 463
    num_val_samples = 5000

    runtime = cfg.RuntimeConfig(mixed_precision_dtype='bfloat16')

    annotation_path = os.path.join(
        COCO_INPUT_PATH_BASE, 'instances_val2017.json')
    detector = MaskRCNN(
        backbone=backbones.Backbone(
            type='spinenet', spinenet=backbones.SpineNet(model_id='49')),
        decoder=decoders.Decoder(
            type='identity', identity=decoders.Identity()),
        anchor=Anchor(anchor_size=3),
        norm_activation=common.NormActivation(use_sync_bn=True),
        num_classes=91,
        input_size=[640, 640, 3],
        min_level=3,
        max_level=7,
        include_mask=True,
    )
    weight_decay = Losses(l2_weight_decay=0.00004)
    training_input = DataConfig(
        input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
        is_training=True,
        global_batch_size=256,
        parser=Parser(
            aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0),
    )
    validation_input = DataConfig(
        input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
        is_training=False,
        global_batch_size=8,
    )
    task = MaskRCNNTask(
        annotation_file=annotation_path,
        model=detector,
        losses=weight_decay,
        train_data=training_input,
        validation_data=validation_input,
    )

    optimizer_settings = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {'momentum': 0.9},
        },
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {
                'boundaries': [
                    epoch_steps * epoch for epoch in (320, 340)
                ],
                'values': [0.28, 0.028, 0.0028],
            },
        },
        'warmup': {
            'type': 'linear',
            'linear': {
                'warmup_steps': 2000,
                'warmup_learning_rate': 0.0067,
            },
        },
    }
    trainer = cfg.TrainerConfig(
        train_steps=epoch_steps * 350,
        validation_steps=num_val_samples // 8,
        validation_interval=epoch_steps,
        steps_per_loop=epoch_steps,
        summary_interval=epoch_steps,
        checkpoint_interval=epoch_steps,
        optimizer_config=optimization.OptimizationConfig(optimizer_settings),
    )

    return cfg.ExperimentConfig(
        runtime=runtime,
        task=task,
        trainer=trainer,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
        ],
    )
def maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
    """COCO object detection with Mask R-CNN using the default model parts.

    The 'backbone' module is initialised from a ResNet-50 ImageNet
    checkpoint. Trains for 22500 steps with SGD (momentum 0.9). The stepwise
    learning rate changes at steps 15000 and 20000, after a 500-step linear
    warmup. Validation, summaries and checkpoints happen every 500 steps.

    Returns:
      The assembled `ExperimentConfig`.
    """
    epoch_steps = 500
    num_val_samples = 5000

    runtime = cfg.RuntimeConfig(mixed_precision_dtype='bfloat16')

    backbone_checkpoint = (
        'gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080')
    annotation_path = os.path.join(
        COCO_INPUT_PATH_BASE, 'instances_val2017.json')
    detector = MaskRCNN(
        num_classes=91,
        input_size=[1024, 1024, 3],
        include_mask=True,
    )
    weight_decay = Losses(l2_weight_decay=0.00004)
    training_input = DataConfig(
        input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
        is_training=True,
        global_batch_size=64,
        parser=Parser(
            aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25),
    )
    validation_input = DataConfig(
        input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
        is_training=False,
        global_batch_size=8,
    )
    task = MaskRCNNTask(
        init_checkpoint=backbone_checkpoint,
        init_checkpoint_modules='backbone',
        annotation_file=annotation_path,
        model=detector,
        losses=weight_decay,
        train_data=training_input,
        validation_data=validation_input,
    )

    optimizer_settings = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {'momentum': 0.9},
        },
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {
                'boundaries': [15000, 20000],
                'values': [0.12, 0.012, 0.0012],
            },
        },
        'warmup': {
            'type': 'linear',
            'linear': {
                'warmup_steps': 500,
                'warmup_learning_rate': 0.0067,
            },
        },
    }
    trainer = cfg.TrainerConfig(
        train_steps=22500,
        validation_steps=num_val_samples // 8,
        validation_interval=epoch_steps,
        steps_per_loop=epoch_steps,
        summary_interval=epoch_steps,
        checkpoint_interval=epoch_steps,
        optimizer_config=optimization.OptimizationConfig(optimizer_settings),
    )

    return cfg.ExperimentConfig(
        runtime=runtime,
        task=task,
        trainer=trainer,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
        ],
    )