Exemple #1
0
def video_classification_ucf101() -> cfg.ExperimentConfig:
    """Video classification on UCF-101 with resnet."""
    train_dataset = DataConfig(name='ucf101',
                               num_classes=101,
                               is_training=True,
                               split='train',
                               drop_remainder=True,
                               num_examples=9537,
                               temporal_stride=2,
                               feature_shape=(32, 224, 224, 3))
    train_dataset.tfds_name = 'ucf101'
    train_dataset.tfds_split = 'train'
    validation_dataset = DataConfig(name='ucf101',
                                    num_classes=101,
                                    is_training=True,
                                    split='test',
                                    drop_remainder=False,
                                    num_examples=3783,
                                    temporal_stride=2,
                                    feature_shape=(32, 224, 224, 3))
    validation_dataset.tfds_name = 'ucf101'
    validation_dataset.tfds_split = 'test'
    task = VideoClassificationTask(model=VideoClassificationModel(
        backbone=backbones_3d.Backbone3D(type='resnet_3d',
                                         resnet_3d=backbones_3d.ResNet3D50()),
        norm_activation=common.NormActivation(norm_momentum=0.9,
                                              norm_epsilon=1e-5,
                                              use_sync_bn=False)),
                                   losses=Losses(l2_weight_decay=1e-4),
                                   train_data=train_dataset,
                                   validation_data=validation_dataset)
    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
        task=task,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
            'task.train_data.num_classes == task.validation_data.num_classes',
        ])
    add_trainer(config,
                train_batch_size=64,
                eval_batch_size=16,
                learning_rate=0.8,
                train_epochs=100)
    return config
Exemple #2
0
  def test_configure_optimizer(self, mixed_precision_dtype, loss_scale):
    config = cfg.ExperimentConfig(
        task=cfg.TaskConfig(
            model=bert.PretrainerConfig()),
        runtime=cfg.RuntimeConfig(
            mixed_precision_dtype=mixed_precision_dtype, loss_scale=loss_scale),
        trainer=trainer_lib.ProgressiveTrainerConfig(
            export_checkpoint=True,
            export_checkpoint_interval=1,
            export_only_final_stage_ckpt=False))
    task = TestPolicy(None, config.task)
    trainer = trainer_lib.ProgressiveTrainer(config, task, self.get_temp_dir())
    if mixed_precision_dtype != 'float16':
      self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)
    elif mixed_precision_dtype == 'float16' and loss_scale is None:
      self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)

    metrics = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
    self.assertIn('training_loss', metrics)
def token_drop_bert_pretraining() -> cfg.ExperimentConfig:
    """BERT pretraining with token dropping."""
    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(enable_xla=True),
        task=masked_lm.TokenDropMaskedLMConfig(
            model=bert.PretrainerConfig(encoder=encoders.EncoderConfig(
                any=encoder_config.TokenDropBertEncoderConfig(
                    vocab_size=30522, num_layers=1, token_keep_k=64),
                type='any')),
            train_data=pretrain_dataloader.BertPretrainDataConfig(),
            validation_data=pretrain_dataloader.BertPretrainDataConfig(
                is_training=False)),
        trainer=cfg.TrainerConfig(
            train_steps=1000000,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'adamw',
                    'adamw': {
                        'weight_decay_rate':
                        0.01,
                        'exclude_from_weight_decay':
                        ['LayerNorm', 'layer_norm', 'bias'],
                    }
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 1e-4,
                        'end_learning_rate': 0.0,
                    }
                },
                'warmup': {
                    'type': 'polynomial'
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
Exemple #4
0
def video_classification_kinetics600() -> cfg.ExperimentConfig:
    """Video classification on Kinectics 600 with resnet."""
    train_dataset = kinetics600(is_training=True)
    validation_dataset = kinetics600(is_training=False)
    task = VideoClassificationTask(model=VideoClassificationModel(
        backbone=backbones_3d.Backbone3D(type='resnet_3d',
                                         resnet_3d=backbones_3d.ResNet3D50()),
        norm_activation=common.NormActivation(norm_momentum=0.9,
                                              norm_epsilon=1e-5)),
                                   losses=Losses(l2_weight_decay=1e-4),
                                   train_data=train_dataset,
                                   validation_data=validation_dataset)
    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
        task=task,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
            'task.train_data.num_classes == task.validation_data.num_classes',
        ])
    add_trainer(config, train_batch_size=1024, eval_batch_size=64)
    return config
  def test_configure_optimizer(self, mixed_precision_dtype, loss_scale):
    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(
            mixed_precision_dtype=mixed_precision_dtype, loss_scale=loss_scale),
        trainer=cfg.TrainerConfig(
            optimizer_config=cfg.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd'
                },
                'learning_rate': {
                    'type': 'constant'
                },
            })))
    trainer = self.create_test_trainer(config)
    if mixed_precision_dtype != 'float16':
      self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)
    elif mixed_precision_dtype == 'float16' and loss_scale is None:
      self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)
    else:
      self.assertIsInstance(trainer.optimizer,
                            tf.keras.mixed_precision.LossScaleOptimizer)

    metrics = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
    self.assertIn('training_loss', metrics)
def default_config() -> Config:
    return Config(
        runtime=cfg.RuntimeConfig(),
        task=Task(model=ModelConfig(embedding_dim=8,
                                    vocab_sizes=vocab_sizes,
                                    bottom_mlp=[64, 32, 4],
                                    top_mlp=[64, 32, 1]),
                  loss=Loss(label_smoothing=0.0),
                  train_data=DataConfig(global_batch_size=train_batch_size,
                                        is_training=True,
                                        sharding=True),
                  validation_data=DataConfig(global_batch_size=eval_batch_size,
                                             is_training=False,
                                             sharding=False)),
        trainer=TrainerConfig(train_steps=2 * steps_per_epoch,
                              validation_interval=steps_per_epoch,
                              validation_steps=NUM_EVAL_EXAMPLES //
                              eval_batch_size,
                              enable_metrics_in_training=True,
                              optimizer_config=OptimizationConfig()),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
        ])
def image_classification_imagenet() -> cfg.ExperimentConfig:
  """Image classification on imagenet with resnet."""
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(enable_xla=True),
      task=ImageClassificationTask(
          model=ImageClassificationModel(
              num_classes=1001,
              input_size=[224, 224, 3],
              backbone=backbones.Backbone(
                  type='resnet', resnet=backbones.ResNet(model_id=50)),
              norm_activation=common.NormActivation(
                  norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=DataConfig(
              input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          30 * steps_per_epoch, 60 * steps_per_epoch,
                          80 * steps_per_epoch
                      ],
                      'values': [
                          0.1 * train_batch_size / 256,
                          0.01 * train_batch_size / 256,
                          0.001 * train_batch_size / 256,
                          0.0001 * train_batch_size / 256,
                      ]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])

  return config
Exemple #8
0
def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig:
    """COCO object detection with RetinaNet using Mobile SpineNet backbone."""
    train_batch_size = 256
    eval_batch_size = 8
    steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
    input_size = 384

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
        task=RetinaNetTask(
            annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                         'instances_val2017.json'),
            model=RetinaNet(
                backbone=backbones.Backbone(
                    type='spinenet_mobile',
                    spinenet_mobile=backbones.SpineNetMobile(
                        model_id='49',
                        stochastic_depth_drop_rate=0.2,
                        min_level=3,
                        max_level=7)),
                decoder=decoders.Decoder(type='identity',
                                         identity=decoders.Identity()),
                head=RetinaNetHead(num_filters=48, use_separable_conv=True),
                anchor=Anchor(anchor_size=3),
                norm_activation=common.NormActivation(use_sync_bn=True,
                                                      activation='swish'),
                num_classes=91,
                input_size=[input_size, input_size, 3],
                min_level=3,
                max_level=7),
            losses=Losses(l2_weight_decay=3e-5),
            train_data=DataConfig(input_path=os.path.join(
                COCO_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  parser=Parser(aug_rand_hflip=True,
                                                aug_scale_min=0.1,
                                                aug_scale_max=2.0)),
            validation_data=DataConfig(input_path=os.path.join(
                COCO_INPUT_PATH_BASE, 'val*'),
                                       is_training=False,
                                       global_batch_size=eval_batch_size)),
        trainer=cfg.TrainerConfig(
            train_steps=600 * steps_per_epoch,
            validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        'boundaries':
                        [575 * steps_per_epoch, 590 * steps_per_epoch],
                        'values': [
                            0.32 * train_batch_size / 256.0,
                            0.032 * train_batch_size / 256.0,
                            0.0032 * train_batch_size / 256.0
                        ],
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 2000,
                        'warmup_learning_rate': 0.0067
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
            'task.model.min_level == task.model.backbone.spinenet_mobile.min_level',
            'task.model.max_level == task.model.backbone.spinenet_mobile.max_level',
        ])

    return config
Exemple #9
0
def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig:
    """COCO object detection with RetinaNet."""
    train_batch_size = 256
    eval_batch_size = 8
    steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
        task=RetinaNetTask(
            init_checkpoint=
            'gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
            init_checkpoint_modules='backbone',
            annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                         'instances_val2017.json'),
            model=RetinaNet(num_classes=91,
                            input_size=[640, 640, 3],
                            min_level=3,
                            max_level=7),
            losses=Losses(l2_weight_decay=1e-4),
            train_data=DataConfig(input_path=os.path.join(
                COCO_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  parser=Parser(aug_rand_hflip=True,
                                                aug_scale_min=0.5,
                                                aug_scale_max=2.0)),
            validation_data=DataConfig(input_path=os.path.join(
                COCO_INPUT_PATH_BASE, 'val*'),
                                       is_training=False,
                                       global_batch_size=eval_batch_size)),
        trainer=cfg.TrainerConfig(
            train_steps=72 * steps_per_epoch,
            validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        'boundaries':
                        [57 * steps_per_epoch, 67 * steps_per_epoch],
                        'values': [
                            0.32 * train_batch_size / 256.0,
                            0.032 * train_batch_size / 256.0,
                            0.0032 * train_batch_size / 256.0
                        ],
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 500,
                        'warmup_learning_rate': 0.0067
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
Exemple #10
0
def panoptic_fpn_coco() -> cfg.ExperimentConfig:
    """COCO panoptic segmentation with Panoptic Mask R-CNN."""
    train_batch_size = 64
    eval_batch_size = 8
    steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
    validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

    # coco panoptic dataset has category ids ranging from [0-200] inclusive.
    # 0 is not used and represents the background class
    # ids 1-91 represent thing categories (91)
    # ids 92-200 represent stuff categories (109)
    # for the segmentation task, we continue using id=0 for the background
    # and map all thing categories to id=1, the remaining 109 stuff categories
    # are shifted by an offset=90 given by num_thing classes - 1. This shifting
    # will make all the stuff categories begin from id=2 and end at id=110
    num_panoptic_categories = 201
    num_thing_categories = 91
    num_semantic_segmentation_classes = 111

    is_thing = [False]
    for idx in range(1, num_panoptic_categories):
        is_thing.append(True if idx <= num_thing_categories else False)

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32',
                                  enable_xla=True),
        task=PanopticMaskRCNNTask(
            init_checkpoint=
            'gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',  # pylint: disable=line-too-long
            init_checkpoint_modules=['backbone'],
            model=PanopticMaskRCNN(
                num_classes=91,
                input_size=[1024, 1024, 3],
                panoptic_segmentation_generator=PanopticSegmentationGenerator(
                    output_size=[640, 640], rescale_predictions=True),
                stuff_classes_offset=90,
                segmentation_model=SEGMENTATION_MODEL(
                    num_classes=num_semantic_segmentation_classes,
                    head=SEGMENTATION_HEAD(
                        level=2,
                        num_convs=0,
                        num_filters=128,
                        decoder_min_level=2,
                        decoder_max_level=6,
                        feature_fusion='panoptic_fpn_fusion'))),
            losses=Losses(l2_weight_decay=0.00004),
            train_data=DataConfig(input_path=os.path.join(
                _COCO_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  parser=Parser(aug_rand_hflip=True,
                                                aug_scale_min=0.8,
                                                aug_scale_max=1.25)),
            validation_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
                is_training=False,
                global_batch_size=eval_batch_size,
                parser=Parser(segmentation_resize_eval_groundtruth=False,
                              segmentation_groundtruth_padded_size=[640, 640]),
                drop_remainder=False),
            annotation_file=os.path.join(_COCO_INPUT_PATH_BASE,
                                         'instances_val2017.json'),
            segmentation_evaluation=semantic_segmentation.Evaluation(
                report_per_class_iou=False, report_train_mean_iou=False),
            panoptic_quality_evaluator=PanopticQualityEvaluator(
                num_categories=num_panoptic_categories,
                ignored_label=0,
                is_thing=is_thing,
                rescale_predictions=True)),
        trainer=cfg.TrainerConfig(
            train_steps=22500,
            validation_steps=validation_steps,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        'boundaries': [15000, 20000],
                        'values': [0.12, 0.012, 0.0012],
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 500,
                        'warmup_learning_rate': 0.0067
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
Exemple #11
0
def wmt_transformer_large() -> cfg.ExperimentConfig:
    """WMT Transformer Large.

  Please refer to
  tensorflow_models/official/nlp/data/train_sentencepiece.py
  to generate sentencepiece_model
  and pass
  --params_override=task.sentencepiece_model_path='YOUR_PATH'
  to the train script.
  """
    learning_rate = 2.0
    hidden_size = 1024
    learning_rate *= (hidden_size**-0.5)
    warmup_steps = 16000
    train_steps = 300000
    token_batch_size = 24576
    encdecoder = translation.EncDecoder(num_attention_heads=16,
                                        intermediate_size=hidden_size * 4)
    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(enable_xla=True),
        task=translation.TranslationConfig(
            model=translation.ModelConfig(encoder=encdecoder,
                                          decoder=encdecoder,
                                          embedding_width=hidden_size,
                                          padded_decode=True,
                                          decode_max_length=100),
            train_data=wmt_dataloader.WMTDataConfig(
                tfds_name='wmt14_translate/de-en',
                tfds_split='train',
                src_lang='en',
                tgt_lang='de',
                is_training=True,
                global_batch_size=token_batch_size,
                static_batch=True,
                max_seq_length=64),
            validation_data=wmt_dataloader.WMTDataConfig(
                tfds_name='wmt14_translate/de-en',
                tfds_split='test',
                src_lang='en',
                tgt_lang='de',
                is_training=False,
                global_batch_size=32,
                static_batch=True,
                max_seq_length=100,
            ),
            sentencepiece_model_path=None,
        ),
        trainer=cfg.TrainerConfig(
            train_steps=train_steps,
            validation_steps=-1,
            steps_per_loop=1000,
            summary_interval=1000,
            checkpoint_interval=5000,
            validation_interval=5000,
            max_to_keep=1,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'adam',
                    'adam': {
                        'beta_2': 0.997,
                        'epsilon': 1e-9,
                    },
                },
                'learning_rate': {
                    'type': 'power',
                    'power': {
                        'initial_learning_rate': learning_rate,
                        'power': -0.5,
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': warmup_steps,
                        'warmup_learning_rate': 0.0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.sentencepiece_model_path != None',
        ])
    return config
Exemple #12
0
def cascadercnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Cascade RCNN-RS with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000
  train_batch_size = 256
  eval_batch_size = 8

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='spinenet',
                  spinenet=backbones.SpineNet(
                      model_id='49',
                      min_level=3,
                      max_level=7,
                  )),
              decoder=decoders.Decoder(
                  type='identity', identity=decoders.Identity()),
              roi_sampler=ROISampler(cascade_iou_thresholds=[0.6, 0.7]),
              detection_head=DetectionHead(
                  class_agnostic_bbox_pred=True, cascade_class_ensemble=True),
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  use_sync_bn=True, activation='swish'),
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.5)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          steps_per_epoch * 475, steps_per_epoch * 490
                      ],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.model.min_level == task.model.backbone.spinenet.min_level',
          'task.model.max_level == task.model.backbone.spinenet.max_level',
      ])
  return config
Exemple #13
0
def maskrcnn_mobilenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN with MobileNet backbone."""
  steps_per_epoch = 232
  coco_val_samples = 5000
  train_batch_size = 512
  eval_batch_size = 512

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(model_id='MobileNetV2')),
              decoder=decoders.Decoder(
                  type='fpn',
                  fpn=decoders.FPN(num_filters=128, use_separable_conv=True)),
              rpn_head=RPNHead(use_separable_conv=True,
                               num_filters=128),  # 1/2 of original channels.
              detection_head=DetectionHead(
                  use_separable_conv=True, num_filters=128,
                  fc_dims=512),  # 1/2 of original channels.
              mask_head=MaskHead(use_separable_conv=True,
                                 num_filters=128),  # 1/2 of original channels.
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  activation='relu6',
                  norm_momentum=0.99,
                  norm_epsilon=0.001,
                  use_sync_bn=True),
              num_classes=91,
              input_size=[512, 512, 3],
              min_level=3,
              max_level=6,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 350,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          steps_per_epoch * 320, steps_per_epoch * 340
                      ],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
      ])
  return config
Exemple #14
0
def panoptic_deeplab_coco() -> cfg.ExperimentConfig:
    """COCO panoptic segmentation with Panoptic Deeplab."""
    train_steps = 200000
    train_batch_size = 64
    eval_batch_size = 1
    steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
    validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

    num_panoptic_categories = 201
    num_thing_categories = 91
    ignore_label = 0

    is_thing = [False]
    for idx in range(1, num_panoptic_categories):
        is_thing.append(True if idx <= num_thing_categories else False)

    input_size = [640, 640, 3]
    output_stride = 16
    aspp_dilation_rates = [6, 12, 18]
    multigrid = [1, 2, 4]
    stem_type = 'v1'
    level = int(np.math.log2(output_stride))

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16',
                                  enable_xla=True),
        task=PanopticDeeplabTask(
            init_checkpoint=
            'gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/resnet50_v1/ckpt-436800',  # pylint: disable=line-too-long
            init_checkpoint_modules=['backbone'],
            model=PanopticDeeplab(
                num_classes=num_panoptic_categories,
                input_size=input_size,
                backbone=backbones.Backbone(
                    type='dilated_resnet',
                    dilated_resnet=backbones.DilatedResNet(
                        model_id=50,
                        stem_type=stem_type,
                        output_stride=output_stride,
                        multigrid=multigrid,
                        se_ratio=0.25,
                        last_stage_repeats=1,
                        stochastic_depth_drop_rate=0.2)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       num_filters=256,
                                       pool_kernel_size=input_size[:2],
                                       dilation_rates=aspp_dilation_rates,
                                       use_depthwise_convolution=True,
                                       dropout_rate=0.1)),
                semantic_head=SemanticHead(level=level,
                                           num_convs=1,
                                           num_filters=256,
                                           kernel_size=5,
                                           use_depthwise_convolution=True,
                                           upsample_factor=1,
                                           low_level=[3, 2],
                                           low_level_num_filters=[64, 32],
                                           fusion_num_output_filters=256,
                                           prediction_kernel_size=1),
                instance_head=InstanceHead(level=level,
                                           num_convs=1,
                                           num_filters=32,
                                           kernel_size=5,
                                           use_depthwise_convolution=True,
                                           upsample_factor=1,
                                           low_level=[3, 2],
                                           low_level_num_filters=[32, 16],
                                           fusion_num_output_filters=128,
                                           prediction_kernel_size=1),
                shared_decoder=False,
                generate_panoptic_masks=True,
                post_processor=PanopticDeeplabPostProcessor(
                    output_size=input_size[:2],
                    center_score_threshold=0.1,
                    thing_class_ids=list(range(1, num_thing_categories)),
                    label_divisor=256,
                    stuff_area_limit=4096,
                    ignore_label=ignore_label,
                    nms_kernel=41,
                    keep_k_centers=200,
                    rescale_predictions=True)),
            losses=Losses(label_smoothing=0.0,
                          ignore_label=ignore_label,
                          l2_weight_decay=0.0,
                          top_k_percent_pixels=0.2,
                          segmentation_loss_weight=1.0,
                          center_heatmap_loss_weight=200,
                          center_offset_loss_weight=0.01),
            train_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
                is_training=True,
                global_batch_size=train_batch_size,
                parser=Parser(
                    aug_scale_min=0.5,
                    aug_scale_max=1.5,
                    aug_rand_hflip=True,
                    aug_type=common.Augmentation(
                        type='autoaug',
                        autoaug=common.AutoAugment(
                            augmentation_name='panoptic_deeplab_policy')),
                    sigma=8.0,
                    small_instance_area_threshold=4096,
                    small_instance_weight=3.0)),
            validation_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
                is_training=False,
                global_batch_size=eval_batch_size,
                parser=Parser(resize_eval_groundtruth=False,
                              groundtruth_padded_size=[640, 640],
                              aug_scale_min=1.0,
                              aug_scale_max=1.0,
                              aug_rand_hflip=False,
                              aug_type=None,
                              sigma=8.0,
                              small_instance_area_threshold=4096,
                              small_instance_weight=3.0),
                drop_remainder=False),
            evaluation=Evaluation(ignored_label=ignore_label,
                                  max_instances_per_category=256,
                                  offset=256 * 256 * 256,
                                  is_thing=is_thing,
                                  rescale_predictions=True,
                                  report_per_class_pq=False,
                                  report_per_class_iou=False,
                                  report_train_mean_iou=False)),
        trainer=cfg.TrainerConfig(
            train_steps=train_steps,
            validation_steps=validation_steps,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'adam',
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.0005,
                        'decay_steps': train_steps,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 2000,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
Exemple #15
0
def maskrcnn_spinenet_coco() -> cfg.ExperimentConfig:
    """COCO object detection with Mask R-CNN with SpineNet backbone."""
    steps_per_epoch = 463
    coco_val_samples = 5000

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
        task=MaskRCNNTask(
            annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                         'instances_val2017.json'),
            model=MaskRCNN(
                backbone=backbones.Backbone(
                    type='spinenet',
                    spinenet=backbones.SpineNet(model_id='49')),
                decoder=decoders.Decoder(type='identity',
                                         identity=decoders.Identity()),
                anchor=Anchor(anchor_size=3),
                norm_activation=common.NormActivation(use_sync_bn=True),
                num_classes=91,
                input_size=[640, 640, 3],
                min_level=3,
                max_level=7,
                include_mask=True),
            losses=Losses(l2_weight_decay=0.00004),
            train_data=DataConfig(input_path=os.path.join(
                COCO_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=256,
                                  parser=Parser(aug_rand_hflip=True,
                                                aug_scale_min=0.5,
                                                aug_scale_max=2.0)),
            validation_data=DataConfig(input_path=os.path.join(
                COCO_INPUT_PATH_BASE, 'val*'),
                                       is_training=False,
                                       global_batch_size=8)),
        trainer=cfg.TrainerConfig(
            train_steps=steps_per_epoch * 350,
            validation_steps=coco_val_samples // 8,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        'boundaries':
                        [steps_per_epoch * 320, steps_per_epoch * 340],
                        'values': [0.28, 0.028, 0.0028],
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 2000,
                        'warmup_learning_rate': 0.0067
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
Exemple #16
0
def maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN."""
  steps_per_epoch = 500
  coco_val_samples = 5000
  train_batch_size = 64
  eval_batch_size = 8

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype='bfloat16', enable_xla=True),
      task=MaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              num_classes=91, input_size=[1024, 1024, 3], include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
Exemple #17
0
def scaled_yolo() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv4-csp and v4."""
  train_batch_size = 256
  eval_batch_size = 8
  train_epochs = 300
  warmup_epochs = 3

  validation_interval = 5
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size

  max_num_instances = 300

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YoloTask(
          smart_bias_lr=0.1,
          init_checkpoint_modules='',
          annotation_file=None,
          weight_decay=0.0,
          model=Yolo(
              darknet_based_model=False,
              norm_activation=common.NormActivation(
                  activation='mish',
                  use_sync_bn=True,
                  norm_epsilon=0.001,
                  norm_momentum=0.97),
              head=YoloHead(smart_bias=True),
              loss=YoloLoss(use_scaled_loss=True),
              anchor_boxes=AnchorBoxes(
                  anchors_per_scale=3,
                  boxes=[
                      Box(box=[12, 16]),
                      Box(box=[19, 36]),
                      Box(box=[40, 28]),
                      Box(box=[36, 75]),
                      Box(box=[76, 55]),
                      Box(box=[72, 146]),
                      Box(box=[142, 110]),
                      Box(box=[192, 243]),
                      Box(box=[459, 401])
                  ])),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              dtype='float32',
              parser=Parser(
                  aug_rand_saturation=0.7,
                  aug_rand_brightness=0.4,
                  aug_rand_hue=0.015,
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  random_pad=False,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
                  mosaic=Mosaic(
                      mosaic_crop_mode='scale',
                      mosaic_frequency=1.0,
                      mixup_frequency=0.0,
                  ))),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=True,
              dtype='float32',
              parser=Parser(
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
              ))),
      trainer=cfg.TrainerConfig(
          train_steps=train_epochs * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=5 * steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema': {
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
                  'dynamic_decay': True,
              },
              'optimizer': {
                  'type': 'sgd_torch',
                  'sgd_torch': {
                      'momentum': 0.937,
                      'momentum_start': 0.8,
                      'nesterov': True,
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'weight_decay': 0.0005,
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 0.01,
                      'alpha': 0.2,
                      'decay_steps': train_epochs * steps_per_epoch,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])

  return config
class MultiTaskExperimentConfig(hyperparams.Config):
    """An experiment config for multi-task training and multi-task evaluation."""
    task: MultiTaskConfig = MultiTaskConfig()
    trainer: MultiTaskTrainerConfig = MultiTaskTrainerConfig()
    runtime: cfg.RuntimeConfig = cfg.RuntimeConfig()
def panoptic_maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO panoptic segmentation with Panoptic Mask R-CNN."""
  train_batch_size = 64
  eval_batch_size = 8
  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=PanopticMaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',  # pylint: disable=line-too-long
          init_checkpoint_modules=['backbone'],
          model=PanopticMaskRCNN(
              num_classes=91, input_size=[1024, 1024, 3],
              segmentation_model=SEGMENTATION_MODEL(
                  num_classes=91,
                  head=SEGMENTATION_HEAD(level=3))),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False),
          annotation_file=os.path.join(_COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json')),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=validation_steps,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config