Beispiel #1
0
  def test_configure_optimizer(self, mixed_precision_dtype, loss_scale):
    """Verifies optimizer wrapping across mixed-precision/loss-scale combos."""
    experiment = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(
            mixed_precision_dtype=mixed_precision_dtype, loss_scale=loss_scale),
        trainer=cfg.TrainerConfig(
            optimizer_config=cfg.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd'
                },
                'learning_rate': {
                    'type': 'constant'
                }
            })))
    trainer = self.create_test_trainer(experiment)
    # Only float16 with an explicit loss scale wraps the optimizer in a
    # LossScaleOptimizer; every other combination keeps plain SGD.
    wraps_with_loss_scaling = (
        mixed_precision_dtype == 'float16' and loss_scale is not None)
    if wraps_with_loss_scaling:
      self.assertIsInstance(
          trainer.optimizer,
          tf.keras.mixed_precision.experimental.LossScaleOptimizer)
    else:
      self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD)

    metrics = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32))
    self.assertIn('training_loss', metrics)
Beispiel #2
0
def dcn_criteo_tb_config() -> Config:
    """Builds a Ranking config using the DCN 'cross' interaction on Criteo TB."""
    model = ModelConfig(
        num_dense_features=13,
        vocab_sizes=vocab_sizes,
        bottom_mlp=[512, 256, 64],
        embedding_dim=64,
        top_mlp=[1024, 1024, 512, 256, 1],
        interaction='cross')
    task = Task(
        model=model,
        loss=Loss(label_smoothing=0.0),
        train_data=DataConfig(
            global_batch_size=train_batch_size,
            is_training=True,
            sharding=True),
        validation_data=DataConfig(
            global_batch_size=eval_batch_size,
            is_training=False,
            sharding=False))
    trainer = TrainerConfig(
        train_steps=steps_per_epoch,
        validation_interval=steps_per_epoch // 2,
        validation_steps=NUM_EVAL_EXAMPLES // eval_batch_size,
        enable_metrics_in_training=True,
        optimizer_config=OptimizationConfig())
    return Config(
        runtime=cfg.RuntimeConfig(),
        task=task,
        trainer=trainer,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
        ])
Beispiel #3
0
class Config(hyperparams.Config):
    """Configuration to train the RankingModel.

    By default it configures a DLRM model on the criteo dataset.

    Attributes:
      runtime: A `RuntimeConfig` instance.
      task: A `Task` instance.
      trainer: A `TrainerConfig` instance.
      restrictions: Optional list of config restriction strings.
    """
    runtime: cfg.RuntimeConfig = cfg.RuntimeConfig()
    task: Task = Task(
        model=ModelConfig(
            embedding_dim=8,
            vocab_sizes=vocab_sizes,
            bottom_mlp=[64, 32, 8],
            top_mlp=[64, 32, 1]),
        loss=Loss(label_smoothing=0.0),
        train_data=DataConfig(
            is_training=True, global_batch_size=train_batch_size),
        validation_data=DataConfig(
            is_training=False, global_batch_size=eval_batch_size))
    trainer: TrainerConfig = TrainerConfig(
        train_steps=2 * steps_per_epoch,
        validation_interval=steps_per_epoch,
        validation_steps=NUM_EVAL_EXAMPLES // eval_batch_size,
        enable_metrics_in_training=True,
        optimizer_config=OptimizationConfig())
    restrictions: dataclasses.InitVar[Optional[List[str]]] = None
def video_classification() -> cfg.ExperimentConfig:
  """Video classification general."""
  restrictions = [
      'task.train_data.is_training != None',
      'task.validation_data.is_training != None',
      'task.train_data.num_classes == task.validation_data.num_classes',
  ]
  return cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=VideoClassificationTask(),
      trainer=cfg.TrainerConfig(),
      restrictions=restrictions)
Beispiel #5
0
def default_config() -> Config:
    """Builds the default (DLRM-style) Ranking config."""
    model = ModelConfig(
        embedding_dim=8,
        vocab_sizes=vocab_sizes,
        bottom_mlp=[64, 32, 4],
        top_mlp=[64, 32, 1])
    task = Task(
        model=model,
        loss=Loss(label_smoothing=0.0),
        train_data=DataConfig(
            global_batch_size=train_batch_size,
            is_training=True,
            sharding=True),
        validation_data=DataConfig(
            global_batch_size=eval_batch_size,
            is_training=False,
            sharding=False))
    trainer = TrainerConfig(
        train_steps=2 * steps_per_epoch,
        validation_interval=steps_per_epoch,
        validation_steps=NUM_EVAL_EXAMPLES // eval_batch_size,
        enable_metrics_in_training=True,
        optimizer_config=OptimizationConfig())
    return Config(
        runtime=cfg.RuntimeConfig(),
        task=task,
        trainer=trainer,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
        ])
def video_classification_kinetics600() -> cfg.ExperimentConfig:
  """Video classification on Videonet with resnet."""
  train_data = kinetics600(is_training=True)
  eval_data = kinetics600(is_training=False)
  model = VideoClassificationModel(
      backbone=backbones_3d.Backbone3D(
          type='resnet_3d', resnet_3d=backbones_3d.ResNet3D50()),
      norm_activation=common.NormActivation(
          norm_momentum=0.9, norm_epsilon=1e-5))
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=VideoClassificationTask(
          model=model,
          losses=Losses(l2_weight_decay=1e-4),
          train_data=train_data,
          validation_data=eval_data),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
      ])
  # Trainer settings (schedule, optimizer) are filled in by the shared helper.
  add_trainer(config, train_batch_size=1024, eval_batch_size=64)

  return config
def retinanet_spinenet_coco() -> cfg.ExperimentConfig:
    """COCO object detection with RetinaNet using SpineNet backbone."""
    train_batch_size = 256
    eval_batch_size = 8
    steps_per_epoch = COCO_TRIAN_EXAMPLES // train_batch_size
    input_size = 640

    model = RetinaNet(
        backbone=backbones.Backbone(
            type='spinenet', spinenet=backbones.SpineNet(model_id='49')),
        decoder=decoders.Decoder(type='identity', identity=decoders.Identity()),
        anchor=Anchor(anchor_size=3),
        norm_activation=common.NormActivation(use_sync_bn=True),
        num_classes=91,
        input_size=[input_size, input_size, 3],
        min_level=3,
        max_level=7)
    task = RetinaNetTask(
        annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                     'instances_val2017.json'),
        model=model,
        losses=Losses(l2_weight_decay=4e-5),
        train_data=DataConfig(
            input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
            is_training=True,
            global_batch_size=train_batch_size,
            parser=Parser(
                aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
        validation_data=DataConfig(
            input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
            is_training=False,
            global_batch_size=eval_batch_size))
    # SGD with a stepwise schedule: LRs are scaled linearly with batch size
    # and dropped 10x at epochs 320 and 340 of a 350-epoch run.
    optimizer_config = optimization.OptimizationConfig({
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {
                'boundaries': [320 * steps_per_epoch, 340 * steps_per_epoch],
                'values': [
                    0.28 * train_batch_size / 256.0,
                    0.028 * train_batch_size / 256.0,
                    0.0028 * train_batch_size / 256.0
                ],
            }
        },
        'warmup': {
            'type': 'linear',
            'linear': {
                'warmup_steps': 2000,
                'warmup_learning_rate': 0.0067
            }
        }
    })
    trainer = cfg.TrainerConfig(
        train_steps=350 * steps_per_epoch,
        validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
        validation_interval=steps_per_epoch,
        steps_per_loop=steps_per_epoch,
        summary_interval=steps_per_epoch,
        checkpoint_interval=steps_per_epoch,
        optimizer_config=optimizer_config)

    return cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
        task=task,
        trainer=trainer,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig:
    """COCO object detection with RetinaNet."""
    train_batch_size = 256
    eval_batch_size = 8
    steps_per_epoch = COCO_TRIAN_EXAMPLES // train_batch_size

    task = RetinaNetTask(
        init_checkpoint=
        'gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
        init_checkpoint_modules='backbone',
        annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                     'instances_val2017.json'),
        model=RetinaNet(
            num_classes=91,
            input_size=[640, 640, 3],
            min_level=3,
            max_level=7),
        losses=Losses(l2_weight_decay=1e-4),
        train_data=DataConfig(
            input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
            is_training=True,
            global_batch_size=train_batch_size,
            parser=Parser(
                aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
        validation_data=DataConfig(
            input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
            is_training=False,
            global_batch_size=eval_batch_size))
    # SGD with a stepwise schedule over a 72-epoch run: LRs are scaled
    # linearly with batch size and dropped 10x at epochs 57 and 67.
    optimizer_config = optimization.OptimizationConfig({
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {
                'boundaries': [57 * steps_per_epoch, 67 * steps_per_epoch],
                'values': [
                    0.28 * train_batch_size / 256.0,
                    0.028 * train_batch_size / 256.0,
                    0.0028 * train_batch_size / 256.0
                ],
            }
        },
        'warmup': {
            'type': 'linear',
            'linear': {
                'warmup_steps': 500,
                'warmup_learning_rate': 0.0067
            }
        }
    })
    trainer = cfg.TrainerConfig(
        train_steps=72 * steps_per_epoch,
        validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
        validation_interval=steps_per_epoch,
        steps_per_loop=steps_per_epoch,
        summary_interval=steps_per_epoch,
        checkpoint_interval=steps_per_epoch,
        optimizer_config=optimizer_config)

    return cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
        task=task,
        trainer=trainer,
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
Beispiel #9
0
def yolo_v4_coco() -> cfg.ExperimentConfig:
    """COCO object detection with YOLO v4.

    Returns:
      An `ExperimentConfig` for YOLO v4, with the LR schedule rescaled from a
      reference schedule defined at batch size 64.
    """
    train_batch_size = 1
    eval_batch_size = 1
    # Reference schedule length (batches) at the original batch size of 64;
    # the stepwise LR boundaries below are rescaled from it.
    base_default = 1200000
    # Fix: use floor division so train_steps (and the derived boundaries) are
    # ints — `1200000 * 64 / train_batch_size` produced a float.
    num_batches = 1200000 * 64 // train_batch_size

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(
            # Mixed precision is disabled here; re-enable by passing
            # mixed_precision_dtype='float16' and loss_scale='dynamic'.
            num_gpus=2),
        task=YoloTask(
            model=Yolo(base='v4'),
            train_data=DataConfig(
                # input_path intentionally unset; point it at the COCO shards
                # (e.g. os.path.join(COCO_INPUT_PATH_BASE, 'train*')) to run.
                is_training=True,
                global_batch_size=train_batch_size,
                parser=Parser(),
                shuffle_buffer_size=2),
            validation_data=DataConfig(
                is_training=False,
                global_batch_size=eval_batch_size,
                shuffle_buffer_size=2)),
        trainer=cfg.TrainerConfig(
            steps_per_loop=2000,
            summary_interval=8000,
            checkpoint_interval=10000,
            train_steps=num_batches,
            validation_steps=1000,
            validation_interval=10,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        # Reference boundaries (400k/450k of 1.2M batches),
                        # rescaled to the actual schedule length.
                        'boundaries': [
                            int(400000 / base_default * num_batches),
                            int(450000 / base_default * num_batches)
                        ],
                        # LR scaled linearly with batch size from the
                        # reference batch size of 64.
                        'values': [
                            0.00261 * train_batch_size / 64,
                            0.000261 * train_batch_size / 64,
                            0.0000261 * train_batch_size / 64
                        ]
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        # NOTE(review): 1000 * 64 // num_batches is 0 whenever
                        # num_batches > 64000, which disables warmup. The
                        # intended expression was likely
                        # `1000 * 64 // train_batch_size`; preserved as-is to
                        # avoid a silent behavior change — confirm with owner.
                        'warmup_steps': 1000 * 64 // num_batches,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
Beispiel #10
0
def maskrcnn_spinenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN with SpineNet backbone."""
  steps_per_epoch = 463
  coco_val_samples = 5000

  model = MaskRCNN(
      backbone=backbones.Backbone(
          type='spinenet', spinenet=backbones.SpineNet(model_id='49')),
      decoder=decoders.Decoder(type='identity', identity=decoders.Identity()),
      anchor=Anchor(anchor_size=3),
      norm_activation=common.NormActivation(use_sync_bn=True),
      num_classes=91,
      input_size=[640, 640, 3],
      min_level=3,
      max_level=7,
      include_mask=True)
  task = MaskRCNNTask(
      annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                   'instances_val2017.json'),
      model=model,
      losses=Losses(l2_weight_decay=0.00004),
      train_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
          is_training=True,
          global_batch_size=256,
          parser=Parser(
              aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
      validation_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
          is_training=False,
          global_batch_size=8))
  # SGD over 350 epochs with 10x LR drops at epochs 320 and 340.
  trainer = cfg.TrainerConfig(
      train_steps=steps_per_epoch * 350,
      validation_steps=coco_val_samples // 8,
      validation_interval=steps_per_epoch,
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'sgd',
              'sgd': {
                  'momentum': 0.9
              }
          },
          'learning_rate': {
              'type': 'stepwise',
              'stepwise': {
                  'boundaries': [steps_per_epoch * 320, steps_per_epoch * 340],
                  'values': [0.28, 0.028, 0.0028],
              }
          },
          'warmup': {
              'type': 'linear',
              'linear': {
                  'warmup_steps': 2000,
                  'warmup_learning_rate': 0.0067
              }
          }
      }))
  return cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
Beispiel #11
0
def maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN."""
  steps_per_epoch = 500
  coco_val_samples = 5000

  task = MaskRCNNTask(
      init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
      init_checkpoint_modules='backbone',
      annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                   'instances_val2017.json'),
      model=MaskRCNN(
          num_classes=91,
          input_size=[1024, 1024, 3],
          include_mask=True),
      losses=Losses(l2_weight_decay=0.00004),
      train_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
          is_training=True,
          global_batch_size=64,
          parser=Parser(
              aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
      validation_data=DataConfig(
          input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
          is_training=False,
          global_batch_size=8))
  # SGD with 10x LR drops at steps 15k and 20k of a 22.5k-step run.
  trainer = cfg.TrainerConfig(
      train_steps=22500,
      validation_steps=coco_val_samples // 8,
      validation_interval=steps_per_epoch,
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'sgd',
              'sgd': {
                  'momentum': 0.9
              }
          },
          'learning_rate': {
              'type': 'stepwise',
              'stepwise': {
                  'boundaries': [15000, 20000],
                  'values': [0.12, 0.012, 0.0012],
              }
          },
          'warmup': {
              'type': 'linear',
              'linear': {
                  'warmup_steps': 500,
                  'warmup_learning_rate': 0.0067
              }
          }
      }))

  return cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])