def test_parser(self, output_size, dtype, is_training, aug_name,
                    is_multilabel, decode_jpeg_only, image_format):
        """Decode and parse one synthetic classification example and check it.

        A serialized example is created with
        `tfexample_utils.create_classification_example`, decoded with
        `classification_input.Decoder` and parsed with
        `classification_input.Parser`. The test then asserts that:
          * the parsed image has shape `output_size`;
          * the label equals 0 (single-label) or has shape `[10]` (multilabel);
          * the image dtype matches `dtype` when it is one of 'float32',
            'float16' or 'bfloat16' (other values are not checked).
        """
        height, width = output_size[0], output_size[1]
        serialized_example = tfexample_utils.create_classification_example(
            height, width, image_format, is_multilabel)

        # Only 'randaug' and 'autoaug' get an augmentation config; any other
        # name runs the parser without augmentation.
        aug_type = None
        if aug_name == 'randaug':
            aug_type = common.Augmentation(
                type=aug_name, randaug=common.RandAugment(magnitude=10))
        elif aug_name == 'autoaug':
            aug_type = common.Augmentation(
                type=aug_name,
                autoaug=common.AutoAugment(augmentation_name='test'))

        # Keyword arguments that the decoder and the parser have in common.
        field_kwargs = dict(image_field_key=IMAGE_FIELD_KEY,
                            label_field_key=LABEL_FIELD_KEY,
                            is_multilabel=is_multilabel)
        decoder = classification_input.Decoder(**field_kwargs)
        parser = classification_input.Parser(
            output_size=output_size[:2],
            num_classes=10,
            decode_jpeg_only=decode_jpeg_only,
            aug_rand_hflip=False,
            aug_type=aug_type,
            dtype=dtype,
            **field_kwargs)

        decoded_tensors = decoder.decode(serialized_example)
        parse_example = parser.parse_fn(is_training)
        image, label = parse_example(decoded_tensors)

        self.assertAllEqual(image.numpy().shape, output_size)

        if is_multilabel:
            self.assertAllEqual(label.numpy().shape, [10])
        else:
            self.assertAllEqual(label, 0)

        # Map the dtype name to the TensorFlow dtype the image should carry.
        expected_dtypes = {
            'float32': tf.float32,
            'float16': tf.float16,
            'bfloat16': tf.bfloat16,
        }
        expected_dtype = expected_dtypes.get(dtype)
        if expected_dtype is not None:
            self.assertAllEqual(image.dtype, expected_dtype)
Esempio n. 2
0
class Parser(hyperparams.Config):
    """Panoptic deeplab parser."""
    # NOTE(review): the `dataclasses.field(...)` defaults below only take
    # effect when this class is decorated with `@dataclasses.dataclass`; the
    # decorator is not visible in this excerpt -- confirm it is applied.
    ignore_label: int = 0
    # If resize_eval_groundtruth is set to False, original image sizes are used
    # for eval. In that case, groundtruth_padded_size has to be specified too to
    # allow for batching the variable input sizes of images.
    resize_eval_groundtruth: bool = True
    groundtruth_padded_size: List[int] = dataclasses.field(
        default_factory=list)
    aug_scale_min: float = 1.0
    aug_scale_max: float = 1.0
    aug_rand_hflip: bool = True
    # Built per instance through a factory. A single `common.Augmentation()`
    # object used as the class-level default would be shared by every Parser,
    # and Python 3.11+ dataclasses raise ValueError for unhashable defaults
    # (presumably the case for common.Augmentation -- verify).
    aug_type: common.Augmentation = dataclasses.field(
        default_factory=common.Augmentation)
    sigma: float = 8.0
    small_instance_area_threshold: int = 4096
    small_instance_weight: float = 3.0
    # Annotated so that `dtype` is a declared field like the attributes above
    # instead of a bare class attribute that the constructor cannot set.
    dtype: str = 'float32'
Esempio n. 3
0
    def test_video_input_augmentation_returns_shape(self):
        """Check output shapes of the video parser with autoaugment enabled.

        A fake sequence example is serialized, decoded with
        `video_input.Decoder` and parsed with a training-mode
        `video_input.Parser` built from the kinetics600 config. The parsed
        image must have the configured feature shape and the label must have
        shape (600,).
        """
        expected_image_shape = (2, 224, 224, 3)

        params = exp_cfg.kinetics600(is_training=True)
        params.feature_shape = expected_image_shape
        params.min_image_size = 224

        params.temporal_stride = 2
        params.aug_type = common.Augmentation(
            type='autoaug', autoaug=common.AutoAugment())

        decoder = video_input.Decoder()
        parse_example = video_input.Parser(params).parse_fn(params.is_training)

        # The label returned by the fake-example helper is not used; the label
        # checked below is the one produced by the parser.
        seq_example, _ = fake_seq_example()

        serialized = tf.constant(seq_example.SerializeToString())
        image_features, label = parse_example(decoder.decode(serialized))
        image = image_features['image']

        self.assertAllEqual(image.shape, expected_image_shape)
        self.assertAllEqual(label.shape, (600, ))
Esempio n. 4
0
def mobilenet_edgetpu_base_experiment_config(
        model_name: str) -> cfg.ExperimentConfig:
    """Build the ImageNet classification experiment for mobilenet_edgetpu.

    This experiment config is common across all mobilenet_edgetpu variants;
    only the model name written into the model params differs.

    Args:
      model_name: Name of the mobilenet_edgetpu model variant.

    Returns:
      The assembled ExperimentConfig.
    """
    train_batch_size = 4096
    eval_batch_size = 4096
    steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size

    model = MobilenetEdgeTPUModelConfig(num_classes=1001,
                                        input_size=[224, 224, 3])
    model.model_params.model_name = model_name

    losses = base_config.Losses(label_smoothing=0.1)
    train_data = base_config.DataConfig(
        input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
        is_training=True,
        global_batch_size=train_batch_size,
        dtype='bfloat16',
        aug_type=common.Augmentation(type='autoaug'))
    validation_data = base_config.DataConfig(
        input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
        is_training=False,
        dtype='bfloat16',
        drop_remainder=False,
        global_batch_size=eval_batch_size)
    task = MobilenetEdgeTPUTaskConfig(model=model,
                                      losses=losses,
                                      train_data=train_data,
                                      validation_data=validation_data)

    # RMSprop with EMA, a staircase exponential decay and a linear warmup.
    # The initial learning rate grows with the batch size in units of 128.
    rmsprop_params = {'rho': 0.9, 'momentum': 0.9, 'epsilon': 0.001}
    exponential_params = {
        'initial_learning_rate': 0.008 * (train_batch_size // 128),
        'decay_steps': int(2.4 * steps_per_epoch),
        'decay_rate': 0.97,
        'staircase': True,
    }
    linear_warmup_params = {
        'warmup_steps': 5 * steps_per_epoch,
        'warmup_learning_rate': 0,
    }
    optimizer_config = optimization.OptimizationConfig({
        'optimizer': {'type': 'rmsprop', 'rmsprop': rmsprop_params},
        'ema': {'average_decay': 0.99, 'trainable_weights_only': False},
        'learning_rate': {
            'type': 'exponential',
            'exponential': exponential_params,
        },
        'warmup': {'type': 'linear', 'linear': linear_warmup_params},
    })

    # Loop, summary and validation all run once per epoch; checkpoints are
    # written every five epochs.
    trainer = cfg.TrainerConfig(
        steps_per_loop=steps_per_epoch,
        summary_interval=steps_per_epoch,
        checkpoint_interval=steps_per_epoch * 5,
        max_to_keep=10,
        train_steps=550 * steps_per_epoch,
        validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
        validation_interval=steps_per_epoch,
        optimizer_config=optimizer_config)

    restrictions = [
        'task.train_data.is_training != None',
        'task.validation_data.is_training != None',
    ]
    return cfg.ExperimentConfig(task=task,
                                trainer=trainer,
                                restrictions=restrictions)
def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig:
    """Image classification on imagenet with resnet-rs.

    Returns:
      The assembled ExperimentConfig: a ResNet-50 backbone with model and
      training settings as listed below, trained with SGD, EMA, a cosine
      learning-rate schedule and a linear warmup.
    """
    train_batch_size = 4096
    eval_batch_size = 4096
    steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
    train_steps = 350 * steps_per_epoch

    backbone = backbones.Backbone(
        type='resnet',
        resnet=backbones.ResNet(model_id=50,
                                stem_type='v1',
                                resnetd_shortcut=True,
                                replace_stem_max_pool=True,
                                se_ratio=0.25,
                                stochastic_depth_drop_rate=0.0))
    norm_activation = common.NormActivation(norm_momentum=0.0,
                                            norm_epsilon=1e-5,
                                            use_sync_bn=False,
                                            activation='swish')
    model = ImageClassificationModel(num_classes=1001,
                                     input_size=[160, 160, 3],
                                     backbone=backbone,
                                     dropout_rate=0.25,
                                     norm_activation=norm_activation)

    losses = Losses(l2_weight_decay=4e-5, label_smoothing=0.1)
    train_data = DataConfig(
        input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
        is_training=True,
        global_batch_size=train_batch_size,
        aug_type=common.Augmentation(
            type='randaug', randaug=common.RandAugment(magnitude=10)))
    validation_data = DataConfig(
        input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
        is_training=False,
        global_batch_size=eval_batch_size)
    task = ImageClassificationTask(model=model,
                                   losses=losses,
                                   train_data=train_data,
                                   validation_data=validation_data)

    # The cosine schedule decays over the whole training run.
    optimizer_config = optimization.OptimizationConfig({
        'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
        'ema': {'average_decay': 0.9999, 'trainable_weights_only': False},
        'learning_rate': {
            'type': 'cosine',
            'cosine': {
                'initial_learning_rate': 1.6,
                'decay_steps': train_steps,
            },
        },
        'warmup': {
            'type': 'linear',
            'linear': {
                'warmup_steps': 5 * steps_per_epoch,
                'warmup_learning_rate': 0,
            },
        },
    })

    # Loop, summary, checkpoint and validation all run once per epoch.
    trainer = cfg.TrainerConfig(
        steps_per_loop=steps_per_epoch,
        summary_interval=steps_per_epoch,
        checkpoint_interval=steps_per_epoch,
        train_steps=train_steps,
        validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
        validation_interval=steps_per_epoch,
        optimizer_config=optimizer_config)

    restrictions = [
        'task.train_data.is_training != None',
        'task.validation_data.is_training != None',
    ]
    return cfg.ExperimentConfig(task=task,
                                trainer=trainer,
                                restrictions=restrictions)
Esempio n. 6
0
def image_classification_imagenet_deit_pretrain() -> cfg.ExperimentConfig:
  """Image classification on imagenet with vision transformer.

  Returns:
    The assembled ExperimentConfig: a 'vit-b16' backbone trained with AdamW,
    a cosine learning-rate schedule and a linear warmup, using RandAugment
    (without 'Cutout') plus mixup/cutmix on the training data.
  """
  train_batch_size = 4096  # originally was 1024 but 4096 better for tpu v3-32
  eval_batch_size = 4096  # originally was 1024 but 4096 better for tpu v3-32
  num_classes = 1001
  label_smoothing = 0.1
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  train_steps = 300 * steps_per_epoch

  vit = backbones.VisionTransformer(
      model_name='vit-b16',
      representation_size=768,
      init_stochastic_depth_rate=0.1,
      original_init=False,
      transformer=backbones.Transformer(
          dropout_rate=0.0, attention_dropout_rate=0.0))
  model = ImageClassificationModel(
      num_classes=num_classes,
      input_size=[224, 224, 3],
      kernel_initializer='zeros',
      backbone=backbones.Backbone(type='vit', vit=vit))

  # The same smoothing value is given to the loss and to mixup/cutmix.
  losses = Losses(
      l2_weight_decay=0.0,
      label_smoothing=label_smoothing,
      one_hot=False,
      soft_labels=True)
  train_data = DataConfig(
      input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
      is_training=True,
      global_batch_size=train_batch_size,
      aug_type=common.Augmentation(
          type='randaug',
          randaug=common.RandAugment(magnitude=9, exclude_ops=['Cutout'])),
      mixup_and_cutmix=common.MixupAndCutmix(
          label_smoothing=label_smoothing))
  validation_data = DataConfig(
      input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
      is_training=False,
      global_batch_size=eval_batch_size)
  task = ImageClassificationTask(
      model=model,
      losses=losses,
      train_data=train_data,
      validation_data=validation_data)

  # The initial learning rate is scaled linearly with the batch size relative
  # to 512; the cosine schedule decays over the whole training run.
  adamw_params = {
      'weight_decay_rate': 0.05,
      'include_in_weight_decay': r'.*(kernel|weight):0$',
      'gradient_clip_norm': 0.0,
  }
  cosine_params = {
      'initial_learning_rate': 0.0005 * train_batch_size / 512,
      'decay_steps': train_steps,
  }
  linear_warmup_params = {
      'warmup_steps': 5 * steps_per_epoch,
      'warmup_learning_rate': 0,
  }
  optimizer_config = optimization.OptimizationConfig({
      'optimizer': {'type': 'adamw', 'adamw': adamw_params},
      'learning_rate': {'type': 'cosine', 'cosine': cosine_params},
      'warmup': {'type': 'linear', 'linear': linear_warmup_params},
  })

  # Loop, summary, checkpoint and validation all run once per epoch.
  trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      train_steps=train_steps,
      validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_epoch,
      optimizer_config=optimizer_config)

  restrictions = [
      'task.train_data.is_training != None',
      'task.validation_data.is_training != None',
  ]
  config = cfg.ExperimentConfig(
      task=task, trainer=trainer, restrictions=restrictions)

  return config
Esempio n. 7
0
def panoptic_deeplab_coco() -> cfg.ExperimentConfig:
    """COCO panoptic segmentation with Panoptic Deeplab.

    Returns:
      The assembled ExperimentConfig: a dilated ResNet-50 backbone with an ASPP
      decoder, separate semantic and instance heads, and a trainer using Adam
      with a polynomial learning-rate decay and a linear warmup.
    """
    train_steps = 200000
    train_batch_size = 64
    eval_batch_size = 1
    steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
    validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

    num_panoptic_categories = 201
    num_thing_categories = 91
    ignore_label = 0

    # Per-category flag list: index 0 is never a thing, indices
    # 1..num_thing_categories are things, everything above is not.
    is_thing = [False] + [
        idx <= num_thing_categories
        for idx in range(1, num_panoptic_categories)
    ]

    input_size = [640, 640, 3]
    output_stride = 16
    aspp_dilation_rates = [6, 12, 18]
    multigrid = [1, 2, 4]
    stem_type = 'v1'
    # `np.math` (an alias of the stdlib `math` module) was deprecated in
    # NumPy 1.25 and removed in NumPy 2.0, so call `np.log2` directly.
    level = int(np.log2(output_stride))

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16',
                                  enable_xla=True),
        task=PanopticDeeplabTask(
            init_checkpoint=
            'gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/resnet50_v1/ckpt-436800',  # pylint: disable=line-too-long
            init_checkpoint_modules=['backbone'],
            model=PanopticDeeplab(
                num_classes=num_panoptic_categories,
                input_size=input_size,
                backbone=backbones.Backbone(
                    type='dilated_resnet',
                    dilated_resnet=backbones.DilatedResNet(
                        model_id=50,
                        stem_type=stem_type,
                        output_stride=output_stride,
                        multigrid=multigrid,
                        se_ratio=0.25,
                        last_stage_repeats=1,
                        stochastic_depth_drop_rate=0.2)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       num_filters=256,
                                       pool_kernel_size=input_size[:2],
                                       dilation_rates=aspp_dilation_rates,
                                       use_depthwise_convolution=True,
                                       dropout_rate=0.1)),
                semantic_head=SemanticHead(level=level,
                                           num_convs=1,
                                           num_filters=256,
                                           kernel_size=5,
                                           use_depthwise_convolution=True,
                                           upsample_factor=1,
                                           low_level=[3, 2],
                                           low_level_num_filters=[64, 32],
                                           fusion_num_output_filters=256,
                                           prediction_kernel_size=1),
                instance_head=InstanceHead(level=level,
                                           num_convs=1,
                                           num_filters=32,
                                           kernel_size=5,
                                           use_depthwise_convolution=True,
                                           upsample_factor=1,
                                           low_level=[3, 2],
                                           low_level_num_filters=[32, 16],
                                           fusion_num_output_filters=128,
                                           prediction_kernel_size=1),
                shared_decoder=False,
                generate_panoptic_masks=True,
                post_processor=PanopticDeeplabPostProcessor(
                    output_size=input_size[:2],
                    center_score_threshold=0.1,
                    # NOTE(review): this range covers ids 1..90, whereas
                    # `is_thing` above flags ids 1..91 -- confirm the
                    # difference is intentional.
                    thing_class_ids=list(range(1, num_thing_categories)),
                    label_divisor=256,
                    stuff_area_limit=4096,
                    ignore_label=ignore_label,
                    nms_kernel=41,
                    keep_k_centers=200,
                    rescale_predictions=True)),
            losses=Losses(label_smoothing=0.0,
                          ignore_label=ignore_label,
                          l2_weight_decay=0.0,
                          top_k_percent_pixels=0.2,
                          segmentation_loss_weight=1.0,
                          center_heatmap_loss_weight=200,
                          center_offset_loss_weight=0.01),
            train_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
                is_training=True,
                global_batch_size=train_batch_size,
                parser=Parser(
                    aug_scale_min=0.5,
                    aug_scale_max=1.5,
                    aug_rand_hflip=True,
                    aug_type=common.Augmentation(
                        type='autoaug',
                        autoaug=common.AutoAugment(
                            augmentation_name='panoptic_deeplab_policy')),
                    sigma=8.0,
                    small_instance_area_threshold=4096,
                    small_instance_weight=3.0)),
            validation_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
                is_training=False,
                global_batch_size=eval_batch_size,
                # Evaluation keeps groundtruth unresized, so a padded size is
                # supplied alongside; no augmentation is applied.
                parser=Parser(resize_eval_groundtruth=False,
                              groundtruth_padded_size=[640, 640],
                              aug_scale_min=1.0,
                              aug_scale_max=1.0,
                              aug_rand_hflip=False,
                              aug_type=None,
                              sigma=8.0,
                              small_instance_area_threshold=4096,
                              small_instance_weight=3.0),
                drop_remainder=False),
            evaluation=Evaluation(ignored_label=ignore_label,
                                  max_instances_per_category=256,
                                  offset=256 * 256 * 256,
                                  is_thing=is_thing,
                                  rescale_predictions=True,
                                  report_per_class_pq=False,
                                  report_per_class_iou=False,
                                  report_train_mean_iou=False)),
        trainer=cfg.TrainerConfig(
            train_steps=train_steps,
            validation_steps=validation_steps,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'adam',
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.0005,
                        'decay_steps': train_steps,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 2000,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config