Beispiel #1
0
class ImagenetFt(ImageNet):
    """Hyper-parameter overrides for fine-tuning on ImageNet.

    Subclasses `ImageNet` and defines its own `cfg` with `model`, `train`
    and `data` sections.
    """
    # Fine-tuning only runs for a limited number of steps, so it is
    # configured with lighter regularization than full training.
    cfg = hparams.Config(
        model={
            'dropout_rate': 0.000001,
            'survival_prob': 0.8,
        },
        train={
            'batch_size': 512,
            # NOTE(review): 0 presumably turns staged training off - confirm.
            'stages': 0,
            'epochs': 15,
            'optimizer': 'rmsprop',
            'lr_sched': 'constant',
            'lr_base': 0.0005,
            'lr_warmup_epoch': 1,
            'ema_decay': 0.9996,
            'weight_decay': 1e-5,
            'label_smoothing': 0.1,
            'min_steps': 10000,
            # NOTE(review): a float isize looks like a ratio of the eval
            # size rather than a pixel count - confirm against the consumer.
            'isize': 1.0,
        },
        data={
            'ds_name': 'imagenettfds',
            'augname': 'ft',
            # Both mixing augmentations are set to zero for fine-tuning.
            'mixup_alpha': 0,
            'cutmix_alpha': 0,
        },
    )
Beispiel #2
0
class ImageNet21k:
    """Train/eval hyper-parameters for the ImageNet21k dataset.

    `cfg` carries four sections: `model`, `data`, `train` and `eval`.
    """
    cfg = hparams.Config(
        model={
            'dropout_rate': 0.000001,
            'survival_prob': 1.0,
        },
        data={
            'ds_name': 'imagenet21k',
            'multiclass': True,
            # No augmentation policy and no mixup/cutmix for this dataset.
            'augname': None,
            'mixup_alpha': 0,
            'cutmix_alpha': 0,
        },
        train={
            'epochs': 60,
            'lr_base': 0.008,
            'lr_warmup_epoch': 1,
            'lr_sched': 'cosine',
            'label_smoothing': 0.0,
            'isize': 224,
            # Staged training is not applied here.
            'stages': 0,
            'sched': False,
        },
        eval={
            'batch_size': 128,
            'isize': 224,
        },
    )
Beispiel #3
0
def distort_image_with_autoaugment(image, augmentation_name):
    """Apply a named AutoAugment policy to `image`.

    AutoAugment is described in https://arxiv.org/abs/1805.09501.

    Args:
      image: `Tensor` of shape [height, width, 3] representing an image.
      augmentation_name: Name of the policy to use. Only `'v0'` and `'test'`
        are registered in this function.

    Returns:
      Whatever `build_and_apply_nas_policy` returns for the selected policy;
      presumably the augmented image (confirm against that helper).

    Raises:
      ValueError: If `augmentation_name` is not a registered policy name.
    """
    available_policies = {'v0': policy_v0, 'test': policy_vtest}

    policy_fn = available_policies.get(augmentation_name)
    if policy_fn is None:
        raise ValueError(
            'Invalid augmentation_name: {}'.format(augmentation_name))

    # The registry stores policy factories; call the chosen one to obtain
    # the policy itself.
    policy = policy_fn()

    # Constants handed to the policy ops (cutout and translate settings).
    augmentation_params = hparams.Config(cutout_const=100, translate_const=250)
    return build_and_apply_nas_policy(policy, image, augmentation_params)
Beispiel #4
0
class ImageNet:
    """Train/eval hyper-parameters for the ImageNet dataset.

    `cfg` carries three sections: `data`, `train` and `eval`.
    """
    cfg = hparams.Config(
        data={
            'ds_name': 'imagenet',
            'multiclass': False,
        },
        train={
            'epochs': 350,
            'lr_base': 0.016,
            'lr_warmup_epoch': 5,
            'lr_sched': 'exponential',
            'label_smoothing': 0.1,
        },
        eval={'batch_size': 8},
    )
Beispiel #5
0
def distort_image_with_randaugment(image, num_layers, magnitude):
    """Apply the RandAugment policy to `image`.

    RandAugment is described in https://arxiv.org/abs/1909.13719.

    For each of the `num_layers` layers one op index is sampled uniformly,
    and a `tf.cond` is emitted for every available op so that only the op
    whose index matches the sampled one modifies the image.

    Args:
      image: `Tensor` of shape [height, width, 3] representing an image.
      num_layers: Integer, number of augmentation transformations applied
        sequentially (N in the paper). Usually best in the range [1, 3].
      magnitude: Integer, magnitude shared by all augmentation operations
        (M in the paper). Usually best in the range [5, 30].

    Returns:
      The augmented version of `image`.
    """
    replace_value = [128, 128, 128]
    logging.info('Using RandAug.')
    augmentation_params = hparams.Config(cutout_const=40, translate_const=100)
    available_ops = [
        'AutoContrast', 'Equalize', 'Invert', 'Rotate', 'Posterize',
        'Solarize', 'Color', 'Contrast', 'Brightness', 'Sharpness', 'ShearX',
        'ShearY', 'TranslateX', 'TranslateY', 'Cutout', 'SolarizeAdd'
    ]
    num_ops = len(available_ops)

    for layer_idx in range(num_layers):
        # Index of the single op that will actually run in this layer.
        chosen_op_idx = tf.random_uniform([], maxval=num_ops, dtype=tf.int32)
        layer_magnitude = float(magnitude)
        with tf.name_scope('randaug_layer_{}'.format(layer_idx)):
            for op_idx, op_name in enumerate(available_ops):
                prob = tf.random_uniform([],
                                         minval=0.2,
                                         maxval=0.8,
                                         dtype=tf.float32)
                # The probability returned by the parser is not used here;
                # selection is done purely through `chosen_op_idx`.
                op_func, _, op_args = _parse_policy_info(
                    op_name, prob, layer_magnitude, replace_value,
                    augmentation_params)

                # Defaults pin this iteration's op and arguments to the
                # branch function.
                def apply_selected(selected_func=op_func,
                                   selected_args=op_args):
                    return selected_func(image, *selected_args)

                def keep_unchanged():
                    return image

                image = tf.cond(tf.equal(op_idx, chosen_op_idx),
                                apply_selected, keep_unchanged)
    return image
def efficientnetv2_config(model_name='efficientnetv2-s'):
    """Build the hparams config for an EfficientNetV2 variant.

    Args:
      model_name: Key looked up in `efficientnetv2_params`; the entry must
        unpack into nine values (block spec, width, depth, train size, eval
        size, dropout, randaug value, mix alpha, augmentation name).

    Returns:
      An `hparams.Config` with `model`, `train`, `eval` and `data` sections.
    """
    (block_spec, width_coef, depth_coef, train_isize, eval_isize,
     dropout_rate, ram, mix_alpha, aug_name) = efficientnetv2_params[model_name]

    decoded_blocks = BlockDecoder().decode(block_spec)

    return hparams.Config(
        model={
            'model_name': model_name,
            'blocks_args': decoded_blocks,
            'width_coefficient': width_coef,
            'depth_coefficient': depth_coef,
            'dropout_rate': dropout_rate,
        },
        train={'isize': train_isize, 'stages': 4, 'sched': True},
        eval={'isize': eval_isize},
        # The same alpha is used for both mixup and cutmix.
        data={
            'augname': aug_name,
            'ram': ram,
            'mixup_alpha': mix_alpha,
            'cutmix_alpha': mix_alpha,
        },
    )
def efficientnetv1_config(model_name='efficientnet-b0'):
    """Build the hparams config for an EfficientNetV1 variant.

    Args:
      model_name: Key looked up in `efficientnetv1_params`; the entry must
        unpack into four values (width coefficient, depth coefficient, eval
        image size, dropout rate).

    Returns:
      An `hparams.Config` with `model`, `eval`, `train` and `data` sections.
    """
    width_coef, depth_coef, eval_isize, dropout = (
        efficientnetv1_params[model_name])

    # Every V1 variant decodes the same block string (`v1_b0_block_str`);
    # only the coefficients, image size and dropout differ per model.
    decoded_blocks = BlockDecoder().decode(v1_b0_block_str)

    return hparams.Config(
        model={
            'model_name': model_name,
            'blocks_args': decoded_blocks,
            'width_coefficient': width_coef,
            'depth_coefficient': depth_coef,
            'dropout_rate': dropout,
        },
        eval={'isize': eval_isize},
        # Training image size is expressed as 80% of the eval size.
        train={'isize': 0.8},
        data={'augname': 'effnetv1_autoaug'},
    )
    def _decode_block_string(self, block_string):
        """Parse one block description string into an `hparams.Config`.

        The string is split on `_`; each token is split at its first digit
        into a key and a value (for example `k3` gives key `k`, value `3`).
        Tokens without a digit are ignored.

        Args:
          block_string: String of `_`-separated tokens. Keys `k`, `r`, `i`,
            `o`, `e` and `s` are required; `se` and `c` are optional.

        Returns:
          An `hparams.Config` with kernel_size, num_repeat, input_filters,
          output_filters, expand_ratio, se_ratio (None when `se` is absent),
          strides and conv_type (0 when `c` is absent).

        Raises:
          AssertionError: If `block_string` is not a `str`.
          KeyError: If a required key is missing from the string.
          ValueError: If a value cannot be converted to int/float.
        """
        assert isinstance(block_string, str)

        parsed = {}
        for token in block_string.split('_'):
            pieces = re.split(r'(\d.*)', token)
            if len(pieces) < 2:
                # No digit in the token, so there is no value to record.
                continue
            parsed[pieces[0]] = pieces[1]

        # Converted in the same order as the Config fields so that the first
        # failing field determines the raised error.
        kernel_size = int(parsed['k'])
        num_repeat = int(parsed['r'])
        input_filters = int(parsed['i'])
        output_filters = int(parsed['o'])
        expand_ratio = int(parsed['e'])
        raw_se = parsed.get('se')
        se_ratio = None if raw_se is None else float(raw_se)
        strides = int(parsed['s'])
        conv_type = int(parsed.get('c', 0))

        return hparams.Config(
            kernel_size=kernel_size,
            num_repeat=num_repeat,
            input_filters=input_filters,
            output_filters=output_filters,
            expand_ratio=expand_ratio,
            se_ratio=se_ratio,
            strides=strides,
            conv_type=conv_type,
        )
Beispiel #9
0
class ImageNetInput:
    """Input pipeline settings for ImageNet stored as TFRecord files.

    The expected data format is the one produced by:
        https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py
    """
    cfg = hparams.Config(
        data_dir=None,
        num_classes=1000,
        multiclass=False,
        tfds_split=None,
        # Each split names a file pattern, an image count and a slice.
        # NOTE(review): the slice is presumably applied to the list of files
        # matching `files`, so `minival` takes the first 20 train files and
        # `train` the remainder - confirm against the code that reads it.
        splits={
            'train': {
                'num_images': 1_256_144,
                'files': 'train*',
                'slice': slice(20, None),
            },
            'minival': {
                'num_images': 25_021,
                'files': 'train*',
                'slice': slice(0, 20),
            },
            'eval': {
                'num_images': 50_000,
                'files': 'val*',
                'slice': slice(0, None),
            },
            'trainval': {
                'num_images': 1_281_167,
                'files': 'train*',
                'slice': slice(0, None),
            },
        },
    )