Ejemplo n.º 1
0
def imagenet_augment_transform(
    size: Union[int, Sequence[int]] = 224,
    scale: Optional[Sequence[float]] = None,
    ratio: Optional[Sequence[float]] = None,
    interpolation: str = "random",
    hflip: Union[float, bool] = 0.5,
    vflip: Union[float, bool] = False,
    color_jitter: Union[Sequence, float] = 0.4,
    auto_augment: Optional[str] = None,
    mean: Optional[Sequence[float]] = IMAGENET_DEFAULT_MEAN,
) -> T.Compose:
    """
    Build the default ImageNet training transform with data augmentation.

    The pipeline is a random-resized crop, optional horizontal/vertical
    flips, and then either an auto-augment policy (when ``auto_augment`` is
    set) or color jitter. No tensor conversion or normalization step is
    added here.

    Adapted from: https://github.com/rwightman/pytorch-image-models/blob/master/timm/data/transforms_factory.py

    Args:
        size: Size handed to ``RandomResizedCropAndInterpolation``; an int
            or a tuple/list (its minimum is used for the auto-augment
            translation constant).
        scale: Scale range for the crop. Defaults to ``(0.08, 1.0)``.
        ratio: Aspect-ratio range for the crop. Defaults to
            ``(3/4, 4/3)``.
        interpolation: Interpolation name handed to the crop. Any value
            other than ``"random"`` is also converted with ``_pil_interp``
            and forwarded to the auto-augment policy.
        hflip: Horizontal flip probability. Falsy or non-positive values
            disable the flip; ``True`` is treated as probability 1.0.
        vflip: Vertical flip probability, same conventions as ``hflip``.
        color_jitter: A scalar reused for the first three ``ColorJitter``
            arguments, or a list/tuple of 3 or 4 values. ``None`` disables
            it. Ignored when ``auto_augment`` is set.
        auto_augment: Policy string. Names starting with ``"rand"`` go to
            ``rand_augment_transform``; anything else goes to
            ``auto_augment_transform``.
        mean: Per-channel mean; each value is multiplied by 255 and capped
            at 255 to form the policy's ``img_mean``. ``None`` falls back
            to ``IMAGENET_DEFAULT_MEAN``.

    Returns:
        A ``T.Compose`` wrapping the assembled transforms.

    Raises:
        AssertionError: If ``auto_augment`` is truthy but not a string, or
            if a list/tuple ``color_jitter`` does not have 3 or 4 items.
    """
    # ``mean`` is annotated Optional, so honor None instead of failing later
    # when the auto-augment branch iterates over it.
    if mean is None:
        mean = IMAGENET_DEFAULT_MEAN

    scale = tuple(scale or (0.08, 1.0))  # default imagenet scale range
    ratio = tuple(ratio or (3.0 / 4.0, 4.0 / 3.0))  # default imagenet ratio range

    transforms = [
        RandomResizedCropAndInterpolation(size, scale, ratio, interpolation),
    ]

    # A falsy value (False, 0, None) or a non-positive probability disables
    # the flip; bool flags are converted to an explicit float probability.
    if hflip and hflip > 0:
        transforms.append(T.RandomHorizontalFlip(p=float(hflip)))
    if vflip and vflip > 0.0:
        transforms.append(T.RandomVerticalFlip(p=float(vflip)))

    if auto_augment:
        assert isinstance(auto_augment, str)
        size_min = min(size) if isinstance(size, (tuple, list)) else size

        aa_params = dict(
            translate_const=int(size_min * 0.45),
            img_mean=tuple(min(255, round(255 * x)) for x in mean),
        )
        # "random" (or an empty value) leaves the policy's own default in place.
        if interpolation and interpolation != "random":
            aa_params["interpolation"] = _pil_interp(interpolation)

        if auto_augment.startswith("rand"):
            transforms.append(rand_augment_transform(auto_augment, aa_params))
        else:
            transforms.append(auto_augment_transform(auto_augment, aa_params))

    elif color_jitter is not None:
        # color jitter is enabled when not using AA
        if isinstance(color_jitter, (list, tuple)):
            # color jitter should be a 3-tuple/list if spec brightness/contrast/saturation
            # or 4 if also augmenting hue
            assert len(color_jitter) in (3, 4)
        else:
            # if it's a scalar, duplicate for brightness, contrast, and saturation, no hue
            color_jitter = (float(color_jitter), ) * 3
        transforms.append(T.ColorJitter(*color_jitter))

    return T.Compose(transforms)
def instantiate_transforms(cfg: DictConfig, global_config: DictConfig = None):
    """
    Load an individual transformation described by ``cfg``.

    When ``cfg._target_`` is ``"aa"`` an auto-augment style transform is
    built from ``cfg.policy`` and the input settings in ``global_config``;
    any other target is handed to ``instantiate(cfg)`` unchanged.

    Args:
        cfg: Transform config. Must expose ``_target_``; for the ``"aa"``
            target it must also expose ``policy`` (a string).
        global_config: Global config, required only for the ``"aa"``
            target. ``global_config.input`` must provide ``input_size``,
            ``mean`` and ``interpolation``.

    Returns:
        The transform built by ``rand_augment_transform`` (policy starts
        with ``"rand"``), ``augment_and_mix_transform`` (policy starts with
        ``"augmix"``), ``auto_augment_transform`` (any other policy), or
        whatever ``instantiate(cfg)`` returns for non-``"aa"`` targets.

    Raises:
        ValueError: If the target is ``"aa"`` and ``global_config`` is None.
    """
    if cfg._target_ != "aa":
        return instantiate(cfg)

    # The "aa" path reads image size, mean and interpolation from the global
    # config; fail with a clear message rather than an AttributeError on None.
    if global_config is None:
        raise ValueError(
            'global_config is required when cfg._target_ == "aa"')

    input_cfg = global_config.input
    img_size_min = input_cfg.input_size
    aa_params = dict(
        translate_const=int(img_size_min * 0.45),
        img_mean=tuple(min(255, round(255 * x)) for x in input_cfg.mean),
    )

    # "random" (or an empty value) leaves the policy's own default in place.
    interpolation = input_cfg.interpolation
    if interpolation and interpolation != "random":
        aa_params["interpolation"] = _pil_interp(interpolation)

    # Load autoaugment transformations
    policy = cfg.policy
    if policy.startswith("rand"):
        return rand_augment_transform(policy, aa_params)
    if policy.startswith("augmix"):
        aa_params["translate_pct"] = 0.3
        return augment_and_mix_transform(policy, aa_params)
    return auto_augment_transform(policy, aa_params)
Ejemplo n.º 3
0
def transforms_imagenet_train(
    img_size=224,
    scale=(0.08, 1.0),
    color_jitter=0.4,
    auto_augment=None,
    interpolation='random',
    use_prefetcher=False,
    mean=IMAGENET_DEFAULT_MEAN,
    std=IMAGENET_DEFAULT_STD,
    re_prob=0.,
    re_mode='const',
    re_count=1,
    re_num_splits=0,
    separate=False,
    squish=False,
    do_8_rotations=False,
):
    """
    Build the ImageNet training transform pipeline.

    The pipeline has three stages: a primary stage (resize/crop plus flip or
    rotation), a secondary stage (auto-augment policy or color jitter), and
    a final stage (conversion, normalization and optional random erasing).

    If separate==True, the transforms are returned as a tuple of 3 separate transforms
    for use in a mixing dataset that passes
     * all data through the first (primary) transform, called the 'clean' data
     * a portion of the data through the secondary transform
     * normalizes and converts the branches above with the third, final transform

    Args:
        img_size: Target size; an int or a tuple/list.
        scale: Scale range for ``RandomResizedCropAndInterpolation``
            (unused when ``squish`` is set).
        color_jitter: A scalar reused for the first three ``ColorJitter``
            arguments, or a list/tuple of 3 or 4 values. ``None`` disables
            it. Ignored when ``auto_augment`` is set.
        auto_augment: Policy string. Names starting with ``'rand'`` use
            ``rand_augment_transform``, names starting with ``'augmix'`` use
            ``augment_and_mix_transform`` (with ``translate_pct=0.3``),
            anything else uses ``auto_augment_transform``.
        interpolation: Interpolation name for the random-resized crop. Any
            value other than ``'random'`` is also converted with
            ``_pil_interp`` and forwarded to the auto-augment policy.
        use_prefetcher: If True the final stage is only ``ToNumpy()``;
            tensor conversion, normalization and random erasing are not
            added here.
        mean: Per-channel mean for ``Normalize`` and the policy's
            ``img_mean``.
        std: Per-channel std for ``Normalize``.
        re_prob: Random-erasing probability; erasing is added only when
            this is > 0 and ``use_prefetcher`` is False.
        re_mode: ``mode`` argument for ``RandomErasing``.
        re_count: ``max_count`` argument for ``RandomErasing``.
        re_num_splits: ``num_splits`` argument for ``RandomErasing``.
        separate: Return the three stages as separate ``Compose`` objects.
        squish: Resize directly to ``img_size`` with bilinear interpolation
            instead of using a random-resized crop.
        do_8_rotations: Use ``RandomRotation()`` in the primary stage
            instead of ``RandomHorizontalFlip()``.

    Returns:
        A single ``transforms.Compose``, or a 3-tuple of ``Compose`` objects
        ``(primary, secondary, final)`` when ``separate`` is True.

    Raises:
        AssertionError: If ``auto_augment`` is truthy but not a string, or
            if a list/tuple ``color_jitter`` does not have 3 or 4 items.
    """
    # Accept lists as well as tuples. Without this a list size would be
    # wrapped into a tuple of lists under ``squish`` and would break the
    # ``translate_const`` arithmetic in the auto-augment branch.
    if isinstance(img_size, list):
        img_size = tuple(img_size)

    if squish:
        if not isinstance(img_size, tuple):
            img_size = (img_size, img_size)
        resize = transforms.Resize(img_size, _pil_interp('bilinear'))
    else:
        resize = RandomResizedCropAndInterpolation(img_size,
                                                   scale=scale,
                                                   interpolation=interpolation)

    if do_8_rotations:
        primary_tfl = [resize, RandomRotation()]
    else:
        primary_tfl = [resize, transforms.RandomHorizontalFlip()]

    secondary_tfl = []
    if auto_augment:
        assert isinstance(auto_augment, str)
        img_size_min = min(img_size) if isinstance(img_size, tuple) else img_size
        aa_params = dict(
            translate_const=int(img_size_min * 0.45),
            img_mean=tuple(min(255, round(255 * x)) for x in mean),
        )
        # 'random' (or an empty value) leaves the policy's own default in place.
        if interpolation and interpolation != 'random':
            aa_params['interpolation'] = _pil_interp(interpolation)

        if auto_augment.startswith('rand'):
            secondary_tfl.append(
                rand_augment_transform(auto_augment, aa_params))
        elif auto_augment.startswith('augmix'):
            aa_params['translate_pct'] = 0.3
            secondary_tfl.append(
                augment_and_mix_transform(auto_augment, aa_params))
        else:
            secondary_tfl.append(
                auto_augment_transform(auto_augment, aa_params))
    elif color_jitter is not None:
        # color jitter is enabled when not using AA
        if isinstance(color_jitter, (list, tuple)):
            # color jitter should be a 3-tuple/list if spec brightness/contrast/saturation
            # or 4 if also augmenting hue
            assert len(color_jitter) in (3, 4)
        else:
            # if it's a scalar, duplicate for brightness, contrast, and saturation, no hue
            color_jitter = (float(color_jitter), ) * 3
        secondary_tfl.append(transforms.ColorJitter(*color_jitter))

    final_tfl = []
    if use_prefetcher:
        # prefetcher and collate will handle tensor conversion and norm
        final_tfl.append(ToNumpy())
    else:
        final_tfl += [
            transforms.ToTensor(),
            transforms.Normalize(mean=torch.tensor(mean),
                                 std=torch.tensor(std)),
        ]
        if re_prob > 0.:
            final_tfl.append(
                RandomErasing(re_prob,
                              mode=re_mode,
                              max_count=re_count,
                              num_splits=re_num_splits,
                              device='cpu'))

    if separate:
        return (transforms.Compose(primary_tfl),
                transforms.Compose(secondary_tfl),
                transforms.Compose(final_tfl))
    return transforms.Compose(primary_tfl + secondary_tfl + final_tfl)