Example #1
def generate_kornia_transforms(image_size=224, resize=256, mean=None, std=None, include_jitter=False):
    mean = torch.tensor(mean) if mean else torch.tensor([0.5, 0.5, 0.5])
    std = torch.tensor(std) if std else torch.tensor([0.1, 0.1, 0.1])
    if torch.cuda.is_available():
        mean = mean.cuda()
        std = std.cuda()
    train_transforms = [G.Resize((resize, resize))]
    if include_jitter:
        train_transforms.append(K.ColorJitter(brightness=0.4, contrast=0.4,
                                              saturation=0.4, hue=0.1))
    train_transforms.extend([
        K.RandomHorizontalFlip(p=0.5),
        K.RandomVerticalFlip(p=0.5),
        K.RandomRotation(90),
        K.RandomResizedCrop((image_size, image_size)),
        K.Normalize(mean, std),
    ])
    val_transforms = [
        G.Resize((resize, resize)),
        K.CenterCrop((image_size, image_size)),
        K.Normalize(mean, std),
    ]
    transforms = dict(train=nn.Sequential(*train_transforms),
                      val=nn.Sequential(*val_transforms))
    if torch.cuda.is_available():
        for k in transforms:
            transforms[k] = transforms[k].cuda()
    return transforms
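A usage sketch, assuming the snippet's aliases (import torch; import torch.nn as nn; import kornia.augmentation as K; import kornia.geometry.transform as G); the ImageNet statistics here are purely illustrative:

transforms = generate_kornia_transforms(image_size=224, resize=256,
                                        mean=[0.485, 0.456, 0.406],
                                        std=[0.229, 0.224, 0.225])
batch = torch.rand(8, 3, 300, 300)      # (B, C, H, W), values in [0, 1]
if torch.cuda.is_available():
    batch = batch.cuda()                # pipeline was moved to GPU above
out = transforms['train'](batch)        # Kornia modules act on whole batches -> (8, 3, 224, 224)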
def stitch(x, M_matrices, M_rotations, M_flip, label=True):
    # Preprocessing: stitch the six camera views into a single canvas
    data = []  # warped feature maps, one entry per view
    for i in range(6):
        # get a batch of *same*-view images
        img_batch = x[:, i, :, :, :]
        img_warp = kornia.warp_perspective(img_batch,
                                           M_matrices[i].unsqueeze(0).repeat(
                                               len(x), 1, 1),
                                           dsize=(219, 306))
        img_rotated = kornia.warp_affine(img_warp,
                                         M_rotations[i].unsqueeze(0).repeat(
                                             len(x), 1, 1),
                                         dsize=(219, 306))
        data.append(img_rotated)

    data = torch.cat(data, dim=0).view(6, len(x), 3, 219, 306)
    # max-pool the per-view maps onto a black canvas and ensemble them
    h, w = 219, 306
    agg = torch.zeros((x.shape[0], 3, 2 * h,
                       2 * w))  # (B, 3, 2h, 2w) canvas, twice each view's size
    if torch.cuda.is_available():
        agg = agg.cuda()
    #two bases: front and back view
    agg[:, :, 0:h, (w - w // 2):(w + w // 2)] = data[1]
    agg[:, :, h:, (w - w // 2):(w + w // 2)] = data[4]
    #top left
    agg[:, :, (0 + 55):(h + 55), (0 + 55):(w + 55)] = torch.max(
        data[0], agg[:, :, (0 + 55):(h + 55), (0 + 55):(w + 55)])
    #top right
    agg[:, :, (0 + 55):(h + 55), (w - 55):(-55)] = torch.max(
        data[2], agg[:, :, (0 + 55):(h + 55), (w - 55):(-55)])
    #bottom left
    agg[:, :, (h - 55):(-55), (0 + 55):(w + 55)] = torch.max(
        data[3], agg[:, :, (h - 55):(-55), (0 + 55):(w + 55)])
    #bottom right
    agg[:, :, (h - 55):(-55),
        (w - 55):(-55)] = torch.max(data[5], agg[:, :, (h - 55):(-55),
                                                 (w - 55):(-55)])

    #center-crop
    crop_fn = kornia.augmentation.CenterCrop(size=438)
    agg = crop_fn(agg)

    # apply the 90-degree flip given by M_flip
    agg = kornia.warp_affine(agg,
                             M_flip.repeat(len(x), 1, 1),
                             dsize=(438, 438))

    # normalize color; the stats depend on the `label` flag
    if label:
        normalize = K.Normalize(torch.tensor([0.698, 0.718, 0.730]),
                                torch.tensor([0.322, 0.313, 0.308]))
    else:
        normalize = K.Normalize(torch.tensor([0.548, 0.597, 0.630]),
                                torch.tensor([0.339, 0.340, 0.342]))

    return normalize(agg)
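stitch expects one 3x3 homography per view, one 2x3 rotation affine per view, and a single 2x3 flip affine; the real matrices come from camera calibration. A shape-only smoke test with identity placeholders (assuming import kornia and import kornia.augmentation as K, as elsewhere in this listing):

x = torch.rand(2, 6, 3, 256, 306)                           # two samples, six views each
M_matrices = torch.eye(3).unsqueeze(0).repeat(6, 1, 1)      # placeholder homographies
M_rotations = torch.eye(2, 3).unsqueeze(0).repeat(6, 1, 1)  # placeholder 2x3 affines
M_flip = torch.eye(2, 3)                                    # placeholder flip affine
if torch.cuda.is_available():                               # the canvas is allocated on GPU when available
    x, M_matrices, M_rotations, M_flip = (t.cuda() for t in (x, M_matrices, M_rotations, M_flip))
out = stitch(x, M_matrices, M_rotations, M_flip)            # expected shape: (2, 3, 438, 438)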
Example #3
    def __init__(self, img_size: int):
        super().__init__()
        self.preprocess = nn.Sequential(
            # K.augmentation.RandomResizedCrop((224, 224)),
            Resize((img_size, img_size)),  # resizing (rather than cropping) keeps the whole image visible
            augmentation.Normalize(Tensor(DATASET_IMAGE_MEAN), Tensor(DATASET_IMAGE_STD)),
        )
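Resize, Tensor, and the DATASET_IMAGE_* constants come from the snippet's surrounding module; a self-contained equivalent might look like this (the statistics are placeholders, and the module paths follow the older Kornia releases used throughout this listing):

import torch
from torch import nn, Tensor
from kornia import augmentation
from kornia.geometry.transform import Resize

DATASET_IMAGE_MEAN = [0.485, 0.456, 0.406]   # placeholder per-channel stats
DATASET_IMAGE_STD = [0.229, 0.224, 0.225]

preprocess = nn.Sequential(
    Resize((224, 224)),
    augmentation.Normalize(Tensor(DATASET_IMAGE_MEAN), Tensor(DATASET_IMAGE_STD)),
)
out = preprocess(torch.rand(4, 3, 256, 256))  # -> (4, 3, 224, 224)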
Example #4
    def __init__(self,
                 net,
                 layer_name_list=['avgpool'],
                 image_size=32,
                 projection_size=256,
                 projection_hidden_size=4096,
                 augment_fn=None,
                 moving_average_decay=0.99,
                 device_='cuda',
                 number_of_classes=10,
                 mean_data=torch.tensor([0.485, 0.456, 0.406]),
                 std_data=torch.tensor([0.229, 0.224, 0.225])):
        super().__init__()

        DEFAULT_AUG = nn.Sequential(
            augs.RandomHorizontalFlip(),
            augs.RandomResizedCrop((image_size, image_size)),
            augs.Normalize(mean=mean_data, std=std_data))

        self.augment = default(augment_fn, DEFAULT_AUG)
        self.device = device_

        self.online_encoder = NetWrapper(net,
                                         projection_size,
                                         projection_hidden_size,
                                         layer_name_list=layer_name_list).to(
                                             self.device)
        self.target_encoder = None
        self.target_ema_updater = EMA(moving_average_decay)

        self.online_predictor = MLP(projection_size, projection_size,
                                    projection_hidden_size).to(self.device)

        # send a mock image tensor to instantiate singleton parameters
        self.forward(torch.randn(2, 3, image_size, image_size).to(self.device))
def default_train_transforms():
    image_size = ImageClassificationData.image_size
    if _KORNIA_AVAILABLE and os.getenv("FLASH_TESTING", "0") != "1":
        # Preferred path: Kornia transforms run directly on tensors (and batched on device).
        return {
            "to_tensor_transform": torchvision.transforms.ToTensor(),
            "post_tensor_transform": nn.Sequential(
                K.RandomResizedCrop(image_size),
                K.RandomHorizontalFlip(),
            ),
            "per_batch_transform_on_device": nn.Sequential(
                K.Normalize(torch.tensor([0.485, 0.456, 0.406]),
                            torch.tensor([0.229, 0.224, 0.225])),
            ),
        }
    else:
        from torchvision import transforms as T  # noqa: F811
        return {
            "pre_tensor_transform": nn.Sequential(
                T.RandomResizedCrop(image_size),
                T.RandomHorizontalFlip(),
            ),
            "to_tensor_transform": torchvision.transforms.ToTensor(),
            "post_tensor_transform": T.Normalize([0.485, 0.456, 0.406],
                                                 [0.229, 0.224, 0.225]),
        }
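The returned dict maps Lightning Flash preprocessing hooks to callables. Conceptually the stages compose like this (a hand-rolled sketch of the hook order, not Flash's actual plumbing; pil_image stands for any PIL.Image):

tfms = default_train_transforms()
x = tfms["to_tensor_transform"](pil_image)        # per sample: PIL -> CHW tensor
x = tfms["post_tensor_transform"](x)              # per sample, in the dataloader workers
# ... samples are collated into a batch and moved to the accelerator ...
x = tfms["per_batch_transform_on_device"](x)      # per batch, on device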
    def __init__(self, opt):
        super().__init__()
        self.wrapped_dataset = create_dataset(opt['dataset'])
        self.cropped_img_size = opt['crop_size']
        self.key1 = opt_get(opt, ['key1'], 'hq')
        self.key2 = opt_get(opt, ['key2'], 'lq')
        for_sr = opt_get(
            opt, ['for_sr'],
            False)  # When set, color alterations and blurs are disabled.

        augmentations = [
            augs.RandomHorizontalFlip(),
            augs.RandomResizedCrop((self.cropped_img_size, self.cropped_img_size)),
        ]
        if not for_sr:
            augmentations.extend([
                RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
                augs.RandomGrayscale(p=0.2),
                RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1)
            ])
        if opt['normalize']:
            # The paper calls for normalization. Most datasets/models in this repo don't use this.
            # Recommend setting true if you want to train exactly like the paper.
            augmentations.append(
                augs.Normalize(mean=torch.tensor([0.485, 0.456, 0.406]),
                               std=torch.tensor([0.229, 0.224, 0.225])))
        self.aug = nn.Sequential(*augmentations)
    def default_transforms(self) -> Dict[str, Callable]:
        if self.training:
            post_tensor_transform = [
                RandomShortSideScale(min_size=256, max_size=320),
                RandomCrop(244),
                RandomHorizontalFlip(p=0.5),
            ]
        else:
            post_tensor_transform = [
                ShortSideScale(256),
            ]

        return {
            "post_tensor_transform": Compose([
                ApplyTransformToKey(
                    key="video",
                    transform=Compose([UniformTemporalSubsample(8)] + post_tensor_transform),
                ),
            ]),
            "per_batch_transform_on_device": Compose([
                ApplyTransformToKey(
                    key="video",
                    transform=K.VideoSequential(
                        K.Normalize(torch.tensor([0.45, 0.45, 0.45]), torch.tensor([0.225, 0.225, 0.225])),
                        data_format="BCTHW",
                        same_on_frame=False
                    )
                ),
            ]),
        }
Example #8
    def __init__(self, mean, std, scale=(0.9, 1.1), max_degrees=0) -> None:
        super(Transform, self).__init__()
        self.max_degrees = max_degrees
        self.aff = k.RandomAffine(max_degrees,
                                  resample=k.Resample.NEAREST,
                                  scale=scale)
        self.norm = k.Normalize(mean, std)
    def per_batch_transform_on_device(self) -> Callable:
        return ApplyToKeys(
            "video",
            K.VideoSequential(
                K.Normalize(self.mean, self.std),
                data_format=self.data_format,
                same_on_frame=self.same_on_frame,
            ),
        )
Example #10
    def __init__(self, im_size=224, device=torch.device('cuda:0')):
        super().__init__()
        self.mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
        self.std = torch.tensor([0.229, 0.224, 0.225]).to(device)
        self.aug = torch.nn.Sequential(
            kornia.geometry.transform.Resize(int(im_size * 1.2)),
            Kaug.RandomCrop((im_size, im_size), padding=8),
            Kaug.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
            Kaug.RandomHorizontalFlip(),
            Kaug.Normalize(mean=self.mean, std=self.std))
Example #11
def default_aug(image_size: Tuple[int, int] = (360, 360)) -> nn.Module:
    return nn.Sequential(
        aug.ColorJitter(contrast=0.1, brightness=0.1, saturation=0.1, p=0.8),
        aug.RandomVerticalFlip(),
        aug.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (0.5, 0.5)), p=0.1),
        aug.RandomResizedCrop(size=image_size, scale=(0.5, 1)),
        aug.Normalize(
            mean=torch.tensor([0.485, 0.456, 0.406]),
            std=torch.tensor([0.229, 0.224, 0.225]),
        ),
    )
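Because every Kornia op above is an nn.Module, the returned pipeline is called directly on batched tensors; a minimal sketch (assuming the module's own imports of aug, filters, and RandomApply):

aug_fn = default_aug()
views = aug_fn(torch.rand(8, 3, 360, 360))   # shape preserved: (8, 3, 360, 360)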
    def __init__(
        self,
        net,
        image_size,
        hidden_layer = -2,
        projection_size = 256,
        projection_hidden_size = 2048,
        augment_fn = None,
        augment_fn2 = None,
        moving_average_decay = 0.99,
        ppm_num_layers = 1,
        ppm_gamma = 2,
        distance_thres = 0.1, # the paper uses 0.7, but that leads to nearly all positive hits. need clarification on how the coordinates are normalized before distance calculation.
        similarity_temperature = 0.3,
        alpha = 1.
    ):
        super().__init__()

        # default SimCLR augmentation

        DEFAULT_AUG = nn.Sequential(
            RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
            augs.RandomGrayscale(p=0.2),
            augs.RandomHorizontalFlip(),
            RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
            augs.RandomResizedCrop((image_size, image_size)),
            augs.Normalize(mean=torch.tensor([0.485, 0.456, 0.406]), std=torch.tensor([0.229, 0.224, 0.225]))
        )

        self.augment1 = default(augment_fn, DEFAULT_AUG)
        self.augment2 = default(augment_fn2, self.augment1)

        self.online_encoder = NetWrapper(net, projection_size, projection_hidden_size, layer=hidden_layer)

        self.target_encoder = None
        self.target_ema_updater = EMA(moving_average_decay)

        self.distance_thres = distance_thres
        self.similarity_temperature = similarity_temperature
        self.alpha = alpha

        self.propagate_pixels = PPM(
            chan = projection_size,
            num_layers = ppm_num_layers,
            gamma = ppm_gamma
        )

        # get device of network and make wrapper same device
        device = get_module_device(net)
        self.to(device)

        # send a mock image tensor to instantiate singleton parameters
        self.forward(torch.randn(2, 3, image_size, image_size, device=device))
Example #13
def default_augmentation(image_size: Tuple[int, int] = (224, 224)) -> nn.Module:
    return nn.Sequential(
        tf.Resize(size=image_size),
        RandomApply(aug.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        aug.RandomGrayscale(p=0.2),
        aug.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        aug.RandomResizedCrop(size=image_size),
        aug.Normalize(
            mean=torch.tensor([0.485, 0.456, 0.406]),
            std=torch.tensor([0.229, 0.224, 0.225]),
        ),
    )
    def __init__(self,
                 net,
                 image_size=32,
                 layer_name_list=[-2],
                 projection_size=256,
                 projection_hidden_size=4096,
                 augment_fn=None,
                 moving_average_decay=0.99,
                 device_='cuda',
                 number_of_classes=10,
                 mean_data=torch.tensor([0.485, 0.456, 0.406]),
                 std_data=torch.tensor([0.229, 0.224, 0.225])):
        super().__init__()

        # default SimCLR augmentation

        DEFAULT_AUG = nn.Sequential(
            RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
            augs.RandomGrayscale(p=0.2), augs.RandomHorizontalFlip(),
            RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
            augs.RandomResizedCrop((image_size, image_size)),
            augs.Normalize(mean=mean_data, std=std_data))

        self.augment = default(augment_fn, DEFAULT_AUG)
        self.device = device_

        self.online_encoder = NetWrapper(net,
                                         projection_size,
                                         projection_hidden_size,
                                         layer_name_list=layer_name_list).to(
                                             self.device)
        self.target_encoder = None
        self.target_ema_updater = EMA(moving_average_decay)

        self.online_predictor = MLP(projection_size, projection_size,
                                    projection_hidden_size).to(self.device)
        self.online_predictor1 = MLP(projection_size, projection_size,
                                     512).to(self.device)
        self.online_predictor2 = MLP(projection_size, projection_size,
                                     512).to(self.device)

        # send a mock image tensor to instantiate singleton parameters
        self.forward(torch.randn(2, 3, image_size, image_size).to(self.device))
Example #15
    def __init__(self,
                 N_TFMS: int,
                 MAGN: int,
                 mean: Union[tuple, list, torch.Tensor],
                 std: Union[tuple, list, torch.Tensor],
                 transform_list: list = None,
                 use_resize: int = None,
                 image_size: tuple = None,
                 use_mix: int = None,
                 mix_p: float = .5):
        super().__init__()

        self.N_TFMS, self.MAGN = N_TFMS, MAGN
        self.use_mix, self.mix_p = use_mix, mix_p
        self.image_size = image_size

        if not isinstance(mean, torch.Tensor): mean = torch.Tensor(mean)
        if not isinstance(std, torch.Tensor): std = torch.Tensor(std)

        if self.use_mix is not None:
            self.mix_list = [
                K.RandomCutMix(self.image_size[0], self.image_size[1], p=1),
                K.RandomMixUp(p=1)
            ]

        self.use_resize = use_resize
        if use_resize is not None:
            assert len(image_size) == 2, \
                'Invalid `image_size`. Must be a tuple of form (h, w)'
            self.resize_list = [
                K.RandomResizedCrop(image_size),
                K.RandomCrop(image_size),
                K.CenterCrop(image_size)
            ]
            if self.use_resize < 3:
                self.resize = self.resize_list[use_resize]

        self.normalize = K.Normalize(mean, std)

        self.transform_list = transform_list
        if transform_list is None: self.transform_list = kornia_list(MAGN)
Example #16
    def __init__(self, resize, image_size, mean, std, include_jitter=False, Set="train"):
        super().__init__()
        self.resize = G.Resize((resize, resize), align_corners=False)
        # masks need nearest-neighbor resizing so label values are not blended
        self.mask_resize = lambda x: torch.nn.functional.interpolate(
            x, size=(resize, resize), mode='nearest')
        self.jit = (K.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
                    if include_jitter else (lambda x: x))
        self.affine = K.augmentation.RandomAffine([-90., 90.], [0., 0.15], None, [0., 0.15])
        self.affine_mask = K.augmentation.RandomAffine([-90., 90.], [0., 0.15], None, [0., 0.15],
                                                       resample="NEAREST", align_corners=False)
        self.normalize = K.Normalize(mean, std)
        self.crop = K.CenterCrop((image_size, image_size))
        self.mask_crop = K.CenterCrop((image_size, image_size), resample="NEAREST")
        self.Set = Set
Example #17
    def __init__(self,
                 net,
                 image_size,
                 hidden_layer=-2,
                 projection_size=256,
                 projection_hidden_size=4096,
                 augment_fn=None,
                 augment_fn2=None,
                 moving_average_decay=0.99):
        super().__init__()

        # default SimCLR augmentation

        DEFAULT_AUG = nn.Sequential(
            RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
            augs.RandomGrayscale(p=0.2), augs.RandomHorizontalFlip(),
            RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
            augs.RandomResizedCrop((image_size, image_size)),
            augs.Normalize(mean=torch.tensor([0.485, 0.456, 0.406]),
                           std=torch.tensor([0.229, 0.224, 0.225])))

        self.augment1 = default(augment_fn, DEFAULT_AUG)
        self.augment2 = default(augment_fn2, self.augment1)

        self.online_encoder = NetWrapper(net,
                                         projection_size,
                                         projection_hidden_size,
                                         layer=hidden_layer)
        self.target_encoder = None
        self.target_ema_updater = EMA(moving_average_decay)

        self.online_predictor = MLP(projection_size, projection_size,
                                    projection_hidden_size)

        # get device of network and make wrapper same device
        device = get_module_device(net)
        self.to(device)

        # send a mock image tensor to instantiate singleton parameters
        self.forward(torch.randn(2, 3, image_size, image_size, device=device))
Example #18
    def __init__(self,
                 brightness=(0.75, 1.25),
                 contrast=(0.75, 1.25),
                 saturation=(0., 2.),
                 translate=(0.125, 0.125),
                 normalized=True,
                 mean=0.5,
                 std=0.5,
                 device=None):
        if normalized:
            if isinstance(mean, (tuple, list)) and isinstance(std, (tuple, list)):
                if not device:
                    raise ValueError(
                        'Please specify a torch.device() when using per-channel mean and std'
                    )
                mean = torch.Tensor(mean).to(device)
                std = torch.Tensor(std).to(device)
            self.normalize = aug.Normalize(mean, std)
            self.denormalize = aug.Denormalize(mean, std)
        else:
            self.normalize, self.denormalize = None, None

        color_jitter = aug.ColorJitter(
            brightness=brightness,
            contrast=contrast,
            saturation=saturation,
            p=1.)  # rand_brightness, rand_contrast, rand_saturation
        affine = aug.RandomAffine(degrees=0,
                                  translate=translate,
                                  padding_mode=SamplePadding.BORDER,
                                  p=1.)  # rand_translate
        cutout = aug.RandomErasing(value=0.5, p=1.)  # rand_cutout

        self.augmentations = {
            'color': color_jitter,
            'translation': affine,
            'cutout': cutout
        }
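The constructor above (its class statement is omitted in the listing) builds a DiffAugment-style policy table. A usage sketch, with DiffAug as a purely hypothetical name for the enclosing class:

aug = DiffAug(normalized=True, mean=0.5, std=0.5)   # hypothetical class name
x = torch.rand(4, 3, 64, 64)
for name in ('color', 'translation', 'cutout'):
    x = aug.augmentations[name](x)                  # each policy keeps the (B, C, H, W) shape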
Example #19
def get_augmenter(augmenter_type: str,
                  image_size: ImageSizeType,
                  dataset_mean: DatasetStatType,
                  dataset_std: DatasetStatType,
                  padding: PaddingInputType = 1. / 8.,
                  pad_if_needed: bool = False,
                  subset_size: int = 2) -> Union[Module, Callable]:
    """
    
    Args:
        augmenter_type: augmenter type
        image_size: (height, width) image size
        dataset_mean: dataset mean value in CHW
        dataset_std: dataset standard deviation in CHW
        padding: percent of image size to pad on each border of the image. If a sequence of length 4 is provided,
            it is used to pad left, top, right, bottom borders respectively. If a sequence of length 2 is provided, it is
            used to pad left/right, top/bottom borders, respectively.
        pad_if_needed: bool flag for RandomCrop "pad_if_needed" option
        subset_size: number of augmentations used in subset

    Returns: nn.Module for Kornia augmentation or Callable for torchvision transform

    """
    if not isinstance(padding, tuple):
        assert isinstance(padding, float)
        padding = (padding, padding, padding, padding)

    assert len(padding) == 2 or len(padding) == 4
    if len(padding) == 2:
        # padding of length 2 is used to pad left/right, top/bottom borders, respectively
        # padding of length 4 is used to pad left, top, right, bottom borders respectively
        padding = (padding[0], padding[1], padding[0], padding[1])

    # image_size is (h, w); padding values are (left, top, right, bottom)
    padding = (int(image_size[1] * padding[0]),
               int(image_size[0] * padding[1]),
               int(image_size[1] * padding[2]),
               int(image_size[0] * padding[3]))

    augmenter_type = augmenter_type.strip().lower()

    if augmenter_type == "simple":
        return nn.Sequential(
            K.RandomCrop(size=image_size,
                         padding=padding,
                         pad_if_needed=pad_if_needed,
                         padding_mode='reflect'),
            K.RandomHorizontalFlip(p=0.5),
            K.Normalize(mean=torch.tensor(dataset_mean, dtype=torch.float32),
                        std=torch.tensor(dataset_std, dtype=torch.float32)),
        )

    elif augmenter_type == "fixed":
        return nn.Sequential(
            K.RandomHorizontalFlip(p=0.5),
            # K.RandomVerticalFlip(p=0.2),
            K.RandomResizedCrop(size=image_size,
                                scale=(0.8, 1.0),
                                ratio=(1., 1.)),
            RandomAugmentation(p=0.5,
                               augmentation=F.GaussianBlur2d(
                                   kernel_size=(3, 3),
                                   sigma=(1.5, 1.5),
                                   border_type='constant')),
            K.ColorJitter(contrast=(0.75, 1.5)),
            # additive Gaussian noise
            K.RandomErasing(p=0.1),
            # Multiply
            K.RandomAffine(degrees=(-25., 25.),
                           translate=(0.2, 0.2),
                           scale=(0.8, 1.2),
                           shear=(-8., 8.)),
            K.Normalize(mean=torch.tensor(dataset_mean, dtype=torch.float32),
                        std=torch.tensor(dataset_std, dtype=torch.float32)),
        )

    elif augmenter_type in ["validation", "test"]:
        return nn.Sequential(
            K.Normalize(mean=torch.tensor(dataset_mean, dtype=torch.float32),
                        std=torch.tensor(dataset_std, dtype=torch.float32)))

    elif augmenter_type == "randaugment":
        return nn.Sequential(
            K.RandomCrop(size=image_size,
                         padding=padding,
                         pad_if_needed=pad_if_needed,
                         padding_mode='reflect'),
            K.RandomHorizontalFlip(p=0.5),
            RandAugmentNS(n=subset_size, m=10),
            K.Normalize(mean=torch.tensor(dataset_mean, dtype=torch.float32),
                        std=torch.tensor(dataset_std, dtype=torch.float32)),
        )

    else:
        raise NotImplementedError(
            f"\"{augmenter_type}\" is not a supported augmenter type")
def test_video_classifier_finetune_fiftyone(tmpdir):

    with mock_encoded_video_dataset_folder(tmpdir) as (
        dir_name,
        total_duration,
    ):

        half_duration = total_duration / 2 - 1e-9

        train_dataset = fo.Dataset.from_dir(
            dir_name,
            dataset_type=fo.types.VideoClassificationDirectoryTree,
        )
        datamodule = VideoClassificationData.from_fiftyone(
            train_dataset=train_dataset,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
        )

        for sample in datamodule.train_dataset.data:
            expected_t_shape = 5
            assert sample["video"].shape[1] == expected_t_shape

        assert len(VideoClassifier.available_backbones()) > 5

        train_transform = {
            "post_tensor_transform": Compose([
                ApplyTransformToKey(
                    key="video",
                    transform=Compose([
                        UniformTemporalSubsample(8),
                        RandomShortSideScale(min_size=256, max_size=320),
                        RandomCrop(244),
                        RandomHorizontalFlip(p=0.5),
                    ]),
                ),
            ]),
            "per_batch_transform_on_device": Compose([
                ApplyTransformToKey(
                    key="video",
                    transform=K.VideoSequential(
                        K.Normalize(torch.tensor([0.45, 0.45, 0.45]), torch.tensor([0.225, 0.225, 0.225])),
                        K.augmentation.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
                        data_format="BCTHW",
                        same_on_frame=False
                    )
                ),
            ]),
        }

        datamodule = VideoClassificationData.from_fiftyone(
            train_dataset=train_dataset,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
            train_transform=train_transform
        )

        model = VideoClassifier(num_classes=datamodule.num_classes, pretrained=False)

        trainer = flash.Trainer(fast_dev_run=True)

        trainer.finetune(model, datamodule=datamodule)
Example #21
class TestVideoSequential:
    @pytest.mark.parametrize('shape', [(3, 4), (2, 3, 4), (2, 3, 5, 6),
                                       (2, 3, 4, 5, 6, 7)])
    @pytest.mark.parametrize('data_format', ["BCTHW", "BTCHW"])
    def test_exception(self, shape, data_format, device, dtype):
        aug_list = K.VideoSequential(K.ColorJitter(0.1, 0.1, 0.1, 0.1),
                                     data_format=data_format,
                                     same_on_frame=True)
        with pytest.raises(AssertionError):
            img = torch.randn(*shape, device=device, dtype=dtype)
            aug_list(img)

    @pytest.mark.parametrize(
        'augmentation',
        [
            K.RandomAffine(360, p=1.0),
            K.CenterCrop((3, 3), p=1.0),
            K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
            K.RandomCrop((5, 5), p=1.0),
            K.RandomErasing(p=1.0),
            K.RandomGrayscale(p=1.0),
            K.RandomHorizontalFlip(p=1.0),
            K.RandomVerticalFlip(p=1.0),
            K.RandomPerspective(p=1.0),
            K.RandomResizedCrop((5, 5), p=1.0),
            K.RandomRotation(360.0, p=1.0),
            K.RandomSolarize(p=1.0),
            K.RandomPosterize(p=1.0),
            K.RandomSharpness(p=1.0),
            K.RandomEqualize(p=1.0),
            K.RandomMotionBlur(3, 35.0, 0.5, p=1.0),
            K.Normalize(torch.tensor([0.5, 0.5, 0.5]),
                        torch.tensor([0.5, 0.5, 0.5]),
                        p=1.0),
            K.Denormalize(torch.tensor([0.5, 0.5, 0.5]),
                          torch.tensor([0.5, 0.5, 0.5]),
                          p=1.0),
        ],
    )
    @pytest.mark.parametrize('data_format', ["BCTHW", "BTCHW"])
    def test_augmentation(self, augmentation, data_format, device, dtype):
        input = torch.randint(255, (1, 3, 3, 5, 6), device=device,
                              dtype=dtype).repeat(2, 1, 1, 1, 1) / 255.0
        torch.manual_seed(21)
        aug_list = K.VideoSequential(augmentation,
                                     data_format=data_format,
                                     same_on_frame=True)
        reproducibility_test(input, aug_list)

    @pytest.mark.parametrize(
        'augmentations',
        [
            [
                K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
                K.RandomAffine(360, p=1.0)
            ],
            [
                K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
                K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0)
            ],
            [K.RandomAffine(360, p=1.0),
             kornia.color.BgrToRgb()],
            [
                K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=0.0),
                K.RandomAffine(360, p=0.0)
            ],
            [K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=0.0)],
            [K.RandomAffine(360, p=0.0)],
            [
                K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
                K.RandomAffine(360, p=1.0),
                K.RandomMixUp(p=1.0)
            ],
        ],
    )
    @pytest.mark.parametrize('data_format', ["BCTHW", "BTCHW"])
    @pytest.mark.parametrize('random_apply',
                             [1, (1, 1), (1, ), 10, True, False])
    def test_same_on_frame(self, augmentations, data_format, random_apply,
                           device, dtype):
        aug_list = K.VideoSequential(*augmentations,
                                     data_format=data_format,
                                     same_on_frame=True,
                                     random_apply=random_apply)

        if data_format == 'BCTHW':
            input = torch.randn(2, 3, 1, 5, 6, device=device,
                                dtype=dtype).repeat(1, 1, 4, 1, 1)
            output = aug_list(input)
            if aug_list.return_label:
                output, _ = output
            assert (output[:, :, 0] == output[:, :, 1]).all()
            assert (output[:, :, 1] == output[:, :, 2]).all()
            assert (output[:, :, 2] == output[:, :, 3]).all()
        if data_format == 'BTCHW':
            input = torch.randn(2, 1, 3, 5, 6, device=device,
                                dtype=dtype).repeat(1, 4, 1, 1, 1)
            output = aug_list(input)
            if aug_list.return_label:
                output, _ = output
            assert (output[:, 0] == output[:, 1]).all()
            assert (output[:, 1] == output[:, 2]).all()
            assert (output[:, 2] == output[:, 3]).all()
        reproducibility_test(input, aug_list)

    @pytest.mark.parametrize(
        'augmentations',
        [
            [K.RandomAffine(360, p=1.0)],
            [K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0)],
            [
                K.RandomAffine(360, p=0.0),
                K.ImageSequential(K.RandomAffine(360, p=0.0))
            ],
        ],
    )
    @pytest.mark.parametrize('data_format', ["BCTHW", "BTCHW"])
    def test_against_sequential(self, augmentations, data_format, device,
                                dtype):
        aug_list_1 = K.VideoSequential(*augmentations,
                                       data_format=data_format,
                                       same_on_frame=False)
        aug_list_2 = torch.nn.Sequential(*augmentations)

        if data_format == 'BCTHW':
            input = torch.randn(2, 3, 1, 5, 6, device=device,
                                dtype=dtype).repeat(1, 1, 4, 1, 1)
        if data_format == 'BTCHW':
            input = torch.randn(2, 1, 3, 5, 6, device=device,
                                dtype=dtype).repeat(1, 4, 1, 1, 1)

        torch.manual_seed(0)
        output_1 = aug_list_1(input)

        torch.manual_seed(0)
        if data_format == 'BCTHW':
            input = input.transpose(1, 2)
        output_2 = aug_list_2(input.reshape(-1, 3, 5, 6))
        output_2 = output_2.view(2, 4, 3, 5, 6)
        if data_format == 'BCTHW':
            output_2 = output_2.transpose(1, 2)
        assert (output_1 == output_2).all(), dict(aug_list_1._params)

    @pytest.mark.jit
    @pytest.mark.skip(reason="turn off due to Union Type")
    def test_jit(self, device, dtype):
        B, C, D, H, W = 2, 3, 5, 4, 4
        img = torch.ones(B, C, D, H, W, device=device, dtype=dtype)
        op = K.VideoSequential(K.ColorJitter(0.1, 0.1, 0.1, 0.1),
                               same_on_frame=True)
        op_jit = torch.jit.script(op)
        assert_close(op(img), op_jit(img))
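Outside the test harness, VideoSequential is used like any other module; a minimal sketch (assuming import torch and import kornia.augmentation as K):

aug = K.VideoSequential(K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
                        data_format="BCTHW",
                        same_on_frame=True)
video = torch.rand(2, 3, 4, 8, 8)   # (batch, channels, time, height, width)
out = aug(video)                    # same_on_frame=True applies one jitter to all 4 frames of a clip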
Example #22
    def __init__(self,
                 net,
                 image_size,
                 hidden_layer_pixel=-2,
                 hidden_layer_instance=-2,
                 projection_size=256,
                 projection_hidden_size=2048,
                 augment_fn=None,
                 augment_fn2=None,
                 prob_rand_hflip=0.25,
                 moving_average_decay=0.99,
                 ppm_num_layers=1,
                 ppm_gamma=2,
                 distance_thres=0.7,
                 similarity_temperature=0.3,
                 alpha=1.,
                 use_pixpro=True,
                 cutout_ratio_range=(0.6, 0.8),
                 cutout_interpolate_mode='nearest',
                 coord_cutout_interpolate_mode='bilinear'):
        super().__init__()

        DEFAULT_AUG = nn.Sequential(
            RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
            augs.RandomGrayscale(p=0.2),
            RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
            augs.RandomSolarize(p=0.5),
            augs.Normalize(mean=torch.tensor([0.485, 0.456, 0.406]),
                           std=torch.tensor([0.229, 0.224, 0.225])))

        self.augment1 = default(augment_fn, DEFAULT_AUG)
        self.augment2 = default(augment_fn2, self.augment1)
        self.prob_rand_hflip = prob_rand_hflip

        self.online_encoder = NetWrapper(
            net=net,
            projection_size=projection_size,
            projection_hidden_size=projection_hidden_size,
            layer_pixel=hidden_layer_pixel,
            layer_instance=hidden_layer_instance)

        self.target_encoder = None
        self.target_ema_updater = EMA(moving_average_decay)

        self.distance_thres = distance_thres
        self.similarity_temperature = similarity_temperature
        self.alpha = alpha

        self.use_pixpro = use_pixpro

        if use_pixpro:
            self.propagate_pixels = PPM(chan=projection_size,
                                        num_layers=ppm_num_layers,
                                        gamma=ppm_gamma)

        self.cutout_ratio_range = cutout_ratio_range
        self.cutout_interpolate_mode = cutout_interpolate_mode
        self.coord_cutout_interpolate_mode = coord_cutout_interpolate_mode

        # instance level predictor
        self.online_predictor = MLP(projection_size, projection_size,
                                    projection_hidden_size)

        # get device of network and make wrapper same device
        device = get_module_device(net)
        self.to(device)

        # send a mock image tensor to instantiate singleton parameters
        self.forward(torch.randn(2, 3, image_size, image_size, device=device))
if __name__ == '__main__':

    # 1. Download a video clip dataset. Find more datasets at https://pytorchvideo.readthedocs.io/en/latest/data.html
    download_data("https://pl-flash-data.s3.amazonaws.com/kinetics.zip")

    # 2. [Optional] Specify transforms to be used during training.
    # Flash helps you to place your transform exactly where you want.
    # Learn more at:
    # https://lightning-flash.readthedocs.io/en/latest/general/data.html#flash.core.data.process.Preprocess
    post_tensor_transform = [
        UniformTemporalSubsample(8),
        RandomShortSideScale(min_size=256, max_size=320)
    ]
    per_batch_transform_on_device = [
        K.Normalize(torch.tensor([0.45, 0.45, 0.45]),
                    torch.tensor([0.225, 0.225, 0.225]))
    ]

    train_post_tensor_transform = post_tensor_transform + [
        RandomCrop(244), RandomHorizontalFlip(p=0.5)
    ]
    val_post_tensor_transform = post_tensor_transform + [CenterCrop(244)]
    train_per_batch_transform_on_device = per_batch_transform_on_device

    def make_transform(
            post_tensor_transform: List[Callable] = post_tensor_transform,
            per_batch_transform_on_device: List[Callable] = per_batch_transform_on_device):
        return {
            "post_tensor_transform":
            Compose([